1 /* 2 * Copyright (C) 2005 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <utils/String8.h> 18 19 #include <utils/Log.h> 20 #include <utils/String16.h> 21 #include <utils/TextOutput.h> 22 #include <utils/threads.h> 23 24 #include <private/utils/Static.h> 25 26 #include <ctype.h> 27 28 /* 29 * Functions outside android is below the namespace android, since they use 30 * functions and constants in android namespace. 31 */ 32 33 // --------------------------------------------------------------------------- 34 35 namespace android { 36 37 static const char32_t kByteMask = 0x000000BF; 38 static const char32_t kByteMark = 0x00000080; 39 40 // Surrogates aren't valid for UTF-32 characters, so define some 41 // constants that will let us screen them out. 42 static const char32_t kUnicodeSurrogateHighStart = 0x0000D800; 43 static const char32_t kUnicodeSurrogateHighEnd = 0x0000DBFF; 44 static const char32_t kUnicodeSurrogateLowStart = 0x0000DC00; 45 static const char32_t kUnicodeSurrogateLowEnd = 0x0000DFFF; 46 static const char32_t kUnicodeSurrogateStart = kUnicodeSurrogateHighStart; 47 static const char32_t kUnicodeSurrogateEnd = kUnicodeSurrogateLowEnd; 48 static const char32_t kUnicodeMaxCodepoint = 0x0010FFFF; 49 50 // Mask used to set appropriate bits in first byte of UTF-8 sequence, 51 // indexed by number of bytes in the sequence. 52 // 0xxxxxxx 53 // -> (00-7f) 7bit. Bit mask for the first byte is 0x00000000 54 // 110yyyyx 10xxxxxx 55 // -> (c0-df)(80-bf) 11bit. Bit mask is 0x000000C0 56 // 1110yyyy 10yxxxxx 10xxxxxx 57 // -> (e0-ef)(80-bf)(80-bf) 16bit. Bit mask is 0x000000E0 58 // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx 59 // -> (f0-f7)(80-bf)(80-bf)(80-bf) 21bit. Bit mask is 0x000000F0 60 static const char32_t kFirstByteMark[] = { 61 0x00000000, 0x00000000, 0x000000C0, 0x000000E0, 0x000000F0 62 }; 63 64 // Separator used by resource paths. This is not platform dependent contrary 65 // to OS_PATH_SEPARATOR. 66 #define RES_PATH_SEPARATOR '/' 67 68 // Return number of utf8 bytes required for the character. 69 static size_t utf32_to_utf8_bytes(char32_t srcChar) 70 { 71 size_t bytesToWrite; 72 73 // Figure out how many bytes the result will require. 74 if (srcChar < 0x00000080) 75 { 76 bytesToWrite = 1; 77 } 78 else if (srcChar < 0x00000800) 79 { 80 bytesToWrite = 2; 81 } 82 else if (srcChar < 0x00010000) 83 { 84 if ((srcChar < kUnicodeSurrogateStart) 85 || (srcChar > kUnicodeSurrogateEnd)) 86 { 87 bytesToWrite = 3; 88 } 89 else 90 { 91 // Surrogates are invalid UTF-32 characters. 92 return 0; 93 } 94 } 95 // Max code point for Unicode is 0x0010FFFF. 96 else if (srcChar <= kUnicodeMaxCodepoint) 97 { 98 bytesToWrite = 4; 99 } 100 else 101 { 102 // Invalid UTF-32 character. 103 return 0; 104 } 105 106 return bytesToWrite; 107 } 108 109 // Write out the source character to <dstP>. 110 111 static void utf32_to_utf8(uint8_t* dstP, char32_t srcChar, size_t bytes) 112 { 113 dstP += bytes; 114 switch (bytes) 115 { /* note: everything falls through. */ 116 case 4: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 117 case 3: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 118 case 2: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 119 case 1: *--dstP = (uint8_t)(srcChar | kFirstByteMark[bytes]); 120 } 121 } 122 123 // --------------------------------------------------------------------------- 124 125 static SharedBuffer* gEmptyStringBuf = NULL; 126 static char* gEmptyString = NULL; 127 128 extern int gDarwinCantLoadAllObjects; 129 int gDarwinIsReallyAnnoying; 130 131 static inline char* getEmptyString() 132 { 133 gEmptyStringBuf->acquire(); 134 return gEmptyString; 135 } 136 137 void initialize_string8() 138 { 139 // HACK: This dummy dependency forces linking libutils Static.cpp, 140 // which is needed to initialize String8/String16 classes. 141 // These variables are named for Darwin, but are needed elsewhere too, 142 // including static linking on any platform. 143 gDarwinIsReallyAnnoying = gDarwinCantLoadAllObjects; 144 145 SharedBuffer* buf = SharedBuffer::alloc(1); 146 char* str = (char*)buf->data(); 147 *str = 0; 148 gEmptyStringBuf = buf; 149 gEmptyString = str; 150 } 151 152 void terminate_string8() 153 { 154 SharedBuffer::bufferFromData(gEmptyString)->release(); 155 gEmptyStringBuf = NULL; 156 gEmptyString = NULL; 157 } 158 159 // --------------------------------------------------------------------------- 160 161 static char* allocFromUTF8(const char* in, size_t len) 162 { 163 if (len > 0) { 164 SharedBuffer* buf = SharedBuffer::alloc(len+1); 165 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 166 if (buf) { 167 char* str = (char*)buf->data(); 168 memcpy(str, in, len); 169 str[len] = 0; 170 return str; 171 } 172 return NULL; 173 } 174 175 return getEmptyString(); 176 } 177 178 template<typename T, typename L> 179 static char* allocFromUTF16OrUTF32(const T* in, L len) 180 { 181 if (len == 0) return getEmptyString(); 182 183 size_t bytes = 0; 184 const T* end = in+len; 185 const T* p = in; 186 187 while (p < end) { 188 bytes += utf32_to_utf8_bytes(*p); 189 p++; 190 } 191 192 SharedBuffer* buf = SharedBuffer::alloc(bytes+1); 193 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 194 if (buf) { 195 p = in; 196 char* str = (char*)buf->data(); 197 char* d = str; 198 while (p < end) { 199 const T c = *p++; 200 size_t len = utf32_to_utf8_bytes(c); 201 utf32_to_utf8((uint8_t*)d, c, len); 202 d += len; 203 } 204 *d = 0; 205 206 return str; 207 } 208 209 return getEmptyString(); 210 } 211 212 static char* allocFromUTF16(const char16_t* in, size_t len) 213 { 214 if (len == 0) return getEmptyString(); 215 216 const size_t bytes = utf8_length_from_utf16(in, len); 217 218 SharedBuffer* buf = SharedBuffer::alloc(bytes+1); 219 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 220 if (buf) { 221 char* str = (char*)buf->data(); 222 223 utf16_to_utf8(in, len, str, bytes+1); 224 225 return str; 226 } 227 228 return getEmptyString(); 229 } 230 231 static char* allocFromUTF32(const char32_t* in, size_t len) 232 { 233 return allocFromUTF16OrUTF32<char32_t, size_t>(in, len); 234 } 235 236 // --------------------------------------------------------------------------- 237 238 String8::String8() 239 : mString(getEmptyString()) 240 { 241 } 242 243 String8::String8(const String8& o) 244 : mString(o.mString) 245 { 246 SharedBuffer::bufferFromData(mString)->acquire(); 247 } 248 249 String8::String8(const char* o) 250 : mString(allocFromUTF8(o, strlen(o))) 251 { 252 if (mString == NULL) { 253 mString = getEmptyString(); 254 } 255 } 256 257 String8::String8(const char* o, size_t len) 258 : mString(allocFromUTF8(o, len)) 259 { 260 if (mString == NULL) { 261 mString = getEmptyString(); 262 } 263 } 264 265 String8::String8(const String16& o) 266 : mString(allocFromUTF16(o.string(), o.size())) 267 { 268 } 269 270 String8::String8(const char16_t* o) 271 : mString(allocFromUTF16(o, strlen16(o))) 272 { 273 } 274 275 String8::String8(const char16_t* o, size_t len) 276 : mString(allocFromUTF16(o, len)) 277 { 278 } 279 280 String8::String8(const char32_t* o) 281 : mString(allocFromUTF32(o, strlen32(o))) 282 { 283 } 284 285 String8::String8(const char32_t* o, size_t len) 286 : mString(allocFromUTF32(o, len)) 287 { 288 } 289 290 String8::~String8() 291 { 292 SharedBuffer::bufferFromData(mString)->release(); 293 } 294 295 void String8::setTo(const String8& other) 296 { 297 SharedBuffer::bufferFromData(other.mString)->acquire(); 298 SharedBuffer::bufferFromData(mString)->release(); 299 mString = other.mString; 300 } 301 302 status_t String8::setTo(const char* other) 303 { 304 SharedBuffer::bufferFromData(mString)->release(); 305 mString = allocFromUTF8(other, strlen(other)); 306 if (mString) return NO_ERROR; 307 308 mString = getEmptyString(); 309 return NO_MEMORY; 310 } 311 312 status_t String8::setTo(const char* other, size_t len) 313 { 314 SharedBuffer::bufferFromData(mString)->release(); 315 mString = allocFromUTF8(other, len); 316 if (mString) return NO_ERROR; 317 318 mString = getEmptyString(); 319 return NO_MEMORY; 320 } 321 322 status_t String8::setTo(const char16_t* other, size_t len) 323 { 324 SharedBuffer::bufferFromData(mString)->release(); 325 mString = allocFromUTF16(other, len); 326 if (mString) return NO_ERROR; 327 328 mString = getEmptyString(); 329 return NO_MEMORY; 330 } 331 332 status_t String8::setTo(const char32_t* other, size_t len) 333 { 334 SharedBuffer::bufferFromData(mString)->release(); 335 mString = allocFromUTF32(other, len); 336 if (mString) return NO_ERROR; 337 338 mString = getEmptyString(); 339 return NO_MEMORY; 340 } 341 342 status_t String8::append(const String8& other) 343 { 344 const size_t otherLen = other.bytes(); 345 if (bytes() == 0) { 346 setTo(other); 347 return NO_ERROR; 348 } else if (otherLen == 0) { 349 return NO_ERROR; 350 } 351 352 return real_append(other.string(), otherLen); 353 } 354 355 status_t String8::append(const char* other) 356 { 357 return append(other, strlen(other)); 358 } 359 360 status_t String8::append(const char* other, size_t otherLen) 361 { 362 if (bytes() == 0) { 363 return setTo(other, otherLen); 364 } else if (otherLen == 0) { 365 return NO_ERROR; 366 } 367 368 return real_append(other, otherLen); 369 } 370 371 status_t String8::real_append(const char* other, size_t otherLen) 372 { 373 const size_t myLen = bytes(); 374 375 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 376 ->editResize(myLen+otherLen+1); 377 if (buf) { 378 char* str = (char*)buf->data(); 379 mString = str; 380 str += myLen; 381 memcpy(str, other, otherLen); 382 str[otherLen] = '\0'; 383 return NO_ERROR; 384 } 385 return NO_MEMORY; 386 } 387 388 char* String8::lockBuffer(size_t size) 389 { 390 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 391 ->editResize(size+1); 392 if (buf) { 393 char* str = (char*)buf->data(); 394 mString = str; 395 return str; 396 } 397 return NULL; 398 } 399 400 void String8::unlockBuffer() 401 { 402 unlockBuffer(strlen(mString)); 403 } 404 405 status_t String8::unlockBuffer(size_t size) 406 { 407 if (size != this->size()) { 408 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 409 ->editResize(size+1); 410 if (buf) { 411 char* str = (char*)buf->data(); 412 str[size] = 0; 413 mString = str; 414 return NO_ERROR; 415 } 416 } 417 418 return NO_MEMORY; 419 } 420 421 ssize_t String8::find(const char* other, size_t start) const 422 { 423 size_t len = size(); 424 if (start >= len) { 425 return -1; 426 } 427 const char* s = mString+start; 428 const char* p = strstr(s, other); 429 return p ? p-mString : -1; 430 } 431 432 void String8::toLower() 433 { 434 toLower(0, size()); 435 } 436 437 void String8::toLower(size_t start, size_t length) 438 { 439 const size_t len = size(); 440 if (start >= len) { 441 return; 442 } 443 if (start+length > len) { 444 length = len-start; 445 } 446 char* buf = lockBuffer(len); 447 buf += start; 448 while (length > 0) { 449 *buf = tolower(*buf); 450 buf++; 451 length--; 452 } 453 unlockBuffer(len); 454 } 455 456 void String8::toUpper() 457 { 458 toUpper(0, size()); 459 } 460 461 void String8::toUpper(size_t start, size_t length) 462 { 463 const size_t len = size(); 464 if (start >= len) { 465 return; 466 } 467 if (start+length > len) { 468 length = len-start; 469 } 470 char* buf = lockBuffer(len); 471 buf += start; 472 while (length > 0) { 473 *buf = toupper(*buf); 474 buf++; 475 length--; 476 } 477 unlockBuffer(len); 478 } 479 480 size_t String8::getUtf32Length() const 481 { 482 return utf32_length(mString, length()); 483 } 484 485 int32_t String8::getUtf32At(size_t index, size_t *next_index) const 486 { 487 return utf32_at(mString, length(), index, next_index); 488 } 489 490 size_t String8::getUtf32(char32_t* dst, size_t dst_len) const 491 { 492 return utf8_to_utf32(mString, length(), dst, dst_len); 493 } 494 495 TextOutput& operator<<(TextOutput& to, const String8& val) 496 { 497 to << val.string(); 498 return to; 499 } 500 501 // --------------------------------------------------------------------------- 502 // Path functions 503 504 void String8::setPathName(const char* name) 505 { 506 setPathName(name, strlen(name)); 507 } 508 509 void String8::setPathName(const char* name, size_t len) 510 { 511 char* buf = lockBuffer(len); 512 513 memcpy(buf, name, len); 514 515 // remove trailing path separator, if present 516 if (len > 0 && buf[len-1] == OS_PATH_SEPARATOR) 517 len--; 518 519 buf[len] = '\0'; 520 521 unlockBuffer(len); 522 } 523 524 String8 String8::getPathLeaf(void) const 525 { 526 const char* cp; 527 const char*const buf = mString; 528 529 cp = strrchr(buf, OS_PATH_SEPARATOR); 530 if (cp == NULL) 531 return String8(*this); 532 else 533 return String8(cp+1); 534 } 535 536 String8 String8::getPathDir(void) const 537 { 538 const char* cp; 539 const char*const str = mString; 540 541 cp = strrchr(str, OS_PATH_SEPARATOR); 542 if (cp == NULL) 543 return String8(""); 544 else 545 return String8(str, cp - str); 546 } 547 548 String8 String8::walkPath(String8* outRemains) const 549 { 550 const char* cp; 551 const char*const str = mString; 552 const char* buf = str; 553 554 cp = strchr(buf, OS_PATH_SEPARATOR); 555 if (cp == buf) { 556 // don't include a leading '/'. 557 buf = buf+1; 558 cp = strchr(buf, OS_PATH_SEPARATOR); 559 } 560 561 if (cp == NULL) { 562 String8 res = buf != str ? String8(buf) : *this; 563 if (outRemains) *outRemains = String8(""); 564 return res; 565 } 566 567 String8 res(buf, cp-buf); 568 if (outRemains) *outRemains = String8(cp+1); 569 return res; 570 } 571 572 /* 573 * Helper function for finding the start of an extension in a pathname. 574 * 575 * Returns a pointer inside mString, or NULL if no extension was found. 576 */ 577 char* String8::find_extension(void) const 578 { 579 const char* lastSlash; 580 const char* lastDot; 581 int extLen; 582 const char* const str = mString; 583 584 // only look at the filename 585 lastSlash = strrchr(str, OS_PATH_SEPARATOR); 586 if (lastSlash == NULL) 587 lastSlash = str; 588 else 589 lastSlash++; 590 591 // find the last dot 592 lastDot = strrchr(lastSlash, '.'); 593 if (lastDot == NULL) 594 return NULL; 595 596 // looks good, ship it 597 return const_cast<char*>(lastDot); 598 } 599 600 String8 String8::getPathExtension(void) const 601 { 602 char* ext; 603 604 ext = find_extension(); 605 if (ext != NULL) 606 return String8(ext); 607 else 608 return String8(""); 609 } 610 611 String8 String8::getBasePath(void) const 612 { 613 char* ext; 614 const char* const str = mString; 615 616 ext = find_extension(); 617 if (ext == NULL) 618 return String8(*this); 619 else 620 return String8(str, ext - str); 621 } 622 623 String8& String8::appendPath(const char* name) 624 { 625 // TODO: The test below will fail for Win32 paths. Fix later or ignore. 626 if (name[0] != OS_PATH_SEPARATOR) { 627 if (*name == '\0') { 628 // nothing to do 629 return *this; 630 } 631 632 size_t len = length(); 633 if (len == 0) { 634 // no existing filename, just use the new one 635 setPathName(name); 636 return *this; 637 } 638 639 // make room for oldPath + '/' + newPath 640 int newlen = strlen(name); 641 642 char* buf = lockBuffer(len+1+newlen); 643 644 // insert a '/' if needed 645 if (buf[len-1] != OS_PATH_SEPARATOR) 646 buf[len++] = OS_PATH_SEPARATOR; 647 648 memcpy(buf+len, name, newlen+1); 649 len += newlen; 650 651 unlockBuffer(len); 652 653 return *this; 654 } else { 655 setPathName(name); 656 return *this; 657 } 658 } 659 660 String8& String8::convertToResPath() 661 { 662 #if OS_PATH_SEPARATOR != RES_PATH_SEPARATOR 663 size_t len = length(); 664 if (len > 0) { 665 char * buf = lockBuffer(len); 666 for (char * end = buf + len; buf < end; ++buf) { 667 if (*buf == OS_PATH_SEPARATOR) 668 *buf = RES_PATH_SEPARATOR; 669 } 670 unlockBuffer(len); 671 } 672 #endif 673 return *this; 674 } 675 676 }; // namespace android 677 678 // --------------------------------------------------------------------------- 679 680 size_t strlen32(const char32_t *s) 681 { 682 const char32_t *ss = s; 683 while ( *ss ) 684 ss++; 685 return ss-s; 686 } 687 688 size_t strnlen32(const char32_t *s, size_t maxlen) 689 { 690 const char32_t *ss = s; 691 while ((maxlen > 0) && *ss) { 692 ss++; 693 maxlen--; 694 } 695 return ss-s; 696 } 697 698 size_t utf8_length(const char *src) 699 { 700 const char *cur = src; 701 size_t ret = 0; 702 while (*cur != '\0') { 703 const char first_char = *cur++; 704 if ((first_char & 0x80) == 0) { // ASCII 705 ret += 1; 706 continue; 707 } 708 // (UTF-8's character must not be like 10xxxxxx, 709 // but 110xxxxx, 1110xxxx, ... or 1111110x) 710 if ((first_char & 0x40) == 0) { 711 return 0; 712 } 713 714 int32_t mask, to_ignore_mask; 715 size_t num_to_read = 0; 716 char32_t utf32 = 0; 717 for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80; 718 num_to_read < 5 && (first_char & mask); 719 num_to_read++, to_ignore_mask |= mask, mask >>= 1) { 720 if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx 721 return 0; 722 } 723 // 0x3F == 00111111 724 utf32 = (utf32 << 6) + (*cur++ & 0x3F); 725 } 726 // "first_char" must be (110xxxxx - 11110xxx) 727 if (num_to_read == 5) { 728 return 0; 729 } 730 to_ignore_mask |= mask; 731 utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1)); 732 if (utf32 > android::kUnicodeMaxCodepoint) { 733 return 0; 734 } 735 736 ret += num_to_read; 737 } 738 return ret; 739 } 740 741 size_t utf32_length(const char *src, size_t src_len) 742 { 743 if (src == NULL || src_len == 0) { 744 return 0; 745 } 746 size_t ret = 0; 747 const char* cur; 748 const char* end; 749 size_t num_to_skip; 750 for (cur = src, end = src + src_len, num_to_skip = 1; 751 cur < end; 752 cur += num_to_skip, ret++) { 753 const char first_char = *cur; 754 num_to_skip = 1; 755 if ((first_char & 0x80) == 0) { // ASCII 756 continue; 757 } 758 int32_t mask; 759 760 for (mask = 0x40; (first_char & mask); num_to_skip++, mask >>= 1) { 761 } 762 } 763 return ret; 764 } 765 766 size_t utf8_length_from_utf32(const char32_t *src, size_t src_len) 767 { 768 if (src == NULL || src_len == 0) { 769 return 0; 770 } 771 size_t ret = 0; 772 const char32_t *end = src + src_len; 773 while (src < end) { 774 ret += android::utf32_to_utf8_bytes(*src++); 775 } 776 return ret; 777 } 778 779 size_t utf8_length_from_utf16(const char16_t *src, size_t src_len) 780 { 781 if (src == NULL || src_len == 0) { 782 return 0; 783 } 784 size_t ret = 0; 785 const char16_t* const end = src + src_len; 786 while (src < end) { 787 if ((*src & 0xFC00) == 0xD800 && (src + 1) < end 788 && (*++src & 0xFC00) == 0xDC00) { 789 // surrogate pairs are always 4 bytes. 790 ret += 4; 791 src++; 792 } else { 793 ret += android::utf32_to_utf8_bytes((char32_t) *src++); 794 } 795 } 796 return ret; 797 } 798 799 static int32_t utf32_at_internal(const char* cur, size_t *num_read) 800 { 801 const char first_char = *cur; 802 if ((first_char & 0x80) == 0) { // ASCII 803 *num_read = 1; 804 return *cur; 805 } 806 cur++; 807 char32_t mask, to_ignore_mask; 808 size_t num_to_read = 0; 809 char32_t utf32 = first_char; 810 for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0xFFFFFF80; 811 (first_char & mask); 812 num_to_read++, to_ignore_mask |= mask, mask >>= 1) { 813 // 0x3F == 00111111 814 utf32 = (utf32 << 6) + (*cur++ & 0x3F); 815 } 816 to_ignore_mask |= mask; 817 utf32 &= ~(to_ignore_mask << (6 * (num_to_read - 1))); 818 819 *num_read = num_to_read; 820 return static_cast<int32_t>(utf32); 821 } 822 823 int32_t utf32_at(const char *src, size_t src_len, 824 size_t index, size_t *next_index) 825 { 826 if (index >= src_len) { 827 return -1; 828 } 829 size_t dummy_index; 830 if (next_index == NULL) { 831 next_index = &dummy_index; 832 } 833 size_t num_read; 834 int32_t ret = utf32_at_internal(src + index, &num_read); 835 if (ret >= 0) { 836 *next_index = index + num_read; 837 } 838 839 return ret; 840 } 841 842 size_t utf8_to_utf32(const char* src, size_t src_len, 843 char32_t* dst, size_t dst_len) 844 { 845 if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { 846 return 0; 847 } 848 849 const char* cur = src; 850 const char* end = src + src_len; 851 char32_t* cur_utf32 = dst; 852 const char32_t* end_utf32 = dst + dst_len; 853 while (cur_utf32 < end_utf32 && cur < end) { 854 size_t num_read; 855 *cur_utf32++ = 856 static_cast<char32_t>(utf32_at_internal(cur, &num_read)); 857 cur += num_read; 858 } 859 if (cur_utf32 < end_utf32) { 860 *cur_utf32 = 0; 861 } 862 return static_cast<size_t>(cur_utf32 - dst); 863 } 864 865 size_t utf32_to_utf8(const char32_t* src, size_t src_len, 866 char* dst, size_t dst_len) 867 { 868 if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { 869 return 0; 870 } 871 const char32_t *cur_utf32 = src; 872 const char32_t *end_utf32 = src + src_len; 873 char *cur = dst; 874 const char *end = dst + dst_len; 875 while (cur_utf32 < end_utf32 && cur < end) { 876 size_t len = android::utf32_to_utf8_bytes(*cur_utf32); 877 android::utf32_to_utf8((uint8_t *)cur, *cur_utf32++, len); 878 cur += len; 879 } 880 if (cur < end) { 881 *cur = '\0'; 882 } 883 return cur - dst; 884 } 885 886 size_t utf16_to_utf8(const char16_t* src, size_t src_len, 887 char* dst, size_t dst_len) 888 { 889 if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { 890 return 0; 891 } 892 const char16_t* cur_utf16 = src; 893 const char16_t* const end_utf16 = src + src_len; 894 char *cur = dst; 895 const char* const end = dst + dst_len; 896 while (cur_utf16 < end_utf16 && cur < end) { 897 char32_t utf32; 898 // surrogate pairs 899 if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16) { 900 utf32 = (*cur_utf16++ - 0xD800) << 10; 901 utf32 |= *cur_utf16++ - 0xDC00; 902 utf32 += 0x10000; 903 } else { 904 utf32 = (char32_t) *cur_utf16++; 905 } 906 size_t len = android::utf32_to_utf8_bytes(utf32); 907 android::utf32_to_utf8((uint8_t*)cur, utf32, len); 908 cur += len; 909 } 910 if (cur < end) { 911 *cur = '\0'; 912 } 913 return cur - dst; 914 } 915