Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2005 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <utils/String8.h>
     18 
     19 #include <utils/Log.h>
     20 #include <utils/String16.h>
     21 #include <utils/TextOutput.h>
     22 #include <utils/threads.h>
     23 
     24 #include <private/utils/Static.h>
     25 
     26 #include <ctype.h>
     27 
/*
 * Functions outside the android namespace are defined below it, since they
 * use functions and constants from the android namespace.
 */
     32 
     33 // ---------------------------------------------------------------------------
     34 
     35 namespace android {
     36 
// Used together when emitting the trailing bytes of a multi-byte UTF-8
// sequence: ((c | kByteMark) & kByteMask) sets the top bit, clears bit 6 and
// keeps the low six bits of c, producing a byte of the form 10xxxxxx.
static const char32_t kByteMask = 0x000000BF;
static const char32_t kByteMark = 0x00000080;

// Surrogates aren't valid for UTF-32 characters, so define some
// constants that will let us screen them out.
static const char32_t kUnicodeSurrogateHighStart  = 0x0000D800;
static const char32_t kUnicodeSurrogateHighEnd    = 0x0000DBFF;
static const char32_t kUnicodeSurrogateLowStart   = 0x0000DC00;
static const char32_t kUnicodeSurrogateLowEnd     = 0x0000DFFF;
// Combined range covering both the high and the low surrogates.
static const char32_t kUnicodeSurrogateStart      = kUnicodeSurrogateHighStart;
static const char32_t kUnicodeSurrogateEnd        = kUnicodeSurrogateLowEnd;
// Largest value accepted as a code point by the converters in this file.
static const char32_t kUnicodeMaxCodepoint        = 0x0010FFFF;

// Mask used to set appropriate bits in first byte of UTF-8 sequence,
// indexed by number of bytes in the sequence.
// 0xxxxxxx
// -> (00-7f) 7bit. Bit mask for the first byte is 0x00000000
// 110yyyyx 10xxxxxx
// -> (c0-df)(80-bf) 11bit. Bit mask is 0x000000C0
// 1110yyyy 10yxxxxx 10xxxxxx
// -> (e0-ef)(80-bf)(80-bf) 16bit. Bit mask is 0x000000E0
// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
// -> (f0-f7)(80-bf)(80-bf)(80-bf) 21bit. Bit mask is 0x000000F0
// Index 0 is only a placeholder so that the byte count (1-4) can be used
// directly as the index.
static const char32_t kFirstByteMark[] = {
    0x00000000, 0x00000000, 0x000000C0, 0x000000E0, 0x000000F0
};

// Separator used by resource paths. This is not platform dependent contrary
// to OS_PATH_SEPARATOR.
#define RES_PATH_SEPARATOR '/'
     67 
     68 // Return number of utf8 bytes required for the character.
     69 static size_t utf32_to_utf8_bytes(char32_t srcChar)
     70 {
     71     size_t bytesToWrite;
     72 
     73     // Figure out how many bytes the result will require.
     74     if (srcChar < 0x00000080)
     75     {
     76         bytesToWrite = 1;
     77     }
     78     else if (srcChar < 0x00000800)
     79     {
     80         bytesToWrite = 2;
     81     }
     82     else if (srcChar < 0x00010000)
     83     {
     84         if ((srcChar < kUnicodeSurrogateStart)
     85          || (srcChar > kUnicodeSurrogateEnd))
     86         {
     87             bytesToWrite = 3;
     88         }
     89         else
     90         {
     91             // Surrogates are invalid UTF-32 characters.
     92             return 0;
     93         }
     94     }
     95     // Max code point for Unicode is 0x0010FFFF.
     96     else if (srcChar <= kUnicodeMaxCodepoint)
     97     {
     98         bytesToWrite = 4;
     99     }
    100     else
    101     {
    102         // Invalid UTF-32 character.
    103         return 0;
    104     }
    105 
    106     return bytesToWrite;
    107 }
    108 
    109 // Write out the source character to <dstP>.
    110 
    111 static void utf32_to_utf8(uint8_t* dstP, char32_t srcChar, size_t bytes)
    112 {
    113     dstP += bytes;
    114     switch (bytes)
    115     {   /* note: everything falls through. */
    116         case 4: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
    117         case 3: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
    118         case 2: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
    119         case 1: *--dstP = (uint8_t)(srcChar | kFirstByteMark[bytes]);
    120     }
    121 }
    122 
    123 // ---------------------------------------------------------------------------
    124 
// Shared buffer backing the empty string, and a pointer to its character
// data. Both are set by initialize_string8() and reset to NULL by
// terminate_string8().
static SharedBuffer* gEmptyStringBuf = NULL;
static char* gEmptyString = NULL;

// Pair of globals used only to create a link-time dependency; see the HACK
// note in initialize_string8().
extern int gDarwinCantLoadAllObjects;
int gDarwinIsReallyAnnoying;
    130 
// Returns the shared empty string after taking a reference on its buffer.
// Dereferences gEmptyStringBuf, so initialize_string8() must have run first.
static inline char* getEmptyString()
{
    gEmptyStringBuf->acquire();
    return gEmptyString;
}
    136 
    137 void initialize_string8()
    138 {
    139     // HACK: This dummy dependency forces linking libutils Static.cpp,
    140     // which is needed to initialize String8/String16 classes.
    141     // These variables are named for Darwin, but are needed elsewhere too,
    142     // including static linking on any platform.
    143     gDarwinIsReallyAnnoying = gDarwinCantLoadAllObjects;
    144 
    145     SharedBuffer* buf = SharedBuffer::alloc(1);
    146     char* str = (char*)buf->data();
    147     *str = 0;
    148     gEmptyStringBuf = buf;
    149     gEmptyString = str;
    150 }
    151 
// Releases one reference on the empty-string buffer and clears the globals.
// After this call getEmptyString() must not be used until
// initialize_string8() runs again, because it dereferences gEmptyStringBuf.
void terminate_string8()
{
    SharedBuffer::bufferFromData(gEmptyString)->release();
    gEmptyStringBuf = NULL;
    gEmptyString = NULL;
}
    158 
    159 // ---------------------------------------------------------------------------
    160 
    161 static char* allocFromUTF8(const char* in, size_t len)
    162 {
    163     if (len > 0) {
    164         SharedBuffer* buf = SharedBuffer::alloc(len+1);
    165         LOG_ASSERT(buf, "Unable to allocate shared buffer");
    166         if (buf) {
    167             char* str = (char*)buf->data();
    168             memcpy(str, in, len);
    169             str[len] = 0;
    170             return str;
    171         }
    172         return NULL;
    173     }
    174 
    175     return getEmptyString();
    176 }
    177 
    178 template<typename T, typename L>
    179 static char* allocFromUTF16OrUTF32(const T* in, L len)
    180 {
    181     if (len == 0) return getEmptyString();
    182 
    183     size_t bytes = 0;
    184     const T* end = in+len;
    185     const T* p = in;
    186 
    187     while (p < end) {
    188         bytes += utf32_to_utf8_bytes(*p);
    189         p++;
    190     }
    191 
    192     SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
    193     LOG_ASSERT(buf, "Unable to allocate shared buffer");
    194     if (buf) {
    195         p = in;
    196         char* str = (char*)buf->data();
    197         char* d = str;
    198         while (p < end) {
    199             const T c = *p++;
    200             size_t len = utf32_to_utf8_bytes(c);
    201             utf32_to_utf8((uint8_t*)d, c, len);
    202             d += len;
    203         }
    204         *d = 0;
    205 
    206         return str;
    207     }
    208 
    209     return getEmptyString();
    210 }
    211 
    212 static char* allocFromUTF16(const char16_t* in, size_t len)
    213 {
    214     if (len == 0) return getEmptyString();
    215 
    216     const size_t bytes = utf8_length_from_utf16(in, len);
    217 
    218     SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
    219     LOG_ASSERT(buf, "Unable to allocate shared buffer");
    220     if (buf) {
    221         char* str = (char*)buf->data();
    222 
    223         utf16_to_utf8(in, len, str, bytes+1);
    224 
    225         return str;
    226     }
    227 
    228     return getEmptyString();
    229 }
    230 
// Converts <len> UTF-32 characters to a new UTF-8 shared buffer by
// instantiating allocFromUTF16OrUTF32 for char32_t.
static char* allocFromUTF32(const char32_t* in, size_t len)
{
    return allocFromUTF16OrUTF32<char32_t, size_t>(in, len);
}
    235 
    236 // ---------------------------------------------------------------------------
    237 
// Default constructor: shares the global empty string (takes a reference
// on its buffer via getEmptyString()).
String8::String8()
    : mString(getEmptyString())
{
}
    242 
// Copy constructor: shares o's buffer and takes an additional reference on
// it instead of copying the characters.
String8::String8(const String8& o)
    : mString(o.mString)
{
    SharedBuffer::bufferFromData(mString)->acquire();
}
    248 
// Constructs from a NUL-terminated C string. o is passed to strlen(), so it
// must not be NULL.
String8::String8(const char* o)
    : mString(allocFromUTF8(o, strlen(o)))
{
    // allocFromUTF8 returns NULL on allocation failure; fall back to the
    // shared empty string so mString is never NULL.
    if (mString == NULL) {
        mString = getEmptyString();
    }
}
    256 
// Constructs from the first len bytes of o.
String8::String8(const char* o, size_t len)
    : mString(allocFromUTF8(o, len))
{
    // allocFromUTF8 returns NULL on allocation failure; fall back to the
    // shared empty string so mString is never NULL.
    if (mString == NULL) {
        mString = getEmptyString();
    }
}
    264 
// Constructs a UTF-8 copy of a String16 via allocFromUTF16, which yields
// the shared empty string for empty input or on allocation failure.
String8::String8(const String16& o)
    : mString(allocFromUTF16(o.string(), o.size()))
{
}
    269 
// Constructs from a zero-terminated UTF-16 string; the length is taken
// with strlen16().
String8::String8(const char16_t* o)
    : mString(allocFromUTF16(o, strlen16(o)))
{
}
    274 
// Constructs from len UTF-16 code units starting at o.
String8::String8(const char16_t* o, size_t len)
    : mString(allocFromUTF16(o, len))
{
}
    279 
// Constructs from a zero-terminated UTF-32 string; the length is taken
// with strlen32().
String8::String8(const char32_t* o)
    : mString(allocFromUTF32(o, strlen32(o)))
{
}
    284 
// Constructs from len UTF-32 characters starting at o.
String8::String8(const char32_t* o, size_t len)
    : mString(allocFromUTF32(o, len))
{
}
    289 
// Drops this string's reference on the shared buffer that holds mString.
String8::~String8()
{
    SharedBuffer::bufferFromData(mString)->release();
}
    294 
// Makes this string share other's buffer.
void String8::setTo(const String8& other)
{
    // Acquire the new buffer before releasing the old one; presumably this
    // ordering keeps the reference count from dropping in between when both
    // strings already share the same buffer (e.g. self-assignment).
    SharedBuffer::bufferFromData(other.mString)->acquire();
    SharedBuffer::bufferFromData(mString)->release();
    mString = other.mString;
}
    301 
    302 status_t String8::setTo(const char* other)
    303 {
    304     const char *newString = allocFromUTF8(other, strlen(other));
    305     SharedBuffer::bufferFromData(mString)->release();
    306     mString = newString;
    307     if (mString) return NO_ERROR;
    308 
    309     mString = getEmptyString();
    310     return NO_MEMORY;
    311 }
    312 
    313 status_t String8::setTo(const char* other, size_t len)
    314 {
    315     const char *newString = allocFromUTF8(other, len);
    316     SharedBuffer::bufferFromData(mString)->release();
    317     mString = newString;
    318     if (mString) return NO_ERROR;
    319 
    320     mString = getEmptyString();
    321     return NO_MEMORY;
    322 }
    323 
    324 status_t String8::setTo(const char16_t* other, size_t len)
    325 {
    326     const char *newString = allocFromUTF16(other, len);
    327     SharedBuffer::bufferFromData(mString)->release();
    328     mString = newString;
    329     if (mString) return NO_ERROR;
    330 
    331     mString = getEmptyString();
    332     return NO_MEMORY;
    333 }
    334 
    335 status_t String8::setTo(const char32_t* other, size_t len)
    336 {
    337     const char *newString = allocFromUTF32(other, len);
    338     SharedBuffer::bufferFromData(mString)->release();
    339     mString = newString;
    340     if (mString) return NO_ERROR;
    341 
    342     mString = getEmptyString();
    343     return NO_MEMORY;
    344 }
    345 
    346 status_t String8::append(const String8& other)
    347 {
    348     const size_t otherLen = other.bytes();
    349     if (bytes() == 0) {
    350         setTo(other);
    351         return NO_ERROR;
    352     } else if (otherLen == 0) {
    353         return NO_ERROR;
    354     }
    355 
    356     return real_append(other.string(), otherLen);
    357 }
    358 
// Appends a NUL-terminated C string; other is passed to strlen(), so it
// must not be NULL. Returns the status of append(other, strlen(other)).
status_t String8::append(const char* other)
{
    return append(other, strlen(other));
}
    363 
    364 status_t String8::append(const char* other, size_t otherLen)
    365 {
    366     if (bytes() == 0) {
    367         return setTo(other, otherLen);
    368     } else if (otherLen == 0) {
    369         return NO_ERROR;
    370     }
    371 
    372     return real_append(other, otherLen);
    373 }
    374 
    375 status_t String8::appendFormat(const char* fmt, ...)
    376 {
    377     va_list ap;
    378     va_start(ap, fmt);
    379 
    380     int result = NO_ERROR;
    381     int n = vsnprintf(NULL, 0, fmt, ap);
    382     if (n != 0) {
    383         size_t oldLength = length();
    384         char* buf = lockBuffer(oldLength + n);
    385         if (buf) {
    386             vsnprintf(buf + oldLength, n + 1, fmt, ap);
    387         } else {
    388             result = NO_MEMORY;
    389         }
    390     }
    391 
    392     va_end(ap);
    393     return result;
    394 }
    395 
    396 status_t String8::real_append(const char* other, size_t otherLen)
    397 {
    398     const size_t myLen = bytes();
    399 
    400     SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    401         ->editResize(myLen+otherLen+1);
    402     if (buf) {
    403         char* str = (char*)buf->data();
    404         mString = str;
    405         str += myLen;
    406         memcpy(str, other, otherLen);
    407         str[otherLen] = '\0';
    408         return NO_ERROR;
    409     }
    410     return NO_MEMORY;
    411 }
    412 
    413 char* String8::lockBuffer(size_t size)
    414 {
    415     SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    416         ->editResize(size+1);
    417     if (buf) {
    418         char* str = (char*)buf->data();
    419         mString = str;
    420         return str;
    421     }
    422     return NULL;
    423 }
    424 
// Finishes a lockBuffer() edit, taking the new length from strlen(), so the
// buffer must contain a NUL terminator. The status returned by
// unlockBuffer(size_t) is discarded.
void String8::unlockBuffer()
{
    unlockBuffer(strlen(mString));
}
    429 
    430 status_t String8::unlockBuffer(size_t size)
    431 {
    432     if (size != this->size()) {
    433         SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    434             ->editResize(size+1);
    435         if (! buf) {
    436             return NO_MEMORY;
    437         }
    438 
    439         char* str = (char*)buf->data();
    440         str[size] = 0;
    441         mString = str;
    442     }
    443 
    444     return NO_ERROR;
    445 }
    446 
    447 ssize_t String8::find(const char* other, size_t start) const
    448 {
    449     size_t len = size();
    450     if (start >= len) {
    451         return -1;
    452     }
    453     const char* s = mString+start;
    454     const char* p = strstr(s, other);
    455     return p ? p-mString : -1;
    456 }
    457 
// Lower-cases the whole string by calling toLower(0, size()).
void String8::toLower()
{
    toLower(0, size());
}
    462 
    463 void String8::toLower(size_t start, size_t length)
    464 {
    465     const size_t len = size();
    466     if (start >= len) {
    467         return;
    468     }
    469     if (start+length > len) {
    470         length = len-start;
    471     }
    472     char* buf = lockBuffer(len);
    473     buf += start;
    474     while (length > 0) {
    475         *buf = tolower(*buf);
    476         buf++;
    477         length--;
    478     }
    479     unlockBuffer(len);
    480 }
    481 
// Upper-cases the whole string by calling toUpper(0, size()).
void String8::toUpper()
{
    toUpper(0, size());
}
    486 
    487 void String8::toUpper(size_t start, size_t length)
    488 {
    489     const size_t len = size();
    490     if (start >= len) {
    491         return;
    492     }
    493     if (start+length > len) {
    494         length = len-start;
    495     }
    496     char* buf = lockBuffer(len);
    497     buf += start;
    498     while (length > 0) {
    499         *buf = toupper(*buf);
    500         buf++;
    501         length--;
    502     }
    503     unlockBuffer(len);
    504 }
    505 
// Returns utf32_length() of this string's bytes, i.e. the number of
// characters counted by lead byte.
size_t String8::getUtf32Length() const
{
    return utf32_length(mString, length());
}
    510 
// Decodes the character starting at byte offset index by forwarding to
// utf32_at(); see that function for the meaning of the return value and of
// next_index.
int32_t String8::getUtf32At(size_t index, size_t *next_index) const
{
    return utf32_at(mString, length(), index, next_index);
}
    515 
// Decodes this string into dst (capacity dst_len characters) by forwarding
// to utf8_to_utf32(); returns that function's result.
size_t String8::getUtf32(char32_t* dst, size_t dst_len) const
{
    return utf8_to_utf32(mString, length(), dst, dst_len);
}
    520 
// Streams the string's character data (val.string()) into a TextOutput and
// returns the stream to allow chaining.
TextOutput& operator<<(TextOutput& to, const String8& val)
{
    to << val.string();
    return to;
}
    526 
    527 // ---------------------------------------------------------------------------
    528 // Path functions
    529 
// Sets this string to the path name; name is passed to strlen(), so it
// must not be NULL.
void String8::setPathName(const char* name)
{
    setPathName(name, strlen(name));
}
    534 
    535 void String8::setPathName(const char* name, size_t len)
    536 {
    537     char* buf = lockBuffer(len);
    538 
    539     memcpy(buf, name, len);
    540 
    541     // remove trailing path separator, if present
    542     if (len > 0 && buf[len-1] == OS_PATH_SEPARATOR)
    543         len--;
    544 
    545     buf[len] = '\0';
    546 
    547     unlockBuffer(len);
    548 }
    549 
    550 String8 String8::getPathLeaf(void) const
    551 {
    552     const char* cp;
    553     const char*const buf = mString;
    554 
    555     cp = strrchr(buf, OS_PATH_SEPARATOR);
    556     if (cp == NULL)
    557         return String8(*this);
    558     else
    559         return String8(cp+1);
    560 }
    561 
    562 String8 String8::getPathDir(void) const
    563 {
    564     const char* cp;
    565     const char*const str = mString;
    566 
    567     cp = strrchr(str, OS_PATH_SEPARATOR);
    568     if (cp == NULL)
    569         return String8("");
    570     else
    571         return String8(str, cp - str);
    572 }
    573 
    574 String8 String8::walkPath(String8* outRemains) const
    575 {
    576     const char* cp;
    577     const char*const str = mString;
    578     const char* buf = str;
    579 
    580     cp = strchr(buf, OS_PATH_SEPARATOR);
    581     if (cp == buf) {
    582         // don't include a leading '/'.
    583         buf = buf+1;
    584         cp = strchr(buf, OS_PATH_SEPARATOR);
    585     }
    586 
    587     if (cp == NULL) {
    588         String8 res = buf != str ? String8(buf) : *this;
    589         if (outRemains) *outRemains = String8("");
    590         return res;
    591     }
    592 
    593     String8 res(buf, cp-buf);
    594     if (outRemains) *outRemains = String8(cp+1);
    595     return res;
    596 }
    597 
    598 /*
    599  * Helper function for finding the start of an extension in a pathname.
    600  *
    601  * Returns a pointer inside mString, or NULL if no extension was found.
    602  */
    603 char* String8::find_extension(void) const
    604 {
    605     const char* lastSlash;
    606     const char* lastDot;
    607     int extLen;
    608     const char* const str = mString;
    609 
    610     // only look at the filename
    611     lastSlash = strrchr(str, OS_PATH_SEPARATOR);
    612     if (lastSlash == NULL)
    613         lastSlash = str;
    614     else
    615         lastSlash++;
    616 
    617     // find the last dot
    618     lastDot = strrchr(lastSlash, '.');
    619     if (lastDot == NULL)
    620         return NULL;
    621 
    622     // looks good, ship it
    623     return const_cast<char*>(lastDot);
    624 }
    625 
    626 String8 String8::getPathExtension(void) const
    627 {
    628     char* ext;
    629 
    630     ext = find_extension();
    631     if (ext != NULL)
    632         return String8(ext);
    633     else
    634         return String8("");
    635 }
    636 
    637 String8 String8::getBasePath(void) const
    638 {
    639     char* ext;
    640     const char* const str = mString;
    641 
    642     ext = find_extension();
    643     if (ext == NULL)
    644         return String8(*this);
    645     else
    646         return String8(str, ext - str);
    647 }
    648 
    649 String8& String8::appendPath(const char* name)
    650 {
    651     // TODO: The test below will fail for Win32 paths. Fix later or ignore.
    652     if (name[0] != OS_PATH_SEPARATOR) {
    653         if (*name == '\0') {
    654             // nothing to do
    655             return *this;
    656         }
    657 
    658         size_t len = length();
    659         if (len == 0) {
    660             // no existing filename, just use the new one
    661             setPathName(name);
    662             return *this;
    663         }
    664 
    665         // make room for oldPath + '/' + newPath
    666         int newlen = strlen(name);
    667 
    668         char* buf = lockBuffer(len+1+newlen);
    669 
    670         // insert a '/' if needed
    671         if (buf[len-1] != OS_PATH_SEPARATOR)
    672             buf[len++] = OS_PATH_SEPARATOR;
    673 
    674         memcpy(buf+len, name, newlen+1);
    675         len += newlen;
    676 
    677         unlockBuffer(len);
    678 
    679         return *this;
    680     } else {
    681         setPathName(name);
    682         return *this;
    683     }
    684 }
    685 
// Rewrites every OS_PATH_SEPARATOR in the string to RES_PATH_SEPARATOR and
// returns *this. The body is compiled out entirely when the two separators
// are identical.
String8& String8::convertToResPath()
{
#if OS_PATH_SEPARATOR != RES_PATH_SEPARATOR
    size_t len = length();
    if (len > 0) {
        // NOTE(review): the result of lockBuffer() is used without a NULL
        // check.
        char * buf = lockBuffer(len);
        for (char * end = buf + len; buf < end; ++buf) {
            if (*buf == OS_PATH_SEPARATOR)
                *buf = RES_PATH_SEPARATOR;
        }
        unlockBuffer(len);
    }
#endif
    return *this;
}
    701 
    702 }; // namespace android
    703 
    704 // ---------------------------------------------------------------------------
    705 
    706 size_t strlen32(const char32_t *s)
    707 {
    708   const char32_t *ss = s;
    709   while ( *ss )
    710     ss++;
    711   return ss-s;
    712 }
    713 
    714 size_t strnlen32(const char32_t *s, size_t maxlen)
    715 {
    716   const char32_t *ss = s;
    717   while ((maxlen > 0) && *ss) {
    718     ss++;
    719     maxlen--;
    720   }
    721   return ss-s;
    722 }
    723 
    724 size_t utf8_length(const char *src)
    725 {
    726     const char *cur = src;
    727     size_t ret = 0;
    728     while (*cur != '\0') {
    729         const char first_char = *cur++;
    730         if ((first_char & 0x80) == 0) { // ASCII
    731             ret += 1;
    732             continue;
    733         }
    734         // (UTF-8's character must not be like 10xxxxxx,
    735         //  but 110xxxxx, 1110xxxx, ... or 1111110x)
    736         if ((first_char & 0x40) == 0) {
    737             return 0;
    738         }
    739 
    740         int32_t mask, to_ignore_mask;
    741         size_t num_to_read = 0;
    742         char32_t utf32 = 0;
    743         for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80;
    744              num_to_read < 5 && (first_char & mask);
    745              num_to_read++, to_ignore_mask |= mask, mask >>= 1) {
    746             if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx
    747                 return 0;
    748             }
    749             // 0x3F == 00111111
    750             utf32 = (utf32 << 6) + (*cur++ & 0x3F);
    751         }
    752         // "first_char" must be (110xxxxx - 11110xxx)
    753         if (num_to_read == 5) {
    754             return 0;
    755         }
    756         to_ignore_mask |= mask;
    757         utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1));
    758         if (utf32 > android::kUnicodeMaxCodepoint) {
    759             return 0;
    760         }
    761 
    762         ret += num_to_read;
    763     }
    764     return ret;
    765 }
    766 
    767 size_t utf32_length(const char *src, size_t src_len)
    768 {
    769     if (src == NULL || src_len == 0) {
    770         return 0;
    771     }
    772     size_t ret = 0;
    773     const char* cur;
    774     const char* end;
    775     size_t num_to_skip;
    776     for (cur = src, end = src + src_len, num_to_skip = 1;
    777          cur < end;
    778          cur += num_to_skip, ret++) {
    779         const char first_char = *cur;
    780         num_to_skip = 1;
    781         if ((first_char & 0x80) == 0) {  // ASCII
    782             continue;
    783         }
    784         int32_t mask;
    785 
    786         for (mask = 0x40; (first_char & mask); num_to_skip++, mask >>= 1) {
    787         }
    788     }
    789     return ret;
    790 }
    791 
    792 size_t utf8_length_from_utf32(const char32_t *src, size_t src_len)
    793 {
    794     if (src == NULL || src_len == 0) {
    795         return 0;
    796     }
    797     size_t ret = 0;
    798     const char32_t *end = src + src_len;
    799     while (src < end) {
    800         ret += android::utf32_to_utf8_bytes(*src++);
    801     }
    802     return ret;
    803 }
    804 
    805 size_t utf8_length_from_utf16(const char16_t *src, size_t src_len)
    806 {
    807     if (src == NULL || src_len == 0) {
    808         return 0;
    809     }
    810     size_t ret = 0;
    811     const char16_t* const end = src + src_len;
    812     while (src < end) {
    813         if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
    814                 && (*++src & 0xFC00) == 0xDC00) {
    815             // surrogate pairs are always 4 bytes.
    816             ret += 4;
    817             src++;
    818         } else {
    819             ret += android::utf32_to_utf8_bytes((char32_t) *src++);
    820         }
    821     }
    822     return ret;
    823 }
    824 
    825 static int32_t utf32_at_internal(const char* cur, size_t *num_read)
    826 {
    827     const char first_char = *cur;
    828     if ((first_char & 0x80) == 0) { // ASCII
    829         *num_read = 1;
    830         return *cur;
    831     }
    832     cur++;
    833     char32_t mask, to_ignore_mask;
    834     size_t num_to_read = 0;
    835     char32_t utf32 = first_char;
    836     for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0xFFFFFF80;
    837          (first_char & mask);
    838          num_to_read++, to_ignore_mask |= mask, mask >>= 1) {
    839         // 0x3F == 00111111
    840         utf32 = (utf32 << 6) + (*cur++ & 0x3F);
    841     }
    842     to_ignore_mask |= mask;
    843     utf32 &= ~(to_ignore_mask << (6 * (num_to_read - 1)));
    844 
    845     *num_read = num_to_read;
    846     return static_cast<int32_t>(utf32);
    847 }
    848 
    849 int32_t utf32_at(const char *src, size_t src_len,
    850                  size_t index, size_t *next_index)
    851 {
    852     if (index >= src_len) {
    853         return -1;
    854     }
    855     size_t dummy_index;
    856     if (next_index == NULL) {
    857         next_index = &dummy_index;
    858     }
    859     size_t num_read;
    860     int32_t ret = utf32_at_internal(src + index, &num_read);
    861     if (ret >= 0) {
    862         *next_index = index + num_read;
    863     }
    864 
    865     return ret;
    866 }
    867 
    868 size_t utf8_to_utf32(const char* src, size_t src_len,
    869                      char32_t* dst, size_t dst_len)
    870 {
    871     if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
    872         return 0;
    873     }
    874 
    875     const char* cur = src;
    876     const char* end = src + src_len;
    877     char32_t* cur_utf32 = dst;
    878     const char32_t* end_utf32 = dst + dst_len;
    879     while (cur_utf32 < end_utf32 && cur < end) {
    880         size_t num_read;
    881         *cur_utf32++ =
    882                 static_cast<char32_t>(utf32_at_internal(cur, &num_read));
    883         cur += num_read;
    884     }
    885     if (cur_utf32 < end_utf32) {
    886         *cur_utf32 = 0;
    887     }
    888     return static_cast<size_t>(cur_utf32 - dst);
    889 }
    890 
    891 size_t utf32_to_utf8(const char32_t* src, size_t src_len,
    892                      char* dst, size_t dst_len)
    893 {
    894     if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
    895         return 0;
    896     }
    897     const char32_t *cur_utf32 = src;
    898     const char32_t *end_utf32 = src + src_len;
    899     char *cur = dst;
    900     const char *end = dst + dst_len;
    901     while (cur_utf32 < end_utf32 && cur < end) {
    902         size_t len = android::utf32_to_utf8_bytes(*cur_utf32);
    903         android::utf32_to_utf8((uint8_t *)cur, *cur_utf32++, len);
    904         cur += len;
    905     }
    906     if (cur < end) {
    907         *cur = '\0';
    908     }
    909     return cur - dst;
    910 }
    911 
    912 size_t utf16_to_utf8(const char16_t* src, size_t src_len,
    913                      char* dst, size_t dst_len)
    914 {
    915     if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
    916         return 0;
    917     }
    918     const char16_t* cur_utf16 = src;
    919     const char16_t* const end_utf16 = src + src_len;
    920     char *cur = dst;
    921     const char* const end = dst + dst_len;
    922     while (cur_utf16 < end_utf16 && cur < end) {
    923         char32_t utf32;
    924         // surrogate pairs
    925         if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16) {
    926             utf32 = (*cur_utf16++ - 0xD800) << 10;
    927             utf32 |= *cur_utf16++ - 0xDC00;
    928             utf32 += 0x10000;
    929         } else {
    930             utf32 = (char32_t) *cur_utf16++;
    931         }
    932         size_t len = android::utf32_to_utf8_bytes(utf32);
    933         android::utf32_to_utf8((uint8_t*)cur, utf32, len);
    934         cur += len;
    935     }
    936     if (cur < end) {
    937         *cur = '\0';
    938     }
    939     return cur - dst;
    940 }
    941