Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2005 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <utils/String8.h>
     18 
     19 #include <utils/Log.h>
     20 #include <utils/String16.h>
     21 #include <utils/TextOutput.h>
     22 #include <utils/threads.h>
     23 
     24 #include <private/utils/Static.h>
     25 
     26 #include <ctype.h>
     27 
/*
 * Functions outside the android namespace are defined below it, since they
 * use functions and constants from the android namespace.
 */
     32 
     33 // ---------------------------------------------------------------------------
     34 
     35 namespace android {
     36 
// Used together when emitting UTF-8 continuation bytes: OR-ing with kByteMark
// sets bit 7 and AND-ing with kByteMask clears bit 6, yielding 10xxxxxx.
static const char32_t kByteMask = 0x000000BF;
static const char32_t kByteMark = 0x00000080;

// Surrogates aren't valid for UTF-32 characters, so define some
// constants that will let us screen them out.
static const char32_t kUnicodeSurrogateHighStart  = 0x0000D800;
static const char32_t kUnicodeSurrogateHighEnd    = 0x0000DBFF;
static const char32_t kUnicodeSurrogateLowStart   = 0x0000DC00;
static const char32_t kUnicodeSurrogateLowEnd     = 0x0000DFFF;
// Whole surrogate range (high start through low end), inclusive.
static const char32_t kUnicodeSurrogateStart      = kUnicodeSurrogateHighStart;
static const char32_t kUnicodeSurrogateEnd        = kUnicodeSurrogateLowEnd;
// Largest code point accepted by the converters in this file.
static const char32_t kUnicodeMaxCodepoint        = 0x0010FFFF;

// Mask used to set appropriate bits in first byte of UTF-8 sequence,
// indexed by number of bytes in the sequence.
// 0xxxxxxx
// -> (00-7f) 7bit. Bit mask for the first byte is 0x00000000
// 110yyyyx 10xxxxxx
// -> (c0-df)(80-bf) 11bit. Bit mask is 0x000000C0
// 1110yyyy 10yxxxxx 10xxxxxx
// -> (e0-ef)(80-bf)(80-bf) 16bit. Bit mask is 0x000000E0
// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
// -> (f0-f7)(80-bf)(80-bf)(80-bf) 21bit. Bit mask is 0x000000F0
// Index 0 is a placeholder so that the byte count can be used directly.
static const char32_t kFirstByteMark[] = {
    0x00000000, 0x00000000, 0x000000C0, 0x000000E0, 0x000000F0
};

// Separator used by resource paths. This is not platform dependent contrary
// to OS_PATH_SEPARATOR.
#define RES_PATH_SEPARATOR '/'
     67 
     68 // Return number of utf8 bytes required for the character.
     69 static size_t utf32_to_utf8_bytes(char32_t srcChar)
     70 {
     71     size_t bytesToWrite;
     72 
     73     // Figure out how many bytes the result will require.
     74     if (srcChar < 0x00000080)
     75     {
     76         bytesToWrite = 1;
     77     }
     78     else if (srcChar < 0x00000800)
     79     {
     80         bytesToWrite = 2;
     81     }
     82     else if (srcChar < 0x00010000)
     83     {
     84         if ((srcChar < kUnicodeSurrogateStart)
     85          || (srcChar > kUnicodeSurrogateEnd))
     86         {
     87             bytesToWrite = 3;
     88         }
     89         else
     90         {
     91             // Surrogates are invalid UTF-32 characters.
     92             return 0;
     93         }
     94     }
     95     // Max code point for Unicode is 0x0010FFFF.
     96     else if (srcChar <= kUnicodeMaxCodepoint)
     97     {
     98         bytesToWrite = 4;
     99     }
    100     else
    101     {
    102         // Invalid UTF-32 character.
    103         return 0;
    104     }
    105 
    106     return bytesToWrite;
    107 }
    108 
    109 // Write out the source character to <dstP>.
    110 
    111 static void utf32_to_utf8(uint8_t* dstP, char32_t srcChar, size_t bytes)
    112 {
    113     dstP += bytes;
    114     switch (bytes)
    115     {   /* note: everything falls through. */
    116         case 4: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
    117         case 3: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
    118         case 2: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6;
    119         case 1: *--dstP = (uint8_t)(srcChar | kFirstByteMark[bytes]);
    120     }
    121 }
    122 
    123 // ---------------------------------------------------------------------------
    124 
// Shared storage for the empty string. Both are set by initialize_string8()
// and reset to NULL by terminate_string8().
static SharedBuffer* gEmptyStringBuf = NULL;
static char* gEmptyString = NULL;

// Dummy pair of variables; initialize_string8() copies one into the other to
// force Static.cpp to be linked in.
extern int gDarwinCantLoadAllObjects;
int gDarwinIsReallyAnnoying;
    130 
    131 static inline char* getEmptyString()
    132 {
    133     gEmptyStringBuf->acquire();
    134     return gEmptyString;
    135 }
    136 
    137 void initialize_string8()
    138 {
    139     // HACK: This dummy dependency forces linking libutils Static.cpp,
    140     // which is needed to initialize String8/String16 classes.
    141     // These variables are named for Darwin, but are needed elsewhere too,
    142     // including static linking on any platform.
    143     gDarwinIsReallyAnnoying = gDarwinCantLoadAllObjects;
    144 
    145     SharedBuffer* buf = SharedBuffer::alloc(1);
    146     char* str = (char*)buf->data();
    147     *str = 0;
    148     gEmptyStringBuf = buf;
    149     gEmptyString = str;
    150 }
    151 
    152 void terminate_string8()
    153 {
    154     SharedBuffer::bufferFromData(gEmptyString)->release();
    155     gEmptyStringBuf = NULL;
    156     gEmptyString = NULL;
    157 }
    158 
    159 // ---------------------------------------------------------------------------
    160 
    161 static char* allocFromUTF8(const char* in, size_t len)
    162 {
    163     if (len > 0) {
    164         SharedBuffer* buf = SharedBuffer::alloc(len+1);
    165         LOG_ASSERT(buf, "Unable to allocate shared buffer");
    166         if (buf) {
    167             char* str = (char*)buf->data();
    168             memcpy(str, in, len);
    169             str[len] = 0;
    170             return str;
    171         }
    172         return NULL;
    173     }
    174 
    175     return getEmptyString();
    176 }
    177 
    178 template<typename T, typename L>
    179 static char* allocFromUTF16OrUTF32(const T* in, L len)
    180 {
    181     if (len == 0) return getEmptyString();
    182 
    183     size_t bytes = 0;
    184     const T* end = in+len;
    185     const T* p = in;
    186 
    187     while (p < end) {
    188         bytes += utf32_to_utf8_bytes(*p);
    189         p++;
    190     }
    191 
    192     SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
    193     LOG_ASSERT(buf, "Unable to allocate shared buffer");
    194     if (buf) {
    195         p = in;
    196         char* str = (char*)buf->data();
    197         char* d = str;
    198         while (p < end) {
    199             const T c = *p++;
    200             size_t len = utf32_to_utf8_bytes(c);
    201             utf32_to_utf8((uint8_t*)d, c, len);
    202             d += len;
    203         }
    204         *d = 0;
    205 
    206         return str;
    207     }
    208 
    209     return getEmptyString();
    210 }
    211 
    212 static char* allocFromUTF16(const char16_t* in, size_t len)
    213 {
    214     if (len == 0) return getEmptyString();
    215 
    216     const size_t bytes = utf8_length_from_utf16(in, len);
    217 
    218     SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
    219     LOG_ASSERT(buf, "Unable to allocate shared buffer");
    220     if (buf) {
    221         char* str = (char*)buf->data();
    222 
    223         utf16_to_utf8(in, len, str, bytes+1);
    224 
    225         return str;
    226     }
    227 
    228     return getEmptyString();
    229 }
    230 
    231 static char* allocFromUTF32(const char32_t* in, size_t len)
    232 {
    233     return allocFromUTF16OrUTF32<char32_t, size_t>(in, len);
    234 }
    235 
    236 // ---------------------------------------------------------------------------
    237 
    238 String8::String8()
    239     : mString(getEmptyString())
    240 {
    241 }
    242 
    243 String8::String8(const String8& o)
    244     : mString(o.mString)
    245 {
    246     SharedBuffer::bufferFromData(mString)->acquire();
    247 }
    248 
    249 String8::String8(const char* o)
    250     : mString(allocFromUTF8(o, strlen(o)))
    251 {
    252     if (mString == NULL) {
    253         mString = getEmptyString();
    254     }
    255 }
    256 
    257 String8::String8(const char* o, size_t len)
    258     : mString(allocFromUTF8(o, len))
    259 {
    260     if (mString == NULL) {
    261         mString = getEmptyString();
    262     }
    263 }
    264 
    265 String8::String8(const String16& o)
    266     : mString(allocFromUTF16(o.string(), o.size()))
    267 {
    268 }
    269 
    270 String8::String8(const char16_t* o)
    271     : mString(allocFromUTF16(o, strlen16(o)))
    272 {
    273 }
    274 
    275 String8::String8(const char16_t* o, size_t len)
    276     : mString(allocFromUTF16(o, len))
    277 {
    278 }
    279 
    280 String8::String8(const char32_t* o)
    281     : mString(allocFromUTF32(o, strlen32(o)))
    282 {
    283 }
    284 
    285 String8::String8(const char32_t* o, size_t len)
    286     : mString(allocFromUTF32(o, len))
    287 {
    288 }
    289 
    290 String8::~String8()
    291 {
    292     SharedBuffer::bufferFromData(mString)->release();
    293 }
    294 
// Makes this string share `other`'s buffer.
void String8::setTo(const String8& other)
{
    // Acquire before release: when `other` is this object (or shares its
    // buffer) the reference count never drops in between.
    SharedBuffer::bufferFromData(other.mString)->acquire();
    SharedBuffer::bufferFromData(mString)->release();
    mString = other.mString;
}
    301 
    302 status_t String8::setTo(const char* other)
    303 {
    304     SharedBuffer::bufferFromData(mString)->release();
    305     mString = allocFromUTF8(other, strlen(other));
    306     if (mString) return NO_ERROR;
    307 
    308     mString = getEmptyString();
    309     return NO_MEMORY;
    310 }
    311 
    312 status_t String8::setTo(const char* other, size_t len)
    313 {
    314     SharedBuffer::bufferFromData(mString)->release();
    315     mString = allocFromUTF8(other, len);
    316     if (mString) return NO_ERROR;
    317 
    318     mString = getEmptyString();
    319     return NO_MEMORY;
    320 }
    321 
    322 status_t String8::setTo(const char16_t* other, size_t len)
    323 {
    324     SharedBuffer::bufferFromData(mString)->release();
    325     mString = allocFromUTF16(other, len);
    326     if (mString) return NO_ERROR;
    327 
    328     mString = getEmptyString();
    329     return NO_MEMORY;
    330 }
    331 
    332 status_t String8::setTo(const char32_t* other, size_t len)
    333 {
    334     SharedBuffer::bufferFromData(mString)->release();
    335     mString = allocFromUTF32(other, len);
    336     if (mString) return NO_ERROR;
    337 
    338     mString = getEmptyString();
    339     return NO_MEMORY;
    340 }
    341 
    342 status_t String8::append(const String8& other)
    343 {
    344     const size_t otherLen = other.bytes();
    345     if (bytes() == 0) {
    346         setTo(other);
    347         return NO_ERROR;
    348     } else if (otherLen == 0) {
    349         return NO_ERROR;
    350     }
    351 
    352     return real_append(other.string(), otherLen);
    353 }
    354 
    355 status_t String8::append(const char* other)
    356 {
    357     return append(other, strlen(other));
    358 }
    359 
    360 status_t String8::append(const char* other, size_t otherLen)
    361 {
    362     if (bytes() == 0) {
    363         return setTo(other, otherLen);
    364     } else if (otherLen == 0) {
    365         return NO_ERROR;
    366     }
    367 
    368     return real_append(other, otherLen);
    369 }
    370 
    371 status_t String8::real_append(const char* other, size_t otherLen)
    372 {
    373     const size_t myLen = bytes();
    374 
    375     SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    376         ->editResize(myLen+otherLen+1);
    377     if (buf) {
    378         char* str = (char*)buf->data();
    379         mString = str;
    380         str += myLen;
    381         memcpy(str, other, otherLen);
    382         str[otherLen] = '\0';
    383         return NO_ERROR;
    384     }
    385     return NO_MEMORY;
    386 }
    387 
    388 char* String8::lockBuffer(size_t size)
    389 {
    390     SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    391         ->editResize(size+1);
    392     if (buf) {
    393         char* str = (char*)buf->data();
    394         mString = str;
    395         return str;
    396     }
    397     return NULL;
    398 }
    399 
    400 void String8::unlockBuffer()
    401 {
    402     unlockBuffer(strlen(mString));
    403 }
    404 
    405 status_t String8::unlockBuffer(size_t size)
    406 {
    407     if (size != this->size()) {
    408         SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    409             ->editResize(size+1);
    410         if (buf) {
    411             char* str = (char*)buf->data();
    412             str[size] = 0;
    413             mString = str;
    414             return NO_ERROR;
    415         }
    416     }
    417 
    418     return NO_MEMORY;
    419 }
    420 
    421 ssize_t String8::find(const char* other, size_t start) const
    422 {
    423     size_t len = size();
    424     if (start >= len) {
    425         return -1;
    426     }
    427     const char* s = mString+start;
    428     const char* p = strstr(s, other);
    429     return p ? p-mString : -1;
    430 }
    431 
    432 void String8::toLower()
    433 {
    434     toLower(0, size());
    435 }
    436 
    437 void String8::toLower(size_t start, size_t length)
    438 {
    439     const size_t len = size();
    440     if (start >= len) {
    441         return;
    442     }
    443     if (start+length > len) {
    444         length = len-start;
    445     }
    446     char* buf = lockBuffer(len);
    447     buf += start;
    448     while (length > 0) {
    449         *buf = tolower(*buf);
    450         buf++;
    451         length--;
    452     }
    453     unlockBuffer(len);
    454 }
    455 
    456 void String8::toUpper()
    457 {
    458     toUpper(0, size());
    459 }
    460 
    461 void String8::toUpper(size_t start, size_t length)
    462 {
    463     const size_t len = size();
    464     if (start >= len) {
    465         return;
    466     }
    467     if (start+length > len) {
    468         length = len-start;
    469     }
    470     char* buf = lockBuffer(len);
    471     buf += start;
    472     while (length > 0) {
    473         *buf = toupper(*buf);
    474         buf++;
    475         length--;
    476     }
    477     unlockBuffer(len);
    478 }
    479 
    480 size_t String8::getUtf32Length() const
    481 {
    482     return utf32_length(mString, length());
    483 }
    484 
    485 int32_t String8::getUtf32At(size_t index, size_t *next_index) const
    486 {
    487     return utf32_at(mString, length(), index, next_index);
    488 }
    489 
    490 size_t String8::getUtf32(char32_t* dst, size_t dst_len) const
    491 {
    492     return utf8_to_utf32(mString, length(), dst, dst_len);
    493 }
    494 
    495 TextOutput& operator<<(TextOutput& to, const String8& val)
    496 {
    497     to << val.string();
    498     return to;
    499 }
    500 
    501 // ---------------------------------------------------------------------------
    502 // Path functions
    503 
    504 void String8::setPathName(const char* name)
    505 {
    506     setPathName(name, strlen(name));
    507 }
    508 
    509 void String8::setPathName(const char* name, size_t len)
    510 {
    511     char* buf = lockBuffer(len);
    512 
    513     memcpy(buf, name, len);
    514 
    515     // remove trailing path separator, if present
    516     if (len > 0 && buf[len-1] == OS_PATH_SEPARATOR)
    517         len--;
    518 
    519     buf[len] = '\0';
    520 
    521     unlockBuffer(len);
    522 }
    523 
    524 String8 String8::getPathLeaf(void) const
    525 {
    526     const char* cp;
    527     const char*const buf = mString;
    528 
    529     cp = strrchr(buf, OS_PATH_SEPARATOR);
    530     if (cp == NULL)
    531         return String8(*this);
    532     else
    533         return String8(cp+1);
    534 }
    535 
    536 String8 String8::getPathDir(void) const
    537 {
    538     const char* cp;
    539     const char*const str = mString;
    540 
    541     cp = strrchr(str, OS_PATH_SEPARATOR);
    542     if (cp == NULL)
    543         return String8("");
    544     else
    545         return String8(str, cp - str);
    546 }
    547 
    548 String8 String8::walkPath(String8* outRemains) const
    549 {
    550     const char* cp;
    551     const char*const str = mString;
    552     const char* buf = str;
    553 
    554     cp = strchr(buf, OS_PATH_SEPARATOR);
    555     if (cp == buf) {
    556         // don't include a leading '/'.
    557         buf = buf+1;
    558         cp = strchr(buf, OS_PATH_SEPARATOR);
    559     }
    560 
    561     if (cp == NULL) {
    562         String8 res = buf != str ? String8(buf) : *this;
    563         if (outRemains) *outRemains = String8("");
    564         return res;
    565     }
    566 
    567     String8 res(buf, cp-buf);
    568     if (outRemains) *outRemains = String8(cp+1);
    569     return res;
    570 }
    571 
    572 /*
    573  * Helper function for finding the start of an extension in a pathname.
    574  *
    575  * Returns a pointer inside mString, or NULL if no extension was found.
    576  */
    577 char* String8::find_extension(void) const
    578 {
    579     const char* lastSlash;
    580     const char* lastDot;
    581     int extLen;
    582     const char* const str = mString;
    583 
    584     // only look at the filename
    585     lastSlash = strrchr(str, OS_PATH_SEPARATOR);
    586     if (lastSlash == NULL)
    587         lastSlash = str;
    588     else
    589         lastSlash++;
    590 
    591     // find the last dot
    592     lastDot = strrchr(lastSlash, '.');
    593     if (lastDot == NULL)
    594         return NULL;
    595 
    596     // looks good, ship it
    597     return const_cast<char*>(lastDot);
    598 }
    599 
    600 String8 String8::getPathExtension(void) const
    601 {
    602     char* ext;
    603 
    604     ext = find_extension();
    605     if (ext != NULL)
    606         return String8(ext);
    607     else
    608         return String8("");
    609 }
    610 
    611 String8 String8::getBasePath(void) const
    612 {
    613     char* ext;
    614     const char* const str = mString;
    615 
    616     ext = find_extension();
    617     if (ext == NULL)
    618         return String8(*this);
    619     else
    620         return String8(str, ext - str);
    621 }
    622 
    623 String8& String8::appendPath(const char* name)
    624 {
    625     // TODO: The test below will fail for Win32 paths. Fix later or ignore.
    626     if (name[0] != OS_PATH_SEPARATOR) {
    627         if (*name == '\0') {
    628             // nothing to do
    629             return *this;
    630         }
    631 
    632         size_t len = length();
    633         if (len == 0) {
    634             // no existing filename, just use the new one
    635             setPathName(name);
    636             return *this;
    637         }
    638 
    639         // make room for oldPath + '/' + newPath
    640         int newlen = strlen(name);
    641 
    642         char* buf = lockBuffer(len+1+newlen);
    643 
    644         // insert a '/' if needed
    645         if (buf[len-1] != OS_PATH_SEPARATOR)
    646             buf[len++] = OS_PATH_SEPARATOR;
    647 
    648         memcpy(buf+len, name, newlen+1);
    649         len += newlen;
    650 
    651         unlockBuffer(len);
    652 
    653         return *this;
    654     } else {
    655         setPathName(name);
    656         return *this;
    657     }
    658 }
    659 
    660 String8& String8::convertToResPath()
    661 {
    662 #if OS_PATH_SEPARATOR != RES_PATH_SEPARATOR
    663     size_t len = length();
    664     if (len > 0) {
    665         char * buf = lockBuffer(len);
    666         for (char * end = buf + len; buf < end; ++buf) {
    667             if (*buf == OS_PATH_SEPARATOR)
    668                 *buf = RES_PATH_SEPARATOR;
    669         }
    670         unlockBuffer(len);
    671     }
    672 #endif
    673     return *this;
    674 }
    675 
    676 }; // namespace android
    677 
    678 // ---------------------------------------------------------------------------
    679 
    680 size_t strlen32(const char32_t *s)
    681 {
    682   const char32_t *ss = s;
    683   while ( *ss )
    684     ss++;
    685   return ss-s;
    686 }
    687 
    688 size_t strnlen32(const char32_t *s, size_t maxlen)
    689 {
    690   const char32_t *ss = s;
    691   while ((maxlen > 0) && *ss) {
    692     ss++;
    693     maxlen--;
    694   }
    695   return ss-s;
    696 }
    697 
    698 size_t utf8_length(const char *src)
    699 {
    700     const char *cur = src;
    701     size_t ret = 0;
    702     while (*cur != '\0') {
    703         const char first_char = *cur++;
    704         if ((first_char & 0x80) == 0) { // ASCII
    705             ret += 1;
    706             continue;
    707         }
    708         // (UTF-8's character must not be like 10xxxxxx,
    709         //  but 110xxxxx, 1110xxxx, ... or 1111110x)
    710         if ((first_char & 0x40) == 0) {
    711             return 0;
    712         }
    713 
    714         int32_t mask, to_ignore_mask;
    715         size_t num_to_read = 0;
    716         char32_t utf32 = 0;
    717         for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80;
    718              num_to_read < 5 && (first_char & mask);
    719              num_to_read++, to_ignore_mask |= mask, mask >>= 1) {
    720             if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx
    721                 return 0;
    722             }
    723             // 0x3F == 00111111
    724             utf32 = (utf32 << 6) + (*cur++ & 0x3F);
    725         }
    726         // "first_char" must be (110xxxxx - 11110xxx)
    727         if (num_to_read == 5) {
    728             return 0;
    729         }
    730         to_ignore_mask |= mask;
    731         utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1));
    732         if (utf32 > android::kUnicodeMaxCodepoint) {
    733             return 0;
    734         }
    735 
    736         ret += num_to_read;
    737     }
    738     return ret;
    739 }
    740 
    741 size_t utf32_length(const char *src, size_t src_len)
    742 {
    743     if (src == NULL || src_len == 0) {
    744         return 0;
    745     }
    746     size_t ret = 0;
    747     const char* cur;
    748     const char* end;
    749     size_t num_to_skip;
    750     for (cur = src, end = src + src_len, num_to_skip = 1;
    751          cur < end;
    752          cur += num_to_skip, ret++) {
    753         const char first_char = *cur;
    754         num_to_skip = 1;
    755         if ((first_char & 0x80) == 0) {  // ASCII
    756             continue;
    757         }
    758         int32_t mask;
    759 
    760         for (mask = 0x40; (first_char & mask); num_to_skip++, mask >>= 1) {
    761         }
    762     }
    763     return ret;
    764 }
    765 
    766 size_t utf8_length_from_utf32(const char32_t *src, size_t src_len)
    767 {
    768     if (src == NULL || src_len == 0) {
    769         return 0;
    770     }
    771     size_t ret = 0;
    772     const char32_t *end = src + src_len;
    773     while (src < end) {
    774         ret += android::utf32_to_utf8_bytes(*src++);
    775     }
    776     return ret;
    777 }
    778 
    779 size_t utf8_length_from_utf16(const char16_t *src, size_t src_len)
    780 {
    781     if (src == NULL || src_len == 0) {
    782         return 0;
    783     }
    784     size_t ret = 0;
    785     const char16_t* const end = src + src_len;
    786     while (src < end) {
    787         if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
    788                 && (*++src & 0xFC00) == 0xDC00) {
    789             // surrogate pairs are always 4 bytes.
    790             ret += 4;
    791             src++;
    792         } else {
    793             ret += android::utf32_to_utf8_bytes((char32_t) *src++);
    794         }
    795     }
    796     return ret;
    797 }
    798 
    799 static int32_t utf32_at_internal(const char* cur, size_t *num_read)
    800 {
    801     const char first_char = *cur;
    802     if ((first_char & 0x80) == 0) { // ASCII
    803         *num_read = 1;
    804         return *cur;
    805     }
    806     cur++;
    807     char32_t mask, to_ignore_mask;
    808     size_t num_to_read = 0;
    809     char32_t utf32 = first_char;
    810     for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0xFFFFFF80;
    811          (first_char & mask);
    812          num_to_read++, to_ignore_mask |= mask, mask >>= 1) {
    813         // 0x3F == 00111111
    814         utf32 = (utf32 << 6) + (*cur++ & 0x3F);
    815     }
    816     to_ignore_mask |= mask;
    817     utf32 &= ~(to_ignore_mask << (6 * (num_to_read - 1)));
    818 
    819     *num_read = num_to_read;
    820     return static_cast<int32_t>(utf32);
    821 }
    822 
    823 int32_t utf32_at(const char *src, size_t src_len,
    824                  size_t index, size_t *next_index)
    825 {
    826     if (index >= src_len) {
    827         return -1;
    828     }
    829     size_t dummy_index;
    830     if (next_index == NULL) {
    831         next_index = &dummy_index;
    832     }
    833     size_t num_read;
    834     int32_t ret = utf32_at_internal(src + index, &num_read);
    835     if (ret >= 0) {
    836         *next_index = index + num_read;
    837     }
    838 
    839     return ret;
    840 }
    841 
    842 size_t utf8_to_utf32(const char* src, size_t src_len,
    843                      char32_t* dst, size_t dst_len)
    844 {
    845     if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
    846         return 0;
    847     }
    848 
    849     const char* cur = src;
    850     const char* end = src + src_len;
    851     char32_t* cur_utf32 = dst;
    852     const char32_t* end_utf32 = dst + dst_len;
    853     while (cur_utf32 < end_utf32 && cur < end) {
    854         size_t num_read;
    855         *cur_utf32++ =
    856                 static_cast<char32_t>(utf32_at_internal(cur, &num_read));
    857         cur += num_read;
    858     }
    859     if (cur_utf32 < end_utf32) {
    860         *cur_utf32 = 0;
    861     }
    862     return static_cast<size_t>(cur_utf32 - dst);
    863 }
    864 
    865 size_t utf32_to_utf8(const char32_t* src, size_t src_len,
    866                      char* dst, size_t dst_len)
    867 {
    868     if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
    869         return 0;
    870     }
    871     const char32_t *cur_utf32 = src;
    872     const char32_t *end_utf32 = src + src_len;
    873     char *cur = dst;
    874     const char *end = dst + dst_len;
    875     while (cur_utf32 < end_utf32 && cur < end) {
    876         size_t len = android::utf32_to_utf8_bytes(*cur_utf32);
    877         android::utf32_to_utf8((uint8_t *)cur, *cur_utf32++, len);
    878         cur += len;
    879     }
    880     if (cur < end) {
    881         *cur = '\0';
    882     }
    883     return cur - dst;
    884 }
    885 
    886 size_t utf16_to_utf8(const char16_t* src, size_t src_len,
    887                      char* dst, size_t dst_len)
    888 {
    889     if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
    890         return 0;
    891     }
    892     const char16_t* cur_utf16 = src;
    893     const char16_t* const end_utf16 = src + src_len;
    894     char *cur = dst;
    895     const char* const end = dst + dst_len;
    896     while (cur_utf16 < end_utf16 && cur < end) {
    897         char32_t utf32;
    898         // surrogate pairs
    899         if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16) {
    900             utf32 = (*cur_utf16++ - 0xD800) << 10;
    901             utf32 |= *cur_utf16++ - 0xDC00;
    902             utf32 += 0x10000;
    903         } else {
    904             utf32 = (char32_t) *cur_utf16++;
    905         }
    906         size_t len = android::utf32_to_utf8_bytes(utf32);
    907         android::utf32_to_utf8((uint8_t*)cur, utf32, len);
    908         cur += len;
    909     }
    910     if (cur < end) {
    911         *cur = '\0';
    912     }
    913     return cur - dst;
    914 }
    915