Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2005 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <utils/String16.h>
     18 
     19 #include <utils/Debug.h>
     20 #include <utils/Log.h>
     21 #include <utils/String8.h>
     22 #include <utils/TextOutput.h>
     23 #include <utils/threads.h>
     24 
     25 #include <private/utils/Static.h>
     26 
     27 #ifdef HAVE_WINSOCK
     28 # undef  nhtol
     29 # undef  htonl
     30 # undef  nhtos
     31 # undef  htons
     32 
     33 # ifdef HAVE_LITTLE_ENDIAN
     34 #  define ntohl(x)    ( ((x) << 24) | (((x) >> 24) & 255) | (((x) << 8) & 0xff0000) | (((x) >> 8) & 0xff00) )
     35 #  define htonl(x)    ntohl(x)
     36 #  define ntohs(x)    ( (((x) << 8) & 0xff00) | (((x) >> 8) & 255) )
     37 #  define htons(x)    ntohs(x)
     38 # else
     39 #  define ntohl(x)    (x)
     40 #  define htonl(x)    (x)
     41 #  define ntohs(x)    (x)
     42 #  define htons(x)    (x)
     43 # endif
     44 #else
     45 # include <netinet/in.h>
     46 #endif
     47 
     48 #include <memory.h>
     49 #include <stdio.h>
     50 #include <ctype.h>
     51 
     52 // ---------------------------------------------------------------------------
     53 
     54 int strcmp16(const char16_t *s1, const char16_t *s2)
     55 {
     56   char16_t ch;
     57   int d = 0;
     58 
     59   while ( 1 ) {
     60     d = (int)(ch = *s1++) - (int)*s2++;
     61     if ( d || !ch )
     62       break;
     63   }
     64 
     65   return d;
     66 }
     67 
     68 int strncmp16(const char16_t *s1, const char16_t *s2, size_t n)
     69 {
     70   char16_t ch;
     71   int d = 0;
     72 
     73   while ( n-- ) {
     74     d = (int)(ch = *s1++) - (int)*s2++;
     75     if ( d || !ch )
     76       break;
     77   }
     78 
     79   return d;
     80 }
     81 
     82 char16_t *strcpy16(char16_t *dst, const char16_t *src)
     83 {
     84   char16_t *q = dst;
     85   const char16_t *p = src;
     86   char16_t ch;
     87 
     88   do {
     89     *q++ = ch = *p++;
     90   } while ( ch );
     91 
     92   return dst;
     93 }
     94 
     95 size_t strlen16(const char16_t *s)
     96 {
     97   const char16_t *ss = s;
     98   while ( *ss )
     99     ss++;
    100   return ss-s;
    101 }
    102 
    103 
    104 char16_t *strncpy16(char16_t *dst, const char16_t *src, size_t n)
    105 {
    106   char16_t *q = dst;
    107   const char16_t *p = src;
    108   char ch;
    109 
    110   while (n) {
    111     n--;
    112     *q++ = ch = *p++;
    113     if ( !ch )
    114       break;
    115   }
    116 
    117   *q = 0;
    118 
    119   return dst;
    120 }
    121 
    122 size_t strnlen16(const char16_t *s, size_t maxlen)
    123 {
    124   const char16_t *ss = s;
    125 
    126   /* Important: the maxlen test must precede the reference through ss;
    127      since the byte beyond the maximum may segfault */
    128   while ((maxlen > 0) && *ss) {
    129     ss++;
    130     maxlen--;
    131   }
    132   return ss-s;
    133 }
    134 
    135 int strzcmp16(const char16_t *s1, size_t n1, const char16_t *s2, size_t n2)
    136 {
    137     const char16_t* e1 = s1+n1;
    138     const char16_t* e2 = s2+n2;
    139 
    140     while (s1 < e1 && s2 < e2) {
    141         const int d = (int)*s1++ - (int)*s2++;
    142         if (d) {
    143             return d;
    144         }
    145     }
    146 
    147     return n1 < n2
    148         ? (0 - (int)*s2)
    149         : (n1 > n2
    150            ? ((int)*s1 - 0)
    151            : 0);
    152 }
    153 
    154 int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2)
    155 {
    156     const char16_t* e1 = s1H+n1;
    157     const char16_t* e2 = s2N+n2;
    158 
    159     while (s1H < e1 && s2N < e2) {
    160         const char16_t c2 = ntohs(*s2N);
    161         const int d = (int)*s1H++ - (int)c2;
    162         s2N++;
    163         if (d) {
    164             return d;
    165         }
    166     }
    167 
    168     return n1 < n2
    169         ? (0 - (int)ntohs(*s2N))
    170         : (n1 > n2
    171            ? ((int)*s1H - 0)
    172            : 0);
    173 }
    174 
    175 static inline size_t
    176 utf8_char_len(uint8_t ch)
    177 {
    178     return ((0xe5000000 >> ((ch >> 3) & 0x1e)) & 3) + 1;
    179 }
    180 
    181 #define UTF8_SHIFT_AND_MASK(unicode, byte)  (unicode)<<=6; (unicode) |= (0x3f & (byte));
    182 
    183 static inline uint32_t
    184 utf8_to_utf32(const uint8_t *src, size_t length)
    185 {
    186     uint32_t unicode;
    187 
    188     switch (length)
    189     {
    190         case 1:
    191             return src[0];
    192         case 2:
    193             unicode = src[0] & 0x1f;
    194             UTF8_SHIFT_AND_MASK(unicode, src[1])
    195             return unicode;
    196         case 3:
    197             unicode = src[0] & 0x0f;
    198             UTF8_SHIFT_AND_MASK(unicode, src[1])
    199             UTF8_SHIFT_AND_MASK(unicode, src[2])
    200             return unicode;
    201         case 4:
    202             unicode = src[0] & 0x07;
    203             UTF8_SHIFT_AND_MASK(unicode, src[1])
    204             UTF8_SHIFT_AND_MASK(unicode, src[2])
    205             UTF8_SHIFT_AND_MASK(unicode, src[3])
    206             return unicode;
    207         default:
    208             return 0xffff;
    209     }
    210 
    211     //printf("Char at %p: len=%d, utf-16=%p\n", src, length, (void*)result);
    212 }
    213 
    214 void
    215 utf8_to_utf16(const uint8_t *src, size_t srcLen,
    216         char16_t* dst, const size_t dstLen)
    217 {
    218     const uint8_t* const end = src + srcLen;
    219     const char16_t* const dstEnd = dst + dstLen;
    220     while (src < end && dst < dstEnd) {
    221         size_t len = utf8_char_len(*src);
    222         uint32_t codepoint = utf8_to_utf32((const uint8_t*)src, len);
    223 
    224         // Convert the UTF32 codepoint to one or more UTF16 codepoints
    225         if (codepoint <= 0xFFFF) {
    226             // Single UTF16 character
    227             *dst++ = (char16_t) codepoint;
    228         } else {
    229             // Multiple UTF16 characters with surrogates
    230             codepoint = codepoint - 0x10000;
    231             *dst++ = (char16_t) ((codepoint >> 10) + 0xD800);
    232             *dst++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
    233         }
    234 
    235         src += len;
    236     }
    237     if (dst < dstEnd) {
    238         *dst = 0;
    239     }
    240 }
    241 
    242 // ---------------------------------------------------------------------------
    243 
    244 namespace android {
    245 
// Buffer backing the shared empty string. Set by initialize_string16() and
// reset to NULL by terminate_string16().
static SharedBuffer* gEmptyStringBuf = NULL;
// Data pointer of gEmptyStringBuf: a single NUL code unit once initialized.
static char16_t* gEmptyString = NULL;
    248 
    249 static inline char16_t* getEmptyString()
    250 {
    251     gEmptyStringBuf->acquire();
    252    return gEmptyString;
    253 }
    254 
    255 void initialize_string16()
    256 {
    257     SharedBuffer* buf = SharedBuffer::alloc(sizeof(char16_t));
    258     char16_t* str = (char16_t*)buf->data();
    259     *str = 0;
    260     gEmptyStringBuf = buf;
    261     gEmptyString = str;
    262 }
    263 
    264 void terminate_string16()
    265 {
    266     SharedBuffer::bufferFromData(gEmptyString)->release();
    267     gEmptyStringBuf = NULL;
    268     gEmptyString = NULL;
    269 }
    270 
    271 // ---------------------------------------------------------------------------
    272 
    273 static char16_t* allocFromUTF8(const char* in, size_t len)
    274 {
    275     if (len == 0) return getEmptyString();
    276 
    277     size_t chars = 0;
    278     const char* end = in+len;
    279     const char* p = in;
    280 
    281     while (p < end) {
    282         chars++;
    283         int utf8len = utf8_char_len(*p);
    284         uint32_t codepoint = utf8_to_utf32((const uint8_t*)p, utf8len);
    285         if (codepoint > 0xFFFF) chars++; // this will be a surrogate pair in utf16
    286         p += utf8len;
    287     }
    288 
    289     size_t bufSize = (chars+1)*sizeof(char16_t);
    290     SharedBuffer* buf = SharedBuffer::alloc(bufSize);
    291     if (buf) {
    292         p = in;
    293         char16_t* str = (char16_t*)buf->data();
    294 
    295         utf8_to_utf16((const uint8_t*)p, len, str, bufSize);
    296 
    297         //printf("Created UTF-16 string from UTF-8 \"%s\":", in);
    298         //printHexData(1, str, buf->size(), 16, 1);
    299         //printf("\n");
    300 
    301         return str;
    302     }
    303 
    304     return getEmptyString();
    305 }
    306 
    307 // ---------------------------------------------------------------------------
    308 
// Default constructor: starts out referring to the shared empty string
// (getEmptyString() takes the reference that this object then holds).
String16::String16()
    : mString(getEmptyString())
{
}
    313 
    314 String16::String16(const String16& o)
    315     : mString(o.mString)
    316 {
    317     SharedBuffer::bufferFromData(mString)->acquire();
    318 }
    319 
// Substring constructor: starts as the shared empty string, then delegates
// to setTo(o, len, begin) to take the requested range of o. The status
// returned by setTo() is not checked here.
String16::String16(const String16& o, size_t len, size_t begin)
    : mString(getEmptyString())
{
    setTo(o, len, begin);
}
    325 
    326 String16::String16(const char16_t* o)
    327 {
    328     size_t len = strlen16(o);
    329     SharedBuffer* buf = SharedBuffer::alloc((len+1)*sizeof(char16_t));
    330     LOG_ASSERT(buf, "Unable to allocate shared buffer");
    331     if (buf) {
    332         char16_t* str = (char16_t*)buf->data();
    333         strcpy16(str, o);
    334         mString = str;
    335         return;
    336     }
    337 
    338     mString = getEmptyString();
    339 }
    340 
    341 String16::String16(const char16_t* o, size_t len)
    342 {
    343     SharedBuffer* buf = SharedBuffer::alloc((len+1)*sizeof(char16_t));
    344     LOG_ASSERT(buf, "Unable to allocate shared buffer");
    345     if (buf) {
    346         char16_t* str = (char16_t*)buf->data();
    347         memcpy(str, o, len*sizeof(char16_t));
    348         str[len] = 0;
    349         mString = str;
    350         return;
    351     }
    352 
    353     mString = getEmptyString();
    354 }
    355 
// Constructs a UTF-16 string from the UTF-8 contents of o, converting
// o.size() bytes starting at o.string() via allocFromUTF8().
String16::String16(const String8& o)
    : mString(allocFromUTF8(o.string(), o.size()))
{
}
    360 
// Constructs a UTF-16 string from the NUL-terminated UTF-8 string o.
// The length is taken with strlen(), so o must not be NULL.
String16::String16(const char* o)
    : mString(allocFromUTF8(o, strlen(o)))
{
}
    365 
// Constructs a UTF-16 string from the first len bytes of the UTF-8 data
// at o, converted via allocFromUTF8().
String16::String16(const char* o, size_t len)
    : mString(allocFromUTF8(o, len))
{
}
    370 
    371 String16::~String16()
    372 {
    373     SharedBuffer::bufferFromData(mString)->release();
    374 }
    375 
    376 void String16::setTo(const String16& other)
    377 {
    378     SharedBuffer::bufferFromData(other.mString)->acquire();
    379     SharedBuffer::bufferFromData(mString)->release();
    380     mString = other.mString;
    381 }
    382 
    383 status_t String16::setTo(const String16& other, size_t len, size_t begin)
    384 {
    385     const size_t N = other.size();
    386     if (begin >= N) {
    387         SharedBuffer::bufferFromData(mString)->release();
    388         mString = getEmptyString();
    389         return NO_ERROR;
    390     }
    391     if ((begin+len) > N) len = N-begin;
    392     if (begin == 0 && len == N) {
    393         setTo(other);
    394         return NO_ERROR;
    395     }
    396 
    397     if (&other == this) {
    398         LOG_ALWAYS_FATAL("Not implemented");
    399     }
    400 
    401     return setTo(other.string()+begin, len);
    402 }
    403 
    404 status_t String16::setTo(const char16_t* other)
    405 {
    406     return setTo(other, strlen16(other));
    407 }
    408 
    409 status_t String16::setTo(const char16_t* other, size_t len)
    410 {
    411     SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    412         ->editResize((len+1)*sizeof(char16_t));
    413     if (buf) {
    414         char16_t* str = (char16_t*)buf->data();
    415         memmove(str, other, len*sizeof(char16_t));
    416         str[len] = 0;
    417         mString = str;
    418         return NO_ERROR;
    419     }
    420     return NO_MEMORY;
    421 }
    422 
    423 status_t String16::append(const String16& other)
    424 {
    425     const size_t myLen = size();
    426     const size_t otherLen = other.size();
    427     if (myLen == 0) {
    428         setTo(other);
    429         return NO_ERROR;
    430     } else if (otherLen == 0) {
    431         return NO_ERROR;
    432     }
    433 
    434     SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    435         ->editResize((myLen+otherLen+1)*sizeof(char16_t));
    436     if (buf) {
    437         char16_t* str = (char16_t*)buf->data();
    438         memcpy(str+myLen, other, (otherLen+1)*sizeof(char16_t));
    439         mString = str;
    440         return NO_ERROR;
    441     }
    442     return NO_MEMORY;
    443 }
    444 
    445 status_t String16::append(const char16_t* chrs, size_t otherLen)
    446 {
    447     const size_t myLen = size();
    448     if (myLen == 0) {
    449         setTo(chrs, otherLen);
    450         return NO_ERROR;
    451     } else if (otherLen == 0) {
    452         return NO_ERROR;
    453     }
    454 
    455     SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    456         ->editResize((myLen+otherLen+1)*sizeof(char16_t));
    457     if (buf) {
    458         char16_t* str = (char16_t*)buf->data();
    459         memcpy(str+myLen, chrs, otherLen*sizeof(char16_t));
    460         str[myLen+otherLen] = 0;
    461         mString = str;
    462         return NO_ERROR;
    463     }
    464     return NO_MEMORY;
    465 }
    466 
    467 status_t String16::insert(size_t pos, const char16_t* chrs)
    468 {
    469     return insert(pos, chrs, strlen16(chrs));
    470 }
    471 
    472 status_t String16::insert(size_t pos, const char16_t* chrs, size_t len)
    473 {
    474     const size_t myLen = size();
    475     if (myLen == 0) {
    476         return setTo(chrs, len);
    477         return NO_ERROR;
    478     } else if (len == 0) {
    479         return NO_ERROR;
    480     }
    481 
    482     if (pos > myLen) pos = myLen;
    483 
    484     #if 0
    485     printf("Insert in to %s: pos=%d, len=%d, myLen=%d, chrs=%s\n",
    486            String8(*this).string(), pos,
    487            len, myLen, String8(chrs, len).string());
    488     #endif
    489 
    490     SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    491         ->editResize((myLen+len+1)*sizeof(char16_t));
    492     if (buf) {
    493         char16_t* str = (char16_t*)buf->data();
    494         if (pos < myLen) {
    495             memmove(str+pos+len, str+pos, (myLen-pos)*sizeof(char16_t));
    496         }
    497         memcpy(str+pos, chrs, len*sizeof(char16_t));
    498         str[myLen+len] = 0;
    499         mString = str;
    500         #if 0
    501         printf("Result (%d chrs): %s\n", size(), String8(*this).string());
    502         #endif
    503         return NO_ERROR;
    504     }
    505     return NO_MEMORY;
    506 }
    507 
    508 ssize_t String16::findFirst(char16_t c) const
    509 {
    510     const char16_t* str = string();
    511     const char16_t* p = str;
    512     const char16_t* e = p + size();
    513     while (p < e) {
    514         if (*p == c) {
    515             return p-str;
    516         }
    517         p++;
    518     }
    519     return -1;
    520 }
    521 
    522 ssize_t String16::findLast(char16_t c) const
    523 {
    524     const char16_t* str = string();
    525     const char16_t* p = str;
    526     const char16_t* e = p + size();
    527     while (p < e) {
    528         e--;
    529         if (*e == c) {
    530             return e-str;
    531         }
    532     }
    533     return -1;
    534 }
    535 
    536 bool String16::startsWith(const String16& prefix) const
    537 {
    538     const size_t ps = prefix.size();
    539     if (ps > size()) return false;
    540     return strzcmp16(mString, ps, prefix.string(), ps) == 0;
    541 }
    542 
    543 bool String16::startsWith(const char16_t* prefix) const
    544 {
    545     const size_t ps = strlen16(prefix);
    546     if (ps > size()) return false;
    547     return strncmp16(mString, prefix, ps) == 0;
    548 }
    549 
    550 status_t String16::makeLower()
    551 {
    552     const size_t N = size();
    553     const char16_t* str = string();
    554     char16_t* edit = NULL;
    555     for (size_t i=0; i<N; i++) {
    556         const char16_t v = str[i];
    557         if (v >= 'A' && v <= 'Z') {
    558             if (!edit) {
    559                 SharedBuffer* buf = SharedBuffer::bufferFromData(mString)->edit();
    560                 if (!buf) {
    561                     return NO_MEMORY;
    562                 }
    563                 edit = (char16_t*)buf->data();
    564                 mString = str = edit;
    565             }
    566             edit[i] = tolower((char)v);
    567         }
    568     }
    569     return NO_ERROR;
    570 }
    571 
    572 status_t String16::replaceAll(char16_t replaceThis, char16_t withThis)
    573 {
    574     const size_t N = size();
    575     const char16_t* str = string();
    576     char16_t* edit = NULL;
    577     for (size_t i=0; i<N; i++) {
    578         if (str[i] == replaceThis) {
    579             if (!edit) {
    580                 SharedBuffer* buf = SharedBuffer::bufferFromData(mString)->edit();
    581                 if (!buf) {
    582                     return NO_MEMORY;
    583                 }
    584                 edit = (char16_t*)buf->data();
    585                 mString = str = edit;
    586             }
    587             edit[i] = withThis;
    588         }
    589     }
    590     return NO_ERROR;
    591 }
    592 
    593 status_t String16::remove(size_t len, size_t begin)
    594 {
    595     const size_t N = size();
    596     if (begin >= N) {
    597         SharedBuffer::bufferFromData(mString)->release();
    598         mString = getEmptyString();
    599         return NO_ERROR;
    600     }
    601     if ((begin+len) > N) len = N-begin;
    602     if (begin == 0 && len == N) {
    603         return NO_ERROR;
    604     }
    605 
    606     if (begin > 0) {
    607         SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    608             ->editResize((N+1)*sizeof(char16_t));
    609         if (!buf) {
    610             return NO_MEMORY;
    611         }
    612         char16_t* str = (char16_t*)buf->data();
    613         memmove(str, str+begin, (N-begin+1)*sizeof(char16_t));
    614         mString = str;
    615     }
    616     SharedBuffer* buf = SharedBuffer::bufferFromData(mString)
    617         ->editResize((len+1)*sizeof(char16_t));
    618     if (buf) {
    619         char16_t* str = (char16_t*)buf->data();
    620         str[len] = 0;
    621         mString = str;
    622         return NO_ERROR;
    623     }
    624     return NO_MEMORY;
    625 }
    626 
    627 TextOutput& operator<<(TextOutput& to, const String16& val)
    628 {
    629     to << String8(val).string();
    630     return to;
    631 }
    632 
    633 }; // namespace android
    634