Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2006 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 
      9 #include "SkUtils.h"
     10 
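/*  UTF-8 lead-byte patterns and the total length of the sequence they start:

    0xxxxxxx    1 total
    10xxxxxx    // never a leading byte
    110xxxxx    2 total
    1110xxxx    3 total
    11110xxx    4 total

    The (length - 1) values for the four highest nibbles of the lead byte
    (0xF, 0xE, 0xD, 0xC) are packed, two bits each, into the top byte of a
    32-bit constant:

    11 10 01 01 xx xx xx xx 0...
    0xE5XX0000
    0xE5 << 24

    Shifting that constant right by 2 * (byte >> 4) and masking with 3 gives
    (length - 1). Every nibble below 0xC selects bits that are zero, which
    yields a length of 1.
*/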
     21 
     22 static bool utf8_byte_is_valid(uint8_t c) {
     23     return c < 0xF5 && (c & 0xFE) != 0xC0;
     24 }
     25 static bool utf8_byte_is_continuation(uint8_t c) {
     26     return  (c & 0xC0) == 0x80;
     27 }
     28 static bool utf8_byte_is_leading_byte(uint8_t c) {
     29     return utf8_byte_is_valid(c) && !utf8_byte_is_continuation(c);
     30 }
     31 
     32 #ifdef SK_DEBUG
     33     static void assert_utf8_leadingbyte(unsigned c) {
     34         SkASSERT(utf8_byte_is_leading_byte(SkToU8(c)));
     35     }
     36 
     37     int SkUTF8_LeadByteToCount(unsigned c) {
     38         assert_utf8_leadingbyte(c);
     39         return (((0xE5 << 24) >> (c >> 4 << 1)) & 3) + 1;
     40     }
     41 #else
     42     #define assert_utf8_leadingbyte(c)
     43 #endif
     44 
     45 /**
     46  * @returns -1  iff invalid UTF8 byte,
     47  *           0  iff UTF8 continuation byte,
     48  *           1  iff ASCII byte,
     49  *           2  iff leading byte of 2-byte sequence,
     50  *           3  iff leading byte of 3-byte sequence, and
     51  *           4  iff leading byte of 4-byte sequence.
     52  *
     53  * I.e.: if return value > 0, then gives length of sequence.
     54 */
     55 static int utf8_byte_type(uint8_t c) {
     56     if (c < 0x80) {
     57         return 1;
     58     } else if (c < 0xC0) {
     59         return 0;
     60     } else if (c < 0xF5 && (c & 0xFE) != 0xC0) { // "octet values C0, C1, F5 to FF never appear"
     61         return (((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1;
     62     } else {
     63         return -1;
     64     }
     65 }
     66 static bool utf8_type_is_valid_leading_byte(int type) { return type > 0; }
     67 
     68 int SkUTF8_CountUnichars(const char utf8[]) {
     69     SkASSERT(utf8);
     70 
     71     int count = 0;
     72 
     73     for (;;) {
     74         int c = *(const uint8_t*)utf8;
     75         if (c == 0) {
     76             break;
     77         }
     78         utf8 += SkUTF8_LeadByteToCount(c);
     79         count += 1;
     80     }
     81     return count;
     82 }
     83 
     84 // SAFE: returns -1 if invalid UTF-8
     85 int SkUTF8_CountUnicharsWithError(const char utf8[], size_t byteLength) {
     86     SkASSERT(utf8 || 0 == byteLength);
     87 
     88     int         count = 0;
     89     const char* stop = utf8 + byteLength;
     90 
     91     while (utf8 < stop) {
     92         int type = utf8_byte_type(*(const uint8_t*)utf8);
     93         SkASSERT(type >= -1 && type <= 4);
     94         if (!utf8_type_is_valid_leading_byte(type) ||
     95             utf8 + type > stop) {  // Sequence extends beyond end.
     96             return -1;
     97         }
     98         while(type-- > 1) {
     99             ++utf8;
    100             if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
    101                 return -1;
    102             }
    103         }
    104         ++utf8;
    105         ++count;
    106     }
    107     return count;
    108 }
    109 
    110 SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
    111     SkASSERT(utf8);
    112 
    113     const uint8_t*  p = (const uint8_t*)utf8;
    114     int             c = *p;
    115     int             hic = c << 24;
    116 
    117     assert_utf8_leadingbyte(c);
    118 
    119     if (hic < 0) {
    120         uint32_t mask = (uint32_t)~0x3F;
    121         hic = SkLeftShift(hic, 1);
    122         do {
    123             c = (c << 6) | (*++p & 0x3F);
    124             mask <<= 5;
    125         } while ((hic = SkLeftShift(hic, 1)) < 0);
    126         c &= ~mask;
    127     }
    128     return c;
    129 }
    130 
    131 // SAFE: returns -1 on invalid UTF-8 sequence.
    132 SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end) {
    133     SkASSERT(ptr && *ptr);
    134     SkASSERT(*ptr < end);
    135     const uint8_t*  p = (const uint8_t*)*ptr;
    136     int             c = *p;
    137     int             hic = c << 24;
    138 
    139     if (!utf8_byte_is_leading_byte(c)) {
    140         return -1;
    141     }
    142     if (hic < 0) {
    143         uint32_t mask = (uint32_t)~0x3F;
    144         hic = SkLeftShift(hic, 1);
    145         do {
    146             ++p;
    147             if (p >= (const uint8_t*)end) {
    148                 return -1;
    149             }
    150             // check before reading off end of array.
    151             uint8_t nextByte = *p;
    152             if (!utf8_byte_is_continuation(nextByte)) {
    153                 return -1;
    154             }
    155             c = (c << 6) | (nextByte & 0x3F);
    156             mask <<= 5;
    157         } while ((hic = SkLeftShift(hic, 1)) < 0);
    158         c &= ~mask;
    159     }
    160     *ptr = (char*)p + 1;
    161     return c;
    162 }
    163 
    164 SkUnichar SkUTF8_NextUnichar(const char** ptr) {
    165     SkASSERT(ptr && *ptr);
    166 
    167     const uint8_t*  p = (const uint8_t*)*ptr;
    168     int             c = *p;
    169     int             hic = c << 24;
    170 
    171     assert_utf8_leadingbyte(c);
    172 
    173     if (hic < 0) {
    174         uint32_t mask = (uint32_t)~0x3F;
    175         hic = SkLeftShift(hic, 1);
    176         do {
    177             c = (c << 6) | (*++p & 0x3F);
    178             mask <<= 5;
    179         } while ((hic = SkLeftShift(hic, 1)) < 0);
    180         c &= ~mask;
    181     }
    182     *ptr = (char*)p + 1;
    183     return c;
    184 }
    185 
    186 SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
    187     SkASSERT(ptr && *ptr);
    188 
    189     const char* p = *ptr;
    190 
    191     if (*--p & 0x80) {
    192         while (*--p & 0x40) {
    193             ;
    194         }
    195     }
    196 
    197     *ptr = (char*)p;
    198     return SkUTF8_NextUnichar(&p);
    199 }
    200 
    201 size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
    202     if ((uint32_t)uni > 0x10FFFF) {
    203         SkDEBUGFAIL("bad unichar");
    204         return 0;
    205     }
    206 
    207     if (uni <= 127) {
    208         if (utf8) {
    209             *utf8 = (char)uni;
    210         }
    211         return 1;
    212     }
    213 
    214     char    tmp[4];
    215     char*   p = tmp;
    216     size_t  count = 1;
    217 
    218     SkDEBUGCODE(SkUnichar orig = uni;)
    219 
    220     while (uni > 0x7F >> count) {
    221         *p++ = (char)(0x80 | (uni & 0x3F));
    222         uni >>= 6;
    223         count += 1;
    224     }
    225 
    226     if (utf8) {
    227         p = tmp;
    228         utf8 += count;
    229         while (p < tmp + count - 1) {
    230             *--utf8 = *p++;
    231         }
    232         *--utf8 = (char)(~(0xFF >> count) | uni);
    233     }
    234 
    235     SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
    236     return count;
    237 }
    238 
    239 ///////////////////////////////////////////////////////////////////////////////
    240 
    241 int SkUTF16_CountUnichars(const uint16_t src[]) {
    242     SkASSERT(src);
    243 
    244     int count = 0;
    245     unsigned c;
    246     while ((c = *src++) != 0) {
    247         SkASSERT(!SkUTF16_IsLowSurrogate(c));
    248         if (SkUTF16_IsHighSurrogate(c)) {
    249             c = *src++;
    250             SkASSERT(SkUTF16_IsLowSurrogate(c));
    251         }
    252         count += 1;
    253     }
    254     return count;
    255 }
    256 
    257 int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) {
    258     SkASSERT(src);
    259 
    260     const uint16_t* stop = src + numberOf16BitValues;
    261     int count = 0;
    262     while (src < stop) {
    263         unsigned c = *src++;
    264         SkASSERT(!SkUTF16_IsLowSurrogate(c));
    265         if (SkUTF16_IsHighSurrogate(c)) {
    266             SkASSERT(src < stop);
    267             c = *src++;
    268             SkASSERT(SkUTF16_IsLowSurrogate(c));
    269         }
    270         count += 1;
    271     }
    272     return count;
    273 }
    274 
    275 SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
    276     SkASSERT(srcPtr && *srcPtr);
    277 
    278     const uint16_t* src = *srcPtr;
    279     SkUnichar       c = *src++;
    280 
    281     SkASSERT(!SkUTF16_IsLowSurrogate(c));
    282     if (SkUTF16_IsHighSurrogate(c)) {
    283         unsigned c2 = *src++;
    284         SkASSERT(SkUTF16_IsLowSurrogate(c2));
    285 
    286         // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000
    287         // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF)
    288         c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
    289     }
    290     *srcPtr = src;
    291     return c;
    292 }
    293 
    294 SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
    295     SkASSERT(srcPtr && *srcPtr);
    296 
    297     const uint16_t* src = *srcPtr;
    298     SkUnichar       c = *--src;
    299 
    300     SkASSERT(!SkUTF16_IsHighSurrogate(c));
    301     if (SkUTF16_IsLowSurrogate(c)) {
    302         unsigned c2 = *--src;
    303         SkASSERT(SkUTF16_IsHighSurrogate(c2));
    304         c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
    305     }
    306     *srcPtr = src;
    307     return c;
    308 }
    309 
    310 size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
    311     SkASSERT((unsigned)uni <= 0x10FFFF);
    312 
    313     int extra = (uni > 0xFFFF);
    314 
    315     if (dst) {
    316         if (extra) {
    317             // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
    318             // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
    319             dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
    320             dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
    321 
    322             SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
    323             SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
    324         } else {
    325             dst[0] = SkToU16(uni);
    326             SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
    327             SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
    328         }
    329     }
    330     return 1 + extra;
    331 }
    332 
    333 size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
    334                       char utf8[]) {
    335     SkASSERT(numberOf16BitValues >= 0);
    336     if (numberOf16BitValues <= 0) {
    337         return 0;
    338     }
    339 
    340     SkASSERT(utf16 != nullptr);
    341 
    342     const uint16_t* stop = utf16 + numberOf16BitValues;
    343     size_t          size = 0;
    344 
    345     if (utf8 == nullptr) {    // just count
    346         while (utf16 < stop) {
    347             size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
    348         }
    349     } else {
    350         char* start = utf8;
    351         while (utf16 < stop) {
    352             utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
    353         }
    354         size = utf8 - start;
    355     }
    356     return size;
    357 }
    358 
    359 const char SkHexadecimalDigits::gUpper[16] =
    360            { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
    361 const char SkHexadecimalDigits::gLower[16] =
    362            { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
    363 
    364