Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2006 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 
      9 #include "SkUtils.h"
     10 
     11 void sk_memset16(uint16_t buffer[], uint16_t value, int count) {
     12     for (int i = 0; i < count; i++) {
     13         buffer[i] = value;
     14     }
     15 }
     16 void sk_memset32(uint32_t buffer[], uint32_t value, int count) {
     17     for (int i = 0; i < count; i++) {
     18         buffer[i] = value;
     19     }
     20 }
     21 void sk_memset64(uint64_t buffer[], uint64_t value, int count) {
     22     for (int i = 0; i < count; i++) {
     23         buffer[i] = value;
     24     }
     25 }
     26 
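/*  UTF-8 leading-byte patterns and the total length of the sequence they start:

    0xxxxxxx    1 total
    10xxxxxx    // never a leading byte
    110xxxxx    2 total
    1110xxxx    3 total
    11110xxx    4 total

    The value (total - 1) for each possible high nibble of the leading byte is
    packed two bits per nibble into one 32-bit constant, nibble 0xF in the top
    two bits down to nibble 0x0 in the bottom two bits:

    11 10 01 01 xx xx xx xx 0...
    0xE5XX0000
    0xE5 << 24

    Nibbles 0xF, 0xE, 0xD, 0xC hold 3, 2, 1, 1. Nibbles 0x8-0xB (xx) belong to
    continuation bytes, which are never leading bytes, so their entries are
    don't-cares. Nibbles 0x0-0x7 are ASCII and hold 0. Shifting the constant
    right by 2 * (byte >> 4) and masking with 3 extracts the entry.
*/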
     37 
     38 static bool utf8_byte_is_valid(uint8_t c) {
     39     return c < 0xF5 && (c & 0xFE) != 0xC0;
     40 }
     41 static bool utf8_byte_is_continuation(uint8_t c) {
     42     return  (c & 0xC0) == 0x80;
     43 }
     44 static bool utf8_byte_is_leading_byte(uint8_t c) {
     45     return utf8_byte_is_valid(c) && !utf8_byte_is_continuation(c);
     46 }
     47 
     48 #ifdef SK_DEBUG
     49     static void assert_utf8_leadingbyte(unsigned c) {
     50         SkASSERT(utf8_byte_is_leading_byte(SkToU8(c)));
     51     }
     52 
     53     int SkUTF8_LeadByteToCount(unsigned c) {
     54         assert_utf8_leadingbyte(c);
     55         return (((0xE5 << 24) >> (c >> 4 << 1)) & 3) + 1;
     56     }
     57 #else
     58     #define assert_utf8_leadingbyte(c)
     59 #endif
     60 
     61 /**
     62  * @returns -1  iff invalid UTF8 byte,
     63  *           0  iff UTF8 continuation byte,
     64  *           1  iff ASCII byte,
     65  *           2  iff leading byte of 2-byte sequence,
     66  *           3  iff leading byte of 3-byte sequence, and
     67  *           4  iff leading byte of 4-byte sequence.
     68  *
     69  * I.e.: if return value > 0, then gives length of sequence.
     70 */
     71 static int utf8_byte_type(uint8_t c) {
     72     if (c < 0x80) {
     73         return 1;
     74     } else if (c < 0xC0) {
     75         return 0;
     76     } else if (c < 0xF5 && (c & 0xFE) != 0xC0) { // "octet values C0, C1, F5 to FF never appear"
     77         return (((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1;
     78     } else {
     79         return -1;
     80     }
     81 }
     82 static bool utf8_type_is_valid_leading_byte(int type) { return type > 0; }
     83 
     84 int SkUTF8_CountUnichars(const char utf8[]) {
     85     SkASSERT(utf8);
     86 
     87     int count = 0;
     88 
     89     for (;;) {
     90         int c = *(const uint8_t*)utf8;
     91         if (c == 0) {
     92             break;
     93         }
     94         utf8 += SkUTF8_LeadByteToCount(c);
     95         count += 1;
     96     }
     97     return count;
     98 }
     99 
    100 // SAFE: returns -1 if invalid UTF-8
    101 int SkUTF8_CountUnicharsWithError(const char utf8[], size_t byteLength) {
    102     SkASSERT(utf8 || 0 == byteLength);
    103 
    104     int         count = 0;
    105     const char* stop = utf8 + byteLength;
    106 
    107     while (utf8 < stop) {
    108         int type = utf8_byte_type(*(const uint8_t*)utf8);
    109         SkASSERT(type >= -1 && type <= 4);
    110         if (!utf8_type_is_valid_leading_byte(type) ||
    111             utf8 + type > stop) {  // Sequence extends beyond end.
    112             return -1;
    113         }
    114         while(type-- > 1) {
    115             ++utf8;
    116             if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
    117                 return -1;
    118             }
    119         }
    120         ++utf8;
    121         ++count;
    122     }
    123     return count;
    124 }
    125 
    126 SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
    127     SkASSERT(utf8);
    128 
    129     const uint8_t*  p = (const uint8_t*)utf8;
    130     int             c = *p;
    131     int             hic = c << 24;
    132 
    133     assert_utf8_leadingbyte(c);
    134 
    135     if (hic < 0) {
    136         uint32_t mask = (uint32_t)~0x3F;
    137         hic = SkLeftShift(hic, 1);
    138         do {
    139             c = (c << 6) | (*++p & 0x3F);
    140             mask <<= 5;
    141         } while ((hic = SkLeftShift(hic, 1)) < 0);
    142         c &= ~mask;
    143     }
    144     return c;
    145 }
    146 
    147 // SAFE: returns -1 on invalid UTF-8 sequence.
    148 SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end) {
    149     SkASSERT(ptr && *ptr);
    150     SkASSERT(*ptr < end);
    151     const uint8_t*  p = (const uint8_t*)*ptr;
    152     int             c = *p;
    153     int             hic = c << 24;
    154 
    155     if (!utf8_byte_is_leading_byte(c)) {
    156         return -1;
    157     }
    158     if (hic < 0) {
    159         uint32_t mask = (uint32_t)~0x3F;
    160         hic = SkLeftShift(hic, 1);
    161         do {
    162             ++p;
    163             if (p >= (const uint8_t*)end) {
    164                 return -1;
    165             }
    166             // check before reading off end of array.
    167             uint8_t nextByte = *p;
    168             if (!utf8_byte_is_continuation(nextByte)) {
    169                 return -1;
    170             }
    171             c = (c << 6) | (nextByte & 0x3F);
    172             mask <<= 5;
    173         } while ((hic = SkLeftShift(hic, 1)) < 0);
    174         c &= ~mask;
    175     }
    176     *ptr = (char*)p + 1;
    177     return c;
    178 }
    179 
    180 SkUnichar SkUTF8_NextUnichar(const char** ptr) {
    181     SkASSERT(ptr && *ptr);
    182 
    183     const uint8_t*  p = (const uint8_t*)*ptr;
    184     int             c = *p;
    185     int             hic = c << 24;
    186 
    187     assert_utf8_leadingbyte(c);
    188 
    189     if (hic < 0) {
    190         uint32_t mask = (uint32_t)~0x3F;
    191         hic = SkLeftShift(hic, 1);
    192         do {
    193             c = (c << 6) | (*++p & 0x3F);
    194             mask <<= 5;
    195         } while ((hic = SkLeftShift(hic, 1)) < 0);
    196         c &= ~mask;
    197     }
    198     *ptr = (char*)p + 1;
    199     return c;
    200 }
    201 
    202 SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
    203     SkASSERT(ptr && *ptr);
    204 
    205     const char* p = *ptr;
    206 
    207     if (*--p & 0x80) {
    208         while (*--p & 0x40) {
    209             ;
    210         }
    211     }
    212 
    213     *ptr = (char*)p;
    214     return SkUTF8_NextUnichar(&p);
    215 }
    216 
    217 size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
    218     if ((uint32_t)uni > 0x10FFFF) {
    219         SkDEBUGFAIL("bad unichar");
    220         return 0;
    221     }
    222 
    223     if (uni <= 127) {
    224         if (utf8) {
    225             *utf8 = (char)uni;
    226         }
    227         return 1;
    228     }
    229 
    230     char    tmp[4];
    231     char*   p = tmp;
    232     size_t  count = 1;
    233 
    234     SkDEBUGCODE(SkUnichar orig = uni;)
    235 
    236     while (uni > 0x7F >> count) {
    237         *p++ = (char)(0x80 | (uni & 0x3F));
    238         uni >>= 6;
    239         count += 1;
    240     }
    241 
    242     if (utf8) {
    243         p = tmp;
    244         utf8 += count;
    245         while (p < tmp + count - 1) {
    246             *--utf8 = *p++;
    247         }
    248         *--utf8 = (char)(~(0xFF >> count) | uni);
    249     }
    250 
    251     SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
    252     return count;
    253 }
    254 
    255 ///////////////////////////////////////////////////////////////////////////////
    256 
    257 int SkUTF16_CountUnichars(const uint16_t src[]) {
    258     SkASSERT(src);
    259 
    260     int count = 0;
    261     unsigned c;
    262     while ((c = *src++) != 0) {
    263         SkASSERT(!SkUTF16_IsLowSurrogate(c));
    264         if (SkUTF16_IsHighSurrogate(c)) {
    265             c = *src++;
    266             SkASSERT(SkUTF16_IsLowSurrogate(c));
    267         }
    268         count += 1;
    269     }
    270     return count;
    271 }
    272 
    273 int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) {
    274     SkASSERT(src);
    275 
    276     const uint16_t* stop = src + numberOf16BitValues;
    277     int count = 0;
    278     while (src < stop) {
    279         unsigned c = *src++;
    280         SkASSERT(!SkUTF16_IsLowSurrogate(c));
    281         if (SkUTF16_IsHighSurrogate(c)) {
    282             SkASSERT(src < stop);
    283             c = *src++;
    284             SkASSERT(SkUTF16_IsLowSurrogate(c));
    285         }
    286         count += 1;
    287     }
    288     return count;
    289 }
    290 
    291 SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
    292     SkASSERT(srcPtr && *srcPtr);
    293 
    294     const uint16_t* src = *srcPtr;
    295     SkUnichar       c = *src++;
    296 
    297     SkASSERT(!SkUTF16_IsLowSurrogate(c));
    298     if (SkUTF16_IsHighSurrogate(c)) {
    299         unsigned c2 = *src++;
    300         SkASSERT(SkUTF16_IsLowSurrogate(c2));
    301 
    302         // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000
    303         // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF)
    304         c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
    305     }
    306     *srcPtr = src;
    307     return c;
    308 }
    309 
    310 SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
    311     SkASSERT(srcPtr && *srcPtr);
    312 
    313     const uint16_t* src = *srcPtr;
    314     SkUnichar       c = *--src;
    315 
    316     SkASSERT(!SkUTF16_IsHighSurrogate(c));
    317     if (SkUTF16_IsLowSurrogate(c)) {
    318         unsigned c2 = *--src;
    319         SkASSERT(SkUTF16_IsHighSurrogate(c2));
    320         c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
    321     }
    322     *srcPtr = src;
    323     return c;
    324 }
    325 
    326 size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
    327     SkASSERT((unsigned)uni <= 0x10FFFF);
    328 
    329     int extra = (uni > 0xFFFF);
    330 
    331     if (dst) {
    332         if (extra) {
    333             // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
    334             // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
    335             dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
    336             dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
    337 
    338             SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
    339             SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
    340         } else {
    341             dst[0] = SkToU16(uni);
    342             SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
    343             SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
    344         }
    345     }
    346     return 1 + extra;
    347 }
    348 
    349 size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
    350                       char utf8[]) {
    351     SkASSERT(numberOf16BitValues >= 0);
    352     if (numberOf16BitValues <= 0) {
    353         return 0;
    354     }
    355 
    356     SkASSERT(utf16 != nullptr);
    357 
    358     const uint16_t* stop = utf16 + numberOf16BitValues;
    359     size_t          size = 0;
    360 
    361     if (utf8 == nullptr) {    // just count
    362         while (utf16 < stop) {
    363             size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
    364         }
    365     } else {
    366         char* start = utf8;
    367         while (utf16 < stop) {
    368             utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
    369         }
    370         size = utf8 - start;
    371     }
    372     return size;
    373 }
    374