Home | History | Annotate | Download | only in core
      1 
      2 /*
      3  * Copyright 2006 The Android Open Source Project
      4  *
      5  * Use of this source code is governed by a BSD-style license that can be
      6  * found in the LICENSE file.
      7  */
      8 
      9 
     10 #include "SkUtils.h"
     11 
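/*  UTF-8 leading-byte patterns and the total length of the sequence each one starts:

    0xxxxxxx    1 total
    10xxxxxx    // never a leading byte
    110xxxxx    2 total
    1110xxxx    3 total
    11110xxx    4 total

    (total - 1) is packed into a 32-bit table, two bits per value of the leading
    byte's high nibble, with nibble 0xF in the topmost two bits:

    11 10 01 01 xx xx xx xx 0...    (nibbles F, E, D, C, then B..8 = don't care, then 7..0 = 0)
    0xE5XX0000                      (XX = don't-care bits, stored as 0)
    0xE5 << 24
*/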
     22 
     23 #ifdef SK_DEBUG
     24     static void assert_utf8_leadingbyte(unsigned c) {
     25         SkASSERT(c <= 0xF7);    // otherwise leading byte is too big (more than 4 bytes)
     26         SkASSERT((c & 0xC0) != 0x80);   // can't begin with a middle char
     27     }
     28 
     29     int SkUTF8_LeadByteToCount(unsigned c) {
     30         assert_utf8_leadingbyte(c);
     31         return (((0xE5 << 24) >> (c >> 4 << 1)) & 3) + 1;
     32     }
     33 #else
     34     #define assert_utf8_leadingbyte(c)
     35 #endif
     36 
     37 int SkUTF8_CountUnichars(const char utf8[]) {
     38     SkASSERT(utf8);
     39 
     40     int count = 0;
     41 
     42     for (;;) {
     43         int c = *(const uint8_t*)utf8;
     44         if (c == 0) {
     45             break;
     46         }
     47         utf8 += SkUTF8_LeadByteToCount(c);
     48         count += 1;
     49     }
     50     return count;
     51 }
     52 
     53 int SkUTF8_CountUnichars(const char utf8[], size_t byteLength) {
     54     SkASSERT(utf8 || 0 == byteLength);
     55 
     56     int         count = 0;
     57     const char* stop = utf8 + byteLength;
     58 
     59     while (utf8 < stop) {
     60         utf8 += SkUTF8_LeadByteToCount(*(const uint8_t*)utf8);
     61         count += 1;
     62     }
     63     return count;
     64 }
     65 
     66 SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
     67     SkASSERT(utf8);
     68 
     69     const uint8_t*  p = (const uint8_t*)utf8;
     70     int             c = *p;
     71     int             hic = c << 24;
     72 
     73     assert_utf8_leadingbyte(c);
     74 
     75     if (hic < 0) {
     76         uint32_t mask = (uint32_t)~0x3F;
     77         hic = SkLeftShift(hic, 1);
     78         do {
     79             c = (c << 6) | (*++p & 0x3F);
     80             mask <<= 5;
     81         } while ((hic = SkLeftShift(hic, 1)) < 0);
     82         c &= ~mask;
     83     }
     84     return c;
     85 }
     86 
     87 SkUnichar SkUTF8_NextUnichar(const char** ptr) {
     88     SkASSERT(ptr && *ptr);
     89 
     90     const uint8_t*  p = (const uint8_t*)*ptr;
     91     int             c = *p;
     92     int             hic = c << 24;
     93 
     94     assert_utf8_leadingbyte(c);
     95 
     96     if (hic < 0) {
     97         uint32_t mask = (uint32_t)~0x3F;
     98         hic = SkLeftShift(hic, 1);
     99         do {
    100             c = (c << 6) | (*++p & 0x3F);
    101             mask <<= 5;
    102         } while ((hic = SkLeftShift(hic, 1)) < 0);
    103         c &= ~mask;
    104     }
    105     *ptr = (char*)p + 1;
    106     return c;
    107 }
    108 
    109 SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
    110     SkASSERT(ptr && *ptr);
    111 
    112     const char* p = *ptr;
    113 
    114     if (*--p & 0x80) {
    115         while (*--p & 0x40) {
    116             ;
    117         }
    118     }
    119 
    120     *ptr = (char*)p;
    121     return SkUTF8_NextUnichar(&p);
    122 }
    123 
    124 size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
    125     if ((uint32_t)uni > 0x10FFFF) {
    126         SkDEBUGFAIL("bad unichar");
    127         return 0;
    128     }
    129 
    130     if (uni <= 127) {
    131         if (utf8) {
    132             *utf8 = (char)uni;
    133         }
    134         return 1;
    135     }
    136 
    137     char    tmp[4];
    138     char*   p = tmp;
    139     size_t  count = 1;
    140 
    141     SkDEBUGCODE(SkUnichar orig = uni;)
    142 
    143     while (uni > 0x7F >> count) {
    144         *p++ = (char)(0x80 | (uni & 0x3F));
    145         uni >>= 6;
    146         count += 1;
    147     }
    148 
    149     if (utf8) {
    150         p = tmp;
    151         utf8 += count;
    152         while (p < tmp + count - 1) {
    153             *--utf8 = *p++;
    154         }
    155         *--utf8 = (char)(~(0xFF >> count) | uni);
    156     }
    157 
    158     SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
    159     return count;
    160 }
    161 
    162 ///////////////////////////////////////////////////////////////////////////////
    163 
    164 int SkUTF16_CountUnichars(const uint16_t src[]) {
    165     SkASSERT(src);
    166 
    167     int count = 0;
    168     unsigned c;
    169     while ((c = *src++) != 0) {
    170         SkASSERT(!SkUTF16_IsLowSurrogate(c));
    171         if (SkUTF16_IsHighSurrogate(c)) {
    172             c = *src++;
    173             SkASSERT(SkUTF16_IsLowSurrogate(c));
    174         }
    175         count += 1;
    176     }
    177     return count;
    178 }
    179 
    180 int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) {
    181     SkASSERT(src);
    182 
    183     const uint16_t* stop = src + numberOf16BitValues;
    184     int count = 0;
    185     while (src < stop) {
    186         unsigned c = *src++;
    187         SkASSERT(!SkUTF16_IsLowSurrogate(c));
    188         if (SkUTF16_IsHighSurrogate(c)) {
    189             SkASSERT(src < stop);
    190             c = *src++;
    191             SkASSERT(SkUTF16_IsLowSurrogate(c));
    192         }
    193         count += 1;
    194     }
    195     return count;
    196 }
    197 
    198 SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
    199     SkASSERT(srcPtr && *srcPtr);
    200 
    201     const uint16_t* src = *srcPtr;
    202     SkUnichar       c = *src++;
    203 
    204     SkASSERT(!SkUTF16_IsLowSurrogate(c));
    205     if (SkUTF16_IsHighSurrogate(c)) {
    206         unsigned c2 = *src++;
    207         SkASSERT(SkUTF16_IsLowSurrogate(c2));
    208 
    209         // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000
    210         // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF)
    211         c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
    212     }
    213     *srcPtr = src;
    214     return c;
    215 }
    216 
    217 SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
    218     SkASSERT(srcPtr && *srcPtr);
    219 
    220     const uint16_t* src = *srcPtr;
    221     SkUnichar       c = *--src;
    222 
    223     SkASSERT(!SkUTF16_IsHighSurrogate(c));
    224     if (SkUTF16_IsLowSurrogate(c)) {
    225         unsigned c2 = *--src;
    226         SkASSERT(SkUTF16_IsHighSurrogate(c2));
    227         c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
    228     }
    229     *srcPtr = src;
    230     return c;
    231 }
    232 
    233 size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
    234     SkASSERT((unsigned)uni <= 0x10FFFF);
    235 
    236     int extra = (uni > 0xFFFF);
    237 
    238     if (dst) {
    239         if (extra) {
    240             // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
    241             // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
    242             dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
    243             dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
    244 
    245             SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
    246             SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
    247         } else {
    248             dst[0] = SkToU16(uni);
    249             SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
    250             SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
    251         }
    252     }
    253     return 1 + extra;
    254 }
    255 
    256 size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
    257                       char utf8[]) {
    258     SkASSERT(numberOf16BitValues >= 0);
    259     if (numberOf16BitValues <= 0) {
    260         return 0;
    261     }
    262 
    263     SkASSERT(utf16 != nullptr);
    264 
    265     const uint16_t* stop = utf16 + numberOf16BitValues;
    266     size_t          size = 0;
    267 
    268     if (utf8 == nullptr) {    // just count
    269         while (utf16 < stop) {
    270             size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
    271         }
    272     } else {
    273         char* start = utf8;
    274         while (utf16 < stop) {
    275             utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
    276         }
    277         size = utf8 - start;
    278     }
    279     return size;
    280 }
    281