1 2 /* 3 * Copyright 2006 The Android Open Source Project 4 * 5 * Use of this source code is governed by a BSD-style license that can be 6 * found in the LICENSE file. 7 */ 8 9 10 #include "SkUtils.h" 11 12 /* 0xxxxxxx 1 total 13 10xxxxxx // never a leading byte 14 110xxxxx 2 total 15 1110xxxx 3 total 16 11110xxx 4 total 17 18 11 10 01 01 xx xx xx xx 0... 19 0xE5XX0000 20 0xE5 << 24 21 */ 22 23 #ifdef SK_DEBUG 24 static void assert_utf8_leadingbyte(unsigned c) { 25 SkASSERT(c <= 0xF7); // otherwise leading byte is too big (more than 4 bytes) 26 SkASSERT((c & 0xC0) != 0x80); // can't begin with a middle char 27 } 28 29 int SkUTF8_LeadByteToCount(unsigned c) { 30 assert_utf8_leadingbyte(c); 31 return (((0xE5 << 24) >> (c >> 4 << 1)) & 3) + 1; 32 } 33 #else 34 #define assert_utf8_leadingbyte(c) 35 #endif 36 37 int SkUTF8_CountUnichars(const char utf8[]) { 38 SkASSERT(utf8); 39 40 int count = 0; 41 42 for (;;) { 43 int c = *(const uint8_t*)utf8; 44 if (c == 0) { 45 break; 46 } 47 utf8 += SkUTF8_LeadByteToCount(c); 48 count += 1; 49 } 50 return count; 51 } 52 53 int SkUTF8_CountUnichars(const char utf8[], size_t byteLength) { 54 SkASSERT(utf8 || 0 == byteLength); 55 56 int count = 0; 57 const char* stop = utf8 + byteLength; 58 59 while (utf8 < stop) { 60 utf8 += SkUTF8_LeadByteToCount(*(const uint8_t*)utf8); 61 count += 1; 62 } 63 return count; 64 } 65 66 SkUnichar SkUTF8_ToUnichar(const char utf8[]) { 67 SkASSERT(utf8); 68 69 const uint8_t* p = (const uint8_t*)utf8; 70 int c = *p; 71 int hic = c << 24; 72 73 assert_utf8_leadingbyte(c); 74 75 if (hic < 0) { 76 uint32_t mask = (uint32_t)~0x3F; 77 hic = SkLeftShift(hic, 1); 78 do { 79 c = (c << 6) | (*++p & 0x3F); 80 mask <<= 5; 81 } while ((hic = SkLeftShift(hic, 1)) < 0); 82 c &= ~mask; 83 } 84 return c; 85 } 86 87 SkUnichar SkUTF8_NextUnichar(const char** ptr) { 88 SkASSERT(ptr && *ptr); 89 90 const uint8_t* p = (const uint8_t*)*ptr; 91 int c = *p; 92 int hic = c << 24; 93 94 assert_utf8_leadingbyte(c); 95 96 if (hic < 0) { 97 uint32_t mask = (uint32_t)~0x3F; 98 hic = SkLeftShift(hic, 1); 99 do { 100 c = (c << 6) | (*++p & 0x3F); 101 mask <<= 5; 102 } while ((hic = SkLeftShift(hic, 1)) < 0); 103 c &= ~mask; 104 } 105 *ptr = (char*)p + 1; 106 return c; 107 } 108 109 SkUnichar SkUTF8_PrevUnichar(const char** ptr) { 110 SkASSERT(ptr && *ptr); 111 112 const char* p = *ptr; 113 114 if (*--p & 0x80) { 115 while (*--p & 0x40) { 116 ; 117 } 118 } 119 120 *ptr = (char*)p; 121 return SkUTF8_NextUnichar(&p); 122 } 123 124 size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) { 125 if ((uint32_t)uni > 0x10FFFF) { 126 SkDEBUGFAIL("bad unichar"); 127 return 0; 128 } 129 130 if (uni <= 127) { 131 if (utf8) { 132 *utf8 = (char)uni; 133 } 134 return 1; 135 } 136 137 char tmp[4]; 138 char* p = tmp; 139 size_t count = 1; 140 141 SkDEBUGCODE(SkUnichar orig = uni;) 142 143 while (uni > 0x7F >> count) { 144 *p++ = (char)(0x80 | (uni & 0x3F)); 145 uni >>= 6; 146 count += 1; 147 } 148 149 if (utf8) { 150 p = tmp; 151 utf8 += count; 152 while (p < tmp + count - 1) { 153 *--utf8 = *p++; 154 } 155 *--utf8 = (char)(~(0xFF >> count) | uni); 156 } 157 158 SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8)); 159 return count; 160 } 161 162 /////////////////////////////////////////////////////////////////////////////// 163 164 int SkUTF16_CountUnichars(const uint16_t src[]) { 165 SkASSERT(src); 166 167 int count = 0; 168 unsigned c; 169 while ((c = *src++) != 0) { 170 SkASSERT(!SkUTF16_IsLowSurrogate(c)); 171 if (SkUTF16_IsHighSurrogate(c)) { 172 c = *src++; 173 SkASSERT(SkUTF16_IsLowSurrogate(c)); 174 } 175 count += 1; 176 } 177 return count; 178 } 179 180 int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) { 181 SkASSERT(src); 182 183 const uint16_t* stop = src + numberOf16BitValues; 184 int count = 0; 185 while (src < stop) { 186 unsigned c = *src++; 187 SkASSERT(!SkUTF16_IsLowSurrogate(c)); 188 if (SkUTF16_IsHighSurrogate(c)) { 189 SkASSERT(src < stop); 190 c = *src++; 191 SkASSERT(SkUTF16_IsLowSurrogate(c)); 192 } 193 count += 1; 194 } 195 return count; 196 } 197 198 SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) { 199 SkASSERT(srcPtr && *srcPtr); 200 201 const uint16_t* src = *srcPtr; 202 SkUnichar c = *src++; 203 204 SkASSERT(!SkUTF16_IsLowSurrogate(c)); 205 if (SkUTF16_IsHighSurrogate(c)) { 206 unsigned c2 = *src++; 207 SkASSERT(SkUTF16_IsLowSurrogate(c2)); 208 209 // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000 210 // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF) 211 c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00); 212 } 213 *srcPtr = src; 214 return c; 215 } 216 217 SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) { 218 SkASSERT(srcPtr && *srcPtr); 219 220 const uint16_t* src = *srcPtr; 221 SkUnichar c = *--src; 222 223 SkASSERT(!SkUTF16_IsHighSurrogate(c)); 224 if (SkUTF16_IsLowSurrogate(c)) { 225 unsigned c2 = *--src; 226 SkASSERT(SkUTF16_IsHighSurrogate(c2)); 227 c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00); 228 } 229 *srcPtr = src; 230 return c; 231 } 232 233 size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) { 234 SkASSERT((unsigned)uni <= 0x10FFFF); 235 236 int extra = (uni > 0xFFFF); 237 238 if (dst) { 239 if (extra) { 240 // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10)); 241 // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64)); 242 dst[0] = SkToU16((0xD800 - 64) + (uni >> 10)); 243 dst[1] = SkToU16(0xDC00 | (uni & 0x3FF)); 244 245 SkASSERT(SkUTF16_IsHighSurrogate(dst[0])); 246 SkASSERT(SkUTF16_IsLowSurrogate(dst[1])); 247 } else { 248 dst[0] = SkToU16(uni); 249 SkASSERT(!SkUTF16_IsHighSurrogate(dst[0])); 250 SkASSERT(!SkUTF16_IsLowSurrogate(dst[0])); 251 } 252 } 253 return 1 + extra; 254 } 255 256 size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues, 257 char utf8[]) { 258 SkASSERT(numberOf16BitValues >= 0); 259 if (numberOf16BitValues <= 0) { 260 return 0; 261 } 262 263 SkASSERT(utf16 != nullptr); 264 265 const uint16_t* stop = utf16 + numberOf16BitValues; 266 size_t size = 0; 267 268 if (utf8 == nullptr) { // just count 269 while (utf16 < stop) { 270 size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr); 271 } 272 } else { 273 char* start = utf8; 274 while (utf16 < stop) { 275 utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8); 276 } 277 size = utf8 - start; 278 } 279 return size; 280 } 281