1 #include <stdint.h> 2 #include <stdlib.h> 3 4 #include <harfbuzz-external.h> 5 #include <harfbuzz-impl.h> 6 #include <harfbuzz-shaper.h> 7 #include "harfbuzz-unicode.h" 8 9 #include "tables/grapheme-break-properties.h" 10 #include "tables/mirroring-properties.h" 11 #include "tables/script-properties.h" 12 13 uint32_t 14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) { 15 const uint16_t v = chars[(*iter)++]; 16 if (HB_IsHighSurrogate(v)) { 17 // surrogate pair 18 if (*iter >= len) { 19 // the surrogate is incomplete. 20 return HB_InvalidCodePoint; 21 } 22 const uint16_t v2 = chars[(*iter)++]; 23 if (!HB_IsLowSurrogate(v2)) { 24 // invalidate surrogate pair. 25 return HB_InvalidCodePoint; 26 } 27 28 return HB_SurrogateToUcs4(v, v2); 29 } 30 31 if (HB_IsLowSurrogate(v)) { 32 // this isn't a valid code point 33 return HB_InvalidCodePoint; 34 } 35 36 return v; 37 } 38 39 uint32_t 40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) { 41 const uint16_t v = chars[(*iter)--]; 42 if (HB_IsLowSurrogate(v)) { 43 // surrogate pair 44 if (*iter < 0) { 45 // the surrogate is incomplete. 46 return HB_InvalidCodePoint; 47 } 48 const uint16_t v2 = chars[(*iter)--]; 49 if (!HB_IsHighSurrogate(v2)) { 50 // invalidate surrogate pair. 51 return HB_InvalidCodePoint; 52 } 53 54 return HB_SurrogateToUcs4(v2, v); 55 } 56 57 if (HB_IsHighSurrogate(v)) { 58 // this isn't a valid code point 59 return HB_InvalidCodePoint; 60 } 61 62 return v; 63 } 64 65 static int 66 script_property_cmp(const void *vkey, const void *vcandidate) { 67 const uint32_t key = (uint32_t) (intptr_t) vkey; 68 const struct script_property *candidate = vcandidate; 69 70 if (key < candidate->range_start) { 71 return -1; 72 } else if (key > candidate->range_end) { 73 return 1; 74 } else { 75 return 0; 76 } 77 } 78 79 HB_Script 80 code_point_to_script(uint32_t cp) { 81 if (cp == 0) { 82 // bsearch can throw an assertion on null pointer, so skip if zero 83 return HB_Script_Common; 84 } 85 const void *vprop = bsearch((void *) (intptr_t) cp, script_properties, 86 script_properties_count, 87 sizeof(struct script_property), 88 script_property_cmp); 89 if (!vprop) 90 return HB_Script_Common; 91 92 return ((const struct script_property *) vprop)->script; 93 } 94 95 char 96 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output, 97 const uint16_t *chars, size_t len, ssize_t *iter) { 98 if (*iter == len) 99 return 0; 100 101 output->pos = *iter; 102 const uint32_t init_cp = utf16_to_code_point(chars, len, iter); 103 unsigned cps = 1; 104 if (init_cp == HB_InvalidCodePoint) 105 return 0; 106 const HB_Script init_script = code_point_to_script(init_cp); 107 HB_Script current_script = init_script; 108 output->script = init_script; 109 110 for (;;) { 111 if (*iter == len) 112 break; 113 const ssize_t prev_iter = *iter; 114 const uint32_t cp = utf16_to_code_point(chars, len, iter); 115 if (cp == HB_InvalidCodePoint) 116 return 0; 117 cps++; 118 const HB_Script script = code_point_to_script(cp); 119 120 if (script != current_script) { 121 /* BEGIN android-changed 122 The condition was not correct by doing "a == b == constant" 123 END android-changed */ 124 if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) { 125 // If we started off as inherited, we take whatever we can find. 126 output->script = script; 127 current_script = script; 128 continue; 129 } else if (script == HB_Script_Inherited) { 130 continue; 131 } else { 132 *iter = prev_iter; 133 cps--; 134 break; 135 } 136 } 137 } 138 139 if (output->script == HB_Script_Inherited) 140 output->script = HB_Script_Common; 141 142 output->length = *iter - output->pos; 143 if (num_code_points) 144 *num_code_points = cps; 145 return 1; 146 } 147 148 char 149 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output, 150 const uint16_t *chars, size_t len, ssize_t *iter) { 151 if (*iter == (size_t) -1) 152 return 0; 153 154 const size_t ending_index = *iter; 155 const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter); 156 unsigned cps = 1; 157 if (init_cp == HB_InvalidCodePoint) 158 return 0; 159 const HB_Script init_script = code_point_to_script(init_cp); 160 HB_Script current_script = init_script; 161 output->script = init_script; 162 163 for (;;) { 164 if (*iter < 0) 165 break; 166 const ssize_t prev_iter = *iter; 167 const uint32_t cp = utf16_to_code_point_prev(chars, len, iter); 168 if (cp == HB_InvalidCodePoint) 169 return 0; 170 cps++; 171 const HB_Script script = code_point_to_script(cp); 172 173 if (script != current_script) { 174 if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) { 175 // If we started off as inherited, we take whatever we can find. 176 output->script = script; 177 current_script = script; 178 continue; 179 } else if (script == HB_Script_Inherited) { 180 /* BEGIN android-changed 181 We apply the same fix for Chrome to Android. 182 Chrome team will talk with upsteam about it. 183 Just assume that whatever follows this combining character is within 184 the same script. This is incorrect if you had language1 + combining 185 char + language 2, but that is rare and this code is suspicious 186 anyway. 187 END android-changed */ 188 continue; 189 } else { 190 *iter = prev_iter; 191 cps--; 192 break; 193 } 194 } 195 } 196 197 if (output->script == HB_Script_Inherited) 198 output->script = HB_Script_Common; 199 200 output->pos = *iter + 1; 201 output->length = ending_index - *iter; 202 if (num_code_points) 203 *num_code_points = cps; 204 return 1; 205 } 206 207 static int 208 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) { 209 const uint32_t key = (uint32_t) (intptr_t) vkey; 210 const struct grapheme_break_property *candidate = vcandidate; 211 212 if (key < candidate->range_start) { 213 return -1; 214 } else if (key > candidate->range_end) { 215 return 1; 216 } else { 217 return 0; 218 } 219 } 220 221 HB_GraphemeClass 222 HB_GetGraphemeClass(HB_UChar32 ch) { 223 const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties, 224 grapheme_break_properties_count, 225 sizeof(struct grapheme_break_property), 226 grapheme_break_property_cmp); 227 if (!vprop) 228 return HB_Grapheme_Other; 229 230 return ((const struct grapheme_break_property *) vprop)->klass; 231 } 232 233 HB_WordClass 234 HB_GetWordClass(HB_UChar32 ch) { 235 abort(); 236 return 0; 237 } 238 239 HB_SentenceClass 240 HB_GetSentenceClass(HB_UChar32 ch) { 241 abort(); 242 return 0; 243 } 244 245 void 246 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) { 247 *gclass = HB_GetGraphemeClass(ch); 248 *breakclass = HB_GetLineBreakClass(ch); 249 } 250 251 static int 252 mirroring_property_cmp(const void *vkey, const void *vcandidate) { 253 const uint32_t key = (uint32_t) (intptr_t) vkey; 254 const struct mirroring_property *candidate = vcandidate; 255 256 if (key < candidate->a) { 257 return -1; 258 } else if (key > candidate->a) { 259 return 1; 260 } else { 261 return 0; 262 } 263 } 264 265 HB_UChar16 266 HB_GetMirroredChar(HB_UChar16 ch) { 267 const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties, 268 mirroring_properties_count, 269 sizeof(struct mirroring_property), 270 mirroring_property_cmp); 271 if (!mprop) 272 return ch; 273 274 return ((const struct mirroring_property *) mprop)->b; 275 } 276 277 void * 278 HB_Library_Resolve(const char *library, int version, const char *symbol) { 279 abort(); 280 return NULL; 281 } 282