1 #include <stdint.h> 2 #include <stdlib.h> 3 4 #include <harfbuzz-external.h> 5 #include <harfbuzz-impl.h> 6 #include <harfbuzz-shaper.h> 7 #include "harfbuzz-unicode.h" 8 9 #include "tables/grapheme-break-properties.h" 10 #include "tables/mirroring-properties.h" 11 #include "tables/script-properties.h" 12 13 uint32_t 14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) { 15 const uint16_t v = chars[(*iter)++]; 16 if (HB_IsHighSurrogate(v)) { 17 // surrogate pair 18 if (*iter >= len) { 19 // the surrogate is incomplete. 20 return HB_InvalidCodePoint; 21 } 22 const uint16_t v2 = chars[(*iter)++]; 23 if (!HB_IsLowSurrogate(v2)) { 24 // invalidate surrogate pair. 25 return HB_InvalidCodePoint; 26 } 27 28 return HB_SurrogateToUcs4(v, v2); 29 } 30 31 if (HB_IsLowSurrogate(v)) { 32 // this isn't a valid code point 33 return HB_InvalidCodePoint; 34 } 35 36 return v; 37 } 38 39 uint32_t 40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) { 41 const uint16_t v = chars[(*iter)--]; 42 if (HB_IsLowSurrogate(v)) { 43 // surrogate pair 44 if (*iter < 0) { 45 // the surrogate is incomplete. 46 return HB_InvalidCodePoint; 47 } 48 const uint16_t v2 = chars[(*iter)--]; 49 if (!HB_IsHighSurrogate(v2)) { 50 // invalidate surrogate pair. 51 return HB_InvalidCodePoint; 52 } 53 54 return HB_SurrogateToUcs4(v2, v); 55 } 56 57 if (HB_IsHighSurrogate(v)) { 58 // this isn't a valid code point 59 return HB_InvalidCodePoint; 60 } 61 62 return v; 63 } 64 65 static int 66 script_property_cmp(const void *vkey, const void *vcandidate) { 67 const uint32_t key = (uint32_t) (intptr_t) vkey; 68 const struct script_property *candidate = vcandidate; 69 70 if (key < candidate->range_start) { 71 return -1; 72 } else if (key > candidate->range_end) { 73 return 1; 74 } else { 75 return 0; 76 } 77 } 78 79 HB_Script 80 code_point_to_script(uint32_t cp) { 81 const void *vprop = bsearch((void *) (intptr_t) cp, script_properties, 82 script_properties_count, 83 sizeof(struct script_property), 84 script_property_cmp); 85 if (!vprop) 86 return HB_Script_Common; 87 88 return ((const struct script_property *) vprop)->script; 89 } 90 91 char 92 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output, 93 const uint16_t *chars, size_t len, ssize_t *iter) { 94 if (*iter == len) 95 return 0; 96 97 output->pos = *iter; 98 const uint32_t init_cp = utf16_to_code_point(chars, len, iter); 99 unsigned cps = 1; 100 if (init_cp == HB_InvalidCodePoint) 101 return 0; 102 const HB_Script init_script = code_point_to_script(init_cp); 103 HB_Script current_script = init_script; 104 output->script = init_script; 105 106 for (;;) { 107 if (*iter == len) 108 break; 109 const ssize_t prev_iter = *iter; 110 const uint32_t cp = utf16_to_code_point(chars, len, iter); 111 if (cp == HB_InvalidCodePoint) 112 return 0; 113 cps++; 114 const HB_Script script = code_point_to_script(cp); 115 116 if (script != current_script) { 117 /* BEGIN android-changed 118 The condition was not correct by doing "a == b == constant" 119 END android-changed */ 120 if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) { 121 // If we started off as inherited, we take whatever we can find. 122 output->script = script; 123 current_script = script; 124 continue; 125 } else if (script == HB_Script_Inherited) { 126 continue; 127 } else { 128 *iter = prev_iter; 129 cps--; 130 break; 131 } 132 } 133 } 134 135 if (output->script == HB_Script_Inherited) 136 output->script = HB_Script_Common; 137 138 output->length = *iter - output->pos; 139 if (num_code_points) 140 *num_code_points = cps; 141 return 1; 142 } 143 144 char 145 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output, 146 const uint16_t *chars, size_t len, ssize_t *iter) { 147 if (*iter == (size_t) -1) 148 return 0; 149 150 const size_t ending_index = *iter; 151 const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter); 152 unsigned cps = 1; 153 if (init_cp == HB_InvalidCodePoint) 154 return 0; 155 const HB_Script init_script = code_point_to_script(init_cp); 156 HB_Script current_script = init_script; 157 output->script = init_script; 158 159 for (;;) { 160 if (*iter < 0) 161 break; 162 const ssize_t prev_iter = *iter; 163 const uint32_t cp = utf16_to_code_point_prev(chars, len, iter); 164 if (cp == HB_InvalidCodePoint) 165 return 0; 166 cps++; 167 const HB_Script script = code_point_to_script(cp); 168 169 if (script != current_script) { 170 if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) { 171 // If we started off as inherited, we take whatever we can find. 172 output->script = script; 173 current_script = script; 174 continue; 175 } else if (script == HB_Script_Inherited) { 176 /* BEGIN android-changed 177 We apply the same fix for Chrome to Android. 178 Chrome team will talk with upsteam about it. 179 Just assume that whatever follows this combining character is within 180 the same script. This is incorrect if you had language1 + combining 181 char + language 2, but that is rare and this code is suspicious 182 anyway. 183 END android-changed */ 184 continue; 185 } else { 186 *iter = prev_iter; 187 cps--; 188 break; 189 } 190 } 191 } 192 193 if (output->script == HB_Script_Inherited) 194 output->script = HB_Script_Common; 195 196 output->pos = *iter + 1; 197 output->length = ending_index - *iter; 198 if (num_code_points) 199 *num_code_points = cps; 200 return 1; 201 } 202 203 static int 204 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) { 205 const uint32_t key = (uint32_t) (intptr_t) vkey; 206 const struct grapheme_break_property *candidate = vcandidate; 207 208 if (key < candidate->range_start) { 209 return -1; 210 } else if (key > candidate->range_end) { 211 return 1; 212 } else { 213 return 0; 214 } 215 } 216 217 HB_GraphemeClass 218 HB_GetGraphemeClass(HB_UChar32 ch) { 219 const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties, 220 grapheme_break_properties_count, 221 sizeof(struct grapheme_break_property), 222 grapheme_break_property_cmp); 223 if (!vprop) 224 return HB_Grapheme_Other; 225 226 return ((const struct grapheme_break_property *) vprop)->klass; 227 } 228 229 HB_WordClass 230 HB_GetWordClass(HB_UChar32 ch) { 231 abort(); 232 return 0; 233 } 234 235 HB_SentenceClass 236 HB_GetSentenceClass(HB_UChar32 ch) { 237 abort(); 238 return 0; 239 } 240 241 void 242 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) { 243 *gclass = HB_GetGraphemeClass(ch); 244 *breakclass = HB_GetLineBreakClass(ch); 245 } 246 247 static int 248 mirroring_property_cmp(const void *vkey, const void *vcandidate) { 249 const uint32_t key = (uint32_t) (intptr_t) vkey; 250 const struct mirroring_property *candidate = vcandidate; 251 252 if (key < candidate->a) { 253 return -1; 254 } else if (key > candidate->a) { 255 return 1; 256 } else { 257 return 0; 258 } 259 } 260 261 HB_UChar16 262 HB_GetMirroredChar(HB_UChar16 ch) { 263 const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties, 264 mirroring_properties_count, 265 sizeof(struct mirroring_property), 266 mirroring_property_cmp); 267 if (!mprop) 268 return ch; 269 270 return ((const struct mirroring_property *) mprop)->b; 271 } 272 273 void * 274 HB_Library_Resolve(const char *library, int version, const char *symbol) { 275 abort(); 276 return NULL; 277 } 278