1 #include <stdint.h> 2 #include <stdlib.h> 3 4 #include <harfbuzz-external.h> 5 #include <harfbuzz-impl.h> 6 #include <harfbuzz-shaper.h> 7 #include "harfbuzz-unicode.h" 8 9 #include "tables/grapheme-break-properties.h" 10 #include "tables/mirroring-properties.h" 11 #include "tables/script-properties.h" 12 13 uint32_t 14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) { 15 const uint16_t v = chars[(*iter)++]; 16 if (HB_IsHighSurrogate(v)) { 17 // surrogate pair 18 if (*iter >= len) { 19 // the surrogate is incomplete. 20 return HB_InvalidCodePoint; 21 } 22 const uint16_t v2 = chars[(*iter)++]; 23 if (!HB_IsLowSurrogate(v2)) { 24 // invalidate surrogate pair. 25 return HB_InvalidCodePoint; 26 } 27 28 return HB_SurrogateToUcs4(v, v2); 29 } 30 31 if (HB_IsLowSurrogate(v)) { 32 // this isn't a valid code point 33 return HB_InvalidCodePoint; 34 } 35 36 return v; 37 } 38 39 uint32_t 40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) { 41 const uint16_t v = chars[(*iter)--]; 42 if (HB_IsLowSurrogate(v)) { 43 // surrogate pair 44 if (*iter < 0) { 45 // the surrogate is incomplete. 46 return HB_InvalidCodePoint; 47 } 48 const uint16_t v2 = chars[(*iter)--]; 49 if (!HB_IsHighSurrogate(v2)) { 50 // invalidate surrogate pair. 51 return HB_InvalidCodePoint; 52 } 53 54 return HB_SurrogateToUcs4(v2, v); 55 } 56 57 if (HB_IsHighSurrogate(v)) { 58 // this isn't a valid code point 59 return HB_InvalidCodePoint; 60 } 61 62 return v; 63 } 64 65 static int 66 script_property_cmp(const void *vkey, const void *vcandidate) { 67 const uint32_t key = (uint32_t) (intptr_t) vkey; 68 const struct script_property *candidate = vcandidate; 69 70 if (key < candidate->range_start) { 71 return -1; 72 } else if (key > candidate->range_end) { 73 return 1; 74 } else { 75 return 0; 76 } 77 } 78 79 HB_Script 80 code_point_to_script(uint32_t cp) { 81 const void *vprop = bsearch((void *) (intptr_t) cp, script_properties, 82 script_properties_count, 83 sizeof(struct script_property), 84 script_property_cmp); 85 if (!vprop) 86 return HB_Script_Common; 87 88 return ((const struct script_property *) vprop)->script; 89 } 90 91 char 92 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output, 93 const uint16_t *chars, size_t len, ssize_t *iter) { 94 if (*iter == len) 95 return 0; 96 97 output->pos = *iter; 98 const uint32_t init_cp = utf16_to_code_point(chars, len, iter); 99 unsigned cps = 1; 100 if (init_cp == HB_InvalidCodePoint) 101 return 0; 102 const HB_Script init_script = code_point_to_script(init_cp); 103 HB_Script current_script = init_script; 104 output->script = init_script; 105 106 for (;;) { 107 if (*iter == len) 108 break; 109 const ssize_t prev_iter = *iter; 110 const uint32_t cp = utf16_to_code_point(chars, len, iter); 111 if (cp == HB_InvalidCodePoint) 112 return 0; 113 cps++; 114 const HB_Script script = code_point_to_script(cp); 115 116 if (script != current_script) { 117 if (current_script == init_script == HB_Script_Inherited) { 118 // If we started off as inherited, we take whatever we can find. 119 output->script = script; 120 current_script = script; 121 continue; 122 } else if (script == HB_Script_Inherited) { 123 continue; 124 } else { 125 *iter = prev_iter; 126 cps--; 127 break; 128 } 129 } 130 } 131 132 if (output->script == HB_Script_Inherited) 133 output->script = HB_Script_Common; 134 135 output->length = *iter - output->pos; 136 if (num_code_points) 137 *num_code_points = cps; 138 return 1; 139 } 140 141 char 142 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output, 143 const uint16_t *chars, size_t len, ssize_t *iter) { 144 if (*iter == (size_t) -1) 145 return 0; 146 147 const size_t ending_index = *iter; 148 const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter); 149 unsigned cps = 1; 150 if (init_cp == HB_InvalidCodePoint) 151 return 0; 152 const HB_Script init_script = code_point_to_script(init_cp); 153 HB_Script current_script = init_script; 154 output->script = init_script; 155 156 for (;;) { 157 if (*iter < 0) 158 break; 159 const ssize_t prev_iter = *iter; 160 const uint32_t cp = utf16_to_code_point_prev(chars, len, iter); 161 if (cp == HB_InvalidCodePoint) 162 return 0; 163 cps++; 164 const HB_Script script = code_point_to_script(cp); 165 166 if (script != current_script) { 167 /* BEGIN android-changed 168 The condition was not correct by doing "a == b == constant" 169 END android-changed */ 170 if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) { 171 // If we started off as inherited, we take whatever we can find. 172 output->script = script; 173 current_script = script; 174 continue; 175 } else if (script == HB_Script_Inherited) { 176 /* BEGIN android-changed 177 We apply the same fix for Chrome to Android. 178 Chrome team will talk with upsteam about it. 179 Just assume that whatever follows this combining character is within 180 the same script. This is incorrect if you had language1 + combining 181 char + language 2, but that is rare and this code is suspicious 182 anyway. 183 END android-changed */ 184 continue; 185 } else { 186 *iter = prev_iter; 187 cps--; 188 break; 189 } 190 } 191 } 192 193 if (output->script == HB_Script_Inherited) 194 output->script = HB_Script_Common; 195 196 output->pos = *iter + 1; 197 output->length = ending_index - *iter; 198 if (num_code_points) 199 *num_code_points = cps; 200 return 1; 201 } 202 203 static int 204 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) { 205 const uint32_t key = (uint32_t) (intptr_t) vkey; 206 const struct grapheme_break_property *candidate = vcandidate; 207 208 if (key < candidate->range_start) { 209 return -1; 210 } else if (key > candidate->range_end) { 211 return 1; 212 } else { 213 return 0; 214 } 215 } 216 217 HB_GraphemeClass 218 HB_GetGraphemeClass(HB_UChar32 ch) { 219 const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties, 220 grapheme_break_properties_count, 221 sizeof(struct grapheme_break_property), 222 grapheme_break_property_cmp); 223 if (!vprop) 224 return HB_Grapheme_Other; 225 226 return ((const struct grapheme_break_property *) vprop)->klass; 227 } 228 229 HB_WordClass 230 HB_GetWordClass(HB_UChar32 ch) { 231 abort(); 232 return 0; 233 } 234 235 HB_SentenceClass 236 HB_GetSentenceClass(HB_UChar32 ch) { 237 abort(); 238 return 0; 239 } 240 241 void 242 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) { 243 *gclass = HB_GetGraphemeClass(ch); 244 *breakclass = HB_GetLineBreakClass(ch); 245 } 246 247 static int 248 mirroring_property_cmp(const void *vkey, const void *vcandidate) { 249 const uint32_t key = (uint32_t) (intptr_t) vkey; 250 const struct mirroring_property *candidate = vcandidate; 251 252 if (key < candidate->a) { 253 return -1; 254 } else if (key > candidate->a) { 255 return 1; 256 } else { 257 return 0; 258 } 259 } 260 261 HB_UChar16 262 HB_GetMirroredChar(HB_UChar16 ch) { 263 const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties, 264 mirroring_properties_count, 265 sizeof(struct mirroring_property), 266 mirroring_property_cmp); 267 if (!mprop) 268 return ch; 269 270 return ((const struct mirroring_property *) mprop)->b; 271 } 272 273 void * 274 HB_Library_Resolve(const char *library, int version, const char *symbol) { 275 abort(); 276 return NULL; 277 } 278