1 #include <stdint.h> 2 #include <stdlib.h> 3 4 #include <harfbuzz-external.h> 5 #include <harfbuzz-impl.h> 6 #include <harfbuzz-shaper.h> 7 #include "harfbuzz-unicode.h" 8 9 #include "tables/grapheme-break-properties.h" 10 #include "tables/mirroring-properties.h" 11 #include "tables/script-properties.h" 12 13 uint32_t 14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) { 15 const uint16_t v = chars[(*iter)++]; 16 if (HB_IsHighSurrogate(v)) { 17 // surrogate pair 18 if (*iter >= len) { 19 // the surrogate is incomplete. 20 return HB_InvalidCodePoint; 21 } 22 const uint16_t v2 = chars[(*iter)++]; 23 if (!HB_IsLowSurrogate(v2)) { 24 // invalidate surrogate pair. 25 return HB_InvalidCodePoint; 26 } 27 28 return HB_SurrogateToUcs4(v, v2); 29 } 30 31 if (HB_IsLowSurrogate(v)) { 32 // this isn't a valid code point 33 return HB_InvalidCodePoint; 34 } 35 36 return v; 37 } 38 39 uint32_t 40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) { 41 const uint16_t v = chars[(*iter)--]; 42 if (HB_IsLowSurrogate(v)) { 43 // surrogate pair 44 if (*iter < 0) { 45 // the surrogate is incomplete. 46 return HB_InvalidCodePoint; 47 } 48 const uint16_t v2 = chars[(*iter)--]; 49 if (!HB_IsHighSurrogate(v2)) { 50 // invalidate surrogate pair. 51 return HB_InvalidCodePoint; 52 } 53 54 return HB_SurrogateToUcs4(v2, v); 55 } 56 57 if (HB_IsHighSurrogate(v)) { 58 // this isn't a valid code point 59 return HB_InvalidCodePoint; 60 } 61 62 return v; 63 } 64 65 static int 66 script_property_cmp(const void *vkey, const void *vcandidate) { 67 const uint32_t key = (uint32_t) (intptr_t) vkey; 68 const struct script_property *candidate = vcandidate; 69 70 if (key < candidate->range_start) { 71 return -1; 72 } else if (key > candidate->range_end) { 73 return 1; 74 } else { 75 return 0; 76 } 77 } 78 79 HB_Script 80 code_point_to_script(uint32_t cp) { 81 const void *vprop = bsearch((void *) (intptr_t) cp, script_properties, 82 script_properties_count, 83 sizeof(struct script_property), 84 script_property_cmp); 85 if (!vprop) 86 return HB_Script_Common; 87 88 return ((const struct script_property *) vprop)->script; 89 } 90 91 char 92 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output, 93 const uint16_t *chars, size_t len, ssize_t *iter) { 94 if (*iter == len) 95 return 0; 96 97 output->pos = *iter; 98 const uint32_t init_cp = utf16_to_code_point(chars, len, iter); 99 unsigned cps = 1; 100 if (init_cp == HB_InvalidCodePoint) 101 return 0; 102 const HB_Script init_script = code_point_to_script(init_cp); 103 HB_Script current_script = init_script; 104 output->script = init_script; 105 106 for (;;) { 107 if (*iter == len) 108 break; 109 const ssize_t prev_iter = *iter; 110 const uint32_t cp = utf16_to_code_point(chars, len, iter); 111 if (cp == HB_InvalidCodePoint) 112 return 0; 113 cps++; 114 const HB_Script script = code_point_to_script(cp); 115 116 if (script != current_script) { 117 if (current_script == init_script == HB_Script_Inherited) { 118 // If we started off as inherited, we take whatever we can find. 119 output->script = script; 120 current_script = script; 121 continue; 122 } else if (script == HB_Script_Inherited) { 123 continue; 124 } else { 125 *iter = prev_iter; 126 cps--; 127 break; 128 } 129 } 130 } 131 132 if (output->script == HB_Script_Inherited) 133 output->script = HB_Script_Common; 134 135 output->length = *iter - output->pos; 136 if (num_code_points) 137 *num_code_points = cps; 138 return 1; 139 } 140 141 char 142 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output, 143 const uint16_t *chars, size_t len, ssize_t *iter) { 144 if (*iter == (size_t) -1) 145 return 0; 146 147 const size_t ending_index = *iter; 148 const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter); 149 unsigned cps = 1; 150 if (init_cp == HB_InvalidCodePoint) 151 return 0; 152 const HB_Script init_script = code_point_to_script(init_cp); 153 HB_Script current_script = init_script; 154 output->script = init_script; 155 156 for (;;) { 157 if (*iter < 0) 158 break; 159 const ssize_t prev_iter = *iter; 160 const uint32_t cp = utf16_to_code_point_prev(chars, len, iter); 161 if (cp == HB_InvalidCodePoint) 162 return 0; 163 cps++; 164 const HB_Script script = code_point_to_script(cp); 165 166 if (script != current_script) { 167 if (current_script == init_script == HB_Script_Inherited) { 168 // If we started off as inherited, we take whatever we can find. 169 output->script = script; 170 current_script = script; 171 continue; 172 } else if (script == HB_Script_Inherited) { 173 /* BEGIN android-changed 174 We apply the same fix for Chrome to Android. 175 Chrome team will talk with upsteam about it. 176 Just assume that whatever follows this combining character is within 177 the same script. This is incorrect if you had language1 + combining 178 char + language 2, but that is rare and this code is suspicious 179 anyway. 180 END android-changed */ 181 continue; 182 } else { 183 *iter = prev_iter; 184 cps--; 185 break; 186 } 187 } 188 } 189 190 if (output->script == HB_Script_Inherited) 191 output->script = HB_Script_Common; 192 193 output->pos = *iter + 1; 194 output->length = ending_index - *iter; 195 if (num_code_points) 196 *num_code_points = cps; 197 return 1; 198 } 199 200 static int 201 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) { 202 const uint32_t key = (uint32_t) (intptr_t) vkey; 203 const struct grapheme_break_property *candidate = vcandidate; 204 205 if (key < candidate->range_start) { 206 return -1; 207 } else if (key > candidate->range_end) { 208 return 1; 209 } else { 210 return 0; 211 } 212 } 213 214 HB_GraphemeClass 215 HB_GetGraphemeClass(HB_UChar32 ch) { 216 const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties, 217 grapheme_break_properties_count, 218 sizeof(struct grapheme_break_property), 219 grapheme_break_property_cmp); 220 if (!vprop) 221 return HB_Grapheme_Other; 222 223 return ((const struct grapheme_break_property *) vprop)->klass; 224 } 225 226 HB_WordClass 227 HB_GetWordClass(HB_UChar32 ch) { 228 abort(); 229 return 0; 230 } 231 232 HB_SentenceClass 233 HB_GetSentenceClass(HB_UChar32 ch) { 234 abort(); 235 return 0; 236 } 237 238 void 239 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) { 240 *gclass = HB_GetGraphemeClass(ch); 241 *breakclass = HB_GetLineBreakClass(ch); 242 } 243 244 static int 245 mirroring_property_cmp(const void *vkey, const void *vcandidate) { 246 const uint32_t key = (uint32_t) (intptr_t) vkey; 247 const struct mirroring_property *candidate = vcandidate; 248 249 if (key < candidate->a) { 250 return -1; 251 } else if (key > candidate->a) { 252 return 1; 253 } else { 254 return 0; 255 } 256 } 257 258 HB_UChar16 259 HB_GetMirroredChar(HB_UChar16 ch) { 260 const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties, 261 mirroring_properties_count, 262 sizeof(struct mirroring_property), 263 mirroring_property_cmp); 264 if (!mprop) 265 return ch; 266 267 return ((const struct mirroring_property *) mprop)->b; 268 } 269 270 void * 271 HB_Library_Resolve(const char *library, int version, const char *symbol) { 272 abort(); 273 return NULL; 274 } 275