1 #include <stdint.h> 2 #include <stdlib.h> 3 4 #include <harfbuzz-external.h> 5 #include <harfbuzz-impl.h> 6 #include <harfbuzz-shaper.h> 7 #include "harfbuzz-unicode.h" 8 9 #include "tables/grapheme-break-properties.h" 10 #include "tables/mirroring-properties.h" 11 #include "tables/script-properties.h" 12 13 uint32_t 14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) { 15 const uint16_t v = chars[(*iter)++]; 16 if (HB_IsHighSurrogate(v)) { 17 // surrogate pair 18 if (*iter >= len) { 19 // the surrogate is incomplete. 20 return HB_InvalidCodePoint; 21 } 22 const uint16_t v2 = chars[(*iter)++]; 23 if (!HB_IsLowSurrogate(v2)) { 24 // invalidate surrogate pair. 25 return HB_InvalidCodePoint; 26 } 27 28 return HB_SurrogateToUcs4(v, v2); 29 } 30 31 if (HB_IsLowSurrogate(v)) { 32 // this isn't a valid code point 33 return HB_InvalidCodePoint; 34 } 35 36 return v; 37 } 38 39 uint32_t 40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) { 41 const uint16_t v = chars[(*iter)--]; 42 if (HB_IsLowSurrogate(v)) { 43 // surrogate pair 44 if (*iter < 0) { 45 // the surrogate is incomplete. 46 return HB_InvalidCodePoint; 47 } 48 const uint16_t v2 = chars[(*iter)--]; 49 if (!HB_IsHighSurrogate(v2)) { 50 // invalidate surrogate pair. 51 return HB_InvalidCodePoint; 52 } 53 54 return HB_SurrogateToUcs4(v2, v); 55 } 56 57 if (HB_IsHighSurrogate(v)) { 58 // this isn't a valid code point 59 return HB_InvalidCodePoint; 60 } 61 62 return v; 63 } 64 65 static int 66 script_property_cmp(const void *vkey, const void *vcandidate) { 67 const uint32_t key = (uint32_t) (intptr_t) vkey; 68 const struct script_property *candidate = vcandidate; 69 70 if (key < candidate->range_start) { 71 return -1; 72 } else if (key > candidate->range_end) { 73 return 1; 74 } else { 75 return 0; 76 } 77 } 78 79 HB_Script 80 code_point_to_script(uint32_t cp) { 81 /* BEGIN android-changed 82 For the purpose of aggregating script runs together, we treat space 83 as belonging to the same script as surrounding characters. This is a 84 performance optimization to keep the number of runs down. */ 85 if (cp == ' ') return HB_Script_Inherited; 86 /* END android-changed */ 87 88 const void *vprop = bsearch((void *) (intptr_t) cp, script_properties, 89 script_properties_count, 90 sizeof(struct script_property), 91 script_property_cmp); 92 if (!vprop) 93 return HB_Script_Common; 94 95 return ((const struct script_property *) vprop)->script; 96 } 97 98 char 99 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output, 100 const uint16_t *chars, size_t len, ssize_t *iter) { 101 if (*iter == len) 102 return 0; 103 104 output->pos = *iter; 105 const uint32_t init_cp = utf16_to_code_point(chars, len, iter); 106 unsigned cps = 1; 107 if (init_cp == HB_InvalidCodePoint) 108 return 0; 109 const HB_Script init_script = code_point_to_script(init_cp); 110 HB_Script current_script = init_script; 111 output->script = init_script; 112 113 for (;;) { 114 if (*iter == len) 115 break; 116 const ssize_t prev_iter = *iter; 117 const uint32_t cp = utf16_to_code_point(chars, len, iter); 118 if (cp == HB_InvalidCodePoint) 119 return 0; 120 cps++; 121 const HB_Script script = code_point_to_script(cp); 122 123 if (script != current_script) { 124 /* BEGIN android-changed 125 The condition was not correct by doing "a == b == constant" 126 END android-changed */ 127 if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) { 128 // If we started off as inherited, we take whatever we can find. 129 output->script = script; 130 current_script = script; 131 continue; 132 } else if (script == HB_Script_Inherited) { 133 continue; 134 } else { 135 *iter = prev_iter; 136 cps--; 137 break; 138 } 139 } 140 } 141 142 if (output->script == HB_Script_Inherited) 143 output->script = HB_Script_Common; 144 145 output->length = *iter - output->pos; 146 if (num_code_points) 147 *num_code_points = cps; 148 return 1; 149 } 150 151 char 152 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output, 153 const uint16_t *chars, size_t len, ssize_t *iter) { 154 if (*iter == (size_t) -1) 155 return 0; 156 157 const size_t ending_index = *iter; 158 const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter); 159 unsigned cps = 1; 160 if (init_cp == HB_InvalidCodePoint) 161 return 0; 162 const HB_Script init_script = code_point_to_script(init_cp); 163 HB_Script current_script = init_script; 164 output->script = init_script; 165 166 for (;;) { 167 if (*iter < 0) 168 break; 169 const ssize_t prev_iter = *iter; 170 const uint32_t cp = utf16_to_code_point_prev(chars, len, iter); 171 if (cp == HB_InvalidCodePoint) 172 return 0; 173 cps++; 174 const HB_Script script = code_point_to_script(cp); 175 176 if (script != current_script) { 177 if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) { 178 // If we started off as inherited, we take whatever we can find. 179 output->script = script; 180 current_script = script; 181 continue; 182 } else if (script == HB_Script_Inherited) { 183 /* BEGIN android-changed 184 We apply the same fix for Chrome to Android. 185 Chrome team will talk with upsteam about it. 186 Just assume that whatever follows this combining character is within 187 the same script. This is incorrect if you had language1 + combining 188 char + language 2, but that is rare and this code is suspicious 189 anyway. 190 END android-changed */ 191 continue; 192 } else { 193 *iter = prev_iter; 194 cps--; 195 break; 196 } 197 } 198 } 199 200 if (output->script == HB_Script_Inherited) 201 output->script = HB_Script_Common; 202 203 output->pos = *iter + 1; 204 output->length = ending_index - *iter; 205 if (num_code_points) 206 *num_code_points = cps; 207 return 1; 208 } 209 210 static int 211 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) { 212 const uint32_t key = (uint32_t) (intptr_t) vkey; 213 const struct grapheme_break_property *candidate = vcandidate; 214 215 if (key < candidate->range_start) { 216 return -1; 217 } else if (key > candidate->range_end) { 218 return 1; 219 } else { 220 return 0; 221 } 222 } 223 224 HB_GraphemeClass 225 HB_GetGraphemeClass(HB_UChar32 ch) { 226 const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties, 227 grapheme_break_properties_count, 228 sizeof(struct grapheme_break_property), 229 grapheme_break_property_cmp); 230 if (!vprop) 231 return HB_Grapheme_Other; 232 233 return ((const struct grapheme_break_property *) vprop)->klass; 234 } 235 236 HB_WordClass 237 HB_GetWordClass(HB_UChar32 ch) { 238 abort(); 239 return 0; 240 } 241 242 HB_SentenceClass 243 HB_GetSentenceClass(HB_UChar32 ch) { 244 abort(); 245 return 0; 246 } 247 248 void 249 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) { 250 *gclass = HB_GetGraphemeClass(ch); 251 *breakclass = HB_GetLineBreakClass(ch); 252 } 253 254 static int 255 mirroring_property_cmp(const void *vkey, const void *vcandidate) { 256 const uint32_t key = (uint32_t) (intptr_t) vkey; 257 const struct mirroring_property *candidate = vcandidate; 258 259 if (key < candidate->a) { 260 return -1; 261 } else if (key > candidate->a) { 262 return 1; 263 } else { 264 return 0; 265 } 266 } 267 268 HB_UChar16 269 HB_GetMirroredChar(HB_UChar16 ch) { 270 const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties, 271 mirroring_properties_count, 272 sizeof(struct mirroring_property), 273 mirroring_property_cmp); 274 if (!mprop) 275 return ch; 276 277 return ((const struct mirroring_property *) mprop)->b; 278 } 279 280 void * 281 HB_Library_Resolve(const char *library, int version, const char *symbol) { 282 abort(); 283 return NULL; 284 } 285