Home | History | Annotate | Download | only in contrib
      1 #include <stdint.h>
      2 #include <stdlib.h>
      3 
      4 #include <harfbuzz-external.h>
      5 #include <harfbuzz-impl.h>
      6 #include <harfbuzz-shaper.h>
      7 #include "harfbuzz-unicode.h"
      8 
      9 #include "tables/grapheme-break-properties.h"
     10 #include "tables/mirroring-properties.h"
     11 #include "tables/script-properties.h"
     12 
     13 uint32_t
     14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) {
     15   const uint16_t v = chars[(*iter)++];
     16   if (HB_IsHighSurrogate(v)) {
     17     // surrogate pair
     18     if (*iter >= len) {
     19       // the surrogate is incomplete.
     20       return HB_InvalidCodePoint;
     21     }
     22     const uint16_t v2 = chars[(*iter)++];
     23     if (!HB_IsLowSurrogate(v2)) {
     24       // invalidate surrogate pair.
     25       return HB_InvalidCodePoint;
     26     }
     27 
     28     return HB_SurrogateToUcs4(v, v2);
     29   }
     30 
     31   if (HB_IsLowSurrogate(v)) {
     32     // this isn't a valid code point
     33     return HB_InvalidCodePoint;
     34   }
     35 
     36   return v;
     37 }
     38 
     39 uint32_t
     40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) {
     41   const uint16_t v = chars[(*iter)--];
     42   if (HB_IsLowSurrogate(v)) {
     43     // surrogate pair
     44     if (*iter < 0) {
     45       // the surrogate is incomplete.
     46       return HB_InvalidCodePoint;
     47     }
     48     const uint16_t v2 = chars[(*iter)--];
     49     if (!HB_IsHighSurrogate(v2)) {
     50       // invalidate surrogate pair.
     51       return HB_InvalidCodePoint;
     52     }
     53 
     54     return HB_SurrogateToUcs4(v2, v);
     55   }
     56 
     57   if (HB_IsHighSurrogate(v)) {
     58     // this isn't a valid code point
     59     return HB_InvalidCodePoint;
     60   }
     61 
     62   return v;
     63 }
     64 
     65 static int
     66 script_property_cmp(const void *vkey, const void *vcandidate) {
     67   const uint32_t key = (uint32_t) (intptr_t) vkey;
     68   const struct script_property *candidate = vcandidate;
     69 
     70   if (key < candidate->range_start) {
     71     return -1;
     72   } else if (key > candidate->range_end) {
     73     return 1;
     74   } else {
     75     return 0;
     76   }
     77 }
     78 
     79 HB_Script
     80 code_point_to_script(uint32_t cp) {
     81   if (cp == 0) {
     82     // bsearch can throw an assertion on null pointer, so skip if zero
     83     return HB_Script_Common;
     84   }
     85   const void *vprop = bsearch((void *) (intptr_t) cp, script_properties,
     86                               script_properties_count,
     87                               sizeof(struct script_property),
     88                               script_property_cmp);
     89   if (!vprop)
     90     return HB_Script_Common;
     91 
     92   return ((const struct script_property *) vprop)->script;
     93 }
     94 
     95 char
     96 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output,
     97                          const uint16_t *chars, size_t len, ssize_t *iter) {
     98   if (*iter == len)
     99     return 0;
    100 
    101   output->pos = *iter;
    102   const uint32_t init_cp = utf16_to_code_point(chars, len, iter);
    103   unsigned cps = 1;
    104   if (init_cp == HB_InvalidCodePoint)
    105     return 0;
    106   const HB_Script init_script = code_point_to_script(init_cp);
    107   HB_Script current_script = init_script;
    108   output->script = init_script;
    109 
    110   for (;;) {
    111     if (*iter == len)
    112       break;
    113     const ssize_t prev_iter = *iter;
    114     const uint32_t cp = utf16_to_code_point(chars, len, iter);
    115     if (cp == HB_InvalidCodePoint)
    116       return 0;
    117     cps++;
    118     const HB_Script script = code_point_to_script(cp);
    119 
    120     if (script != current_script) {
    121         /* BEGIN android-changed
    122            The condition was not correct by doing "a == b == constant"
    123            END android-changed */
    124       if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) {
    125         // If we started off as inherited, we take whatever we can find.
    126         output->script = script;
    127         current_script = script;
    128         continue;
    129       } else if (script == HB_Script_Inherited) {
    130         continue;
    131       } else {
    132         *iter = prev_iter;
    133         cps--;
    134         break;
    135       }
    136     }
    137   }
    138 
    139   if (output->script == HB_Script_Inherited)
    140     output->script = HB_Script_Common;
    141 
    142   output->length = *iter - output->pos;
    143   if (num_code_points)
    144     *num_code_points = cps;
    145   return 1;
    146 }
    147 
    148 char
    149 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output,
    150                          const uint16_t *chars, size_t len, ssize_t *iter) {
    151   if (*iter == (size_t) -1)
    152     return 0;
    153 
    154   const size_t ending_index = *iter;
    155   const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter);
    156   unsigned cps = 1;
    157   if (init_cp == HB_InvalidCodePoint)
    158     return 0;
    159   const HB_Script init_script = code_point_to_script(init_cp);
    160   HB_Script current_script = init_script;
    161   output->script = init_script;
    162 
    163   for (;;) {
    164     if (*iter < 0)
    165       break;
    166     const ssize_t prev_iter = *iter;
    167     const uint32_t cp = utf16_to_code_point_prev(chars, len, iter);
    168     if (cp == HB_InvalidCodePoint)
    169       return 0;
    170     cps++;
    171     const HB_Script script = code_point_to_script(cp);
    172 
    173     if (script != current_script) {
    174       if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) {
    175         // If we started off as inherited, we take whatever we can find.
    176         output->script = script;
    177         current_script = script;
    178         continue;
    179       } else if (script == HB_Script_Inherited) {
    180         /* BEGIN android-changed
    181            We apply the same fix for Chrome to Android.
    182            Chrome team will talk with upsteam about it.
    183            Just assume that whatever follows this combining character is within
    184            the same script.  This is incorrect if you had language1 + combining
    185            char + language 2, but that is rare and this code is suspicious
    186            anyway.
    187            END android-changed */
    188         continue;
    189       } else {
    190         *iter = prev_iter;
    191         cps--;
    192         break;
    193       }
    194     }
    195   }
    196 
    197   if (output->script == HB_Script_Inherited)
    198     output->script = HB_Script_Common;
    199 
    200   output->pos = *iter + 1;
    201   output->length = ending_index - *iter;
    202   if (num_code_points)
    203     *num_code_points = cps;
    204   return 1;
    205 }
    206 
    207 static int
    208 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) {
    209   const uint32_t key = (uint32_t) (intptr_t) vkey;
    210   const struct grapheme_break_property *candidate = vcandidate;
    211 
    212   if (key < candidate->range_start) {
    213     return -1;
    214   } else if (key > candidate->range_end) {
    215     return 1;
    216   } else {
    217     return 0;
    218   }
    219 }
    220 
    221 HB_GraphemeClass
    222 HB_GetGraphemeClass(HB_UChar32 ch) {
    223   const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties,
    224                               grapheme_break_properties_count,
    225                               sizeof(struct grapheme_break_property),
    226                               grapheme_break_property_cmp);
    227   if (!vprop)
    228     return HB_Grapheme_Other;
    229 
    230   return ((const struct grapheme_break_property *) vprop)->klass;
    231 }
    232 
    233 HB_WordClass
    234 HB_GetWordClass(HB_UChar32 ch) {
    235   abort();
    236   return 0;
    237 }
    238 
    239 HB_SentenceClass
    240 HB_GetSentenceClass(HB_UChar32 ch) {
    241   abort();
    242   return 0;
    243 }
    244 
    245 void
    246 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) {
    247   *gclass = HB_GetGraphemeClass(ch);
    248   *breakclass = HB_GetLineBreakClass(ch);
    249 }
    250 
    251 static int
    252 mirroring_property_cmp(const void *vkey, const void *vcandidate) {
    253   const uint32_t key = (uint32_t) (intptr_t) vkey;
    254   const struct mirroring_property *candidate = vcandidate;
    255 
    256   if (key < candidate->a) {
    257     return -1;
    258   } else if (key > candidate->a) {
    259     return 1;
    260   } else {
    261     return 0;
    262   }
    263 }
    264 
    265 HB_UChar16
    266 HB_GetMirroredChar(HB_UChar16 ch) {
    267   const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties,
    268                               mirroring_properties_count,
    269                               sizeof(struct mirroring_property),
    270                               mirroring_property_cmp);
    271   if (!mprop)
    272     return ch;
    273 
    274   return ((const struct mirroring_property *) mprop)->b;
    275 }
    276 
    277 void *
    278 HB_Library_Resolve(const char *library, int version, const char *symbol) {
    279   abort();
    280   return NULL;
    281 }
    282