Home | History | Annotate | Download | only in contrib
      1 #include <stdint.h>
      2 #include <stdlib.h>
      3 
      4 #include <harfbuzz-external.h>
      5 #include <harfbuzz-impl.h>
      6 #include <harfbuzz-shaper.h>
      7 #include "harfbuzz-unicode.h"
      8 
      9 #include "tables/grapheme-break-properties.h"
     10 #include "tables/mirroring-properties.h"
     11 #include "tables/script-properties.h"
     12 
     13 uint32_t
     14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) {
     15   const uint16_t v = chars[(*iter)++];
     16   if (HB_IsHighSurrogate(v)) {
     17     // surrogate pair
     18     if (*iter >= len) {
     19       // the surrogate is incomplete.
     20       return HB_InvalidCodePoint;
     21     }
     22     const uint16_t v2 = chars[(*iter)++];
     23     if (!HB_IsLowSurrogate(v2)) {
     24       // invalidate surrogate pair.
     25       return HB_InvalidCodePoint;
     26     }
     27 
     28     return HB_SurrogateToUcs4(v, v2);
     29   }
     30 
     31   if (HB_IsLowSurrogate(v)) {
     32     // this isn't a valid code point
     33     return HB_InvalidCodePoint;
     34   }
     35 
     36   return v;
     37 }
     38 
     39 uint32_t
     40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) {
     41   const uint16_t v = chars[(*iter)--];
     42   if (HB_IsLowSurrogate(v)) {
     43     // surrogate pair
     44     if (*iter < 0) {
     45       // the surrogate is incomplete.
     46       return HB_InvalidCodePoint;
     47     }
     48     const uint16_t v2 = chars[(*iter)--];
     49     if (!HB_IsHighSurrogate(v2)) {
     50       // invalidate surrogate pair.
     51       return HB_InvalidCodePoint;
     52     }
     53 
     54     return HB_SurrogateToUcs4(v2, v);
     55   }
     56 
     57   if (HB_IsHighSurrogate(v)) {
     58     // this isn't a valid code point
     59     return HB_InvalidCodePoint;
     60   }
     61 
     62   return v;
     63 }
     64 
     65 static int
     66 script_property_cmp(const void *vkey, const void *vcandidate) {
     67   const uint32_t key = (uint32_t) (intptr_t) vkey;
     68   const struct script_property *candidate = vcandidate;
     69 
     70   if (key < candidate->range_start) {
     71     return -1;
     72   } else if (key > candidate->range_end) {
     73     return 1;
     74   } else {
     75     return 0;
     76   }
     77 }
     78 
     79 HB_Script
     80 code_point_to_script(uint32_t cp) {
     81  /* BEGIN android-changed
     82     For the purpose of aggregating script runs together, we treat space
     83     as belonging to the same script as surrounding characters. This is a
     84     performance optimization to keep the number of runs down. */
     85   if (cp == ' ') return HB_Script_Inherited;
     86   /* END android-changed */
     87 
     88   const void *vprop = bsearch((void *) (intptr_t) cp, script_properties,
     89                               script_properties_count,
     90                               sizeof(struct script_property),
     91                               script_property_cmp);
     92   if (!vprop)
     93     return HB_Script_Common;
     94 
     95   return ((const struct script_property *) vprop)->script;
     96 }
     97 
     98 char
     99 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output,
    100                          const uint16_t *chars, size_t len, ssize_t *iter) {
    101   if (*iter == len)
    102     return 0;
    103 
    104   output->pos = *iter;
    105   const uint32_t init_cp = utf16_to_code_point(chars, len, iter);
    106   unsigned cps = 1;
    107   if (init_cp == HB_InvalidCodePoint)
    108     return 0;
    109   const HB_Script init_script = code_point_to_script(init_cp);
    110   HB_Script current_script = init_script;
    111   output->script = init_script;
    112 
    113   for (;;) {
    114     if (*iter == len)
    115       break;
    116     const ssize_t prev_iter = *iter;
    117     const uint32_t cp = utf16_to_code_point(chars, len, iter);
    118     if (cp == HB_InvalidCodePoint)
    119       return 0;
    120     cps++;
    121     const HB_Script script = code_point_to_script(cp);
    122 
    123     if (script != current_script) {
    124         /* BEGIN android-changed
    125            The condition was not correct by doing "a == b == constant"
    126            END android-changed */
    127       if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) {
    128         // If we started off as inherited, we take whatever we can find.
    129         output->script = script;
    130         current_script = script;
    131         continue;
    132       } else if (script == HB_Script_Inherited) {
    133         continue;
    134       } else {
    135         *iter = prev_iter;
    136         cps--;
    137         break;
    138       }
    139     }
    140   }
    141 
    142   if (output->script == HB_Script_Inherited)
    143     output->script = HB_Script_Common;
    144 
    145   output->length = *iter - output->pos;
    146   if (num_code_points)
    147     *num_code_points = cps;
    148   return 1;
    149 }
    150 
    151 char
    152 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output,
    153                          const uint16_t *chars, size_t len, ssize_t *iter) {
    154   if (*iter == (size_t) -1)
    155     return 0;
    156 
    157   const size_t ending_index = *iter;
    158   const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter);
    159   unsigned cps = 1;
    160   if (init_cp == HB_InvalidCodePoint)
    161     return 0;
    162   const HB_Script init_script = code_point_to_script(init_cp);
    163   HB_Script current_script = init_script;
    164   output->script = init_script;
    165 
    166   for (;;) {
    167     if (*iter < 0)
    168       break;
    169     const ssize_t prev_iter = *iter;
    170     const uint32_t cp = utf16_to_code_point_prev(chars, len, iter);
    171     if (cp == HB_InvalidCodePoint)
    172       return 0;
    173     cps++;
    174     const HB_Script script = code_point_to_script(cp);
    175 
    176     if (script != current_script) {
    177       if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) {
    178         // If we started off as inherited, we take whatever we can find.
    179         output->script = script;
    180         current_script = script;
    181         continue;
    182       } else if (script == HB_Script_Inherited) {
    183         /* BEGIN android-changed
    184            We apply the same fix for Chrome to Android.
    185            Chrome team will talk with upsteam about it.
    186            Just assume that whatever follows this combining character is within
    187            the same script.  This is incorrect if you had language1 + combining
    188            char + language 2, but that is rare and this code is suspicious
    189            anyway.
    190            END android-changed */
    191         continue;
    192       } else {
    193         *iter = prev_iter;
    194         cps--;
    195         break;
    196       }
    197     }
    198   }
    199 
    200   if (output->script == HB_Script_Inherited)
    201     output->script = HB_Script_Common;
    202 
    203   output->pos = *iter + 1;
    204   output->length = ending_index - *iter;
    205   if (num_code_points)
    206     *num_code_points = cps;
    207   return 1;
    208 }
    209 
    210 static int
    211 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) {
    212   const uint32_t key = (uint32_t) (intptr_t) vkey;
    213   const struct grapheme_break_property *candidate = vcandidate;
    214 
    215   if (key < candidate->range_start) {
    216     return -1;
    217   } else if (key > candidate->range_end) {
    218     return 1;
    219   } else {
    220     return 0;
    221   }
    222 }
    223 
    224 HB_GraphemeClass
    225 HB_GetGraphemeClass(HB_UChar32 ch) {
    226   const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties,
    227                               grapheme_break_properties_count,
    228                               sizeof(struct grapheme_break_property),
    229                               grapheme_break_property_cmp);
    230   if (!vprop)
    231     return HB_Grapheme_Other;
    232 
    233   return ((const struct grapheme_break_property *) vprop)->klass;
    234 }
    235 
    236 HB_WordClass
    237 HB_GetWordClass(HB_UChar32 ch) {
    238   abort();
    239   return 0;
    240 }
    241 
    242 HB_SentenceClass
    243 HB_GetSentenceClass(HB_UChar32 ch) {
    244   abort();
    245   return 0;
    246 }
    247 
    248 void
    249 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) {
    250   *gclass = HB_GetGraphemeClass(ch);
    251   *breakclass = HB_GetLineBreakClass(ch);
    252 }
    253 
    254 static int
    255 mirroring_property_cmp(const void *vkey, const void *vcandidate) {
    256   const uint32_t key = (uint32_t) (intptr_t) vkey;
    257   const struct mirroring_property *candidate = vcandidate;
    258 
    259   if (key < candidate->a) {
    260     return -1;
    261   } else if (key > candidate->a) {
    262     return 1;
    263   } else {
    264     return 0;
    265   }
    266 }
    267 
    268 HB_UChar16
    269 HB_GetMirroredChar(HB_UChar16 ch) {
    270   const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties,
    271                               mirroring_properties_count,
    272                               sizeof(struct mirroring_property),
    273                               mirroring_property_cmp);
    274   if (!mprop)
    275     return ch;
    276 
    277   return ((const struct mirroring_property *) mprop)->b;
    278 }
    279 
    280 void *
    281 HB_Library_Resolve(const char *library, int version, const char *symbol) {
    282   abort();
    283   return NULL;
    284 }
    285