Home | History | Annotate | Download | only in contrib
      1 #include <stdint.h>
      2 #include <stdlib.h>
      3 
      4 #include <harfbuzz-external.h>
      5 #include <harfbuzz-impl.h>
      6 #include <harfbuzz-shaper.h>
      7 #include "harfbuzz-unicode.h"
      8 
      9 #include "tables/grapheme-break-properties.h"
     10 #include "tables/mirroring-properties.h"
     11 #include "tables/script-properties.h"
     12 
     13 uint32_t
     14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) {
     15   const uint16_t v = chars[(*iter)++];
     16   if (HB_IsHighSurrogate(v)) {
     17     // surrogate pair
     18     if (*iter >= len) {
     19       // the surrogate is incomplete.
     20       return HB_InvalidCodePoint;
     21     }
     22     const uint16_t v2 = chars[(*iter)++];
     23     if (!HB_IsLowSurrogate(v2)) {
     24       // invalidate surrogate pair.
     25       return HB_InvalidCodePoint;
     26     }
     27 
     28     return HB_SurrogateToUcs4(v, v2);
     29   }
     30 
     31   if (HB_IsLowSurrogate(v)) {
     32     // this isn't a valid code point
     33     return HB_InvalidCodePoint;
     34   }
     35 
     36   return v;
     37 }
     38 
     39 uint32_t
     40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) {
     41   const uint16_t v = chars[(*iter)--];
     42   if (HB_IsLowSurrogate(v)) {
     43     // surrogate pair
     44     if (*iter < 0) {
     45       // the surrogate is incomplete.
     46       return HB_InvalidCodePoint;
     47     }
     48     const uint16_t v2 = chars[(*iter)--];
     49     if (!HB_IsHighSurrogate(v2)) {
     50       // invalidate surrogate pair.
     51       return HB_InvalidCodePoint;
     52     }
     53 
     54     return HB_SurrogateToUcs4(v2, v);
     55   }
     56 
     57   if (HB_IsHighSurrogate(v)) {
     58     // this isn't a valid code point
     59     return HB_InvalidCodePoint;
     60   }
     61 
     62   return v;
     63 }
     64 
     65 static int
     66 script_property_cmp(const void *vkey, const void *vcandidate) {
     67   const uint32_t key = (uint32_t) (intptr_t) vkey;
     68   const struct script_property *candidate = vcandidate;
     69 
     70   if (key < candidate->range_start) {
     71     return -1;
     72   } else if (key > candidate->range_end) {
     73     return 1;
     74   } else {
     75     return 0;
     76   }
     77 }
     78 
     79 HB_Script
     80 code_point_to_script(uint32_t cp) {
     81   const void *vprop = bsearch((void *) (intptr_t) cp, script_properties,
     82                               script_properties_count,
     83                               sizeof(struct script_property),
     84                               script_property_cmp);
     85   if (!vprop)
     86     return HB_Script_Common;
     87 
     88   return ((const struct script_property *) vprop)->script;
     89 }
     90 
     91 char
     92 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output,
     93                          const uint16_t *chars, size_t len, ssize_t *iter) {
     94   if (*iter == len)
     95     return 0;
     96 
     97   output->pos = *iter;
     98   const uint32_t init_cp = utf16_to_code_point(chars, len, iter);
     99   unsigned cps = 1;
    100   if (init_cp == HB_InvalidCodePoint)
    101     return 0;
    102   const HB_Script init_script = code_point_to_script(init_cp);
    103   HB_Script current_script = init_script;
    104   output->script = init_script;
    105 
    106   for (;;) {
    107     if (*iter == len)
    108       break;
    109     const ssize_t prev_iter = *iter;
    110     const uint32_t cp = utf16_to_code_point(chars, len, iter);
    111     if (cp == HB_InvalidCodePoint)
    112       return 0;
    113     cps++;
    114     const HB_Script script = code_point_to_script(cp);
    115 
    116     if (script != current_script) {
    117       if (current_script == init_script == HB_Script_Inherited) {
    118         // If we started off as inherited, we take whatever we can find.
    119         output->script = script;
    120         current_script = script;
    121         continue;
    122       } else if (script == HB_Script_Inherited) {
    123         continue;
    124       } else {
    125         *iter = prev_iter;
    126         cps--;
    127         break;
    128       }
    129     }
    130   }
    131 
    132   if (output->script == HB_Script_Inherited)
    133     output->script = HB_Script_Common;
    134 
    135   output->length = *iter - output->pos;
    136   if (num_code_points)
    137     *num_code_points = cps;
    138   return 1;
    139 }
    140 
    141 char
    142 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output,
    143                          const uint16_t *chars, size_t len, ssize_t *iter) {
    144   if (*iter == (size_t) -1)
    145     return 0;
    146 
    147   const size_t ending_index = *iter;
    148   const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter);
    149   unsigned cps = 1;
    150   if (init_cp == HB_InvalidCodePoint)
    151     return 0;
    152   const HB_Script init_script = code_point_to_script(init_cp);
    153   HB_Script current_script = init_script;
    154   output->script = init_script;
    155 
    156   for (;;) {
    157     if (*iter < 0)
    158       break;
    159     const ssize_t prev_iter = *iter;
    160     const uint32_t cp = utf16_to_code_point_prev(chars, len, iter);
    161     if (cp == HB_InvalidCodePoint)
    162       return 0;
    163     cps++;
    164     const HB_Script script = code_point_to_script(cp);
    165 
    166     if (script != current_script) {
    167         /* BEGIN android-changed
    168            The condition was not correct by doing "a == b == constant"
    169            END android-changed */
    170       if (current_script == HB_Script_Inherited && init_script == HB_Script_Inherited) {
    171         // If we started off as inherited, we take whatever we can find.
    172         output->script = script;
    173         current_script = script;
    174         continue;
    175       } else if (script == HB_Script_Inherited) {
    176         /* BEGIN android-changed
    177            We apply the same fix for Chrome to Android.
    178            Chrome team will talk with upsteam about it.
    179            Just assume that whatever follows this combining character is within
    180            the same script.  This is incorrect if you had language1 + combining
    181            char + language 2, but that is rare and this code is suspicious
    182            anyway.
    183            END android-changed */
    184         continue;
    185       } else {
    186         *iter = prev_iter;
    187         cps--;
    188         break;
    189       }
    190     }
    191   }
    192 
    193   if (output->script == HB_Script_Inherited)
    194     output->script = HB_Script_Common;
    195 
    196   output->pos = *iter + 1;
    197   output->length = ending_index - *iter;
    198   if (num_code_points)
    199     *num_code_points = cps;
    200   return 1;
    201 }
    202 
    203 static int
    204 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) {
    205   const uint32_t key = (uint32_t) (intptr_t) vkey;
    206   const struct grapheme_break_property *candidate = vcandidate;
    207 
    208   if (key < candidate->range_start) {
    209     return -1;
    210   } else if (key > candidate->range_end) {
    211     return 1;
    212   } else {
    213     return 0;
    214   }
    215 }
    216 
    217 HB_GraphemeClass
    218 HB_GetGraphemeClass(HB_UChar32 ch) {
    219   const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties,
    220                               grapheme_break_properties_count,
    221                               sizeof(struct grapheme_break_property),
    222                               grapheme_break_property_cmp);
    223   if (!vprop)
    224     return HB_Grapheme_Other;
    225 
    226   return ((const struct grapheme_break_property *) vprop)->klass;
    227 }
    228 
    229 HB_WordClass
    230 HB_GetWordClass(HB_UChar32 ch) {
    231   abort();
    232   return 0;
    233 }
    234 
    235 HB_SentenceClass
    236 HB_GetSentenceClass(HB_UChar32 ch) {
    237   abort();
    238   return 0;
    239 }
    240 
    241 void
    242 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) {
    243   *gclass = HB_GetGraphemeClass(ch);
    244   *breakclass = HB_GetLineBreakClass(ch);
    245 }
    246 
    247 static int
    248 mirroring_property_cmp(const void *vkey, const void *vcandidate) {
    249   const uint32_t key = (uint32_t) (intptr_t) vkey;
    250   const struct mirroring_property *candidate = vcandidate;
    251 
    252   if (key < candidate->a) {
    253     return -1;
    254   } else if (key > candidate->a) {
    255     return 1;
    256   } else {
    257     return 0;
    258   }
    259 }
    260 
    261 HB_UChar16
    262 HB_GetMirroredChar(HB_UChar16 ch) {
    263   const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties,
    264                               mirroring_properties_count,
    265                               sizeof(struct mirroring_property),
    266                               mirroring_property_cmp);
    267   if (!mprop)
    268     return ch;
    269 
    270   return ((const struct mirroring_property *) mprop)->b;
    271 }
    272 
    273 void *
    274 HB_Library_Resolve(const char *library, int version, const char *symbol) {
    275   abort();
    276   return NULL;
    277 }
    278