Home | History | Annotate | Download | only in contrib
      1 #include <stdint.h>
      2 #include <stdlib.h>
      3 
      4 #include <harfbuzz-external.h>
      5 #include <harfbuzz-impl.h>
      6 #include <harfbuzz-shaper.h>
      7 #include "harfbuzz-unicode.h"
      8 
      9 #include "tables/grapheme-break-properties.h"
     10 #include "tables/mirroring-properties.h"
     11 #include "tables/script-properties.h"
     12 
     13 uint32_t
     14 utf16_to_code_point(const uint16_t *chars, size_t len, ssize_t *iter) {
     15   const uint16_t v = chars[(*iter)++];
     16   if (HB_IsHighSurrogate(v)) {
     17     // surrogate pair
     18     if (*iter >= len) {
     19       // the surrogate is incomplete.
     20       return HB_InvalidCodePoint;
     21     }
     22     const uint16_t v2 = chars[(*iter)++];
     23     if (!HB_IsLowSurrogate(v2)) {
     24       // invalidate surrogate pair.
     25       return HB_InvalidCodePoint;
     26     }
     27 
     28     return HB_SurrogateToUcs4(v, v2);
     29   }
     30 
     31   if (HB_IsLowSurrogate(v)) {
     32     // this isn't a valid code point
     33     return HB_InvalidCodePoint;
     34   }
     35 
     36   return v;
     37 }
     38 
     39 uint32_t
     40 utf16_to_code_point_prev(const uint16_t *chars, size_t len, ssize_t *iter) {
     41   const uint16_t v = chars[(*iter)--];
     42   if (HB_IsLowSurrogate(v)) {
     43     // surrogate pair
     44     if (*iter < 0) {
     45       // the surrogate is incomplete.
     46       return HB_InvalidCodePoint;
     47     }
     48     const uint16_t v2 = chars[(*iter)--];
     49     if (!HB_IsHighSurrogate(v2)) {
     50       // invalidate surrogate pair.
     51       return HB_InvalidCodePoint;
     52     }
     53 
     54     return HB_SurrogateToUcs4(v2, v);
     55   }
     56 
     57   if (HB_IsHighSurrogate(v)) {
     58     // this isn't a valid code point
     59     return HB_InvalidCodePoint;
     60   }
     61 
     62   return v;
     63 }
     64 
     65 static int
     66 script_property_cmp(const void *vkey, const void *vcandidate) {
     67   const uint32_t key = (uint32_t) (intptr_t) vkey;
     68   const struct script_property *candidate = vcandidate;
     69 
     70   if (key < candidate->range_start) {
     71     return -1;
     72   } else if (key > candidate->range_end) {
     73     return 1;
     74   } else {
     75     return 0;
     76   }
     77 }
     78 
     79 HB_Script
     80 code_point_to_script(uint32_t cp) {
     81   const void *vprop = bsearch((void *) (intptr_t) cp, script_properties,
     82                               script_properties_count,
     83                               sizeof(struct script_property),
     84                               script_property_cmp);
     85   if (!vprop)
     86     return HB_Script_Common;
     87 
     88   return ((const struct script_property *) vprop)->script;
     89 }
     90 
     91 char
     92 hb_utf16_script_run_next(unsigned *num_code_points, HB_ScriptItem *output,
     93                          const uint16_t *chars, size_t len, ssize_t *iter) {
     94   if (*iter == len)
     95     return 0;
     96 
     97   output->pos = *iter;
     98   const uint32_t init_cp = utf16_to_code_point(chars, len, iter);
     99   unsigned cps = 1;
    100   if (init_cp == HB_InvalidCodePoint)
    101     return 0;
    102   const HB_Script init_script = code_point_to_script(init_cp);
    103   HB_Script current_script = init_script;
    104   output->script = init_script;
    105 
    106   for (;;) {
    107     if (*iter == len)
    108       break;
    109     const ssize_t prev_iter = *iter;
    110     const uint32_t cp = utf16_to_code_point(chars, len, iter);
    111     if (cp == HB_InvalidCodePoint)
    112       return 0;
    113     cps++;
    114     const HB_Script script = code_point_to_script(cp);
    115 
    116     if (script != current_script) {
    117       if (current_script == init_script == HB_Script_Inherited) {
    118         // If we started off as inherited, we take whatever we can find.
    119         output->script = script;
    120         current_script = script;
    121         continue;
    122       } else if (script == HB_Script_Inherited) {
    123         continue;
    124       } else {
    125         *iter = prev_iter;
    126         cps--;
    127         break;
    128       }
    129     }
    130   }
    131 
    132   if (output->script == HB_Script_Inherited)
    133     output->script = HB_Script_Common;
    134 
    135   output->length = *iter - output->pos;
    136   if (num_code_points)
    137     *num_code_points = cps;
    138   return 1;
    139 }
    140 
    141 char
    142 hb_utf16_script_run_prev(unsigned *num_code_points, HB_ScriptItem *output,
    143                          const uint16_t *chars, size_t len, ssize_t *iter) {
    144   if (*iter == (size_t) -1)
    145     return 0;
    146 
    147   const size_t ending_index = *iter;
    148   const uint32_t init_cp = utf16_to_code_point_prev(chars, len, iter);
    149   unsigned cps = 1;
    150   if (init_cp == HB_InvalidCodePoint)
    151     return 0;
    152   const HB_Script init_script = code_point_to_script(init_cp);
    153   HB_Script current_script = init_script;
    154   output->script = init_script;
    155 
    156   for (;;) {
    157     if (*iter < 0)
    158       break;
    159     const ssize_t prev_iter = *iter;
    160     const uint32_t cp = utf16_to_code_point_prev(chars, len, iter);
    161     if (cp == HB_InvalidCodePoint)
    162       return 0;
    163     cps++;
    164     const HB_Script script = code_point_to_script(cp);
    165 
    166     if (script != current_script) {
    167       if (current_script == init_script == HB_Script_Inherited) {
    168         // If we started off as inherited, we take whatever we can find.
    169         output->script = script;
    170         current_script = script;
    171         continue;
    172       } else if (script == HB_Script_Inherited) {
    173         /* BEGIN android-changed
    174            We apply the same fix for Chrome to Android.
    175            Chrome team will talk with upsteam about it.
    176            Just assume that whatever follows this combining character is within
    177            the same script.  This is incorrect if you had language1 + combining
    178            char + language 2, but that is rare and this code is suspicious
    179            anyway.
    180            END android-changed */
    181         continue;
    182       } else {
    183         *iter = prev_iter;
    184         cps--;
    185         break;
    186       }
    187     }
    188   }
    189 
    190   if (output->script == HB_Script_Inherited)
    191     output->script = HB_Script_Common;
    192 
    193   output->pos = *iter + 1;
    194   output->length = ending_index - *iter;
    195   if (num_code_points)
    196     *num_code_points = cps;
    197   return 1;
    198 }
    199 
    200 static int
    201 grapheme_break_property_cmp(const void *vkey, const void *vcandidate) {
    202   const uint32_t key = (uint32_t) (intptr_t) vkey;
    203   const struct grapheme_break_property *candidate = vcandidate;
    204 
    205   if (key < candidate->range_start) {
    206     return -1;
    207   } else if (key > candidate->range_end) {
    208     return 1;
    209   } else {
    210     return 0;
    211   }
    212 }
    213 
    214 HB_GraphemeClass
    215 HB_GetGraphemeClass(HB_UChar32 ch) {
    216   const void *vprop = bsearch((void *) (intptr_t) ch, grapheme_break_properties,
    217                               grapheme_break_properties_count,
    218                               sizeof(struct grapheme_break_property),
    219                               grapheme_break_property_cmp);
    220   if (!vprop)
    221     return HB_Grapheme_Other;
    222 
    223   return ((const struct grapheme_break_property *) vprop)->klass;
    224 }
    225 
    226 HB_WordClass
    227 HB_GetWordClass(HB_UChar32 ch) {
    228   abort();
    229   return 0;
    230 }
    231 
    232 HB_SentenceClass
    233 HB_GetSentenceClass(HB_UChar32 ch) {
    234   abort();
    235   return 0;
    236 }
    237 
    238 void
    239 HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *gclass, HB_LineBreakClass *breakclass) {
    240   *gclass = HB_GetGraphemeClass(ch);
    241   *breakclass = HB_GetLineBreakClass(ch);
    242 }
    243 
    244 static int
    245 mirroring_property_cmp(const void *vkey, const void *vcandidate) {
    246   const uint32_t key = (uint32_t) (intptr_t) vkey;
    247   const struct mirroring_property *candidate = vcandidate;
    248 
    249   if (key < candidate->a) {
    250     return -1;
    251   } else if (key > candidate->a) {
    252     return 1;
    253   } else {
    254     return 0;
    255   }
    256 }
    257 
    258 HB_UChar16
    259 HB_GetMirroredChar(HB_UChar16 ch) {
    260   const void *mprop = bsearch((void *) (intptr_t) ch, mirroring_properties,
    261                               mirroring_properties_count,
    262                               sizeof(struct mirroring_property),
    263                               mirroring_property_cmp);
    264   if (!mprop)
    265     return ch;
    266 
    267   return ((const struct mirroring_property *) mprop)->b;
    268 }
    269 
    270 void *
    271 HB_Library_Resolve(const char *library, int version, const char *symbol) {
    272   abort();
    273   return NULL;
    274 }
    275