Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2009,2010  Red Hat, Inc.
      3  * Copyright  2011,2012  Google, Inc.
      4  *
      5  *  This is part of HarfBuzz, a text shaping library.
      6  *
      7  * Permission is hereby granted, without written agreement and without
      8  * license or royalty fees, to use, copy, modify, and distribute this
      9  * software and its documentation for any purpose, provided that the
     10  * above copyright notice and the following two paragraphs appear in
     11  * all copies of this software.
     12  *
     13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     17  * DAMAGE.
     18  *
     19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     24  *
     25  * Red Hat Author(s): Behdad Esfahbod
     26  * Google Author(s): Behdad Esfahbod
     27  */
     28 
     29 #include "hb.hh"
     30 
     31 #include "hb-machinery.hh"
     32 
     33 #include <locale.h>
     34 #ifdef HAVE_XLOCALE_H
     35 #include <xlocale.h>
     36 #endif
     37 
     38 
     39 /**
     40  * SECTION:hb-common
     41  * @title: hb-common
     42  * @short_description: Common data types
     43  * @include: hb.h
     44  *
     45  * Common data types used across HarfBuzz are defined here.
     46  **/
     47 
     48 
     49 /* hb_options_t */
     50 
     51 hb_atomic_int_t _hb_options;
     52 
     53 void
     54 _hb_options_init ()
     55 {
     56   hb_options_union_t u;
     57   u.i = 0;
     58   u.opts.initialized = true;
     59 
     60   const char *c = getenv ("HB_OPTIONS");
     61   if (c)
     62   {
     63     while (*c)
     64     {
     65       const char *p = strchr (c, ':');
     66       if (!p)
     67         p = c + strlen (c);
     68 
     69 #define OPTION(name, symbol) \
     70 	if (0 == strncmp (c, name, p - c) && strlen (name) == p - c) u.opts.symbol = true;
     71 
     72       OPTION ("uniscribe-bug-compatible", uniscribe_bug_compatible);
     73       OPTION ("aat", aat);
     74 
     75 #undef OPTION
     76 
     77       c = *p ? p + 1 : p;
     78     }
     79 
     80   }
     81 
     82   /* This is idempotent and threadsafe. */
     83   _hb_options.set_relaxed (u.i);
     84 }
     85 
     86 
     87 /* hb_tag_t */
     88 
     89 /**
     90  * hb_tag_from_string:
     91  * @str: (array length=len) (element-type uint8_t):
     92  * @len:
     93  *
     94  *
     95  *
     96  * Return value:
     97  *
     98  * Since: 0.9.2
     99  **/
    100 hb_tag_t
    101 hb_tag_from_string (const char *str, int len)
    102 {
    103   char tag[4];
    104   unsigned int i;
    105 
    106   if (!str || !len || !*str)
    107     return HB_TAG_NONE;
    108 
    109   if (len < 0 || len > 4)
    110     len = 4;
    111   for (i = 0; i < (unsigned) len && str[i]; i++)
    112     tag[i] = str[i];
    113   for (; i < 4; i++)
    114     tag[i] = ' ';
    115 
    116   return HB_TAG (tag[0], tag[1], tag[2], tag[3]);
    117 }
    118 
    119 /**
    120  * hb_tag_to_string:
    121  * @tag:
    122  * @buf: (out caller-allocates) (array fixed-size=4) (element-type uint8_t):
    123  *
    124  *
    125  *
    126  * Since: 0.9.5
    127  **/
    128 void
    129 hb_tag_to_string (hb_tag_t tag, char *buf)
    130 {
    131   buf[0] = (char) (uint8_t) (tag >> 24);
    132   buf[1] = (char) (uint8_t) (tag >> 16);
    133   buf[2] = (char) (uint8_t) (tag >>  8);
    134   buf[3] = (char) (uint8_t) (tag >>  0);
    135 }
    136 
    137 
    138 /* hb_direction_t */
    139 
    140 const char direction_strings[][4] = {
    141   "ltr",
    142   "rtl",
    143   "ttb",
    144   "btt"
    145 };
    146 
    147 /**
    148  * hb_direction_from_string:
    149  * @str: (array length=len) (element-type uint8_t):
    150  * @len:
    151  *
    152  *
    153  *
    154  * Return value:
    155  *
    156  * Since: 0.9.2
    157  **/
    158 hb_direction_t
    159 hb_direction_from_string (const char *str, int len)
    160 {
    161   if (unlikely (!str || !len || !*str))
    162     return HB_DIRECTION_INVALID;
    163 
    164   /* Lets match loosely: just match the first letter, such that
    165    * all of "ltr", "left-to-right", etc work!
    166    */
    167   char c = TOLOWER (str[0]);
    168   for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
    169     if (c == direction_strings[i][0])
    170       return (hb_direction_t) (HB_DIRECTION_LTR + i);
    171 
    172   return HB_DIRECTION_INVALID;
    173 }
    174 
    175 /**
    176  * hb_direction_to_string:
    177  * @direction:
    178  *
    179  *
    180  *
    181  * Return value: (transfer none):
    182  *
    183  * Since: 0.9.2
    184  **/
    185 const char *
    186 hb_direction_to_string (hb_direction_t direction)
    187 {
    188   if (likely ((unsigned int) (direction - HB_DIRECTION_LTR)
    189 	      < ARRAY_LENGTH (direction_strings)))
    190     return direction_strings[direction - HB_DIRECTION_LTR];
    191 
    192   return "invalid";
    193 }
    194 
    195 
    196 /* hb_language_t */
    197 
    198 struct hb_language_impl_t {
    199   const char s[1];
    200 };
    201 
    202 static const char canon_map[256] = {
    203    0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
    204    0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
    205    0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,  '-',  0,   0,
    206   '0', '1', '2', '3', '4', '5', '6', '7',  '8', '9',  0,   0,   0,   0,   0,   0,
    207    0,  'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    208   'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,  '-',
    209    0,  'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    210   'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,   0
    211 };
    212 
    213 static bool
    214 lang_equal (hb_language_t  v1,
    215 	    const void    *v2)
    216 {
    217   const unsigned char *p1 = (const unsigned char *) v1;
    218   const unsigned char *p2 = (const unsigned char *) v2;
    219 
    220   while (*p1 && *p1 == canon_map[*p2]) {
    221     p1++;
    222     p2++;
    223   }
    224 
    225   return *p1 == canon_map[*p2];
    226 }
    227 
    228 #if 0
    229 static unsigned int
    230 lang_hash (const void *key)
    231 {
    232   const unsigned char *p = key;
    233   unsigned int h = 0;
    234   while (canon_map[*p])
    235     {
    236       h = (h << 5) - h + canon_map[*p];
    237       p++;
    238     }
    239 
    240   return h;
    241 }
    242 #endif
    243 
    244 
    245 struct hb_language_item_t {
    246 
    247   struct hb_language_item_t *next;
    248   hb_language_t lang;
    249 
    250   bool operator == (const char *s) const
    251   { return lang_equal (lang, s); }
    252 
    253   hb_language_item_t & operator = (const char *s) {
    254     /* If a custom allocated is used calling strdup() pairs
    255     badly with a call to the custom free() in fini() below.
    256     Therefore don't call strdup(), implement its behavior.
    257     */
    258     size_t len = strlen(s) + 1;
    259     lang = (hb_language_t) malloc(len);
    260     if (likely (lang))
    261     {
    262       memcpy((unsigned char *) lang, s, len);
    263       for (unsigned char *p = (unsigned char *) lang; *p; p++)
    264 	*p = canon_map[*p];
    265     }
    266 
    267     return *this;
    268   }
    269 
    270   void fini () { free ((void *) lang); }
    271 };
    272 
    273 
    274 /* Thread-safe lock-free language list */
    275 
    276 static hb_atomic_ptr_t <hb_language_item_t> langs;
    277 
    278 #if HB_USE_ATEXIT
    279 static void
    280 free_langs ()
    281 {
    282 retry:
    283   hb_language_item_t *first_lang = langs;
    284   if (unlikely (!langs.cmpexch (first_lang, nullptr)))
    285     goto retry;
    286 
    287   while (first_lang) {
    288     hb_language_item_t *next = first_lang->next;
    289     first_lang->fini ();
    290     free (first_lang);
    291     first_lang = next;
    292   }
    293 }
    294 #endif
    295 
    296 static hb_language_item_t *
    297 lang_find_or_insert (const char *key)
    298 {
    299 retry:
    300   hb_language_item_t *first_lang = langs;
    301 
    302   for (hb_language_item_t *lang = first_lang; lang; lang = lang->next)
    303     if (*lang == key)
    304       return lang;
    305 
    306   /* Not found; allocate one. */
    307   hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t));
    308   if (unlikely (!lang))
    309     return nullptr;
    310   lang->next = first_lang;
    311   *lang = key;
    312   if (unlikely (!lang->lang))
    313   {
    314     free (lang);
    315     return nullptr;
    316   }
    317 
    318   if (unlikely (!langs.cmpexch (first_lang, lang)))
    319   {
    320     lang->fini ();
    321     free (lang);
    322     goto retry;
    323   }
    324 
    325 #if HB_USE_ATEXIT
    326   if (!first_lang)
    327     atexit (free_langs); /* First person registers atexit() callback. */
    328 #endif
    329 
    330   return lang;
    331 }
    332 
    333 
    334 /**
    335  * hb_language_from_string:
    336  * @str: (array length=len) (element-type uint8_t): a string representing
    337  *       a BCP47 language tag
    338  * @len: length of the @str, or -1 if it is %NULL-terminated.
    339  *
    340  * Converts @str representing a BCP47 language tag to the corresponding
    341  * #hb_language_t.
    342  *
    343  * Return value: (transfer none):
    344  * The #hb_language_t corresponding to the BCP47 language tag.
    345  *
    346  * Since: 0.9.2
    347  **/
    348 hb_language_t
    349 hb_language_from_string (const char *str, int len)
    350 {
    351   if (!str || !len || !*str)
    352     return HB_LANGUAGE_INVALID;
    353 
    354   hb_language_item_t *item = nullptr;
    355   if (len >= 0)
    356   {
    357     /* NUL-terminate it. */
    358     char strbuf[64];
    359     len = MIN (len, (int) sizeof (strbuf) - 1);
    360     memcpy (strbuf, str, len);
    361     strbuf[len] = '\0';
    362     item = lang_find_or_insert (strbuf);
    363   }
    364   else
    365     item = lang_find_or_insert (str);
    366 
    367   return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
    368 }
    369 
    370 /**
    371  * hb_language_to_string:
    372  * @language: an #hb_language_t to convert.
    373  *
    374  * See hb_language_from_string().
    375  *
    376  * Return value: (transfer none):
    377  * A %NULL-terminated string representing the @language. Must not be freed by
    378  * the caller.
    379  *
    380  * Since: 0.9.2
    381  **/
    382 const char *
    383 hb_language_to_string (hb_language_t language)
    384 {
    385   /* This is actually nullptr-safe! */
    386   return language->s;
    387 }
    388 
    389 /**
    390  * hb_language_get_default:
    391  *
    392  * Get default language from current locale.
    393  *
    394  * Note that the first time this function is called, it calls
    395  * "setlocale (LC_CTYPE, nullptr)" to fetch current locale.  The underlying
    396  * setlocale function is, in many implementations, NOT threadsafe.  To avoid
    397  * problems, call this function once before multiple threads can call it.
    398  * This function is only used from hb_buffer_guess_segment_properties() by
    399  * HarfBuzz itself.
    400  *
    401  * Return value: (transfer none):
    402  *
    403  * Since: 0.9.2
    404  **/
    405 hb_language_t
    406 hb_language_get_default ()
    407 {
    408   static hb_atomic_ptr_t <hb_language_t> default_language;
    409 
    410   hb_language_t language = default_language;
    411   if (unlikely (language == HB_LANGUAGE_INVALID))
    412   {
    413     language = hb_language_from_string (setlocale (LC_CTYPE, nullptr), -1);
    414     (void) default_language.cmpexch (HB_LANGUAGE_INVALID, language);
    415   }
    416 
    417   return language;
    418 }
    419 
    420 
    421 /* hb_script_t */
    422 
    423 /**
    424  * hb_script_from_iso15924_tag:
    425  * @tag: an #hb_tag_t representing an ISO15924 tag.
    426  *
    427  * Converts an ISO15924 script tag to a corresponding #hb_script_t.
    428  *
    429  * Return value:
    430  * An #hb_script_t corresponding to the ISO15924 tag.
    431  *
    432  * Since: 0.9.2
    433  **/
    434 hb_script_t
    435 hb_script_from_iso15924_tag (hb_tag_t tag)
    436 {
    437   if (unlikely (tag == HB_TAG_NONE))
    438     return HB_SCRIPT_INVALID;
    439 
    440   /* Be lenient, adjust case (one capital letter followed by three small letters) */
    441   tag = (tag & 0xDFDFDFDFu) | 0x00202020u;
    442 
    443   switch (tag) {
    444 
    445     /* These graduated from the 'Q' private-area codes, but
    446      * the old code is still aliased by Unicode, and the Qaai
    447      * one in use by ICU. */
    448     case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
    449     case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
    450 
    451     /* Script variants from https://unicode.org/iso15924/ */
    452     case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
    453     case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
    454     case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
    455     case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
    456     case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
    457     case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
    458   }
    459 
    460   /* If it looks right, just use the tag as a script */
    461   if (((uint32_t) tag & 0xE0E0E0E0u) == 0x40606060u)
    462     return (hb_script_t) tag;
    463 
    464   /* Otherwise, return unknown */
    465   return HB_SCRIPT_UNKNOWN;
    466 }
    467 
    468 /**
    469  * hb_script_from_string:
    470  * @str: (array length=len) (element-type uint8_t): a string representing an
    471  *       ISO15924 tag.
    472  * @len: length of the @str, or -1 if it is %NULL-terminated.
    473  *
    474  * Converts a string @str representing an ISO15924 script tag to a
    475  * corresponding #hb_script_t. Shorthand for hb_tag_from_string() then
    476  * hb_script_from_iso15924_tag().
    477  *
    478  * Return value:
    479  * An #hb_script_t corresponding to the ISO15924 tag.
    480  *
    481  * Since: 0.9.2
    482  **/
    483 hb_script_t
    484 hb_script_from_string (const char *str, int len)
    485 {
    486   return hb_script_from_iso15924_tag (hb_tag_from_string (str, len));
    487 }
    488 
    489 /**
    490  * hb_script_to_iso15924_tag:
    491  * @script: an #hb_script_ to convert.
    492  *
    493  * See hb_script_from_iso15924_tag().
    494  *
    495  * Return value:
    496  * An #hb_tag_t representing an ISO15924 script tag.
    497  *
    498  * Since: 0.9.2
    499  **/
    500 hb_tag_t
    501 hb_script_to_iso15924_tag (hb_script_t script)
    502 {
    503   return (hb_tag_t) script;
    504 }
    505 
    506 /**
    507  * hb_script_get_horizontal_direction:
    508  * @script:
    509  *
    510  *
    511  *
    512  * Return value:
    513  *
    514  * Since: 0.9.2
    515  **/
    516 hb_direction_t
    517 hb_script_get_horizontal_direction (hb_script_t script)
    518 {
    519   /* https://docs.google.com/spreadsheets/d/1Y90M0Ie3MUJ6UVCRDOypOtijlMDLNNyyLk36T6iMu0o */
    520   switch ((hb_tag_t) script)
    521   {
    522     /* Unicode-1.1 additions */
    523     case HB_SCRIPT_ARABIC:
    524     case HB_SCRIPT_HEBREW:
    525 
    526     /* Unicode-3.0 additions */
    527     case HB_SCRIPT_SYRIAC:
    528     case HB_SCRIPT_THAANA:
    529 
    530     /* Unicode-4.0 additions */
    531     case HB_SCRIPT_CYPRIOT:
    532 
    533     /* Unicode-4.1 additions */
    534     case HB_SCRIPT_KHAROSHTHI:
    535 
    536     /* Unicode-5.0 additions */
    537     case HB_SCRIPT_PHOENICIAN:
    538     case HB_SCRIPT_NKO:
    539 
    540     /* Unicode-5.1 additions */
    541     case HB_SCRIPT_LYDIAN:
    542 
    543     /* Unicode-5.2 additions */
    544     case HB_SCRIPT_AVESTAN:
    545     case HB_SCRIPT_IMPERIAL_ARAMAIC:
    546     case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
    547     case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
    548     case HB_SCRIPT_OLD_SOUTH_ARABIAN:
    549     case HB_SCRIPT_OLD_TURKIC:
    550     case HB_SCRIPT_SAMARITAN:
    551 
    552     /* Unicode-6.0 additions */
    553     case HB_SCRIPT_MANDAIC:
    554 
    555     /* Unicode-6.1 additions */
    556     case HB_SCRIPT_MEROITIC_CURSIVE:
    557     case HB_SCRIPT_MEROITIC_HIEROGLYPHS:
    558 
    559     /* Unicode-7.0 additions */
    560     case HB_SCRIPT_MANICHAEAN:
    561     case HB_SCRIPT_MENDE_KIKAKUI:
    562     case HB_SCRIPT_NABATAEAN:
    563     case HB_SCRIPT_OLD_NORTH_ARABIAN:
    564     case HB_SCRIPT_PALMYRENE:
    565     case HB_SCRIPT_PSALTER_PAHLAVI:
    566 
    567     /* Unicode-8.0 additions */
    568     case HB_SCRIPT_HATRAN:
    569 
    570     /* Unicode-9.0 additions */
    571     case HB_SCRIPT_ADLAM:
    572 
    573     /* Unicode-11.0 additions */
    574     case HB_SCRIPT_HANIFI_ROHINGYA:
    575     case HB_SCRIPT_OLD_SOGDIAN:
    576     case HB_SCRIPT_SOGDIAN:
    577 
    578       return HB_DIRECTION_RTL;
    579 
    580 
    581     /* https://github.com/harfbuzz/harfbuzz/issues/1000 */
    582     case HB_SCRIPT_OLD_HUNGARIAN:
    583     case HB_SCRIPT_OLD_ITALIC:
    584     case HB_SCRIPT_RUNIC:
    585 
    586       return HB_DIRECTION_INVALID;
    587   }
    588 
    589   return HB_DIRECTION_LTR;
    590 }
    591 
    592 
    593 /* hb_user_data_array_t */
    594 
    595 bool
    596 hb_user_data_array_t::set (hb_user_data_key_t *key,
    597 			   void *              data,
    598 			   hb_destroy_func_t   destroy,
    599 			   hb_bool_t           replace)
    600 {
    601   if (!key)
    602     return false;
    603 
    604   if (replace) {
    605     if (!data && !destroy) {
    606       items.remove (key, lock);
    607       return true;
    608     }
    609   }
    610   hb_user_data_item_t item = {key, data, destroy};
    611   bool ret = !!items.replace_or_insert (item, lock, (bool) replace);
    612 
    613   return ret;
    614 }
    615 
    616 void *
    617 hb_user_data_array_t::get (hb_user_data_key_t *key)
    618 {
    619   hb_user_data_item_t item = {nullptr, nullptr, nullptr};
    620 
    621   return items.find (key, &item, lock) ? item.data : nullptr;
    622 }
    623 
    624 
    625 /* hb_version */
    626 
    627 
    628 /**
    629  * SECTION:hb-version
    630  * @title: hb-version
    631  * @short_description: Information about the version of HarfBuzz in use
    632  * @include: hb.h
    633  *
    634  * These functions and macros allow accessing version of the HarfBuzz
    635  * library used at compile- as well as run-time, and to direct code
    636  * conditionally based on those versions, again, at compile- or run-time.
    637  **/
    638 
    639 
    640 /**
    641  * hb_version:
    642  * @major: (out): Library major version component.
    643  * @minor: (out): Library minor version component.
    644  * @micro: (out): Library micro version component.
    645  *
    646  * Returns library version as three integer components.
    647  *
    648  * Since: 0.9.2
    649  **/
    650 void
    651 hb_version (unsigned int *major,
    652 	    unsigned int *minor,
    653 	    unsigned int *micro)
    654 {
    655   *major = HB_VERSION_MAJOR;
    656   *minor = HB_VERSION_MINOR;
    657   *micro = HB_VERSION_MICRO;
    658 }
    659 
    660 /**
    661  * hb_version_string:
    662  *
    663  * Returns library version as a string with three components.
    664  *
    665  * Return value: library version string.
    666  *
    667  * Since: 0.9.2
    668  **/
    669 const char *
    670 hb_version_string ()
    671 {
    672   return HB_VERSION_STRING;
    673 }
    674 
    675 /**
    676  * hb_version_atleast:
    677  * @major:
    678  * @minor:
    679  * @micro:
    680  *
    681  *
    682  *
    683  * Return value:
    684  *
    685  * Since: 0.9.30
    686  **/
    687 hb_bool_t
    688 hb_version_atleast (unsigned int major,
    689 		    unsigned int minor,
    690 		    unsigned int micro)
    691 {
    692   return HB_VERSION_ATLEAST (major, minor, micro);
    693 }
    694 
    695 
    696 
    697 /* hb_feature_t and hb_variation_t */
    698 
    699 static bool
    700 parse_space (const char **pp, const char *end)
    701 {
    702   while (*pp < end && ISSPACE (**pp))
    703     (*pp)++;
    704   return true;
    705 }
    706 
    707 static bool
    708 parse_char (const char **pp, const char *end, char c)
    709 {
    710   parse_space (pp, end);
    711 
    712   if (*pp == end || **pp != c)
    713     return false;
    714 
    715   (*pp)++;
    716   return true;
    717 }
    718 
    719 static bool
    720 parse_uint (const char **pp, const char *end, unsigned int *pv)
    721 {
    722   char buf[32];
    723   unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
    724   strncpy (buf, *pp, len);
    725   buf[len] = '\0';
    726 
    727   char *p = buf;
    728   char *pend = p;
    729   unsigned int v;
    730 
    731   /* Intentionally use strtol instead of strtoul, such that
    732    * -1 turns into "big number"... */
    733   errno = 0;
    734   v = strtol (p, &pend, 0);
    735   if (errno || p == pend)
    736     return false;
    737 
    738   *pv = v;
    739   *pp += pend - p;
    740   return true;
    741 }
    742 
    743 static bool
    744 parse_uint32 (const char **pp, const char *end, uint32_t *pv)
    745 {
    746   char buf[32];
    747   unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
    748   strncpy (buf, *pp, len);
    749   buf[len] = '\0';
    750 
    751   char *p = buf;
    752   char *pend = p;
    753   unsigned int v;
    754 
    755   /* Intentionally use strtol instead of strtoul, such that
    756    * -1 turns into "big number"... */
    757   errno = 0;
    758   v = strtol (p, &pend, 0);
    759   if (errno || p == pend)
    760     return false;
    761 
    762   *pv = v;
    763   *pp += pend - p;
    764   return true;
    765 }
    766 
/* Select a per-call locale API so floats can be parsed in the "C" locale:
 * newlocale()/strtod_l() when configure found both, the _create_locale()
 * family under MSVC, otherwise none (USE_XLOCALE stays undefined). */
#if defined (HAVE_NEWLOCALE) && defined (HAVE_STRTOD_L)
#define USE_XLOCALE 1
#define HB_LOCALE_T locale_t
#define HB_CREATE_LOCALE(locName) newlocale (LC_ALL_MASK, locName, nullptr)
#define HB_FREE_LOCALE(loc) freelocale (loc)
#elif defined(_MSC_VER)
#define USE_XLOCALE 1
#define HB_LOCALE_T _locale_t
#define HB_CREATE_LOCALE(locName) _create_locale (LC_ALL, locName)
#define HB_FREE_LOCALE(loc) _free_locale (loc)
#define strtod_l(a, b, c) _strtod_l ((a), (b), (c))
#endif

#ifdef USE_XLOCALE

#if HB_USE_ATEXIT
/* Forward declaration: create() below registers it with atexit(). */
static void free_static_C_locale ();
#endif

/* Holder for a single "C" locale object, built on hb_lazy_loader_t.
 * NOTE(review): presumably created on first get_unconst() call - confirm
 * against hb_lazy_loader_t in hb-machinery.hh. */
static struct hb_C_locale_lazy_loader_t : hb_lazy_loader_t<hb_remove_pointer (HB_LOCALE_T),
							  hb_C_locale_lazy_loader_t>
{
  /* Creates the "C" locale and, when enabled, arranges for it to be freed
   * at process exit. */
  static HB_LOCALE_T create ()
  {
    HB_LOCALE_T C_locale = HB_CREATE_LOCALE ("C");

#if HB_USE_ATEXIT
    atexit (free_static_C_locale);
#endif

    return C_locale;
  }
  /* Releases a locale obtained from create(). */
  static void destroy (HB_LOCALE_T p)
  {
    HB_FREE_LOCALE (p);
  }
  /* There is no usable fallback locale object, so the null value is nullptr. */
  static HB_LOCALE_T get_null ()
  {
    return nullptr;
  }
} static_C_locale;

#if HB_USE_ATEXIT
/* atexit() handler: releases the locale held by static_C_locale. */
static
void free_static_C_locale ()
{
  static_C_locale.free_instance ();
}
#endif

/* Returns the "C" locale held by static_C_locale. */
static HB_LOCALE_T
get_C_locale ()
{
  return static_C_locale.get_unconst ();
}
#endif /* USE_XLOCALE */
    823 
    824 static bool
    825 parse_float (const char **pp, const char *end, float *pv)
    826 {
    827   char buf[32];
    828   unsigned int len = MIN (ARRAY_LENGTH (buf) - 1, (unsigned int) (end - *pp));
    829   strncpy (buf, *pp, len);
    830   buf[len] = '\0';
    831 
    832   char *p = buf;
    833   char *pend = p;
    834   float v;
    835 
    836   errno = 0;
    837 #ifdef USE_XLOCALE
    838   v = strtod_l (p, &pend, get_C_locale ());
    839 #else
    840   v = strtod (p, &pend);
    841 #endif
    842   if (errno || p == pend)
    843     return false;
    844 
    845   *pv = v;
    846   *pp += pend - p;
    847   return true;
    848 }
    849 
    850 static bool
    851 parse_bool (const char **pp, const char *end, uint32_t *pv)
    852 {
    853   parse_space (pp, end);
    854 
    855   const char *p = *pp;
    856   while (*pp < end && ISALPHA(**pp))
    857     (*pp)++;
    858 
    859   /* CSS allows on/off as aliases 1/0. */
    860   if (*pp - p == 2 && 0 == strncmp (p, "on", 2))
    861     *pv = 1;
    862   else if (*pp - p == 3 && 0 == strncmp (p, "off", 3))
    863     *pv = 0;
    864   else
    865     return false;
    866 
    867   return true;
    868 }
    869 
    870 /* hb_feature_t */
    871 
    872 static bool
    873 parse_feature_value_prefix (const char **pp, const char *end, hb_feature_t *feature)
    874 {
    875   if (parse_char (pp, end, '-'))
    876     feature->value = 0;
    877   else {
    878     parse_char (pp, end, '+');
    879     feature->value = 1;
    880   }
    881 
    882   return true;
    883 }
    884 
    885 static bool
    886 parse_tag (const char **pp, const char *end, hb_tag_t *tag)
    887 {
    888   parse_space (pp, end);
    889 
    890   char quote = 0;
    891 
    892   if (*pp < end && (**pp == '\'' || **pp == '"'))
    893   {
    894     quote = **pp;
    895     (*pp)++;
    896   }
    897 
    898   const char *p = *pp;
    899   while (*pp < end && (ISALNUM(**pp) || **pp == '_'))
    900     (*pp)++;
    901 
    902   if (p == *pp || *pp - p > 4)
    903     return false;
    904 
    905   *tag = hb_tag_from_string (p, *pp - p);
    906 
    907   if (quote)
    908   {
    909     /* CSS expects exactly four bytes.  And we only allow quotations for
    910      * CSS compatibility.  So, enforce the length. */
    911      if (*pp - p != 4)
    912        return false;
    913     if (*pp == end || **pp != quote)
    914       return false;
    915     (*pp)++;
    916   }
    917 
    918   return true;
    919 }
    920 
    921 static bool
    922 parse_feature_indices (const char **pp, const char *end, hb_feature_t *feature)
    923 {
    924   parse_space (pp, end);
    925 
    926   bool has_start;
    927 
    928   feature->start = HB_FEATURE_GLOBAL_START;
    929   feature->end = HB_FEATURE_GLOBAL_END;
    930 
    931   if (!parse_char (pp, end, '['))
    932     return true;
    933 
    934   has_start = parse_uint (pp, end, &feature->start);
    935 
    936   if (parse_char (pp, end, ':') || parse_char (pp, end, ';')) {
    937     parse_uint (pp, end, &feature->end);
    938   } else {
    939     if (has_start)
    940       feature->end = feature->start + 1;
    941   }
    942 
    943   return parse_char (pp, end, ']');
    944 }
    945 
    946 static bool
    947 parse_feature_value_postfix (const char **pp, const char *end, hb_feature_t *feature)
    948 {
    949   bool had_equal = parse_char (pp, end, '=');
    950   bool had_value = parse_uint32 (pp, end, &feature->value) ||
    951                    parse_bool (pp, end, &feature->value);
    952   /* CSS doesn't use equal-sign between tag and value.
    953    * If there was an equal-sign, then there *must* be a value.
    954    * A value without an equal-sign is ok, but not required. */
    955   return !had_equal || had_value;
    956 }
    957 
    958 static bool
    959 parse_one_feature (const char **pp, const char *end, hb_feature_t *feature)
    960 {
    961   return parse_feature_value_prefix (pp, end, feature) &&
    962 	 parse_tag (pp, end, &feature->tag) &&
    963 	 parse_feature_indices (pp, end, feature) &&
    964 	 parse_feature_value_postfix (pp, end, feature) &&
    965 	 parse_space (pp, end) &&
    966 	 *pp == end;
    967 }
    968 
    969 /**
    970  * hb_feature_from_string:
    971  * @str: (array length=len) (element-type uint8_t): a string to parse
    972  * @len: length of @str, or -1 if string is %NULL terminated
    973  * @feature: (out): the #hb_feature_t to initialize with the parsed values
    974  *
    975  * Parses a string into a #hb_feature_t.
    976  *
    977  * TODO: document the syntax here.
    978  *
    979  * Return value:
    980  * %true if @str is successfully parsed, %false otherwise.
    981  *
    982  * Since: 0.9.5
    983  **/
    984 hb_bool_t
    985 hb_feature_from_string (const char *str, int len,
    986 			hb_feature_t *feature)
    987 {
    988   hb_feature_t feat;
    989 
    990   if (len < 0)
    991     len = strlen (str);
    992 
    993   if (likely (parse_one_feature (&str, str + len, &feat)))
    994   {
    995     if (feature)
    996       *feature = feat;
    997     return true;
    998   }
    999 
   1000   if (feature)
   1001     memset (feature, 0, sizeof (*feature));
   1002   return false;
   1003 }
   1004 
   1005 /**
   1006  * hb_feature_to_string:
   1007  * @feature: an #hb_feature_t to convert
   1008  * @buf: (array length=size) (out): output string
   1009  * @size: the allocated size of @buf
   1010  *
   1011  * Converts a #hb_feature_t into a %NULL-terminated string in the format
   1012  * understood by hb_feature_from_string(). The client in responsible for
   1013  * allocating big enough size for @buf, 128 bytes is more than enough.
   1014  *
   1015  * Since: 0.9.5
   1016  **/
   1017 void
   1018 hb_feature_to_string (hb_feature_t *feature,
   1019 		      char *buf, unsigned int size)
   1020 {
   1021   if (unlikely (!size)) return;
   1022 
   1023   char s[128];
   1024   unsigned int len = 0;
   1025   if (feature->value == 0)
   1026     s[len++] = '-';
   1027   hb_tag_to_string (feature->tag, s + len);
   1028   len += 4;
   1029   while (len && s[len - 1] == ' ')
   1030     len--;
   1031   if (feature->start != 0 || feature->end != (unsigned int) -1)
   1032   {
   1033     s[len++] = '[';
   1034     if (feature->start)
   1035       len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->start));
   1036     if (feature->end != feature->start + 1) {
   1037       s[len++] = ':';
   1038       if (feature->end != (unsigned int) -1)
   1039 	len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->end));
   1040     }
   1041     s[len++] = ']';
   1042   }
   1043   if (feature->value > 1)
   1044   {
   1045     s[len++] = '=';
   1046     len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->value));
   1047   }
   1048   assert (len < ARRAY_LENGTH (s));
   1049   len = MIN (len, size - 1);
   1050   memcpy (buf, s, len);
   1051   buf[len] = '\0';
   1052 }
   1053 
   1054 /* hb_variation_t */
   1055 
   1056 static bool
   1057 parse_variation_value (const char **pp, const char *end, hb_variation_t *variation)
   1058 {
   1059   parse_char (pp, end, '='); /* Optional. */
   1060   return parse_float (pp, end, &variation->value);
   1061 }
   1062 
   1063 static bool
   1064 parse_one_variation (const char **pp, const char *end, hb_variation_t *variation)
   1065 {
   1066   return parse_tag (pp, end, &variation->tag) &&
   1067 	 parse_variation_value (pp, end, variation) &&
   1068 	 parse_space (pp, end) &&
   1069 	 *pp == end;
   1070 }
   1071 
   1072 /**
   1073  * hb_variation_from_string:
   1074  *
   1075  * Since: 1.4.2
   1076  */
   1077 hb_bool_t
   1078 hb_variation_from_string (const char *str, int len,
   1079 			  hb_variation_t *variation)
   1080 {
   1081   hb_variation_t var;
   1082 
   1083   if (len < 0)
   1084     len = strlen (str);
   1085 
   1086   if (likely (parse_one_variation (&str, str + len, &var)))
   1087   {
   1088     if (variation)
   1089       *variation = var;
   1090     return true;
   1091   }
   1092 
   1093   if (variation)
   1094     memset (variation, 0, sizeof (*variation));
   1095   return false;
   1096 }
   1097 
   1098 /**
   1099  * hb_variation_to_string:
   1100  *
   1101  * Since: 1.4.2
   1102  */
   1103 void
   1104 hb_variation_to_string (hb_variation_t *variation,
   1105 			char *buf, unsigned int size)
   1106 {
   1107   if (unlikely (!size)) return;
   1108 
   1109   char s[128];
   1110   unsigned int len = 0;
   1111   hb_tag_to_string (variation->tag, s + len);
   1112   len += 4;
   1113   while (len && s[len - 1] == ' ')
   1114     len--;
   1115   s[len++] = '=';
   1116   len += MAX (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%g", (double) variation->value));
   1117 
   1118   assert (len < ARRAY_LENGTH (s));
   1119   len = MIN (len, size - 1);
   1120   memcpy (buf, s, len);
   1121   buf[len] = '\0';
   1122 }
   1123 
   1124 /* If there is no visibility control, then hb-static.cc will NOT
   1125  * define anything.  Instead, we get it to define one set in here
   1126  * only, so only libharfbuzz.so defines them, not other libs. */
   1127 #ifdef HB_NO_VISIBILITY
   1128 #undef HB_NO_VISIBILITY
   1129 #include "hb-static.cc"
   1130 #define HB_NO_VISIBILITY 1
   1131 #endif
   1132