Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2009,2010  Red Hat, Inc.
      3  * Copyright  2011,2012  Google, Inc.
      4  *
      5  *  This is part of HarfBuzz, a text shaping library.
      6  *
      7  * Permission is hereby granted, without written agreement and without
      8  * license or royalty fees, to use, copy, modify, and distribute this
      9  * software and its documentation for any purpose, provided that the
     10  * above copyright notice and the following two paragraphs appear in
     11  * all copies of this software.
     12  *
     13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     17  * DAMAGE.
     18  *
     19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     24  *
     25  * Red Hat Author(s): Behdad Esfahbod
     26  * Google Author(s): Behdad Esfahbod
     27  */
     28 
     29 #include "hb-private.hh"
     30 
     31 #include "hb-version.h"
     32 
     33 #include "hb-mutex-private.hh"
     34 #include "hb-object-private.hh"
     35 
     36 #include <locale.h>
     37 
     38 
     39 /* hb_options_t */
     40 
     41 hb_options_union_t _hb_options;
     42 
     43 void
     44 _hb_options_init (void)
     45 {
     46   hb_options_union_t u;
     47   u.i = 0;
     48   u.opts.initialized = 1;
     49 
     50   char *c = getenv ("HB_OPTIONS");
     51   u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");
     52 
     53   /* This is idempotent and threadsafe. */
     54   _hb_options = u;
     55 }
     56 
     57 
     58 /* hb_tag_t */
     59 
     60 hb_tag_t
     61 hb_tag_from_string (const char *s, int len)
     62 {
     63   char tag[4];
     64   unsigned int i;
     65 
     66   if (!s || !len || !*s)
     67     return HB_TAG_NONE;
     68 
     69   if (len < 0 || len > 4)
     70     len = 4;
     71   for (i = 0; i < (unsigned) len && s[i]; i++)
     72     tag[i] = s[i];
     73   for (; i < 4; i++)
     74     tag[i] = ' ';
     75 
     76   return HB_TAG_CHAR4 (tag);
     77 }
     78 
     79 void
     80 hb_tag_to_string (hb_tag_t tag, char *buf)
     81 {
     82   buf[0] = (char) (uint8_t) (tag >> 24);
     83   buf[1] = (char) (uint8_t) (tag >> 16);
     84   buf[2] = (char) (uint8_t) (tag >>  8);
     85   buf[3] = (char) (uint8_t) (tag >>  0);
     86 }
     87 
     88 
     89 /* hb_direction_t */
     90 
/* Short names of the directions.  Each entry is three letters plus the
 * terminating NUL.
 *
 * hb_direction_from_string() and hb_direction_to_string() treat index i
 * of this table as the direction value HB_DIRECTION_LTR + i, so the
 * order of the entries is significant. */
const char direction_strings[][4] = {
  "ltr",
  "rtl",
  "ttb",
  "btt"
};
     97 
     98 hb_direction_t
     99 hb_direction_from_string (const char *str, int len)
    100 {
    101   if (unlikely (!str || !len || !*str))
    102     return HB_DIRECTION_INVALID;
    103 
    104   /* Lets match loosely: just match the first letter, such that
    105    * all of "ltr", "left-to-right", etc work!
    106    */
    107   char c = TOLOWER (str[0]);
    108   for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
    109     if (c == direction_strings[i][0])
    110       return (hb_direction_t) (HB_DIRECTION_LTR + i);
    111 
    112   return HB_DIRECTION_INVALID;
    113 }
    114 
    115 const char *
    116 hb_direction_to_string (hb_direction_t direction)
    117 {
    118   if (likely ((unsigned int) (direction - HB_DIRECTION_LTR)
    119 	      < ARRAY_LENGTH (direction_strings)))
    120     return direction_strings[direction - HB_DIRECTION_LTR];
    121 
    122   return "invalid";
    123 }
    124 
    125 
    126 /* hb_language_t */
    127 
/* Body of the type that hb_language_t points to.
 *
 * In this file a language handle is a pointer to a NUL-terminated
 * string cast to hb_language_t (see hb_language_item_t::operator=).
 * The one-element array `s` gives typed access to the start of that
 * string; hb_language_to_string() returns it. */
struct hb_language_impl_t {
  const char s[1];
};
    131 
/* Byte-to-byte canonicalisation table for language strings.
 *
 *   - ASCII letters map to their lowercase form,
 *   - ASCII digits map to themselves,
 *   - '-', '@' and '_' all map to '-',
 *   - every other byte maps to 0, which terminates the string as far
 *     as the users of this table are concerned.
 *
 * Only the first 128 entries are spelled out; entries 128..255 are
 * zero-initialised because the initialiser list is shorter than the
 * array. */
static const char canon_map[256] = {
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,  '-',  0,   0,
  '0', '1', '2', '3', '4', '5', '6', '7',  '8', '9',  0,   0,   0,   0,   0,   0,
  '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,  '-',
   0,  'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,   0
};
    142 
    143 static hb_bool_t
    144 lang_equal (hb_language_t  v1,
    145 	    const void    *v2)
    146 {
    147   const unsigned char *p1 = (const unsigned char *) v1;
    148   const unsigned char *p2 = (const unsigned char *) v2;
    149 
    150   while (*p1 && *p1 == canon_map[*p2])
    151     p1++, p2++;
    152 
    153   return *p1 == canon_map[*p2];
    154 }
    155 
/* Disabled: this hash function is compiled out by the #if 0 guard and
 * is not referenced by the code below. */
#if 0
/* Hashes the canonical form of key: every byte is mapped through
 * canon_map and folded in as h = h * 31 + byte, stopping at the first
 * byte that canon_map turns into 0. */
static unsigned int
lang_hash (const void *key)
{
  const unsigned char *p = key;
  unsigned int h = 0;
  while (canon_map[*p])
    {
      h = (h << 5) - h + canon_map[*p]; /* (h << 5) - h == h * 31 */
      p++;
    }

  return h;
}
#endif
    171 
    172 
    173 struct hb_language_item_t {
    174 
    175   struct hb_language_item_t *next;
    176   hb_language_t lang;
    177 
    178   inline bool operator == (const char *s) const {
    179     return lang_equal (lang, s);
    180   }
    181 
    182   inline hb_language_item_t & operator = (const char *s) {
    183     lang = (hb_language_t) strdup (s);
    184     for (unsigned char *p = (unsigned char *) lang; *p; p++)
    185       *p = canon_map[*p];
    186 
    187     return *this;
    188   }
    189 
    190   void finish (void) { free (lang); }
    191 };
    192 
    193 
/* Thread-safe lock-free language list.
 *
 * Head of the singly linked list of interned languages.
 * lang_find_or_insert() reads it with hb_atomic_ptr_get() and prepends
 * new items with hb_atomic_ptr_cmpexch(); free_langs() tears the list
 * down. */

static hb_language_item_t *langs;
    197 
    198 static inline
    199 void free_langs (void)
    200 {
    201   while (langs) {
    202     hb_language_item_t *next = langs->next;
    203     langs->finish ();
    204     free (langs);
    205     langs = next;
    206   }
    207 }
    208 
    209 static hb_language_item_t *
    210 lang_find_or_insert (const char *key)
    211 {
    212 retry:
    213   hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs);
    214 
    215   for (hb_language_item_t *lang = first_lang; lang; lang = lang->next)
    216     if (*lang == key)
    217       return lang;
    218 
    219   /* Not found; allocate one. */
    220   hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t));
    221   if (unlikely (!lang))
    222     return NULL;
    223   lang->next = first_lang;
    224   *lang = key;
    225 
    226   if (!hb_atomic_ptr_cmpexch (&langs, first_lang, lang)) {
    227     free (lang);
    228     goto retry;
    229   }
    230 
    231 #ifdef HAVE_ATEXIT
    232   if (!first_lang)
    233     atexit (free_langs); /* First person registers atexit() callback. */
    234 #endif
    235 
    236   return lang;
    237 }
    238 
    239 
    240 hb_language_t
    241 hb_language_from_string (const char *str, int len)
    242 {
    243   if (!str || !len || !*str)
    244     return HB_LANGUAGE_INVALID;
    245 
    246   char strbuf[32];
    247   if (len >= 0) {
    248     len = MIN (len, (int) sizeof (strbuf) - 1);
    249     str = (char *) memcpy (strbuf, str, len);
    250     strbuf[len] = '\0';
    251   }
    252 
    253   hb_language_item_t *item = lang_find_or_insert (str);
    254 
    255   return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
    256 }
    257 
    258 const char *
    259 hb_language_to_string (hb_language_t language)
    260 {
    261   /* This is actually NULL-safe! */
    262   return language->s;
    263 }
    264 
    265 hb_language_t
    266 hb_language_get_default (void)
    267 {
    268   static hb_language_t default_language = HB_LANGUAGE_INVALID;
    269 
    270   hb_language_t language = (hb_language_t) hb_atomic_ptr_get (&default_language);
    271   if (unlikely (language == HB_LANGUAGE_INVALID)) {
    272     language = hb_language_from_string (setlocale (LC_CTYPE, NULL), -1);
    273     hb_atomic_ptr_cmpexch (&default_language, HB_LANGUAGE_INVALID, language);
    274   }
    275 
    276   return default_language;
    277 }
    278 
    279 
    280 /* hb_script_t */
    281 
    282 hb_script_t
    283 hb_script_from_iso15924_tag (hb_tag_t tag)
    284 {
    285   if (unlikely (tag == HB_TAG_NONE))
    286     return HB_SCRIPT_INVALID;
    287 
    288   /* Be lenient, adjust case (one capital letter followed by three small letters) */
    289   tag = (tag & 0xDFDFDFDF) | 0x00202020;
    290 
    291   switch (tag) {
    292 
    293     /* These graduated from the 'Q' private-area codes, but
    294      * the old code is still aliased by Unicode, and the Qaai
    295      * one in use by ICU. */
    296     case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
    297     case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
    298 
    299     /* Script variants from http://unicode.org/iso15924/ */
    300     case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
    301     case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
    302     case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
    303     case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
    304     case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
    305     case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
    306   }
    307 
    308   /* If it looks right, just use the tag as a script */
    309   if (((uint32_t) tag & 0xE0E0E0E0) == 0x40606060)
    310     return (hb_script_t) tag;
    311 
    312   /* Otherwise, return unknown */
    313   return HB_SCRIPT_UNKNOWN;
    314 }
    315 
    316 hb_script_t
    317 hb_script_from_string (const char *s, int len)
    318 {
    319   return hb_script_from_iso15924_tag (hb_tag_from_string (s, len));
    320 }
    321 
/* Returns the tag for a script.  The script value is simply
 * reinterpreted as a tag; no table lookup is involved. */
hb_tag_t
hb_script_to_iso15924_tag (hb_script_t script)
{
  return (hb_tag_t) script;
}
    327 
    328 hb_direction_t
    329 hb_script_get_horizontal_direction (hb_script_t script)
    330 {
    331   /* http://goo.gl/x9ilM */
    332   switch ((hb_tag_t) script)
    333   {
    334     /* Unicode-1.1 additions */
    335     case HB_SCRIPT_ARABIC:
    336     case HB_SCRIPT_HEBREW:
    337 
    338     /* Unicode-3.0 additions */
    339     case HB_SCRIPT_SYRIAC:
    340     case HB_SCRIPT_THAANA:
    341 
    342     /* Unicode-4.0 additions */
    343     case HB_SCRIPT_CYPRIOT:
    344 
    345     /* Unicode-4.1 additions */
    346     case HB_SCRIPT_KHAROSHTHI:
    347 
    348     /* Unicode-5.0 additions */
    349     case HB_SCRIPT_PHOENICIAN:
    350     case HB_SCRIPT_NKO:
    351 
    352     /* Unicode-5.1 additions */
    353     case HB_SCRIPT_LYDIAN:
    354 
    355     /* Unicode-5.2 additions */
    356     case HB_SCRIPT_AVESTAN:
    357     case HB_SCRIPT_IMPERIAL_ARAMAIC:
    358     case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
    359     case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
    360     case HB_SCRIPT_OLD_SOUTH_ARABIAN:
    361     case HB_SCRIPT_OLD_TURKIC:
    362     case HB_SCRIPT_SAMARITAN:
    363 
    364     /* Unicode-6.0 additions */
    365     case HB_SCRIPT_MANDAIC:
    366 
    367     /* Unicode-6.1 additions */
    368     case HB_SCRIPT_MEROITIC_CURSIVE:
    369     case HB_SCRIPT_MEROITIC_HIEROGLYPHS:
    370 
    371       return HB_DIRECTION_RTL;
    372   }
    373 
    374   return HB_DIRECTION_LTR;
    375 }
    376 
    377 
    378 /* hb_user_data_array_t */
    379 
    380 bool
    381 hb_user_data_array_t::set (hb_user_data_key_t *key,
    382 			   void *              data,
    383 			   hb_destroy_func_t   destroy,
    384 			   hb_bool_t           replace)
    385 {
    386   if (!key)
    387     return false;
    388 
    389   if (replace) {
    390     if (!data && !destroy) {
    391       items.remove (key, lock);
    392       return true;
    393     }
    394   }
    395   hb_user_data_item_t item = {key, data, destroy};
    396   bool ret = !!items.replace_or_insert (item, lock, replace);
    397 
    398   return ret;
    399 }
    400 
    401 void *
    402 hb_user_data_array_t::get (hb_user_data_key_t *key)
    403 {
    404   hb_user_data_item_t item = {NULL };
    405 
    406   return items.find (key, &item, lock) ? item.data : NULL;
    407 }
    408 
    409 
    410 /* hb_version */
    411 
/* Stores the version numbers this file was compiled with
 * (HB_VERSION_MAJOR, HB_VERSION_MINOR, HB_VERSION_MICRO) into the three
 * output locations.
 *
 * All three pointers are dereferenced unconditionally, so none of them
 * may be NULL. */
void
hb_version (unsigned int *major,
	    unsigned int *minor,
	    unsigned int *micro)
{
  *major = HB_VERSION_MAJOR;
  *minor = HB_VERSION_MINOR;
  *micro = HB_VERSION_MICRO;
}
    421 
/* Returns HB_VERSION_STRING, the version this file was compiled with.
 * NOTE(review): presumably a string literal of the form
 * "major.minor.micro" -- confirm against hb-version.h. */
const char *
hb_version_string (void)
{
  return HB_VERSION_STRING;
}
    427 
/* Evaluates HB_VERSION_CHECK for the given version triple and returns
 * its result.
 * NOTE(review): presumably true when the compiled-in version is at
 * least major.minor.micro -- confirm against the macro's definition in
 * hb-version.h. */
hb_bool_t
hb_version_check (unsigned int major,
		  unsigned int minor,
		  unsigned int micro)
{
  return HB_VERSION_CHECK (major, minor, micro);
}
    435