Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2009  Red Hat, Inc.
      3  * Copyright  2011  Google, Inc.
      4  *
      5  *  This is part of HarfBuzz, a text shaping library.
      6  *
      7  * Permission is hereby granted, without written agreement and without
      8  * license or royalty fees, to use, copy, modify, and distribute this
      9  * software and its documentation for any purpose, provided that the
     10  * above copyright notice and the following two paragraphs appear in
     11  * all copies of this software.
     12  *
     13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     17  * DAMAGE.
     18  *
     19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     24  *
     25  * Red Hat Author(s): Behdad Esfahbod
     26  * Google Author(s): Behdad Esfahbod
     27  */
     28 
     29 #include "hb.hh"
     30 
     31 #include "hb-glib.h"
     32 
     33 #include "hb-machinery.hh"
     34 
     35 
     36 /**
     37  * SECTION:hb-glib
     38  * @title: hb-glib
     39  * @short_description: GLib integration
     40  * @include: hb-glib.h
     41  *
     42  * Functions for using HarfBuzz with the GLib library to provide Unicode data.
     43  **/
     44 
     45 
     46 #if !GLIB_CHECK_VERSION(2,29,14)
     47 static const hb_script_t
     48 glib_script_to_script[] =
     49 {
     50   HB_SCRIPT_COMMON,
     51   HB_SCRIPT_INHERITED,
     52   HB_SCRIPT_ARABIC,
     53   HB_SCRIPT_ARMENIAN,
     54   HB_SCRIPT_BENGALI,
     55   HB_SCRIPT_BOPOMOFO,
     56   HB_SCRIPT_CHEROKEE,
     57   HB_SCRIPT_COPTIC,
     58   HB_SCRIPT_CYRILLIC,
     59   HB_SCRIPT_DESERET,
     60   HB_SCRIPT_DEVANAGARI,
     61   HB_SCRIPT_ETHIOPIC,
     62   HB_SCRIPT_GEORGIAN,
     63   HB_SCRIPT_GOTHIC,
     64   HB_SCRIPT_GREEK,
     65   HB_SCRIPT_GUJARATI,
     66   HB_SCRIPT_GURMUKHI,
     67   HB_SCRIPT_HAN,
     68   HB_SCRIPT_HANGUL,
     69   HB_SCRIPT_HEBREW,
     70   HB_SCRIPT_HIRAGANA,
     71   HB_SCRIPT_KANNADA,
     72   HB_SCRIPT_KATAKANA,
     73   HB_SCRIPT_KHMER,
     74   HB_SCRIPT_LAO,
     75   HB_SCRIPT_LATIN,
     76   HB_SCRIPT_MALAYALAM,
     77   HB_SCRIPT_MONGOLIAN,
     78   HB_SCRIPT_MYANMAR,
     79   HB_SCRIPT_OGHAM,
     80   HB_SCRIPT_OLD_ITALIC,
     81   HB_SCRIPT_ORIYA,
     82   HB_SCRIPT_RUNIC,
     83   HB_SCRIPT_SINHALA,
     84   HB_SCRIPT_SYRIAC,
     85   HB_SCRIPT_TAMIL,
     86   HB_SCRIPT_TELUGU,
     87   HB_SCRIPT_THAANA,
     88   HB_SCRIPT_THAI,
     89   HB_SCRIPT_TIBETAN,
     90   HB_SCRIPT_CANADIAN_SYLLABICS,
     91   HB_SCRIPT_YI,
     92   HB_SCRIPT_TAGALOG,
     93   HB_SCRIPT_HANUNOO,
     94   HB_SCRIPT_BUHID,
     95   HB_SCRIPT_TAGBANWA,
     96 
     97   /* Unicode-4.0 additions */
     98   HB_SCRIPT_BRAILLE,
     99   HB_SCRIPT_CYPRIOT,
    100   HB_SCRIPT_LIMBU,
    101   HB_SCRIPT_OSMANYA,
    102   HB_SCRIPT_SHAVIAN,
    103   HB_SCRIPT_LINEAR_B,
    104   HB_SCRIPT_TAI_LE,
    105   HB_SCRIPT_UGARITIC,
    106 
    107   /* Unicode-4.1 additions */
    108   HB_SCRIPT_NEW_TAI_LUE,
    109   HB_SCRIPT_BUGINESE,
    110   HB_SCRIPT_GLAGOLITIC,
    111   HB_SCRIPT_TIFINAGH,
    112   HB_SCRIPT_SYLOTI_NAGRI,
    113   HB_SCRIPT_OLD_PERSIAN,
    114   HB_SCRIPT_KHAROSHTHI,
    115 
    116   /* Unicode-5.0 additions */
    117   HB_SCRIPT_UNKNOWN,
    118   HB_SCRIPT_BALINESE,
    119   HB_SCRIPT_CUNEIFORM,
    120   HB_SCRIPT_PHOENICIAN,
    121   HB_SCRIPT_PHAGS_PA,
    122   HB_SCRIPT_NKO,
    123 
    124   /* Unicode-5.1 additions */
    125   HB_SCRIPT_KAYAH_LI,
    126   HB_SCRIPT_LEPCHA,
    127   HB_SCRIPT_REJANG,
    128   HB_SCRIPT_SUNDANESE,
    129   HB_SCRIPT_SAURASHTRA,
    130   HB_SCRIPT_CHAM,
    131   HB_SCRIPT_OL_CHIKI,
    132   HB_SCRIPT_VAI,
    133   HB_SCRIPT_CARIAN,
    134   HB_SCRIPT_LYCIAN,
    135   HB_SCRIPT_LYDIAN,
    136 
    137   /* Unicode-5.2 additions */
    138   HB_SCRIPT_AVESTAN,
    139   HB_SCRIPT_BAMUM,
    140   HB_SCRIPT_EGYPTIAN_HIEROGLYPHS,
    141   HB_SCRIPT_IMPERIAL_ARAMAIC,
    142   HB_SCRIPT_INSCRIPTIONAL_PAHLAVI,
    143   HB_SCRIPT_INSCRIPTIONAL_PARTHIAN,
    144   HB_SCRIPT_JAVANESE,
    145   HB_SCRIPT_KAITHI,
    146   HB_SCRIPT_TAI_THAM,
    147   HB_SCRIPT_LISU,
    148   HB_SCRIPT_MEETEI_MAYEK,
    149   HB_SCRIPT_OLD_SOUTH_ARABIAN,
    150   HB_SCRIPT_OLD_TURKIC,
    151   HB_SCRIPT_SAMARITAN,
    152   HB_SCRIPT_TAI_VIET,
    153 
    154   /* Unicode-6.0 additions */
    155   HB_SCRIPT_BATAK,
    156   HB_SCRIPT_BRAHMI,
    157   HB_SCRIPT_MANDAIC,
    158 
    159   /* Unicode-6.1 additions */
    160   HB_SCRIPT_CHAKMA,
    161   HB_SCRIPT_MEROITIC_CURSIVE,
    162   HB_SCRIPT_MEROITIC_HIEROGLYPHS,
    163   HB_SCRIPT_MIAO,
    164   HB_SCRIPT_SHARADA,
    165   HB_SCRIPT_SORA_SOMPENG,
    166   HB_SCRIPT_TAKRI
    167 };
    168 #endif
    169 
    170 hb_script_t
    171 hb_glib_script_to_script (GUnicodeScript script)
    172 {
    173 #if GLIB_CHECK_VERSION(2,29,14)
    174   return (hb_script_t) g_unicode_script_to_iso15924 (script);
    175 #else
    176   if (likely ((unsigned int) script < ARRAY_LENGTH (glib_script_to_script)))
    177     return glib_script_to_script[script];
    178 
    179   if (unlikely (script == G_UNICODE_SCRIPT_INVALID_CODE))
    180     return HB_SCRIPT_INVALID;
    181 
    182   return HB_SCRIPT_UNKNOWN;
    183 #endif
    184 }
    185 
    186 GUnicodeScript
    187 hb_glib_script_from_script (hb_script_t script)
    188 {
    189 #if GLIB_CHECK_VERSION(2,29,14)
    190   return g_unicode_script_from_iso15924 (script);
    191 #else
    192   unsigned int count = ARRAY_LENGTH (glib_script_to_script);
    193   for (unsigned int i = 0; i < count; i++)
    194     if (glib_script_to_script[i] == script)
    195       return (GUnicodeScript) i;
    196 
    197   if (unlikely (script == HB_SCRIPT_INVALID))
    198     return G_UNICODE_SCRIPT_INVALID_CODE;
    199 
    200   return G_UNICODE_SCRIPT_UNKNOWN;
    201 #endif
    202 }
    203 
    204 
    205 static hb_unicode_combining_class_t
    206 hb_glib_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    207 				 hb_codepoint_t      unicode,
    208 				 void               *user_data HB_UNUSED)
    209 
    210 {
    211   return (hb_unicode_combining_class_t) g_unichar_combining_class (unicode);
    212 }
    213 
    214 static hb_unicode_general_category_t
    215 hb_glib_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    216 				  hb_codepoint_t      unicode,
    217 				  void               *user_data HB_UNUSED)
    218 
    219 {
    220   /* hb_unicode_general_category_t and GUnicodeType are identical */
    221   return (hb_unicode_general_category_t) g_unichar_type (unicode);
    222 }
    223 
    224 static hb_codepoint_t
    225 hb_glib_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    226 			   hb_codepoint_t      unicode,
    227 			   void               *user_data HB_UNUSED)
    228 {
    229   g_unichar_get_mirror_char (unicode, &unicode);
    230   return unicode;
    231 }
    232 
    233 static hb_script_t
    234 hb_glib_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    235 			hb_codepoint_t      unicode,
    236 			void               *user_data HB_UNUSED)
    237 {
    238   return hb_glib_script_to_script (g_unichar_get_script (unicode));
    239 }
    240 
    241 static hb_bool_t
    242 hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    243 			 hb_codepoint_t      a,
    244 			 hb_codepoint_t      b,
    245 			 hb_codepoint_t     *ab,
    246 			 void               *user_data HB_UNUSED)
    247 {
    248 #if GLIB_CHECK_VERSION(2,29,12)
    249   return g_unichar_compose (a, b, ab);
    250 #endif
    251 
    252   /* We don't ifdef-out the fallback code such that compiler always
    253    * sees it and makes sure it's compilable. */
    254 
    255   gchar utf8[12];
    256   gchar *normalized;
    257   int len;
    258   hb_bool_t ret;
    259 
    260   len = g_unichar_to_utf8 (a, utf8);
    261   len += g_unichar_to_utf8 (b, utf8 + len);
    262   normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFC);
    263   len = g_utf8_strlen (normalized, -1);
    264   if (unlikely (!len))
    265     return false;
    266 
    267   if (len == 1) {
    268     *ab = g_utf8_get_char (normalized);
    269     ret = true;
    270   } else {
    271     ret = false;
    272   }
    273 
    274   g_free (normalized);
    275   return ret;
    276 }
    277 
    278 static hb_bool_t
    279 hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    280 			   hb_codepoint_t      ab,
    281 			   hb_codepoint_t     *a,
    282 			   hb_codepoint_t     *b,
    283 			   void               *user_data HB_UNUSED)
    284 {
    285 #if GLIB_CHECK_VERSION(2,29,12)
    286   return g_unichar_decompose (ab, a, b);
    287 #endif
    288 
    289   /* We don't ifdef-out the fallback code such that compiler always
    290    * sees it and makes sure it's compilable. */
    291 
    292   gchar utf8[6];
    293   gchar *normalized;
    294   int len;
    295   hb_bool_t ret;
    296 
    297   len = g_unichar_to_utf8 (ab, utf8);
    298   normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFD);
    299   len = g_utf8_strlen (normalized, -1);
    300   if (unlikely (!len))
    301     return false;
    302 
    303   if (len == 1) {
    304     *a = g_utf8_get_char (normalized);
    305     *b = 0;
    306     ret = *a != ab;
    307   } else if (len == 2) {
    308     *a = g_utf8_get_char (normalized);
    309     *b = g_utf8_get_char (g_utf8_next_char (normalized));
    310     /* Here's the ugly part: if ab decomposes to a single character and
    311      * that character decomposes again, we have to detect that and undo
    312      * the second part :-(. */
    313     gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC);
    314     hb_codepoint_t c = g_utf8_get_char (recomposed);
    315     if (c != ab && c != *a) {
    316       *a = c;
    317       *b = 0;
    318     }
    319     g_free (recomposed);
    320     ret = true;
    321   } else {
    322     /* If decomposed to more than two characters, take the last one,
    323      * and recompose the rest to get the first component. */
    324     gchar *end = g_utf8_offset_to_pointer (normalized, len - 1);
    325     gchar *recomposed;
    326     *b = g_utf8_get_char (end);
    327     recomposed = g_utf8_normalize (normalized, end - normalized, G_NORMALIZE_NFC);
    328     /* We expect that recomposed has exactly one character now. */
    329     *a = g_utf8_get_char (recomposed);
    330     g_free (recomposed);
    331     ret = true;
    332   }
    333 
    334   g_free (normalized);
    335   return ret;
    336 }
    337 
    338 
    339 #if HB_USE_ATEXIT
    340 static void free_static_glib_funcs ();
    341 #endif
    342 
    343 static struct hb_glib_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_glib_unicode_funcs_lazy_loader_t>
    344 {
    345   static hb_unicode_funcs_t *create ()
    346   {
    347     hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
    348 
    349     hb_unicode_funcs_set_combining_class_func (funcs, hb_glib_unicode_combining_class, nullptr, nullptr);
    350     hb_unicode_funcs_set_general_category_func (funcs, hb_glib_unicode_general_category, nullptr, nullptr);
    351     hb_unicode_funcs_set_mirroring_func (funcs, hb_glib_unicode_mirroring, nullptr, nullptr);
    352     hb_unicode_funcs_set_script_func (funcs, hb_glib_unicode_script, nullptr, nullptr);
    353     hb_unicode_funcs_set_compose_func (funcs, hb_glib_unicode_compose, nullptr, nullptr);
    354     hb_unicode_funcs_set_decompose_func (funcs, hb_glib_unicode_decompose, nullptr, nullptr);
    355 
    356     hb_unicode_funcs_make_immutable (funcs);
    357 
    358 #if HB_USE_ATEXIT
    359     atexit (free_static_glib_funcs);
    360 #endif
    361 
    362     return funcs;
    363   }
    364 } static_glib_funcs;
    365 
    366 #if HB_USE_ATEXIT
    367 static
    368 void free_static_glib_funcs ()
    369 {
    370   static_glib_funcs.free_instance ();
    371 }
    372 #endif
    373 
    374 hb_unicode_funcs_t *
    375 hb_glib_get_unicode_funcs ()
    376 {
    377   return static_glib_funcs.get_unconst ();
    378 }
    379 
    380 
    381 
    382 #if GLIB_CHECK_VERSION(2,31,10)
    383 
    384 static void
    385 _hb_g_bytes_unref (void *data)
    386 {
    387   g_bytes_unref ((GBytes *) data);
    388 }
    389 
    390 /**
    391  * hb_glib_blob_create:
    392  *
    393  * Since: 0.9.38
    394  **/
    395 hb_blob_t *
    396 hb_glib_blob_create (GBytes *gbytes)
    397 {
    398   gsize size = 0;
    399   gconstpointer data = g_bytes_get_data (gbytes, &size);
    400   return hb_blob_create ((const char *) data,
    401 			 size,
    402 			 HB_MEMORY_MODE_READONLY,
    403 			 g_bytes_ref (gbytes),
    404 			 _hb_g_bytes_unref);
    405 }
    406 #endif
    407