Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2009  Red Hat, Inc.
      3  * Copyright  2011  Codethink Limited
      4  * Copyright  2010,2011,2012  Google, Inc.
      5  *
      6  *  This is part of HarfBuzz, a text shaping library.
      7  *
      8  * Permission is hereby granted, without written agreement and without
      9  * license or royalty fees, to use, copy, modify, and distribute this
     10  * software and its documentation for any purpose, provided that the
     11  * above copyright notice and the following two paragraphs appear in
     12  * all copies of this software.
     13  *
     14  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     15  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     16  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     17  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     18  * DAMAGE.
     19  *
     20  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     21  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     22  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     23  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     24  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     25  *
     26  * Red Hat Author(s): Behdad Esfahbod
     27  * Codethink Author(s): Ryan Lortie
     28  * Google Author(s): Behdad Esfahbod
     29  */
     30 
     31 #include "hb.hh"
     32 
     33 #include "hb-unicode.hh"
     34 
     35 
     36 /**
     37  * SECTION: hb-unicode
     38  * @title: hb-unicode
     39  * @short_description: Unicode character property access
     40  * @include: hb.h
     41  *
     42  * Unicode functions are used to access Unicode character properties.
 * Clients can pass their own Unicode functions to HarfBuzz, or access
 * the built-in Unicode functions that come with HarfBuzz.
 *
 * With the Unicode functions, one can query various Unicode character
     47  * properties, such as General Category, Script, Combining Class, etc.
     48  **/
     49 
     50 
     51 /*
     52  * hb_unicode_funcs_t
     53  */
     54 
     55 static hb_unicode_combining_class_t
     56 hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
     57 				hb_codepoint_t      unicode   HB_UNUSED,
     58 				void               *user_data HB_UNUSED)
     59 {
     60   return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED;
     61 }
     62 
     63 static unsigned int
     64 hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
     65 				hb_codepoint_t      unicode   HB_UNUSED,
     66 				void               *user_data HB_UNUSED)
     67 {
     68   return 1;
     69 }
     70 
     71 static hb_unicode_general_category_t
     72 hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
     73 				 hb_codepoint_t      unicode   HB_UNUSED,
     74 				 void               *user_data HB_UNUSED)
     75 {
     76   return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
     77 }
     78 
     79 static hb_codepoint_t
     80 hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
     81 			  hb_codepoint_t      unicode,
     82 			  void               *user_data HB_UNUSED)
     83 {
     84   return unicode;
     85 }
     86 
     87 static hb_script_t
     88 hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
     89 		       hb_codepoint_t      unicode   HB_UNUSED,
     90 		       void               *user_data HB_UNUSED)
     91 {
     92   return HB_SCRIPT_UNKNOWN;
     93 }
     94 
     95 static hb_bool_t
     96 hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
     97 			hb_codepoint_t      a         HB_UNUSED,
     98 			hb_codepoint_t      b         HB_UNUSED,
     99 			hb_codepoint_t     *ab        HB_UNUSED,
    100 			void               *user_data HB_UNUSED)
    101 {
    102   return false;
    103 }
    104 
    105 static hb_bool_t
    106 hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
    107 			  hb_codepoint_t      ab        HB_UNUSED,
    108 			  hb_codepoint_t     *a         HB_UNUSED,
    109 			  hb_codepoint_t     *b         HB_UNUSED,
    110 			  void               *user_data HB_UNUSED)
    111 {
    112   return false;
    113 }
    114 
    115 
    116 static unsigned int
    117 hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs     HB_UNUSED,
    118 					hb_codepoint_t      u          HB_UNUSED,
    119 					hb_codepoint_t     *decomposed HB_UNUSED,
    120 					void               *user_data  HB_UNUSED)
    121 {
    122   return 0;
    123 }
    124 
    125 
/* Accessors of the optional Unicode backends.  All three are declared
 * unconditionally; at most one of them is actually called below, depending
 * on which HAVE_* macros are defined at build time. */
extern "C" hb_unicode_funcs_t *hb_glib_get_unicode_funcs ();
extern "C" hb_unicode_funcs_t *hb_icu_get_unicode_funcs ();
extern "C" hb_unicode_funcs_t *hb_ucdn_get_unicode_funcs ();

/*
 * hb_unicode_funcs_get_default:
 *
 * Returns the Unicode functions of the first backend enabled at build time.
 * Backends are tried in this fixed order: UCDN (HAVE_UCDN), GLib (HAVE_GLIB),
 * then ICU (only when both HAVE_ICU and HAVE_ICU_BUILTIN are defined).
 *
 * When none is enabled, the empty functions object is returned and
 * HB_UNICODE_FUNCS_NIL is defined so the check following this function can
 * detect the situation.
 */
hb_unicode_funcs_t *
hb_unicode_funcs_get_default ()
{
#if defined(HAVE_UCDN)
  return hb_ucdn_get_unicode_funcs ();
#elif defined(HAVE_GLIB)
  return hb_glib_get_unicode_funcs ();
#elif defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
  return hb_icu_get_unicode_funcs ();
#else
  /* No backend available: remember that so the #error check below fires. */
#define HB_UNICODE_FUNCS_NIL 1
  return hb_unicode_funcs_get_empty ();
#endif
}

/* Building without any backend is a hard error unless the builder opted in
 * explicitly by defining HB_NO_UNICODE_FUNCS. */
#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
#error "Could not find any Unicode functions implementation, you have to provide your own"
#error "Consider building hb-ucdn.c.  If you absolutely want to build without any, check the code."
#endif
    149 
    150 /**
    151  * hb_unicode_funcs_create: (Xconstructor)
    152  * @parent: (nullable):
    153  *
    154  *
    155  *
    156  * Return value: (transfer full):
    157  *
    158  * Since: 0.9.2
    159  **/
    160 hb_unicode_funcs_t *
    161 hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
    162 {
    163   hb_unicode_funcs_t *ufuncs;
    164 
    165   if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ()))
    166     return hb_unicode_funcs_get_empty ();
    167 
    168   if (!parent)
    169     parent = hb_unicode_funcs_get_empty ();
    170 
    171   hb_unicode_funcs_make_immutable (parent);
    172   ufuncs->parent = hb_unicode_funcs_reference (parent);
    173 
    174   ufuncs->func = parent->func;
    175 
    176   /* We can safely copy user_data from parent since we hold a reference
    177    * onto it and it's immutable.  We should not copy the destroy notifiers
    178    * though. */
    179   ufuncs->user_data = parent->user_data;
    180 
    181   return ufuncs;
    182 }
    183 
    184 
/* Definition of the shared null ("empty") hb_unicode_funcs_t instance, the
 * object handed out by hb_unicode_funcs_get_empty().  It has a static object
 * header, no parent, and one hb_unicode_<name>_nil callback per entry of
 * HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS.  Members following the callback
 * table are not listed in this initializer. */
DEFINE_NULL_INSTANCE (hb_unicode_funcs_t) =
{
  HB_OBJECT_HEADER_STATIC,

  nullptr, /* parent */
  {
    /* Expands to "hb_unicode_<name>_nil," for every callback name. */
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
    HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
  }
};
    196 
    197 /**
    198  * hb_unicode_funcs_get_empty:
    199  *
    200  *
    201  *
    202  * Return value: (transfer full):
    203  *
    204  * Since: 0.9.2
    205  **/
    206 hb_unicode_funcs_t *
    207 hb_unicode_funcs_get_empty ()
    208 {
    209   return const_cast<hb_unicode_funcs_t *> (&Null(hb_unicode_funcs_t));
    210 }
    211 
    212 /**
    213  * hb_unicode_funcs_reference: (skip)
    214  * @ufuncs: Unicode functions.
    215  *
    216  *
    217  *
    218  * Return value: (transfer full):
    219  *
    220  * Since: 0.9.2
    221  **/
    222 hb_unicode_funcs_t *
    223 hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
    224 {
    225   return hb_object_reference (ufuncs);
    226 }
    227 
/**
 * hb_unicode_funcs_destroy: (skip)
 * @ufuncs: Unicode functions.
 *
 * Releases @ufuncs through hb_object_destroy().  Only when that call
 * returns true is the object torn down: every installed per-callback
 * destroy notifier is invoked on its user_data, the parent is released
 * through a recursive call, and the object's memory is freed.
 *
 * Since: 0.9.2
 **/
void
hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
{
  /* A false result means there is nothing to tear down in this call. */
  if (!hb_object_destroy (ufuncs)) return;

  /* For each callback name: run its destroy notifier, if one was set. */
#define HB_UNICODE_FUNC_IMPLEMENT(name) \
  if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
    HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT

  /* Drop the reference taken on the parent in hb_unicode_funcs_create(). */
  hb_unicode_funcs_destroy (ufuncs->parent);

  free (ufuncs);
}
    250 
    251 /**
    252  * hb_unicode_funcs_set_user_data: (skip)
    253  * @ufuncs: Unicode functions.
    254  * @key:
    255  * @data:
    256  * @destroy:
    257  * @replace:
    258  *
    259  *
    260  *
    261  * Return value:
    262  *
    263  * Since: 0.9.2
    264  **/
    265 hb_bool_t
    266 hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
    267 			        hb_user_data_key_t *key,
    268 			        void *              data,
    269 			        hb_destroy_func_t   destroy,
    270 				hb_bool_t           replace)
    271 {
    272   return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
    273 }
    274 
    275 /**
    276  * hb_unicode_funcs_get_user_data: (skip)
    277  * @ufuncs: Unicode functions.
    278  * @key:
    279  *
    280  *
    281  *
    282  * Return value: (transfer none):
    283  *
    284  * Since: 0.9.2
    285  **/
    286 void *
    287 hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
    288 			        hb_user_data_key_t *key)
    289 {
    290   return hb_object_get_user_data (ufuncs, key);
    291 }
    292 
    293 
    294 /**
    295  * hb_unicode_funcs_make_immutable:
    296  * @ufuncs: Unicode functions.
    297  *
    298  *
    299  *
    300  * Since: 0.9.2
    301  **/
    302 void
    303 hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
    304 {
    305   if (hb_object_is_immutable (ufuncs))
    306     return;
    307 
    308   hb_object_make_immutable (ufuncs);
    309 }
    310 
    311 /**
    312  * hb_unicode_funcs_is_immutable:
    313  * @ufuncs: Unicode functions.
    314  *
    315  *
    316  *
    317  * Return value:
    318  *
    319  * Since: 0.9.2
    320  **/
    321 hb_bool_t
    322 hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
    323 {
    324   return hb_object_is_immutable (ufuncs);
    325 }
    326 
    327 /**
    328  * hb_unicode_funcs_get_parent:
    329  * @ufuncs: Unicode functions.
    330  *
    331  *
    332  *
    333  * Return value:
    334  *
    335  * Since: 0.9.2
    336  **/
    337 hb_unicode_funcs_t *
    338 hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
    339 {
    340   return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty ();
    341 }
    342 
    343 
    344 #define HB_UNICODE_FUNC_IMPLEMENT(name)						\
    345 										\
    346 void										\
    347 hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t		   *ufuncs,	\
    348 				    hb_unicode_##name##_func_t	    func,	\
    349 				    void			   *user_data,	\
    350 				    hb_destroy_func_t		    destroy)	\
    351 {										\
    352   if (hb_object_is_immutable (ufuncs))						\
    353     return;									\
    354 										\
    355   if (ufuncs->destroy.name)							\
    356     ufuncs->destroy.name (ufuncs->user_data.name);				\
    357 										\
    358   if (func) {									\
    359     ufuncs->func.name = func;							\
    360     ufuncs->user_data.name = user_data;						\
    361     ufuncs->destroy.name = destroy;						\
    362   } else {									\
    363     ufuncs->func.name = ufuncs->parent->func.name;				\
    364     ufuncs->user_data.name = ufuncs->parent->user_data.name;			\
    365     ufuncs->destroy.name = nullptr;						\
    366   }										\
    367 }
    368 
    369 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
    370 #undef HB_UNICODE_FUNC_IMPLEMENT
    371 
    372 
/*
 * Generates the public single-code-point query functions hb_unicode_<name>()
 * for every (return_type, name) pair listed in
 * HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE.  Each generated function
 * simply forwards @unicode to the member function of the same name on
 * @ufuncs and returns its result.
 */
#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name)				\
										\
return_type									\
hb_unicode_##name (hb_unicode_funcs_t *ufuncs,					\
		   hb_codepoint_t      unicode)					\
{										\
  return ufuncs->name (unicode);						\
}
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
#undef HB_UNICODE_FUNC_IMPLEMENT
    383 
    384 /**
    385  * hb_unicode_compose:
    386  * @ufuncs: Unicode functions.
    387  * @a:
    388  * @b:
    389  * @ab: (out):
    390  *
    391  *
    392  *
    393  * Return value:
    394  *
    395  * Since: 0.9.2
    396  **/
    397 hb_bool_t
    398 hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
    399 		    hb_codepoint_t      a,
    400 		    hb_codepoint_t      b,
    401 		    hb_codepoint_t     *ab)
    402 {
    403   return ufuncs->compose (a, b, ab);
    404 }
    405 
    406 /**
    407  * hb_unicode_decompose:
    408  * @ufuncs: Unicode functions.
    409  * @ab:
    410  * @a: (out):
    411  * @b: (out):
    412  *
    413  *
    414  *
    415  * Return value:
    416  *
    417  * Since: 0.9.2
    418  **/
    419 hb_bool_t
    420 hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
    421 		      hb_codepoint_t      ab,
    422 		      hb_codepoint_t     *a,
    423 		      hb_codepoint_t     *b)
    424 {
    425   return ufuncs->decompose (ab, a, b);
    426 }
    427 
    428 /**
    429  * hb_unicode_decompose_compatibility:
    430  * @ufuncs: Unicode functions.
    431  * @u:
    432  * @decomposed: (out):
    433  *
    434  *
    435  *
    436  * Return value:
    437  *
    438  * Since: 0.9.2
    439  * Deprecated: 2.0.0
    440  **/
    441 unsigned int
    442 hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
    443 				    hb_codepoint_t      u,
    444 				    hb_codepoint_t     *decomposed)
    445 {
    446   return ufuncs->decompose_compatibility (u, decomposed);
    447 }
    448 
    449 
/* See hb-unicode.hh for details.
 *
 * 256-entry lookup table indexed by a combining-class value (0..255); the
 * entry is the "modified" combining class to use in its place.  Entries
 * written as plain numbers are equal to their own index, i.e. the class is
 * kept as is.  Entries written as HB_MODIFIED_COMBINING_CLASS_CCCn replace
 * class n with a value defined outside this file.  The script headings below
 * group the remapped classes as labelled by the original authors. */
const uint8_t
_hb_modified_combining_class[256] =
{
  0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */
  1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */
  2, 3, 4, 5, 6,
  7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */
  8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */
  9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */

  /* Hebrew */
  HB_MODIFIED_COMBINING_CLASS_CCC10,
  HB_MODIFIED_COMBINING_CLASS_CCC11,
  HB_MODIFIED_COMBINING_CLASS_CCC12,
  HB_MODIFIED_COMBINING_CLASS_CCC13,
  HB_MODIFIED_COMBINING_CLASS_CCC14,
  HB_MODIFIED_COMBINING_CLASS_CCC15,
  HB_MODIFIED_COMBINING_CLASS_CCC16,
  HB_MODIFIED_COMBINING_CLASS_CCC17,
  HB_MODIFIED_COMBINING_CLASS_CCC18,
  HB_MODIFIED_COMBINING_CLASS_CCC19,
  HB_MODIFIED_COMBINING_CLASS_CCC20,
  HB_MODIFIED_COMBINING_CLASS_CCC21,
  HB_MODIFIED_COMBINING_CLASS_CCC22,
  HB_MODIFIED_COMBINING_CLASS_CCC23,
  HB_MODIFIED_COMBINING_CLASS_CCC24,
  HB_MODIFIED_COMBINING_CLASS_CCC25,
  HB_MODIFIED_COMBINING_CLASS_CCC26,

  /* Arabic */
  HB_MODIFIED_COMBINING_CLASS_CCC27,
  HB_MODIFIED_COMBINING_CLASS_CCC28,
  HB_MODIFIED_COMBINING_CLASS_CCC29,
  HB_MODIFIED_COMBINING_CLASS_CCC30,
  HB_MODIFIED_COMBINING_CLASS_CCC31,
  HB_MODIFIED_COMBINING_CLASS_CCC32,
  HB_MODIFIED_COMBINING_CLASS_CCC33,
  HB_MODIFIED_COMBINING_CLASS_CCC34,
  HB_MODIFIED_COMBINING_CLASS_CCC35,

  /* Syriac */
  HB_MODIFIED_COMBINING_CLASS_CCC36,

  37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
  60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83,

  /* Telugu */
  HB_MODIFIED_COMBINING_CLASS_CCC84,
  85, 86, 87, 88, 89, 90,
  HB_MODIFIED_COMBINING_CLASS_CCC91,
  92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,

  /* Thai */
  HB_MODIFIED_COMBINING_CLASS_CCC103,
  104, 105, 106,
  HB_MODIFIED_COMBINING_CLASS_CCC107,
  108, 109, 110, 111, 112, 113, 114, 115, 116, 117,

  /* Lao */
  HB_MODIFIED_COMBINING_CLASS_CCC118,
  119, 120, 121,
  HB_MODIFIED_COMBINING_CLASS_CCC122,
  123, 124, 125, 126, 127, 128,

  /* Tibetan */
  HB_MODIFIED_COMBINING_CLASS_CCC129,
  HB_MODIFIED_COMBINING_CLASS_CCC130,
  131,
  HB_MODIFIED_COMBINING_CLASS_CCC132,
  133, 134, 135, 136, 137, 138, 139,


  140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
  150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
  160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
  170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
  180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
  190, 191, 192, 193, 194, 195, 196, 197, 198, 199,

  200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */
  201,
  202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */
  203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
  214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */
  215,
  216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */
  217,
  218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */
  219,
  220, /* HB_UNICODE_COMBINING_CLASS_BELOW */
  221,
  222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */
  223,
  224, /* HB_UNICODE_COMBINING_CLASS_LEFT */
  225,
  226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */
  227,
  228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */
  229,
  230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */
  231,
  232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */
  233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */
  234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */
  235, 236, 237, 238, 239,
  240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */
  241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
  255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
};
    562 
    563 
    564 /*
    565  * Emoji
    566  */
    567 
    568 #include "hb-unicode-emoji-table.hh"
    569 
    570 bool
    571 _hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp)
    572 {
    573   return hb_bsearch (&cp, _hb_unicode_emoji_Extended_Pictographic_table,
    574 		     ARRAY_LENGTH (_hb_unicode_emoji_Extended_Pictographic_table),
    575 		     sizeof (hb_unicode_range_t),
    576 		     hb_unicode_range_t::cmp);
    577 }
    578