Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2009  Red Hat, Inc.
      3  * Copyright  2011  Codethink Limited
      4  * Copyright  2011,2012  Google, Inc.
      5  *
      6  *  This is part of HarfBuzz, a text shaping library.
      7  *
      8  * Permission is hereby granted, without written agreement and without
      9  * license or royalty fees, to use, copy, modify, and distribute this
     10  * software and its documentation for any purpose, provided that the
     11  * above copyright notice and the following two paragraphs appear in
     12  * all copies of this software.
     13  *
     14  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     15  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     16  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     17  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     18  * DAMAGE.
     19  *
     20  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     21  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     22  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     23  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     24  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     25  *
     26  * Red Hat Author(s): Behdad Esfahbod
     27  * Codethink Author(s): Ryan Lortie
     28  * Google Author(s): Behdad Esfahbod
     29  */
     30 
     31 #ifndef HB_H_IN
     32 #error "Include <hb.h> instead."
     33 #endif
     34 
     35 #ifndef HB_UNICODE_H
     36 #define HB_UNICODE_H
     37 
     38 #include "hb-common.h"
     39 
     40 HB_BEGIN_DECLS
     41 
     42 
     43 /* hb_unicode_general_category_t */
     44 
     45 /* Unicode Character Database property: General_Category (gc) */
     46 typedef enum
     47 {
     48   HB_UNICODE_GENERAL_CATEGORY_CONTROL,			/* Cc */
     49   HB_UNICODE_GENERAL_CATEGORY_FORMAT,			/* Cf */
     50   HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,		/* Cn */
     51   HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE,		/* Co */
     52   HB_UNICODE_GENERAL_CATEGORY_SURROGATE,		/* Cs */
     53   HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER,		/* Ll */
     54   HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER,		/* Lm */
     55   HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER,		/* Lo */
     56   HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,		/* Lt */
     57   HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER,		/* Lu */
     58   HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK,		/* Mc */
     59   HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK,		/* Me */
     60   HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,		/* Mn */
     61   HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER,		/* Nd */
     62   HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER,		/* Nl */
     63   HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER,		/* No */
     64   HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION,	/* Pc */
     65   HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION,		/* Pd */
     66   HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION,	/* Pe */
     67   HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION,	/* Pf */
     68   HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION,	/* Pi */
     69   HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION,	/* Po */
     70   HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION,		/* Ps */
     71   HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL,		/* Sc */
     72   HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL,		/* Sk */
     73   HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL,		/* Sm */
     74   HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL,		/* So */
     75   HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR,		/* Zl */
     76   HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR,	/* Zp */
     77   HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR		/* Zs */
     78 } hb_unicode_general_category_t;
     79 
     80 /* hb_unicode_combining_class_t */
     81 
     82 /* Note: newer versions of Unicode may add new values.  Clients should be ready to handle
     83  * any value in the 0..254 range being returned from hb_unicode_combining_class().
     84  */
     85 
     86 /* Unicode Character Database property: Canonical_Combining_Class (ccc) */
     87 typedef enum
     88 {
     89   HB_UNICODE_COMBINING_CLASS_NOT_REORDERED	= 0,
     90   HB_UNICODE_COMBINING_CLASS_OVERLAY		= 1,
     91   HB_UNICODE_COMBINING_CLASS_NUKTA		= 7,
     92   HB_UNICODE_COMBINING_CLASS_KANA_VOICING	= 8,
     93   HB_UNICODE_COMBINING_CLASS_VIRAMA		= 9,
     94 
     95   /* Hebrew */
     96   HB_UNICODE_COMBINING_CLASS_CCC10	=  10,
     97   HB_UNICODE_COMBINING_CLASS_CCC11	=  11,
     98   HB_UNICODE_COMBINING_CLASS_CCC12	=  12,
     99   HB_UNICODE_COMBINING_CLASS_CCC13	=  13,
    100   HB_UNICODE_COMBINING_CLASS_CCC14	=  14,
    101   HB_UNICODE_COMBINING_CLASS_CCC15	=  15,
    102   HB_UNICODE_COMBINING_CLASS_CCC16	=  16,
    103   HB_UNICODE_COMBINING_CLASS_CCC17	=  17,
    104   HB_UNICODE_COMBINING_CLASS_CCC18	=  18,
    105   HB_UNICODE_COMBINING_CLASS_CCC19	=  19,
    106   HB_UNICODE_COMBINING_CLASS_CCC20	=  20,
    107   HB_UNICODE_COMBINING_CLASS_CCC21	=  21,
    108   HB_UNICODE_COMBINING_CLASS_CCC22	=  22,
    109   HB_UNICODE_COMBINING_CLASS_CCC23	=  23,
    110   HB_UNICODE_COMBINING_CLASS_CCC24	=  24,
    111   HB_UNICODE_COMBINING_CLASS_CCC25	=  25,
    112   HB_UNICODE_COMBINING_CLASS_CCC26	=  26,
    113 
    114   /* Arabic */
    115   HB_UNICODE_COMBINING_CLASS_CCC27	=  27,
    116   HB_UNICODE_COMBINING_CLASS_CCC28	=  28,
    117   HB_UNICODE_COMBINING_CLASS_CCC29	=  29,
    118   HB_UNICODE_COMBINING_CLASS_CCC30	=  30,
    119   HB_UNICODE_COMBINING_CLASS_CCC31	=  31,
    120   HB_UNICODE_COMBINING_CLASS_CCC32	=  32,
    121   HB_UNICODE_COMBINING_CLASS_CCC33	=  33,
    122   HB_UNICODE_COMBINING_CLASS_CCC34	=  34,
    123   HB_UNICODE_COMBINING_CLASS_CCC35	=  35,
    124 
    125   /* Syriac */
    126   HB_UNICODE_COMBINING_CLASS_CCC36	=  36,
    127 
    128   /* Telugu */
    129   HB_UNICODE_COMBINING_CLASS_CCC84	=  84,
    130   HB_UNICODE_COMBINING_CLASS_CCC91	=  91,
    131 
    132   /* Thai */
    133   HB_UNICODE_COMBINING_CLASS_CCC103	= 103,
    134   HB_UNICODE_COMBINING_CLASS_CCC107	= 107,
    135 
    136   /* Lao */
    137   HB_UNICODE_COMBINING_CLASS_CCC118	= 118,
    138   HB_UNICODE_COMBINING_CLASS_CCC122	= 122,
    139 
    140   /* Tibetan */
    141   HB_UNICODE_COMBINING_CLASS_CCC129	= 129,
    142   HB_UNICODE_COMBINING_CLASS_CCC130	= 130,
    143   HB_UNICODE_COMBINING_CLASS_CCC133	= 132,
    144 
    145 
    146   HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT	= 200,
    147   HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW		= 202,
    148   HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE		= 214,
    149   HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT	= 216,
    150   HB_UNICODE_COMBINING_CLASS_BELOW_LEFT			= 218,
    151   HB_UNICODE_COMBINING_CLASS_BELOW			= 220,
    152   HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT		= 222,
    153   HB_UNICODE_COMBINING_CLASS_LEFT			= 224,
    154   HB_UNICODE_COMBINING_CLASS_RIGHT			= 226,
    155   HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT			= 228,
    156   HB_UNICODE_COMBINING_CLASS_ABOVE			= 230,
    157   HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT		= 232,
    158   HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW		= 233,
    159   HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE		= 234,
    160 
    161   HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT		= 240,
    162 
    163   HB_UNICODE_COMBINING_CLASS_INVALID	= 255
    164 } hb_unicode_combining_class_t;
    165 
    166 
    167 /*
    168  * hb_unicode_funcs_t
    169  */
    170 
    171 typedef struct hb_unicode_funcs_t hb_unicode_funcs_t;
    172 
    173 
    174 /*
    175  * just give me the best implementation you've got there.
    176  */
    177 hb_unicode_funcs_t *
    178 hb_unicode_funcs_get_default (void);
    179 
    180 
    181 hb_unicode_funcs_t *
    182 hb_unicode_funcs_create (hb_unicode_funcs_t *parent);
    183 
    184 hb_unicode_funcs_t *
    185 hb_unicode_funcs_get_empty (void);
    186 
    187 hb_unicode_funcs_t *
    188 hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs);
    189 
    190 void
    191 hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs);
    192 
    193 hb_bool_t
    194 hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
    195 			        hb_user_data_key_t *key,
    196 			        void *              data,
    197 			        hb_destroy_func_t   destroy,
    198 				hb_bool_t           replace);
    199 
    200 
    201 void *
    202 hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
    203 			        hb_user_data_key_t *key);
    204 
    205 
    206 void
    207 hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs);
    208 
    209 hb_bool_t
    210 hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs);
    211 
    212 hb_unicode_funcs_t *
    213 hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
    214 
    215 
    216 /*
    217  * funcs
    218  */
    219 
    220 /* typedefs */
    221 
    222 typedef hb_unicode_combining_class_t	(*hb_unicode_combining_class_func_t)	(hb_unicode_funcs_t *ufuncs,
    223 										 hb_codepoint_t      unicode,
    224 										 void               *user_data);
    225 typedef unsigned int			(*hb_unicode_eastasian_width_func_t)	(hb_unicode_funcs_t *ufuncs,
    226 										 hb_codepoint_t      unicode,
    227 										 void               *user_data);
    228 typedef hb_unicode_general_category_t	(*hb_unicode_general_category_func_t)	(hb_unicode_funcs_t *ufuncs,
    229 										 hb_codepoint_t      unicode,
    230 										 void               *user_data);
    231 typedef hb_codepoint_t			(*hb_unicode_mirroring_func_t)		(hb_unicode_funcs_t *ufuncs,
    232 										 hb_codepoint_t      unicode,
    233 										 void               *user_data);
    234 typedef hb_script_t			(*hb_unicode_script_func_t)		(hb_unicode_funcs_t *ufuncs,
    235 										 hb_codepoint_t      unicode,
    236 										 void               *user_data);
    237 
    238 typedef hb_bool_t			(*hb_unicode_compose_func_t)		(hb_unicode_funcs_t *ufuncs,
    239 										 hb_codepoint_t      a,
    240 										 hb_codepoint_t      b,
    241 										 hb_codepoint_t     *ab,
    242 										 void               *user_data);
    243 typedef hb_bool_t			(*hb_unicode_decompose_func_t)		(hb_unicode_funcs_t *ufuncs,
    244 										 hb_codepoint_t      ab,
    245 										 hb_codepoint_t     *a,
    246 										 hb_codepoint_t     *b,
    247 										 void               *user_data);
    248 
    249 /**
    250  * hb_unicode_decompose_compatibility_func_t:
    251  * @ufuncs: a Unicode function structure
    252  * @u: codepoint to decompose
    253  * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into
    254  * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func()
    255  *
    256  * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed.
    257  * The complete length of the decomposition will be returned.
    258  *
    259  * If @u has no compatibility decomposition, zero should be returned.
    260  *
    261  * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any
    262  * compatibility decomposition plus an terminating value of 0.  Consequently, @decompose must be allocated by the caller to be at least this length.  Implementations
    263  * of this function type must ensure that they do not write past the provided array.
    264  *
    265  * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available.
    266  */
    267 typedef unsigned int			(*hb_unicode_decompose_compatibility_func_t)	(hb_unicode_funcs_t *ufuncs,
    268 											 hb_codepoint_t      u,
    269 											 hb_codepoint_t     *decomposed,
    270 											 void               *user_data);
    271 
    272 /* See Unicode 6.1 for details on the maximum decomposition length. */
    273 #define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */
    274 
    275 /* setters */
    276 
    277 /**
    278  * hb_unicode_funcs_set_combining_class_func:
    279  * @ufuncs: a Unicode function structure
    280  * @func: (closure user_data) (destroy destroy) (scope notified):
    281  * @user_data:
    282  * @destroy:
    283  *
    284  *
    285  *
    286  * Since: 1.0
    287  **/
    288 void
    289 hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
    290 					   hb_unicode_combining_class_func_t func,
    291 					   void *user_data, hb_destroy_func_t destroy);
    292 
    293 /**
    294  * hb_unicode_funcs_set_eastasian_width_func:
    295  * @ufuncs: a Unicode function structure
    296  * @func: (closure user_data) (destroy destroy) (scope notified):
    297  * @user_data:
    298  * @destroy:
    299  *
    300  *
    301  *
    302  * Since: 1.0
    303  **/
    304 void
    305 hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
    306 					   hb_unicode_eastasian_width_func_t func,
    307 					   void *user_data, hb_destroy_func_t destroy);
    308 
    309 /**
    310  * hb_unicode_funcs_set_general_category_func:
    311  * @ufuncs: a Unicode function structure
    312  * @func: (closure user_data) (destroy destroy) (scope notified):
    313  * @user_data:
    314  * @destroy:
    315  *
    316  *
    317  *
    318  * Since: 1.0
    319  **/
    320 void
    321 hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
    322 					    hb_unicode_general_category_func_t func,
    323 					    void *user_data, hb_destroy_func_t destroy);
    324 
    325 /**
    326  * hb_unicode_funcs_set_mirroring_func:
    327  * @ufuncs: a Unicode function structure
    328  * @func: (closure user_data) (destroy destroy) (scope notified):
    329  * @user_data:
    330  * @destroy:
    331  *
    332  *
    333  *
    334  * Since: 1.0
    335  **/
    336 void
    337 hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
    338 				     hb_unicode_mirroring_func_t func,
    339 				     void *user_data, hb_destroy_func_t destroy);
    340 
    341 /**
    342  * hb_unicode_funcs_set_script_func:
    343  * @ufuncs: a Unicode function structure
    344  * @func: (closure user_data) (destroy destroy) (scope notified):
    345  * @user_data:
    346  * @destroy:
    347  *
    348  *
    349  *
    350  * Since: 1.0
    351  **/
    352 void
    353 hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
    354 				  hb_unicode_script_func_t func,
    355 				  void *user_data, hb_destroy_func_t destroy);
    356 
    357 /**
    358  * hb_unicode_funcs_set_compose_func:
    359  * @ufuncs: a Unicode function structure
    360  * @func: (closure user_data) (destroy destroy) (scope notified):
    361  * @user_data:
    362  * @destroy:
    363  *
    364  *
    365  *
    366  * Since: 1.0
    367  **/
    368 void
    369 hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
    370 				   hb_unicode_compose_func_t func,
    371 				   void *user_data, hb_destroy_func_t destroy);
    372 
    373 /**
    374  * hb_unicode_funcs_set_decompose_func:
    375  * @ufuncs: a Unicode function structure
    376  * @func: (closure user_data) (destroy destroy) (scope notified):
    377  * @user_data:
    378  * @destroy:
    379  *
    380  *
    381  *
    382  * Since: 1.0
    383  **/
    384 void
    385 hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
    386 				     hb_unicode_decompose_func_t func,
    387 				     void *user_data, hb_destroy_func_t destroy);
    388 
    389 /**
    390  * hb_unicode_funcs_set_decompose_compatibility_func:
    391  * @ufuncs: a Unicode function structure
    392  * @func: (closure user_data) (destroy destroy) (scope notified):
    393  * @user_data:
    394  * @destroy:
    395  *
    396  *
    397  *
    398  * Since: 1.0
    399  **/
    400 void
    401 hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs,
    402 						   hb_unicode_decompose_compatibility_func_t func,
    403 						   void *user_data, hb_destroy_func_t destroy);
    404 
    405 /* accessors */
    406 
    407 hb_unicode_combining_class_t
    408 hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
    409 			    hb_codepoint_t unicode);
    410 
    411 unsigned int
    412 hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs,
    413 			    hb_codepoint_t unicode);
    414 
    415 hb_unicode_general_category_t
    416 hb_unicode_general_category (hb_unicode_funcs_t *ufuncs,
    417 			     hb_codepoint_t unicode);
    418 
    419 hb_codepoint_t
    420 hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs,
    421 		      hb_codepoint_t unicode);
    422 
    423 hb_script_t
    424 hb_unicode_script (hb_unicode_funcs_t *ufuncs,
    425 		   hb_codepoint_t unicode);
    426 
    427 hb_bool_t
    428 hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
    429 		    hb_codepoint_t      a,
    430 		    hb_codepoint_t      b,
    431 		    hb_codepoint_t     *ab);
    432 hb_bool_t
    433 hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
    434 		      hb_codepoint_t      ab,
    435 		      hb_codepoint_t     *a,
    436 		      hb_codepoint_t     *b);
    437 
    438 unsigned int
    439 hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
    440 				    hb_codepoint_t      u,
    441 				    hb_codepoint_t     *decomposed);
    442 
    443 HB_END_DECLS
    444 
    445 #endif /* HB_UNICODE_H */
    446