1 /* 2 * Copyright 2009 Red Hat, Inc. 3 * Copyright 2011 Codethink Limited 4 * Copyright 2011,2012 Google, Inc. 5 * 6 * This is part of HarfBuzz, a text shaping library. 7 * 8 * Permission is hereby granted, without written agreement and without 9 * license or royalty fees, to use, copy, modify, and distribute this 10 * software and its documentation for any purpose, provided that the 11 * above copyright notice and the following two paragraphs appear in 12 * all copies of this software. 13 * 14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 18 * DAMAGE. 19 * 20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 25 * 26 * Red Hat Author(s): Behdad Esfahbod 27 * Codethink Author(s): Ryan Lortie 28 * Google Author(s): Behdad Esfahbod 29 */ 30 31 #ifndef HB_H_IN 32 #error "Include <hb.h> instead." 33 #endif 34 35 #ifndef HB_UNICODE_H 36 #define HB_UNICODE_H 37 38 #include "hb-common.h" 39 40 HB_BEGIN_DECLS 41 42 43 /* hb_unicode_general_category_t */ 44 45 /* Unicode Character Database property: General_Category (gc) */ 46 typedef enum 47 { 48 HB_UNICODE_GENERAL_CATEGORY_CONTROL, /* Cc */ 49 HB_UNICODE_GENERAL_CATEGORY_FORMAT, /* Cf */ 50 HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, /* Cn */ 51 HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE, /* Co */ 52 HB_UNICODE_GENERAL_CATEGORY_SURROGATE, /* Cs */ 53 HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, /* Ll */ 54 HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER, /* Lm */ 55 HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, /* Lo */ 56 HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, /* Lt */ 57 HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, /* Lu */ 58 HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK, /* Mc */ 59 HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK, /* Me */ 60 HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK, /* Mn */ 61 HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER, /* Nd */ 62 HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER, /* Nl */ 63 HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER, /* No */ 64 HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION, /* Pc */ 65 HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION, /* Pd */ 66 HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION, /* Pe */ 67 HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION, /* Pf */ 68 HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION, /* Pi */ 69 HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION, /* Po */ 70 HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION, /* Ps */ 71 HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL, /* Sc */ 72 HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL, /* Sk */ 73 HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL, /* Sm */ 74 HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL, /* So */ 75 HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR, /* Zl */ 76 HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR, /* Zp */ 77 HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR /* Zs */ 78 } hb_unicode_general_category_t; 79 80 /* hb_unicode_combining_class_t */ 81 82 /* Note: newer versions of Unicode may add new values. Clients should be ready to handle 83 * any value in the 0..254 range being returned from hb_unicode_combining_class(). 84 */ 85 86 /* Unicode Character Database property: Canonical_Combining_Class (ccc) */ 87 typedef enum 88 { 89 HB_UNICODE_COMBINING_CLASS_NOT_REORDERED = 0, 90 HB_UNICODE_COMBINING_CLASS_OVERLAY = 1, 91 HB_UNICODE_COMBINING_CLASS_NUKTA = 7, 92 HB_UNICODE_COMBINING_CLASS_KANA_VOICING = 8, 93 HB_UNICODE_COMBINING_CLASS_VIRAMA = 9, 94 95 /* Hebrew */ 96 HB_UNICODE_COMBINING_CLASS_CCC10 = 10, 97 HB_UNICODE_COMBINING_CLASS_CCC11 = 11, 98 HB_UNICODE_COMBINING_CLASS_CCC12 = 12, 99 HB_UNICODE_COMBINING_CLASS_CCC13 = 13, 100 HB_UNICODE_COMBINING_CLASS_CCC14 = 14, 101 HB_UNICODE_COMBINING_CLASS_CCC15 = 15, 102 HB_UNICODE_COMBINING_CLASS_CCC16 = 16, 103 HB_UNICODE_COMBINING_CLASS_CCC17 = 17, 104 HB_UNICODE_COMBINING_CLASS_CCC18 = 18, 105 HB_UNICODE_COMBINING_CLASS_CCC19 = 19, 106 HB_UNICODE_COMBINING_CLASS_CCC20 = 20, 107 HB_UNICODE_COMBINING_CLASS_CCC21 = 21, 108 HB_UNICODE_COMBINING_CLASS_CCC22 = 22, 109 HB_UNICODE_COMBINING_CLASS_CCC23 = 23, 110 HB_UNICODE_COMBINING_CLASS_CCC24 = 24, 111 HB_UNICODE_COMBINING_CLASS_CCC25 = 25, 112 HB_UNICODE_COMBINING_CLASS_CCC26 = 26, 113 114 /* Arabic */ 115 HB_UNICODE_COMBINING_CLASS_CCC27 = 27, 116 HB_UNICODE_COMBINING_CLASS_CCC28 = 28, 117 HB_UNICODE_COMBINING_CLASS_CCC29 = 29, 118 HB_UNICODE_COMBINING_CLASS_CCC30 = 30, 119 HB_UNICODE_COMBINING_CLASS_CCC31 = 31, 120 HB_UNICODE_COMBINING_CLASS_CCC32 = 32, 121 HB_UNICODE_COMBINING_CLASS_CCC33 = 33, 122 HB_UNICODE_COMBINING_CLASS_CCC34 = 34, 123 HB_UNICODE_COMBINING_CLASS_CCC35 = 35, 124 125 /* Syriac */ 126 HB_UNICODE_COMBINING_CLASS_CCC36 = 36, 127 128 /* Telugu */ 129 HB_UNICODE_COMBINING_CLASS_CCC84 = 84, 130 HB_UNICODE_COMBINING_CLASS_CCC91 = 91, 131 132 /* Thai */ 133 HB_UNICODE_COMBINING_CLASS_CCC103 = 103, 134 HB_UNICODE_COMBINING_CLASS_CCC107 = 107, 135 136 /* Lao */ 137 HB_UNICODE_COMBINING_CLASS_CCC118 = 118, 138 HB_UNICODE_COMBINING_CLASS_CCC122 = 122, 139 140 /* Tibetan */ 141 HB_UNICODE_COMBINING_CLASS_CCC129 = 129, 142 HB_UNICODE_COMBINING_CLASS_CCC130 = 130, 143 HB_UNICODE_COMBINING_CLASS_CCC133 = 132, 144 145 146 HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT = 200, 147 HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW = 202, 148 HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE = 214, 149 HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT = 216, 150 HB_UNICODE_COMBINING_CLASS_BELOW_LEFT = 218, 151 HB_UNICODE_COMBINING_CLASS_BELOW = 220, 152 HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT = 222, 153 HB_UNICODE_COMBINING_CLASS_LEFT = 224, 154 HB_UNICODE_COMBINING_CLASS_RIGHT = 226, 155 HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT = 228, 156 HB_UNICODE_COMBINING_CLASS_ABOVE = 230, 157 HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT = 232, 158 HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW = 233, 159 HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE = 234, 160 161 HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT = 240, 162 163 HB_UNICODE_COMBINING_CLASS_INVALID = 255 164 } hb_unicode_combining_class_t; 165 166 167 /* 168 * hb_unicode_funcs_t 169 */ 170 171 typedef struct hb_unicode_funcs_t hb_unicode_funcs_t; 172 173 174 /* 175 * just give me the best implementation you've got there. 176 */ 177 hb_unicode_funcs_t * 178 hb_unicode_funcs_get_default (void); 179 180 181 hb_unicode_funcs_t * 182 hb_unicode_funcs_create (hb_unicode_funcs_t *parent); 183 184 hb_unicode_funcs_t * 185 hb_unicode_funcs_get_empty (void); 186 187 hb_unicode_funcs_t * 188 hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs); 189 190 void 191 hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs); 192 193 hb_bool_t 194 hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs, 195 hb_user_data_key_t *key, 196 void * data, 197 hb_destroy_func_t destroy, 198 hb_bool_t replace); 199 200 201 void * 202 hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs, 203 hb_user_data_key_t *key); 204 205 206 void 207 hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs); 208 209 hb_bool_t 210 hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs); 211 212 hb_unicode_funcs_t * 213 hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs); 214 215 216 /* 217 * funcs 218 */ 219 220 /* typedefs */ 221 222 typedef hb_unicode_combining_class_t (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs, 223 hb_codepoint_t unicode, 224 void *user_data); 225 typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs, 226 hb_codepoint_t unicode, 227 void *user_data); 228 typedef hb_unicode_general_category_t (*hb_unicode_general_category_func_t) (hb_unicode_funcs_t *ufuncs, 229 hb_codepoint_t unicode, 230 void *user_data); 231 typedef hb_codepoint_t (*hb_unicode_mirroring_func_t) (hb_unicode_funcs_t *ufuncs, 232 hb_codepoint_t unicode, 233 void *user_data); 234 typedef hb_script_t (*hb_unicode_script_func_t) (hb_unicode_funcs_t *ufuncs, 235 hb_codepoint_t unicode, 236 void *user_data); 237 238 typedef hb_bool_t (*hb_unicode_compose_func_t) (hb_unicode_funcs_t *ufuncs, 239 hb_codepoint_t a, 240 hb_codepoint_t b, 241 hb_codepoint_t *ab, 242 void *user_data); 243 typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs, 244 hb_codepoint_t ab, 245 hb_codepoint_t *a, 246 hb_codepoint_t *b, 247 void *user_data); 248 249 /** 250 * hb_unicode_decompose_compatibility_func_t: 251 * @ufuncs: Unicode function structure 252 * @u: codepoint to decompose 253 * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into 254 * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func() 255 * 256 * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed. 257 * The complete length of the decomposition will be returned. 258 * 259 * If @u has no compatibility decomposition, zero should be returned. 260 * 261 * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any 262 * compatibility decomposition plus an terminating value of 0. Consequently, @decompose must be allocated by the caller to be at least this length. Implementations 263 * of this function type must ensure that they do not write past the provided array. 264 * 265 * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available. 266 */ 267 typedef unsigned int (*hb_unicode_decompose_compatibility_func_t) (hb_unicode_funcs_t *ufuncs, 268 hb_codepoint_t u, 269 hb_codepoint_t *decomposed, 270 void *user_data); 271 272 /* See Unicode 6.1 for details on the maximum decomposition length. */ 273 #define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */ 274 275 /* setters */ 276 277 void 278 hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs, 279 hb_unicode_combining_class_func_t combining_class_func, 280 void *user_data, hb_destroy_func_t destroy); 281 282 void 283 hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs, 284 hb_unicode_eastasian_width_func_t eastasian_width_func, 285 void *user_data, hb_destroy_func_t destroy); 286 287 void 288 hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs, 289 hb_unicode_general_category_func_t general_category_func, 290 void *user_data, hb_destroy_func_t destroy); 291 292 void 293 hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs, 294 hb_unicode_mirroring_func_t mirroring_func, 295 void *user_data, hb_destroy_func_t destroy); 296 297 void 298 hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs, 299 hb_unicode_script_func_t script_func, 300 void *user_data, hb_destroy_func_t destroy); 301 302 void 303 hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs, 304 hb_unicode_compose_func_t compose_func, 305 void *user_data, hb_destroy_func_t destroy); 306 307 void 308 hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs, 309 hb_unicode_decompose_func_t decompose_func, 310 void *user_data, hb_destroy_func_t destroy); 311 312 void 313 hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs, 314 hb_unicode_decompose_compatibility_func_t decompose_compatibility_func, 315 void *user_data, hb_destroy_func_t destroy); 316 317 /* accessors */ 318 319 hb_unicode_combining_class_t 320 hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs, 321 hb_codepoint_t unicode); 322 323 unsigned int 324 hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs, 325 hb_codepoint_t unicode); 326 327 hb_unicode_general_category_t 328 hb_unicode_general_category (hb_unicode_funcs_t *ufuncs, 329 hb_codepoint_t unicode); 330 331 hb_codepoint_t 332 hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs, 333 hb_codepoint_t unicode); 334 335 hb_script_t 336 hb_unicode_script (hb_unicode_funcs_t *ufuncs, 337 hb_codepoint_t unicode); 338 339 hb_bool_t 340 hb_unicode_compose (hb_unicode_funcs_t *ufuncs, 341 hb_codepoint_t a, 342 hb_codepoint_t b, 343 hb_codepoint_t *ab); 344 hb_bool_t 345 hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, 346 hb_codepoint_t ab, 347 hb_codepoint_t *a, 348 hb_codepoint_t *b); 349 350 unsigned int 351 hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs, 352 hb_codepoint_t u, 353 hb_codepoint_t *decomposed); 354 355 HB_END_DECLS 356 357 #endif /* HB_UNICODE_H */ 358