1 /* 2 * Copyright 2009 Red Hat, Inc. 3 * Copyright 2011 Codethink Limited 4 * Copyright 2011,2012 Google, Inc. 5 * 6 * This is part of HarfBuzz, a text shaping library. 7 * 8 * Permission is hereby granted, without written agreement and without 9 * license or royalty fees, to use, copy, modify, and distribute this 10 * software and its documentation for any purpose, provided that the 11 * above copyright notice and the following two paragraphs appear in 12 * all copies of this software. 13 * 14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 18 * DAMAGE. 19 * 20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 25 * 26 * Red Hat Author(s): Behdad Esfahbod 27 * Codethink Author(s): Ryan Lortie 28 * Google Author(s): Behdad Esfahbod 29 */ 30 31 #ifndef HB_H_IN 32 #error "Include <hb.h> instead." 33 #endif 34 35 #ifndef HB_UNICODE_H 36 #define HB_UNICODE_H 37 38 #include "hb-common.h" 39 40 HB_BEGIN_DECLS 41 42 43 /* hb_unicode_general_category_t */ 44 45 /* Unicode Character Database property: General_Category (gc) */ 46 typedef enum 47 { 48 HB_UNICODE_GENERAL_CATEGORY_CONTROL, /* Cc */ 49 HB_UNICODE_GENERAL_CATEGORY_FORMAT, /* Cf */ 50 HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, /* Cn */ 51 HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE, /* Co */ 52 HB_UNICODE_GENERAL_CATEGORY_SURROGATE, /* Cs */ 53 HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, /* Ll */ 54 HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER, /* Lm */ 55 HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, /* Lo */ 56 HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, /* Lt */ 57 HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, /* Lu */ 58 HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK, /* Mc */ 59 HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK, /* Me */ 60 HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK, /* Mn */ 61 HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER, /* Nd */ 62 HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER, /* Nl */ 63 HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER, /* No */ 64 HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION, /* Pc */ 65 HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION, /* Pd */ 66 HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION, /* Pe */ 67 HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION, /* Pf */ 68 HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION, /* Pi */ 69 HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION, /* Po */ 70 HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION, /* Ps */ 71 HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL, /* Sc */ 72 HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL, /* Sk */ 73 HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL, /* Sm */ 74 HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL, /* So */ 75 HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR, /* Zl */ 76 HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR, /* Zp */ 77 HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR /* Zs */ 78 } hb_unicode_general_category_t; 79 80 /* hb_unicode_combining_class_t */ 81 82 /* Note: newer versions of Unicode may add new values. Clients should be ready to handle 83 * any value in the 0..254 range being returned from hb_unicode_combining_class(). 84 */ 85 86 /* Unicode Character Database property: Canonical_Combining_Class (ccc) */ 87 typedef enum 88 { 89 HB_UNICODE_COMBINING_CLASS_NOT_REORDERED = 0, 90 HB_UNICODE_COMBINING_CLASS_OVERLAY = 1, 91 HB_UNICODE_COMBINING_CLASS_NUKTA = 7, 92 HB_UNICODE_COMBINING_CLASS_KANA_VOICING = 8, 93 HB_UNICODE_COMBINING_CLASS_VIRAMA = 9, 94 95 /* Hebrew */ 96 HB_UNICODE_COMBINING_CLASS_CCC10 = 10, 97 HB_UNICODE_COMBINING_CLASS_CCC11 = 11, 98 HB_UNICODE_COMBINING_CLASS_CCC12 = 12, 99 HB_UNICODE_COMBINING_CLASS_CCC13 = 13, 100 HB_UNICODE_COMBINING_CLASS_CCC14 = 14, 101 HB_UNICODE_COMBINING_CLASS_CCC15 = 15, 102 HB_UNICODE_COMBINING_CLASS_CCC16 = 16, 103 HB_UNICODE_COMBINING_CLASS_CCC17 = 17, 104 HB_UNICODE_COMBINING_CLASS_CCC18 = 18, 105 HB_UNICODE_COMBINING_CLASS_CCC19 = 19, 106 HB_UNICODE_COMBINING_CLASS_CCC20 = 20, 107 HB_UNICODE_COMBINING_CLASS_CCC21 = 21, 108 HB_UNICODE_COMBINING_CLASS_CCC22 = 22, 109 HB_UNICODE_COMBINING_CLASS_CCC23 = 23, 110 HB_UNICODE_COMBINING_CLASS_CCC24 = 24, 111 HB_UNICODE_COMBINING_CLASS_CCC25 = 25, 112 HB_UNICODE_COMBINING_CLASS_CCC26 = 26, 113 114 /* Arabic */ 115 HB_UNICODE_COMBINING_CLASS_CCC27 = 27, 116 HB_UNICODE_COMBINING_CLASS_CCC28 = 28, 117 HB_UNICODE_COMBINING_CLASS_CCC29 = 29, 118 HB_UNICODE_COMBINING_CLASS_CCC30 = 30, 119 HB_UNICODE_COMBINING_CLASS_CCC31 = 31, 120 HB_UNICODE_COMBINING_CLASS_CCC32 = 32, 121 HB_UNICODE_COMBINING_CLASS_CCC33 = 33, 122 HB_UNICODE_COMBINING_CLASS_CCC34 = 34, 123 HB_UNICODE_COMBINING_CLASS_CCC35 = 35, 124 125 /* Syriac */ 126 HB_UNICODE_COMBINING_CLASS_CCC36 = 36, 127 128 /* Telugu */ 129 HB_UNICODE_COMBINING_CLASS_CCC84 = 84, 130 HB_UNICODE_COMBINING_CLASS_CCC91 = 91, 131 132 /* Thai */ 133 HB_UNICODE_COMBINING_CLASS_CCC103 = 103, 134 HB_UNICODE_COMBINING_CLASS_CCC107 = 107, 135 136 /* Lao */ 137 HB_UNICODE_COMBINING_CLASS_CCC118 = 118, 138 HB_UNICODE_COMBINING_CLASS_CCC122 = 122, 139 140 /* Tibetan */ 141 HB_UNICODE_COMBINING_CLASS_CCC129 = 129, 142 HB_UNICODE_COMBINING_CLASS_CCC130 = 130, 143 HB_UNICODE_COMBINING_CLASS_CCC133 = 132, 144 145 146 HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT = 200, 147 HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW = 202, 148 HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE = 214, 149 HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT = 216, 150 HB_UNICODE_COMBINING_CLASS_BELOW_LEFT = 218, 151 HB_UNICODE_COMBINING_CLASS_BELOW = 220, 152 HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT = 222, 153 HB_UNICODE_COMBINING_CLASS_LEFT = 224, 154 HB_UNICODE_COMBINING_CLASS_RIGHT = 226, 155 HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT = 228, 156 HB_UNICODE_COMBINING_CLASS_ABOVE = 230, 157 HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT = 232, 158 HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW = 233, 159 HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE = 234, 160 161 HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT = 240, 162 163 HB_UNICODE_COMBINING_CLASS_INVALID = 255 164 } hb_unicode_combining_class_t; 165 166 167 /* 168 * hb_unicode_funcs_t 169 */ 170 171 typedef struct hb_unicode_funcs_t hb_unicode_funcs_t; 172 173 174 /* 175 * just give me the best implementation you've got there. 176 */ 177 hb_unicode_funcs_t * 178 hb_unicode_funcs_get_default (void); 179 180 181 hb_unicode_funcs_t * 182 hb_unicode_funcs_create (hb_unicode_funcs_t *parent); 183 184 hb_unicode_funcs_t * 185 hb_unicode_funcs_get_empty (void); 186 187 hb_unicode_funcs_t * 188 hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs); 189 190 void 191 hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs); 192 193 hb_bool_t 194 hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs, 195 hb_user_data_key_t *key, 196 void * data, 197 hb_destroy_func_t destroy, 198 hb_bool_t replace); 199 200 201 void * 202 hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs, 203 hb_user_data_key_t *key); 204 205 206 void 207 hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs); 208 209 hb_bool_t 210 hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs); 211 212 hb_unicode_funcs_t * 213 hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs); 214 215 216 /* 217 * funcs 218 */ 219 220 /* typedefs */ 221 222 typedef hb_unicode_combining_class_t (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs, 223 hb_codepoint_t unicode, 224 void *user_data); 225 typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs, 226 hb_codepoint_t unicode, 227 void *user_data); 228 typedef hb_unicode_general_category_t (*hb_unicode_general_category_func_t) (hb_unicode_funcs_t *ufuncs, 229 hb_codepoint_t unicode, 230 void *user_data); 231 typedef hb_codepoint_t (*hb_unicode_mirroring_func_t) (hb_unicode_funcs_t *ufuncs, 232 hb_codepoint_t unicode, 233 void *user_data); 234 typedef hb_script_t (*hb_unicode_script_func_t) (hb_unicode_funcs_t *ufuncs, 235 hb_codepoint_t unicode, 236 void *user_data); 237 238 typedef hb_bool_t (*hb_unicode_compose_func_t) (hb_unicode_funcs_t *ufuncs, 239 hb_codepoint_t a, 240 hb_codepoint_t b, 241 hb_codepoint_t *ab, 242 void *user_data); 243 typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs, 244 hb_codepoint_t ab, 245 hb_codepoint_t *a, 246 hb_codepoint_t *b, 247 void *user_data); 248 249 /** 250 * hb_unicode_decompose_compatibility_func_t: 251 * @ufuncs: a Unicode function structure 252 * @u: codepoint to decompose 253 * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into 254 * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func() 255 * 256 * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed. 257 * The complete length of the decomposition will be returned. 258 * 259 * If @u has no compatibility decomposition, zero should be returned. 260 * 261 * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any 262 * compatibility decomposition plus an terminating value of 0. Consequently, @decompose must be allocated by the caller to be at least this length. Implementations 263 * of this function type must ensure that they do not write past the provided array. 264 * 265 * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available. 266 */ 267 typedef unsigned int (*hb_unicode_decompose_compatibility_func_t) (hb_unicode_funcs_t *ufuncs, 268 hb_codepoint_t u, 269 hb_codepoint_t *decomposed, 270 void *user_data); 271 272 /* See Unicode 6.1 for details on the maximum decomposition length. */ 273 #define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */ 274 275 /* setters */ 276 277 /** 278 * hb_unicode_funcs_set_combining_class_func: 279 * @ufuncs: a Unicode function structure 280 * @func: (closure user_data) (destroy destroy) (scope notified): 281 * @user_data: 282 * @destroy: 283 * 284 * 285 * 286 * Since: 1.0 287 **/ 288 void 289 hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs, 290 hb_unicode_combining_class_func_t func, 291 void *user_data, hb_destroy_func_t destroy); 292 293 /** 294 * hb_unicode_funcs_set_eastasian_width_func: 295 * @ufuncs: a Unicode function structure 296 * @func: (closure user_data) (destroy destroy) (scope notified): 297 * @user_data: 298 * @destroy: 299 * 300 * 301 * 302 * Since: 1.0 303 **/ 304 void 305 hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs, 306 hb_unicode_eastasian_width_func_t func, 307 void *user_data, hb_destroy_func_t destroy); 308 309 /** 310 * hb_unicode_funcs_set_general_category_func: 311 * @ufuncs: a Unicode function structure 312 * @func: (closure user_data) (destroy destroy) (scope notified): 313 * @user_data: 314 * @destroy: 315 * 316 * 317 * 318 * Since: 1.0 319 **/ 320 void 321 hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs, 322 hb_unicode_general_category_func_t func, 323 void *user_data, hb_destroy_func_t destroy); 324 325 /** 326 * hb_unicode_funcs_set_mirroring_func: 327 * @ufuncs: a Unicode function structure 328 * @func: (closure user_data) (destroy destroy) (scope notified): 329 * @user_data: 330 * @destroy: 331 * 332 * 333 * 334 * Since: 1.0 335 **/ 336 void 337 hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs, 338 hb_unicode_mirroring_func_t func, 339 void *user_data, hb_destroy_func_t destroy); 340 341 /** 342 * hb_unicode_funcs_set_script_func: 343 * @ufuncs: a Unicode function structure 344 * @func: (closure user_data) (destroy destroy) (scope notified): 345 * @user_data: 346 * @destroy: 347 * 348 * 349 * 350 * Since: 1.0 351 **/ 352 void 353 hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs, 354 hb_unicode_script_func_t func, 355 void *user_data, hb_destroy_func_t destroy); 356 357 /** 358 * hb_unicode_funcs_set_compose_func: 359 * @ufuncs: a Unicode function structure 360 * @func: (closure user_data) (destroy destroy) (scope notified): 361 * @user_data: 362 * @destroy: 363 * 364 * 365 * 366 * Since: 1.0 367 **/ 368 void 369 hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs, 370 hb_unicode_compose_func_t func, 371 void *user_data, hb_destroy_func_t destroy); 372 373 /** 374 * hb_unicode_funcs_set_decompose_func: 375 * @ufuncs: a Unicode function structure 376 * @func: (closure user_data) (destroy destroy) (scope notified): 377 * @user_data: 378 * @destroy: 379 * 380 * 381 * 382 * Since: 1.0 383 **/ 384 void 385 hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs, 386 hb_unicode_decompose_func_t func, 387 void *user_data, hb_destroy_func_t destroy); 388 389 /** 390 * hb_unicode_funcs_set_decompose_compatibility_func: 391 * @ufuncs: a Unicode function structure 392 * @func: (closure user_data) (destroy destroy) (scope notified): 393 * @user_data: 394 * @destroy: 395 * 396 * 397 * 398 * Since: 1.0 399 **/ 400 void 401 hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs, 402 hb_unicode_decompose_compatibility_func_t func, 403 void *user_data, hb_destroy_func_t destroy); 404 405 /* accessors */ 406 407 hb_unicode_combining_class_t 408 hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs, 409 hb_codepoint_t unicode); 410 411 unsigned int 412 hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs, 413 hb_codepoint_t unicode); 414 415 hb_unicode_general_category_t 416 hb_unicode_general_category (hb_unicode_funcs_t *ufuncs, 417 hb_codepoint_t unicode); 418 419 hb_codepoint_t 420 hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs, 421 hb_codepoint_t unicode); 422 423 hb_script_t 424 hb_unicode_script (hb_unicode_funcs_t *ufuncs, 425 hb_codepoint_t unicode); 426 427 hb_bool_t 428 hb_unicode_compose (hb_unicode_funcs_t *ufuncs, 429 hb_codepoint_t a, 430 hb_codepoint_t b, 431 hb_codepoint_t *ab); 432 hb_bool_t 433 hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, 434 hb_codepoint_t ab, 435 hb_codepoint_t *a, 436 hb_codepoint_t *b); 437 438 unsigned int 439 hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs, 440 hb_codepoint_t u, 441 hb_codepoint_t *decomposed); 442 443 HB_END_DECLS 444 445 #endif /* HB_UNICODE_H */ 446