1 /* 2 * Copyright 2009 Red Hat, Inc. 3 * Copyright 2011 Codethink Limited 4 * Copyright 2010,2011,2012 Google, Inc. 5 * 6 * This is part of HarfBuzz, a text shaping library. 7 * 8 * Permission is hereby granted, without written agreement and without 9 * license or royalty fees, to use, copy, modify, and distribute this 10 * software and its documentation for any purpose, provided that the 11 * above copyright notice and the following two paragraphs appear in 12 * all copies of this software. 13 * 14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 18 * DAMAGE. 19 * 20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 25 * 26 * Red Hat Author(s): Behdad Esfahbod 27 * Codethink Author(s): Ryan Lortie 28 * Google Author(s): Behdad Esfahbod 29 */ 30 31 #include "hb.hh" 32 33 #include "hb-unicode.hh" 34 35 36 /** 37 * SECTION: hb-unicode 38 * @title: hb-unicode 39 * @short_description: Unicode character property access 40 * @include: hb.h 41 * 42 * Unicode functions are used to access Unicode character properties. 43 * Client can pass its own Unicode functions to HarfBuzz, or access 44 * the built-in Unicode functions that come with HarfBuzz. 45 * 46 * With the Unicode functions, one can query variour Unicode character 47 * properties, such as General Category, Script, Combining Class, etc. 48 **/ 49 50 51 /* 52 * hb_unicode_funcs_t 53 */ 54 55 static hb_unicode_combining_class_t 56 hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, 57 hb_codepoint_t unicode HB_UNUSED, 58 void *user_data HB_UNUSED) 59 { 60 return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED; 61 } 62 63 static unsigned int 64 hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, 65 hb_codepoint_t unicode HB_UNUSED, 66 void *user_data HB_UNUSED) 67 { 68 return 1; 69 } 70 71 static hb_unicode_general_category_t 72 hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, 73 hb_codepoint_t unicode HB_UNUSED, 74 void *user_data HB_UNUSED) 75 { 76 return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER; 77 } 78 79 static hb_codepoint_t 80 hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, 81 hb_codepoint_t unicode, 82 void *user_data HB_UNUSED) 83 { 84 return unicode; 85 } 86 87 static hb_script_t 88 hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, 89 hb_codepoint_t unicode HB_UNUSED, 90 void *user_data HB_UNUSED) 91 { 92 return HB_SCRIPT_UNKNOWN; 93 } 94 95 static hb_bool_t 96 hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, 97 hb_codepoint_t a HB_UNUSED, 98 hb_codepoint_t b HB_UNUSED, 99 hb_codepoint_t *ab HB_UNUSED, 100 void *user_data HB_UNUSED) 101 { 102 return false; 103 } 104 105 static hb_bool_t 106 hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, 107 hb_codepoint_t ab HB_UNUSED, 108 hb_codepoint_t *a HB_UNUSED, 109 hb_codepoint_t *b HB_UNUSED, 110 void *user_data HB_UNUSED) 111 { 112 return false; 113 } 114 115 116 static unsigned int 117 hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, 118 hb_codepoint_t u HB_UNUSED, 119 hb_codepoint_t *decomposed HB_UNUSED, 120 void *user_data HB_UNUSED) 121 { 122 return 0; 123 } 124 125 126 extern "C" hb_unicode_funcs_t *hb_glib_get_unicode_funcs (); 127 extern "C" hb_unicode_funcs_t *hb_icu_get_unicode_funcs (); 128 extern "C" hb_unicode_funcs_t *hb_ucdn_get_unicode_funcs (); 129 130 hb_unicode_funcs_t * 131 hb_unicode_funcs_get_default () 132 { 133 #if defined(HAVE_UCDN) 134 return hb_ucdn_get_unicode_funcs (); 135 #elif defined(HAVE_GLIB) 136 return hb_glib_get_unicode_funcs (); 137 #elif defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN) 138 return hb_icu_get_unicode_funcs (); 139 #else 140 #define HB_UNICODE_FUNCS_NIL 1 141 return hb_unicode_funcs_get_empty (); 142 #endif 143 } 144 145 #if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL) 146 #error "Could not find any Unicode functions implementation, you have to provide your own" 147 #error "Consider building hb-ucdn.c. If you absolutely want to build without any, check the code." 148 #endif 149 150 /** 151 * hb_unicode_funcs_create: (Xconstructor) 152 * @parent: (nullable): 153 * 154 * 155 * 156 * Return value: (transfer full): 157 * 158 * Since: 0.9.2 159 **/ 160 hb_unicode_funcs_t * 161 hb_unicode_funcs_create (hb_unicode_funcs_t *parent) 162 { 163 hb_unicode_funcs_t *ufuncs; 164 165 if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ())) 166 return hb_unicode_funcs_get_empty (); 167 168 if (!parent) 169 parent = hb_unicode_funcs_get_empty (); 170 171 hb_unicode_funcs_make_immutable (parent); 172 ufuncs->parent = hb_unicode_funcs_reference (parent); 173 174 ufuncs->func = parent->func; 175 176 /* We can safely copy user_data from parent since we hold a reference 177 * onto it and it's immutable. We should not copy the destroy notifiers 178 * though. */ 179 ufuncs->user_data = parent->user_data; 180 181 return ufuncs; 182 } 183 184 185 DEFINE_NULL_INSTANCE (hb_unicode_funcs_t) = 186 { 187 HB_OBJECT_HEADER_STATIC, 188 189 nullptr, /* parent */ 190 { 191 #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil, 192 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS 193 #undef HB_UNICODE_FUNC_IMPLEMENT 194 } 195 }; 196 197 /** 198 * hb_unicode_funcs_get_empty: 199 * 200 * 201 * 202 * Return value: (transfer full): 203 * 204 * Since: 0.9.2 205 **/ 206 hb_unicode_funcs_t * 207 hb_unicode_funcs_get_empty () 208 { 209 return const_cast<hb_unicode_funcs_t *> (&Null(hb_unicode_funcs_t)); 210 } 211 212 /** 213 * hb_unicode_funcs_reference: (skip) 214 * @ufuncs: Unicode functions. 215 * 216 * 217 * 218 * Return value: (transfer full): 219 * 220 * Since: 0.9.2 221 **/ 222 hb_unicode_funcs_t * 223 hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs) 224 { 225 return hb_object_reference (ufuncs); 226 } 227 228 /** 229 * hb_unicode_funcs_destroy: (skip) 230 * @ufuncs: Unicode functions. 231 * 232 * 233 * 234 * Since: 0.9.2 235 **/ 236 void 237 hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs) 238 { 239 if (!hb_object_destroy (ufuncs)) return; 240 241 #define HB_UNICODE_FUNC_IMPLEMENT(name) \ 242 if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name); 243 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS 244 #undef HB_UNICODE_FUNC_IMPLEMENT 245 246 hb_unicode_funcs_destroy (ufuncs->parent); 247 248 free (ufuncs); 249 } 250 251 /** 252 * hb_unicode_funcs_set_user_data: (skip) 253 * @ufuncs: Unicode functions. 254 * @key: 255 * @data: 256 * @destroy: 257 * @replace: 258 * 259 * 260 * 261 * Return value: 262 * 263 * Since: 0.9.2 264 **/ 265 hb_bool_t 266 hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs, 267 hb_user_data_key_t *key, 268 void * data, 269 hb_destroy_func_t destroy, 270 hb_bool_t replace) 271 { 272 return hb_object_set_user_data (ufuncs, key, data, destroy, replace); 273 } 274 275 /** 276 * hb_unicode_funcs_get_user_data: (skip) 277 * @ufuncs: Unicode functions. 278 * @key: 279 * 280 * 281 * 282 * Return value: (transfer none): 283 * 284 * Since: 0.9.2 285 **/ 286 void * 287 hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs, 288 hb_user_data_key_t *key) 289 { 290 return hb_object_get_user_data (ufuncs, key); 291 } 292 293 294 /** 295 * hb_unicode_funcs_make_immutable: 296 * @ufuncs: Unicode functions. 297 * 298 * 299 * 300 * Since: 0.9.2 301 **/ 302 void 303 hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs) 304 { 305 if (hb_object_is_immutable (ufuncs)) 306 return; 307 308 hb_object_make_immutable (ufuncs); 309 } 310 311 /** 312 * hb_unicode_funcs_is_immutable: 313 * @ufuncs: Unicode functions. 314 * 315 * 316 * 317 * Return value: 318 * 319 * Since: 0.9.2 320 **/ 321 hb_bool_t 322 hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs) 323 { 324 return hb_object_is_immutable (ufuncs); 325 } 326 327 /** 328 * hb_unicode_funcs_get_parent: 329 * @ufuncs: Unicode functions. 330 * 331 * 332 * 333 * Return value: 334 * 335 * Since: 0.9.2 336 **/ 337 hb_unicode_funcs_t * 338 hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs) 339 { 340 return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty (); 341 } 342 343 344 #define HB_UNICODE_FUNC_IMPLEMENT(name) \ 345 \ 346 void \ 347 hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \ 348 hb_unicode_##name##_func_t func, \ 349 void *user_data, \ 350 hb_destroy_func_t destroy) \ 351 { \ 352 if (hb_object_is_immutable (ufuncs)) \ 353 return; \ 354 \ 355 if (ufuncs->destroy.name) \ 356 ufuncs->destroy.name (ufuncs->user_data.name); \ 357 \ 358 if (func) { \ 359 ufuncs->func.name = func; \ 360 ufuncs->user_data.name = user_data; \ 361 ufuncs->destroy.name = destroy; \ 362 } else { \ 363 ufuncs->func.name = ufuncs->parent->func.name; \ 364 ufuncs->user_data.name = ufuncs->parent->user_data.name; \ 365 ufuncs->destroy.name = nullptr; \ 366 } \ 367 } 368 369 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS 370 #undef HB_UNICODE_FUNC_IMPLEMENT 371 372 373 #define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \ 374 \ 375 return_type \ 376 hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \ 377 hb_codepoint_t unicode) \ 378 { \ 379 return ufuncs->name (unicode); \ 380 } 381 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE 382 #undef HB_UNICODE_FUNC_IMPLEMENT 383 384 /** 385 * hb_unicode_compose: 386 * @ufuncs: Unicode functions. 387 * @a: 388 * @b: 389 * @ab: (out): 390 * 391 * 392 * 393 * Return value: 394 * 395 * Since: 0.9.2 396 **/ 397 hb_bool_t 398 hb_unicode_compose (hb_unicode_funcs_t *ufuncs, 399 hb_codepoint_t a, 400 hb_codepoint_t b, 401 hb_codepoint_t *ab) 402 { 403 return ufuncs->compose (a, b, ab); 404 } 405 406 /** 407 * hb_unicode_decompose: 408 * @ufuncs: Unicode functions. 409 * @ab: 410 * @a: (out): 411 * @b: (out): 412 * 413 * 414 * 415 * Return value: 416 * 417 * Since: 0.9.2 418 **/ 419 hb_bool_t 420 hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, 421 hb_codepoint_t ab, 422 hb_codepoint_t *a, 423 hb_codepoint_t *b) 424 { 425 return ufuncs->decompose (ab, a, b); 426 } 427 428 /** 429 * hb_unicode_decompose_compatibility: 430 * @ufuncs: Unicode functions. 431 * @u: 432 * @decomposed: (out): 433 * 434 * 435 * 436 * Return value: 437 * 438 * Since: 0.9.2 439 * Deprecated: 2.0.0 440 **/ 441 unsigned int 442 hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs, 443 hb_codepoint_t u, 444 hb_codepoint_t *decomposed) 445 { 446 return ufuncs->decompose_compatibility (u, decomposed); 447 } 448 449 450 /* See hb-unicode.hh for details. */ 451 const uint8_t 452 _hb_modified_combining_class[256] = 453 { 454 0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */ 455 1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */ 456 2, 3, 4, 5, 6, 457 7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */ 458 8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */ 459 9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */ 460 461 /* Hebrew */ 462 HB_MODIFIED_COMBINING_CLASS_CCC10, 463 HB_MODIFIED_COMBINING_CLASS_CCC11, 464 HB_MODIFIED_COMBINING_CLASS_CCC12, 465 HB_MODIFIED_COMBINING_CLASS_CCC13, 466 HB_MODIFIED_COMBINING_CLASS_CCC14, 467 HB_MODIFIED_COMBINING_CLASS_CCC15, 468 HB_MODIFIED_COMBINING_CLASS_CCC16, 469 HB_MODIFIED_COMBINING_CLASS_CCC17, 470 HB_MODIFIED_COMBINING_CLASS_CCC18, 471 HB_MODIFIED_COMBINING_CLASS_CCC19, 472 HB_MODIFIED_COMBINING_CLASS_CCC20, 473 HB_MODIFIED_COMBINING_CLASS_CCC21, 474 HB_MODIFIED_COMBINING_CLASS_CCC22, 475 HB_MODIFIED_COMBINING_CLASS_CCC23, 476 HB_MODIFIED_COMBINING_CLASS_CCC24, 477 HB_MODIFIED_COMBINING_CLASS_CCC25, 478 HB_MODIFIED_COMBINING_CLASS_CCC26, 479 480 /* Arabic */ 481 HB_MODIFIED_COMBINING_CLASS_CCC27, 482 HB_MODIFIED_COMBINING_CLASS_CCC28, 483 HB_MODIFIED_COMBINING_CLASS_CCC29, 484 HB_MODIFIED_COMBINING_CLASS_CCC30, 485 HB_MODIFIED_COMBINING_CLASS_CCC31, 486 HB_MODIFIED_COMBINING_CLASS_CCC32, 487 HB_MODIFIED_COMBINING_CLASS_CCC33, 488 HB_MODIFIED_COMBINING_CLASS_CCC34, 489 HB_MODIFIED_COMBINING_CLASS_CCC35, 490 491 /* Syriac */ 492 HB_MODIFIED_COMBINING_CLASS_CCC36, 493 494 37, 38, 39, 495 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 496 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 497 80, 81, 82, 83, 498 499 /* Telugu */ 500 HB_MODIFIED_COMBINING_CLASS_CCC84, 501 85, 86, 87, 88, 89, 90, 502 HB_MODIFIED_COMBINING_CLASS_CCC91, 503 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 504 505 /* Thai */ 506 HB_MODIFIED_COMBINING_CLASS_CCC103, 507 104, 105, 106, 508 HB_MODIFIED_COMBINING_CLASS_CCC107, 509 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 510 511 /* Lao */ 512 HB_MODIFIED_COMBINING_CLASS_CCC118, 513 119, 120, 121, 514 HB_MODIFIED_COMBINING_CLASS_CCC122, 515 123, 124, 125, 126, 127, 128, 516 517 /* Tibetan */ 518 HB_MODIFIED_COMBINING_CLASS_CCC129, 519 HB_MODIFIED_COMBINING_CLASS_CCC130, 520 131, 521 HB_MODIFIED_COMBINING_CLASS_CCC132, 522 133, 134, 135, 136, 137, 138, 139, 523 524 525 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 526 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 527 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 528 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 529 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 530 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 531 532 200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */ 533 201, 534 202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */ 535 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 536 214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */ 537 215, 538 216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */ 539 217, 540 218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */ 541 219, 542 220, /* HB_UNICODE_COMBINING_CLASS_BELOW */ 543 221, 544 222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */ 545 223, 546 224, /* HB_UNICODE_COMBINING_CLASS_LEFT */ 547 225, 548 226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */ 549 227, 550 228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */ 551 229, 552 230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */ 553 231, 554 232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */ 555 233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */ 556 234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */ 557 235, 236, 237, 238, 239, 558 240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */ 559 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 560 255, /* HB_UNICODE_COMBINING_CLASS_INVALID */ 561 }; 562 563 564 /* 565 * Emoji 566 */ 567 568 #include "hb-unicode-emoji-table.hh" 569 570 bool 571 _hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp) 572 { 573 return hb_bsearch (&cp, _hb_unicode_emoji_Extended_Pictographic_table, 574 ARRAY_LENGTH (_hb_unicode_emoji_Extended_Pictographic_table), 575 sizeof (hb_unicode_range_t), 576 hb_unicode_range_t::cmp); 577 } 578