1 /* 2 * Copyright 2009,2010 Red Hat, Inc. 3 * Copyright 2011,2012 Google, Inc. 4 * 5 * This is part of HarfBuzz, a text shaping library. 6 * 7 * Permission is hereby granted, without written agreement and without 8 * license or royalty fees, to use, copy, modify, and distribute this 9 * software and its documentation for any purpose, provided that the 10 * above copyright notice and the following two paragraphs appear in 11 * all copies of this software. 12 * 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 17 * DAMAGE. 18 * 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 24 * 25 * Red Hat Author(s): Behdad Esfahbod 26 * Google Author(s): Behdad Esfahbod 27 */ 28 29 #include "hb-private.hh" 30 31 #include "hb-mutex-private.hh" 32 #include "hb-object-private.hh" 33 34 #include <locale.h> 35 36 37 /* hb_options_t */ 38 39 hb_options_union_t _hb_options; 40 41 void 42 _hb_options_init (void) 43 { 44 hb_options_union_t u; 45 u.i = 0; 46 u.opts.initialized = 1; 47 48 char *c = getenv ("HB_OPTIONS"); 49 u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible"); 50 51 /* This is idempotent and threadsafe. */ 52 _hb_options = u; 53 } 54 55 56 /* hb_tag_t */ 57 58 /** 59 * hb_tag_from_string: 60 * @str: (array length=len) (element-type uint8_t): 61 * @len: 62 * 63 * 64 * 65 * Return value: 66 * 67 * Since: 0.9.2 68 **/ 69 hb_tag_t 70 hb_tag_from_string (const char *str, int len) 71 { 72 char tag[4]; 73 unsigned int i; 74 75 if (!str || !len || !*str) 76 return HB_TAG_NONE; 77 78 if (len < 0 || len > 4) 79 len = 4; 80 for (i = 0; i < (unsigned) len && str[i]; i++) 81 tag[i] = str[i]; 82 for (; i < 4; i++) 83 tag[i] = ' '; 84 85 return HB_TAG_CHAR4 (tag); 86 } 87 88 /** 89 * hb_tag_to_string: 90 * @tag: 91 * @buf: (array fixed-size=4): 92 * 93 * 94 * 95 * Since: 0.9.5 96 **/ 97 void 98 hb_tag_to_string (hb_tag_t tag, char *buf) 99 { 100 buf[0] = (char) (uint8_t) (tag >> 24); 101 buf[1] = (char) (uint8_t) (tag >> 16); 102 buf[2] = (char) (uint8_t) (tag >> 8); 103 buf[3] = (char) (uint8_t) (tag >> 0); 104 } 105 106 107 /* hb_direction_t */ 108 109 const char direction_strings[][4] = { 110 "ltr", 111 "rtl", 112 "ttb", 113 "btt" 114 }; 115 116 /** 117 * hb_direction_from_string: 118 * @str: (array length=len) (element-type uint8_t): 119 * @len: 120 * 121 * 122 * 123 * Return value: 124 * 125 * Since: 0.9.2 126 **/ 127 hb_direction_t 128 hb_direction_from_string (const char *str, int len) 129 { 130 if (unlikely (!str || !len || !*str)) 131 return HB_DIRECTION_INVALID; 132 133 /* Lets match loosely: just match the first letter, such that 134 * all of "ltr", "left-to-right", etc work! 135 */ 136 char c = TOLOWER (str[0]); 137 for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++) 138 if (c == direction_strings[i][0]) 139 return (hb_direction_t) (HB_DIRECTION_LTR + i); 140 141 return HB_DIRECTION_INVALID; 142 } 143 144 /** 145 * hb_direction_to_string: 146 * @direction: 147 * 148 * 149 * 150 * Return value: (transfer none): 151 * 152 * Since: 0.9.2 153 **/ 154 const char * 155 hb_direction_to_string (hb_direction_t direction) 156 { 157 if (likely ((unsigned int) (direction - HB_DIRECTION_LTR) 158 < ARRAY_LENGTH (direction_strings))) 159 return direction_strings[direction - HB_DIRECTION_LTR]; 160 161 return "invalid"; 162 } 163 164 165 /* hb_language_t */ 166 167 struct hb_language_impl_t { 168 const char s[1]; 169 }; 170 171 static const char canon_map[256] = { 172 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0, 175 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0, 176 '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 177 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-', 178 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 179 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0 180 }; 181 182 static bool 183 lang_equal (hb_language_t v1, 184 const void *v2) 185 { 186 const unsigned char *p1 = (const unsigned char *) v1; 187 const unsigned char *p2 = (const unsigned char *) v2; 188 189 while (*p1 && *p1 == canon_map[*p2]) 190 p1++, p2++; 191 192 return *p1 == canon_map[*p2]; 193 } 194 195 #if 0 196 static unsigned int 197 lang_hash (const void *key) 198 { 199 const unsigned char *p = key; 200 unsigned int h = 0; 201 while (canon_map[*p]) 202 { 203 h = (h << 5) - h + canon_map[*p]; 204 p++; 205 } 206 207 return h; 208 } 209 #endif 210 211 212 struct hb_language_item_t { 213 214 struct hb_language_item_t *next; 215 hb_language_t lang; 216 217 inline bool operator == (const char *s) const { 218 return lang_equal (lang, s); 219 } 220 221 inline hb_language_item_t & operator = (const char *s) { 222 lang = (hb_language_t) strdup (s); 223 for (unsigned char *p = (unsigned char *) lang; *p; p++) 224 *p = canon_map[*p]; 225 226 return *this; 227 } 228 229 void finish (void) { free ((void *) lang); } 230 }; 231 232 233 /* Thread-safe lock-free language list */ 234 235 static hb_language_item_t *langs; 236 237 #ifdef HB_USE_ATEXIT 238 static 239 void free_langs (void) 240 { 241 while (langs) { 242 hb_language_item_t *next = langs->next; 243 langs->finish (); 244 free (langs); 245 langs = next; 246 } 247 } 248 #endif 249 250 static hb_language_item_t * 251 lang_find_or_insert (const char *key) 252 { 253 retry: 254 hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs); 255 256 for (hb_language_item_t *lang = first_lang; lang; lang = lang->next) 257 if (*lang == key) 258 return lang; 259 260 /* Not found; allocate one. */ 261 hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t)); 262 if (unlikely (!lang)) 263 return NULL; 264 lang->next = first_lang; 265 *lang = key; 266 267 if (!hb_atomic_ptr_cmpexch (&langs, first_lang, lang)) { 268 lang->finish (); 269 free (lang); 270 goto retry; 271 } 272 273 #ifdef HB_USE_ATEXIT 274 if (!first_lang) 275 atexit (free_langs); /* First person registers atexit() callback. */ 276 #endif 277 278 return lang; 279 } 280 281 282 /** 283 * hb_language_from_string: 284 * @str: (array length=len) (element-type uint8_t): a string representing 285 * ISO639 language code 286 * @len: length of the @str, or -1 if it is %NULL-terminated. 287 * 288 * Converts @str representing an ISO639 language code to the corresponding 289 * #hb_language_t. 290 * 291 * Return value: (transfer none): 292 * The #hb_language_t corresponding to the ISO639 language code. 293 * 294 * Since: 0.9.2 295 **/ 296 hb_language_t 297 hb_language_from_string (const char *str, int len) 298 { 299 if (!str || !len || !*str) 300 return HB_LANGUAGE_INVALID; 301 302 hb_language_item_t *item = NULL; 303 if (len >= 0) 304 { 305 /* NUL-terminate it. */ 306 char strbuf[64]; 307 len = MIN (len, (int) sizeof (strbuf) - 1); 308 memcpy (strbuf, str, len); 309 strbuf[len] = '\0'; 310 item = lang_find_or_insert (strbuf); 311 } 312 else 313 item = lang_find_or_insert (str); 314 315 return likely (item) ? item->lang : HB_LANGUAGE_INVALID; 316 } 317 318 /** 319 * hb_language_to_string: 320 * @language: an #hb_language_t to convert. 321 * 322 * See hb_language_from_string(). 323 * 324 * Return value: (transfer none): 325 * A %NULL-terminated string representing the @language. Must not be freed by 326 * the caller. 327 * 328 * Since: 0.9.2 329 **/ 330 const char * 331 hb_language_to_string (hb_language_t language) 332 { 333 /* This is actually NULL-safe! */ 334 return language->s; 335 } 336 337 /** 338 * hb_language_get_default: 339 * 340 * 341 * 342 * Return value: (transfer none): 343 * 344 * Since: 0.9.2 345 **/ 346 hb_language_t 347 hb_language_get_default (void) 348 { 349 static hb_language_t default_language = HB_LANGUAGE_INVALID; 350 351 hb_language_t language = (hb_language_t) hb_atomic_ptr_get (&default_language); 352 if (unlikely (language == HB_LANGUAGE_INVALID)) { 353 language = hb_language_from_string (setlocale (LC_CTYPE, NULL), -1); 354 (void) hb_atomic_ptr_cmpexch (&default_language, HB_LANGUAGE_INVALID, language); 355 } 356 357 return default_language; 358 } 359 360 361 /* hb_script_t */ 362 363 /** 364 * hb_script_from_iso15924_tag: 365 * @tag: an #hb_tag_t representing an ISO15924 tag. 366 * 367 * Converts an ISO15924 script tag to a corresponding #hb_script_t. 368 * 369 * Return value: 370 * An #hb_script_t corresponding to the ISO15924 tag. 371 * 372 * Since: 0.9.2 373 **/ 374 hb_script_t 375 hb_script_from_iso15924_tag (hb_tag_t tag) 376 { 377 if (unlikely (tag == HB_TAG_NONE)) 378 return HB_SCRIPT_INVALID; 379 380 /* Be lenient, adjust case (one capital letter followed by three small letters) */ 381 tag = (tag & 0xDFDFDFDFu) | 0x00202020u; 382 383 switch (tag) { 384 385 /* These graduated from the 'Q' private-area codes, but 386 * the old code is still aliased by Unicode, and the Qaai 387 * one in use by ICU. */ 388 case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED; 389 case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC; 390 391 /* Script variants from http://unicode.org/iso15924/ */ 392 case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC; 393 case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN; 394 case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN; 395 case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC; 396 case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC; 397 case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC; 398 } 399 400 /* If it looks right, just use the tag as a script */ 401 if (((uint32_t) tag & 0xE0E0E0E0u) == 0x40606060u) 402 return (hb_script_t) tag; 403 404 /* Otherwise, return unknown */ 405 return HB_SCRIPT_UNKNOWN; 406 } 407 408 /** 409 * hb_script_from_string: 410 * @str: (array length=len) (element-type uint8_t): a string representing an 411 * ISO15924 tag. 412 * @len: length of the @str, or -1 if it is %NULL-terminated. 413 * 414 * Converts a string @str representing an ISO15924 script tag to a 415 * corresponding #hb_script_t. Shorthand for hb_tag_from_string() then 416 * hb_script_from_iso15924_tag(). 417 * 418 * Return value: 419 * An #hb_script_t corresponding to the ISO15924 tag. 420 * 421 * Since: 0.9.2 422 **/ 423 hb_script_t 424 hb_script_from_string (const char *str, int len) 425 { 426 return hb_script_from_iso15924_tag (hb_tag_from_string (str, len)); 427 } 428 429 /** 430 * hb_script_to_iso15924_tag: 431 * @script: an #hb_script_ to convert. 432 * 433 * See hb_script_from_iso15924_tag(). 434 * 435 * Return value: 436 * An #hb_tag_t representing an ISO15924 script tag. 437 * 438 * Since: 0.9.2 439 **/ 440 hb_tag_t 441 hb_script_to_iso15924_tag (hb_script_t script) 442 { 443 return (hb_tag_t) script; 444 } 445 446 /** 447 * hb_script_get_horizontal_direction: 448 * @script: 449 * 450 * 451 * 452 * Return value: 453 * 454 * Since: 0.9.2 455 **/ 456 hb_direction_t 457 hb_script_get_horizontal_direction (hb_script_t script) 458 { 459 /* http://goo.gl/x9ilM */ 460 switch ((hb_tag_t) script) 461 { 462 /* Unicode-1.1 additions */ 463 case HB_SCRIPT_ARABIC: 464 case HB_SCRIPT_HEBREW: 465 466 /* Unicode-3.0 additions */ 467 case HB_SCRIPT_SYRIAC: 468 case HB_SCRIPT_THAANA: 469 470 /* Unicode-4.0 additions */ 471 case HB_SCRIPT_CYPRIOT: 472 473 /* Unicode-4.1 additions */ 474 case HB_SCRIPT_KHAROSHTHI: 475 476 /* Unicode-5.0 additions */ 477 case HB_SCRIPT_PHOENICIAN: 478 case HB_SCRIPT_NKO: 479 480 /* Unicode-5.1 additions */ 481 case HB_SCRIPT_LYDIAN: 482 483 /* Unicode-5.2 additions */ 484 case HB_SCRIPT_AVESTAN: 485 case HB_SCRIPT_IMPERIAL_ARAMAIC: 486 case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI: 487 case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN: 488 case HB_SCRIPT_OLD_SOUTH_ARABIAN: 489 case HB_SCRIPT_OLD_TURKIC: 490 case HB_SCRIPT_SAMARITAN: 491 492 /* Unicode-6.0 additions */ 493 case HB_SCRIPT_MANDAIC: 494 495 /* Unicode-6.1 additions */ 496 case HB_SCRIPT_MEROITIC_CURSIVE: 497 case HB_SCRIPT_MEROITIC_HIEROGLYPHS: 498 499 /* Unicode-7.0 additions */ 500 case HB_SCRIPT_MANICHAEAN: 501 case HB_SCRIPT_MENDE_KIKAKUI: 502 case HB_SCRIPT_NABATAEAN: 503 case HB_SCRIPT_OLD_NORTH_ARABIAN: 504 case HB_SCRIPT_PALMYRENE: 505 case HB_SCRIPT_PSALTER_PAHLAVI: 506 507 /* Unicode-8.0 additions */ 508 case HB_SCRIPT_OLD_HUNGARIAN: 509 510 return HB_DIRECTION_RTL; 511 } 512 513 return HB_DIRECTION_LTR; 514 } 515 516 517 /* hb_user_data_array_t */ 518 519 bool 520 hb_user_data_array_t::set (hb_user_data_key_t *key, 521 void * data, 522 hb_destroy_func_t destroy, 523 hb_bool_t replace) 524 { 525 if (!key) 526 return false; 527 528 if (replace) { 529 if (!data && !destroy) { 530 items.remove (key, lock); 531 return true; 532 } 533 } 534 hb_user_data_item_t item = {key, data, destroy}; 535 bool ret = !!items.replace_or_insert (item, lock, (bool) replace); 536 537 return ret; 538 } 539 540 void * 541 hb_user_data_array_t::get (hb_user_data_key_t *key) 542 { 543 hb_user_data_item_t item = {NULL, NULL, NULL}; 544 545 return items.find (key, &item, lock) ? item.data : NULL; 546 } 547 548 549 /* hb_version */ 550 551 /** 552 * hb_version: 553 * @major: (out): Library major version component. 554 * @minor: (out): Library minor version component. 555 * @micro: (out): Library micro version component. 556 * 557 * Returns library version as three integer components. 558 * 559 * Since: 0.9.2 560 **/ 561 void 562 hb_version (unsigned int *major, 563 unsigned int *minor, 564 unsigned int *micro) 565 { 566 *major = HB_VERSION_MAJOR; 567 *minor = HB_VERSION_MINOR; 568 *micro = HB_VERSION_MICRO; 569 } 570 571 /** 572 * hb_version_string: 573 * 574 * Returns library version as a string with three components. 575 * 576 * Return value: library version string. 577 * 578 * Since: 0.9.2 579 **/ 580 const char * 581 hb_version_string (void) 582 { 583 return HB_VERSION_STRING; 584 } 585 586 /** 587 * hb_version_atleast: 588 * @major: 589 * @minor: 590 * @micro: 591 * 592 * 593 * 594 * Return value: 595 * 596 * Since: 0.9.30 597 **/ 598 hb_bool_t 599 hb_version_atleast (unsigned int major, 600 unsigned int minor, 601 unsigned int micro) 602 { 603 return HB_VERSION_ATLEAST (major, minor, micro); 604 } 605