1 /* 2 * Copyright 2009 Red Hat, Inc. 3 * Copyright 2011 Google, Inc. 4 * 5 * This is part of HarfBuzz, a text shaping library. 6 * 7 * Permission is hereby granted, without written agreement and without 8 * license or royalty fees, to use, copy, modify, and distribute this 9 * software and its documentation for any purpose, provided that the 10 * above copyright notice and the following two paragraphs appear in 11 * all copies of this software. 12 * 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 17 * DAMAGE. 18 * 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 24 * 25 * Red Hat Author(s): Behdad Esfahbod 26 * Google Author(s): Behdad Esfahbod 27 */ 28 29 #include "hb.hh" 30 31 #include "hb-glib.h" 32 33 #include "hb-machinery.hh" 34 35 36 /** 37 * SECTION:hb-glib 38 * @title: hb-glib 39 * @short_description: GLib integration 40 * @include: hb-glib.h 41 * 42 * Functions for using HarfBuzz with the GLib library to provide Unicode data. 43 **/ 44 45 46 #if !GLIB_CHECK_VERSION(2,29,14) 47 static const hb_script_t 48 glib_script_to_script[] = 49 { 50 HB_SCRIPT_COMMON, 51 HB_SCRIPT_INHERITED, 52 HB_SCRIPT_ARABIC, 53 HB_SCRIPT_ARMENIAN, 54 HB_SCRIPT_BENGALI, 55 HB_SCRIPT_BOPOMOFO, 56 HB_SCRIPT_CHEROKEE, 57 HB_SCRIPT_COPTIC, 58 HB_SCRIPT_CYRILLIC, 59 HB_SCRIPT_DESERET, 60 HB_SCRIPT_DEVANAGARI, 61 HB_SCRIPT_ETHIOPIC, 62 HB_SCRIPT_GEORGIAN, 63 HB_SCRIPT_GOTHIC, 64 HB_SCRIPT_GREEK, 65 HB_SCRIPT_GUJARATI, 66 HB_SCRIPT_GURMUKHI, 67 HB_SCRIPT_HAN, 68 HB_SCRIPT_HANGUL, 69 HB_SCRIPT_HEBREW, 70 HB_SCRIPT_HIRAGANA, 71 HB_SCRIPT_KANNADA, 72 HB_SCRIPT_KATAKANA, 73 HB_SCRIPT_KHMER, 74 HB_SCRIPT_LAO, 75 HB_SCRIPT_LATIN, 76 HB_SCRIPT_MALAYALAM, 77 HB_SCRIPT_MONGOLIAN, 78 HB_SCRIPT_MYANMAR, 79 HB_SCRIPT_OGHAM, 80 HB_SCRIPT_OLD_ITALIC, 81 HB_SCRIPT_ORIYA, 82 HB_SCRIPT_RUNIC, 83 HB_SCRIPT_SINHALA, 84 HB_SCRIPT_SYRIAC, 85 HB_SCRIPT_TAMIL, 86 HB_SCRIPT_TELUGU, 87 HB_SCRIPT_THAANA, 88 HB_SCRIPT_THAI, 89 HB_SCRIPT_TIBETAN, 90 HB_SCRIPT_CANADIAN_SYLLABICS, 91 HB_SCRIPT_YI, 92 HB_SCRIPT_TAGALOG, 93 HB_SCRIPT_HANUNOO, 94 HB_SCRIPT_BUHID, 95 HB_SCRIPT_TAGBANWA, 96 97 /* Unicode-4.0 additions */ 98 HB_SCRIPT_BRAILLE, 99 HB_SCRIPT_CYPRIOT, 100 HB_SCRIPT_LIMBU, 101 HB_SCRIPT_OSMANYA, 102 HB_SCRIPT_SHAVIAN, 103 HB_SCRIPT_LINEAR_B, 104 HB_SCRIPT_TAI_LE, 105 HB_SCRIPT_UGARITIC, 106 107 /* Unicode-4.1 additions */ 108 HB_SCRIPT_NEW_TAI_LUE, 109 HB_SCRIPT_BUGINESE, 110 HB_SCRIPT_GLAGOLITIC, 111 HB_SCRIPT_TIFINAGH, 112 HB_SCRIPT_SYLOTI_NAGRI, 113 HB_SCRIPT_OLD_PERSIAN, 114 HB_SCRIPT_KHAROSHTHI, 115 116 /* Unicode-5.0 additions */ 117 HB_SCRIPT_UNKNOWN, 118 HB_SCRIPT_BALINESE, 119 HB_SCRIPT_CUNEIFORM, 120 HB_SCRIPT_PHOENICIAN, 121 HB_SCRIPT_PHAGS_PA, 122 HB_SCRIPT_NKO, 123 124 /* Unicode-5.1 additions */ 125 HB_SCRIPT_KAYAH_LI, 126 HB_SCRIPT_LEPCHA, 127 HB_SCRIPT_REJANG, 128 HB_SCRIPT_SUNDANESE, 129 HB_SCRIPT_SAURASHTRA, 130 HB_SCRIPT_CHAM, 131 HB_SCRIPT_OL_CHIKI, 132 HB_SCRIPT_VAI, 133 HB_SCRIPT_CARIAN, 134 HB_SCRIPT_LYCIAN, 135 HB_SCRIPT_LYDIAN, 136 137 /* Unicode-5.2 additions */ 138 HB_SCRIPT_AVESTAN, 139 HB_SCRIPT_BAMUM, 140 HB_SCRIPT_EGYPTIAN_HIEROGLYPHS, 141 HB_SCRIPT_IMPERIAL_ARAMAIC, 142 HB_SCRIPT_INSCRIPTIONAL_PAHLAVI, 143 HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, 144 HB_SCRIPT_JAVANESE, 145 HB_SCRIPT_KAITHI, 146 HB_SCRIPT_TAI_THAM, 147 HB_SCRIPT_LISU, 148 HB_SCRIPT_MEETEI_MAYEK, 149 HB_SCRIPT_OLD_SOUTH_ARABIAN, 150 HB_SCRIPT_OLD_TURKIC, 151 HB_SCRIPT_SAMARITAN, 152 HB_SCRIPT_TAI_VIET, 153 154 /* Unicode-6.0 additions */ 155 HB_SCRIPT_BATAK, 156 HB_SCRIPT_BRAHMI, 157 HB_SCRIPT_MANDAIC, 158 159 /* Unicode-6.1 additions */ 160 HB_SCRIPT_CHAKMA, 161 HB_SCRIPT_MEROITIC_CURSIVE, 162 HB_SCRIPT_MEROITIC_HIEROGLYPHS, 163 HB_SCRIPT_MIAO, 164 HB_SCRIPT_SHARADA, 165 HB_SCRIPT_SORA_SOMPENG, 166 HB_SCRIPT_TAKRI 167 }; 168 #endif 169 170 hb_script_t 171 hb_glib_script_to_script (GUnicodeScript script) 172 { 173 #if GLIB_CHECK_VERSION(2,29,14) 174 return (hb_script_t) g_unicode_script_to_iso15924 (script); 175 #else 176 if (likely ((unsigned int) script < ARRAY_LENGTH (glib_script_to_script))) 177 return glib_script_to_script[script]; 178 179 if (unlikely (script == G_UNICODE_SCRIPT_INVALID_CODE)) 180 return HB_SCRIPT_INVALID; 181 182 return HB_SCRIPT_UNKNOWN; 183 #endif 184 } 185 186 GUnicodeScript 187 hb_glib_script_from_script (hb_script_t script) 188 { 189 #if GLIB_CHECK_VERSION(2,29,14) 190 return g_unicode_script_from_iso15924 (script); 191 #else 192 unsigned int count = ARRAY_LENGTH (glib_script_to_script); 193 for (unsigned int i = 0; i < count; i++) 194 if (glib_script_to_script[i] == script) 195 return (GUnicodeScript) i; 196 197 if (unlikely (script == HB_SCRIPT_INVALID)) 198 return G_UNICODE_SCRIPT_INVALID_CODE; 199 200 return G_UNICODE_SCRIPT_UNKNOWN; 201 #endif 202 } 203 204 205 static hb_unicode_combining_class_t 206 hb_glib_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, 207 hb_codepoint_t unicode, 208 void *user_data HB_UNUSED) 209 210 { 211 return (hb_unicode_combining_class_t) g_unichar_combining_class (unicode); 212 } 213 214 static hb_unicode_general_category_t 215 hb_glib_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, 216 hb_codepoint_t unicode, 217 void *user_data HB_UNUSED) 218 219 { 220 /* hb_unicode_general_category_t and GUnicodeType are identical */ 221 return (hb_unicode_general_category_t) g_unichar_type (unicode); 222 } 223 224 static hb_codepoint_t 225 hb_glib_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, 226 hb_codepoint_t unicode, 227 void *user_data HB_UNUSED) 228 { 229 g_unichar_get_mirror_char (unicode, &unicode); 230 return unicode; 231 } 232 233 static hb_script_t 234 hb_glib_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, 235 hb_codepoint_t unicode, 236 void *user_data HB_UNUSED) 237 { 238 return hb_glib_script_to_script (g_unichar_get_script (unicode)); 239 } 240 241 static hb_bool_t 242 hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, 243 hb_codepoint_t a, 244 hb_codepoint_t b, 245 hb_codepoint_t *ab, 246 void *user_data HB_UNUSED) 247 { 248 #if GLIB_CHECK_VERSION(2,29,12) 249 return g_unichar_compose (a, b, ab); 250 #endif 251 252 /* We don't ifdef-out the fallback code such that compiler always 253 * sees it and makes sure it's compilable. */ 254 255 gchar utf8[12]; 256 gchar *normalized; 257 int len; 258 hb_bool_t ret; 259 260 len = g_unichar_to_utf8 (a, utf8); 261 len += g_unichar_to_utf8 (b, utf8 + len); 262 normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFC); 263 len = g_utf8_strlen (normalized, -1); 264 if (unlikely (!len)) 265 return false; 266 267 if (len == 1) { 268 *ab = g_utf8_get_char (normalized); 269 ret = true; 270 } else { 271 ret = false; 272 } 273 274 g_free (normalized); 275 return ret; 276 } 277 278 static hb_bool_t 279 hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, 280 hb_codepoint_t ab, 281 hb_codepoint_t *a, 282 hb_codepoint_t *b, 283 void *user_data HB_UNUSED) 284 { 285 #if GLIB_CHECK_VERSION(2,29,12) 286 return g_unichar_decompose (ab, a, b); 287 #endif 288 289 /* We don't ifdef-out the fallback code such that compiler always 290 * sees it and makes sure it's compilable. */ 291 292 gchar utf8[6]; 293 gchar *normalized; 294 int len; 295 hb_bool_t ret; 296 297 len = g_unichar_to_utf8 (ab, utf8); 298 normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFD); 299 len = g_utf8_strlen (normalized, -1); 300 if (unlikely (!len)) 301 return false; 302 303 if (len == 1) { 304 *a = g_utf8_get_char (normalized); 305 *b = 0; 306 ret = *a != ab; 307 } else if (len == 2) { 308 *a = g_utf8_get_char (normalized); 309 *b = g_utf8_get_char (g_utf8_next_char (normalized)); 310 /* Here's the ugly part: if ab decomposes to a single character and 311 * that character decomposes again, we have to detect that and undo 312 * the second part :-(. */ 313 gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC); 314 hb_codepoint_t c = g_utf8_get_char (recomposed); 315 if (c != ab && c != *a) { 316 *a = c; 317 *b = 0; 318 } 319 g_free (recomposed); 320 ret = true; 321 } else { 322 /* If decomposed to more than two characters, take the last one, 323 * and recompose the rest to get the first component. */ 324 gchar *end = g_utf8_offset_to_pointer (normalized, len - 1); 325 gchar *recomposed; 326 *b = g_utf8_get_char (end); 327 recomposed = g_utf8_normalize (normalized, end - normalized, G_NORMALIZE_NFC); 328 /* We expect that recomposed has exactly one character now. */ 329 *a = g_utf8_get_char (recomposed); 330 g_free (recomposed); 331 ret = true; 332 } 333 334 g_free (normalized); 335 return ret; 336 } 337 338 339 #if HB_USE_ATEXIT 340 static void free_static_glib_funcs (); 341 #endif 342 343 static struct hb_glib_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_glib_unicode_funcs_lazy_loader_t> 344 { 345 static hb_unicode_funcs_t *create () 346 { 347 hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr); 348 349 hb_unicode_funcs_set_combining_class_func (funcs, hb_glib_unicode_combining_class, nullptr, nullptr); 350 hb_unicode_funcs_set_general_category_func (funcs, hb_glib_unicode_general_category, nullptr, nullptr); 351 hb_unicode_funcs_set_mirroring_func (funcs, hb_glib_unicode_mirroring, nullptr, nullptr); 352 hb_unicode_funcs_set_script_func (funcs, hb_glib_unicode_script, nullptr, nullptr); 353 hb_unicode_funcs_set_compose_func (funcs, hb_glib_unicode_compose, nullptr, nullptr); 354 hb_unicode_funcs_set_decompose_func (funcs, hb_glib_unicode_decompose, nullptr, nullptr); 355 356 hb_unicode_funcs_make_immutable (funcs); 357 358 #if HB_USE_ATEXIT 359 atexit (free_static_glib_funcs); 360 #endif 361 362 return funcs; 363 } 364 } static_glib_funcs; 365 366 #if HB_USE_ATEXIT 367 static 368 void free_static_glib_funcs () 369 { 370 static_glib_funcs.free_instance (); 371 } 372 #endif 373 374 hb_unicode_funcs_t * 375 hb_glib_get_unicode_funcs () 376 { 377 return static_glib_funcs.get_unconst (); 378 } 379 380 381 382 #if GLIB_CHECK_VERSION(2,31,10) 383 384 static void 385 _hb_g_bytes_unref (void *data) 386 { 387 g_bytes_unref ((GBytes *) data); 388 } 389 390 /** 391 * hb_glib_blob_create: 392 * 393 * Since: 0.9.38 394 **/ 395 hb_blob_t * 396 hb_glib_blob_create (GBytes *gbytes) 397 { 398 gsize size = 0; 399 gconstpointer data = g_bytes_get_data (gbytes, &size); 400 return hb_blob_create ((const char *) data, 401 size, 402 HB_MEMORY_MODE_READONLY, 403 g_bytes_ref (gbytes), 404 _hb_g_bytes_unref); 405 } 406 #endif 407