Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2015  Mozilla Foundation.
      3  * Copyright  2015  Google, Inc.
      4  *
      5  *  This is part of HarfBuzz, a text shaping library.
      6  *
      7  * Permission is hereby granted, without written agreement and without
      8  * license or royalty fees, to use, copy, modify, and distribute this
      9  * software and its documentation for any purpose, provided that the
     10  * above copyright notice and the following two paragraphs appear in
     11  * all copies of this software.
     12  *
     13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     17  * DAMAGE.
     18  *
     19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     24  *
     25  * Mozilla Author(s): Jonathan Kew
     26  * Google Author(s): Behdad Esfahbod
     27  */
     28 
     29 #include "hb-ot-shape-complex-use-private.hh"
     30 #include "hb-ot-shape-complex-arabic-private.hh"
     31 
/* buffer var allocations */
/* use_category() is an alias for the per-glyph slot complex_var_u8_0().  In
 * this file it is accessed as info[i].use_category(), allocated in
 * setup_masks_use() and released at the end of reorder(). */
#define use_category() complex_var_u8_0()
     34 
     35 
     36 /*
     37  * Universal Shaping Engine.
     38  * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
     39  */
     40 
     41 static const hb_tag_t
     42 basic_features[] =
     43 {
     44   /*
     45    * Basic features.
     46    * These features are applied all at once, before reordering.
     47    */
     48   HB_TAG('r','k','r','f'),
     49   HB_TAG('a','b','v','f'),
     50   HB_TAG('b','l','w','f'),
     51   HB_TAG('h','a','l','f'),
     52   HB_TAG('p','s','t','f'),
     53   HB_TAG('v','a','t','u'),
     54   HB_TAG('c','j','c','t'),
     55 };
     56 static const hb_tag_t
     57 arabic_features[] =
     58 {
     59   HB_TAG('i','s','o','l'),
     60   HB_TAG('i','n','i','t'),
     61   HB_TAG('m','e','d','i'),
     62   HB_TAG('f','i','n','a'),
     63   /* The spec doesn't specify these but we apply anyway, since our Arabic shaper
     64    * does.  These are only used in Syriac spec. */
     65   HB_TAG('m','e','d','2'),
     66   HB_TAG('f','i','n','2'),
     67   HB_TAG('f','i','n','3'),
     68 };
     69 /* Same order as arabic_features.  Don't need Syriac stuff.*/
     70 enum joining_form_t {
     71   ISOL,
     72   INIT,
     73   MEDI,
     74   FINA,
     75   _NONE
     76 };
     77 static const hb_tag_t
     78 other_features[] =
     79 {
     80   /*
     81    * Other features.
     82    * These features are applied all at once, after reordering.
     83    */
     84   HB_TAG('a','b','v','s'),
     85   HB_TAG('b','l','w','s'),
     86   HB_TAG('h','a','l','n'),
     87   HB_TAG('p','r','e','s'),
     88   HB_TAG('p','s','t','s'),
     89   /* Positioning features, though we don't care about the types. */
     90   HB_TAG('d','i','s','t'),
     91   HB_TAG('a','b','v','m'),
     92   HB_TAG('b','l','w','m'),
     93 };
     94 
/* Forward declarations of the GSUB pause callbacks that
 * collect_features_use() passes to add_gsub_pause().  All five share the
 * same (plan, font, buffer) signature; their definitions appear further
 * down in this file. */
static void
setup_syllables (const hb_ot_shape_plan_t *plan,
		 hb_font_t *font,
		 hb_buffer_t *buffer);
static void
clear_substitution_flags (const hb_ot_shape_plan_t *plan,
			  hb_font_t *font,
			  hb_buffer_t *buffer);
static void
record_rphf (const hb_ot_shape_plan_t *plan,
	     hb_font_t *font,
	     hb_buffer_t *buffer);
static void
record_pref (const hb_ot_shape_plan_t *plan,
	     hb_font_t *font,
	     hb_buffer_t *buffer);
static void
reorder (const hb_ot_shape_plan_t *plan,
	 hb_font_t *font,
	 hb_buffer_t *buffer);
    115 
    116 static void
    117 collect_features_use (hb_ot_shape_planner_t *plan)
    118 {
    119   hb_ot_map_builder_t *map = &plan->map;
    120 
    121   /* Do this before any lookups have been applied. */
    122   map->add_gsub_pause (setup_syllables);
    123 
    124   /* "Default glyph pre-processing group" */
    125   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
    126   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
    127   map->add_global_bool_feature (HB_TAG('n','u','k','t'));
    128   map->add_global_bool_feature (HB_TAG('a','k','h','n'));
    129 
    130   /* "Reordering group" */
    131   map->add_gsub_pause (clear_substitution_flags);
    132   map->add_feature (HB_TAG('r','p','h','f'), 1, F_MANUAL_ZWJ);
    133   map->add_gsub_pause (record_rphf);
    134   map->add_gsub_pause (clear_substitution_flags);
    135   map->add_feature (HB_TAG('p','r','e','f'), 1, F_GLOBAL | F_MANUAL_ZWJ);
    136   map->add_gsub_pause (record_pref);
    137 
    138   /* "Orthographic unit shaping group" */
    139   for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
    140     map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
    141 
    142   map->add_gsub_pause (reorder);
    143 
    144   /* "Topographical features" */
    145   for (unsigned int i = 0; i < ARRAY_LENGTH (arabic_features); i++)
    146     map->add_feature (arabic_features[i], 1, F_NONE);
    147   map->add_gsub_pause (NULL);
    148 
    149   /* "Standard typographic presentation" and "Positional feature application" */
    150   for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
    151     map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
    152 }
    153 
/* Per-plan data of this shaper: allocated by data_create_use(), released by
 * data_destroy_use(), and read back by the callbacks through plan->data. */
struct use_shape_plan_t
{
  ASSERT_POD ();

  /* Result of plan->map.get_1_mask ('rphf'); the rphf helpers return early
   * when it is zero. */
  hb_mask_t rphf_mask;

  /* Filled in only when has_arabic_joining() accepts the plan's script;
   * otherwise left zeroed by the calloc() in data_create_use(). */
  arabic_shape_plan_t *arabic_plan;
};
    162 
    163 static bool
    164 has_arabic_joining (hb_script_t script)
    165 {
    166   /* List of scripts that have data in arabic-table. */
    167   switch ((int) script)
    168   {
    169     /* Unicode-1.1 additions */
    170     case HB_SCRIPT_ARABIC:
    171 
    172     /* Unicode-3.0 additions */
    173     case HB_SCRIPT_MONGOLIAN:
    174     case HB_SCRIPT_SYRIAC:
    175 
    176     /* Unicode-5.0 additions */
    177     case HB_SCRIPT_NKO:
    178     case HB_SCRIPT_PHAGS_PA:
    179 
    180     /* Unicode-6.0 additions */
    181     case HB_SCRIPT_MANDAIC:
    182 
    183     /* Unicode-7.0 additions */
    184     case HB_SCRIPT_MANICHAEAN:
    185     case HB_SCRIPT_PSALTER_PAHLAVI:
    186 
    187     /* Unicode-9.0 additions */
    188     case HB_SCRIPT_ADLAM:
    189 
    190       return true;
    191 
    192     default:
    193       return false;
    194   }
    195 }
    196 
    197 static void *
    198 data_create_use (const hb_ot_shape_plan_t *plan)
    199 {
    200   use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t));
    201   if (unlikely (!use_plan))
    202     return NULL;
    203 
    204   use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f'));
    205 
    206   if (has_arabic_joining (plan->props.script))
    207   {
    208     use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan);
    209     if (unlikely (!use_plan->arabic_plan))
    210     {
    211       free (use_plan);
    212       return NULL;
    213     }
    214   }
    215 
    216   return use_plan;
    217 }
    218 
    219 static void
    220 data_destroy_use (void *data)
    221 {
    222   use_shape_plan_t *use_plan = (use_shape_plan_t *) data;
    223 
    224   if (use_plan->arabic_plan)
    225     data_destroy_arabic (use_plan->arabic_plan);
    226 
    227   free (data);
    228 }
    229 
    230 enum syllable_type_t {
    231   independent_cluster,
    232   virama_terminated_cluster,
    233   standard_cluster,
    234   number_joiner_terminated_cluster,
    235   numeral_cluster,
    236   symbol_cluster,
    237   broken_cluster,
    238   non_cluster,
    239 };
    240 
    241 #include "hb-ot-shape-complex-use-machine.hh"
    242 
    243 
    244 static void
    245 setup_masks_use (const hb_ot_shape_plan_t *plan,
    246 		 hb_buffer_t              *buffer,
    247 		 hb_font_t                *font HB_UNUSED)
    248 {
    249   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
    250 
    251   /* Do this before allocating use_category(). */
    252   if (use_plan->arabic_plan)
    253   {
    254     setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script);
    255   }
    256 
    257   HB_BUFFER_ALLOCATE_VAR (buffer, use_category);
    258 
    259   /* We cannot setup masks here.  We save information about characters
    260    * and setup masks later on in a pause-callback. */
    261 
    262   unsigned int count = buffer->len;
    263   hb_glyph_info_t *info = buffer->info;
    264   for (unsigned int i = 0; i < count; i++)
    265     info[i].use_category() = hb_use_get_categories (info[i].codepoint);
    266 }
    267 
    268 static void
    269 setup_rphf_mask (const hb_ot_shape_plan_t *plan,
    270 		 hb_buffer_t *buffer)
    271 {
    272   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
    273 
    274   hb_mask_t mask = use_plan->rphf_mask;
    275   if (!mask) return;
    276 
    277   hb_glyph_info_t *info = buffer->info;
    278 
    279   foreach_syllable (buffer, start, end)
    280   {
    281     unsigned int limit = info[start].use_category() == USE_R ? 1 : MIN (3u, end - start);
    282     for (unsigned int i = start; i < start + limit; i++)
    283       info[i].mask |= mask;
    284   }
    285 }
    286 
    287 static void
    288 setup_topographical_masks (const hb_ot_shape_plan_t *plan,
    289 			   hb_buffer_t *buffer)
    290 {
    291   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
    292   if (use_plan->arabic_plan)
    293     return;
    294 
    295   ASSERT_STATIC (INIT < 4 && ISOL < 4 && MEDI < 4 && FINA < 4);
    296   hb_mask_t masks[4], all_masks = 0;
    297   for (unsigned int i = 0; i < 4; i++)
    298   {
    299     masks[i] = plan->map.get_1_mask (arabic_features[i]);
    300     if (masks[i] == plan->map.get_global_mask ())
    301       masks[i] = 0;
    302     all_masks |= masks[i];
    303   }
    304   if (!all_masks)
    305     return;
    306   hb_mask_t other_masks = ~all_masks;
    307 
    308   unsigned int last_start = 0;
    309   joining_form_t last_form = _NONE;
    310   hb_glyph_info_t *info = buffer->info;
    311   foreach_syllable (buffer, start, end)
    312   {
    313     syllable_type_t syllable_type = (syllable_type_t) (info[start].syllable() & 0x0F);
    314     switch (syllable_type)
    315     {
    316       case independent_cluster:
    317       case symbol_cluster:
    318       case non_cluster:
    319 	/* These don't join.  Nothing to do. */
    320 	last_form = _NONE;
    321 	break;
    322 
    323       case virama_terminated_cluster:
    324       case standard_cluster:
    325       case number_joiner_terminated_cluster:
    326       case numeral_cluster:
    327       case broken_cluster:
    328 
    329 	bool join = last_form == FINA || last_form == ISOL;
    330 
    331 	if (join)
    332 	{
    333 	  /* Fixup previous syllable's form. */
    334 	  last_form = last_form == FINA ? MEDI : INIT;
    335 	  for (unsigned int i = last_start; i < start; i++)
    336 	    info[i].mask = (info[i].mask & other_masks) | masks[last_form];
    337 	}
    338 
    339 	/* Form for this syllable. */
    340 	last_form = join ? FINA : ISOL;
    341 	for (unsigned int i = start; i < end; i++)
    342 	  info[i].mask = (info[i].mask & other_masks) | masks[last_form];
    343 
    344 	break;
    345     }
    346 
    347     last_start = start;
    348   }
    349 }
    350 
/* First GSUB pause callback registered by collect_features_use(): runs
 * find_syllables() on the buffer (presumably provided by the included
 * use-machine header), then sets the 'rphf' mask and the topographical
 * masks, both of which walk the buffer syllable by syllable. */
static void
setup_syllables (const hb_ot_shape_plan_t *plan,
		 hb_font_t *font HB_UNUSED,
		 hb_buffer_t *buffer)
{
  find_syllables (buffer);
  setup_rphf_mask (plan, buffer);
  setup_topographical_masks (plan, buffer);
}
    360 
    361 static void
    362 clear_substitution_flags (const hb_ot_shape_plan_t *plan,
    363 			  hb_font_t *font HB_UNUSED,
    364 			  hb_buffer_t *buffer)
    365 {
    366   hb_glyph_info_t *info = buffer->info;
    367   unsigned int count = buffer->len;
    368   for (unsigned int i = 0; i < count; i++)
    369     _hb_glyph_info_clear_substituted (&info[i]);
    370 }
    371 
    372 static void
    373 record_rphf (const hb_ot_shape_plan_t *plan,
    374 	     hb_font_t *font,
    375 	     hb_buffer_t *buffer)
    376 {
    377   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
    378 
    379   hb_mask_t mask = use_plan->rphf_mask;
    380   if (!mask) return;
    381   hb_glyph_info_t *info = buffer->info;
    382 
    383   foreach_syllable (buffer, start, end)
    384   {
    385     /* Mark a substituted repha as USE_R. */
    386     for (unsigned int i = start; i < end && (info[i].mask & mask); i++)
    387       if (_hb_glyph_info_substituted (&info[i]))
    388       {
    389 	info[i].use_category() = USE_R;
    390 	break;
    391       }
    392   }
    393 }
    394 
    395 static void
    396 record_pref (const hb_ot_shape_plan_t *plan,
    397 	     hb_font_t *font,
    398 	     hb_buffer_t *buffer)
    399 {
    400   hb_glyph_info_t *info = buffer->info;
    401 
    402   foreach_syllable (buffer, start, end)
    403   {
    404     /* Mark a substituted pref as VPre, as they behave the same way. */
    405     for (unsigned int i = start; i < end; i++)
    406       if (_hb_glyph_info_substituted (&info[i]))
    407       {
    408 	info[i].use_category() = USE_VPre;
    409 	break;
    410       }
    411   }
    412 }
    413 
    414 static inline bool
    415 is_halant (const hb_glyph_info_t &info)
    416 {
    417   return info.use_category() == USE_H && !_hb_glyph_info_ligated (&info);
    418 }
    419 
/* Reorders the glyphs of one syllable, [start, end), in place.
 *
 * Two passes are made:
 *  - a leading repha (USE_R) is moved forward, to the first base or to just
 *    before the first halant, whichever comes first;
 *  - pre-base glyphs (USE_VPre / USE_VMPre) are moved back to the position
 *    of the most recent base, or to just after the most recent halant.
 *
 * Clusters are merged over every range that gets rotated.  Syllables that
 * are not virama-terminated, standard or broken clusters are left alone. */
static void
reorder_syllable (hb_buffer_t *buffer, unsigned int start, unsigned int end)
{
  /* The syllable type lives in the low four bits of syllable(). */
  syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
  /* Only a few syllable types need reordering. */
  if (unlikely (!(FLAG_SAFE (syllable_type) &
		  (FLAG (virama_terminated_cluster) |
		   FLAG (standard_cluster) |
		   FLAG (broken_cluster) |
		   0))))
    return;

  hb_glyph_info_t *info = buffer->info;

  /* Categories treated as a base by both passes below. */
#define BASE_FLAGS (FLAG (USE_B) | FLAG (USE_GB))

  /* Move things forward. */
  if (info[start].use_category() == USE_R && end - start > 1)
  {
    /* Got a repha.  Reorder it to after first base, before first halant. */
    for (unsigned int i = start + 1; i < end; i++)
      if ((FLAG_UNSAFE (info[i].use_category()) & (BASE_FLAGS)) || is_halant (info[i]))
      {
	/* If we hit a halant, move before it; otherwise it's a base: move to its
	 * place, and shift things in between backward. */

	if (is_halant (info[i]))
	  i--;

	/* Rotate [start, i] left by one: the repha ends up at index i. */
	buffer->merge_clusters (start, i + 1);
	hb_glyph_info_t t = info[start];
	memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0]));
	info[i] = t;

	break;
      }
  }

  /* Move things back. */
  /* j is the insertion point for pre-base glyphs.  It starts at end, so
   * nothing moves until a base or halant has been seen, and it is not
   * advanced after a move, so a later pre-base glyph is inserted at the
   * same position again. */
  unsigned int j = end;
  for (unsigned int i = start; i < end; i++)
  {
    uint32_t flag = FLAG_UNSAFE (info[i].use_category());
    if ((flag & (BASE_FLAGS)) || is_halant (info[i]))
    {
      /* If we hit a halant, move after it; otherwise it's a base: move to its
       * place, and shift things in between backward. */
      if (is_halant (info[i]))
	j = i + 1;
      else
	j = i;
    }
    else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) &&
	     /* Only move the first component of a MultipleSubst. */
	     0 == _hb_glyph_info_get_lig_comp (&info[i]) &&
	     j < i)
    {
      /* Rotate [j, i] right by one: the pre-base glyph ends up at index j. */
      buffer->merge_clusters (j, i + 1);
      hb_glyph_info_t t = info[i];
      memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0]));
      info[j] = t;
    }
  }
}
    484 
    485 static inline void
    486 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
    487 		       hb_font_t *font,
    488 		       hb_buffer_t *buffer)
    489 {
    490   /* Note: This loop is extra overhead, but should not be measurable. */
    491   bool has_broken_syllables = false;
    492   unsigned int count = buffer->len;
    493   hb_glyph_info_t *info = buffer->info;
    494   for (unsigned int i = 0; i < count; i++)
    495     if ((info[i].syllable() & 0x0F) == broken_cluster)
    496     {
    497       has_broken_syllables = true;
    498       break;
    499     }
    500   if (likely (!has_broken_syllables))
    501     return;
    502 
    503   hb_glyph_info_t dottedcircle = {0};
    504   if (!font->get_nominal_glyph (0x25CCu, &dottedcircle.codepoint))
    505     return;
    506   dottedcircle.use_category() = hb_use_get_categories (0x25CC);
    507 
    508   buffer->clear_output ();
    509 
    510   buffer->idx = 0;
    511   unsigned int last_syllable = 0;
    512   while (buffer->idx < buffer->len && !buffer->in_error)
    513   {
    514     unsigned int syllable = buffer->cur().syllable();
    515     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
    516     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
    517     {
    518       last_syllable = syllable;
    519 
    520       hb_glyph_info_t ginfo = dottedcircle;
    521       ginfo.cluster = buffer->cur().cluster;
    522       ginfo.mask = buffer->cur().mask;
    523       ginfo.syllable() = buffer->cur().syllable();
    524       /* TODO Set glyph_props? */
    525 
    526       /* Insert dottedcircle after possible Repha. */
    527       while (buffer->idx < buffer->len && !buffer->in_error &&
    528 	     last_syllable == buffer->cur().syllable() &&
    529 	     buffer->cur().use_category() == USE_R)
    530         buffer->next_glyph ();
    531 
    532       buffer->output_info (ginfo);
    533     }
    534     else
    535       buffer->next_glyph ();
    536   }
    537 
    538   buffer->swap_buffers ();
    539 }
    540 
    541 static void
    542 reorder (const hb_ot_shape_plan_t *plan,
    543 	 hb_font_t *font,
    544 	 hb_buffer_t *buffer)
    545 {
    546   insert_dotted_circles (plan, font, buffer);
    547 
    548   hb_glyph_info_t *info = buffer->info;
    549 
    550   foreach_syllable (buffer, start, end)
    551     reorder_syllable (buffer, start, end);
    552 
    553   /* Zero syllables now... */
    554   unsigned int count = buffer->len;
    555   for (unsigned int i = 0; i < count; i++)
    556     info[i].syllable() = 0;
    557 
    558   HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
    559 }
    560 
    561 static bool
    562 decompose_use (const hb_ot_shape_normalize_context_t *c,
    563                 hb_codepoint_t  ab,
    564                 hb_codepoint_t *a,
    565                 hb_codepoint_t *b)
    566 {
    567   switch (ab)
    568   {
    569     /* Chakma:
    570      * Special case where the Unicode decomp gives matras in the wrong order
    571      * for cluster validation.
    572      */
    573     case 0x1112Eu : *a = 0x11127u; *b= 0x11131u; return true;
    574     case 0x1112Fu : *a = 0x11127u; *b= 0x11132u; return true;
    575   }
    576 
    577   return (bool) c->unicode->decompose (ab, a, b);
    578 }
    579 
    580 static bool
    581 compose_use (const hb_ot_shape_normalize_context_t *c,
    582 	     hb_codepoint_t  a,
    583 	     hb_codepoint_t  b,
    584 	     hb_codepoint_t *ab)
    585 {
    586   /* Avoid recomposing split matras. */
    587   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
    588     return false;
    589 
    590   return (bool)c->unicode->compose (a, b, ab);
    591 }
    592 
    593 
/* Shaper descriptor for this file.  The initializer is positional, so the
 * order of the entries has to match the member order of
 * hb_ot_complex_shaper_t (declared outside this file).  The function
 * entries are the static functions defined above; NULL marks the hooks
 * this shaper leaves unset, each labelled with its trailing comment. */
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
{
  "use",
  collect_features_use,
  NULL, /* override_features */
  data_create_use,
  data_destroy_use,
  NULL, /* preprocess_text */
  NULL, /* postprocess_glyphs */
  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
  decompose_use,
  compose_use,
  setup_masks_use,
  NULL, /* disable_otl */
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
  false, /* fallback_position */
};
    611