Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2015  Mozilla Foundation.
      3  * Copyright  2015  Google, Inc.
      4  *
      5  *  This is part of HarfBuzz, a text shaping library.
      6  *
      7  * Permission is hereby granted, without written agreement and without
      8  * license or royalty fees, to use, copy, modify, and distribute this
      9  * software and its documentation for any purpose, provided that the
     10  * above copyright notice and the following two paragraphs appear in
     11  * all copies of this software.
     12  *
     13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     17  * DAMAGE.
     18  *
     19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     24  *
     25  * Mozilla Author(s): Jonathan Kew
     26  * Google Author(s): Behdad Esfahbod
     27  */
     28 
     29 #include "hb-ot-shape-complex-use-private.hh"
     30 #include "hb-ot-shape-complex-arabic-private.hh"
     31 
     32 /* buffer var allocations */
     33 #define use_category() complex_var_u8_0()
     34 
     35 
     36 /*
     37  * Universal Shaping Engine.
     38  * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
     39  */
     40 
/* Feature tags that collect_features_use() registers, in this order, as the
 * "orthographic unit shaping group" just ahead of the reorder pause. */
static const hb_tag_t
basic_features[] =
{
  /*
   * Basic features.
   * These features are applied all at once, before reordering.
   */
  HB_TAG('r','k','r','f'),
  HB_TAG('a','b','v','f'),
  HB_TAG('b','l','w','f'),
  HB_TAG('h','a','l','f'),
  HB_TAG('p','s','t','f'),
  HB_TAG('v','a','t','u'),
  HB_TAG('c','j','c','t'),
};
/* Topographical (joining-form) feature tags.
 * setup_topographical_masks() looks up the first four entries by index and
 * then selects among them with a joining_form_t value, so the order of those
 * four entries must stay in sync with that enum. */
static const hb_tag_t
arabic_features[] =
{
  HB_TAG('i','s','o','l'),
  HB_TAG('i','n','i','t'),
  HB_TAG('m','e','d','i'),
  HB_TAG('f','i','n','a'),
  /* The spec doesn't specify these but we apply anyway, since our Arabic shaper
   * does.  These are only used in Syriac spec. */
  HB_TAG('m','e','d','2'),
  HB_TAG('f','i','n','2'),
  HB_TAG('f','i','n','3'),
};
/* Joining forms, in the same order as the leading entries of arabic_features
 * so that a form value can be used directly as an index into a per-feature
 * mask array.  Don't need Syriac stuff. */
enum joining_form_t {
  ISOL,
  INIT,
  MEDI,
  FINA,
  _NONE /* No form: the state used when the previous syllable does not join. */
};
/* Feature tags that collect_features_use() registers last, after the
 * topographical features and their trailing pause. */
static const hb_tag_t
other_features[] =
{
  /*
   * Other features.
   * These features are applied all at once, after reordering.
   */
  HB_TAG('a','b','v','s'),
  HB_TAG('b','l','w','s'),
  HB_TAG('h','a','l','n'),
  HB_TAG('p','r','e','s'),
  HB_TAG('p','s','t','s'),
  /* Positioning features, though we don't care about the types. */
  HB_TAG('d','i','s','t'),
  HB_TAG('a','b','v','m'),
  HB_TAG('b','l','w','m'),
};
     94 
/* Forward declarations of the pause callbacks, so that collect_features_use()
 * can register them with the map builder ahead of their definitions.  All of
 * them share the same (plan, font, buffer) signature. */
static void
setup_syllables (const hb_ot_shape_plan_t *plan,
		 hb_font_t *font,
		 hb_buffer_t *buffer);
static void
clear_substitution_flags (const hb_ot_shape_plan_t *plan,
			  hb_font_t *font,
			  hb_buffer_t *buffer);
static void
record_rphf (const hb_ot_shape_plan_t *plan,
	     hb_font_t *font,
	     hb_buffer_t *buffer);
static void
record_pref (const hb_ot_shape_plan_t *plan,
	     hb_font_t *font,
	     hb_buffer_t *buffer);
static void
reorder (const hb_ot_shape_plan_t *plan,
	 hb_font_t *font,
	 hb_buffer_t *buffer);
    115 
    116 static void
    117 collect_features_use (hb_ot_shape_planner_t *plan)
    118 {
    119   hb_ot_map_builder_t *map = &plan->map;
    120 
    121   /* Do this before any lookups have been applied. */
    122   map->add_gsub_pause (setup_syllables);
    123 
    124   /* "Default glyph pre-processing group" */
    125   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
    126   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
    127   map->add_global_bool_feature (HB_TAG('n','u','k','t'));
    128   map->add_global_bool_feature (HB_TAG('a','k','h','n'));
    129 
    130   /* "Reordering group" */
    131   map->add_gsub_pause (clear_substitution_flags);
    132   map->add_feature (HB_TAG('r','p','h','f'), 1, F_MANUAL_ZWJ);
    133   map->add_gsub_pause (record_rphf);
    134   map->add_gsub_pause (clear_substitution_flags);
    135   map->add_feature (HB_TAG('p','r','e','f'), 1, F_GLOBAL | F_MANUAL_ZWJ);
    136   map->add_gsub_pause (record_pref);
    137 
    138   /* "Orthographic unit shaping group" */
    139   for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
    140     map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
    141 
    142   map->add_gsub_pause (reorder);
    143 
    144   /* "Topographical features" */
    145   for (unsigned int i = 0; i < ARRAY_LENGTH (arabic_features); i++)
    146     map->add_feature (arabic_features[i], 1, F_NONE);
    147   map->add_gsub_pause (nullptr);
    148 
    149   /* "Standard typographic presentation" and "Positional feature application" */
    150   for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
    151     map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
    152 }
    153 
/* Per-plan shaper data, built by data_create_use() and released by
 * data_destroy_use(). */
struct use_shape_plan_t
{
  ASSERT_POD ();

  /* Mask obtained from the plan's map for the 'rphf' feature; zero makes
   * setup_rphf_mask() and record_rphf() return without doing anything. */
  hb_mask_t rphf_mask;

  /* Arabic shaper plan, created only when has_arabic_joining() accepts the
   * plan's script; otherwise left at the zero value calloc() gave it. */
  arabic_shape_plan_t *arabic_plan;
};
    162 
    163 static bool
    164 has_arabic_joining (hb_script_t script)
    165 {
    166   /* List of scripts that have data in arabic-table. */
    167   switch ((int) script)
    168   {
    169     /* Unicode-1.1 additions */
    170     case HB_SCRIPT_ARABIC:
    171 
    172     /* Unicode-3.0 additions */
    173     case HB_SCRIPT_MONGOLIAN:
    174     case HB_SCRIPT_SYRIAC:
    175 
    176     /* Unicode-5.0 additions */
    177     case HB_SCRIPT_NKO:
    178     case HB_SCRIPT_PHAGS_PA:
    179 
    180     /* Unicode-6.0 additions */
    181     case HB_SCRIPT_MANDAIC:
    182 
    183     /* Unicode-7.0 additions */
    184     case HB_SCRIPT_MANICHAEAN:
    185     case HB_SCRIPT_PSALTER_PAHLAVI:
    186 
    187     /* Unicode-9.0 additions */
    188     case HB_SCRIPT_ADLAM:
    189 
    190       return true;
    191 
    192     default:
    193       return false;
    194   }
    195 }
    196 
    197 static void *
    198 data_create_use (const hb_ot_shape_plan_t *plan)
    199 {
    200   use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t));
    201   if (unlikely (!use_plan))
    202     return nullptr;
    203 
    204   use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f'));
    205 
    206   if (has_arabic_joining (plan->props.script))
    207   {
    208     use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan);
    209     if (unlikely (!use_plan->arabic_plan))
    210     {
    211       free (use_plan);
    212       return nullptr;
    213     }
    214   }
    215 
    216   return use_plan;
    217 }
    218 
    219 static void
    220 data_destroy_use (void *data)
    221 {
    222   use_shape_plan_t *use_plan = (use_shape_plan_t *) data;
    223 
    224   if (use_plan->arabic_plan)
    225     data_destroy_arabic (use_plan->arabic_plan);
    226 
    227   free (data);
    228 }
    229 
/* Kinds of syllable.  Code in this file recovers the kind from the low four
 * bits of a glyph's syllable() value (`syllable() & 0x0F`).
 * NOTE(review): presumably the values are assigned by find_syllables() from
 * the state machine header included right after this enum -- confirm. */
enum syllable_type_t {
  independent_cluster,
  virama_terminated_cluster,
  standard_cluster,
  number_joiner_terminated_cluster,
  numeral_cluster,
  symbol_cluster,
  broken_cluster, /* The kind that gets a dotted circle inserted. */
  non_cluster,
};
    240 
    241 #include "hb-ot-shape-complex-use-machine.hh"
    242 
    243 
    244 static void
    245 setup_masks_use (const hb_ot_shape_plan_t *plan,
    246 		 hb_buffer_t              *buffer,
    247 		 hb_font_t                *font HB_UNUSED)
    248 {
    249   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
    250 
    251   /* Do this before allocating use_category(). */
    252   if (use_plan->arabic_plan)
    253   {
    254     setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script);
    255   }
    256 
    257   HB_BUFFER_ALLOCATE_VAR (buffer, use_category);
    258 
    259   /* We cannot setup masks here.  We save information about characters
    260    * and setup masks later on in a pause-callback. */
    261 
    262   unsigned int count = buffer->len;
    263   hb_glyph_info_t *info = buffer->info;
    264   for (unsigned int i = 0; i < count; i++)
    265     info[i].use_category() = hb_use_get_categories (info[i].codepoint);
    266 }
    267 
    268 static void
    269 setup_rphf_mask (const hb_ot_shape_plan_t *plan,
    270 		 hb_buffer_t *buffer)
    271 {
    272   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
    273 
    274   hb_mask_t mask = use_plan->rphf_mask;
    275   if (!mask) return;
    276 
    277   hb_glyph_info_t *info = buffer->info;
    278 
    279   foreach_syllable (buffer, start, end)
    280   {
    281     unsigned int limit = info[start].use_category() == USE_R ? 1 : MIN (3u, end - start);
    282     for (unsigned int i = start; i < start + limit; i++)
    283       info[i].mask |= mask;
    284   }
    285 }
    286 
    287 static void
    288 setup_topographical_masks (const hb_ot_shape_plan_t *plan,
    289 			   hb_buffer_t *buffer)
    290 {
    291   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
    292   if (use_plan->arabic_plan)
    293     return;
    294 
    295   static_assert ((INIT < 4 && ISOL < 4 && MEDI < 4 && FINA < 4), "");
    296   hb_mask_t masks[4], all_masks = 0;
    297   for (unsigned int i = 0; i < 4; i++)
    298   {
    299     masks[i] = plan->map.get_1_mask (arabic_features[i]);
    300     if (masks[i] == plan->map.get_global_mask ())
    301       masks[i] = 0;
    302     all_masks |= masks[i];
    303   }
    304   if (!all_masks)
    305     return;
    306   hb_mask_t other_masks = ~all_masks;
    307 
    308   unsigned int last_start = 0;
    309   joining_form_t last_form = _NONE;
    310   hb_glyph_info_t *info = buffer->info;
    311   foreach_syllable (buffer, start, end)
    312   {
    313     syllable_type_t syllable_type = (syllable_type_t) (info[start].syllable() & 0x0F);
    314     switch (syllable_type)
    315     {
    316       case independent_cluster:
    317       case symbol_cluster:
    318       case non_cluster:
    319 	/* These don't join.  Nothing to do. */
    320 	last_form = _NONE;
    321 	break;
    322 
    323       case virama_terminated_cluster:
    324       case standard_cluster:
    325       case number_joiner_terminated_cluster:
    326       case numeral_cluster:
    327       case broken_cluster:
    328 
    329 	bool join = last_form == FINA || last_form == ISOL;
    330 
    331 	if (join)
    332 	{
    333 	  /* Fixup previous syllable's form. */
    334 	  last_form = last_form == FINA ? MEDI : INIT;
    335 	  for (unsigned int i = last_start; i < start; i++)
    336 	    info[i].mask = (info[i].mask & other_masks) | masks[last_form];
    337 	}
    338 
    339 	/* Form for this syllable. */
    340 	last_form = join ? FINA : ISOL;
    341 	for (unsigned int i = start; i < end; i++)
    342 	  info[i].mask = (info[i].mask & other_masks) | masks[last_form];
    343 
    344 	break;
    345     }
    346 
    347     last_start = start;
    348   }
    349 }
    350 
    351 static void
    352 setup_syllables (const hb_ot_shape_plan_t *plan,
    353 		 hb_font_t *font HB_UNUSED,
    354 		 hb_buffer_t *buffer)
    355 {
    356   find_syllables (buffer);
    357   foreach_syllable (buffer, start, end)
    358     buffer->unsafe_to_break (start, end);
    359   setup_rphf_mask (plan, buffer);
    360   setup_topographical_masks (plan, buffer);
    361 }
    362 
    363 static void
    364 clear_substitution_flags (const hb_ot_shape_plan_t *plan,
    365 			  hb_font_t *font HB_UNUSED,
    366 			  hb_buffer_t *buffer)
    367 {
    368   hb_glyph_info_t *info = buffer->info;
    369   unsigned int count = buffer->len;
    370   for (unsigned int i = 0; i < count; i++)
    371     _hb_glyph_info_clear_substituted (&info[i]);
    372 }
    373 
    374 static void
    375 record_rphf (const hb_ot_shape_plan_t *plan,
    376 	     hb_font_t *font,
    377 	     hb_buffer_t *buffer)
    378 {
    379   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
    380 
    381   hb_mask_t mask = use_plan->rphf_mask;
    382   if (!mask) return;
    383   hb_glyph_info_t *info = buffer->info;
    384 
    385   foreach_syllable (buffer, start, end)
    386   {
    387     /* Mark a substituted repha as USE_R. */
    388     for (unsigned int i = start; i < end && (info[i].mask & mask); i++)
    389       if (_hb_glyph_info_substituted (&info[i]))
    390       {
    391 	info[i].use_category() = USE_R;
    392 	break;
    393       }
    394   }
    395 }
    396 
    397 static void
    398 record_pref (const hb_ot_shape_plan_t *plan,
    399 	     hb_font_t *font,
    400 	     hb_buffer_t *buffer)
    401 {
    402   hb_glyph_info_t *info = buffer->info;
    403 
    404   foreach_syllable (buffer, start, end)
    405   {
    406     /* Mark a substituted pref as VPre, as they behave the same way. */
    407     for (unsigned int i = start; i < end; i++)
    408       if (_hb_glyph_info_substituted (&info[i]))
    409       {
    410 	info[i].use_category() = USE_VPre;
    411 	break;
    412       }
    413   }
    414 }
    415 
    416 static inline bool
    417 is_halant (const hb_glyph_info_t &info)
    418 {
    419   return info.use_category() == USE_H && !_hb_glyph_info_ligated (&info);
    420 }
    421 
    422 static void
    423 reorder_syllable (hb_buffer_t *buffer, unsigned int start, unsigned int end)
    424 {
    425   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
    426   /* Only a few syllable types need reordering. */
    427   if (unlikely (!(FLAG_UNSAFE (syllable_type) &
    428 		  (FLAG (virama_terminated_cluster) |
    429 		   FLAG (standard_cluster) |
    430 		   FLAG (broken_cluster) |
    431 		   0))))
    432     return;
    433 
    434   hb_glyph_info_t *info = buffer->info;
    435 
    436 #define BASE_FLAGS (FLAG (USE_B) | FLAG (USE_GB))
    437 
    438   /* Move things forward. */
    439   if (info[start].use_category() == USE_R && end - start > 1)
    440   {
    441     /* Got a repha.  Reorder it to after first base, before first halant. */
    442     for (unsigned int i = start + 1; i < end; i++)
    443       if ((FLAG_UNSAFE (info[i].use_category()) & (BASE_FLAGS)) || is_halant (info[i]))
    444       {
    445 	/* If we hit a halant, move before it; otherwise it's a base: move to it's
    446 	 * place, and shift things in between backward. */
    447 
    448 	if (is_halant (info[i]))
    449 	  i--;
    450 
    451 	buffer->merge_clusters (start, i + 1);
    452 	hb_glyph_info_t t = info[start];
    453 	memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0]));
    454 	info[i] = t;
    455 
    456 	break;
    457       }
    458   }
    459 
    460   /* Move things back. */
    461   unsigned int j = end;
    462   for (unsigned int i = start; i < end; i++)
    463   {
    464     uint32_t flag = FLAG_UNSAFE (info[i].use_category());
    465     if ((flag & (BASE_FLAGS)) || is_halant (info[i]))
    466     {
    467       /* If we hit a halant, move after it; otherwise it's a base: move to it's
    468        * place, and shift things in between backward. */
    469       if (is_halant (info[i]))
    470 	j = i + 1;
    471       else
    472 	j = i;
    473     }
    474     else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) &&
    475 	     /* Only move the first component of a MultipleSubst. */
    476 	     0 == _hb_glyph_info_get_lig_comp (&info[i]) &&
    477 	     j < i)
    478     {
    479       buffer->merge_clusters (j, i + 1);
    480       hb_glyph_info_t t = info[i];
    481       memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0]));
    482       info[j] = t;
    483     }
    484   }
    485 }
    486 
    487 static inline void
    488 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
    489 		       hb_font_t *font,
    490 		       hb_buffer_t *buffer)
    491 {
    492   /* Note: This loop is extra overhead, but should not be measurable. */
    493   bool has_broken_syllables = false;
    494   unsigned int count = buffer->len;
    495   hb_glyph_info_t *info = buffer->info;
    496   for (unsigned int i = 0; i < count; i++)
    497     if ((info[i].syllable() & 0x0F) == broken_cluster)
    498     {
    499       has_broken_syllables = true;
    500       break;
    501     }
    502   if (likely (!has_broken_syllables))
    503     return;
    504 
    505   hb_glyph_info_t dottedcircle = {0};
    506   if (!font->get_nominal_glyph (0x25CCu, &dottedcircle.codepoint))
    507     return;
    508   dottedcircle.use_category() = hb_use_get_categories (0x25CC);
    509 
    510   buffer->clear_output ();
    511 
    512   buffer->idx = 0;
    513   unsigned int last_syllable = 0;
    514   while (buffer->idx < buffer->len && !buffer->in_error)
    515   {
    516     unsigned int syllable = buffer->cur().syllable();
    517     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
    518     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
    519     {
    520       last_syllable = syllable;
    521 
    522       hb_glyph_info_t ginfo = dottedcircle;
    523       ginfo.cluster = buffer->cur().cluster;
    524       ginfo.mask = buffer->cur().mask;
    525       ginfo.syllable() = buffer->cur().syllable();
    526       /* TODO Set glyph_props? */
    527 
    528       /* Insert dottedcircle after possible Repha. */
    529       while (buffer->idx < buffer->len && !buffer->in_error &&
    530 	     last_syllable == buffer->cur().syllable() &&
    531 	     buffer->cur().use_category() == USE_R)
    532         buffer->next_glyph ();
    533 
    534       buffer->output_info (ginfo);
    535     }
    536     else
    537       buffer->next_glyph ();
    538   }
    539 
    540   buffer->swap_buffers ();
    541 }
    542 
    543 static void
    544 reorder (const hb_ot_shape_plan_t *plan,
    545 	 hb_font_t *font,
    546 	 hb_buffer_t *buffer)
    547 {
    548   insert_dotted_circles (plan, font, buffer);
    549 
    550   hb_glyph_info_t *info = buffer->info;
    551 
    552   foreach_syllable (buffer, start, end)
    553     reorder_syllable (buffer, start, end);
    554 
    555   /* Zero syllables now... */
    556   unsigned int count = buffer->len;
    557   for (unsigned int i = 0; i < count; i++)
    558     info[i].syllable() = 0;
    559 
    560   HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
    561 }
    562 
    563 static bool
    564 decompose_use (const hb_ot_shape_normalize_context_t *c,
    565                 hb_codepoint_t  ab,
    566                 hb_codepoint_t *a,
    567                 hb_codepoint_t *b)
    568 {
    569   switch (ab)
    570   {
    571     /* Chakma:
    572      * Special case where the Unicode decomp gives matras in the wrong order
    573      * for cluster validation.
    574      */
    575     case 0x1112Eu : *a = 0x11127u; *b= 0x11131u; return true;
    576     case 0x1112Fu : *a = 0x11127u; *b= 0x11132u; return true;
    577   }
    578 
    579   return (bool) c->unicode->decompose (ab, a, b);
    580 }
    581 
    582 static bool
    583 compose_use (const hb_ot_shape_normalize_context_t *c,
    584 	     hb_codepoint_t  a,
    585 	     hb_codepoint_t  b,
    586 	     hb_codepoint_t *ab)
    587 {
    588   /* Avoid recomposing split matras. */
    589   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
    590     return false;
    591 
    592   return (bool)c->unicode->compose (a, b, ab);
    593 }
    594 
    595 
/* Shaper descriptor for the Universal Shaping Engine.  The initializer is
 * positional, so entries must stay in the order of the fields of
 * hb_ot_complex_shaper_t; hooks this shaper does not provide are nullptr. */
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
{
  collect_features_use,
  nullptr, /* override_features */
  data_create_use,
  data_destroy_use,
  nullptr, /* preprocess_text */
  nullptr, /* postprocess_glyphs */
  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
  decompose_use,
  compose_use,
  setup_masks_use,
  nullptr, /* disable_otl */
  nullptr, /* reorder_marks */
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
  false, /* fallback_position */
};
    613