Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2011,2012,2013  Google, Inc.
      3  *
      4  *  This is part of HarfBuzz, a text shaping library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  *
     24  * Google Author(s): Behdad Esfahbod
     25  */
     26 
     27 #include "hb-ot-shape-complex-indic-private.hh"
     28 
     29 /* buffer var allocations */
     30 #define myanmar_category() complex_var_u8_0() /* myanmar_category_t */
     31 #define myanmar_position() complex_var_u8_1() /* myanmar_position_t */
     32 
     33 
     34 /*
     35  * Myanmar shaper.
     36  */
     37 
/* Feature tags for the first shaping stage.  collect_features_myanmar()
 * registers them in this order and adds a GSUB pause after each one. */
static const hb_tag_t
basic_features[] =
{
  /*
   * Basic features.
   * These features are applied in order, one at a time, after initial_reordering.
   */
  HB_TAG('r','p','h','f'),
  HB_TAG('p','r','e','f'),
  HB_TAG('b','l','w','f'),
  HB_TAG('p','s','t','f'),
};
/* Feature tags for the second shaping stage.  collect_features_myanmar()
 * registers them after the final_reordering pause, with no pauses in
 * between. */
static const hb_tag_t
other_features[] =
{
  /*
   * Other features.
   * These features are applied all at once, after final_reordering.
   */
  HB_TAG('p','r','e','s'),
  HB_TAG('a','b','v','s'),
  HB_TAG('b','l','w','s'),
  HB_TAG('p','s','t','s'),
  /* Positioning features, though we don't care about the types. */
  HB_TAG('d','i','s','t'),
};
     64 
/* Forward declarations of the GSUB pause callbacks.  They are defined
 * further down in this file but are referenced by
 * collect_features_myanmar() before their definitions. */
static void
setup_syllables (const hb_ot_shape_plan_t *plan,
		 hb_font_t *font,
		 hb_buffer_t *buffer);
static void
initial_reordering (const hb_ot_shape_plan_t *plan,
		    hb_font_t *font,
		    hb_buffer_t *buffer);
static void
final_reordering (const hb_ot_shape_plan_t *plan,
		  hb_font_t *font,
		  hb_buffer_t *buffer);
     77 
     78 static void
     79 collect_features_myanmar (hb_ot_shape_planner_t *plan)
     80 {
     81   hb_ot_map_builder_t *map = &plan->map;
     82 
     83   /* Do this before any lookups have been applied. */
     84   map->add_gsub_pause (setup_syllables);
     85 
     86   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
     87   /* The Indic specs do not require ccmp, but we apply it here since if
     88    * there is a use of it, it's typically at the beginning. */
     89   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
     90 
     91 
     92   map->add_gsub_pause (initial_reordering);
     93   for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
     94   {
     95     map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
     96     map->add_gsub_pause (NULL);
     97   }
     98   map->add_gsub_pause (final_reordering);
     99   for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
    100     map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
    101 }
    102 
    103 static void
    104 override_features_myanmar (hb_ot_shape_planner_t *plan)
    105 {
    106   plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);
    107 
    108   /*
    109    * Note:
    110    *
    111    * Spec says 'mark' is used, and the mmrtext.ttf font from
    112    * Windows 8 has lookups for it.  But testing suggests that
    113    * Windows 8 Uniscribe is NOT applying it.  It *is* applying
    114    * 'mkmk' however.
    115    */
    116   if (hb_options ().uniscribe_bug_compatible)
    117     plan->map.add_feature (HB_TAG('m','a','r','k'), 0, F_GLOBAL);
    118 }
    119 
    120 
/* Kinds of syllable this shaper distinguishes.  The kind is read back
 * from the low four bits of a glyph's syllable() value (see
 * initial_reordering_syllable() and insert_dotted_circles()). */
enum syllable_type_t {
  consonant_syllable,	/* Handled by initial_reordering_consonant_syllable(). */
  broken_cluster,	/* Gets a dotted circle inserted, then reordered like a consonant syllable. */
  non_myanmar_cluster,	/* Left untouched by the reordering. */
};
    126 
    127 #include "hb-ot-shape-complex-myanmar-machine.hh"
    128 
    129 
/* Note: This enum is duplicated in the -machine.rl source file.
 * Not sure how to avoid duplication. */
/* Myanmar-specific glyph categories.  OT_DB and OT_GB are aliases for
 * the existing OT_N and OT_DOTTEDCIRCLE categories; the rest carry the
 * explicit values 18..30.  set_myanmar_properties() casts freely between
 * this enum and indic_category_t, so the two share one value space. */
enum myanmar_category_t {
  OT_As  = 18, /* Asat */
  OT_D   = 19, /* Digits except zero */
  OT_D0  = 20, /* Digit zero */
  OT_DB  = OT_N, /* Dot below */
  OT_GB  = OT_DOTTEDCIRCLE,
  OT_MH  = 21, /* Various consonant medial types */
  OT_MR  = 22, /* Various consonant medial types */
  OT_MW  = 23, /* Various consonant medial types */
  OT_MY  = 24, /* Various consonant medial types */
  OT_PT  = 25, /* Pwo and other tones */
  OT_VAbv = 26,
  OT_VBlw = 27,
  OT_VPre = 28,
  OT_VPst = 29,
  OT_VS   = 30 /* Variation selectors */
};
    149 
    150 
    151 static inline bool
    152 is_one_of (const hb_glyph_info_t &info, unsigned int flags)
    153 {
    154   /* If it ligated, all bets are off. */
    155   if (is_a_ligature (info)) return false;
    156   return !!(FLAG (info.myanmar_category()) & flags);
    157 }
    158 
    159 /* Note:
    160  *
    161  * We treat Vowels and placeholders as if they were consonants.  This is safe because Vowels
    162  * cannot happen in a consonant syllable.  The plus side however is, we can call the
    163  * consonant syllable logic from the vowel syllable function and get it all right! */
    164 #define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_CM) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_GB))
    165 static inline bool
    166 is_consonant (const hb_glyph_info_t &info)
    167 {
    168   return is_one_of (info, CONSONANT_FLAGS);
    169 }
    170 
    171 
/* Computes the Myanmar category and position of a glyph from its
 * codepoint and stores them in the glyph's myanmar_category() and
 * myanmar_position() slots.
 *
 * The starting point is the value returned by hb_indic_get_categories():
 * its low 7 bits are taken as the category and the bits from bit 8 up as
 * the position.  Myanmar-specific overrides are then applied on top, and
 * finally generic matras (OT_M) are split by position into the
 * OT_VPre / OT_VAbv / OT_VBlw / OT_VPst categories. */
static inline void
set_myanmar_properties (hb_glyph_info_t &info)
{
  hb_codepoint_t u = info.codepoint;
  unsigned int type = hb_indic_get_categories (u);
  indic_category_t cat = (indic_category_t) (type & 0x7F);
  indic_position_t pos = (indic_position_t) (type >> 8);

  /* Myanmar
   * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm#analyze
   */
  /* Variation selectors and the zero-width (non-)joiners first.  None of
   * these codepoints appears in the switch below, so the two sets of
   * overrides never conflict. */
  if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xFE00, 0xFE0F)))
    cat = (indic_category_t) OT_VS;
  else if (unlikely (u == 0x200C)) cat = (indic_category_t) OT_ZWNJ;
  else if (unlikely (u == 0x200D)) cat = (indic_category_t) OT_ZWJ;

  /* Per-codepoint category overrides.  Only `cat` is changed here; `pos`
   * keeps the value from the table lookup above. */
  switch (u)
  {
    /* Generic bases: hyphen, NBSP, multiplication sign, dashes, bullet,
     * dotted circle and squares. */
    case 0x002D: case 0x00A0: case 0x00D7: case 0x2012:
    case 0x2013: case 0x2014: case 0x2015: case 0x2022:
    case 0x25CC: case 0x25FB: case 0x25FC: case 0x25FD:
    case 0x25FE:
      cat = (indic_category_t) OT_GB;
      break;

    case 0x1004: case 0x101B: case 0x105A:
      cat = (indic_category_t) OT_Ra;
      break;

    case 0x1032: case 0x1036:
      cat = (indic_category_t) OT_A;
      break;

    case 0x103A:
      cat = (indic_category_t) OT_As;
      break;

    case 0x1041: case 0x1042: case 0x1043: case 0x1044:
    case 0x1045: case 0x1046: case 0x1047: case 0x1048:
    case 0x1049: case 0x1090: case 0x1091: case 0x1092:
    case 0x1093: case 0x1094: case 0x1095: case 0x1096:
    case 0x1097: case 0x1098: case 0x1099:
      cat = (indic_category_t) OT_D;
      break;

    case 0x1040:
      cat = (indic_category_t) OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */
      break;

    case 0x103E: case 0x1060:
      cat = (indic_category_t) OT_MH;
      break;

    case 0x103C:
      cat = (indic_category_t) OT_MR;
      break;

    case 0x103D: case 0x1082:
      cat = (indic_category_t) OT_MW;
      break;

    case 0x103B: case 0x105E: case 0x105F:
      cat = (indic_category_t) OT_MY;
      break;

    case 0x1063: case 0x1064: case 0x1069: case 0x106A:
    case 0x106B: case 0x106C: case 0x106D: case 0xAA7B:
      cat = (indic_category_t) OT_PT;
      break;

    case 0x1038: case 0x1087: case 0x1088: case 0x1089:
    case 0x108A: case 0x108B: case 0x108C: case 0x108D:
    case 0x108F: case 0x109A: case 0x109B: case 0x109C:
      cat = (indic_category_t) OT_SM;
      break;
  }

  /* Split generic matras into the four Myanmar vowel categories based on
   * their position.  A matra with any other position stays OT_M. */
  if (cat == OT_M)
  {
    switch ((int) pos)
    {
      /* Pre-base vowels additionally have their position rewritten to
       * POS_PRE_M. */
      case POS_PRE_C:	cat = (indic_category_t) OT_VPre;
			pos = POS_PRE_M;                  break;
      case POS_ABOVE_C:	cat = (indic_category_t) OT_VAbv; break;
      case POS_BELOW_C:	cat = (indic_category_t) OT_VBlw; break;
      case POS_POST_C:	cat = (indic_category_t) OT_VPst; break;
    }
  }

  info.myanmar_category() = (myanmar_category_t) cat;
  info.myanmar_position() = pos;
}
    264 
    265 
    266 
    267 static void
    268 setup_masks_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED,
    269 		   hb_buffer_t              *buffer,
    270 		   hb_font_t                *font HB_UNUSED)
    271 {
    272   HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_category);
    273   HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_position);
    274 
    275   /* We cannot setup masks here.  We save information about characters
    276    * and setup masks later on in a pause-callback. */
    277 
    278   unsigned int count = buffer->len;
    279   for (unsigned int i = 0; i < count; i++)
    280     set_myanmar_properties (buffer->info[i]);
    281 }
    282 
/* GSUB pause callback, registered first in collect_features_myanmar().
 * Runs find_syllables() over the buffer; `plan` and `font` are unused. */
static void
setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
		 hb_font_t *font HB_UNUSED,
		 hb_buffer_t *buffer)
{
  find_syllables (buffer);
}
    290 
    291 static int
    292 compare_myanmar_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
    293 {
    294   int a = pa->myanmar_position();
    295   int b = pb->myanmar_position();
    296 
    297   return a < b ? -1 : a == b ? 0 : +1;
    298 }
    299 
    300 
    301 /* Rules from:
    302  * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm */
    303 
/* Reorders the glyphs of one syllable, info[start..end).
 *
 * Works in three steps: find the base glyph, assign a myanmar_position()
 * to every glyph of the syllable, then merge the syllable into a single
 * cluster and bubble-sort its glyphs by that position.
 *
 * `plan` and `face` are accepted for signature symmetry with the other
 * per-syllable handlers but are not used here. */
static void
initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
				       hb_face_t *face,
				       hb_buffer_t *buffer,
				       unsigned int start, unsigned int end)
{
  hb_glyph_info_t *info = buffer->info;

  unsigned int base = end;
  bool has_reph = false;

  /* Step 1: locate the base. */
  {
    unsigned int limit = start;
    /* A syllable starting with Ra + Asat + H is treated as having a
     * three-glyph reph; the base search then starts after it. */
    if (start + 3 <= end &&
	info[start  ].myanmar_category() == OT_Ra &&
	info[start+1].myanmar_category() == OT_As &&
	info[start+2].myanmar_category() == OT_H)
    {
      limit += 3;
      base = start;
      has_reph = true;
    }

    {
      if (!has_reph)
	base = limit;

      /* The base is the first consonant at or after `limit`.  If none is
       * found, base keeps its fallback value: `limit` without a reph,
       * `start` with one. */
      for (unsigned int i = limit; i < end; i++)
	if (is_consonant (info[i]))
	{
	  base = i;
	  break;
	}
    }
  }

  /* Reorder! */
  /* Step 2: assign positions, walking the syllable left to right. */
  {
    unsigned int i = start;
    /* The reph glyphs, if any, go after the main consonant. */
    for (; i < start + (has_reph ? 3 : 0); i++)
      info[i].myanmar_position() = POS_AFTER_MAIN;
    /* Anything remaining before the base is pre-base. */
    for (; i < base; i++)
      info[i].myanmar_position() = POS_PRE_C;
    /* The glyph at i is the base (i may already be past `base` when a
     * reph was found but no consonant follows it). */
    if (i < end)
    {
      info[i].myanmar_position() = POS_BASE_C;
      i++;
    }
    /* `pos` is a small state machine for the post-base glyphs:
     * POS_AFTER_MAIN -> POS_BELOW_C -> POS_AFTER_SUB. */
    indic_position_t pos = POS_AFTER_MAIN;
    /* The following loop may be ugly, but it implements all of
     * Myanmar reordering! */
    for (; i < end; i++)
    {
      if (info[i].myanmar_category() == OT_MR) /* Pre-base reordering */
      {
	info[i].myanmar_position() = POS_PRE_C;
	continue;
      }
      /* Keep the position set_myanmar_properties() gave to anything
       * already placed before the base. */
      if (info[i].myanmar_position() < POS_BASE_C) /* Left matra */
      {
	continue;
      }

      /* The first below-base vowel switches the state to POS_BELOW_C. */
      if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == OT_VBlw)
      {
	pos = POS_BELOW_C;
	info[i].myanmar_position() = pos;
	continue;
      }

      /* While in the below-base state, an OT_A glyph is placed at
       * POS_BEFORE_SUB without changing the state. */
      if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_A)
      {
	info[i].myanmar_position() = POS_BEFORE_SUB;
	continue;
      }
      /* Further below-base vowels stay in the below-base group. */
      if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_VBlw)
      {
	info[i].myanmar_position() = pos;
	continue;
      }
      /* Anything else ends the below-base group.  (The != OT_A test is
       * always true here, since OT_A was handled above.) */
      if (pos == POS_BELOW_C && info[i].myanmar_category() != OT_A)
      {
        pos = POS_AFTER_SUB;
	info[i].myanmar_position() = pos;
	continue;
      }
      /* Default: take the current state as the position. */
      info[i].myanmar_position() = pos;
    }
  }

  /* Step 3: merge the syllable's clusters, then sort it by position. */
  buffer->merge_clusters (start, end);
  /* Sit tight, rock 'n roll! */
  hb_bubble_sort (info + start, end - start, compare_myanmar_order);
}
    398 
/* Reorders a broken cluster, info[start..end).  Simply forwards all
 * arguments to initial_reordering_consonant_syllable(). */
static void
initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan,
				   hb_face_t *face,
				   hb_buffer_t *buffer,
				   unsigned int start, unsigned int end)
{
  /* We already inserted dotted-circles, so just call the consonant_syllable. */
  initial_reordering_consonant_syllable (plan, face, buffer, start, end);
}
    408 
/* Handler for clusters that are not Myanmar syllables.  Intentionally a
 * no-op: every parameter is unused and the buffer is left as it is. */
static void
initial_reordering_non_myanmar_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
					hb_face_t *face HB_UNUSED,
					hb_buffer_t *buffer HB_UNUSED,
					unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
{
  /* Nothing to do right now.  If we ever switch to using the output
   * buffer in the reordering process, we'd need to next_glyph() here. */
}
    418 
    419 
    420 static void
    421 initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
    422 			     hb_face_t *face,
    423 			     hb_buffer_t *buffer,
    424 			     unsigned int start, unsigned int end)
    425 {
    426   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
    427   switch (syllable_type) {
    428   case consonant_syllable:	initial_reordering_consonant_syllable  (plan, face, buffer, start, end); return;
    429   case broken_cluster:		initial_reordering_broken_cluster      (plan, face, buffer, start, end); return;
    430   case non_myanmar_cluster:	initial_reordering_non_myanmar_cluster (plan, face, buffer, start, end); return;
    431   }
    432 }
    433 
    434 static inline void
    435 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
    436 		       hb_font_t *font,
    437 		       hb_buffer_t *buffer)
    438 {
    439   /* Note: This loop is extra overhead, but should not be measurable. */
    440   bool has_broken_syllables = false;
    441   unsigned int count = buffer->len;
    442   for (unsigned int i = 0; i < count; i++)
    443     if ((buffer->info[i].syllable() & 0x0F) == broken_cluster) {
    444       has_broken_syllables = true;
    445       break;
    446     }
    447   if (likely (!has_broken_syllables))
    448     return;
    449 
    450 
    451   hb_codepoint_t dottedcircle_glyph;
    452   if (!font->get_glyph (0x25CC, 0, &dottedcircle_glyph))
    453     return;
    454 
    455   hb_glyph_info_t dottedcircle = {0};
    456   dottedcircle.codepoint = 0x25CC;
    457   set_myanmar_properties (dottedcircle);
    458   dottedcircle.codepoint = dottedcircle_glyph;
    459 
    460   buffer->clear_output ();
    461 
    462   buffer->idx = 0;
    463   unsigned int last_syllable = 0;
    464   while (buffer->idx < buffer->len)
    465   {
    466     unsigned int syllable = buffer->cur().syllable();
    467     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
    468     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
    469     {
    470       last_syllable = syllable;
    471 
    472       hb_glyph_info_t info = dottedcircle;
    473       info.cluster = buffer->cur().cluster;
    474       info.mask = buffer->cur().mask;
    475       info.syllable() = buffer->cur().syllable();
    476 
    477       buffer->output_info (info);
    478     }
    479     else
    480       buffer->next_glyph ();
    481   }
    482 
    483   buffer->swap_buffers ();
    484 }
    485 
    486 static void
    487 initial_reordering (const hb_ot_shape_plan_t *plan,
    488 		    hb_font_t *font,
    489 		    hb_buffer_t *buffer)
    490 {
    491   insert_dotted_circles (plan, font, buffer);
    492 
    493   hb_glyph_info_t *info = buffer->info;
    494   unsigned int count = buffer->len;
    495   if (unlikely (!count)) return;
    496   unsigned int last = 0;
    497   unsigned int last_syllable = info[0].syllable();
    498   for (unsigned int i = 1; i < count; i++)
    499     if (last_syllable != info[i].syllable()) {
    500       initial_reordering_syllable (plan, font->face, buffer, last, i);
    501       last = i;
    502       last_syllable = info[last].syllable();
    503     }
    504   initial_reordering_syllable (plan, font->face, buffer, last, count);
    505 }
    506 
    507 static void
    508 final_reordering (const hb_ot_shape_plan_t *plan,
    509 		  hb_font_t *font HB_UNUSED,
    510 		  hb_buffer_t *buffer)
    511 {
    512   hb_glyph_info_t *info = buffer->info;
    513   unsigned int count = buffer->len;
    514 
    515   /* Zero syllables now... */
    516   for (unsigned int i = 0; i < count; i++)
    517     info[i].syllable() = 0;
    518 
    519   HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_category);
    520   HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_position);
    521 }
    522 
    523 
/* Returns the normalization mode used for Myanmar.  The result is the
 * same constant regardless of `props`, which is unused. */
static hb_ot_shape_normalization_mode_t
normalization_preference_myanmar (const hb_segment_properties_t *props HB_UNUSED)
{
  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
}
    529 
    530 
/* Shaper descriptor for Myanmar.  This is the only non-static symbol
 * defined in this file; it ties together the callbacks defined above.
 * Hooks this shaper does not implement are left NULL and labelled with
 * the hook's name. */
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar =
{
  "myanmar",
  collect_features_myanmar,
  override_features_myanmar,
  NULL, /* data_create */
  NULL, /* data_destroy */
  NULL, /* preprocess_text */
  normalization_preference_myanmar,
  NULL, /* decompose */
  NULL, /* compose */
  setup_masks_myanmar,
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF,
  false, /* fallback_position */
};
    546