Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
      3  *
      4  * This is part of HarfBuzz, an OpenType Layout engine library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  */
     24 
     25 #include "harfbuzz-shaper.h"
     26 #include "harfbuzz-shaper-private.h"
     27 
     28 #include <assert.h>
     29 #include <stdio.h>
     30 
     31 /*
     32 //  Vocabulary
     33 //      Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the
     34 //                      center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
     35 //                      split vowels, signs... but there is only one base in a syllable, it has to be coded as
     36 //                      the first character of the syllable.
     37 //      split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
     38 //                      Khmer language has five of them. Khmer split vowels either have one part before the
     39 //                      base and one after the base or they have a part before the base and a part above the base.
     40 //                      The first part of all Khmer split vowels is the same character, identical to
     41 //                      the glyph of Khmer dependent vowel SRA EI
     42 //      coeng -->  modifier used in Khmer to construct coeng (subscript) consonants
     43 //                 Differently than indian languages, the coeng modifies the consonant that follows it,
     44 //                 not the one preceding it  Each consonant has two forms, the base form and the subscript form
     45 //                 the base form is the normal one (using the consonants code-point), the subscript form is
     46 //                 displayed when the combination coeng + consonant is encountered.
//      Consonant of type 1 -> A consonant which has a subscript form that only occupies space under a base consonant
     48 //      Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO)
     49 //      Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
//      Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
//                           if it is attached to a consonant of the first series or a consonant of the second series
//                           Most consonants have an equivalent in the other series, but some of them exist only in
     53 //                           one series (for example SA). If we want to use the consonant SA with a vowel sound that
     54 //                           can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
     55 //                           of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
//                           (x17C9 and x17CA). TRIISAP changes a first series consonant to second series sound and
//                           MUSIKATOAN a second series consonant to have a first series vowel sound.
//                           Consonant shifters are both normally superscript marks, but, when they are followed by a
     59 //                           superscript, they change shape and take the form of subscript dependent vowel SRA U.
     60 //                           If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
     61 //                           should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
     62 //                           be placed after the coeng consonant.
     63 //      Dependent vowel ->   In khmer dependent vowels can be placed above, below, before or after the base
     64 //                           Each vowel has its own position. Only one vowel per syllable is allowed.
     65 //      Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign are
     66 //                           Allowed in a syllable.
     67 //
     68 //
     69 //   order is important here! This order must be the same that is found in each horizontal
     70 //   line in the statetable for Khmer (see khmerStateTable) .
     71 */
     72 enum KhmerCharClassValues {
     73     CC_RESERVED             =  0,
     74     CC_CONSONANT            =  1, /* Consonant of type 1 or independent vowel */
     75     CC_CONSONANT2           =  2, /* Consonant of type 2 */
     76     CC_CONSONANT3           =  3, /* Consonant of type 3 */
     77     CC_ZERO_WIDTH_NJ_MARK   =  4, /* Zero Width non joiner character (0x200C) */
     78     CC_CONSONANT_SHIFTER    =  5,
     79     CC_ROBAT                =  6, /* Khmer special diacritic accent -treated differently in state table */
     80     CC_COENG                =  7, /* Subscript consonant combining character */
     81     CC_DEPENDENT_VOWEL      =  8,
     82     CC_SIGN_ABOVE           =  9,
     83     CC_SIGN_AFTER           = 10,
     84     CC_ZERO_WIDTH_J_MARK    = 11, /* Zero width joiner character */
     85     CC_COUNT                = 12  /* This is the number of character classes */
     86 };
     87 
     88 
     89 enum KhmerCharClassFlags {
     90     CF_CLASS_MASK    = 0x0000FFFF,
     91 
     92     CF_CONSONANT     = 0x01000000,  /* flag to speed up comparing */
     93     CF_SPLIT_VOWEL   = 0x02000000,  /* flag for a split vowel -> the first part is added in front of the syllable */
     94     CF_DOTTED_CIRCLE = 0x04000000,  /* add a dotted circle if a character with this flag is the first in a syllable */
     95     CF_COENG         = 0x08000000,  /* flag to speed up comparing */
     96     CF_SHIFTER       = 0x10000000,  /* flag to speed up comparing */
     97     CF_ABOVE_VOWEL   = 0x20000000,  /* flag to speed up comparing */
     98 
     99     /* position flags */
    100     CF_POS_BEFORE    = 0x00080000,
    101     CF_POS_BELOW     = 0x00040000,
    102     CF_POS_ABOVE     = 0x00020000,
    103     CF_POS_AFTER     = 0x00010000,
    104     CF_POS_MASK      = 0x000f0000
    105 };
    106 
    107 
    108 /* Characters that get referred to by name */
    109 enum KhmerChar {
    110     C_SIGN_ZWNJ     = 0x200C,
    111     C_SIGN_ZWJ      = 0x200D,
    112     C_RO            = 0x179A,
    113     C_VOWEL_AA      = 0x17B6,
    114     C_SIGN_NIKAHIT  = 0x17C6,
    115     C_VOWEL_E       = 0x17C1,
    116     C_COENG         = 0x17D2
    117 };
    118 
    119 
    120 /*
    121 //  simple classes, they are used in the statetable (in this file) to control the length of a syllable
    122 //  they are also used to know where a character should be placed (location in reference to the base character)
    123 //  and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
    124 //  indicate error in syllable construction
    125 */
    126 enum {
    127     _xx = CC_RESERVED,
    128     _sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
    129     _sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER,
    130     _c1 = CC_CONSONANT | CF_CONSONANT,
    131     _c2 = CC_CONSONANT2 | CF_CONSONANT,
    132     _c3 = CC_CONSONANT3 | CF_CONSONANT,
    133     _rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE,
    134     _cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER,
    135     _dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE,
    136     _db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE,
    137     _da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL,
    138     _dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE,
    139     _co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE,
    140 
    141     /* split vowel */
    142     _va = _da | CF_SPLIT_VOWEL,
    143     _vr = _dr | CF_SPLIT_VOWEL
    144 };
    145 
    146 
    147 /*
    148 //   Character class: a character class value
    149 //   ORed with character class flags.
    150 */
    151 typedef unsigned long KhmerCharClass;
    152 
    153 
    154 /*
    155 //  Character class tables
//  _xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
    157 //  _sa Sign placed above the base
    158 //  _sp Sign placed after the base
    159 //  _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
    160 //  _c2 Consonant of type 2 (only RO)
    161 //  _c3 Consonant of type 3
    162 //  _rb Khmer sign robat u17CC. combining mark for subscript consonants
//  _cs Consonant-shifter
    164 //  _dl Dependent vowel placed before the base (left of the base)
    165 //  _db Dependent vowel placed below the base
    166 //  _da Dependent vowel placed above the base
    167 //  _dr Dependent vowel placed behind the base (right of the base)
    168 //  _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
    169 //      it to create a subscript consonant or independent vowel
    170 //  _va Khmer split vowel in which the first part is before the base and the second one above the base
    171 //  _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
    172 */
    173 static const KhmerCharClass khmerCharClasses[] = {
    174     _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */
    175     _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */
    176     _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */
    177     _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */
    178     _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */
    179     _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx  /* 17D0 - 17DF */
    180 };
    181 
    182 /* this enum must reflect the range of khmerCharClasses */
    183 enum KhmerCharClassesRange {
    184     KhmerFirstChar = 0x1780,
    185     KhmerLastChar  = 0x17df
    186 };
    187 
    188 /*
    189 //  Below we define how a character in the input string is either in the khmerCharClasses table
    190 //  (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear
    191 //  within the syllable, but are not in the table) we also get their type back, or an unknown object
    192 //  in which case we get _xx (CC_RESERVED) back
    193 */
    194 static KhmerCharClass getKhmerCharClass(HB_UChar16 uc)
    195 {
    196     if (uc == C_SIGN_ZWJ) {
    197         return CC_ZERO_WIDTH_J_MARK;
    198     }
    199 
    200     if (uc == C_SIGN_ZWNJ) {
    201         return CC_ZERO_WIDTH_NJ_MARK;
    202     }
    203 
    204     if (uc < KhmerFirstChar || uc > KhmerLastChar) {
    205         return CC_RESERVED;
    206     }
    207 
    208     return khmerCharClasses[uc - KhmerFirstChar];
    209 }
    210 
    211 
    212 /*
    213 //  The stateTable is used to calculate the end (the length) of a well
    214 //  formed Khmer Syllable.
    215 //
    216 //  Each horizontal line is ordered exactly the same way as the values in KhmerClassTable
    217 //  CharClassValues. This coincidence of values allows the follow up of the table.
    218 //
    219 //  Each line corresponds to a state, which does not necessarily need to be a type
//  of component... for example, state 2 is a base, which is always the first character
//  in the syllable, but the state could be produced by a consonant of any type when
    222 //  it is the first character that is analysed (in ground state).
    223 //
    224 //  Differentiating 3 types of consonants is necessary in order to
    225 //  forbid the use of certain combinations, such as having a second
    226 //  coeng after a coeng RO,
    227 //  The inexistent possibility of having a type 3 after another type 3 is permitted,
    228 //  eliminating it would very much complicate the table, and it does not create typing
    229 //  problems, as the case above.
    230 //
    231 //  The table is quite complex, in order to limit the number of coeng consonants
    232 //  to 2 (by means of the table).
    233 //
//  There is a peculiarity, as far as Unicode is concerned:
    235 //  - The consonant-shifter is considered in two possible different
    236 //    locations, the one considered in Unicode 3.0 and the one considered in
    237 //    Unicode 4.0. (there is a backwards compatibility problem in this standard).
    238 //
    239 //
    240 //  xx    independent character, such as a number, punctuation sign or non-khmer char
    241 //
    242 //  c1    Khmer consonant of type 1 or an independent vowel
//        that is, a letter in which the subscript form is only under the
    244 //        base, not taking any space to the right or to the left
    245 //
    246 //  c2    Khmer consonant of type 2, the coeng form takes space under
    247 //        and to the left of the base (only RO is of this type)
    248 //
    249 //  c3    Khmer consonant of type 3. Its subscript form takes space under
    250 //        and to the right of the base.
    251 //
    252 //  cs    Khmer consonant shifter
    253 //
    254 //  rb    Khmer robat
    255 //
    256 //  co    coeng character (u17D2)
    257 //
    258 //  dv    dependent vowel (including split vowels, they are treated in the same way).
    259 //        even if dv is not defined above, the component that is really tested for is
    260 //        KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels
    261 //
    262 //  zwj   Zero Width joiner
    263 //
    264 //  zwnj  Zero width non joiner
    265 //
    266 //  sa    above sign
    267 //
    268 //  sp    post sign
    269 //
    270 //  there are lines with equal content but for an easier understanding
    271 //  (and maybe change in the future) we did not join them
    272 */
    273 static const signed char khmerStateTable[][CC_COUNT] =
    274 {
    275     /* xx  c1  c2  c3 zwnj cs  rb  co  dv  sa  sp zwj */
    276     { 1,  2,  2,  2,  1,  1,  1,  6,  1,  1,  1,  2}, /*  0 - ground state */
    277     {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /*  1 - exit state (or sign to the right of the syllable) */
    278     {-1, -1, -1, -1,  3,  4,  5,  6, 16, 17,  1, -1}, /*  2 - Base consonant */
    279     {-1, -1, -1, -1, -1,  4, -1, -1, 16, -1, -1, -1}, /*  3 - First ZWNJ before a register shifter It can only be followed by a shifter or a vowel */
    280     {-1, -1, -1, -1, 15, -1, -1,  6, 16, 17,  1, 14}, /*  4 - First register shifter */
    281     {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1,  1, -1}, /*  5 - Robat */
    282     {-1,  7,  8,  9, -1, -1, -1, -1, -1, -1, -1, -1}, /*  6 - First Coeng */
    283     {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  7 - First consonant of type 1 after coeng */
    284     {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17,  1, 14}, /*  8 - First consonant of type 2 after coeng */
    285     {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  9 - First consonant or type 3 after ceong */
    286     {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */
    287     {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */
    288     {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */
    289     {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 13 - Second register shifter */
    290     {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */
    291     {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */
    292     {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17,  1, 18}, /* 16 - dependent vowel */
    293     {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, 18}, /* 17 - sign above */
    294     {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */
    295     {-1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */
    296     {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1}, /* 20 - dependent vowel after a Robat */
    297 };
    298 
    299 
    300 /*  #define KHMER_DEBUG */
    301 #ifdef KHMER_DEBUG
    302 #define KHDEBUG qDebug
    303 #else
    304 #define KHDEBUG if(0) printf
    305 #endif
    306 
    307 /*
    308 //  Given an input string of characters and a location in which to start looking
    309 //  calculate, using the state table, which one is the last character of the syllable
    310 //  that starts in the starting position.
    311 */
    312 static int khmer_nextSyllableBoundary(const HB_UChar16 *s, int start, int end, HB_Bool *invalid)
    313 {
    314     const HB_UChar16 *uc = s + start;
    315     int state = 0;
    316     int pos = start;
    317     *invalid = FALSE;
    318 
    319     while (pos < end) {
    320         KhmerCharClass charClass = getKhmerCharClass(*uc);
    321         if (pos == start) {
    322             *invalid = (charClass > 0) && ! (charClass & CF_CONSONANT);
    323         }
    324         state = khmerStateTable[state][charClass & CF_CLASS_MASK];
    325 
    326         KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state,
    327                 charClass, *uc );
    328 
    329         if (state < 0) {
    330             break;
    331         }
    332         ++uc;
    333         ++pos;
    334     }
    335     return pos;
    336 }
    337 
    338 #ifndef NO_OPENTYPE
    339 static const HB_OpenTypeFeature khmer_features[] = {
    340     { HB_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty },
    341     { HB_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty },
    342     { HB_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty },
    343     { HB_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty },
    344     { HB_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty },
    345     { HB_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty },
    346     { HB_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty },
    347     { HB_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty },
    348     { HB_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty },
    349     { 0, 0 }
    350 };
    351 #endif
    352 
    353 
    354 static HB_Bool khmer_shape_syllable(HB_Bool openType, HB_ShaperItem *item)
    355 {
    356 /*    KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length,
    357   	    item->string->mid(item->from, item->length).toUtf8().data()); */
    358 
    359     int len = 0;
    360     int syllableEnd = item->item.pos + item->item.length;
    361     unsigned short reordered[16];
    362     unsigned char properties[16];
    363     enum {
    364 	AboveForm = 0x01,
    365 	PreForm = 0x02,
    366 	PostForm = 0x04,
    367 	BelowForm = 0x08
    368     };
    369 #ifndef NO_OPENTYPE
    370     const int availableGlyphs = item->num_glyphs;
    371 #endif
    372     int coengRo;
    373     int i;
    374 
    375     /* according to the specs this is the max length one can get
    376        ### the real value should be smaller */
    377     assert(item->item.length < 13);
    378 
    379     memset(properties, 0, 16*sizeof(unsigned char));
    380 
    381 #ifdef KHMER_DEBUG
    382     qDebug("original:");
    383     for (int i = from; i < syllableEnd; i++) {
    384         qDebug("    %d: %4x", i, string[i]);
    385     }
    386 #endif
    387 
    388     /*
    389     // write a pre vowel or the pre part of a split vowel first
    390     // and look out for coeng + ro. RO is the only vowel of type 2, and
    391     // therefore the only one that requires saving space before the base.
    392     */
    393     coengRo = -1;  /* There is no Coeng Ro, if found this value will change */
    394     for (i = item->item.pos; i < syllableEnd; i += 1) {
    395         KhmerCharClass charClass = getKhmerCharClass(item->string[i]);
    396 
    397         /* if a split vowel, write the pre part. In Khmer the pre part
    398            is the same for all split vowels, same glyph as pre vowel C_VOWEL_E */
    399         if (charClass & CF_SPLIT_VOWEL) {
    400             reordered[len] = C_VOWEL_E;
    401             properties[len] = PreForm;
    402             ++len;
    403             break; /* there can be only one vowel */
    404         }
    405         /* if a vowel with pos before write it out */
    406         if (charClass & CF_POS_BEFORE) {
    407             reordered[len] = item->string[i];
    408             properties[len] = PreForm;
    409             ++len;
    410             break; /* there can be only one vowel */
    411         }
    412         /* look for coeng + ro and remember position
    413            works because coeng + ro is always in front of a vowel (if there is a vowel)
    414            and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
    415            with this flag */
    416         if ( (charClass & CF_COENG) && (i + 1 < syllableEnd) &&
    417               ( (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT2) ) {
    418             coengRo = i;
    419         }
    420     }
    421 
    422     /* write coeng + ro if found */
    423     if (coengRo > -1) {
    424         reordered[len] = C_COENG;
    425         properties[len] = PreForm;
    426         ++len;
    427         reordered[len] = C_RO;
    428         properties[len] = PreForm;
    429         ++len;
    430     }
    431 
    432     /*
    433        shall we add a dotted circle?
    434        If in the position in which the base should be (first char in the string) there is
    435        a character that has the Dotted circle flag (a character that cannot be a base)
    436        then write a dotted circle */
    437     if (getKhmerCharClass(item->string[item->item.pos]) & CF_DOTTED_CIRCLE) {
    438         reordered[len] = C_DOTTED_CIRCLE;
    439         ++len;
    440     }
    441 
    442     /* copy what is left to the output, skipping before vowels and
    443        coeng Ro if they are present */
    444     for (i = item->item.pos; i < syllableEnd; i += 1) {
    445         HB_UChar16 uc = item->string[i];
    446         KhmerCharClass charClass = getKhmerCharClass(uc);
    447 
    448         /* skip a before vowel, it was already processed */
    449         if (charClass & CF_POS_BEFORE) {
    450             continue;
    451         }
    452 
    453         /* skip coeng + ro, it was already processed */
    454         if (i == coengRo) {
    455             i += 1;
    456             continue;
    457         }
    458 
    459         switch (charClass & CF_POS_MASK)
    460         {
    461             case CF_POS_ABOVE :
    462                 reordered[len] = uc;
    463                 properties[len] = AboveForm;
    464                 ++len;
    465                 break;
    466 
    467             case CF_POS_AFTER :
    468                 reordered[len] = uc;
    469                 properties[len] = PostForm;
    470                 ++len;
    471                 break;
    472 
    473             case CF_POS_BELOW :
    474                 reordered[len] = uc;
    475                 properties[len] = BelowForm;
    476                 ++len;
    477                 break;
    478 
    479             default:
    480                 /* assign the correct flags to a coeng consonant
    481                    Consonants of type 3 are taged as Post forms and those type 1 as below forms */
    482                 if ( (charClass & CF_COENG) && i + 1 < syllableEnd ) {
    483                     unsigned char property = (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT3 ?
    484                                               PostForm : BelowForm;
    485                     reordered[len] = uc;
    486                     properties[len] = property;
    487                     ++len;
    488                     i += 1;
    489                     reordered[len] = item->string[i];
    490                     properties[len] = property;
    491                     ++len;
    492                     break;
    493                 }
    494 
    495                 /* if a shifter is followed by an above vowel change the shifter to below form,
    496                    an above vowel can have two possible positions i + 1 or i + 3
    497                    (position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
    498                    and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
    499                    different positions, right after the shifter or after a vowel (Unicode 4) */
    500                 if ( (charClass & CF_SHIFTER) && (i + 1 < syllableEnd) ) {
    501                     if (getKhmerCharClass(item->string[i+1]) & CF_ABOVE_VOWEL ) {
    502                         reordered[len] = uc;
    503                         properties[len] = BelowForm;
    504                         ++len;
    505                         break;
    506                     }
    507                     if (i + 2 < syllableEnd &&
    508                         (item->string[i+1] == C_VOWEL_AA) &&
    509                         (item->string[i+2] == C_SIGN_NIKAHIT) )
    510                     {
    511                         reordered[len] = uc;
    512                         properties[len] = BelowForm;
    513                         ++len;
    514                         break;
    515                     }
    516                     if (i + 3 < syllableEnd && (getKhmerCharClass(item->string[i+3]) & CF_ABOVE_VOWEL) ) {
    517                         reordered[len] = uc;
    518                         properties[len] = BelowForm;
    519                         ++len;
    520                         break;
    521                     }
    522                     if (i + 4 < syllableEnd &&
    523                         (item->string[i+3] == C_VOWEL_AA) &&
    524                         (item->string[i+4] == C_SIGN_NIKAHIT) )
    525                     {
    526                         reordered[len] = uc;
    527                         properties[len] = BelowForm;
    528                         ++len;
    529                         break;
    530                     }
    531                 }
    532 
    533                 /* default - any other characters */
    534                 reordered[len] = uc;
    535                 ++len;
    536                 break;
    537         } /* switch */
    538     } /* for */
    539 
    540     if (!item->font->klass->convertStringToGlyphIndices(item->font,
    541                                                         reordered, len,
    542                                                         item->glyphs, &item->num_glyphs,
    543                                                         item->item.bidiLevel % 2))
    544         return FALSE;
    545 
    546 
    547     KHDEBUG("after shaping: len=%d", len);
    548     for (i = 0; i < len; i++) {
    549 	item->attributes[i].mark = FALSE;
    550 	item->attributes[i].clusterStart = FALSE;
    551 	item->attributes[i].justification = 0;
    552 	item->attributes[i].zeroWidth = FALSE;
    553 	KHDEBUG("    %d: %4x property=%x", i, reordered[i], properties[i]);
    554     }
    555 
    556     /* now we have the syllable in the right order, and can start running it through open type. */
    557 
    558 #ifndef NO_OPENTYPE
    559     if (openType) {
    560  	hb_uint32 where[16];
    561         for (i = 0; i < len; ++i) {
    562             where[i] = ~(PreSubstProperty
    563                          | BelowSubstProperty
    564                          | AboveSubstProperty
    565                          | PostSubstProperty
    566                          | CligProperty
    567                          | PositioningProperties);
    568             if (properties[i] == PreForm)
    569                 where[i] &= ~PreFormProperty;
    570             else if (properties[i] == BelowForm)
    571                 where[i] &= ~BelowFormProperty;
    572             else if (properties[i] == AboveForm)
    573                 where[i] &= ~AboveFormProperty;
    574             else if (properties[i] == PostForm)
    575                 where[i] &= ~PostFormProperty;
    576         }
    577 
    578         HB_OpenTypeShape(item, where);
    579         if (!HB_OpenTypePosition(item, availableGlyphs, /*doLogClusters*/FALSE))
    580             return FALSE;
    581     } else
    582 #endif
    583     {
    584 	KHDEBUG("Not using openType");
    585         HB_HeuristicPosition(item);
    586     }
    587 
    588     item->attributes[0].clusterStart = TRUE;
    589     return TRUE;
    590 }
    591 
    592 HB_Bool HB_KhmerShape(HB_ShaperItem *item)
    593 {
    594     HB_Bool openType = FALSE;
    595     unsigned short *logClusters = item->log_clusters;
    596     int i;
    597 
    598     HB_ShaperItem syllable = *item;
    599     int first_glyph = 0;
    600 
    601     int sstart = item->item.pos;
    602     int end = sstart + item->item.length;
    603 
    604     assert(item->item.script == HB_Script_Khmer);
    605 
    606 #ifndef NO_OPENTYPE
    607     openType = HB_SelectScript(item, khmer_features);
    608 #endif
    609 
    610     KHDEBUG("khmer_shape: from %d length %d", item->item.pos, item->item.length);
    611     while (sstart < end) {
    612         HB_Bool invalid;
    613         int send = khmer_nextSyllableBoundary(item->string, sstart, end, &invalid);
    614         KHDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,
    615                invalid ? "TRUE" : "FALSE");
    616         syllable.item.pos = sstart;
    617         syllable.item.length = send-sstart;
    618         syllable.glyphs = item->glyphs + first_glyph;
    619         syllable.attributes = item->attributes + first_glyph;
    620         syllable.offsets = item->offsets + first_glyph;
    621         syllable.advances = item->advances + first_glyph;
    622         syllable.num_glyphs = item->num_glyphs - first_glyph;
    623         if (!khmer_shape_syllable(openType, &syllable)) {
    624             KHDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);
    625             item->num_glyphs += syllable.num_glyphs;
    626             return FALSE;
    627         }
    628         /* fix logcluster array */
    629         KHDEBUG("syllable:");
    630         for (i = first_glyph; i < first_glyph + (int)syllable.num_glyphs; ++i)
    631             KHDEBUG("        %d -> glyph %x", i, item->glyphs[i]);
    632         KHDEBUG("    logclusters:");
    633         for (i = sstart; i < send; ++i) {
    634             KHDEBUG("        %d -> glyph %d", i, first_glyph);
    635             logClusters[i-item->item.pos] = first_glyph;
    636         }
    637         sstart = send;
    638         first_glyph += syllable.num_glyphs;
    639     }
    640     item->num_glyphs = first_glyph;
    641     return TRUE;
    642 }
    643 
    644 void HB_KhmerAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
    645 {
    646     int end = from + len;
    647     const HB_UChar16 *uc = text + from;
    648     hb_uint32 i = 0;
    649     HB_UNUSED(script);
    650     attributes += from;
    651     while ( i < len ) {
    652 	HB_Bool invalid;
    653 	hb_uint32 boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
    654 
    655 	attributes[i].charStop = TRUE;
    656 
    657 	if ( boundary > len-1 ) boundary = len;
    658 	i++;
    659 	while ( i < boundary ) {
    660 	    attributes[i].charStop = FALSE;
    661 	    ++uc;
    662 	    ++i;
    663 	}
    664 	assert( i == boundary );
    665     }
    666 }
    667 
    668