Home | History | Annotate | Download | only in layout
      1 /*
      2  * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
      3  *
      4  * This file is a modification of the ICU file IndicReordering.h
      5  * by Jens Herden and Javier Sola for Khmer language
      6  *
      7  */
      8 
      9 #ifndef __KHMERREORDERING_H
     10 #define __KHMERREORDERING_H
     11 
     12 /**
     13  * \file
     14  * \internal
     15  */
     16 
     17 #include "LETypes.h"
     18 #include "OpenTypeTables.h"
     19 
     20 U_NAMESPACE_BEGIN
     21 
     22 class LEGlyphStorage;
     23 
     24 // Vocabulary
     25 //     Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the
     26 //                     center of the syllable; it can be surrounded by coeng (subscript) consonants, vowels,
     27 //                     split vowels, signs... but there is only one base in a syllable, and it has to be coded as
     28 //                     the first character of the syllable.
     29 //     split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
     30 //                     Khmer language has five of them. Khmer split vowels either have one part before the
     31 //                     base and one after the base or they have a part before the base and a part above the base.
     32 //                     The first part of all Khmer split vowels is the same character, identical to
     33 //                     the glyph of Khmer dependent vowel SRA EI
     34 //     coeng -->  modifier used in Khmer to construct coeng (subscript) consonants.
     35 //                Unlike in Indian languages, the coeng modifies the consonant that follows it,
     36 //                not the one preceding it. Each consonant has two forms, the base form and the subscript form:
     37 //                the base form is the normal one (using the consonant's code point); the subscript form is
     38 //                displayed when the combination coeng + consonant is encountered.
     39 //     Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
     40 //     Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
     41 //     Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
     42 //     Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
     43 //                          if it is attached to a consonant of the first series or a consonant of the second series.
     44 //                          Most consonants have an equivalent in the other series, but some of them exist only in
     45 //                          one series (for example SA). If we want to use the consonant SA with a vowel sound that
     46 //                          normally corresponds to a vowel accompanying a consonant
     47 //                          of the other series, then we need to use a consonant shifter: TRIISAP (x17CA) or
     48 //                          MUSIKATOAN (x17C9). TRIISAP changes a first series consonant to have a second series
     49 //                          vowel sound and MUSIKATOAN a second series consonant to have a first series vowel sound.
     50 //                          Consonant shifters are both normally superscript marks, but, when they are followed by a
     51 //                          superscript, they change shape and take the form of subscript dependent vowel SRA U.
     52 //                          If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
     53 //                          should be typed before the coeng. Unicode 4.0 breaks the standard and says that they should
     54 //                          be placed after the coeng consonant.
     55 //     Dependent vowel ->   In Khmer, dependent vowels can be placed above, below, before or after the base.
     56 //                          Each vowel has its own position. Only one vowel per syllable is allowed.
     57 //     Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign is
     58 //                          allowed in a syllable.
     59 //
     60 //
     61 
     62 struct KhmerClassTable    // Character classification for Khmer; the class list must include all types of components that can be used inside a syllable
     63 {
     64     enum CharClassValues  // Order is important here! It must match the order of the entries found in each horizontal
     65                           // line (row) of the state table for Khmer (file KhmerReordering.cpp).
     66     {
     67         CC_RESERVED             =  0, // NOTE(review): presumably any character that has no other class - confirm in KhmerReordering.cpp
     68         CC_CONSONANT            =  1, // Consonant of type 1 or independent vowel
     69         CC_CONSONANT2           =  2, // Consonant of type 2
     70         CC_CONSONANT3           =  3, // Consonant of type 3
     71         CC_ZERO_WIDTH_NJ_MARK   =  4, // Zero width non-joiner character (0x200C)
     72         CC_CONSONANT_SHIFTER    =  5, // Consonant shifter (TRIISAP or MUSIKATOAN, see Vocabulary)
     73         CC_ROBAT                =  6, // Khmer special diacritic accent - treated differently in the state table
     74         CC_COENG                =  7, // Subscript consonant combining character
     75         CC_DEPENDENT_VOWEL      =  8, // Dependent vowel (see Vocabulary)
     76         CC_SIGN_ABOVE           =  9, // Above sign (see Vocabulary)
     77         CC_SIGN_AFTER           = 10, // Post sign (see Vocabulary)
     78         CC_ZERO_WIDTH_J_MARK    = 11, // Zero width joiner character (0x200D)
     79         CC_COUNT                = 12  // Number of character classes (one past the last class value above)
     80     };
     81     // Bit flags; their values do not overlap the low 16 bits selected by CF_CLASS_MASK.
     82     enum CharClassFlags
     83     {
     84         CF_CLASS_MASK    = 0x0000FFFF,  // low 16 bits; NOTE(review): presumably hold a CharClassValues code - confirm
     85         // type flags (all in the top byte)
     86         CF_CONSONANT     = 0x01000000,  // flag to speed up comparing
     87         CF_SPLIT_VOWEL   = 0x02000000,  // flag for a split vowel -> the first part is added in front of the syllable
     88         CF_DOTTED_CIRCLE = 0x04000000,  // add a dotted circle if a character with this flag is the first in a syllable
     89         CF_COENG         = 0x08000000,  // flag to speed up comparing
     90         CF_SHIFTER       = 0x10000000,  // flag to speed up comparing
     91         CF_ABOVE_VOWEL   = 0x20000000,  // flag to speed up comparing
     92 
     93         // position flags (one bit each; CF_POS_MASK is the OR of all four)
     94         CF_POS_BEFORE    = 0x00080000,
     95         CF_POS_BELOW     = 0x00040000,
     96         CF_POS_ABOVE     = 0x00020000,
     97         CF_POS_AFTER     = 0x00010000,
     98         CF_POS_MASK      = 0x000f0000
     99     };
    100     // NOTE(review): a CharClass presumably combines a CharClassValues code with CharClassFlags bits - confirm in KhmerReordering.cpp
    101     typedef le_uint32 CharClass;
    102     // ScriptFlags is not referenced anywhere else in this header.
    103     typedef le_int32 ScriptFlags;
    104     // Range of characters described by classTable.
    105     LEUnicode firstChar;   // for Khmer this will become x1780
    106     LEUnicode lastChar;    //  and this x17DF
    107     const CharClass *classTable; // NOTE(review): presumably one entry per character from firstChar to lastChar - confirm
    108     // Returns the CharClass for ch (defined outside this header).
    109     CharClass getCharClass(LEUnicode ch) const;
    110     // Returns a pointer to a const KhmerClassTable (defined outside this header).
    111     static const KhmerClassTable *getKhmerClassTable();
    112 };
    113 
    114 
    115 class KhmerReordering /* not : public UObject because all methods are static */ {
    116 public: // reorder(): takes charCount input characters in theChars plus a script code, with outChars and glyphStorage as outputs.
    117     static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
    118         LEUnicode *outChars, LEGlyphStorage &glyphStorage); // NOTE(review): return value is presumably the number of characters written to outChars - confirm
    119     // NOTE(review): count is passed by reference, presumably to receive the number of entries in the returned map - confirm
    120     static const FeatureMap *getFeatureMap(le_int32 &count);
    121 
    122 private:
    123     // Private constructor: all methods are static, so the class is not meant to be instantiated.
    124     KhmerReordering();
    125     // NOTE(review): presumably returns the index where the syllable starting at prev ends, using classTable to classify chars - confirm in KhmerReordering.cpp
    126     static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
    127 
    128 };
    129 
    130 
    131 U_NAMESPACE_END
    132 #endif
    133