Home | History | Annotate | Download | only in layout
      1 /*
      2  *
      3  * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
      4  *
      5  * Developed at DIT - Government of Bhutan
      6  *
      7  * Contact person: Pema Geyleg - <pema_geyleg (at) druknet.bt>
      8  *
      9  * This file is a modification of the ICU file KhmerReordering.h
     10  * by Jens Herden and Javier Sola who have given all their possible rights to IBM and the Governement of Bhutan
     11  * A first module for Dzongkha was developed by Karunakar under Panlocalisation funding.
     12  * Assistance for this module has been received from Namgay Thinley, Christopher Fynn and Javier Sola
     13  *
     14  */
     15 
     16 #ifndef __TIBETANREORDERING_H
     17 #define __TIBETANORDERING_H
     18 
     19 /**
     20  * \file
     21  * \internal
     22  */
     23 
     24 // #include "LETypes.h"
     25 // #include "OpenTypeTables.h"
     26 
U_NAMESPACE_BEGIN

// Forward declaration only: this header uses LEGlyphStorage solely as a reference
// parameter (TibetanReordering::reorder), so its full definition is not required here.
class LEGlyphStorage;
     30 
     31 // Vocabulary
//     Base ->         A consonant in its full (not subscript) form. It is the
//                     center of the syllable; it can be surrounded by subjoined consonants, vowels,
//                     signs... but there is only one base in a stack, and it has to be coded as
//                     the first character of the syllable. Included here are also groups of base + subjoined
//                     which are represented by one single code point in Unicode (e.g. 0F43), as well as
//                     other characters that might take subjoined consonants or other combining characters.
//     Subjoined ->    Subjoined consonants and groups of subjoined consonants which have a single code-point
//                     to represent the group (even if each subjoined consonant is represented independently
//                     by another code-point).
//     Tsa Phru -->    Tsa Phru character. Bhutanese people will always place it right after the base, but sometimes,
//                     due to "normalization", it is placed after all the subjoined consonants, and it is also
//                     permitted there.
//     A Chung -->     Vowel lengthening mark (0F71). It is placed after the base and any subjoined consonants but before any vowels.
//     Precomposed Sanskrit vowels --> These are combinations of subjoined consonants + vowels that have been assigned
//                     a given code-point (in spite of each single part of them also having a code-point).
//                     They are avoided, and users are encouraged to use the combination of code-points that
//                     represents the same sound instead of using these combined characters. This is included here
//                     for compatibility with possible texts that use them (they are not in the Dzongkha keyboard).
//     Halanta ->      The Halanta or Virama character 0F84 indicates that a consonant should not use its inherent vowel,
//                     in spite of not having other vowels present. It is usually placed immediately after a base consonant,
//                     but in some special cases it can also be placed after a subjoined consonant, so this is also
//                     permitted in this algorithm. (Halanta is always displayed in Tibetan; it is not used as a connecting char.)
     54 //
//     Subjoined vowels -> Dependent vowels (matras) placed below the base and below all subjoined consonants. There
//                     might be as many as three subjoined vowels in a given stack (only one in general text, but up
//                     to three for abbreviations, so they have to be permitted).
//     Superscript vowels -> There are three superscript vowels, and they can be repeated or combined (up to three
//                     times). They can combine with subjoined vowels, and are always coded after these.
//     Anusvara -->    Nasalisation sign. Traditionally placed in absence of vowels, but also after vowels. In some
//                     special cases it can be placed before a vowel, so this is also permitted.
     62 //     Candrabindu ->  Forms of the Anusvara with different glyphs (and different in identity) which can be placed
     63 //                     without vowel or after the vowel, but never before. Cannot combine with Anusvara.
//     Stress marks -> Marks placed above or below a syllable, affecting the whole syllable. They are combining
//                     marks, so they have to be attached to a specific stack. They are used to emphasise a syllable.
     66 //
     67 //     Digits ->       Digits are not considered as non-combining characters because there are a few characters which
     68 //                     combine with them, so they have to be considered independently.
     69 //     Digit combining marks -> dependent marks that combine with digits.
     70 //
     71 //     TODO
//     There are a number of characters in the CJK block that are used in Tibetan script; two of these symbols
//     are used as bases for combining glyphs, and have not been encoded in Tibetan. As these characters are outside
//     of the Tibetan block, they have not been treated in this program.
     75 
     76 
// Character-class table used when analysing the order of characters inside a
// Tibetan syllable. The class list below must include all types of components
// that can be used inside a syllable.
struct TibetanClassTable
{
    // Character classes. Order is important here! This order must be the same as the one found in each
    // horizontal line of the state table for Tibetan (file TibetanReordering.cpp). It assigns one number
    // to each type of character that has to be considered when analysing the order in which
    // characters can be placed.
    enum CharClassValues
    {
        CC_RESERVED             =  0, // Non-combining characters
        CC_BASE                 =  1, // Base consonants, base consonants with subjoined attached in the code point, Sanskrit base marks
        CC_SUBJOINED            =  2, // Subjoined consonants, combinations of more than one subjoined consonant in a single code point
        CC_TSA_PHRU             =  3, // Tsa-Phru character 0F39
        CC_A_CHUNG              =  4, // Vowel lengthening a-chung mark 0F71
        CC_COMP_SANSKRIT        =  5, // Precomposed Sanskrit vowels including subjoined characters and vowels
        CC_HALANTA              =  6, // Halanta character 0F84
        CC_BELOW_VOWEL          =  7, // Subjoined vowels
        CC_ABOVE_VOWEL          =  8, // Superscript vowels
        CC_ANUSVARA             =  9, // Tibetan sign Rjes Su Nga Ro 0F7E
        CC_CANDRABINDU          = 10, // Tibetan sign Sna Ldan and Nyi Zla Naa Da 0F82, 0F83
        CC_VISARGA              = 11, // Tibetan sign Rnam Bcad (0F7F)
        CC_ABOVE_S_MARK         = 12, // Stress marks placed above the text
        CC_BELOW_S_MARK         = 13, // Stress marks placed below the text
        CC_DIGIT                = 14, // Dzongkha digits
        CC_PRE_DIGIT_MARK       = 15, // Mark placed before the digit
        CC_POST_BELOW_DIGIT_M   = 16, // Mark placed below or after the digit
        CC_COUNT                = 17  // This is the number of character classes
    };

    // Bit flags and masks. All flag bits lie above the low 16 bits covered by CF_CLASS_MASK.
    // NOTE(review): presumably a CharClass packs a CharClassValues number in the low 16 bits
    // together with these flags in the upper bits - confirm against the class table in TibetanReordering.cpp.
    enum CharClassFlags
    {
        CF_CLASS_MASK    = 0x0000FFFF,  // mask covering the low 16 bits

        CF_DOTTED_CIRCLE = 0x04000000,  // add a dotted circle if a character with this flag is the first in a syllable
        CF_DIGIT         = 0x01000000,  // flag to speed up comparison
        CF_PREDIGIT      = 0x02000000,  // flag to detect pre-digit marks for reordering

        // position flags
        CF_POS_BEFORE    = 0x00080000,
        CF_POS_BELOW     = 0x00040000,
        CF_POS_ABOVE     = 0x00020000,
        CF_POS_AFTER     = 0x00010000,
        CF_POS_MASK      = 0x000f0000   // union of the four CF_POS_* bits above
    };

    // Type of one class-table entry (32-bit unsigned).
    typedef le_uint32 CharClass;

    // NOTE(review): not used anywhere in this header - confirm whether other files still rely on it.
    typedef le_int32 ScriptFlags;

    LEUnicode firstChar;   // for Tibetan this will become 0x0F00
    LEUnicode lastChar;    //  and this 0x0FFF
    // Table of CharClass entries.
    // NOTE(review): presumably one entry per code point in firstChar..lastChar - confirm in TibetanReordering.cpp.
    const CharClass *classTable;

    // Returns the CharClass for the character ch.
    // NOTE(review): the definition is not in this header; how code points outside
    // firstChar..lastChar are handled is not visible here - confirm.
    CharClass getCharClass(LEUnicode ch) const;

    // Returns a pointer to a TibetanClassTable.
    // NOTE(review): presumably a single shared table for the Tibetan block; defined outside this header.
    static const TibetanClassTable *getTibetanClassTable();
};
    132 
    133 
// Static-only helper class for Tibetan text; it is never instantiated (the
// constructor is private). The method definitions are not part of this header,
// so the notes marked NOTE(review) below are assumptions to be confirmed
// against the implementation file.
class TibetanReordering /* not : public UObject because all methods are static */ {
public:
    // Processes charCount characters from theChars, writing characters to outChars
    // and having access to glyphStorage for modification (passed by non-const reference).
    // NOTE(review): presumably returns the number of characters written to outChars, and
    // outChars must be large enough for any inserted characters (e.g. dotted circles) -
    // confirm the required capacity with the caller. The use of scriptCode is not visible here.
    static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
        LEUnicode *outChars, LEGlyphStorage &glyphStorage);

    // Returns a pointer to FeatureMap data; count is passed by reference.
    // NOTE(review): presumably count receives the number of entries in the returned map - confirm.
    static const FeatureMap *getFeatureMap(le_int32 &count);

private:
    // do not instantiate
    TibetanReordering();

    // NOTE(review): presumably scans chars starting at index prev (bounded by charCount), classifying
    // characters through classTable, and returns the index at which the syllable ends - confirm.
    static le_int32 findSyllable(const TibetanClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);

};
    148 
    149 
U_NAMESPACE_END
#endif // __TIBETANREORDERING_H
    152