Home | History | Annotate | Download | only in transforms
      1 <?xml version="1.0" encoding="UTF-8" ?>
      2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
      3 <!--
      4 Copyright © 1991-2018 Unicode, Inc.
      5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
      6 For terms of use, see http://www.unicode.org/copyright.html
      7 -->
      8 <supplementalData>
      9 	<version number="$Revision: 14412 $"/>
     10 	<transforms>
     11 		<transform source="my" target="Zawgyi" direction="forward" alias="my-t-my-d0-zawgyi">
     12 			<tRule><![CDATA[
     13 # This transform converts Unicode Burmese text into Zawgyi font encoded
     14 # form. Zawgyi is a popular, non-standard encoding scheme in Myanmar
     15 # that uses the same code range as Myanmar Unicode but assigns different
     16 # characters or glyphs to some codepoints. In addition to character remapping,
     17 # context-based reordering of codepoints is needed to give readable
     18 # output when the output is displayed with a Zawgyi font such as
     19 # ZawgyiOne.ttf or ZawgyiOne2008.ttf.
     20 #
     21 # The transform is done in two main stages:
     22 # (1) Map all Unicode codepoints to their Zawgyi counterparts (Phase 0 below).
     23 # (2) Perform reordering and contextual adjustments (Phases 1-5 below).
     24 
     25 # Character classes used by the rules below. U+1040-U+1049 are the modern Burmese digits.
     26 $nondigits = [^\u1040-\u1049];  # any character outside that digit range
     27 $consonant = [\u1000-\u1021];
     28 $narrowconsonant = [\u1001\u1002\u1004\u1005\u1007\u100b-\u100e\u1012\u1013\u1015-\u1017\u1019\u101d\u1020\u1025\u1026\u108f];  # Phase 2 picks medial ra variants by narrow vs. wide base
     29 $wideconsonant = [\u1000\u1003\u1006\u1009\u100a\u100f\u1010\u1011\u1018\u101c\u101e\u101f\u1021];
     30 $widenya = [\u100a\u106b];  # matched directly after medial ra in Phase 2
     31 $othernya = [\u1009\u106a];  # emitted as U+106A after medial ra in Phase 2
     32 $vowelsign = [\u102B-\u1030\u1032];
     33 $vowelmedial = [\u102B-\u1030\u1032\u103c-\u103F];  # the vowelsign members plus U+103C-U+103F
     34 $ukinzi = [\u1004\u101b\u105a]\u103A\u1039;  # kinzi in Unicode order: one of three letters, then U+103A U+1039
     35 # The classes below are referenced only after Phase 0 (the Z suffix presumably stands for Zawgyi code points).
     36 $medialraZ = [\u103b\u107e-\u1084];
     37 $lowsignZ = [\u102f\u1030\u1037\u103a\u103c\u103d\u1087-\u108a];
     38 $highsignZ = [\u102d\u102e\u1032\u1036\u1039\u103d-\u103e\u1064];
     39 $subscriptitem = [\u1060-\u1063\u1064-\u1068\u106c\u106d\u1070-\u107c\u1085\u1093\u1096];  # the first two ranges are adjacent, i.e. U+1060-U+1068
     40 
     41 $vowelsAndConsonants = [\u1000-\u102a];
     42 
     43 #### Phase 0: CODEPOINT MAPPING FROM UNICODE TO ZAWGYI
     44 # Kinzi rules. The first matching rule wins, so longer patterns must precede shorter ones that share a prefix.
     45 
     46 $ukinzi ($consonant) \u102D \u1036 > $1 \u108e ;
     47 $ukinzi ($consonant) \u102D > $1 \u108b ;
     48 $ukinzi ($consonant) \u102E > $1 \u108C ;
     49 $ukinzi ($consonant) \u1036 > $1 \u108D ;
     50 $ukinzi ($consonant) \u1031 > $1 \u1031 \u1064 ;
     51 $ukinzi ($consonant) \u103B \u102D \u102F > $1 \u103A \u1033 \u108B ;
     52 $ukinzi ($consonant) \u103B \u102D > $1 \u103A \u108b  ;
     53 $ukinzi ($consonant) \u103B \u102E \u102F > $1 \u103A \u108C \u1033 ;
     54 $ukinzi ($consonant) \u103B \u102E > $1 \u103A \u108C ;
     55 $ukinzi ($consonant) \u103B \u1036 > $1 \u103A \u108D ;
     56 $ukinzi ($consonant) \u103B > $1 \u103A \u1064 ;  # General U+103B case: kept after the longer U+103B rules so they can fire
     57 $ukinzi ($consonant) \u103c > $1 \u103b \u1064; # Kinzi + medial ra
     58 
     59 $ukinzi \u102D > \u108B ;
     60 $ukinzi \u102E  > \u108C  ;
     61 $ukinzi \u1036 > \u108D  ;
     62 
     63 $ukinzi ($consonant) > $1 \u1064 ;  # Fallback: kinzi + consonant with none of the signs above
     64 
     65 \u1025 ($vowelsign) \u1038  > \u106A $1 \u1038 ;
     66 \u1025 \u102f \u1036  > \u1025 \u1036 \u1033 ;
     67 # U+102D followed by U+1036 becomes the single code point U+108E.
     68 \u102D \u1036 > \u108E  ;
     69 
     70 # Some composed lower output: sign pairs that map to one code point. Listed before the single-sign remaps so the pairs match first.
     71 \u103d \u103e > \u108a ;
     72 
     73 \u103e \u102f > \u1088 ;
     74 
     75 \u103E \u1030 > \u1089 ;
     76 # Single-sign remaps: each of U+103A-U+103E moves down by one code point, and U+103F becomes U+1086.
     77 \u103A > \u1039 ;
     78 \u103B > \u103A ;
     79 \u103C > \u103B ;
     80 \u103D > \u103C ;
     81 \u103E  > \u103D ;
     82 \u103F > \u1086 ;
     83 
     84 ([\u1019]) \u103e \u1030 > $1 \u103d \u1034;  # A special case with signs.
     85 
     86 \u102B \u103A > \u105A ;
     87 
     88 \u1039 \u1010 \u103d > \u1096 ; # Very special case
     89 # U+1039 plus a letter collapses to one code point (most of these outputs are members of $subscriptitem).
     90 \u1039 \u1000 > \u1060 ;
     91 \u1039 \u1001 > \u1061 ;
     92 \u1039 \u1002 > \u1062 ;
     93 \u1039 \u1003 > \u1063 ;
     94 \u1039 \u1005 > \u1065 ;
     95 \u1039 \u1006 > \u1067 ;
     96 \u1039 \u1007 > \u1068 ;
     97 \u1039 \u1008 > \u1069 ;
     98 \u1039 \u100B > \u106C ;
     99 \u1039 \u100C > \u106D ;
    100 \u1039 \u100D > \u106E ;
    101 
    102 \u100d \u1039 \u100E > \u106F ;  # three code points collapse to one (same output as the two-code-point rule below)
    103 \u1039 \u100E > \u106F ;
    104 
    105 \u1039 \u100F > \u1070 ;
    106 \u1039 \u1010 > \u1072 ;
    107 \u1039 \u1011 > \u1074 ;
    108 \u1039 \u1012 > \u1075 ;
    109 \u1039 \u1013 > \u1076 ;
    110 \u1039 \u1014 > \u1077 ;
    111 \u1039 \u1015 > \u1078 ;
    112 \u1039 \u1016 > \u1079 ;
    113 \u1039 \u1017 > \u107A ;
    114 \u1039 \u1018 > \u1093 ;
    115 \u1039 \u1019 > \u107C ;
    116 \u1039 \u101C > \u1085 ;
    117 # Letter + U+1039 + letter sequences (and one four-code-point sequence) that collapse to a single code point.
    118 \u100d\u1039\u100D > \u106E ;
    119 \u100F\u1039\u100D > \u1091 ;
    120 \u100B\u1039\u100C > \u1092 ;
    121 \u100B\u1039\u100B > \u1097 ;
    122 \u104E\u1004\u103A\u1038 > \u104E ;
    123 
    124 #### PHASE 1: Everything is now in Zawgyi code points. REORDERING RULES.
    125 ::Null;  # pass boundary: the rules below run on the output of Phase 0
    126 
    127 # E Vowel + medial ra. Move the e vowel
    128 ($consonant) \u103b \u1031 > \u1031 \u103b $1 ;
    129 # U+103B without a following U+1031 is moved in front of the consonant.
    130 ($consonant) \u103b > \u103b $1 ;
    131 # U+1031 moves to the front; U+1037 is rewritten to U+1094 and placed before U+103D.
    132 ($consonant) \u103d \u1031 \u1037 > \u1031 $1 \u1094 \u103D ;
    133 # U+1031 moves in front of a consonant that carries U+108A or a run of U+103A/U+103D/U+103E.
    134 ($consonant) (\u108a) \u1031 > \u1031 $1 $2 ;
    135 ($consonant) ([\u103a\u103d\u103e]+) \u1031 > \u1031 $1 $2 ;
    136 
    137 # Ra + kinzi
    138 ($consonant) \u1064 \u103b > \u103b $1 \u1064 ;
    139 
    140 # E vowel plus medials
    141 ($consonant) ([\u103a\u103c-\u103d]) \u1031 > \u1031 $1 $2 ;  # NOTE(review): U+103A and U+103D look already handled by the + rule above; only U+103C seems to reach here - confirm
    142 
    143 # No medials intervening.
    144 ($vowelsAndConsonants) \u1031 > \u1031 $1 ;
    145 
    146 # Handle Na with lower modifiers.
    147 \u1014 ($subscriptitem) > \u108f $1 ;
    148 # With a trailing U+1037: the dot is rewritten to U+1094. U+1014 becomes U+108F when a low sign is present.
    149 \u1014 ($lowsignZ) ($highsignZ) \u1037 > \u108f $1 $2 \u1094;
    150 \u1014 ($highsignZ) ($lowsignZ) \u1037 > \u108f $1 $2 \u1094;
    151 \u1014 ($highsignZ) \u1037 > \u1014 $1 \u1094;
    152 
    153 # Two medials
    154 \u103a \u103c > \u107d \u103c;
    155 
    156 # a special case
    157 \u1014 \u1032 \u1037 > \u1014 \u1032 \u1094;  # NOTE(review): U+1032 is in $highsignZ, so the ($highsignZ) U+1037 rule above appears to match first with the same output - confirm
    158 \u1014 \u1037 > \u1014 \u1094;
    159 
    160 \u1014 \u1032 ($lowsignZ) \u1037 > \u108f $1 \u1032 \u1094;  # NOTE(review): looks unreachable - the earlier ($highsignZ) ($lowsignZ) U+1037 rule matches first and keeps U+1032 before the low sign - confirm intent
    161 \u1014 ($highsignZ) ($lowsignZ) > \u108f $1 $2;
    162 \u1014 ($lowsignZ) ($highsignZ) > \u108f $1 $2;
    163 
    164 \u1014 ($lowsignZ) \u1037 > \u108f $1 \u1094;
    165 
    166 \u1014 ($lowsignZ) > \u108f $1;
    167 
    168 # Move 1037 dot to right with other descenders.
    169 ($lowsignZ) ($highsignZ*) \u1037 > $1 $2 \u1094;  # zero or more high signs may sit between the low sign and the dot
    170 # U+1040 between a non-digit and a sign in U+102B-U+103F is rewritten to U+101D.
    171 ($nondigits) \u1040 ([\u102B-\u103F]) > $1 \u101D $2;
    172 # Handle lack of 104E  MYANMAR SYMBOL AFOREMENTIONED
    173 ($nondigits) \u104e > $1 \u1044;
    174 \u1031 \u1040 ($nondigits) > \u1031 \u101D $1;  # U+1040 after U+1031 and before a non-digit is likewise rewritten to U+101D
    175 \u1009 \u103A > \u1025 \u103A;
    176 \u1025 \u102E > \u1026;
    177 \u1037 \u103A > \u103A \u1037;
    178 # Swap so that U+102D/U+102E/U+1032 precede U+102B/U+102C/U+102F/U+1030.
    179 ([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) > $2 $1;
    180 
    181 # Medial plus vowel sign U
    182 ($medialraZ) ($consonant) \u102f > $1 $2 \u1033;
    183 
    184 ## Phase 2: Further adjustments
    185 ::Null;
    186 # A medial ra that still trails the cluster is replaced by a variant emitted in front; every captured item is re-emitted.
    187 # Handle consonant, subscripted consonant, medial ra
    188 ($narrowconsonant) ($subscriptitem) ($highsignZ) $medialraZ > \u1083 $1 $2 $3 ;
    189 ($wideconsonant) ($subscriptitem) ($highsignZ) $medialraZ > \u1084 $1 $2 $3 ;  # $3 restored: the high sign was previously captured but dropped
    190 ($narrowconsonant) ($subscriptitem) $medialraZ > \u1081 $1 $2 ;
    191 ($wideconsonant) ($subscriptitem) $medialraZ > \u1082 $1 $2 ;
    192 
    193 \u103c \u1094 > \u103c \u1095 ;
    194 
    195 # Medial ra variations, context dependent
    196 $medialraZ ($narrowconsonant) \u102d \u103d \u102f > \u107f $1 \u102d \u1087 \u1083 ;  # NOTE(review): output ends in U+1083, itself a member of $medialraZ; sibling rules map U+102F to U+1033 - confirm
    197 $medialraZ ($wideconsonant) \u102d \u103d \u102f > \u1080 $1 \u102d \u1087 \u1083 ;  # NOTE(review): same trailing U+1083 question as the narrow rule
    198 # Low sign followed by high sign: narrow base gets U+1083, wide base gets U+1084.
    199 $medialraZ ($narrowconsonant) ($lowsignZ) ($highsignZ) > \u1083 $1 $2 $3 ;
    200 $medialraZ ($wideconsonant) ($lowsignZ) ($highsignZ) > \u1084 $1 $2 $3 ;
    201 # High sign only: U+107F (narrow) or U+1080 (wide).
    202 $medialraZ ($narrowconsonant) ($highsignZ) > \u107f $1 $2 ;
    203 $medialraZ ($wideconsonant) ($highsignZ) > \u1080 $1 $2 ;
    204 # U+1030 after the base is rewritten to U+1034.
    205 $medialraZ ($narrowconsonant) \u1030 > \u103b $1 \u1034 ;
    206 $medialraZ ($wideconsonant) \u1030 > \u107e $1 \u1034 ;
    207 # U+102F after the base is rewritten to U+1033 (the capture group is not referenced in the output).
    208 $medialraZ ($narrowconsonant) (\u102f) > \u103b $1 \u1033 ;
    209 $medialraZ ($wideconsonant) (\u102f) > \u107e $1 \u1033 ;
    210 # Any other low sign: U+1081 (narrow) or U+1082 (wide).
    211 $medialraZ ($narrowconsonant) ($lowsignZ) > \u1081 $1 $2 ;
    212 $medialraZ ($wideconsonant) ($lowsignZ) > \u1082 $1 $2 ;
    213 
    214 $medialraZ ($widenya) > \u1082 $1 ;
    215 $medialraZ ($othernya) > \u103b \u106a ;  # $1 is not re-emitted: either member of $othernya comes out as U+106A
    216 # Bare base with no following sign: U+103B (narrow) or U+107E (wide).
    217 $medialraZ ($narrowconsonant) > \u103b $1 ;
    218 $medialraZ ($wideconsonant) > \u107e $1 ;
    219 
    220 \u1009 ($lowsignZ) > \u106a $1;
    221 
    222 \u100A ($lowsignZ)> \u106B $1  ;  ## NYA and NNYA
    223 
    224 \u103d \u102d > \u102d \u103d;
    225 # After U+103A, U+102F becomes U+1033 and a following dot (U+1037/U+1094/U+1095) becomes U+1095.
    226 \u103a ($highsignZ) \u102f [\u1037\u1094\u1095] > \u103a $1 \u1033 \u1095;
    227 \u103a \u102f [\u1037\u1094\u1095] > \u103a \u1033 \u1095;
    228 
    229 \u103a \u102f > \u103a \u1033;
    230 # Kinzi combo
    231 \u1064 \u102e > \u108c ;
    232 
    233 ##### Phase 3: sign ordering and combined forms
    234 ::Null;  # pass boundary: the rules below run on the output of Phase 2
    235 ([\u103C\u103D\u103E]+) \u103B > \u103B $1;
    236 ([\u103D\u103E]+) \u103C > \u103C $1;
    237 \u103E\u103D > \u103D\u103E ;
    238 # U+1037 is moved after a following sign from the listed set.
    239 \u1037 ([\u102D-\u1030\u1032\u1036]) > $1 \u1037;
    240 ($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant)> $1 \u103A $2 $3;  # U+103A is moved in front of the preceding sign
    241 
    242 # Combine vowel and consonant signs
    243 \u103d \u102f > \u1088;
    244 
    245 \u1033 \u1094 > \u1033 \u1095; # Wider spacing on lower dot
    246 # With a high sign between the base and U+102F, U+102F becomes U+1033.
    247 ($medialraZ) ($consonant) ($highsignZ) \u102f > $1 $2 $3 \u1033;
    248 
    249 ##### Phase 4.  More reorderings of medials
    250 ::Null;  # pass boundary: the rules below run on the output of Phase 3
    251 
    252 ([\u103D\u103E]) \u103C > \u103C $1;
    253 \u103E\u103D > \u103D\u103E ;
    254 \u1038 ($vowelmedial) > $1 \u1038;  # U+1038 is moved after a following vowel or medial sign
    255 \u1038 ([\u1036\u1037\u103A]) > $1 \u1038;
    256 
    257 \u1036 \u102f > \u102f \u1036;  # NOTE(review): Phase 5 contains the inverse swap (U+102F U+1036 to U+1036 U+102F) - confirm both are intended
    258 \u103a ([\u1064\u108b-\u108e]) \u102d \u102f > \u103a $1 \u102d \u1033;
    259 \u103a \u102d \u102f > \u103a \u102d \u1033;
    260 
    261 #### Phase 5: final ordering, removal of redundant signs, and standardization
    262 ::Null;  # pass boundary: the rules below run on the output of Phase 4
    263 ($consonant) \u103B \u103A > $1 \u103A \u103B;
    264 ([\u103C\u103D\u103E]) \u103B > \u103B $1;
    265 ([\u103D\u103E]) \u103C > \u103C $1;
    266 \u103E\u103D > \u103D\u103E ;
    267 ([\u102D-\u1030\u1032]) \u103A ($consonant) \u103A > $1 $2 \u103A;  # drops the first of the two U+103A
    268 \u102D \u103A > \u102D;  # this rule and the next two delete a U+103A that directly follows U+102D, U+102E or U+102F
    269 \u102E \u103A > \u102E;
    270 \u102F \u103A > \u102F;
    271 \u102D \u102E > \u102E;  # of the pair, only U+102E is kept
    272 \u102F \u1030 > \u102F;  # of the pair, only U+102F is kept
    273 \u102B \u102B+ > \u102B;  # from here to U+103E: a run of the same sign collapses to a single occurrence
    274 \u102C \u102C+ > \u102C;
    275 \u102D \u102D+ > \u102D;
    276 \u102E \u102E+ > \u102E;
    277 \u102F \u102F+ > \u102F;
    278 \u1030 \u1030+ > \u1030;
    279 \u1031 \u1031+ > \u1031;
    280 \u1032 \u1032+ > \u1032;
    281 \u1036 \u1036+ > \u1036;
    282 \u103A \u103A+ > \u103A;
    283 \u103B \u103B+ > \u103B;
    284 \u103C \u103C+ > \u103C;
    285 \u103D \u103D+ > \u103D;
    286 \u103E \u103E+ > \u103E;
    287 
    288 # Visually identical orderings - standardize
    289 \u102f \u102D > \u102D \u102f ;
    290 \u102f \u1036 > \u1036 \u102f ;
    291 \u1039 \u1037 > \u1037 \u1039 ;
    292 \u103c \u1032 > \u1032 \u103c ;
    293 \u103c \u102e > \u102e \u103c ;
    294 # U+103D directly before U+1088 is removed.
    295 \u103d \u1088 > \u1088 ;
    296 			]]></tRule>
    297 		</transform>
    298 	</transforms>
    299 </supplementalData>
    300