Home | History | Annotate | Download | only in transforms
      1 <?xml version="1.0" encoding="UTF-8" ?>
      2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
      3 <!-- Copyright © 1991-2015 Unicode, Inc.
      4 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
      5 For terms of use, see http://www.unicode.org/copyright.html -->
      6 <supplementalData>
      7 	<version number="$Revision: 12347 $" />
      8 	<transforms>
      9 		<transform source="my" target="my_FONIPA" direction="forward" alias="my-fonipa-t-my">
     10 			<tRule><![CDATA[
     11 
     12 # Pronunciation rules for Burmese.
     13 #
     14 # The following rules are lexical and heuristic: lexical in the sense
     15 # that they generate phoneme strings which may further undergo
     16 # post-lexical phonological processes, in particular voicing, to
     17 # result in actual surface forms; heuristic in the sense that they try
     18 # to resolve ambiguities, especially around reduced vowels, in a
     19 # systematic way that may be incorrect in many situations. Vowel
     20 # reduction depends on many factors, such as morphemic structure,
     21 # which are not available here.
     22 
     23 #
     24 # Definitions
     25 #
     26 
     27 # Dependent vowel signs
     28 $vs_AA = \u102B;  # MYANMAR VOWEL SIGN TALL AA
     29 $vs_aa = \u102C;  # MYANMAR VOWEL SIGN AA
     30 $vs_i = \u102D;  # MYANMAR VOWEL SIGN I
     31 $vs_ii = \u102E;  # MYANMAR VOWEL SIGN II
     32 $vs_u = \u102F;  # MYANMAR VOWEL SIGN U
     33 $vs_uu = \u1030;  # MYANMAR VOWEL SIGN UU
     34 $vs_e = \u1031;  # MYANMAR VOWEL SIGN E
     35 $vs_ai = \u1032;  # MYANMAR VOWEL SIGN AI
     36 
     37 # Various signs
     38 $anusvara = \u1036;  # MYANMAR SIGN ANUSVARA
     39 $visarga = \u1038;  # MYANMAR SIGN VISARGA
     40 $virama = \u1039;  # MYANMAR SIGN VIRAMA; rewritten to ASAT during preprocessing
     41 $asat = \u103A;  # MYANMAR SIGN ASAT; leftovers are deleted in postprocessing
     42 
     43 # Dependent (medial) consonant signs
     44 $med_y = \u103B;  # MYANMAR CONSONANT SIGN MEDIAL YA
     45 $med_r = \u103C;  # MYANMAR CONSONANT SIGN MEDIAL RA
     46 $med_w = \u103D;  # MYANMAR CONSONANT SIGN MEDIAL WA
     47 $med_h = \u103E;  # MYANMAR CONSONANT SIGN MEDIAL HA
     48 
     49 # Independent letters and letter-like punctuation symbols
     50 $independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
     51 
     52 $creaky = \u0330;  # COMBINING TILDE BELOW, emitted after a vowel for creaky tone
     53 $high = \u0301;  # COMBINING ACUTE ACCENT, emitted after a vowel for high tone
     54 $low = \u0300;  # COMBINING GRAVE ACCENT, emitted after a vowel for low tone
     55 $coda = [$creaky $high $low   ];  # TODO: remove if unused
     56 
     57 #
     58 # Preprocessing
     59 #
     60 
     61 ::NFC;
     62 
     63 # Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
     64 $vs_AA → $vs_aa;
     65 
     66 # Unstack kinzi (U+1004 NGA + U+103A ASAT, plus U+1039 VIRAMA) into plain NGA + ASAT.
     67 # Hmm, what would happen if the syllable ending in kinzi had non-low tone?
     68 \u1004 $asat $virama → \u1004 $asat;  # NOTE(review): NGA + ASAT operands reconstructed from the comment; confirm against upstream CLDR
     69 
     70 # Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
     71 $virama → $asat;
     72 
     73 # Unstack U+103F GREAT SA.
     74 \u103F → \u101E $asat \u101E;  # NOTE(review): reconstructed as U+101E SA + ASAT + SA; confirm against upstream CLDR
     75 
     76 # Insert a syllable boundary marker /./ before every independent letter.
     77 ::Null;
     78 [^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;
     79 
     80 # Insert default inherent vowel: creaky-toned /a/ at the end, schwa (U+0259) everywhere else.
     81 ::Null;
     82 ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
     83 ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \.  → $1 \u0259;  # NOTE(review): schwa output reconstructed; confirm against upstream CLDR
     84 
     85 # Allow for additional coda consonants.
     86 #
     87 # This only covers a few of the cases in which full coda consonants
     88 # can appear in loanwords. The general situation is somewhat rare and
     89 # is more easily dealt with in a formalism that can impose structural
     90 # constraints on syllables more easily.
     91 ::Null;
     92 $asat ($visarga)? [\u1000-\u102A] { $asat  ;
     93 
     94 # Deal with U+104E MYANMAR SYMBOL AFOREMENTIONED early.
     95   l\.a $high ;
     96 
     97 #
     98 # Rhymes
     99 #
    100 
    101 ::Null;
    102 
    103   ;
    104 
    105   ;  # in  ~  /m..z/
    106 
    107    $creaky ;
    108    $high ;
    109    $low ;
    110 
    111   ;  # maybe sometimes /e/
    112 
    113    $creaky ;
    114    $high ;
    115    $low ;
    116 
    117    $creaky;
    118    $high;
    119    $low;
    120 
    121   a $creaky ;
    122   a $high ;
    123   a $low ;
    124 
    125   a;
    126 
    127   a $creaky ;
    128   a $high ;
    129   a $low ;
    130 
    131   a;
    132 
    133   a $creaky ;
    134   a $high ;
    135   a $low ;
    136 
    137    $creaky;
    138    $high;
    139    $low;
    140 
    141   a;
    142 
    143 $vs_aa    $creaky ;
    144 $vs_aa    $high ;
    145 $vs_aa    $low ;
    146 $vs_aa   a;
    147 $vs_aa   a $creaky ;
    148 $vs_aa   a $high ;
    149 $vs_aa   a $low ;
    150 $vs_aa   a $creaky ;
    151 $vs_aa   a $high ;
    152 $vs_aa   a $low ;
    153 $vs_aa   a;  # in  /k.la.s/ (club cell)
    154 $vs_aa    $creaky;
    155 $vs_aa    $high;
    156 $vs_aa    $low;
    157 $vs_aa   a $creaky;  # redundant creaky tone
    158 $vs_aa   a $high;
    159 $vs_aa  a $low;
    160 
    161 $vs_i   e;
    162 $vs_i   e;
    163 $vs_i   e;
    164 $vs_i   e $creaky ;
    165 $vs_i   e $high ;
    166 $vs_i   e $low ;
    167 $vs_i   e;
    168 $vs_i   e $creaky ;
    169 $vs_i   e $high ;
    170 $vs_i   e $low ;
    171 $vs_i $vs_u   a;
    172 $vs_i $vs_u   a $creaky ;
    173 $vs_i $vs_u   a $high ;
    174 $vs_i $vs_u   a $low ;
    175 $vs_i $vs_u   a $creaky ;
    176 $vs_i $vs_u   a $high ;
    177 $vs_i $vs_u   a $low ;
    178 $vs_i $vs_u   o $creaky;
    179 $vs_i $vs_u   o $high;
    180 $vs_i $vs_u   o $low;  # in  /k/
    181 $vs_i $vs_u   o $creaky;
    182 $vs_i $vs_u   o $high;
    183 $vs_i $vs_u  o $low;
    184 $vs_i $anusvara   e $creaky ;
    185 $vs_i $anusvara   e $high ;
    186 $vs_i $anusvara  e $low ;
    187 $vs_i  i $creaky;
    188 
    189 $vs_ii   i $creaky;  # this does not usually occur
    190 $vs_ii   i $high;
    191 $vs_ii  i $low;
    192 
    193 $vs_u   o;
    194 $vs_u   o;
    195 $vs_u   o $creaky ;
    196 $vs_u   o $high ;
    197 $vs_u   o $low ;
    198 $vs_u   o;
    199 $vs_u   o $creaky ;
    200 $vs_u   o $high ;
    201 $vs_u   o $low ;
    202 $vs_u   o;
    203 $vs_u   o $creaky ;
    204 $vs_u   o $high ;
    205 $vs_u   o $low ;
    206 $vs_u $anusvara   o $creaky ;
    207 $vs_u $anusvara   o $high ;
    208 $vs_u $anusvara  o $low ;
    209 $vs_u  u $creaky;
    210 
    211 $vs_uu   u $creaky;  # this does not usually occur
    212 $vs_uu   u $high;
    213 $vs_uu  u $low;
    214 
    215 $vs_e   ;
    216 $vs_e $vs_aa   a;
    217 $vs_e $vs_aa   a $creaky ;
    218 $vs_e $vs_aa   a $high ;
    219 $vs_e $vs_aa   a $low ;
    220 $vs_e $vs_aa    $creaky;
    221 $vs_e $vs_aa    $high;  # redundant high tone; this does not usually occur
    222 $vs_e $vs_aa    $low;
    223 $vs_e $vs_aa   $high;
    224 $vs_e   e $creaky;
    225 $vs_e   e $high;
    226 $vs_e  e $low;
    227 
    228 $vs_ai    $creaky;
    229 $vs_ai    $high;  # redundant high tone; this does not usually occur
    230 $vs_ai   $high;
    231 
    232 $anusvara   a $creaky ;
    233 $anusvara   a $high ;
    234 $anusvara  a $low ;
    235 
    236 $med_w   ;
    237 $med_w    $creaky ;
    238 $med_w    $high ;
    239 $med_w    $low ;
    240 $med_w   ;
    241 $med_w    $creaky ;
    242 $med_w    $high ;
    243 $med_w    $low ;
    244 
    245 #
    246 # Medials
    247 #
    248 
    249 ::Null;
    250 
    251 # Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
    252 # velar + /j/ ==> modern palatals.
    253 
    254   t;
    255   t;
    256   d;
    257   d;
    258 
    259   t;
    260   t;
    261   d;
    262   d;
    263 
    264 # Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
    265 \u101A { [$med_y $med_r] → ;  # NOTE(review): U+101A YA context reconstructed from the comment
    266 
    267 # Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
    268 # other medials.
    269 
    270 # First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
    271 \u103D \u103E → \u103E \u103D;
    272 ::Null;
    273 # Now MEDIAL WA comes last.
    274 
    275 # Produce the palatal fricative (U+0283) from (SA|LA)+YA+HA.
    276 \u101E \u103B \u103E → \u0283;  # SA + MEDIAL YA + MEDIAL HA. NOTE(review): operands reconstructed from the comment; confirm upstream
    277 \u101C \u103B \u103E → \u0283;  # LA + MEDIAL YA + MEDIAL HA. NOTE(review): operands reconstructed from the comment; confirm upstream
    278 
    279 # Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
    280 \u103C \u103E → \u103E \u103C;
    281 ::Null;
    282 
    283 # Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
    284 \u103B \u103E → \u103E \u103B;
    285 ::Null;
    286 
    287 # Consume MEDIAL HA and apply devoicing.
    288 
    289   ;
    290   ;
    291   ;
    292   n;
    293   n;
    294   m;
    295   ;
    296   ;
    297   l;
    298   w;
    299   l;
    300 
    301 # Drop any remaining U+103E MEDIAL HA.
    302 \u103E → ;
    303 
    304 # Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
    305 # U+103C MEDIAL RA before U+103D MEDIAL WA.  # TODO: revisit this
    306 \u103B } \u103D → ;
    307 \u103C } \u103D → ;
    308 
    309 \u103B → j;  # MEDIAL YA
    310 \u103C → j;  # MEDIAL RA
    311 \u103D → w;  # MEDIAL WA
    312 
    313 #
    314 # Initials
    315 #
    316 
    317 # Velars
    318   k;
    319   k;
    320   ;
    321   ;
    322   ;
    323 
    324 # Historic palatals
    325   s;
    326   s;
    327   z;
    328   z;
    329   ;
    330   ;
    331 
    332 # Alveolars
    333   t;
    334   t;
    335   d;
    336   d;
    337   n;
    338 
    339 # Historic dentals ==> alveolars
    340   t;
    341   t;
    342   d;
    343   d;
    344   n;
    345 
    346 # Labials
    347   p;
    348   p;
    349   b;
    350   b;
    351   m;
    352 
    353 # Other letters
    354   j;
    355   j;  # historic /r/
    356   ;  # final, typically not pronounced in native words
    357   l;
    358   w;
    359   ;  # historic /s/ ==> modern dental
    360   h;
    361   l;
    362   ;
    363 
    364 # Independent vowels
    365 
    366   ;  # redundant creaky tone; this does not usually occur
    367   ;  # this does not usually occur
    368   ;
    369 
    370   ;  # this does not usually occur
    371   ;  # this does not usually occur
    372   ;
    373 
    374   ;  # redundant creaky tone; this does not usually occur
    375   ;  # this does not usually occur
    376   ;
    377 
    378   ;  # this does not usually occur
    379   ;
    380   ;
    381 
    382   ;  # this does not usually occur
    383   ;
    384   ;
    385 
    386   ;  # this does not usually occur
    387   ;  # redundant high tone; this does not usually occur
    388   ;
    389 
    390   ;  # this does not usually occur
    391   ;  # this does not usually occur
    392   ;
    393 
    394 # Various signs
    395 
    396   na;
    397   jw;
    398 # U+104E MYANMAR SYMBOL AFOREMENTIONED was handled earlier.
    399   ;
    400 
    401 #
    402 # Postprocessing
    403 #
    404 
    405 # Delete any remaining U+103A ASAT.
    406 $asat → ;
    407 
    408 # Delete zero-width space, non-joiner, joiner.
    409 [\u200B-\u200D] → ;
    410 
    411 ::NFC;
    412 
    413 			]]></tRule>
    414 		</transform>
    415 	</transforms>
    416 </supplementalData>
    417