Home | History | Annotate | Download | only in transforms
      1 <?xml version="1.0" encoding="UTF-8" ?>
      2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
      3 <!--
       4 Copyright © 1991-2013 Unicode, Inc.
      5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
      6 For terms of use, see http://www.unicode.org/copyright.html
      7 -->
      8 <supplementalData>
      9 	<version number="$Revision: 12139 $"/>
     10 	<transforms>
     11 		<transform source="ThaiLogical" target="Latin" direction="both" visibility="internal">
     12 			<tRule><![CDATA[
     13 # Thai-Latin
     14 # This set of rules follows ISO 11940
     15 #     see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
      16 # except that ISO 11940 does not mention an implicit vowel, so we use o
     17 #
     18 # The transcription is fairly ugly, so we ought to also do the UNGEGN version
     19 #     see: http://www.eki.ee/wgrs/rom1_th.pdf
     20 # and probably make that the main variant.
     21 #
     22 # Note: this is an internal file. The NFD/NFC is handled externally, in the index
     23 # The insertion of spaces between words, the reversal of the vowels
     24 # and the conversion of space to semicolon are done *outside* of these rules.
     25 # So as far as these rules are concerned, the vowels are in logical order!
     26 # insert implicit vowel (and remove it going the other way)
     27 # COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
     28 #$consonant = [-];
     29 #$vowel = [--];
     30 #{ ( $consonant ) } [^$vowel \uE000]  | $1 \uE000 ;
     31 #\uE000  o ;
     32 #  o ;
      33 $notAbove = [^\p{ccc=0}\p{ccc=above}] ; # any character whose combining class is neither 0 nor above
      34 $notBelow = [^\p{ccc=0}\p{ccc=below}] ; # any character whose combining class is neither 0 nor below
      35 # Consonants
      36 # Warning: the 'h's need to be handled carefully!
      37 # What we really want to say is the following, but we can't
      38 # $notHAccent = !($notAbove*    | $notBelow*   ) ;
      39 # Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
      40 $freeStandingBelow = [  ]; # NOTE(review): set appears empty in this copy; presumably its accent characters were lost - confirm against upstream
      41 $hAccent =  [        ]; # NOTE(review): set appears empty in this copy; presumably its accent characters were lost - confirm against upstream
      42 $notHAccent0 = [^$freeStandingBelow$hAccent]; # one character that is in neither $freeStandingBelow nor $hAccent
      43 $notHAccent1 = $freeStandingBelow [^$hAccent]; # a $freeStandingBelow character followed by one character not in $hAccent
      44   h ; # THAI CHARACTER HO HIP
      45  | $1  h ($notAbove*)    ; # backward (Latin to Thai) case, account for reordering
      46   h ; # THAI CHARACTER HO NOKHUK
      47   kh ; # THAI CHARACTER KHO KHAI
      48   kh ; # THAI CHARACTER KHO KHUAT
      49   kh ; # THAI CHARACTER KHO KHON
      50   kh ; # THAI CHARACTER KHO RAKHANG
      51   kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
      52   kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
      53   k ; # THAI CHARACTER KO KAI
      54   ph ; # THAI CHARACTER PHO SAMPHAO
      55   ph ; # THAI CHARACTER PHO PHUNG
      56   ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
      57   ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
      58   p ; # THAI CHARACTER PO PLA
      59   ch ; # THAI CHARACTER CHO CHING
      60   ch ; # THAI CHARACTER CHO CHOE
      61   ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
      62   ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
      63   c ; # THAI CHARACTER CHO CHAN
      64   th ; # THAI CHARACTER THO THAN
      65   th ; # THAI CHARACTER THO NANGMONTHO
      66   th ; # THAI CHARACTER THO PHUTHAO
      67   th ; # THAI CHARACTER THO THUNG
      68   th ; # THAI CHARACTER THO THONG
      69   th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
      70   th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
      71 #Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
      72   t ; # THAI CHARACTER TO PATAK
      73   t ; # THAI CHARACTER TO TAO
      74 # since there is no singleton g (generated), don't worry about that.
      75   ng ; # THAI CHARACTER NGO NGU
      76   n ; # THAI CHARACTER NO NEN
      77   n ; # THAI CHARACTER NO NU
      78   y  ; # THAI CHARACTER YO YING
      79   d ; # THAI CHARACTER DO CHADA
      80   d ; # THAI CHARACTER DO DEK
      81   b ; # THAI CHARACTER BO BAIMAI
      82   f ; # THAI CHARACTER FO FA
      83  | $1  f ($notAbove*)    ; # backward case, account for reordering
      84   m ; # THAI CHARACTER MO MA
      85   y ; # THAI CHARACTER YO YAK
      86   r ; # THAI CHARACTER RO RUA
      87   v ; # THAI CHARACTER RU
      88    ; # THAI CHARACTER LU
      89   w ; # THAI CHARACTER WO WAEN
      90   s ; # THAI CHARACTER SO SALA***
      91  | $1  s     ($notAbove*)    ; # backward case, account for reordering
      92   s ; # THAI CHARACTER SO RUSI
      93   s ; # THAI CHARACTER SO SUA***
      94  | $1  s ($notAbove*)    ; # backward case, account for reordering
      95   l ; # THAI CHARACTER LO CHULA
      96   l ; # THAI CHARACTER LO LING
      97   f ; # THAI CHARACTER FO FAN
      98   x ; # THAI CHARACTER O ANG
      99   s ; # THAI CHARACTER SO SO
     100 # Vowels, followed by other signs and the Thai digits
     101   a ; # THAI CHARACTER MAI HAN-AKAT
     102   a ; # THAI CHARACTER SARA AA
     103  | $1  a ($notAbove*)    ; # backward case, account for reordering
     104 # We deviate from ISO for SARA AM for disambiguation
     105   a  ; # THAI CHARACTER SARA AM
     106  | $1  a ($notAbove*)   ; # backward case, account for reordering
     107   a ; # THAI CHARACTER SARA A
     108   i ; # THAI CHARACTER SARA II
     109  | $1  i ($notAbove*)      ; # backward case, account for reordering
     110   u ; # THAI CHARACTER SARA UEE
     111  | $1  u    ($notAbove*)      ; # backward case, account for reordering
     112   u ; # THAI CHARACTER SARA UE
     113   u ; # THAI CHARACTER SARA UU
     114  | $1  u  ($notAbove*)      ; # backward case, account for reordering
     115   u ; # THAI CHARACTER SARA U
     116    ; # THAI CHARACTER PAIYANNOI
     117 #   XXX ; # THAI CURRENCY SYMBOL BAHT
     118   e ; # THAI CHARACTER SARA E
     119    ; # THAI CHARACTER SARA AE
     120   o ; # THAI CHARACTER SARA O
     121    ; # THAI CHARACTER SARA AI MAIMUAN
     122   i ; # THAI CHARACTER SARA AI MAIMALAI
     123    ; # THAI CHARACTER LAKKHANGYAO
     124    ; # THAI CHARACTER MAITAIKHU
     125    ; # THAI CHARACTER MAI EK
     126    ; # THAI CHARACTER MAI THO
     127    ; # THAI CHARACTER MAI TRI
     128    ; # THAI CHARACTER MAI CHATTAWA
     129    ; # THAI CHARACTER THANTHAKHAT
     130   '~' ; # THAI CHARACTER YAMAKKAN
     131 # We deviate from ISO for disambiguation
     132     ; # THAI CHARACTER NIKHAHIT
     133   '' ; # THAI CHARACTER FONGMAN
     134   0 ; # THAI DIGIT ZERO
     135   1 ; # THAI DIGIT ONE
     136   2 ; # THAI DIGIT TWO
     137   3 ; # THAI DIGIT THREE
     138   4 ; # THAI DIGIT FOUR
     139   5 ; # THAI DIGIT FIVE
     140   6 ; # THAI DIGIT SIX
     141   7 ; # THAI DIGIT SEVEN
     142   8 ; # THAI DIGIT EIGHT
     143   9 ; # THAI DIGIT NINE
     144   '||' ; # THAI CHARACTER ANGKHANKHU
     145    ; # THAI CHARACTER KHOMUT
     146    ; # THAI CHARACTER MAIYAMOK
     147 # moved down to make shorter first
     148 #Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
     149    ; # THAI CHARACTER PHINTHU
     150   i ; # THAI CHARACTER SARA I
     151 # fallbacks: pair the Latin letters g, h, j, q, z with k, k, c, k, s respectively (NOTE(review): presumably reverse-direction only; the rule operator is not visible in this copy - confirm against upstream)
     152 | k  g ;
     153 | k  h ;
     154 | c  j ;
     155 | k  q ;
     156 | s  z ;
     157 :: (lower); # parenthesized transform: per ICU rule syntax this applies only when running in the reverse direction
    158 			]]></tRule>
    159 		</transform>
    160 	</transforms>
    161 </supplementalData>
    162