1 <?xml version="1.0" encoding="UTF-8" ?> 2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd"> 3 <!-- 4 Copyright 1991-2013 Unicode, Inc. 5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) 6 For terms of use, see http://www.unicode.org/copyright.html 7 --> 8 <supplementalData> 9 <version number="$Revision: 12139 $"/> 10 <transforms> 11 <transform source="ThaiLogical" target="Latin" direction="both" visibility="internal"> 12 <tRule><![CDATA[ 13 # Thai-Latin 14 # This set of rules follows ISO 11940 15 # see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf 16 # except that that document does not mention an implicit vowel, so we use o 17 # 18 # The transcription is fairly ugly, so we ought to also do the UNGEGN version 19 # see: http://www.eki.ee/wgrs/rom1_th.pdf 20 # and probably make that the main variant. 21 # 22 # Note: this is an internal file. The NFD/NFC is handled externally, in the index 23 # The insertion of spaces between words, the reversal of the vowels 24 # and the conversion of space to semicolon are done *outside* of these rules. 25 # So as far as these rules are concerned, the vowels are in logical order! 26 # insert implicit vowel (and remove it going the other way) 27 # COMMENTED out: the implicit vowel positions cannot be predicted algorithmically 28 #$consonant = [-]; 29 #$vowel = [--]; 30 #{ ( $consonant ) } [^$vowel \uE000] | $1 \uE000 ; 31 #\uE000 o ; 32 # o ; 33 $notAbove = [^\p{ccc=0}\p{ccc=above}] ; 34 $notBelow = [^\p{ccc=0}\p{ccc=below}] ; 35 # Consonants 36 # Warning: the 'h's need to be handled carefully! 
37 # What we really want to say is the following, but we can't 38 # $notHAccent = !($notAbove* | $notBelow* ) ; 39 # Since the only accents we care about that could cause problems are free-standing accents below, we use instead: 40 $freeStandingBelow = [ ]; 41 $hAccent = [ ]; 42 $notHAccent0 = [^$freeStandingBelow$hAccent]; 43 $notHAccent1 = $freeStandingBelow [^$hAccent]; 44 h ; # THAI CHARACTER HO HIP 45 | $1 h ($notAbove*) ; # backward case, account for reordering 46 h ; # THAI CHARACTER HO NOKHUK 47 kh ; # THAI CHARACTER KHO KHAI 48 kh ; # THAI CHARACTER KHO KHUAT 49 kh ; # THAI CHARACTER KHO KHON 50 kh ; # THAI CHARACTER KHO RAKHANG 51 kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI 52 kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI 53 k ; # THAI CHARACTER KO KAI 54 ph ; # THAI CHARACTER PHO SAMPHAO 55 ph ; # THAI CHARACTER PHO PHUNG 56 ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN 57 ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN 58 p ; # THAI CHARACTER PO PLA 59 ch ; # THAI CHARACTER CHO CHING 60 ch ; # THAI CHARACTER CHO CHOE 61 ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG 62 ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG 63 c ; # THAI CHARACTER CHO CHAN 64 th ; # THAI CHARACTER THO THAN 65 th ; # THAI CHARACTER THO NANGMONTHO 66 th ; # THAI CHARACTER THO PHUTHAO 67 th ; # THAI CHARACTER THO THUNG 68 th ; # THAI CHARACTER THO THONG 69 th } $notHAccent1 ; # THAI CHARACTER THO THAHAN 70 th } $notHAccent0 ; # THAI CHARACTER THO THAHAN 71 #Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick. 72 t ; # THAI CHARACTER TO PATAK 73 t ; # THAI CHARACTER TO TAO 74 # since there is no singleton g (generated), don't worry about that. 
75 ng ; # THAI CHARACTER NGO NGU 76 n ; # THAI CHARACTER NO NEN 77 n ; # THAI CHARACTER NO NU 78 y ; # THAI CHARACTER YO YING 79 d ; # THAI CHARACTER DO CHADA 80 d ; # THAI CHARACTER DO DEK 81 b ; # THAI CHARACTER BO BAIMAI 82 f ; # THAI CHARACTER FO FA 83 | $1 f ($notAbove*) ; # backward case, account for reordering 84 m ; # THAI CHARACTER MO MA 85 y ; # THAI CHARACTER YO YAK 86 r ; # THAI CHARACTER RO RUA 87 v ; # THAI CHARACTER RU 88 ; # THAI CHARACTER LU 89 w ; # THAI CHARACTER WO WAEN 90 s ; # THAI CHARACTER SO SALA*** 91 | $1 s ($notAbove*) ; # backward case, account for reordering 92 s ; # THAI CHARACTER SO RUSI 93 s ; # THAI CHARACTER SO SUA*** 94 | $1 s ($notAbove*) ; # backward case, account for reordering 95 l ; # THAI CHARACTER LO CHULA 96 l ; # THAI CHARACTER LO LING 97 f ; # THAI CHARACTER FO FAN 98 x ; # THAI CHARACTER O ANG 99 s ; # THAI CHARACTER SO SO 100 # vowels 101 a ; # THAI CHARACTER MAI HAN-AKAT 102 a ; # THAI CHARACTER SARA AA 103 | $1 a ($notAbove*) ; # backward case, account for reordering 104 # We deviate from ISO for SARA AM for disambiguation 105 a ; # THAI CHARACTER SARA AM 106 | $1 a ($notAbove*) ; # backward case, account for reordering 107 a ; # THAI CHARACTER SARA A 108 i ; # THAI CHARACTER SARA II 109 | $1 i ($notAbove*) ; # backward case, account for reordering 110 u ; # THAI CHARACTER SARA UEE 111 | $1 u ($notAbove*) ; # backward case, account for reordering 112 u ; # THAI CHARACTER SARA UE 113 u ; # THAI CHARACTER SARA UU 114 | $1 u ($notAbove*) ; # backward case, account for reordering 115 u ; # THAI CHARACTER SARA U 116 ; # THAI CHARACTER PAIYANNOI 117 # XXX ; # THAI CURRENCY SYMBOL BAHT 118 e ; # THAI CHARACTER SARA E 119 ; # THAI CHARACTER SARA AE 120 o ; # THAI CHARACTER SARA O 121 ; # THAI CHARACTER SARA AI MAIMUAN 122 i ; # THAI CHARACTER SARA AI MAIMALAI 123 ; # THAI CHARACTER LAKKHANGYAO 124 ; # THAI CHARACTER MAITAIKHU 125 ; # THAI CHARACTER MAI EK 126 ; # THAI CHARACTER MAI THO 127 ; # THAI CHARACTER MAI TRI 128 ; # 
THAI CHARACTER MAI CHATTAWA 129 ; # THAI CHARACTER THANTHAKHAT 130 '~' ; # THAI CHARACTER YAMAKKAN 131 # We deviate from ISO for disambiguation 132 ; # THAI CHARACTER NIKHAHIT 133 '' ; # THAI CHARACTER FONGMAN 134 0 ; # THAI DIGIT ZERO 135 1 ; # THAI DIGIT ONE 136 2 ; # THAI DIGIT TWO 137 3 ; # THAI DIGIT THREE 138 4 ; # THAI DIGIT FOUR 139 5 ; # THAI DIGIT FIVE 140 6 ; # THAI DIGIT SIX 141 7 ; # THAI DIGIT SEVEN 142 8 ; # THAI DIGIT EIGHT 143 9 ; # THAI DIGIT NINE 144 '||' ; # THAI CHARACTER ANGKHANKHU 145 ; # THAI CHARACTER KHOMUT 146 ; # THAI CHARACTER MAIYAMOK 147 # moved down to make shorter first 148 #Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below. 149 ; # THAI CHARACTER PHINTHU 150 i ; # THAI CHARACTER SARA I 151 # fallbacks 152 | k g ; 153 | k h ; 154 | c j ; 155 | k q ; 156 | s z ; 157 :: (lower); 158 ]]></tRule> 159 </transform> 160 </transforms> 161 </supplementalData> 162