1 <?xml version="1.0" encoding="UTF-8" ?> 2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd"> 3 <!-- 4 Copyright 1991-2015 Unicode, Inc. 5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) 6 For terms of use, see http://www.unicode.org/copyright.html 7 --> 8 <supplementalData> 9 <version number="$Revision: 12347 $"/> 10 <transforms> 11 <transform source="sat_Olck" target="sat_FONIPA" direction="forward" alias="sat-fonipa-t-sat-olck"> 12 <tRule><![CDATA[ 13 # Santali (Ol Chiki) Santali (International Phonetic Alphabet) 14 15 16 # Output 17 # ------ 18 # m m n n 19 # p p p b b t t t d d c c c k k k 20 # s s h 21 # d 22 # r 23 # l l 24 # w w w w 25 # 26 # i i u u 27 # e e o o 28 # 29 # a a 30 31 32 # References 33 # ---------- 34 # [1] Michael Everson: Final proposal to encode the Ol Chiki script 35 # in the UCS. ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R, 36 # September 21, 2005. http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf 37 # 38 # [2] George L. Campbell: Compendium of the World's Languages. 39 # Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3. Taylor & Francis, 2000. 40 # Pages 1454 to 1458. 41 42 43 # Notes 44 # ----- 45 # According to [1] (page 3), can only follow the four ejective 46 # consonants /p/, /c/, /t/, and /k/; these become 47 # /b/, /d/, /d/, and //. In online texts, however, 48 # we have occasionally encountered following non-ejective plosives, 49 # for example after /p/. These might possibly be typos. Our rules 50 # try to be resilient and handle as /b/. 51 # 52 # According to [1] (page 2), U+1C7C PHAARKAA follows the four glottal 53 # consonants /p/, /c/, /t/, and /k/ (these are actually 54 # ejective, not glottal). In online texts, however, we have frequently 55 # encountered following non-ejective consonants. 56 57 $inword = [[:L:][:M:]]; 58 59 # Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG. 60 ; 61 ; 62 ::null(); 63 64 # To simplify the rules below, enforce a uniform ordering of marks. 65 ; 66 ; 67 ; 68 ; 69 ; 70 ; 71 ::null(); 72 73 # Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating 74 # long phonemes, presumably because the graphemes look similar in some fonts. 75 # Since phaarkaa is used for voicing ejectives and plosives (which cannot 76 # be lenghtened), we rewrite phaarkaa to relaa. 77 [] []* {} ; 78 ::null(); 79 80 ; 81 ; 82 ; 83 ; 84 ; 85 ; 86 ; 87 ; 88 89 t ; 90 t ; 91 d ; 92 $inword {} d ; 93 t ; 94 95 k ; 96 k ; 97 ; 98 $inword {} ; 99 k ; 100 101 ; 102 ; 103 104 l ; 105 l ; 106 107 ; 108 ; 109 ; 110 ; 111 ; 112 ; 113 a ; 114 a ; 115 116 k ; 117 k ; 118 ; 119 k ; 120 121 c ; 122 c ; 123 d ; 124 $inword {} d ; 125 c ; 126 127 m ; 128 m ; 129 130 # According to [1], is sometimes /v/ and sometimes /w/. 131 # TODO: Find out if there is a rule for this. 132 w ; 133 w ; 134 135 i ; 136 i ; 137 ; 138 ; 139 ; 140 ; 141 i ; 142 i ; 143 144 s ; 145 s ; 146 147 # According to [1], is sometimes /h/ and sometimes //. 148 # TODO: Find out if there is a rule for this. 149 h ; 150 151 ; 152 ; 153 154 r ; 155 r ; 156 157 u ; 158 u ; 159 ; 160 ; 161 ; 162 ; 163 u ; 164 u ; 165 166 c ; 167 c ; 168 d ; 169 c ; 170 171 t ; 172 t ; 173 d ; 174 $inword {} d ; 175 t ; 176 177 ; 178 ; 179 180 # TODO: bhrb seems unlikely; would be good to verify. 181 h ; 182 183 ; 184 ; 185 ; 186 ; 187 ; 188 ; 189 e ; 190 e ; 191 192 p ; 193 p ; 194 b ; 195 p ; 196 197 ; 198 ; 199 200 n ; 201 n ; 202 203 ; 204 ; 205 206 ; 207 ; 208 o ; 209 o ; 210 211 ; 212 ; 213 ; 214 ; 215 216 p ; 217 b ; 218 b ; 219 $inword {} b ; 220 p ; 221 222 w ; 223 w ; 224 225 ]]></tRule> 226 </transform> 227 </transforms> 228 </supplementalData> 229