Home | History | Annotate | Download | only in transforms
      1 <?xml version="1.0" encoding="UTF-8" ?>
      2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
      3 <!--
      4 Copyright © 1991-2015 Unicode, Inc.
      5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
      6 For terms of use, see http://www.unicode.org/copyright.html
      7 -->
      8 <supplementalData>
      9 	<version number="$Revision: 12347 $"/>
     10 	<transforms>
     11 		<transform source="sat_Olck" target="sat_FONIPA" direction="forward" alias="sat-fonipa-t-sat-olck">
     12 			<tRule><![CDATA[
     13 # Santali (Ol Chiki) → Santali (International Phonetic Alphabet)
     14 
     15 
     16 # Output
     17 # ------
     18 # m m n n      
     19 # p p p b b t t t d d     c c c k k k  
     20 # s s h
     21 # d
     22 #  r
     23 # l l
     24 # w w w w
     25 #
     26 # i i   u u  
     27 # e e       o o  
     28 #        
     29 # a a  
     30 
     31 
     32 # References
     33 # ----------
     34 # [1] Michael Everson: Final proposal to encode the Ol Chiki script
     35 #     in the UCS.  ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R,
     36 #     September 21, 2005.  http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf
     37 #
     38 # [2] George L. Campbell: Compendium of the World's Languages.
     39 #     Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3.  Taylor & Francis, 2000.
     40 #     Pages 1454 to 1458.
     41 
     42 
     43 # Notes
     44 # -----
     45 # According to [1] (page 3), ᱽ can only follow the four ejective
     46 # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become
     47 # ᱵᱽ /b/, ᱡᱽ /dʒ/, ᱫᱽ /d/, and ᱜᱽ /ɡ/.  In online texts, however,
     48 # we have occasionally encountered ᱽ following non-ejective plosives,
     49 # for example after ᱯ /p/. These might possibly be typos.  Our rules
     50 # try to be resilient and handle ᱯᱽ as /b/.
     51 #
     52 # According to [1] (page 2), U+1C7C PHAARKAA follows the four glottal
     53 # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually
     54 # ejective, not glottal).  In online texts, however, we have frequently
     55 # encountered ᱼ following non-ejective consonants.
     56 
     57 $inword = [[:L:][:M:]];
     58 
     59 # Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG.
     60    ;
     61    ;
     62 ::null();
     63 
     64 # To simplify the rules below, enforce a uniform ordering of marks.
     65    ;
     66    ;
     67    ;
     68    ;
     69    ;
     70    ;
     71 ::null();
     72 
     73 # Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating
     74 # long phonemes, presumably because the graphemes look similar in some fonts.
     75 # Since phaarkaa is used for voicing ejectives and plosives (which cannot
     76 # be lengthened), we rewrite phaarkaa to relaa.
     77 [] []* {}   ;
     78 ::null();
     79 
     80    ;
     81    ;
     82    ;
     83    ;
     84    ;
     85    ;
     86    ;
     87    ;
     88 
     89   t ;
     90   t ;
     91   d ;
     92 $inword {}  d ;
     93   t ;
     94 
     95   k ;
     96   k ;
     97    ;
     98 $inword {}   ;
     99   k ;
    100 
    101    ;
    102    ;
    103 
    104   l ;
    105   l ;
    106 
    107    ;
    108    ;
    109    ;
    110    ;
    111    ;
    112    ;
    113   a ;
    114   a ;
    115 
    116   k ;
    117   k ;
    118    ;
    119   k ;
    120 
    121   c ;
    122   c ;
    123    d ;
    124 $inword {}   d ;
    125   c ;
    126 
    127   m ;
    128   m ;
    129 
    130 # According to [1], ᱣ is sometimes /v/ and sometimes /w/.
    131 # TODO: Find out if there is a rule for this.
    132   w ;
    133   w ;
    134 
    135   i ;
    136   i ;
    137    ;
    138    ;
    139    ;
    140    ;
    141   i ;
    142   i ;
    143 
    144   s ;
    145   s ;
    146 
    147 # According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/.
    148 # TODO: Find out if there is a rule for this.
    149   h ;
    150 
    151    ;
    152    ;
    153 
    154   r ;
    155   r ;
    156 
    157   u ;
    158   u ;
    159    ;
    160    ;
    161    ;
    162    ;
    163   u ;
    164   u ;
    165 
    166   c ;
    167   c ;
    168    d ;
    169   c ;
    170 
    171   t ;
    172   t ;
    173   d ;
    174 $inword {}  d ;
    175   t ;
    176 
    177    ;
    178    ;
    179 
    180 # TODO:   bhrb seems unlikely; would be good to verify.
    181   h ;
    182 
    183    ;
    184    ;
    185    ;
    186    ;
    187    ;
    188    ;
    189   e ;
    190   e ;
    191 
    192   p ;
    193   p ;
    194   b ;
    195   p ;
    196 
    197    ;
    198    ;
    199 
    200   n ;
    201   n ;
    202 
    203    ;
    204    ;
    205 
    206    ;
    207    ;
    208   o ;
    209   o ;
    210 
    211    ;
    212    ;
    213    ;
    214    ;
    215 
    216   p ;
    217   b ;
    218   b ;
    219 $inword {}  b ;
    220   p ;
    221 
    222   w ;
    223   w ;
    224 
    225 			]]></tRule>
    226 		</transform>
    227 	</transforms>
    228 </supplementalData>
    229