Home | History | Annotate | Download | only in transforms
      1 <?xml version="1.0" encoding="UTF-8" ?>
      2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
      3 <!--
      4 Copyright © 1991-2013 Unicode, Inc.
      5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
      6 For terms of use, see http://www.unicode.org/copyright.html
      7 -->
      8 <supplementalData>
      9 	<version number="$Revision: 13787 $"/>
     10 	<transforms>
     11 		<transform source="ps" target="ps_Latn" variant="BGN" direction="forward" draft="contributed" alias="Pashto-Latin/BGN ps-Latn-t-ps-m0-bgn">
     12 			<tRule><![CDATA[
     13 #
     14 ########################################################################
     15 # BGN/PCGN 1968 System
     16 #
     17 # This system was adopted in 1968 for the romanization of Pashto
     18 # geographic names in Afghanistan. Persian names in Afghanistan are
     19 # romanized in accordance with the Romanization System for Persian
     20 # (BGN/PCGN 1958 System), shown on pages 87-92.
     21 #
     22 # Originally prepared by Michael Everson <everson (a] evertype.com>
     23 ########################################################################
     24 #
     25 # MINIMAL FILTER: Pashto-Latin
     26 #
     27 
     28 :: [] ; # Global filter for this transform. NOTE(review): the set is empty in this copy; its contents (presumably non-ASCII characters) appear to have been lost. Restore from the upstream file.
     29 :: NFD (NFC) ; # Normalize to NFD in the forward direction; (NFC) names the normalization for the inverse direction.
     30 #
     31 #
     32 ########################################################################
     33 
     34 
     35 #
     36 ########################################################################
     37 #
     38 # Define All Transformation Variables
     39 #
     40 ########################################################################
     41 #
     42 
     43 $alef = ; # Output used by the HAMZA and ALEF WITH MADDA ABOVE rules. NOTE(review): the value is empty in this copy; presumably a non-ASCII character was lost. Restore from upstream.
     44 $ayin = ; # Output used by the AIN and AIN + SHADDA rules. NOTE(review): the value is empty in this copy; presumably a non-ASCII character was lost. Restore from upstream.
     45 $disambig =   ; # Marker appended to the outputs of the ARABIC-INDIC digit and standalone separator rules. NOTE(review): the value is empty in this copy; presumably a non-ASCII character was lost. Restore from upstream.
     46 #
     47 #
     48 # Use this $wordBoundary until bug 2034 is fixed in ICU:
     49 # http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
     50 #
     51 
     52 $wordBoundary =  [^[:L:][:M:][:N:]] ; # Any character that is not a letter, mark, or number; used as the preceding context of the word-initial HAMZA and ALEF rules.
     53 #
     54 #
     55 ########################################################################
     56 
     57 [:Nd:]{}[:Nd:]  [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR between two decimal digits ([:Nd:] on both sides). NOTE(review): the character inside {} and the rule operator are missing from this copy, as are the source characters of the rules below; presumably non-ASCII text was lost. Restore from upstream.
     58 [:Nd:]{}[:Nd:]  [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR between two decimal digits
     59   ',' $disambig ; # ARABIC DECIMAL SEPARATOR (no digit context; output carries $disambig)
     60   '.' $disambig ; # ARABIC THOUSANDS SEPARATOR (no digit context; output carries $disambig)
     61 #    ; # ARABIC FIVE POINTED STAR // no need to transliterate
     62   ',' ; # ARABIC COMMA
     63   ';' ; # ARABIC SEMICOLON
     64   '?' ; # ARABIC QUESTION MARK
     65   '%' ; # ARABIC PERCENT SIGN
     66   0 $disambig ; # ARABIC-INDIC DIGIT ZERO (these ten outputs carry $disambig; the EXTENDED ARABIC-INDIC digits below map to the bare digit, presumably so the two digit sets stay distinguishable)
     67   1 $disambig ; # ARABIC-INDIC DIGIT ONE
     68   2 $disambig ; # ARABIC-INDIC DIGIT TWO
     69   3 $disambig ; # ARABIC-INDIC DIGIT THREE
     70   4 $disambig ; # ARABIC-INDIC DIGIT FOUR
     71   5 $disambig ; # ARABIC-INDIC DIGIT FIVE
     72   6 $disambig ; # ARABIC-INDIC DIGIT SIX
     73   7 $disambig ; # ARABIC-INDIC DIGIT SEVEN
     74   8 $disambig ; # ARABIC-INDIC DIGIT EIGHT
     75   9 $disambig ; # ARABIC-INDIC DIGIT NINE
     76   0 ; # EXTENDED ARABIC-INDIC DIGIT ZERO
     77   1 ; # EXTENDED ARABIC-INDIC DIGIT ONE
     78   2 ; # EXTENDED ARABIC-INDIC DIGIT TWO
     79   3 ; # EXTENDED ARABIC-INDIC DIGIT THREE
     80   4 ; # EXTENDED ARABIC-INDIC DIGIT FOUR
     81   5 ; # EXTENDED ARABIC-INDIC DIGIT FIVE
     82   6 ; # EXTENDED ARABIC-INDIC DIGIT SIX
     83   7 ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
     84   8 ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
     85   9 ; # EXTENDED ARABIC-INDIC DIGIT NINE
     86 
     87 #
     88 ########################################################################
     89 #
     90 # Rules moved to front to avoid masking
     91 #
     92 ########################################################################
     93 
     94 
     95 #
     96 ########################################################################
     97 #
     98 # BGN Page 89 Rule 4
     99 #
    100 # The character sequences كه, زه, سه, and گه may be romanized k·h, z·h,
    101 # s·h, and g·h in order to differentiate those romanizations from the
    102 # digraphs kh, zh, sh, and gh.
    103 #
    104 ########################################################################
    105 #
    106 
    107   kh ; # ARABIC LETTER KAF + HEH. NOTE(review): these four outputs are identical to the outputs of the single-letter KHAH, JEH, SHEEN and GHAIN rules, so as written they cannot differentiate the two-letter sequences from the digraphs; presumably a separator character between the two Latin letters was lost in this copy. Confirm against upstream.
    108   zh ; # ARABIC LETTER ZAIN + HEH
    109   sh ; # ARABIC LETTER SEEN + HEH
    110   gh ; # ARABIC LETTER GAF + HEH
    111 #
    112 #
    113 ########################################################################
    114 #
    115 # End Rule 4
    116 #
    117 ########################################################################
    118 
    119 
    120 
    121 #
    122 ########################################################################
    123 #
    124 # BGN Page 91 Rule 7
    125 #
    126 # Double consonant sounds are represented in Arabic script by
    127 # placing a shaddah ( ّ ) over a consonant character. In romanization
    128 # the letter should be doubled. [The remainder of this rule deals with
    129 # the definite article and is lexical.]
    130 #
    131 ########################################################################
    132 #
    133 
    134   bb ; # ARABIC LETTER BEH + SHADDA (each rule in this section outputs the consonant's romanization twice)
    135   pp ; # ARABIC LETTER PEH + SHADDA
    136   tt ; # ARABIC LETTER TEH + SHADDA
    137    ; # ARABIC LETTER TEH WITH RING + SHADDA. NOTE(review): the output is empty here and in the other rules of this section that show no Latin text; presumably non-ASCII romanizations were lost in this copy. Restore from upstream.
    138   ss ; # ARABIC LETTER THEH + SHADDA
    139   jj ; # ARABIC LETTER JEEM + SHADDA
    140   chch ; # ARABIC LETTER TCHEH + SHADDA
    141   tsts ; # ARABIC LETTER HAH WITH THREE DOTS ABOVE + SHADDA
    142   dz ; # ARABIC LETTER HAH WITH HAMZA ABOVE + SHADDA. NOTE(review): not doubled, unlike the other digraphs in this section (chch, tsts, khkh, zhzh, shsh, ghgh); confirm whether 'dzdz' is intended.
    143    ; # ARABIC LETTER HAH + SHADDA
    144   khkh ; # ARABIC LETTER KHAH + SHADDA
    145   dd ; # ARABIC LETTER DAL + SHADDA
    146    ; # ARABIC LETTER DAL WITH RING + SHADDA
    147   zz ; # ARABIC LETTER THAL + SHADDA
    148   rr ; # ARABIC LETTER REH + SHADDA
    149    ; # ARABIC LETTER REH WITH RING + SHADDA
    150   zz ; # ARABIC LETTER ZAIN + SHADDA
    151   zhzh ; # ARABIC LETTER JEH + SHADDA
    152   zhzh ; # ARABIC LETTER REH WITH DOT BELOW AND DOT ABOVE + SHADDA
    153   ss ; # ARABIC LETTER SEEN + SHADDA
    154   shsh ; # ARABIC LETTER SHEEN + SHADDA
    155   shsh ; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE + SHADDA
    156    ; # ARABIC LETTER SAD + SHADDA
    157    ; # ARABIC LETTER DAD + SHADDA
    158    ; # ARABIC LETTER TAH + SHADDA
    159    ; # ARABIC LETTER ZAH + SHADDA
    160   $ayin $ayin ; # ARABIC LETTER AIN + SHADDA
    161   ghgh ; # ARABIC LETTER GHAIN + SHADDA
    162   ff ; # ARABIC LETTER FEH + SHADDA
    163   qq ; # ARABIC LETTER QAF + SHADDA
    164   kk ; # ARABIC LETTER KAF + SHADDA
    165   gg ; # ARABIC LETTER GAF + SHADDA
    166   gg ; # ARABIC LETTER GAF WITH RING + SHADDA
    167   ll ; # ARABIC LETTER LAM + SHADDA
    168   mm ; # ARABIC LETTER MEEM + SHADDA
    169   nn ; # ARABIC LETTER NOON + SHADDA
    170    ; # ARABIC LETTER NOON WITH RING + SHADDA
    171   hh ; # ARABIC LETTER HEH + SHADDA
    172   ww ; # ARABIC LETTER WAW + SHADDA
    173   yy ; # ARABIC LETTER FARSI YEH + SHADDA
    174   yy ; # ARABIC LETTER YEH + SHADDA
    175 #
    176 #
    177 ########################################################################
    178 #
    179 # End Rule 7
    180 #
    181 ########################################################################
    182 
    183 
    184 
    185 #
    186 ########################################################################
    187 #
    188 # Start of Transformations
    189 #
    190 ########################################################################
    191 #
    192 
    193 $wordBoundary{   ; # ARABIC LETTER HAMZA preceded by $wordBoundary (word-initial). NOTE(review): no output is visible in this copy; cannot tell from here whether an empty output is intended or a character was lost. Confirm against upstream.
    194   $alef ; # ARABIC LETTER HAMZA (all other positions)
    195 $wordBoundary{   ; # ARABIC LETTER ALEF preceded by $wordBoundary (word-initial). NOTE(review): same caveat as the word-initial HAMZA rule.
    196   $alef  ; # ARABIC LETTER ALEF WITH MADDA ABOVE. NOTE(review): the extra space after $alef suggests a second output character was lost; confirm against upstream.
    197   b ; # ARABIC LETTER BEH
    198   p ; # ARABIC LETTER PEH
    199   t ; # ARABIC LETTER TEH
    200    ; # ARABIC LETTER TEH WITH RING. NOTE(review): the output is empty here and in the other rules of this section that show no Latin text; presumably non-ASCII romanizations were lost in this copy. Restore from upstream.
    201   h ; # ARABIC LETTER TEH MARBUTA
    202   s ; # ARABIC LETTER THEH
    203   j ; # ARABIC LETTER JEEM
    204   ch ; # ARABIC LETTER TCHEH
    205   ts ; # ARABIC LETTER HAH WITH THREE DOTS ABOVE
    206   dz ; # ARABIC LETTER HAH WITH HAMZA ABOVE
    207    ; # ARABIC LETTER HAH
    208   kh ; # ARABIC LETTER KHAH
    209   d ; # ARABIC LETTER DAL
    210    ; # ARABIC LETTER DAL WITH RING
    211   z ; # ARABIC LETTER THAL
    212   r ; # ARABIC LETTER REH
    213    ; # ARABIC LETTER REH WITH RING
    214   z ; # ARABIC LETTER ZAIN
    215   zh ; # ARABIC LETTER JEH
    216   zh ; # ARABIC LETTER REH WITH DOT BELOW AND DOT ABOVE
    217   s ; # ARABIC LETTER SEEN
    218   sh ; # ARABIC LETTER SHEEN
    219   sh ; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE
    220    ; # ARABIC LETTER SAD
    221    ; # ARABIC LETTER DAD
    222    ; # ARABIC LETTER TAH
    223    ; # ARABIC LETTER ZAH
    224   $ayin ; # ARABIC LETTER AIN
    225   gh ; # ARABIC LETTER GHAIN
    226   f ; # ARABIC LETTER FEH
    227   q ; # ARABIC LETTER QAF
    228   k ; # ARABIC LETTER KAF
    229   g ; # ARABIC LETTER GAF
    230   g ; # ARABIC LETTER GAF WITH RING
    231   l ; # ARABIC LETTER LAM
    232   m ; # ARABIC LETTER MEEM
    233   n ; # ARABIC LETTER NOON
    234    ; # ARABIC LETTER NOON WITH RING
    235   w ; # ARABIC LETTER WAW
    236   h ; # ARABIC LETTER HEH
    237   ey ; # ARABIC LETTER FARSI YEH + HAMZA ABOVE
    238   y ; # ARABIC LETTER FARSI YEH
    239   y ; # ARABIC LETTER YEH
    240   e ; # ARABIC LETTER E
    241 
    242    ; # ARABIC FATHA + ALEF. NOTE(review): the output is empty here and in the other FATHA rules that show no Latin text; presumably non-ASCII romanizations were lost in this copy. Restore from upstream.
    243   ay ; # ARABIC FATHA + FARSI YEH + SUKUN
    244    ; # ARABIC FATHA + ALEF MAKSURA
    245   y ; # ARABIC FATHA + SUKUN + YEH WITH TAIL. NOTE(review): a leading vowel character may have been lost from this output; confirm against upstream.
    246    ; # ARABIC FATHA + SUKUN
    247   a ; # ARABIC FATHA (bare mark; listed after the longer FATHA sequences, presumably so it does not mask them)
    248 
    249   ey ; # ARABIC KASRA + FARSI YEH + SUKUN
    250    ; # ARABIC KASRA + FARSI YEH. NOTE(review): the output is empty here and in the KASRA + YEH rule; presumably a non-ASCII romanization was lost in this copy. Restore from upstream.
    251    ; # ARABIC KASRA + YEH
    252   ew ; # ARABIC KASRA + WAW
    253   i ; # ARABIC KASRA (bare mark; listed after the longer KASRA sequences, presumably so it does not mask them)
    254 
    255   ow ; # ARABIC DAMMA + WAW + SUKUN
    256   y ; # ARABIC DAMMA + WAW + FARSI YEH. NOTE(review): a leading vowel character may have been lost from this output; confirm against upstream.
    257    ; # ARABIC DAMMA + WAW. NOTE(review): the output is empty in this copy; presumably a non-ASCII romanization was lost. Restore from upstream.
    258   u ; # ARABIC DAMMA (bare mark; listed after the longer DAMMA sequences, presumably so it does not mask them)
    259 
    260    ; # ARABIC SUKUN. NOTE(review): no output is visible in this copy; cannot tell from here whether the mark is meant to be dropped or an output character was lost. Confirm against upstream.
    261 
    262 
    263 #
    264 #
    265 ########################################################################
    266 
    267 			]]></tRule>
    268 		</transform>
    269 	</transforms>
    270 </supplementalData>
    271