Home | History | Annotate | Download | only in transforms
      1 <?xml version="1.0" encoding="UTF-8" ?>
      2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
      3 <!--
      4 Copyright © 1991-2013 Unicode, Inc.
      5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
      6 For terms of use, see http://www.unicode.org/copyright.html
      7 -->
      8 <supplementalData>
      9 	<version number="$Revision: 13787 $"/>
     10 	<transforms>
     11 		<transform source="ar" target="ar_Latn" variant="BGN" direction="forward" draft="contributed" alias="Arabic-Latin/BGN ar-Latn-t-ar-m0-bgn">
     12 			<tRule>
     13 #
     14 ########################################################################
     15 # BGN/PCGN 1956 System
     16 #
     17 # This system was adopted by the BGN in 1946 and by the PCGN
     18 # in 1956 and has been applied in the systematic romanization
     19 # of geographic names in Bahrain, Egypt, Iraq, Jordan,
     20 # Kuwait, Lebanon, Libya, Oman, Qatar, Saudi Arabia, Sudan,
     21 # Syria, Tunisia, the United Arab Emirates, and Yemen, all
     22 # of which have been covered by published BGN gazetteers.
     23 #
     24 # Originally prepared by Michael Everson &lt;everson (a] evertype.com&gt;
     25 ########################################################################
     26 #
     27 # MINIMAL FILTER: Arabic-Latin
     28 # Global filter (which characters the transform may touch), then normalization: NFKD when run forward, NFC when run in reverse. NOTE(review): the innermost set `[]` is empty as shown; it presumably listed literal non-ASCII characters that were lost in extraction - confirm against the upstream CLDR file.
     29 
     30 :: [[:arabic:][:block=ARABIC:][]] ;
     31 :: NFKD (NFC) ;
     32 #
     33 #
     34 ########################################################################
     35 
     36 
     37 #
     38 ########################################################################
     39 #
     40 # Define All Transformation Variables
     41 #
     42 ########################################################################
     43 # $alef, $ayin and $disambig are used as rule targets further down. NOTE(review): all three right-hand sides are empty as shown; the values were presumably non-ASCII characters lost in extraction - restore from the upstream CLDR file before use.
     44 
     45 $alef = ;
     46 $ayin = ;
     47 $disambig =   ;
     48 #
     49 #
     50 # Use this $wordBoundary until bug 2034 is fixed in ICU:
     51 # http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
     52 # $wordBoundary matches any single character that is not a letter, mark, or number; the word-initial rules use it as their left context.
     53 
     54 $wordBoundary =  [^[:L:][:M:][:N:]] ;
     55 #
     56 #
     57 ########################################################################
     58 
     59 # non-letters: separators, punctuation and digits. The first two rules apply only between decimal digits ([:Nd:] context on both sides); the EXTENDED ARABIC-INDIC digit targets carry $disambig, the ARABIC-INDIC digit targets do not. Each trailing comment names the intended source character. NOTE(review): the source characters and the rule operators are missing as shown (presumably non-ASCII text lost in extraction) - confirm against the upstream CLDR file.
     60 [:Nd:]{}[:Nd:]  [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
     61 [:Nd:]{}[:Nd:]  [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
     62   ',' $disambig ; # ARABIC DECIMAL SEPARATOR
     63   '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
     64 #    ; # ARABIC FIVE POINTED STAR // no need to transliterate
     65   ',' ; # ARABIC COMMA
     66   ';' ; # ARABIC SEMICOLON
     67   '?' ; # ARABIC QUESTION MARK
     68   '%' ; # ARABIC PERCENT SIGN
     69   0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
     70   1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
     71   2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
     72   3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
     73   4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
     74   5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
     75   6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
     76   7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
     77   8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
     78   9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
     79   0 ; # ARABIC-INDIC DIGIT ZERO
     80   1 ; # ARABIC-INDIC DIGIT ONE
     81   2 ; # ARABIC-INDIC DIGIT TWO
     82   3 ; # ARABIC-INDIC DIGIT THREE
     83   4 ; # ARABIC-INDIC DIGIT FOUR
     84   5 ; # ARABIC-INDIC DIGIT FIVE
     85   6 ; # ARABIC-INDIC DIGIT SIX
     86   7 ; # ARABIC-INDIC DIGIT SEVEN
     87   8 ; # ARABIC-INDIC DIGIT EIGHT
     88   9 ; # ARABIC-INDIC DIGIT NINE
     89 
     90 #
     91 ########################################################################
     92 #
     93 # Rules moved to front to avoid masking
     94 #
     95 ########################################################################
     96 
     97 
     98 #
     99 ########################################################################
    100 #
    101 # BGN Page 8 Rule 5
    102 #
    103 # The character sequences ته, كه, ده, and سه may be romanized t·h, k·h,
    104 # d·h, and s·h in order to differentiate those romanizations from the
    105 # digraphs th, kh, dh, and sh.
    106 #
    107 ########################################################################
    108 # Consonant + HEH sequences (see each trailing comment); placed before the single-letter rules so the two-character sequence is matched first. NOTE(review): only the ASCII remainder of each rule is visible - the Arabic source sequence, the rule operator, and possibly a separator character inside each target appear to have been lost in extraction; confirm against the upstream CLDR file.
    109 
    110   th ; # ARABIC LETTER TEH + HEH
    111   kh ; # ARABIC LETTER KAF + HEH
    112   dh ; # ARABIC LETTER DAL + HEH
    113   sh ; # ARABIC LETTER SEEN + HEH
    114 #
    115 #
    116 ########################################################################
    117 #
    118 # End Rule 5
    119 #
    120 ########################################################################
    121 
    122 
    123 ########################################################################
    124 
    125 #
    126 #
    127 # BGN Page 8 Rule 9
    128 #
    129 # Doubled consonant sounds are represented in Arabic script by placing
    130 # a shaddah ( ّ ) over a consonant character. In romanization the letter
    131 # should be doubled. [The remainder of this rule deals with the definite
    132 # article and is lexical.]
    133 #
    134 ########################################################################
    135 # Consonant + SHADDA rules: each visible target is the consonant's romanization written twice (e.g. bb, thth, $ayin $ayin). NOTE(review): the source side and operator of every rule are missing as shown, and the targets for HAH, SAD, DAD, TAH and ZAH are entirely blank - presumably non-ASCII text lost in extraction; confirm against the upstream CLDR file.
    136 
    137   bb ; # ARABIC LETTER BEH + SHADDA
    138   tt ; # ARABIC LETTER TEH + SHADDA
    139   thth ; # ARABIC LETTER THEH + SHADDA
    140   jj ; # ARABIC LETTER JEEM + SHADDA
    141    ; # ARABIC LETTER HAH + SHADDA
    142   khkh ; # ARABIC LETTER KHAH + SHADDA
    143   dd ; # ARABIC LETTER DAL + SHADDA
    144   dhdh ; # ARABIC LETTER THAL + SHADDA
    145   rr ; # ARABIC LETTER REH + SHADDA
    146   zz ; # ARABIC LETTER ZAIN + SHADDA
    147   ss ; # ARABIC LETTER SEEN + SHADDA
    148   shsh ; # ARABIC LETTER SHEEN + SHADDA
    149    ; # ARABIC LETTER SAD + SHADDA
    150    ; # ARABIC LETTER DAD + SHADDA
    151    ; # ARABIC LETTER TAH + SHADDA
    152    ; # ARABIC LETTER ZAH + SHADDA
    153   $ayin $ayin ; # ARABIC LETTER AIN + SHADDA
    154   ghgh ; # ARABIC LETTER GHAIN + SHADDA
    155   ff ; # ARABIC LETTER FEH + SHADDA
    156   qq ; # ARABIC LETTER QAF + SHADDA
    157   kk ; # ARABIC LETTER KAF + SHADDA
    158   ll ; # ARABIC LETTER LAM + SHADDA
    159   mm ; # ARABIC LETTER MEEM + SHADDA
    160   nn ; # ARABIC LETTER NOON + SHADDA
    161   hh ; # ARABIC LETTER HEH + SHADDA
    162   ww ; # ARABIC LETTER WAW + SHADDA
    163   yy ; # ARABIC LETTER YEH + SHADDA
    164 #
    165 #
    166 ########################################################################
    167 #
    168 # End Rule 9
    169 #
    170 ########################################################################
    171 
    172 
    173 #
    174 ########################################################################
    175 #
    176 # Start of Transformations
    177 #
    178 ########################################################################
    179 # Main letter and vowel rules. The three `$wordBoundary{` rules (HAMZA, ALEF, ALEF WITH MADDA ABOVE) apply only when the preceding character is not a letter, mark, or number, and are listed before the corresponding general rule so the word-initial case is tried first. Multi-character vowel sequences (e.g. FATHA + YEH + SUKUN) are listed before the bare vowel sign for the same reason. The closing `::NFC (NFD)` normalizes to NFC when run forward and NFD in reverse. NOTE(review): source characters, rule operators and several targets (HAH, SAD, DAD, TAH, long vowels, SUKUN, possibly the diacritic on ZAH) are blank or reduced to ASCII as shown - presumably non-ASCII text lost in extraction; confirm against the upstream CLDR file.
    180 
    181 $wordBoundary{   ; # ARABIC LETTER HAMZA
    182   $alef ; # ARABIC LETTER HAMZA
    183 $wordBoundary{   ; # ARABIC LETTER ALEF
    184   $alef ; # ARABIC LETTER ALEF WASLA
    185 $wordBoundary{   ; # ARABIC LETTER ALEF WITH MADDA ABOVE
    186   $alef  ; # ARABIC LETTER ALEF WITH MADDA ABOVE
    187   b ; # ARABIC LETTER BEH
    188   t ; # ARABIC LETTER TEH
    189   h ; # ARABIC LETTER TEH MARBUTA
    190   th ; # ARABIC LETTER THEH
    191   j ; # ARABIC LETTER JEEM
    192    ; # ARABIC LETTER HAH
    193   kh ; # ARABIC LETTER KHAH
    194   d ; # ARABIC LETTER DAL
    195   dh ; # ARABIC LETTER THAL
    196   r ; # ARABIC LETTER REH
    197   z ; # ARABIC LETTER ZAIN
    198   s ; # ARABIC LETTER SEEN
    199   sh ; # ARABIC LETTER SHEEN
    200    ; # ARABIC LETTER SAD
    201    ; # ARABIC LETTER DAD
    202    ; # ARABIC LETTER TAH
    203   z ; # ARABIC LETTER ZAH
    204   $ayin ; # ARABIC LETTER AIN
    205   gh ; # ARABIC LETTER GHAIN
    206   f ; # ARABIC LETTER FEH
    207   q ; # ARABIC LETTER QAF
    208   k $disambig ; # ARABIC LETTER KEHEH
    209   k ; # ARABIC LETTER KAF
    210   l ; # ARABIC LETTER LAM
    211   m ; # ARABIC LETTER MEEM
    212   n ; # ARABIC LETTER NOON
    213   h ; # ARABIC LETTER HEH
    214   w ; # ARABIC LETTER WAW
    215   y ; # ARABIC LETTER YEH
    216 
    217    ; # ARABIC FATHA + ALEF
    218    ; # ARABIC FATHA + ALEF MAKSURA
    219   ay ; # ARABIC FATHA + YEH + SUKUN
    220   aw ; # ARABIC FATHA + WAW + SUKUN
    221   a ; # ARABIC FATHA
    222 
    223    ; # ARABIC KASRA + YEH
    224   i ; # ARABIC KASRA
    225 
    226    ; # ARABIC DAMMA + WAW
    227   u ; # ARABIC DAMMA
    228 
    229    ; # ARABIC SUKUN
    230   a ; # ARABIC FATHATAN
    231   i ; # ARABIC KASRATAN
    232   u ; # ARABIC DAMMATAN
    233 ::NFC (NFD) ;
    234 
    235 #
    236 #
    237 ########################################################################
    238 
    239 			</tRule>
    240 		</transform>
    241 	</transforms>
    242 </supplementalData>
    243