Home | History | Annotate | Download | only in transforms
      1 <?xml version="1.0" encoding="UTF-8" ?>
      2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
      3 <!--
      4 Copyright © 1991-2013 Unicode, Inc.
      5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
      6 For terms of use, see http://www.unicode.org/copyright.html
      7 -->
      8 <supplementalData>
      9 	<version number="$Revision: 12263 $"/>
     10 	<transforms>
     11 		<transform source="ja_Latn" target="ru" direction="forward" alias="ru-t-ja-latn">
     12 			<tRule>
     13 # Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU.
     14 # Can be run in sequence after e.g. Katakana-Latin.
     15 #
     16 # These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian.
     17 #
     18 # TODO: Cyrillization needs to respect morpheme/Kanji boundaries.
     19 # For example, /ai/ inside one morpheme becomes ай, but /a/ + /i/ across a
     20 # boundary becomes аи.  We need boundary markup in the input to do that properly.
     21 #
     22 
     23 ::NFD(NFC);
     24 ::[:Latin:] Lower();
     25 #
     26 # Long-vowel markers: U+0304 COMBINING MACRON (what a precomposed macron vowel
     27 # decomposes to under NFD) and U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK.
     28 $lengthMarker = [\u0304\u30FC];  # NOTE(review): set contents reconstructed; confirm against upstream CLDR.
     29 #
     30 #
     31 # Delete apostrophes.  Apostrophes after "n" are consumed below.
     32 
     33 \' → ;
     34 #
     35 #
     36 # Turn long /e:/ into diphthong /ei/.
     37 # Note that /ei/ across a morpheme boundary (e.g. Takei) becomes эи.
     38 
     39 e $lengthMarker → эй ;
     40 #
     41 #
     42 # Turn long /i:/ into two vowels /ii/.
     43 
     44 i $lengthMarker → | i i ;
     45 #
     46 #
     47 # Ignore vowel length everywhere else.
     48 
     49 $lengthMarker → ;
     50 #
     51 #
     52 # Vowels.
     53 #
     54 # TODO(mjansche): Enable diphthongs once we have Kanji boundaries.
     55 ## ai → ай ;
     56 
     57 a  → а ;
     58 i\~e → | ye ;
     59 i  → и ;
     60 u\~ → в ;  # u + small vowel kana (presumably ウィ etc.); NOTE(review): target reconstructed.
     61 #
     62 ## ui → уй ;
     63 
     64 u  → у ;
     65 e  → э ;
     66 o  → о ;
     67 #
     68 #
     69 # Consonants.
     70 #
     71 
     72 k → к ;
     73 #
     74 # "sh" is rewritten to "sy" so that the s and y* rules below produce си, ся, сю, сё.
     75 
     76 sh → | sy ;
     77 s → с ;
     78 #
     79 # "ch" is rewritten to "ty"; a geminate "c" before "ch" becomes "t".
     80 
     81 ch → | ty ;
     82 c } ch → t ;
     83 te\~ → | t ;   # presumably ティ
     84 to\~ → | t ;   # presumably トゥ
     85 tsu\~ → | ts ;  # presumably ツァ, ツィ, etc.
     86 ts → ц ;
     87 t  → т ;
     88 #
     89 # A "~tsu" sequence is re-read as plain "tsu".
     90 
     91 \~tsu → | tsu ;
     92 #
     93 # Syllabic n: м before labials, нъ when followed by an apostrophe, н otherwise.
     94 
     95 n } [bpm] → м ;  # e.g. shinbashi → симбаси
     96 n\' → нъ ;
     97 n → н ;
     98 #
     99 #
    100 
    101 h → х ;
    102 fu\~ → | f ;  # presumably ファ etc.
    103 f → ф ;
    104 #
    105 #
    106 
    107 m → м ;
    108 #
    109 #
    110 
    111 ya → я ;
    112 yi → и ;  # Added for convenience, after sh, ch, j.
    113 yu → ю ;
    114 ye → е ;  # ?? unobserved
    115 yo → ё ;
    116 #
    117 #
    118 
    119 r → р ;
    120 #
    121 # NOTE(review): "w" outside "wa" has an empty target (w is dropped); confirm against upstream CLDR.
    122 
    123 wa → ва ;
    124 w → ;
    125 #
    126 #
    127 
    128 g → г ;
    129 #
    130 # "j" is rewritten to "zy" so that the z and y* rules below produce дзи, дзя, дзю, дзё.
    131 
    132 j → | zy ;
    133 z → дз ;
    134 #
    135 #
    136 
    137 de\~ → | d ;  # presumably ディ
    138 dji\~ → | z ;  # presumably ヂ + small kana
    139 dj → | j ;  # presumably ヂ
    140 do\~ → | d ;  # presumably ドゥ
    141 dzu\~ → | z ;  # presumably ヅ + small kana
    142 dz → | z ;  # presumably ヅ
    143 d → д ;
    144 #
    145 #
    146 
    147 b → б ;
    148 vu\~ → | v ;  # presumably ヴァ etc.
    149 v → в ;  # ?? unobserved
    150 #
    151 #
    152 
    153 p → п ;
    154 #
    155 # Recompose the output (e.g. й and ё, which are decomposed while the rules run).
    156 
    157 ::NFC(NFD);
    158 			</tRule>
    159 		</transform>
    160 	</transforms>
    161 </supplementalData>
    162