Home | History | Annotate | Download | only in transforms
      1 <?xml version="1.0" encoding="UTF-8" ?>
      2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
      3 <!--
      4 Copyright © 1991-2013 Unicode, Inc.
      5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
      6 For terms of use, see http://www.unicode.org/copyright.html
      7 -->
      8 <supplementalData>
      9 	<version number="$Revision: 12263 $"/>
     10 	<transforms>
     11 		<transform source="Hira" target="Kana" direction="both" alias="Hiragana-Katakana und-Kana-t-und-hira" backwardAlias="Katakana-Hiragana und-Hira-t-und-kana">
     12 			<tRule>
     13 # note: a global filter is more efficient, but MUST include all source chars
     14 :: [\u0000-\u007E  - - -[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;
     15 :: NFKC ();
     16 # Hiragana-Katakana
     17 # This is largely a one-to-one mapping, but it has a
     18 # few kinks:
     19 # 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
     20 # Hiragana equivalents.  We use Hiragana wa/wi/we/wo
     21 # (308F-3092) with a voicing mark (3099), which is
     22 # semantically equivalent.  However, this is a non-
     23 # roundtripping transformation.
     24 # 2. The Katakana small ka/ke (30F5,30F6) have no
     25 # Hiragana equivalents.  We convert them to normal
     26 # Hiragana ka/ke (304B,3051).  This is a one-way
     27 # information-losing transformation and precludes
     28 # round-tripping of 30F5 and 30F6.
     29 # 3. The combining marks 3099-309C are in the Hiragana
     30 # block, but they apply to Katakana as well, so we
     31 # leave them untouched.
     32 # 4. The Katakana prolonged sound mark 30FC doubles the
     33 # preceding vowel.  This is a one-way information-
     34 # losing transformation from Katakana to Hiragana.
     35 # 5. The Katakana middle dot separates words in foreign
     36 # expressions; we leave this unmodified.
     37 # The above points preclude successful round-trip
     38 # transformations of arbitrary input text.  However,
     39 # they provide naturalistic results that should conform
     40 # to user expectations.
     41 # Combining equivalents va/vi/ve/vo
     42   ;
     43   ;
     44   ;
     45   ;
     46 # One-to-one mappings, main block
     47 # 3041:3094  30A1:30F4
     48 # 309D,E  30FD,E
     49   ;
     50   ;
     51   ;
     52   ;
     53   ;
     54   ;
     55   ;
     56   ;
     57   ;
     58   ;
     59   ;
     60   ;
     61   ;
     62   ;
     63   ;
     64   ;
     65   ;
     66   ;
     67   ;
     68   ;
     69   ;
     70   ;
     71   ;
     72   ;
     73   ;
     74   ;
     75   ;
     76   ;
     77   ;
     78   ;
     79   ;
     80   ;
     81   ;
     82   ;
     83   ;
     84   ;
     85   ;
     86   ;
     87   ;
     88   ;
     89   ;
     90   ;
     91   ;
     92   ;
     93   ;
     94   ;
     95   ;
     96   ;
     97   ;
     98   ;
     99   ;
    100   ;
    101   ;
    102   ;
    103   ;
    104   ;
    105   ;
    106   ;
    107   ;
    108   ;
    109   ;
    110   ;
    111   ;
    112   ;
    113   ;
    114   ;
    115   ;
    116   ;
    117   ;
    118   ;
    119   ;
    120   ;
    121   ;
    122   ;
    123   ;
    124   ;
    125   ;
    126   ;
    127   ;
    128   ;
    129   ;
    130   ;
    131   ;
    132   ;
    133   ;
    134   ;
    135 # One-way Katakana-Hiragana xform of small K ka/ke to
    136 # normal H ka/ke.
    137   ;
    138   ;
    139 # Katakana followed by a prolonged sound mark 30FC has
    140 # its final vowel doubled.  This is a Katakana-Hiragana
    141 # one-way information-losing transformation.  We
    142 # include the small Katakana (e.g., small A 3041) and
    143 # do not distinguish them from their large
    144 # counterparts.  It doesn't make sense to double a
    145 # small counterpart vowel as a small Hiragana vowel, so
    146 # we don't do so.  In natural text this should never
    147 # occur anyway.  If a 30FC is seen without a preceding
    148 # vowel sound (e.g., after n 30F3) we do not change it.
    149 ### $long = ;
    150 # The following categories are Hiragana, not Katakana
    151 # as might be expected, since by the time we get to the
    152 # 30FC, the preceding character will have already been
    153 # transformed to Hiragana.
    154 # {The following was mechanically generated from the
    155 # Unicode 3.0 data:}
    156 $xa = [ \
    157       \
    158       \
    159       \
    160 ];
    161 $xi = [ \
    162       \
    163       \
    164    \
    165 ];
    166 $xu = [ \
    167       \
    168       \
    169       \
    170 ];
    171 $xe = [ \
    172       \
    173       \
    174    \
    175 ];
    176 $xo = [ \
    177       \
    178       \
    179      \
    180 ];
    181   $xa {};
    182   $xi {};
    183   $xu {};
    184   $xe {};
    185   $xo {};
    186 :: (NFKC) ;
    187 # note: a global filter is more efficient, but MUST include all source chars!!
    188 :: ([\u0000-\u007E  - - -[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);
    189 # eof
    190 			</tRule>
    191 		</transform>
    192 	</transforms>
    193 </supplementalData>
    194