Home | History | Annotate | Download | only in transforms
      1 <?xml version="1.0" encoding="UTF-8" ?>
      2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
      3 <!--
      4 Copyright © 1991-2013 Unicode, Inc.
      5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
      6 For terms of use, see http://www.unicode.org/copyright.html
      7 -->
      8 <supplementalData>
      9 	<version number="$Revision: 12263 $"/>
     10 	<transforms>
     11 		<transform source="Grek" target="Latn" direction="both" alias="Greek-Latin und-Latn-t-und-grek" backwardAlias="Latin-Greek und-Grek-t-und-latn">
     12 			<tRule><![CDATA[
     13 # Rules are predicated on running NFD first, and NFC afterwards
     14 # :: [\u0000-\u007F \u0370- [:Greek:] [:nonspacing mark:]] ;
     15 # MINIMAL FILTER GENERATED FOR: Greek-Latin
     16 :: [;------------------\u07FB-------------------------------] ;
     17 :: NFD (NFC) ;
     18 # TEST CASES
     19 #       
     20 #      
     21 #     
     22 #    
     23 #    
     24 #    
     25 # , , , , , 
     26 # Useful variables
     27 $lower = [[:latin:][:greek:] & [:Ll:]]; # Latin- or Greek-script characters that are lowercase letters (Ll)
     28 $glower = [[:greek:] & [:Ll:]]; # Greek-script lowercase letters only
     29 $upper = [[:latin:][:greek:] & [:Lu:]] ; # Latin- or Greek-script characters that are uppercase letters (Lu)
     30 $accent = [:M:] ; # any character whose General_Category is Mark
     31 # NOTE: restrict to just the Greek & Latin accents that we care about
     32 # TODO: broaden out once iteration is fixed
     33 $accentMinus = [ [-] & [:M:] - []] ;
     34 $macron =  ;
     35 $ddot =  ;
     36 $ddotmac = [$ddot$macron];
     37 $lcgvowel = [] ;
     38 $ucgvowel = [] ;
     39 $gvowel = [$lcgvowel $ucgvowel] ;
     40 $lcgvowelC = [$lcgvowel $accent] ;
     41 $evowel = [aeiouyAEIOUY];
     42 $evowel2 = [iuyIUY];
     43 $vowel = [ $evowel $gvowel] ;
     44 $gammaLike = [] ;
     45 $egammaLike = [GKXCgkxc] ;
     46 $smooth =  ;
     47 $rough =  ;
     48 $iotasub =  ;
     49 $evowel_i = [$evowel-[iI]] ;
     50 $evowel2_i = [uyUY];
     51 $underbar = ;
     52 $afterLetter = [:L:] [[:M:]\']* ; # a letter followed by zero or more marks or apostrophes
     53 $beforeLetter = [[:M:]\']* [:L:] ; # zero or more marks or apostrophes, then a letter
     54 $beforeLower = $accent * $lower ; # zero or more $accent characters, then a $lower letter
     55 $notLetter = [^[:L:][:M:]] ; # any character that is neither a letter nor a mark
     56 $under = ;
     57 # Fix punctuation
     58 # preserve original
     59 \:  \: $under ;
     60 \?  \? $under ;
     61 \;  \? ;
     62   \: ;
     63 # CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
     64    ;
     65 # IOTA: convert iota subscript to iota
     66 # first make previous alpha long!
     67 $accent_minus = [[$accent]-[$iotasub$macron]];
     68  } $accent_minus * $iotasub  |  $macron ;
     69  } $accent_minus * $iotasub  |  $macron ;
     70 # now convert to uppercase if after uppercase, otherwise to lowercase
     71 $upper $accent * { $iotasub  I ;
     72 $iotasub  i ;
     73 | $1 $iotasub  ($evowel $macron $accentMinus *) i ;
     74 | $1 $iotasub  ($evowel $macron $accentMinus *) I ;
     75 # BREATHING
     76 # Convert rough breathing to h, and move before letters.
     77 # Make A ` x =  H a x
     78  ($macron?) $rough } $beforeLower  H |  $1;
     79  $rough } $beforeLower  H | ;
     80  $rough } $beforeLower  H |  ;
     81  ($ddot?) $rough } $beforeLower  H |   $1;
     82  $rough } $beforeLower  H |  ;
     83  $rough } $beforeLower  H |  ;
     84  ($ddot?) $rough } $beforeLower  H |  $1;
     85 # Make A x ` =  H a x
     86  ($glower $macron?) $rough  H |  $1 ;
     87  ($glower) $rough  H |  $1 ;
     88  ($glower) $rough  H |  $1 ;
     89  ($glower $ddot?) $rough  H |  $1 ;
     90  ($glower) $rough  H |  $1 ;
     91  ($glower) $rough  H |  $1 ;
     92  ($glower  $ddot?) $rough  H |  $1 ;
     93 #Otherwise, make x ` into h x and X ` into H X
     94 ($lcgvowel + $ddotmac? ) $rough  h | $1 ;
     95 ($gvowel + $ddotmac? ) $rough  H | $1 ;
     96 # Go backwards with H
     97 | $1 $rough  h ($evowel $macron $ddot? $evowel2_i $macron?) ;
     98 | $1 $rough  h ($evowel $ddot? $evowel2 $macron?) ;
     99 | $1 $rough  h ($evowel $macron? $ddot?) ;
    100 | $1 $rough  H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
    101 | $1 $rough  H ([AEIOUY] $ddot? $evowel2 $macron?) ;
    102 | $1 $rough  H ([AEIOUY] $macron? $ddot?) ;
    103 # titlecase, have to fix individually
    104 # in the future, we should add &uppercase() to make this easier
    105 | A $1 $rough  H a ($macron  $ddot? $evowel2_i $macron?) ;
    106 | E $1 $rough  H e ($macron  $ddot? $evowel2_i $macron?) ;
    107 | I $1 $rough  H i ($macron  $ddot? $evowel2_i $macron?) ;
    108 | O $1 $rough  H o ($macron  $ddot? $evowel2_i $macron?) ;
    109 | U $1 $rough  H u ($macron $ddot? $evowel2_i $macron?) ;
    110 | Y $1 $rough  H y ($macron $ddot? $evowel2_i $macron?) ;
    111 | A $1 $rough  H a ($ddot? $evowel2 $macron?) ;
    112 | E $1 $rough  H e ($ddot? $evowel2 $macron?) ;
    113 | I $1 $rough  H i ($ddot? $evowel2 $macron?) ;
    114 | O $1 $rough  H o ($ddot? $evowel2 $macron?) ;
    115 | U $1 $rough  H u ($ddot? $evowel2 $macron?) ;
    116 | Y $1 $rough  H y ($ddot? $evowel2 $macron?) ;
    117 | A $1 $rough  H a ($macron? $ddot? ) ;
    118 | E $1 $rough  H e ($macron? $ddot? ) ;
    119 | I $1 $rough  H i ($macron? $ddot? ) ;
    120 | O $1 $rough  H o ($macron? $ddot? ) ;
    121 | U $1 $rough  H u ($macron? $ddot? ) ;
    122 | Y $1 $rough  H y ($macron? $ddot? ) ;
    123 # Now do smooth
    124 #delete smooth breathing for Latin
    125 $smooth  ;
    126 # insert in Greek
    127 # the assumption is that all Marks are on letters.
    128 | $1 $smooth  $notLetter { ([rR]) } [^hH$smooth$rough] ;
    129 | $1 $smooth  $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
    130 | $1 $smooth  $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
    131 # TODO: preserve smooth/rough breathing if not
    132 # on initial vowel sequence
    133 # need to have these up here so the rules don't mask
    134 # remove now superfluous macron when returning
    135   A $macron ;
    136   a $macron ;
    137   e $macron ;
    138   E $macron ;
    139   ph ;
    140  } $beforeLower  Ps ;
    141   PS ;
    142  } $beforeLower  Ph ;
    143   PH ;
    144   ps ;
    145   o $macron ;
    146    O $macron;
    147 # NORMAL
    148   a ;
    149   A ;
    150   b ;
    151   B ;
    152  } $gammaLike  n } $egammaLike ;
    153   g ;
    154  } $gammaLike  N } $egammaLike ;
    155   G ;
    156   d ;
    157   D ;
    158   e ;
    159   E ;
    160   z ;
    161   Z ;
    162   th ;
    163  } $beforeLower  Th ;
    164   TH ;
    165   i ;
    166   I ;
    167   k ;
    168   K ;
    169   l ;
    170   L ;
    171   m ;
    172   M ;
    173  } $gammaLike  n\' ;
    174   n ;
    175  } $gammaLike  N\' ;
    176   N ;
    177   x ;
    178   X ;
    179   o ;
    180   O ;
    181   p ;
    182   P ;
    183  $rough  rh;
    184  $rough } $beforeLower  Rh ;
    185  $rough  RH ;
    186   r ;
    187   R ;
    188 # insert separator before things that turn into s
    189 [Pp] { } []  \' ;
    190 # special S variants
    191   S ; #  GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
    192   s ; # GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
    193   S ; #  GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
    194   s ; #  GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
    195 # underbar means exception
    196 # before a letter, initial
    197  } $beforeLetter  s $underbar } $beforeLetter;
    198  } $beforeLetter  s } $beforeLetter;
    199 # otherwise, after a letter = final
    200 $afterLetter {   $afterLetter { s $underbar;
    201 $afterLetter {   $afterLetter { s ;
    202 # otherwise (isolated) = initial
    203   s $underbar;
    204   s ;
    205 # [Pp] {   \'S ;
    206   S ;
    207   t ;
    208   T ;
    209 $vowel { }  u ;
    210   y ;
    211 $vowel {   U ;
    212   Y ;
    213   ch ;
    214  } $beforeLower  Ch ;
    215   CH ;
    216 # Completeness for ASCII
    217 $ignore = [[:Mark:]''] * ;
    218 | k   c ;
    219 | ph  f ;
    220 | i   j ;
    221 | k  q ;
    222 | b  v } $vowel ;
    223 | b  w } $vowel;
    224 | u  v ;
    225 | u  w;
    226 | K  C ;
    227 | Ph  F ;
    228 | I  J ;
    229 | K  Q ;
    230 | B  V  } $vowel ;
    231 | B  W  } $vowel ;
    232 | U  V ;
    233 | U  W ;
    234 $rough } $ignore [:UppercaseLetter:]  H ;
    235 $ignore [:UppercaseLetter:] { $rough  H ;
    236 $rough  H ;
    237 $rough  h ;
    238 # Completeness for Greek
    239   |  ;
    240   |  ;
    241   |  ;
    242   |  ;
    243   |  ;
    244   |  ;
    245   |  ;
    246   |  ;
    247   | ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
    248   j ;
    249   |  ;
    250   |  ;
    251   |  ;
    252   i;
    253 # delete any trailing ' marks used for roundtripping
    254  [] { \' } [Ss] ;
    255  [] { \' } $egammaLike ;
    256 ::NFC (NFD) ;
    257 # ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
    258 # ([\u0000-\u007F  [:Latin:] [:nonspacing mark:]]) ;
    259 # MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
    260 :: ( [':?A-Za-z---------------------------------------------------------------] ) ;
    261 			]]></tRule>
    262 		</transform>
    263 	</transforms>
    264 </supplementalData>
    265