1 <?xml version="1.0" encoding="UTF-8" ?> 2 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd"> 3 <!-- 4 Copyright 1991-2013 Unicode, Inc. 5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) 6 For terms of use, see http://www.unicode.org/copyright.html 7 --> 8 <supplementalData> 9 <version number="$Revision: 12263 $"/> 10 <transforms> 11 <transform source="Grek" target="Latn" direction="both" alias="Greek-Latin und-Latn-t-und-grek" backwardAlias="Latin-Greek und-Grek-t-und-latn"> 12 <tRule><![CDATA[ 13 # Rules are predicated on running NFD first, and NFC afterwards 14 # :: [\u0000-\u007F \u0370- [:Greek:] [:nonspacing mark:]] ; 15 # MINIMAL FILTER GENERATED FOR: Greek-Latin 16 :: [;------------------\u07FB-------------------------------] ; 17 :: NFD (NFC) ; 18 # TEST CASES 19 # 20 # 21 # 22 # 23 # 24 # 25 # , , , , , 26 # Useful variables 27 $lower = [[:latin:][:greek:] & [:Ll:]]; 28 $glower = [[:greek:] & [:Ll:]]; 29 $upper = [[:latin:][:greek:] & [:Lu:]] ; 30 $accent = [:M:] ; 31 # NOTE: restrict to just the Greek & Latin accents that we care about 32 # TODO: broaden out once interation is fixed 33 $accentMinus = [ [-] & [:M:] - []] ; 34 $macron = ; 35 $ddot = ; 36 $ddotmac = [$ddot$macron]; 37 $lcgvowel = [] ; 38 $ucgvowel = [] ; 39 $gvowel = [$lcgvowel $ucgvowel] ; 40 $lcgvowelC = [$lcgvowel $accent] ; 41 $evowel = [aeiouyAEIOUY]; 42 $evowel2 = [iuyIUY]; 43 $vowel = [ $evowel $gvowel] ; 44 $gammaLike = [] ; 45 $egammaLike = [GKXCgkxc] ; 46 $smooth = ; 47 $rough = ; 48 $iotasub = ; 49 $evowel_i = [$evowel-[iI]] ; 50 $evowel2_i = [uyUY]; 51 $underbar = ; 52 $afterLetter = [:L:] [[:M:]\']* ; 53 $beforeLetter = [[:M:]\']* [:L:] ; 54 $beforeLower = $accent * $lower ; 55 $notLetter = [^[:L:][:M:]] ; 56 $under = ; 57 # Fix punctuation 58 # preserve original 59 \: \: $under ; 60 \? \? $under ; 61 \; \? ; 62 \: ; 63 # CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve 64 ; 65 # IOTA: convert iota subscript to iota 66 # first make previous alpha long! 67 $accent_minus = [[$accent]-[$iotasub$macron]]; 68 } $accent_minus * $iotasub | $macron ; 69 } $accent_minus * $iotasub | $macron ; 70 # now convert to uppercase if after uppercase, ow to lowercase 71 $upper $accent * { $iotasub I ; 72 $iotasub i ; 73 | $1 $iotasub ($evowel $macron $accentMinus *) i ; 74 | $1 $iotasub ($evowel $macron $accentMinus *) I ; 75 # BREATHING 76 # Convert rough breathing to h, and move before letters. 77 # Make A ` x = H a x 78 ($macron?) $rough } $beforeLower H | $1; 79 $rough } $beforeLower H | ; 80 $rough } $beforeLower H | ; 81 ($ddot?) $rough } $beforeLower H | $1; 82 $rough } $beforeLower H | ; 83 $rough } $beforeLower H | ; 84 ($ddot?) $rough } $beforeLower H | $1; 85 # Make A x ` = H a x 86 ($glower $macron?) $rough H | $1 ; 87 ($glower) $rough H | $1 ; 88 ($glower) $rough H | $1 ; 89 ($glower $ddot?) $rough H | $1 ; 90 ($glower) $rough H | $1 ; 91 ($glower) $rough H | $1 ; 92 ($glower $ddot?) $rough H | $1 ; 93 #Otherwise, make x ` into h x and X ` into H X 94 ($lcgvowel + $ddotmac? ) $rough h | $1 ; 95 ($gvowel + $ddotmac? ) $rough H | $1 ; 96 # Go backwards with H 97 | $1 $rough h ($evowel $macron $ddot? $evowel2_i $macron?) ; 98 | $1 $rough h ($evowel $ddot? $evowel2 $macron?) ; 99 | $1 $rough h ($evowel $macron? $ddot?) ; 100 | $1 $rough H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ; 101 | $1 $rough H ([AEIOUY] $ddot? $evowel2 $macron?) ; 102 | $1 $rough H ([AEIOUY] $macron? $ddot?) ; 103 # titlecase, have to fix individually 104 # in the future, we should add &uppercase() to make this easier 105 | A $1 $rough H a ($macron $ddot? $evowel2_i $macron?) ; 106 | E $1 $rough H e ($macron $ddot? $evowel2_i $macron?) ; 107 | I $1 $rough H i ($macron $ddot? $evowel2_i $macron?) ; 108 | O $1 $rough H o ($macron $ddot? $evowel2_i $macron?) ; 109 | U $1 $rough H u ($macron $ddot? $evowel2_i $macron?) ; 110 | Y $1 $rough H y ($macron $ddot? $evowel2_i $macron?) ; 111 | A $1 $rough H a ($ddot? $evowel2 $macron?) ; 112 | E $1 $rough H e ($ddot? $evowel2 $macron?) ; 113 | I $1 $rough H i ($ddot? $evowel2 $macron?) ; 114 | O $1 $rough H o ($ddot? $evowel2 $macron?) ; 115 | U $1 $rough H u ($ddot? $evowel2 $macron?) ; 116 | Y $1 $rough H y ($ddot? $evowel2 $macron?) ; 117 | A $1 $rough H a ($macron? $ddot? ) ; 118 | E $1 $rough H e ($macron? $ddot? ) ; 119 | I $1 $rough H i ($macron? $ddot? ) ; 120 | O $1 $rough H o ($macron? $ddot? ) ; 121 | U $1 $rough H u ($macron? $ddot? ) ; 122 | Y $1 $rough H y ($macron? $ddot? ) ; 123 # Now do smooth 124 #delete smooth breathing for Latin 125 $smooth ; 126 # insert in Greek 127 # the assumption is that all Marks are on letters. 128 | $1 $smooth $notLetter { ([rR]) } [^hH$smooth$rough] ; 129 | $1 $smooth $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ; 130 | $1 $smooth $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ; 131 # TODO: preserve smooth/rough breathing if not 132 # on initial vowel sequence 133 # need to have these up here so the rules don't mask 134 # remove now superfluous macron when returning 135 A $macron ; 136 a $macron ; 137 e $macron ; 138 E $macron ; 139 ph ; 140 } $beforeLower Ps ; 141 PS ; 142 } $beforeLower Ph ; 143 PH ; 144 ps ; 145 o $macron ; 146 O $macron; 147 # NORMAL 148 a ; 149 A ; 150 b ; 151 B ; 152 } $gammaLike n } $egammaLike ; 153 g ; 154 } $gammaLike N } $egammaLike ; 155 G ; 156 d ; 157 D ; 158 e ; 159 E ; 160 z ; 161 Z ; 162 th ; 163 } $beforeLower Th ; 164 TH ; 165 i ; 166 I ; 167 k ; 168 K ; 169 l ; 170 L ; 171 m ; 172 M ; 173 } $gammaLike n\' ; 174 n ; 175 } $gammaLike N\' ; 176 N ; 177 x ; 178 X ; 179 o ; 180 O ; 181 p ; 182 P ; 183 $rough rh; 184 $rough } $beforeLower Rh ; 185 $rough RH ; 186 r ; 187 R ; 188 # insert separator before things that turn into s 189 [Pp] { } [] \' ; 190 # special S variants 191 S ; # GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L 192 s ; # GREEK SMALL LETTER SHO Lowercase_Letter Grek - L 193 S ; # GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L 194 s ; # GREEK SMALL LETTER SAN Lowercase_Letter Grek - L 195 # underbar means exception 196 # before a letter, initial 197 } $beforeLetter s $underbar } $beforeLetter; 198 } $beforeLetter s } $beforeLetter; 199 # otherwise, after a letter = final 200 $afterLetter { $afterLetter { s $underbar; 201 $afterLetter { $afterLetter { s ; 202 # otherwise (isolated) = initial 203 s $underbar; 204 s ; 205 # [Pp] { \'S ; 206 S ; 207 t ; 208 T ; 209 $vowel { } u ; 210 y ; 211 $vowel { U ; 212 Y ; 213 ch ; 214 } $beforeLower Ch ; 215 CH ; 216 # Completeness for ASCII 217 $ignore = [[:Mark:]''] * ; 218 | k c ; 219 | ph f ; 220 | i j ; 221 | k q ; 222 | b v } $vowel ; 223 | b w } $vowel; 224 | u v ; 225 | u w; 226 | K C ; 227 | Ph F ; 228 | I J ; 229 | K Q ; 230 | B V } $vowel ; 231 | B W } $vowel ; 232 | U V ; 233 | U W ; 234 $rough } $ignore [:UppercaseLetter:] H ; 235 $ignore [:UppercaseLetter:] { $rough H ; 236 $rough H ; 237 $rough h ; 238 # Completeness for Greek 239 | ; 240 | ; 241 | ; 242 | ; 243 | ; 244 | ; 245 | ; 246 | ; 247 | ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL 248 j ; 249 | ; 250 | ; 251 | ; 252 i; 253 # delete any trailing ' marks used for roundtripping 254 [] { \' } [Ss] ; 255 [] { \' } $egammaLike ; 256 ::NFC (NFD) ; 257 # ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ; 258 # ([\u0000-\u007F [:Latin:] [:nonspacing mark:]]) ; 259 # MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD 260 :: ( [':?A-Za-z---------------------------------------------------------------] ) ; 261 ]]></tRule> 262 </transform> 263 </transforms> 264 </supplementalData> 265