common/transforms/Greek-Latin.xml

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
	<version number="$Revision: 12263 $"/>
	<transforms>
		<transform source="Grek" target="Latn" direction="both" alias="Greek-Latin und-Latn-t-und-grek" backwardAlias="Latin-Greek und-Grek-t-und-latn">
			<tRule><![CDATA[
# Rules are predicated on running NFD first, and NFC afterwards
# :: [\u0000-\u007F \u0370- [:Greek:] [:nonspacing mark:]] ;
# MINIMAL FILTER GENERATED FOR: Greek-Latin
# The active filter set follows; the commented-out set above is kept for reference.
# NOTE(review): inside the set below only '-' range separators and \u07FB are visible
# in the copy reviewed; the range endpoints are presumably non-ASCII literals.
# Confirm against the upstream CLDR file before editing this line.
:: [;------------------\u07FB-------------------------------] ;
# Normalization step referred to by the first comment: NFD here, with NFC given in
# parentheses as the counterpart used when the rules run in the other direction.
:: NFD (NFC) ;
# TEST CASES
#
#
#
#
#
#
# , , , , ,
# Useful variables
# NOTE(review): several definitions in this section ($accentMinus, $macron, $ddot,
# $lcgvowel, $ucgvowel, $gammaLike, $smooth, $rough, $iotasub, $underbar, $under)
# show an empty right-hand side or empty set in the copy reviewed. They presumably
# hold non-ASCII literals; confirm against upstream before relying on them.
#
# Case classes: lowercase letters of the Latin and Greek scripts.
$lower = [[:latin:][:greek:] & [:Ll:]];
# Greek-only lowercase letters.
$glower = [[:greek:] & [:Ll:]];
# Uppercase letters of the Latin and Greek scripts.
$upper = [[:latin:][:greek:] & [:Lu:]] ;
# Any character with general category M (marks).
$accent = [:M:] ;
# NOTE: restrict to just the Greek & Latin accents that we care about
# TODO: broaden out once interation is fixed
# A range intersected with marks, minus an excluded set.
$accentMinus = [ [-] & [:M:] - []] ;
# Individual marks referenced by name in the IOTA and BREATHING rules.
$macron =  ;
$ddot =  ;
# Either $ddot or $macron.
$ddotmac = [$ddot$macron];
# Greek vowels: lowercase, uppercase, and their union.
$lcgvowel = [] ;
$ucgvowel = [] ;
$gvowel = [$lcgvowel $ucgvowel] ;
# A lowercase Greek vowel or any mark.
$lcgvowelC = [$lcgvowel $accent] ;
# Latin vowels (y included), both cases.
$evowel = [aeiouyAEIOUY];
# i/u/y in both cases; the breathing rules use it as the letter following an $evowel.
$evowel2 = [iuyIUY];
# Latin and Greek vowels together.
$vowel = [ $evowel $gvowel] ;
# Context sets for the n / n\' rules in the NORMAL section: $gammaLike appears on the
# side with the blank letter, $egammaLike (ASCII G K X C, both cases) on the Latin side.
$gammaLike = [] ;
$egammaLike = [GKXCgkxc] ;
# Breathing marks and the iota subscript, handled in the BREATHING and IOTA sections.
$smooth =  ;
$rough =  ;
$iotasub =  ;
# $evowel without i/I.
$evowel_i = [$evowel-[iI]] ;
# u/y only, i.e. $evowel2 without i/I.
$evowel2_i = [uyUY];
# Marker used by the sigma rules ("underbar means exception").
$underbar = ;
# A letter followed by any number of marks or apostrophes.
$afterLetter = [:L:] [[:M:]\']* ;
# Any number of marks or apostrophes followed by a letter.
$beforeLetter = [[:M:]\']* [:L:] ;
# Zero or more marks followed by a lowercase letter; used as trailing context by the
# rules that produce titlecase output such as Ps, Ph, Th, Rh, Ch.
$beforeLower = $accent * $lower ;
# Anything that is neither a letter nor a mark.
$notLetter = [^[:L:][:M:]] ;
# Marker used by the punctuation rules.
# NOTE(review): defined with the same (empty-looking) right-hand side as $underbar.
$under = ;
# Fix punctuation
# preserve original
# NOTE(review): the operator between the two sides of each rule is not visible in the
# copy reviewed; the comments below describe only the operands that are present.
# ASCII ':' and '?' are paired with themselves plus the $under marker.
\:  \: $under ;
\?  \? $under ;
# ASCII ';' is paired with '?'.
\;  \? ;
# A literal that is not visible here is paired with ':'.
  \: ;
# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
# NOTE(review): both operands of the rule below are blank in the copy reviewed;
# presumably two combining marks. Confirm against upstream.
   ;
# IOTA: convert iota subscript to iota
# first make previous alpha long!
# Marks other than the iota subscript and the macron.
$accent_minus = [[$accent]-[$iotasub$macron]];
# Two rules with the same shape: a (blank here) letter, then '}' introducing the
# trailing context "$accent_minus* $iotasub"; the other side places the cursor ('|')
# and ends in $macron.
# NOTE(review): the two lines look identical in the copy reviewed; going by the comment
# above they presumably target the lowercase and uppercase alpha.
 } $accent_minus * $iotasub  |  $macron ;
 } $accent_minus * $iotasub  |  $macron ;
# now convert to uppercase if after uppercase, otherwise to lowercase
# '{' ends the preceding context: an uppercase letter plus any marks.
$upper $accent * { $iotasub  I ;
$iotasub  i ;
# Counterpart rules: the group ($evowel $macron $accentMinus*) before i / I is captured
# and re-emitted as $1 ahead of $iotasub, with the cursor ('|') placed before it.
| $1 $iotasub  ($evowel $macron $accentMinus *) i ;
| $1 $iotasub  ($evowel $macron $accentMinus *) I ;
# BREATHING
# Convert rough breathing to h, and move before letters.
# NOTE(review): the Greek letters that start each rule in the first two groups are not
# visible in the copy reviewed, so several lines look identical. Presumably each line
# targets a different uppercase vowel; confirm against upstream.
# Make A ` x =  H a x
# Group 1: the rough breathing follows the first letter directly. '}' $beforeLower
# requires a lowercase letter next, so the emitted H starts a titlecase word.
 ($macron?) $rough } $beforeLower  H |  $1;
 $rough } $beforeLower  H | ;
 $rough } $beforeLower  H |  ;
 ($ddot?) $rough } $beforeLower  H |   $1;
 $rough } $beforeLower  H |  ;
 $rough } $beforeLower  H |  ;
 ($ddot?) $rough } $beforeLower  H |  $1;
# Make A x ` =  H a x
# Group 2: the rough breathing sits after a following lowercase Greek letter, which is
# captured (with its optional $macron / $ddot) and re-emitted as $1.
 ($glower $macron?) $rough  H |  $1 ;
 ($glower) $rough  H |  $1 ;
 ($glower) $rough  H |  $1 ;
 ($glower $ddot?) $rough  H |  $1 ;
 ($glower) $rough  H |  $1 ;
 ($glower) $rough  H |  $1 ;
 ($glower  $ddot?) $rough  H |  $1 ;
#Otherwise, make x ` into h x and X ` into H X
# The vowel run (one or more vowels plus optional $ddotmac) is captured and left after
# the cursor so the later letter rules still process it.
($lcgvowel + $ddotmac? ) $rough  h | $1 ;
($gvowel + $ddotmac? ) $rough  H | $1 ;
# Go backwards with H
# Longest patterns first: vowel+macron+second vowel, then vowel+second vowel, then a
# single vowel. The captured Latin vowels are re-emitted as $1 ahead of $rough.
| $1 $rough  h ($evowel $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough  h ($evowel $ddot? $evowel2 $macron?) ;
| $1 $rough  h ($evowel $macron? $ddot?) ;
| $1 $rough  H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough  H ([AEIOUY] $ddot? $evowel2 $macron?) ;
| $1 $rough  H ([AEIOUY] $macron? $ddot?) ;
# titlecase, have to fix individually
# in the future, we should add &uppercase() to make this easier
# Each rule pairs "H" + a lowercase Latin vowel with the same vowel in uppercase.
# The three groups repeat the three pattern lengths used above, for a e i o u y in turn.
| A $1 $rough  H a ($macron  $ddot? $evowel2_i $macron?) ;
| E $1 $rough  H e ($macron  $ddot? $evowel2_i $macron?) ;
| I $1 $rough  H i ($macron  $ddot? $evowel2_i $macron?) ;
| O $1 $rough  H o ($macron  $ddot? $evowel2_i $macron?) ;
| U $1 $rough  H u ($macron $ddot? $evowel2_i $macron?) ;
| Y $1 $rough  H y ($macron $ddot? $evowel2_i $macron?) ;
| A $1 $rough  H a ($ddot? $evowel2 $macron?) ;
| E $1 $rough  H e ($ddot? $evowel2 $macron?) ;
| I $1 $rough  H i ($ddot? $evowel2 $macron?) ;
| O $1 $rough  H o ($ddot? $evowel2 $macron?) ;
| U $1 $rough  H u ($ddot? $evowel2 $macron?) ;
| Y $1 $rough  H y ($ddot? $evowel2 $macron?) ;
| A $1 $rough  H a ($macron? $ddot? ) ;
| E $1 $rough  H e ($macron? $ddot? ) ;
| I $1 $rough  H i ($macron? $ddot? ) ;
| O $1 $rough  H o ($macron? $ddot? ) ;
| U $1 $rough  H u ($macron? $ddot? ) ;
| Y $1 $rough  H y ($macron? $ddot? ) ;
# Now do smooth
#delete smooth breathing for Latin
# $smooth has nothing on the other side of the rule, i.e. it is dropped.
$smooth  ;
# insert in Greek
# the assumption is that all Marks are on letters.
# Each rule requires a preceding non-letter ($notLetter before '{'), so it applies only
# at the start of a word. The captured text is re-emitted as $1 followed by $smooth.
# r/R not followed by h/H or an existing breathing mark:
| $1 $smooth  $notLetter { ([rR]) } [^hH$smooth$rough] ;
# a two-vowel sequence not already followed by a breathing mark:
| $1 $smooth  $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
# a single vowel not followed by a second vowel or a breathing mark:
| $1 $smooth  $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
# TODO: preserve smooth/rough breathing if not
# on initial vowel sequence
# need to have these up here so the rules don't mask
# remove now superfluous macron when returning
# NOTE(review): the Greek side of every rule in this section is blank in the copy
# reviewed; only the Latin side is described below.
# Latin vowels written with $macron (A/a, e/E here; o/O at the end of the section).
  A $macron ;
  a $macron ;
  e $macron ;
  E $macron ;
# Digraphs ph and ps in lowercase, titlecase and uppercase. The titlecase form is used
# when '}' $beforeLower finds a lowercase letter next; otherwise the all-caps form is.
  ph ;
 } $beforeLower  Ps ;
  PS ;
 } $beforeLower  Ph ;
  PH ;
  ps ;
  o $macron ;
   O $macron;
# NORMAL
# One rule per letter and case. NOTE(review): the Greek side of each rule is blank in
# the copy reviewed, so only the Latin result is visible.
  a ;
  A ;
  b ;
  B ;
# Before a $gammaLike letter the (blank) letter pairs with n / N instead of g / G;
# on the Latin side the n / N must be followed by an $egammaLike letter.
 } $gammaLike  n } $egammaLike ;
  g ;
 } $gammaLike  N } $egammaLike ;
  G ;
  d ;
  D ;
  e ;
  E ;
  z ;
  Z ;
# th: titlecase "Th" when a lowercase letter follows, "TH" otherwise.
  th ;
 } $beforeLower  Th ;
  TH ;
  i ;
  I ;
  k ;
  K ;
  l ;
  L ;
  m ;
  M ;
# Before a $gammaLike letter this n / N carries a trailing apostrophe, keeping it
# distinct from the n / N produced by the rules above.
 } $gammaLike  n\' ;
  n ;
 } $gammaLike  N\' ;
  N ;
  x ;
  X ;
  o ;
  O ;
  p ;
  P ;
# r with rough breathing: rh, Rh (lowercase letter follows), RH.
 $rough  rh;
 $rough } $beforeLower  Rh ;
 $rough  RH ;
  r ;
  R ;
# insert separator before things that turn into s
# Between P/p and a (blank here) set of letters, an apostrophe is paired with nothing.
[Pp] { } []  \' ;
# special S variants
  S ; #  GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
  s ; # GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
  S ; #  GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
  s ; #  GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
# underbar means exception
# Three positional cases follow, each in two variants: with $underbar and without.
# NOTE(review): the sigma letters themselves are blank in the copy reviewed.
# before a letter, initial
 } $beforeLetter  s $underbar } $beforeLetter;
 } $beforeLetter  s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter {   $afterLetter { s $underbar;
$afterLetter {   $afterLetter { s ;
# otherwise (isolated) = initial
  s $underbar;
  s ;
# [Pp] {   \'S ;
  S ;
  t ;
  T ;
# After a vowel ('{' ends the preceding context) the letter pairs with u / U;
# elsewhere it pairs with y / Y.
$vowel { }  u ;
  y ;
$vowel {   U ;
  Y ;
# ch: titlecase "Ch" when a lowercase letter follows, "CH" otherwise.
  ch ;
 } $beforeLower  Ch ;
  CH ;
# Completeness for ASCII
# Any run of marks or apostrophes ('' presumably denotes a literal apostrophe).
$ignore = [[:Mark:]''] * ;
# Latin letters with no rule of their own above (c f j q v w) are paired with letters
# that have one (k ph i k b/u). The leading '|' puts the cursor before the replacement
# so it is processed again. NOTE(review): the rule operator is not visible in the copy
# reviewed; the direction is presumably Latin-to-Greek only.
| k   c ;
| ph  f ;
| i   j ;
| k  q ;
# v / w pair with b when a vowel follows, otherwise with u.
| b  v } $vowel ;
| b  w } $vowel;
| u  v ;
| u  w;
# Uppercase counterparts of the rules above.
| K  C ;
| Ph  F ;
| I  J ;
| K  Q ;
| B  V  } $vowel ;
| B  W  } $vowel ;
| U  V ;
| U  W ;
# Remaining H / h pair with $rough; the first two rules require an uppercase letter
# (optionally separated by $ignore) after or before the position respectively.
$rough } $ignore [:UppercaseLetter:]  H ;
$ignore [:UppercaseLetter:] { $rough  H ;
$rough  H ;
$rough  h ;
# Completeness for Greek
# NOTE(review): both sides of most rules below are blank in the copy reviewed.
# Presumably each pairs a Greek symbol or variant letter with its ordinary letter,
# with '|' placing the cursor so the result is processed again. Confirm upstream.
  |  ;
  |  ;
  |  ;
  |  ;
  |  ;
  |  ;
  |  ;
  |  ;
  | ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
# A (blank here) letter paired with Latin j.
  j ;
  |  ;
  |  ;
  |  ;
# A (blank here) letter paired with Latin i.
  i;
# delete any trailing ' marks used for roundtripping
# The apostrophe is matched only between a (blank here) set and S/s or an $egammaLike
# letter. Those are the positions where the "separator before s" rule and the n\' / N\'
# rules earlier in the file use one.
 [] { \' } [Ss] ;
 [] { \' } $egammaLike ;
# Mirror of the opening ":: NFD (NFC)" step: NFC here, NFD in parentheses.
::NFC (NFD) ;
# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
# ([\u0000-\u007F  [:Latin:] [:nonspacing mark:]]) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
# The set is wrapped in parentheses, matching the BACKWARD label above.
# NOTE(review): apart from ' : ? A-Z a-z only '-' range separators are visible in the
# copy reviewed; the range endpoints are presumably non-ASCII literals.
:: ( [':?A-Za-z---------------------------------------------------------------] ) ;
			]]></tRule>
		</transform>
	</transforms>
</supplementalData>