common/transforms/es_FONIPA-zh.xml

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
	<version number="$Revision: 12243 $"/>
	<transforms>
		<transform source="es_FONIPA" target="zh" direction="forward" alias="zh-t-es-fonipa">
			<tRule><![CDATA[
# Transforms Spanish to Mandarin Chinese. The input Spanish string must be in
# phonemic IPA transcription (es_FONIPA); the output is in Simplified Chinese.

# Shared character classes, referenced as context by the rules below.
# $word_boundary: a hyphen, a space, or the text anchor ($ inside a set
# matches the start or end of the text being transformed).
$word_boundary = [-\ $];
$vowel = [aeijouw];       # Vowels and glides
# $not_vowel: any character that is not in $vowel.  Used as right-hand
# context after a vowel + n sequence, so that the n is consumed together with
# the preceding vowel only when it is not followed by a vowel or glide.
$not_vowel = [^$vowel];

# First pass: Collapse phonetic distinctions not preserved in Mandarin.

  | d;
  | g;
  | n;
  | s;
  | r;

ff  f ;
kk  k ;
mm  m ;
nn  n ;
pp  p ;
tt  t ;
t   ;

aa  a ;
oi  oi ;
oo  o ;
uu  u ;

[^dgktx] { ei  e ;
[^-\ .$] { eu  eu ;
[^-\ .$] { ou  o;
[^j]     { ui  wi ;

[^$word_boundary] { m } [bp]  n;  # GB/T 17693.5-2009, 5.3.2
s[s]  s;               # GB/T 17693.5-2009, 5.3.4
[^] { jo  io;          # GB/T 17693.5-2009  1,  7

::Null;

j } an $not_vowel  i ;  # GB/T 17693.5-2009  1,  8


# GB/T 17693.5-2009, Table 1, note 8 also says that <uai> should be treated as
# if it were <u> plus <ai>.  This is not borne out by the observed data, which
# suggests that <ua> plus <i> is the more appropriate choice in some
# situations.

[g.$] { wai  wai ;
wai  uai ;
[g.$] { wau  wau ;
wau  uau ;
jau  iau ;

# Even though "ao" is not a diphthong in Spanish, Mandarin treats it as one.

[^jw] { ao } [^n]      au ;
[^jw] { ao } n $vowel  au ;

# Main pass: Phoneme to Hanzi conversion.
# This generally follows GB/T 17693.5-2009, Table 1, unless otherwise noted.
::Null;

'.'  ;
ai   ;
an } $not_vowel   ;
au   ;
a   ;
bai   ;
ban } $not_vowel   ;
bau   ;
ba   ;
ben } $not_vowel   ;
be   ;
bin } $not_vowel   ;
bi   ;
bja   ;
bjen } $not_vowel   ;
bje   ;
bju   ;
bon } $not_vowel   ;
bo   ;
bun } $not_vowel   ;
bu   ;
bwan } $not_vowel   ;
bwa   ;
bwen } $not_vowel   ;  # Should be be , per GB/T 17693.5-2009  1.
bwe   ;
bwin } $not_vowel   ;  # Nonstandard, but fits observed data.
bwi   ;
bwo   ;
b   ;
ai   ;
an } $not_vowel   ;
au   ;
a   ;
en } $not_vowel   ;
e   ;
in } $not_vowel   ;
i   ;
ja   ;
jen } $not_vowel   ;
je   ;
ju   ;
on } $not_vowel   ;
o   ;
un } $not_vowel   ;
u   ;
wan } $not_vowel   ;
wa   ;
wen } $not_vowel   ;
we   ;
wi   ;
wo   ;
   ;
dai   ;
dan } $not_vowel   ;
dau   ;
da   ;
dei   ;
den } $not_vowel   ;
de   ;
din } $not_vowel   ;
di   ;
dja   ;
djen } $not_vowel   ;
dje   ;
dju   ;
don } $not_vowel   ;
do   ;
dun } $not_vowel   ;
du   ;
dwan } $not_vowel   ;
dwa   ;
dwen } $not_vowel   ;
dwe   ;
dwi   ;
dwo   ;
d } $word_boundary  ;
d   ;
ei   ;
en } $not_vowel   ;
eu   ;
e   ;
fai   ;
fan } $not_vowel   ;
fau   ;
fa   ;
fe   ;
fin } $not_vowel   ;
fi   ;
fja   ;
fjen } $not_vowel   ;
fje   ;
fju   ;
fon } $not_vowel   ;
fo   ;
fun } $not_vowel   ;
fu   ;
fwan } $not_vowel   ;
fwa   ;
fwen } $not_vowel   ;
fwe   ;
fwi   ;
fwo   ;


# The choice of 夫 vs. 弗 sounds simple according to the GB/T standard, but the
# data suggest otherwise.  Ideally, 弗 should occur at the beginning of a
# morpheme (e.g. in "villafranca" 比利亚弗兰卡) and 夫 everywhere else.  Since
# we don't have morpheme boundaries, we'll fudge it by writing 夫 at the end of
# a word and 弗 everywhere else.

f } $word_boundary   ;
f   ;

gai   ;
gan } $not_vowel   ;
gau   ;
ga   ;
gei   ;
gen } $not_vowel   ;
ge   ;
gin } $not_vowel   ;
gi   ;
gja   ;
gjen } $not_vowel   ;
gje   ;
gju   ;
gon } $not_vowel   ;
go   ;
gun } $not_vowel   ;
gu   ;
gwan } [$]   ;        # Nonstandard, but fits observed data.
gwan } $not_vowel   ;
gwa   ;
gwen } $not_vowel   ;
gwe   ;
gwi   ;
gwo   ;
g   ;
in } $not_vowel   ;
i   ;
ai   ;
an } $not_vowel   ;
au   ;
a   ;
en } $not_vowel   ;
e   ;
in } $not_vowel   ;
i   ;
on } $not_vowel   ;
o   ;
un } $not_vowel   ;
u   ;
wan } $not_vowel   ;
wa   ;
wen } $not_vowel   ;
we   ;
wi   ;
wo   ;
   ;
kai   ;
kan } $not_vowel   ;
kau   ;
ka   ;
kei   ;
ken } $not_vowel   ;
ke   ;
kin } $not_vowel   ;
ki   ;
kja   ;
kjen } $not_vowel   ;
kje   ;
kju   ;
kon } $not_vowel   ;
ko   ;
kun } $not_vowel   ;
ku   ;
kwan } $not_vowel   ;
kwa   ;
kwen } $not_vowel   ;
kwe   ;
kwin } $not_vowel   ;
kwi   ;
kwo   ;
k   ;
lae } [^n]   ;
lai   ;
lan } $not_vowel   ;
lau   ;
la   ;
len } $not_vowel   ;
le   ;
lin } $not_vowel   ;
li   ;
lja   ;
ljen } $not_vowel   ;
lje   ;
lju   ;
lon } $not_vowel   ;
lo   ;
lun } $not_vowel   ;
lu   ;
lwan } $not_vowel   ;
lwa   ;
lwen } $not_vowel   ;
lwe   ;
lwi   ;
lwo   ;
l   ;
an } $not_vowel   ;
au   ;
a   ;
en } $not_vowel   ;
e   ;
in } $not_vowel   ;
i   ;
on } $not_vowel   ;
o   ;
u   ;
wan } $not_vowel   ;
wa   ;
wen } $not_vowel   ;
we   ;
wi   ;
wo   ;
   ;
mai   ;
man } $not_vowel   ;
martin   ;
mau   ;
ma   ;
men } $not_vowel   ;
me   ;
min } $not_vowel   ;
mi   ;
mja   ;
mjen } $not_vowel   ;
mje   ;
mju   ;
mon } $not_vowel   ;
mo   ;
mun } $not_vowel   ;
mu   ;
mwan } $not_vowel   ;
mwa   ;
mwen } $not_vowel   ;
mwe   ;
mwin } $not_vowel   ;  # Nonstandard, but fits observed data.
mwi   ;
mwo   ;
m   ;
nai   ;
nan } $not_vowel   ;
nau   ;
na   ;
nen } $not_vowel   ;
ne   ;
nin } $not_vowel   ;
ni   ;
nja   ;
njen } $not_vowel   ;
nje   ;
nju   ;
non } $not_vowel   ;
no   ;
nun } $not_vowel   ;
nu   ;
nwan } $not_vowel   ;
nwa   ;
nwen } $not_vowel   ;
nwe   ;
nwi   ;
nwo   ;
n   ;
an } $not_vowel   ;
au   ;
a   ;
en } $not_vowel   ;
e   ;
in } $not_vowel   ;
i   ;
on } $not_vowel   ;
o   ;
u   ;
wan } $not_vowel   ;
wa   ;
wen } $not_vowel   ;
we   ;
wi   ;
wo   ;
on } $not_vowel   ;
ou   ;
o   ;
pai   ;
pan } $not_vowel   ;
pau   ;
pa   ;
pen } $not_vowel   ;
pe   ;
pin } $not_vowel   ;
pi   ;
pja   ;
pjen } $not_vowel   ;
pje   ;
pju   ;
pon } $not_vowel   ;
po   ;
pun } $not_vowel   ;
pu   ;
pwan } $not_vowel   ;
pwa   ;
pwen } $not_vowel   ;
pwe   ;
pwi   ;
pwo   ;
p   ;
rai   ;
ran } $not_vowel   ;
rau   ;
ra   ;
ren } $not_vowel   ;
re   ;
rin } $not_vowel   ;
ri   ;
rja   ;
rjen } $not_vowel   ;
rje   ;
rju   ;
ron } $not_vowel   ;
ro   ;
run } $not_vowel   ;
ru   ;
rwan } $not_vowel   ;
rwa   ;
rwen } $not_vowel   ;
rwe   ;
rwi   ;
rwo   ;
r  R ;
sai   ;
san } $not_vowel   ;
sau   ;
sa   ;
sen } $not_vowel   ;
se   ;
sin } $not_vowel   ;
si   ;
sja   ;
sjen } $not_vowel   ;
sje   ;
sju   ;
son } $not_vowel   ;
so   ;
sun } $not_vowel   ;
su   ;
swan } $not_vowel   ;
swa   ;
swen } $not_vowel   ;
swe   ;
swi   ;
swo   ;
s   ;
tai   ;
tan } $not_vowel   ;
tau   ;
ta   ;
tei   ;
ten } $not_vowel   ;
te   ;
tin } $not_vowel   ;
ti   ;
tja   ;
tjen } $not_vowel   ;
tje   ;
tju   ;
ton } $not_vowel   ;
to   ;

# The rules for /ts/ (tz in the orthography) are nonstandard and derived
# entirely from the observed data.  They apply mostly to native toponyms
# in Mexico.

tsa   ;
tsen } $not_vowel   ;
tse   ;
tsin } $not_vowel   ;
tsi   ;
tso   ;
tsun } $not_vowel   ;
tsu   ;
ts   ;

tun } $not_vowel   ;
tu   ;
twan } $not_vowel  	;
twa   ;
twen } $not_vowel   ;
twe   ;
twi   ;
two   ;
t   ;
ai   ;
an } $not_vowel   ;
au   ;
a   ;
en } $not_vowel   ;
e   ;
in } $not_vowel   ;
i   ;
jan } $not_vowel   ;
ja   ;
jen } $not_vowel   ;
je   ;
jon } $not_vowel   ;
ju   ;
on } $not_vowel   ;
o   ;
un } $not_vowel   ;  # Should be , per GB/T 17693.5-2009  1.
u   ;
wan } $not_vowel   ;
wa   ;
wen } $not_vowel   ;
we   ;
wi   ;
wo   ;
   ;
un } $not_vowel   ;
u   ;
wan } $not_vowel   ;
wa   ;
wen } $not_vowel   ;
we   ;
win } $not_vowel   ;
wi   ;
won } $not_vowel   ;  # Unseen.
wo   ;
xai   ;
xan } $not_vowel   ;
xau   ;
xa   ;
xei   ;
xen } $not_vowel   ;
xe   ;
xin } $not_vowel   ;
xi   ;
xja   ;
xjen } $not_vowel   ;
xje   ;
xju   ;
xon } $not_vowel   ;
xo   ;
xun } $not_vowel   ;
xu   ;
xwan } $not_vowel   ;
xwa   ;
xwen } $not_vowel   ;
xwe   ;
xwi   ;
xwo   ;
x   ;

# 尔 simplification pass.  The idea is to drop most occurrences of 尔
# corresponding to <r> (not to <l> or <ll>) from a word if there is another /l/
# sound nearby.  There is a vague pattern like this in the data, but the details
# remain to be determined.  At the moment, this pass does no such dropping; it
# just puts 尔 in for every <r> in a syllable coda.

::Null;
$r = [R];
#
#
# R } . $r  ;
# R } .. $r  ;
# R } ... $r  ;
# R } .... $r  ;

R   ;

# Dong-nan-xi-hai pass.  Per GB/T 17693.5-2009, Table 1, note 4, replace
# confusing characters at the beginning and end of a word.

::Null;
$word_boundary {    ;
$word_boundary {    ;
$word_boundary {    ;
 } $word_boundary   ;

::NFC;
			]]></tRule>
		</transform>
	</transforms>
</supplementalData>