common/transforms/it-ja.xml

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
	<version number="$Revision: 12243 $"/>
	<transforms>
		<transform source="it" target="ja" direction="forward" alias="ja-t-it">
			<tRule>
# Italian to Katakana Transliteration Table for ICU
# Based on:
#   (book title and publisher lost to a character-encoding error; 1974, ISBN:978-4475017176)
#   http://ja.wikipedia.org/wiki/%E3%82%A4%E3%82%BF%E3%83%AA%E3%82%A2%E8%AA%9E

# Pre-processing passes applied before the conversion rules:
#   1. NFD decomposition (NFC is the transform named for the reverse direction),
#   2. lower-casing,
#   3. Fullwidth-Halfwidth conversion, filtered to Latin-script characters.
::NFD(NFC);
::Lower();
::[:Latin:] fullwidth-halfwidth();
#
#
# Variables.
# $vowel and $consonant are character classes referenced by the rules below
# as capture groups and as context.

$vowel = [aeiou];
$consonant = [bcdfghjklmnpqrstvwxyz];
#
#
# Ignore apostrophe.
# First rule: a consonant followed by an apostrophe. The replacement places
# the cursor ('|') in front of the captured consonant ($1), so the consonant
# is matched again by the later rules.
# Second rule: any remaining apostrophe.
# NOTE(review): in this copy no rule line shows a conversion operator or any
# replacement text (the second rule below is only a pattern followed by ';').
# Non-ASCII characters may have been stripped - confirm against the upstream
# CLDR file before using these rules.

($consonant) \'  | $1;
\'  ;
#
#
# 'c': 'cqu', doubled 'cc', and 'c' followed by a vowel.
# 'cc' puts a single 'c' back after the cursor so it is processed again.
# The longer patterns 'cia' and 'cio' are listed before 'ci'; rules are tried
# in order, so this ordering matters.
# NOTE(review): the 'cia' rule has nothing before '{' (empty before-context);
# the context may have been lost in this copy - confirm.

cqu  ;
cc   | c;
ca  ;
 { cia  ;
cio  ;
ci  ;
cu  ;
ce  ;
co  ;
#
#
# 'ch' followed by a vowel.

cha  ;
chi  ;
chu  ;
che  ;
cho  ;
#
#
# 'g' clusters: doubled 'gg', 'gh' + vowel, 'gli', and 'gn' + vowel.
# 'gg' puts one 'g' back after the cursor; 'gli' puts 'li' back after the
# cursor, so both are processed again by later rules.

gg   | g;
ghi  ;
ghe  ;
ghu  ;
gli  | li;
gna  ;
gni  ;
gnu  ;
gne  ;
gno  ;
#
#
# 'g' followed by a vowel. 'gia', 'giu' and 'gio' are listed before the
# shorter 'gi' so that they are tried first.

ga  ;
gia  ;
giu  ;
gio  ;
gi  ;
gu  ;
ge  ;
go  ;
#
#
# 'r': doubled 'rr' (one 'r' is put back after the cursor and processed
# again) and 'r' followed by a vowel.

rr   | r;
ra  ;
ri  ;
ru  ;
re  ;
ro  ;
#
#
# 'l': doubled 'll' (one 'l' is put back after the cursor and processed
# again) and 'l' followed by a vowel.

ll   | l;
la  ;
li  ;
lu  ;
le  ;
lo  ;
#
#
# 't': doubled 'tt', 't' and 'th' followed by a vowel, and 'tz'.
# 'tt' puts one 't' back after the cursor. 'tz' is rewritten so that 'zz'
# follows the cursor and is processed again by the 'z' rules.
# NOTE(review): nothing is visible after the cursor in the 'tzu' rule in this
# copy - confirm the intended replacement.

tt   | t;
ta  ;
ti  ;
thi  ;
tu  ;
thu  ;
te  ;
the  ;
to  ;
tho  ;
tzu  | ;
tz  | zz;
#
#
# 'd': doubled 'dd' (one 'd' is put back after the cursor and processed
# again) and 'd' followed by a vowel.

dd   | d;
da  ;
di  ;
du  ;
de  ;
do  ;
#
#
# 'm' followed by a vowel, and 'm' directly before a consonant. In the last
# rule '}' marks the after-context: only the 'm' is consumed, the consonant
# is left for later rules.

ma  ;
mi  ;
mu  ;
me  ;
mo  ;
m } $consonant  ;
#
#
# 'n' followed by a vowel.

na  ;
ni  ;
nu  ;
ne  ;
no  ;
#
#
# 'f': doubled 'ff' (one 'f' is put back after the cursor and processed
# again) and 'f' followed by a vowel.

ff   | f;
fa  ;
fi  ;
fu  ;
fe  ;
fo  ;
#
#
# 'b': doubled 'bb' (one 'b' is put back after the cursor and processed
# again) and 'b' followed by a vowel.

bb   | b;
ba  ;
bi  ;
bu  ;
be  ;
bo  ;
#
#
# 'p': doubled 'pp' (one 'p' is put back after the cursor and processed
# again) and 'p' followed by a vowel.

pp   | p;
pa  ;
pi  ;
pu  ;
pe  ;
po  ;
#
#
# 'v': doubled 'vv' (one 'v' is put back after the cursor and processed
# again) and 'v' followed by a vowel.

vv   | v;
va  ;
vi  ;
vu  ;
ve  ;
vo  ;
#
#
# 's' special cases.
# 'sa' is matched here only when followed by 'nta' or 'nto' (after-context).
# Doubled 'ss' is rewritten so that the marked form '~s' (literal tilde,
# written '\~') follows the cursor; the '~s' + vowel rules further down
# consume it.

sa } nt[ao]  ;
ss   | \~s;
#
#
# 's' is voiced before [bdglmnrv].
# Each rule consumes 's' plus the following consonant and puts that consonant
# back after the cursor, so it is processed again by its own rules.

sb   | b;
sd   | d;
sg   | g;
sl   | l;
sm   | m;
sn   | n;
sr   | r;
sv   | v;
#
#
# Force 's' after a consonant to be unvoiced.
# The first rule matches consonant + 's' when a vowel follows, moves the
# cursor back before the captured consonant ($1) and marks the 's' with a
# literal tilde ('\~'). The '~s' + vowel rules below consume the marked form.

($consonant) s } $vowel  | $1 \~ s;
\~sa  ;
\~si  ;
\~su  ;
\~se  ;
\~so  ;
#
#
# 's' at the beginning is usually unvoiced.
# The before-context '[:^Letter:] {' restricts each rule to an 's' that is
# preceded by a non-letter character.

[:^Letter:] { sa  ;
[:^Letter:] { si  ;
[:^Letter:] { su  ;
[:^Letter:] { se  ;
[:^Letter:] { so  ;
#
#
# Otherwise a voiced 's' is common.
# These rules apply to any 's' + vowel not already handled by the more
# specific 's' rules above.

sa  ;
si  ;
su  ;
se  ;
so  ;
#
#
# 'sc' before 'i' or 'e': 'scia' is listed before the shorter 'sci'.

scia  ;
sci  ;
sce  ;
#
#
# Doubled 'zz' is rewritten so that the marked form '~z' (literal tilde,
# written '\~') follows the cursor.

zz   | \~z;
#
# Force 'z' after a consonant to be unvoiced.
# The first rule moves the cursor back before the captured consonant ($1)
# and marks the 'z' with a tilde; the '~z' + vowel rules below consume the
# marked form.

($consonant) z  | $1 \~z;
\~za  ;
\~zi  ;
\~zu  ;
\~ze  ;
\~zo  ;
#
#
# Otherwise a voiced 'z' is common, except for 'zi'.
# 'zi' has two rules: one restricted to a preceding non-letter character
# (before-context '[:^Letter:] {') and a general one after it.

za  ;
[:^Letter:] { zi  ;
zi  ;
zu  ;
ze  ;
zo  ;
#
#
# 'j': 'ja' and 'je' have their own rules; any other 'j' is rewritten to 'i'
# after the cursor and processed again by the vowel rules.

ja  ;
je  ;
j  | i;
#
#
# Standalone vowels and consonants.
# Vowels that were not consumed as part of a consonant + vowel rule above.

a  ;
i  ;
u  ;
e  ;
o  ;
#
#
# Consonants that were not consumed by any rule above.
# 'k' and 'q' are rewritten to 'c', 'x' to 'cs' and 'y' to 'i'; in each case
# the rewritten text follows the cursor and is processed again.

b  ;
c  ;
d  ;
f  ;
g  ;
h  ;
k  | c;
l  ;
m  ;
n  ;
p  ;
q  | c;
r  ;
s  ;
t  ;
v  ;
x  | cs;
y  | i;
z  ;
#
#
# Word delimiter of a transliterated foreign phrase: this rule matches a
# single space (quoted as ' ').
# NOTE(review): the delimiter character originally quoted in this comment is
# missing from this copy; presumably U+30FB KATAKANA MIDDLE DOT - confirm
# against the upstream file.

' '  ;
#
#
# Ideally a Latin hyphen would be transliterated to U+30A0 (KATAKANA-HIRAGANA
# DOUBLE HYPHEN). Because that character is not supported by many fonts or
# software packages, U+FF1D (FULLWIDTH EQUALS SIGN), which is widely used as
# a "double hyphen", is used instead.
#

\-  ;
#
#
# Post-processing: a rule matching any remaining nonspacing mark (presumably
# deleting the marks left over from the initial NFD pass - the replacement
# side is empty in this copy), followed by NFC recomposition (NFD is the
# transform named for the reverse direction).

[:nonspacing mark:]  ;
::NFC(NFD);
			</tRule>
		</transform>
	</transforms>
</supplementalData>