Home | History | Annotate | Download | only in ldml
      1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
      2    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
      3 <html xmlns="http://www.w3.org/1999/xhtml">
      4 
      5 <head>
      6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
      7 <meta http-equiv="Content-Language" content="en-us" />
      8 <meta name="VI60_defaultClientScript" content="JavaScript" />
      9 <meta name="GENERATOR" content="Microsoft FrontPage 6.0" />
     10 <meta name="keywords" content="Unicode, common locale data repository" />
     11 <meta name="ProgId" content="FrontPage.Editor.Document" />
     12 <title>Common Locale Data Repository</title>
     13 <link rel="stylesheet" type="text/css" href="http://www.unicode.org/webscripts/standard_styles.css" />
     14 <style type="text/css">
     15 <!--
     16 .major     {font-size:95%; font-family: Arial, Geneva, sans-serif; color: #808080; font-weight:bold; }
     17 .minor     {font-size:85%; font-family: Arial, Geneva, sans-serif; color: #808080; font-weight:400; }
     18 .table2           { margin-top: 1.5em; margin-bottom: 0.5em }
     19 td,th {border-color:#EEEEEE; vertical-align:top; padding:2px}
     20 th           { background-color: #CCCCCC }
     21 table {border-collapse: collapse}
     22 caption      { font-weight: bold }
     23 -->
     24 </style>
     25 </head>
     26 
     27 <body>
     28 
     29 <table width="100%" cellpadding="0" cellspacing="0" border="0">
     30 	<tr>
     31 		<td colspan="2" style="padding:0; margin:0">
     32 		<table width="100%" border="0" cellpadding="0" cellspacing="0">
     33 			<tr>
     34 				<td class="icon" style="padding:2px; margin:0"><a href="http://www.unicode.org/">
     35 				<img border="0" src="http://www.unicode.org/webscripts/logo60s2.gif" align="middle" alt="[Unicode]" width="34" height="33" /></a>&nbsp;&nbsp;
     36 				<a class="bar" href="index.html"><font size="3">Common Locale Data Repository</font></a></td>
     37 				<td class="bar" style="padding:2px; margin:0">
     38 				<a href="http://www.unicode.org" class="bar">Home</a> |
     39 				<a href="http://www.unicode.org/sitemap/" class="bar">Site Map</a> |
     40 				<a href="http://www.unicode.org/search/" class="bar">Search</a></td>
     41 			</tr>
     42 		</table>
     43 		</td>
     44 	</tr>
     45 	<tr>
     46 		<td style="padding:2px; margin:0" colspan="2" class="gray">&nbsp;</td>
     47 	</tr>
     48 	<tr>
     49 		<td style="padding:2px; margin:0" valign="top" width="25%" class="navCol">
     50 		<table class="navColTable" border="0" width="100%" cellspacing="4" cellpadding="0">
     51 			<tr>
     52 				<td style="padding:2px; margin:0" class="navColTitle" colspan="2">Contents</td>
     53 			</tr>
     54 			<tr>
     55 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
     56 				<a href="#Introduction">Introduction</a></td>
     57 			</tr>
     58 			<tr>
     59 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
     60 				<a href="#Variants">Variants</a></td>
     61 			</tr>
     62 			<tr>
     63 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
     64 				<a href="#Guidelines">Guidelines</a></td>
     65 			</tr>
     66 			<tr>
     67 				<td style="padding:2px; margin:0" valign="top" class="navColCell" width="1%">
     68 				&nbsp;</td>
     69 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
     70 				<a href="#Ambiguity">Ambiguity</a></td>
     71 			</tr>
     72 			<tr>
     73 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
     74 				&nbsp;</td>
     75 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
     76 				<a href="#Pronunciation">Pronunciation</a></td>
     77 			</tr>
     78 			<tr>
     79 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
     80 				&nbsp;</td>
     81 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
     82 				<a href="#Cautions">Cautions</a></td>
     83 			</tr>
     84 			<tr>
     85 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
     86 				<a href="#Available_Transliterations">Available Transliterations</a></td>
     87 			</tr>
     88 			<tr>
     89 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
     90 				&nbsp;</td>
     91 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
     92 				<a href="#Korean">Korean</a></td>
     93 			</tr>
     94 			<tr>
     95 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
     96 				&nbsp;</td>
     97 				<td style="padding:2px; margin:0" valign="top" class="navColCell"><a href="#Japanese">Japanese</a></td>
     98 			</tr>
     99 			<tr>
    100 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
    101 				&nbsp;</td>
    102 				<td style="padding:2px; margin:0" valign="top" class="navColCell"><a href="#Greek">Greek</a></td>
    103 			</tr>
    104 			<tr>
    105 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
    106 				&nbsp;</td>
    107 				<td style="padding:2px; margin:0" valign="top" class="navColCell"><a href="#Cyrillic">Cyrillic</a></td>
    108 			</tr>
    109 			<tr>
    110 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
    111 				&nbsp;</td>
    112 				<td style="padding:2px; margin:0" valign="top" class="navColCell"><a href="#Indic">Indic</a></td>
    113 			</tr>
    114 			<tr>
    115 				<td style="padding:2px; margin:0" valign="top" class="navColCell">
    116 				&nbsp;</td>
    117 				<td style="padding:2px; margin:0" valign="top" class="navColCell"><a href="#Others">Others</a></td>
    118 			</tr>
    119 			<tr>
    120 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    121 				<a href="#Submitting_Transliterations">Submitting Transliterations</a></td>
    122 			</tr>
    123 			<tr>
    124 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    125 				<a href="#More_Information">More Information</a></td>
    126 			</tr>
    127 			<tr>
    128 				<td style="padding:2px; margin:0" class="navColTitle" colspan="2">Unicode CLDR</td>
    129 			</tr>
    130 			<tr>
    131 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    132 				<a href="index.html">CLDR Project</a></td>
    133 			</tr>
    134 			<tr>
    135 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    136 				<a href="repository_access.html">CLDR Releases (Downloads)</a></td>
    137 			</tr>
    138 			<tr>
    139 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    140 				<a href="survey_tool.html">CLDR Survey Tool</a></td>
    141 			</tr>
    142 			<tr>
    143 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    144 				<a href="filing_bug_reports.html">CLDR Bug Reports</a></td>
    145 			</tr>
    146 			<tr>
    147 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    148 				<a href="comparison_charts.html">CLDR Charts</a></td>
    149 			</tr>
    150 			<tr>
    151 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    152 				<a href="process.html">CLDR Process</a></td>
    153 			</tr>
    154 			<tr>
    155 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    156 				<a href="http://www.unicode.org/reports/tr35/">UTS #35: Locale Data Markup Language 
    157 				(LDML)</a></td>
    158 			</tr>
    159 			<tr>
    160 				<td style="padding:2px; margin:0" class="navColTitle" colspan="2">Related Links</td>
    161 			</tr>
    162 			<tr>
    163 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">Join the
    164 				<a href="http://www.unicode.org/consortium/consort.html">Unicode Consortium</a></td>
    165 			</tr>
    166 			<tr>
    167 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    168 				<a href="http://www.unicode.org/reports/">Unicode Technical Reports</a></td>
    169 			</tr>
    170 			<tr>
    171 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    172 				<a href="http://www.unicode.org/faq/reports_process.html">Technical Reports Development 
    173 				and Maintenance Process</a></td>
    174 			</tr>
    175 			<tr>
    176 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    177 				<a href="http://www.unicode.org/consortium/utc.html">Unicode Technical Committee</a></td>
    178 			</tr>
    179 			<tr>
    180 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    181 				<a href="http://www.unicode.org/versions/">Versions of the Unicode Standard</a></td>
    182 			</tr>
    183 			<tr>
    184 				<td style="padding:2px; margin:0" class="navColTitle" colspan="2">Other Publications</td>
    185 			</tr>
    186 			<tr>
    187 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    188 				<a href="http://www.unicode.org/standard/standard.html">The Unicode Standard</a></td>
    189 			</tr>
    190 			<tr>
    191 				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
    192 				<a href="http://www.unicode.org/notes/">Unicode Technical Notes</a></td>
    193 			</tr>
    194 		</table>
    195 		<!-- BEGIN CONTENTS --></td>
    196 		<td>
    197 		<table>
    198 			<tr>
    199 				<td class="contents" valign="top">
    200 				<div class="body">
    201 					<h1 align="center">Unicode Transliteration Guidelines</h1><br />
    202 					<blockquote>
    203 						<p><i>This document describes guidelines for the creation and use of CLDR 
    204 					transliterations. Preliminary
    205 						<a href="http://www.unicode.org/cldr/data/charts/transforms/index.html">charts</a> 
    206 					are provided for the available transliterations -- be sure to read the known issues 
    207 					there. Please file any feedback 
    208 					on this document or those charts at
    209 						<a href="http://www.unicode.org/cldr/bugs/locale-bugs">Locale Bugs</a>.</i></p>
    210 					</blockquote>
    211 					<h2><a name="Introduction">Introduction</a></h2>
    212 					<table border="1" width="33%" id="table21" cellspacing="0" cellpadding="2" style="border-collapse: collapse; float: right; margin:1em; border-color:#BB0000">
    213 						<tr>
    214 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
    215 							<font size="2"><i><b>Display. </b></i>Some of the characters in this 
    216 							document may not be visible in your browser, and with some fonts the diacritics 
    217 							will not be correctly placed on the base letters. See
    218 							<a href="http://www.unicode.org/help/display_problems.html">Display Problems</a>.</font></td>
    219 						</tr>
    220 					</table>
    221 					<p>Transliteration is the general process of converting characters from one script 
    222 					to another, where the result is roughly phonetic for languages in the target script. 
    223 					For example, &quot;Phobos&quot; and &quot;Deimos&quot; are transliterations of Greek mythological &quot;Φόβος&quot; 
    224 					and &quot;Δεῖμος&quot; into Latin letters, used to name the moons of Mars.</p>
    225 					<p>Transliteration is <i>not</i> translation. Rather, transliteration is the conversion 
    226 					of letters from one script to another without translating the underlying words. 
    227 					The following shows a sample of transliteration systems:</p>
    228 					<table id="table20" style="border-collapse: collapse" border="1" cellspacing="0" cellpadding="2">
    229 						<caption>Sample Transliteration Systems</caption>
    230 						<tr>
    231 							<th width="25%" style="vertical-align: top">Source</th>
    232 							<th width="25%" style="vertical-align: top">Translation</th>
    233 							<th style="vertical-align: top" width="25%">Transliteration</th>
    234 							<th width="25%" style="vertical-align: top">System</th>
    235 						</tr>
    236 						<tr>
    237 							<td bgcolor="#cccccc" style="vertical-align: top" rowspan="2">Αλφαβητικός</td>
    238 							<td bgcolor="#cccccc" style="vertical-align: top"><i>Alphabetic</i></td>
    239 							<td bgcolor="#cccccc" style="vertical-align: top">Alphabētikós</td>
    240 							<td bgcolor="#cccccc" style="vertical-align: top">Classic</td>
    241 						</tr>
    242 						<tr>
    243 							<td bgcolor="#cccccc" style="vertical-align: top">&nbsp;</td>
    244 							<td bgcolor="#cccccc" style="vertical-align: top">Alfavitikós</td>
    245 							<td bgcolor="#cccccc" style="vertical-align: top">UNGEGN</td>
    246 						</tr>
    247 						<tr>
    248 							<td style="vertical-align: top" rowspan="2">しんばし</td>
    249 							<td style="vertical-align: top" rowspan="2"><i>new bridge<br />(district in Tokyo)</i></td>
    250 							<td style="vertical-align: top">shimbashi</td>
    251 							<td style="vertical-align: top">Hepburn</td>
    252 						</tr>
    253 						<tr>
    254 							<td style="vertical-align: top">sinbasi</td>
    255 							<td style="vertical-align: top">Kunrei</td>
    256 						</tr>
    257 						<tr>
    258 							<td style="vertical-align: top" rowspan="3">яйца Фаберже</td>
    259 							<td style="vertical-align: top" rowspan="3"><i>Fabergé eggs</i></td>
    260 							<td style="vertical-align: top">yaytsa Faberzhe</td>
    261 							<td style="vertical-align: top">BGN/PCGN</td>
    262 						</tr>
    263 						<tr>
    264 							<td style="vertical-align: top">jajca Faberže</td>
    265 							<td style="vertical-align: top">Scholarly</td>
    266 						</tr>
    267 						<tr>
    268 							<td style="vertical-align: top">âjca Faberže</td>
    269 							<td style="vertical-align: top">ISO</td>
    270 						</tr>
    271 					</table>
    272 					<p>While an English speaker may 
    273 					not recognize that the Japanese word <i>kyanpasu</i> is equivalent 
    274 					to the English word <i>campus</i>, the word <i>kyanpasu</i> is still far easier 
    275 					to recognize and interpret than if the letters were left in the original script. 
    276 					There are several situations where this transliteration is especially useful, 
    277 					such as the following. See the sidebar for examples.</p>
    278 					<table id="table22" cellpadding="2" style="margin:1em; border-collapse: collapse" border="1" align="right">
    279 						<caption><b>Sample Transliterations</b></caption>
    280 						<tr>
    281 							<th style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Source</th>
    282 							<th style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Transliteration</th>
    283 						</tr>
    284 						<tr>
    285 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">김, 
    286 							국삼</td>
    287 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Gim, 
    288 							Gugsam </td>
    289 						</tr>
    290 						<tr>
    291 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">김, 
    292 							명희</td>
    293 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Gim, 
    294 							Myeonghyi </td>
    295 						</tr>
    296 						<tr>
    297 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">정, 
    298 							병호</td>
    299 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Jeong, Byeongho
    300 							</td>
    301 						</tr>
    302 						<tr>
    303 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">...</td>
    304 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">...</td>
    305 						</tr>
    306 						<tr>
    307 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">たけだ, まさゆき</td>
    308 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Takeda, Masayuki 
    309 							</td>
    310 						</tr>
    311 						<tr>
    312 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">ますだ, よしひこ</td>
    313 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Masuda, Yoshihiko 
    314 							</td>
    315 						</tr>
    316 						<tr>
    317 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">やまもと, のぼる 
    318 							</td>
    319 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Yamamoto, Noboru 
    320 							</td>
    321 						</tr>
    322 						<tr>
    323 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">...</td>
    324 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">...</td>
    325 						</tr>
    326 						<tr>
    327 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Ρούτση, Άννα</td>
    328 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Roútsē, Ánna</td>
    329 						</tr>
    330 						<tr>
    331 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Καλούδης, Χρήστος</td>
    332 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Kaloúdēs, Chrḗstos</td>
    333 						</tr>
    334 						<tr>
    335 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Θεοδωράτου, Ελένη</td>
    336 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Theodōrátou, Elénē</td>
    337 						</tr>
    338 					</table>
    339 					<ul>
    340 						<li>When a user views names that are entered in a world-wide database, it is 
    341 						extremely helpful to view and refer to the names in the user&#39;s native script.</li>
    342 						<li>When the user performs searching and indexing tasks, transliteration can 
    343 						retrieve information in a different script.</li>
    344 						<li>When a service engineer is sent a program dump that is filled with characters 
    345 						from foreign scripts, it is much easier to diagnose the problem when the text 
    346 						is transliterated and the service engineer can recognize the characters.
    347 						</li>
    348 					</ul>
    349 					<p>The term <i>transliteration</i> 
    350 					is sometimes given a narrow meaning, implying that the transformation is <i>reversible</i> (sometimes called 
    351 					<i>lossless</i>). In CLDR this is not the case; 
    352 					the term <i>transliteration</i> 
    353 					is interpreted broadly to mean both reversible and non-reversible transforms of 
    354 					text. (Note that even if theoretically a transliteration 
    355 					system is supposed to be reversible, in source standards it is often not 
    356 					specified in sufficient detail in the edge cases to actually be reversible.) A 
    357 					non-reversible transliteration is often called a <i>transcription</i>, or called 
    358 					a <i>lossy </i>or<i> ambiguous</i> transcription.</p>
    359 					<p>Note that reversibility is generally 
    360 					only in one direction, so for native to Latin a transliteration may be reversible, but not the 
    361 					contrary. For example, Hangul is reversible, in that any Hangul to Latin to 
    362 					Hangul should provide the same Hangul as the input. Thus we have the following:</p>
    363 					<blockquote>
    364 						<p>갗 
    365 						<font face="Times New Roman">→</font> 
    366 						gach <font face="Times New Roman">→</font> 
    367 						갗</p>
    368 					</blockquote>
    369 					<p>However, for completeness, many Latin 
    370 					characters have fallbacks. This means that more than one Latin character may map to the same 
    371 					Hangul. Thus <i>from</i> Latin we don&#39;t have reversibility, because two 
    372 					different Latin source strings round-trip back to the same Latin string.</p>
    373 					<blockquote>
    374 						<p>gach 
    375 						<font face="Times New Roman">→</font> 
    376 						갗 <font face="Times New Roman">→</font> 
    377 						gach<br />gac 
    378 						<font face="Times New Roman">→</font> 
    379 						갗 <font face="Times New Roman">→</font> 
    380 						gach</p>
    381 					</blockquote>
    382 					<p>Transliteration can also be used to convert unfamiliar letters within the same 
    383 					script, such as converting Icelandic THORN (þ) to th. These are not typically reversible.</p>
    384 					<blockquote>
    385 						<p><i>There is an online demo using released CLDR data 
    386 						at <a href="http://demo.icu-project.org/icu-bin/translit">ICU Transform Demo</a>.</i></p>
    387 					</blockquote>
    388 					<h2><a name="Variants">Variants</a></h2>
    389 					<p>There are many systems for transliteration between languages: the same text can 
    390 					be transliterated in many different ways. For example, for the Greek example above, 
    391 					the transliteration is classical, while the <a href="http://www.eki.ee/wgrs/">UNGEGN</a> 
    392 					alternate has different correspondences, such as φ → <i>f</i> instead of φ → <i>ph</i>.</p>
    393 					<p>CLDR provides for generic mappings from script to script (such as Cyrillic-Latin), 
    394 					and also language-specific variants (Russian-French, or Serbian-German). There can 
    395 					also be semi-generic mappings, such as Russian-Latin or Cyrillic-French. These can 
    396 					be referred to, respectively, as script transliterations, language-specific transliterations, or 
    397 					script-language transliterations. Transliterations from other scripts to Latin are also called
    398 					<i>Romanizations</i>.</p>
    399 					<p>Even within particular languages, there can be variant systems according to different 
    400 					authorities, or even varying across time (if the authority for a system changes its recommendation). 
    401 					The canonical identifier that CLDR uses for these has the form:</p>
    402 					<blockquote>
    403 						<p><i>source-target/variant</i></p>
    404 					</blockquote>
    405 					<p>The source (and target) can be a language or script, either using the English 
    406 					name or a locale code. The variant should specify the authority for the system, and if necessary 
    407 					for disambiguation, 
    408 					the year. For example, the identifier for the Russian to Latin transliteration according 
    409 					to the UNGEGN system would be:</p>
    410 					<ul>
    411 						<li>ru-und_Latn/UNGEGN, or</li>
    412 						<li>Russian-Latin/UNGEGN</li>
    413 					</ul>
    414 					<p>If there were multiple versions of these over time, the variant would be, say, 
    415 					UNGEGN2006.</p>
    416 					<p>The assumption is that implementations will allow the use of fallbacks, if the 
    417 					exact transliteration specified is unavailable. For example, the following would 
    418 					be the fallback chain for the identifier Russian-English/UNGEGN. This is similar 
    419 					to the <i>Lookup Fallback Pattern</i> used in
    420 					<a href="http://tools.ietf.org/html/bcp47">BCP 47 Tags for Identifying Languages</a>, 
    421 					except that it uses a &quot;stepladder approach&quot; to progressively handle the fallback 
    422 					among source, target, and variant, with priorities being the target, source, and 
    423 					variant, in that order.</p>
    424 					<ul>
    425 						<li>Russian-English/UNGEGN</li>
    426 						<li>Russian-English</li>
    427 						<li>Cyrillic-English/UNGEGN</li>
    428 						<li>Cyrillic-English</li>
    429 						<li>Russian-Latin/UNGEGN</li>
    430 						<li>Russian-Latin</li>
    431 						<li>Cyrillic-Latin/UNGEGN</li>
    432 						<li>Cyrillic-Latin</li>
    433 					</ul>
    434 					<h2><a name="Guidelines">Guidelines</a></h2>
    435 					<p>There are a number of generally desirable guidelines for script transliterations. 
    436 					These guidelines are rarely satisfied simultaneously, so constructing a reasonable 
    437 					transliteration is always a process of balancing different requirements. These requirements 
    438 					are most important for people who are building transliterations, but are also useful 
    439 					as background information for users.</p>
    440 					<p>The following lists the general guidelines 
    441 					for Unicode CLDR transliterations: </p>
    442 					<ul>
    443 						<li><i>standard:</i> follow established systems (standards, authorities, or
    444 						<i>de facto</i> practice) where possible, deviating sometimes where necessary for reversibility. 
    445 						In CLDR, the systems are generally described in the comments in the XML data files found in 
    446 					the
    447 						<a target="_blank" href="http://www.unicode.org/cldr/data/common/transforms/">transforms</a> 
    448 					folder online. For example, the system for Arabic transliteration in CLDR is 
    449 					found in the comments in
    450 						<a target="_blank" href="http://www.unicode.org/cldr/data/common/transforms/Arabic-Latin.xml">Arabic<wbr>-Latin.xml</a>; 
    451 					there is a reference to the 
    452 						<a target="_blank" href="http://www.eki.ee/wgrs/rom1_ar.pdf">UNGEGN Arabic Tables</a>. 
    453 					Similarly for Hebrew, which also follows the 
    454 						<a href="http://www.eki.ee/wgrs/rom1_he.pdf">Hebrew UNGEGN Tables</a>.</li>
    455 						<li><i>complete</i>: every well-formed sequence of characters in the source 
    456 						script should transliterate to a sequence of characters from the target script, 
    457 						and vice versa.</li>
    458 						<li><i>predictable</i>: the letters themselves (without any knowledge of the 
    459 						languages written in that script) should be sufficient for the transliteration, 
    460 						based on a relatively small number of rules. This allows the transliteration 
    461 						to be performed mechanically. </li>
    462 						<li><i>pronounceable</i>: the resulting characters have reasonable 
    463 						pronunciations in the target script. Transliteration is not as useful if the process simply 
    464 						maps the characters without any regard to their pronunciation. Simply mapping 
    465 						by alphabetic order (&quot;αβγδεζηθ...&quot; to &quot;abcdefgh...&quot;) could yield strings that 
    466 						might be complete and unambiguous, but the pronunciation would be completely 
    467 						unexpected.</li>
    468 						<li><i>reversible</i>: it is possible to recover the text in the source script 
    469 						from the transliteration in the target script. That is, someone that knows the transliteration 
    470 						rules would be able to recover the precise spelling of the original source text. 
    471 						For example, it is possible to go from <i>Elláda</i> back to the original Ελλάδα, 
    472 						while if the transliteration were <i>Ellada</i> (with no accent), it would 
    473 						not be possible.</li>
    474 					</ul>
    475 					<p>Some of these principles may not be achievable simultaneously; in particular, 
    476 					adherence to a standard system <i>and</i> reversibility. Often small changes in 
    477 					existing systems can be made to accommodate reversibility. However, where a particular 
    478 					system specifies fundamentally non-reversible transliterations, those transliterations 
    479 					as represented in CLDR may not be reversible.</p>
    480 					<h3><a name="Ambiguity">Ambiguity</a></h3>
    481 					<p>In transliteration, multiple characters may produce ambiguities 
    482 					(non-reversible mappings) unless the rules 
    483 					are carefully designed. For example, the Greek character PSI (ψ) maps to <i>ps</i>, 
    484 					but <i>ps</i> could also result from the sequence PI, SIGMA (πσ) since PI (π) maps 
    485 					to p and SIGMA (σ) maps to s. </p>
    486 					<p>The Japanese transliteration standards provide a good mechanism for handling 
    487 					these kinds of ambiguities. Using the Japanese transliteration standards, whenever 
    488 					an ambiguous sequence in the target script does not result from a single letter, 
    489 					the transform uses an apostrophe to disambiguate it. For example, it uses that procedure 
    490 					to distinguish between <i>man&#39;ichi</i> and <i>manichi</i>. Using this procedure, 
    491 					the Greek sequence PI SIGMA (πσ) maps to <i>p&#39;s</i>. This method is recommended 
    492 					for all script transliteration methods, although sometimes the character may vary: 
    493 					for example, &quot;-&quot; is used in Korean. </p>
    494 					<blockquote>
    495 						<p><b>Note:</b> We&#39;ve had a recent proposal to consistently use the hyphenation dot 
    496 						for this code, thus we&#39;d have πσ → p‧s.</p>
    497 					</blockquote>
    498 					<p>A second problem is that some characters in a target script are not normally 
    499 					found outside of certain contexts. For example, the small Japanese &quot;ya&quot; character, 
    500 					as in &quot;kya&quot; (キャ), is not normally found in isolation. To handle such characters, 
    501 					the Unicode transliterations currently use different conventions.</p>
    502 					<ul>
    503 						<li>Tilde: &quot;ャ&quot; in isolation is represented as &quot;~ya&quot;</li>
    504 						<li>Diacritics: Greek &quot;ς&quot; in isolation is represented as s̱</li>
    505 					</ul>
    506 					<blockquote>
    507 						<p><b>Note:</b> The CLDR committee is considering converging on a common representation for 
    508 						this. The advantage of a common representation is that it allows for easy filtering.</p>
    509 					</blockquote>
    510 					<p>For the default script transforms, the goal is to have unambiguous mappings, 
    511 					with variants for any common use mappings that are ambiguous (non-reversible). In 
    512 					some cases, however, case may not be preserved. For example, </p>
    513 					<table id="table16" cellspacing="1" cellpadding="2" border="1" style="border-collapse: collapse">
    514 						<tr>
    515 							<th>Latin</th>
    516 							<th>Greek</th>
    517 							<th>Latin</th>
    518 						</tr>
    519 						<tr>
    520 							<td>ps PS</td>
    521 							<td>ψ Ψ</td>
    522 							<td>ps PS</td>
    523 						</tr>
    524 						<tr>
    525 							<td>psa Psa <b>PsA</b></td>
    526 							<td>ψα Ψα <b>ΨΑ</b></td>
    527 							<td>psa Psa <b>PSA</b></td>
    528 						</tr>
    529 						<tr>
    530 							<td>psA PSA <b>PSa</b></td>
    531 							<td>ψΑ ΨΑ <b>Ψα</b></td>
    532 							<td>psA PSA <b>Psa</b></td>
    533 						</tr>
    534 					</table>
    535 					<p>The following shows Greek text that is mapped to fully reversible Latin: </p>
    536 					<table id="table5" border="1">
    537 						<tr>
    538 							<th>Greek-Latin</th>
    539 							<th>&nbsp;</th>
    540 						</tr>
    541 						<tr>
    542 							<td>τί φῄς; γραφὴν σέ τις, ὡς ἔοικε, γέγραπται: οὐ γὰρ ἐκεῖνό γε καταγνώσομαι, 
    543 							ὡς σὺ ἕτερον.</td>
    544 							<td>tí phḗis; graphḕn sé tis, hōs éoike, gégraptai: ou gàr ekeînó ge katagnṓsomai, 
    545 							hōs sỳ héteron.</td>
    546 						</tr>
    547 					</table>
    548 					<p>If the user wants a version without certain accents, then CLDR&#39;s <i>chaining 
    549 					rules </i>can be 
    550 					used to remove the accents. For example, the following transliterates to Latin but 
    551 					removes the macron accents on the long vowels. </p>
    552 					<table id="table6" border="1">
    553 						<tr>
    554 							<th>Greek-Latin; nfd; [\u0304] remove; nfc</th>
    555 							<th>&nbsp;</th>
    556 						</tr>
    557 						<tr>
    558 							<td> ;   ,  , :     , 
    559 							  .</td>
    560 							<td>t phis; graphn s tis, hos oike, ggraptai: ou gr eken ge katagnsomai, 
    561 							hos s hteron.</td>
    562 						</tr>
    563 					</table>
    564 					<p>The above chaining rules, separated by semi-colons, perform the following 
    565 					commands in order:</p>
    566 					<table id="table23" border="1">
    567 						<tr>
    568 							<th style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
    569 							Rule</th>
    570 							<th style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
    571 							Description</th>
    572 						</tr>
    573 						<tr>
    574 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px" nowrap>Greek-Latin</td>
    575 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
    576 							transliterate Greek to Latin</td>
    577 						</tr>
    578 						<tr>
    579 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px" nowrap>nfd
    580 							</td>
    581 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
    582 							convert to Unicode NFD format (separating accents from base characters)</td>
    583 						</tr>
    584 						<tr>
    585 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px" nowrap>[\u0304] remove</td>
    586 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
    587 							remove accents, but <i>filter</i> the command to only apply to a single 
    588 							character: <code>
    589 							<a target="c" href="http://unicode.org/cldr/utility/character.jsp?a=0304">
    590 							U+0304</a></code> (  ) COMBINING MACRON</td>
    591 						</tr>
    592 						<tr>
    593 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px" nowrap>nfc</td>
    594 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
    595 							convert to Unicode NFC format (rejoining accents to base characters)</td>
    596 						</tr>
    597 					</table>
    598 					<p>The following transliterates to Latin but removes <i>all</i> accents. Note 
    599 					that the only change is to expand the filter for the <i>remove</i> command.</p>
    600 					<table id="table7" border="1">
    601 						<tr>
    602 							<th>Greek-Latin; nfd; [:nonspacing marks:] remove; nfc</th>
    603 							<th>&nbsp;</th>
    604 						</tr>
    605 						<tr>
    606 							<td> ;   ,  , :     , 
    607 							  .</td>
    608 							<td>ti pheis; graphen se tis, hos eoike, gegraptai: ou gar ekeino ge katagnosomai, 
    609 							hos sy heteron.</td>
    610 						</tr>
    611 					</table>
    612 					<h3><a name="Pronunciation">Pronunciation</a></h3>
    613 					<p>Standard transliteration methods often do not follow the pronunciation rules 
    614 					of any particular language in the target script. For example, the Japanese Hepburn 
    615 					system uses a &quot;j&quot; that has the English phonetic value (as opposed to French, German, 
    616 					or Spanish), but uses vowels that do not have the standard English sounds. A transliteration 
    617 					method might also require some special knowledge to have the correct pronunciation. 
    618 					For example, in the Japanese kunrei-siki system, &quot;ti&quot; is pronounced as English &quot;chee&quot;.</p>
    619 					<p>This is similar to situations where there are different languages within the same 
    620 					script. For example, knowing that the word <i>Gewalt</i> comes from German allows 
    621 					a knowledgeable reader to pronounce the &quot;w&quot; as a &quot;v&quot;.&nbsp; 
    622 					When encountering a 
    623 					foreign word like <i>jawa</i>, there is little assurance how it is to be 
    624 					pronounced even when it is not a <span class="nfakPe">transliteration (it is just from </span>another Latin-script language). The <i>j</i> could be 
    625 					pronounced (for an English speaker) as in <i>jump</i>, 
    626 					or <i>Junker</i>, 
    627 					or <i>jour</i>; 
    628 					and so on. Transcriptions are only roughly phonetic, and only so when the 
    629 					specific pronunciation rules are understood.</p>
    630 					<p>The pronunciation of the characters 
    631 					in the original script may also be influenced by context, which may be 
    632 					particularly misleading in transliteration. For example, in the Bengali , 
    633 					transliterated as niaba, the <i>visarga</i> <i></i> 
    634 					is not pronounced itself (whereas elsewhere it may be) but lengthens the 
    635 					<i></i> 
    636 					sound, and the final inherent <i>a</i> is pronounced (whereas it 
    637 					commonly is not), and the two inherent a&#39;s are pronounced as <i></i> 
    638 					and <i></i>, 
    639 					respectively.</p>
    640 					<p>In some cases, transliteration may be heavily influenced by tradition. For example, 
    641 					the modern Greek letter beta (β) sounds like a &quot;v&quot;, but a 
    642 					transliteration may use a <i>b</i> (as in <i>biology</i>). In that case, the user would need to know 
    643 					that a &quot;b&quot; in the transliterated word corresponds to beta (β) and is to be pronounced 
    644 					as a <i>v</i> in modern Greek.</p>
    645 					<p>Letters may also be transliterated differently according 
    646 					to their context to make the pronunciation more predictable. For example, since 
    647 					the Greek sequence GAMMA GAMMA (γγ) is pronounced as <i>ng</i>, the first GAMMA 
    648 					can be transcribed as an &quot;n&quot; in that context. 
    649 					Similarly, the transliteration can give other guidance to the pronunciation in the 
    650 					source language, for example, using &quot;n&quot; or &quot;m&quot; for the same Japanese character 
    651 					(ん) depending on context, even though there is no distinction in the source 
    652 					script.</p>
    653 					<p>In general, predictability means that when transliterating Latin script to other 
    654 					scripts using reversible transliterations, English text will not produce phonetic 
    655 					results. This is because the pronunciation of English cannot be predicted easily 
    656 					from the letters in a word: e.g. <i>grove</i>, <i>move</i>, and <i>love</i> all end with &quot;ove&quot;, but are 
    657 					pronounced very differently. </p>
    658 					<h3><a name="Cautions">Cautions</a></h3>
    659 					<p>Reversibility may require modifications of traditional transcription methods. 
    660 					For example, there are two standard methods for transliterating Japanese katakana 
    661 					and hiragana into Latin letters. The <i>kunrei-siki</i> method is unambiguous. The Hepburn 
    662 					method can be more easily pronounced by foreigners but is ambiguous. In the Hepburn 
    663 					method, both ZI (ジ) and DI (ヂ) are represented by &quot;ji&quot; and both ZU (ズ) and DU (ヅ) 
    664 					are represented by &quot;zu&quot;. A slightly amended version of Hepburn, that uses &quot;dji&quot; 
    665 					for DI and &quot;dzu&quot; for DU, is unambiguous. </p>
    666 					<p>When a sequence of two letters map to one, case mappings (uppercase and lowercase) 
    667 					must be handled carefully to ensure reversibility. For cased scripts, the two letters 
    668 					may need to have different cases, depending on the next letter. For example, the 
    669 					Greek letter PHI (Φ) maps to PH in Latin, but Φο maps to Pho, and not to PHo.
    670 					</p>
    671 					<p>Some scripts have characters that take on different shapes depending on their 
    672 					context. Usually, this is done at the display level (such as with Arabic) and does 
    673 					not require special transliteration support. However, in a few cases this is represented 
    674 					with different character codes, such as in Greek and Hebrew. For example, a Greek 
    675 					SIGMA is written in a final form (ς) at the end of words, and a non-final form (σ) 
    676 					in other locations. This also requires the transform to map different characters based 
    677 					on the context.</p>
    678 					<p>Another thing to look out for when 
    679 					dealing with cased scripts is that some of the characters in the target script may 
    680 					not be able to represent case distinctions, such as some of the IPA characters 
    681 					in the Latin script.</p>
    682 					<p>It is useful for the reverse mapping to be complete so that arbitrary strings 
    683 					in the target script can be reasonably mapped back to the source script. Complete 
    684 					reverse mapping makes it much easier to do mechanical quality checks and so on. 
    685 					For example, even though the letter &quot;q&quot; might not be necessary in a transliteration 
    686 					of Greek, it can be mapped to a KAPPA (κ). Such reverse mappings will not, in general, 
    687 					be unambiguous. </p>
    688 					<h2><a name="Available_Transliterations">Available Transliterations</a></h2>
    689 					<p>Currently Unicode CLDR offers Romanizations for certain scripts, plus transliterations 
    690 					between the Indic scripts (excluding Urdu). Additional script transliterations will 
    691 					be added in the future.</p>
    692 					<wbr>
    693 					<p>Except where otherwise noted, all of these systems are designed to be reversible. 
    694 					For bicameral scripts (those with uppercase and lowercase), however, case may not 
    695 					be completely preserved.</p>
    696 					<p>The transliterations are also designed to be complete for any sequence of the 
    697 					Latin letters <i>a-z</i>. A fallback is used for a letter that is not covered by 
    698 					the transliteration, and default letters may be inserted as required. For example, 
    699 					in the Hangul transliteration, <i>rink</i> → 린크 → <i>linkeu</i>. That is, &quot;r&quot; is 
    700 					mapped to the closest other letter, and a default vowel is inserted at the end (since 
    701 					&quot;nk&quot; cannot end a syllable).</p>
    702 					<p><i>Preliminary
    703 					<a href="http://www.unicode.org/cldr/data/charts/transforms/index.html">charts</a> 
    704 					are available for the available transliterations. Be sure to read the known issues 
    705 					described there.</i></p>
    706 					<h3><a name="Korean">Korean</a></h3>
    707 					<p>There are many Romanizations of Korean. The default transliteration in Unicode 
    708 					CLDR follows the <a href="http://www.korean.go.kr/06_new/rule/rule06.jsp">Korean 
    709 					Ministry of Culture &amp; Tourism Transliteration</a> regulations (see also
    710 					<a href="http://www.korea.net/korea/kor_loca.asp?code=A020303">English summary</a>). 
    711 					There is an optional clause 8 variant for reversibility:</p>
    712 					<blockquote>
    713 						<p>&quot; 8                .    
    714 						2  &#39;, , , &#39; &#39;g, d, b, l&#39; .   &#39;&#39; (-)    
    715 						  .      (-) .&quot;</p>
    716 						<p><i>translation: </i>&quot;Clause 8: When it is required to recover the original 
    717 						Hangul representation faithfully as in scholarly articles, &#39; , , , &#39; must 
    718 						be always romanized as &#39;g, d, b, l&#39; while the mapping for the rest of the letters 
    719 						remains the same as specified in clause 2. The placeholder &#39;&#39; at the beginning 
    720 						of a syllable should be represented with &#39;-&#39;, but should be omitted at the beginning 
    721 						of a word. In addition, &#39;-&#39; should be used in other cases where a syllable boundary 
    722 						needs to be explicitly marked (be disambiguated).&quot;</p>
    723 					</blockquote>
    724 					<p>There are a number of cases where this Romanization may be ambiguous, because 
    725 					sometimes multiple Latin letters map to a single entity (jamo) in Hangul. This happens 
    726 					with vowels and consonants, the latter being slightly more complicated because there 
    727 					are both initial and final consonants:</p>
    728 					<table border="1" id="table18" style="border-collapse: collapse">
    729 						<tr>
    730 							<th>Type</th>
    731 							<th>Multi-Character Consonants</th>
    732 						</tr>
    733 						<tr>
    734 							<td>Initial-Only</td>
    735 							<td>tt pp jj</td>
    736 						</tr>
    737 						<tr>
    738 							<td>Initial-or-Final</td>
    739 							<td>kk ch ss</td>
    740 						</tr>
    741 						<tr>
    742 							<td>Final-Only</td>
    743 							<td>gs nj nh lg lm lb ls lt lp lh bs ng</td>
    744 						</tr>
    745 					</table>
    746 					<p>CLDR uses the following rules for disambiguation of the possible boundaries 
    747 					between letters, in order. The first rule comes from Clause 8.</p>
    748 					<ol>
    749 						<li>Don&#39;t break so as to require an implicit vowel or null consonant (if possible)</li>
    750 						<li>Don&#39;t break within Initial-Only or Initial-Or-Final sequences (if possible)</li>
    751 						<li>Favor longest match first.</li>
    752 					</ol>
    753 					<p>If there is a single consonant between vowels, then Rule #1 will group it with 
    754 					the following vowel if there is one (this is the same as the first part of Clause 
    755 					8). If there is a sequence of four consonants between vowels, then there is only 
    756 					one possible break (with well-formed text). So the only ambiguities lie with two 
    757 					or three consonants between vowels, where there are possible multi-character consonants 
    758 					involved. Even there, in most cases the resolution is simple, because there isn&#39;t 
    759 					a possible multi-character consonant in the case of two, or two possible multi-character 
    760 					consonants in the case of 3. For example, in the following cases, the left side 
    761 					is unambiguous:</p>
    762 					<blockquote>
    763 						<p>angda = ang-da  <br />apda = ap-da  </p>
    764 					</blockquote>
    765 					<p>There are a relatively small number of possible ambiguities, listed below using 
    766 					&quot;a&quot; as a sample vowel.</p>
    767 					<table border="1" id="table17" style="border-collapse: collapse" cellspacing="0" cellpadding="2">
    768 						<tr>
    769 							<th align="left">No. of<br />Cons. </th>
    770 							<th align="left">Latin</th>
    771 							<th align="left">CLDR<br />Disambiguation</th>
    772 							<th align="left">Hangul</th>
    773 							<th colspan="2" align="left">Comments</th>
    774 						</tr>
    775 						<tr>
    776 							<td rowspan="18">2</td>
    777 							<td><code>atta</code></td>
    778 							<td><code>= a-tta</code></td>
    779 							<td></td>
    780 							<td colspan="2" rowspan="3">Rule 1, then 2</td>
    781 						</tr>
    782 						<tr>
    783 							<td><code>appa</code></td>
    784 							<td><code>= a-ppa</code></td>
    785 							<td></td>
    786 						</tr>
    787 						<tr>
    788 							<td><code>ajja</code></td>
    789 							<td><code>= a-jja</code></td>
    790 							<td></td>
    791 						</tr>
    792 						<tr>
    793 							<td><code>akka</code></td>
    794 							<td><code>= a-kka</code></td>
    795 							<td></td>
    796 							<td colspan="2" rowspan="3">Rule 1, then 2</td>
    797 						</tr>
    798 						<tr>
    799 							<td><code>assa</code></td>
    800 							<td><code>= a-ssa</code></td>
    801 							<td></td>
    802 						</tr>
    803 						<tr>
    804 							<td><code>acha</code></td>
    805 							<td><code>= a-cha</code></td>
    806 							<td></td>
    807 						</tr>
    808 						<tr>
    809 							<td><code>agsa </code></td>
    810 							<td><code>= ag-sa</code></td>
    811 							<td></td>
    812 							<td colspan="2" rowspan="12">Rule 1</td>
    813 						</tr>
    814 						<tr>
    815 							<td><code>anja </code></td>
    816 							<td><code>= an-ja</code></td>
    817 							<td></td>
    818 						</tr>
    819 						<tr>
    820 							<td><code>anha </code></td>
    821 							<td><code>= an-ha</code></td>
    822 							<td></td>
    823 						</tr>
    824 						<tr>
    825 							<td><code>alga </code></td>
    826 							<td><code>= al-ga</code></td>
    827 							<td></td>
    828 						</tr>
    829 						<tr>
    830 							<td><code>alma </code></td>
    831 							<td><code>= al-ma</code></td>
    832 							<td></td>
    833 						</tr>
    834 						<tr>
    835 							<td><code>alba </code></td>
    836 							<td><code>= al-ba</code></td>
    837 							<td></td>
    838 						</tr>
    839 						<tr>
    840 							<td><code>alsa </code></td>
    841 							<td><code>= al-sa</code></td>
    842 							<td></td>
    843 						</tr>
    844 						<tr>
    845 							<td><code>alta </code></td>
    846 							<td><code>= al-ta</code></td>
    847 							<td></td>
    848 						</tr>
    849 						<tr>
    850 							<td><code>alpa </code></td>
    851 							<td><code>= al-pa</code></td>
    852 							<td></td>
    853 						</tr>
    854 						<tr>
    855 							<td><code>alha </code></td>
    856 							<td><code>= al-ha</code></td>
    857 							<td></td>
    858 						</tr>
    859 						<tr>
    860 							<td><code>absa </code></td>
    861 							<td><code>= ab-sa</code></td>
    862 							<td></td>
    863 						</tr>
    864 						<tr>
    865 							<td><code>anga </code></td>
    866 							<td><code>= an-ga</code></td>
    867 							<td></td>
    868 						</tr>
    869 						<tr>
    870 							<td rowspan="9">3</td>
    871 							<td><code>agssa </code></td>
    872 							<td><code>= ag-ssa</code></td>
    873 							<td></td>
    874 							<td colspan="2" rowspan="4">Rule 1, then 2</td>
    875 						</tr>
    876 						<tr>
    877 							<td><code>anjja </code></td>
    878 							<td><code>= an-jja</code></td>
    879 							<td></td>
    880 						</tr>
    881 						<tr>
    882 							<td><code>alssa </code></td>
    883 							<td><code>= al-ssa</code></td>
    884 							<td></td>
    885 						</tr>
    886 						<tr>
    887 							<td><code>abssa </code></td>
    888 							<td><code>= ab-ssa</code></td>
    889 							<td></td>
    890 						</tr>
    891 						<tr>
    892 							<td><code>akkka </code></td>
    893 							<td><code>= akk-ka</code></td>
    894 							<td></td>
    895 							<td colspan="2" rowspan="2">Rule 1, then 2, then 3</td>
    896 						</tr>
    897 						<tr>
    898 							<td><code>asssa </code></td>
    899 							<td><code>= ass-sa</code></td>
    900 							<td></td>
    901 						</tr>
    902 						<tr>
    903 							<td colspan="3"><i>Known bugs*</i></td>
    904 							<td colspan="2"><i>Should be Rule 1, then 2</i></td>
    905 						</tr>
    906 						<tr>
    907 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
    908 							<code>altta </code></td>
    909 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
    910 							<code>= alt-ta</code></td>
    911 							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px"></td>
    912 							<td><code>= al-tta</code></td>
    913 							<td></td>
    914 						</tr>
    915 						<tr>
    916 							<td><code>alppa </code></td>
    917 							<td><code>= alp-pa</code></td>
    918 							<td></td>
    919 							<td><code>= al-ppa</code></td>
    920 							<td></td>
    921 						</tr>
    922 					</table>
    923 					<p><font size="2">* There is one other known bug in CLDR 1.5.1, where &quot;ch&quot; 
    924 					transliterates incorrectly in the degenerate case<font face="Arial">  </font>when not followed by a vowel.</font></p>
    925 					<p>For vowel sequences, the situation is simpler. Only Rule #3 applies, so aeo = 
    926 					ae-o  .</p>
    927 					<h3><a name="Japanese">Japanese</a></h3>
    928 					<p>The default transliteration for Japanese uses a slight variant of the Hepburn 
    929 					system. With the Hepburn system, both ZI (ジ) and DI (ヂ) are represented by &quot;ji&quot; and 
    930 					both ZU (ズ) and DU (ヅ) are represented by &quot;zu&quot;. This is amended slightly for reversibility 
    931 					by using &quot;dji&quot; for DI and &quot;dzu&quot; for DU.</p>
    932 					<h3><a name="Greek">Greek</a></h3>
    933 					<p>The default transliteration uses a standard transcription for Greek which is 
    934 					aimed at preserving etymology. The ISO 843 variant includes the following differences:
    935 					</p>
    936 					<table id="table10" border="1">
    937 						<tr>
    938 							<th>Greek</th>
    939 							<th>Default</th>
    940 							<th>ISO 843</th>
    941 						</tr>
    942 						<tr>
    943 							<td>β</td>
    944 							<td>b</td>
    945 							<td>v</td>
    946 						</tr>
    947 						<tr>
    948 							<td>γ*</td>
    949 							<td>n</td>
    950 							<td>g</td>
    951 						</tr>
    952 						<tr>
    953 							<td></td>
    954 							<td></td>
    955 							<td></td>
    956 						</tr>
    957 						<tr>
    958 							<td></td>
    959 							<td>h</td>
    960 							<td>(omitted)</td>
    961 						</tr>
    962 						<tr>
    963 							<td></td>
    964 							<td></td>
    965 							<td>(omitted)</td>
    966 						</tr>
    967 						<tr>
    968 							<td>~</td>
    969 							<td>~</td>
    970 							<td>(omitted)</td>
    971 						</tr>
    972 					</table>
    973 					<p>* before γ, κ, ξ, χ</p>
    974 					<h3><a name="Cyrillic">Cyrillic</a></h3>
    975 					<p>Cyrillic generally follows ISO 9 for the base Cyrillic set. There are tentative 
    976 					plans to add extended Cyrillic characters in the future, plus variants for GOST 
    977 					and other national standards.</p>
    978 					<h3><a name="Indic">Indic</a></h3>
    979 					<p>Transliteration of Indic scripts follows the ISO 15919<i>
    980 					<strong style="font-weight: 400">Transliteration of Devanagari and related Indic 
    981 					scripts into Latin characters</strong></i>. Internally, all Indic scripts are transliterated 
    982 					by converting first to an internal form, called Inter-Indic, then from Inter-Indic 
    983 					to the target script. Inter-Indic thus provides a pivot between the different 
    984 					scripts, and contains a superset of correspondences for all of them.</p>
    985 					<p>ISO 15919 differs from ISCII 91 in application of diacritics 
    986 					for certain characters. These differences are shown in the following example (illustrated 
    987 					with Devanagari, although the same principles apply to the other Indic scripts):
    988 					</p>
    989 					<table id="table11" border="1">
    990 						<tr>
    991 							<th>Devanagari</th>
    992 							<th>ISCII 91</th>
    993 							<th>ISO 15919</th>
    994 						</tr>
    995 						<tr>
    996 							<td></td>
    997 							<td>r</td>
    998 							<td>r</td>
    999 						</tr>
   1000 						<tr>
   1001 							<td></td>
   1002 							<td>l</td>
   1003 							<td>l</td>
   1004 						</tr>
   1005 						<tr>
   1006 							<td></td>
   1007 							<td>r</td>
   1008 							<td>r</td>
   1009 						</tr>
   1010 						<tr>
   1011 							<td></td>
   1012 							<td>l</td>
   1013 							<td>l</td>
   1014 						</tr>
   1015 						<tr>
   1016 							<td></td>
   1017 							<td>dha</td>
   1018 							<td>rha</td>
   1019 						</tr>
   1020 						<tr>
   1021 							<td></td>
   1022 							<td>da</td>
   1023 							<td>ra</td>
   1024 						</tr>
   1025 					</table>
   1026 					<p>Transliteration rules from Indic to Latin are reversible with the exception of 
   1027 					the ZWJ and ZWNJ used to request explicit rendering effects. For example:</p>
   1028 					<table id="table13" border="1">
   1029 						<tr>
   1030 							<th>Devanagari</th>
   1031 							<th>Romanization</th>
   1032 							<th>Note</th>
   1033 						</tr>
   1034 						<tr>
   1035 							<td></td>
   1036 							<td>ksa</td>
   1037 							<td>normal</td>
   1038 						</tr>
   1039 						<tr>
   1040 							<td></td>
   1041 							<td>ksa</td>
   1042 							<td>explicit halant requested</td>
   1043 						</tr>
   1044 						<tr>
   1045 							<td></td>
   1046 							<td>ksa</td>
   1047 							<td>half-consonant requested</td>
   1048 						</tr>
   1049 					</table>
   1050 					<p>Transliterations between Indic scripts are roundtrip where there are corresponding 
   1051 					letters. Otherwise, there may be fallbacks.</p>
   1052 					<p>There are two particular instances where transliterations may produce unexpected 
   1053 					results: (1) where the final vowel is suppressed in speech, and (2) with the transliteration 
   1054 					of &#39;c&#39;. </p>
   1055 					<p>For example:</p>
   1056 					<table id="table14" border="1">
   1057 						<tr>
   1058 							<th>Devanagari</th>
   1059 							<th style="vertical-align: top">Romanization</th>
   1060 							<th>Notes</th>
   1061 						</tr>
   1062 						<tr>
   1063 							<td></td>
   1064 							<td style="vertical-align: top">Sngupta</td>
   1065 							<td style="vertical-align: top">&nbsp;</td>
   1066 						</tr>
   1067 						<tr>
   1068 							<td></td>
   1069 							<td style="vertical-align: top">Snagupta</td>
   1070 							<td style="vertical-align: top">The final &#39;a&#39; is not pronounced</td>
   1071 						</tr>
   1072 						<tr>
   1073 							<td style="vertical-align: top"></td>
   1074 							<td style="vertical-align: top">Monika</td>
   1075 							<td style="vertical-align: top">&nbsp;</td>
   1076 						</tr>
   1077 						<tr>
   1078 							<td></td>
   1079 							<td style="vertical-align: top">Monica</td>
   1080 							<td style="vertical-align: top">The &#39;c&#39; is pronounced &quot;ch&quot;</td>
   1081 						</tr>
   1082 					</table>
   1083 					<h3><a name="Others">Others</a></h3>
   1084 					<p>Unicode CLDR provides other transliterations based on the
   1085 					<a href="http://geonames.usgs.gov/">U.S. Board on Geographic Names</a> (BGN) transliterations. 
   1086 					These are currently unidirectional <font face="Arial"></font> to Latin only. The goal is to make them bidirectional 
   1087 					in future versions of CLDR.</p>
   1088 					<p>Other transliterations are generally based on the
   1089 					<a href="http://www.eki.ee/wgrs/">UNGEGN: Working Group on Romanization Systems</a> 
   1090 					transliterations. These systems are in 
   1091 					wider actual implementation than most ISO standardized transliterations, and are 
   1092 					published freely available on the web (<a target="_blank" href="http://www.eki.ee/wgrs/">http://www.eki.ee/wgrs/</a>) 
   1093 					and thus easily accessible to all.
   1094 					The UNGEGN also has good documentation. For example, the
   1095 					<a href="http://www.eki.ee/wgrs/rom1_ar.pdf">UNGEGN Arabic Tables</a> 
   1096 					not only presents the UN system, but compares it with the BGN/PCGN 1956 system, 
   1097 					the I.G.N. System 1973, ISO 233:1984, the royal Jordanian Geographic Centre 
   1098 					System, and the Survey of Egypt System.</p>
   1099 					<h2><a name="Submitting_Transliterations">Submitting Transliterations</a></h2>
   1100 					<p>If you are interested in providing transliterations for one or 
   1101 					more scripts, file an initial bug report at <i>
   1102 					<a href="http://www.unicode.org/cldr/bugs/locale-bugs">Locale Bugs</a></i>. The initial bug should contain the scripts and/or languages 
   1103 					involved, and the system being followed (with a link to a full description of 
   1104 					the proposed transliteration system), and a brief example. The proposed data can 
   1105 					also be in that bug, or be added in a Reply to that bug.</p>
   1106 					<p>You can also file a bug in <i>
   1107 					<a href="http://www.unicode.org/cldr/bugs/locale-bugs">Locale Bugs</a></i> if 
   1108 					you find a problem in an existing transliteration.</p>
   1109 					<p>For submission to CLDR, the data 
   1110 					needs to be supplied in the correct XML format, and should follow an accepted 
   1111 					standard. It is best if the results are 
   1112 					tested using the <i><a href="http://demo.icu-project.org/icu-bin/translit">ICU 
   1113 					Transform Demo</a></i> first, since if the data doesn&#39;t validate it would 
   1114 					not be accepted into CLDR. As mentioned above, even if a transliteration is only used 
   1115 					in certain countries or contexts CLDR can provide for them with different 
   1116 					variant tags.</p>
   1117 					<p>The format for rules is specified in 
   1118 					<a target="_blank" href="http://www.unicode.org/reports/tr35/#Transform_Rules">Transform_Rules</a>. 
   1119 					The XML format is just a series of rules and comments. For comparison, you can see what is 
   1120 					currently in CLDR in the 
   1121 					<a target="_blank" href="http://www.unicode.org/cldr/data/common/transforms/">transforms</a> 
   1122 					folder online. For example, see 
   1123 					<a target="_blank" href="http://www.unicode.org/cldr/data/common/transforms/Hebrew-Latin.xml">Hebrew-Latin.xml</a>.</p>
   1124 					<h2><a name="More_Information">More Information</a></h2>
   1125 					<p>For more information, see:</p>
   1126 					<ul>
   1127 						<li>BGN: <a href="http://geonames.usgs.gov/">U.S. Board on Geographic Names</a></li>
   1128 						<li>UNGEGN: <a href="http://www.eki.ee/wgrs/">UNITED NATIONS GROUP OF 
   1129 						EXPERTS ON GEOGRAPHICAL NAMES: Working Group on Romanization 
   1130 						Systems</a></li>
   1131 						<li><a href="http://transliteration.eki.ee/">Transliteration of Non-Roman Alphabets 
   1132 						and Scripts (Søren Binks)</a> </li>
   1133 						<li><a href="http://www.archivists.org/catalog/stds99/chapter8.html">Standards 
   1134 						for Archival Description: Romanization</a> </li>
   1135 						<li><a href="http://transliteration.eki.ee/pdf/Hindi-Marathi-Nepali.pdf">ISO 15919 
   1136 						(Hindi)</a> </li>
   1137 						<li><a href="http://transliteration.eki.ee/pdf/Gujarati.pdf">ISO 15919 (Gujarati)</a>
   1138 						</li>
   1139 						<li><a href="http://transliteration.eki.ee/pdf/Kannada.pdf">ISO 15919 (Kannada)</a>
   1140 						</li>
   1141 						<li><a href="http://www.cdacindia.com/html/gist/down/iscii_d.asp">ISCII-91</a>
   1142 						</li>
   1143 						<li><a href="http://www.unicode.org/reports/tr35/">UTS #35: Locale Data Markup 
   1144 						Language (LDML)</a></li>
   1145 					</ul></div></td>
   1146 			</tr>
   1147 		</table>
   1148 		<hr width="50%" />
   1149 		<div align="center">
   1150 			<center>
   1151 			<table cellspacing="0" cellpadding="0" border="0">
   1152 				<tr>
   1153 					<td><a href="http://www.unicode.org/copyright.html">
   1154 					<img src="http://www.unicode.org/img/hb_notice.gif" border="0" alt="Access to Copyright and terms of use" width="216" height="50" /></a></td>
   1155 				</tr>
   1156 			</table>
   1157 			<script language="Javascript" type="text/javascript" src="http://www.unicode.org/webscripts/lastModified.js">
   1158 			
   1159 			
   1160 			
   1161 			
   1162       </script>
   1163 			</center></div>
   1164 		</td>
   1165 	</tr>
   1166 </table>
   1167 
   1168 </body>
   1169 
   1170 </html>