Home | History | Annotate | Download | only in rfc
      1 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
      2 <html lang="en"><head><title>BCP 47 Extension T - Transformed Content</title>
      3 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
      4 <meta name="description" content="BCP 47 Extension T - Transformed Content">
      5 <meta name="keywords" content="locale, bcp 47">
      6 <meta name="generator" content="xml2rfc v1.36 (http://xml.resource.org/)">
      7 <style type='text/css'><!--
      8         body {
      9                 font-family: verdana, charcoal, helvetica, arial, sans-serif;
     10                 font-size: small; color: #000; background-color: #FFF;
     11                 margin: 2em;
     12         }
     13         h1, h2, h3, h4, h5, h6 {
     14                 font-family: helvetica, monaco, "MS Sans Serif", arial, sans-serif;
     15                 font-weight: bold; font-style: normal;
     16         }
     17         h1 { color: #900; background-color: transparent; text-align: right; }
     18         h3 { color: #333; background-color: transparent; }
     19 
     20         td.RFCbug {
     21                 font-size: x-small; text-decoration: none;
     22                 width: 30px; height: 30px; padding-top: 2px;
     23                 text-align: justify; vertical-align: middle;
     24                 background-color: #000;
     25         }
     26         td.RFCbug span.RFC {
     27                 font-family: monaco, charcoal, geneva, "MS Sans Serif", helvetica, verdana, sans-serif;
     28                 font-weight: bold; color: #666;
     29         }
     30         td.RFCbug span.hotText {
     31                 font-family: charcoal, monaco, geneva, "MS Sans Serif", helvetica, verdana, sans-serif;
     32                 font-weight: normal; text-align: center; color: #FFF;
     33         }
     34 
     35         table.TOCbug { width: 30px; height: 15px; }
     36         td.TOCbug {
     37                 text-align: center; width: 30px; height: 15px;
     38                 color: #FFF; background-color: #900;
     39         }
     40         td.TOCbug a {
     41                 font-family: monaco, charcoal, geneva, "MS Sans Serif", helvetica, sans-serif;
     42                 font-weight: bold; font-size: x-small; text-decoration: none;
     43                 color: #FFF; background-color: transparent;
     44         }
     45 
     46         td.header {
     47                 font-family: arial, helvetica, sans-serif; font-size: x-small;
     48                 vertical-align: top; width: 33%;
     49                 color: #FFF; background-color: #666;
     50         }
     51         td.author { font-weight: bold; font-size: x-small; margin-left: 4em; }
     52         td.author-text { font-size: x-small; }
     53 
     54         /* info code from SantaKlauss at http://www.madaboutstyle.com/tooltip2.html */
     55         a.info {
     56                 /* This is the key. */
     57                 position: relative;
     58                 z-index: 24;
     59                 text-decoration: none;
     60         }
     61         a.info:hover {
     62                 z-index: 25;
     63                 color: #FFF; background-color: #900;
     64         }
     65         a.info span { display: none; }
     66         a.info:hover span.info {
     67                 /* The span will display just on :hover state. */
     68                 display: block;
     69                 position: absolute;
     70                 font-size: smaller;
     71                 top: 2em; left: -5em; width: 15em;
     72                 padding: 2px; border: 1px solid #333;
     73                 color: #900; background-color: #EEE;
     74                 text-align: left;
     75         }
     76 
     77         a { font-weight: bold; }
     78         a:link    { color: #900; background-color: transparent; }
     79         a:visited { color: #633; background-color: transparent; }
     80         a:active  { color: #633; background-color: transparent; }
     81 
     82         p { margin-left: 2em; margin-right: 2em; }
     83         p.copyright { font-size: x-small; }
     84         p.toc { font-size: small; font-weight: bold; margin-left: 3em; }
     85         table.toc { margin: 0 0 0 3em; padding: 0; border: 0; vertical-align: text-top; }
     86         td.toc { font-size: small; font-weight: bold; vertical-align: text-top; }
     87 
     88         ol.text { margin-left: 2em; margin-right: 2em; }
     89         ul.text { margin-left: 2em; margin-right: 2em; }
     90         li      { margin-left: 3em; }
     91 
     92         /* RFC-2629 <spanx>s and <artwork>s. */
     93         em     { font-style: italic; }
     94         strong { font-weight: bold; }
     95         dfn    { font-weight: bold; font-style: normal; }
     96         cite   { font-weight: normal; font-style: normal; }
     97         tt     { color: #036; }
     98         tt, pre, pre dfn, pre em, pre cite, pre span {
     99                 font-family: "Courier New", Courier, monospace; font-size: small;
    100         }
    101         pre {
    102                 text-align: left; padding: 4px;
    103                 color: #000; background-color: #CCC;
    104         }
    105         pre dfn  { color: #900; }
    106         pre em   { color: #66F; background-color: #FFC; font-weight: normal; }
    107         pre .key { color: #33C; font-weight: bold; }
    108         pre .id  { color: #900; }
    109         pre .str { color: #000; background-color: #CFF; }
    110         pre .val { color: #066; }
    111         pre .rep { color: #909; }
    112         pre .oth { color: #000; background-color: #FCF; }
    113         pre .err { background-color: #FCC; }
    114 
    115         /* RFC-2629 <texttable>s. */
    116         table.all, table.full, table.headers, table.none {
    117                 font-size: small; text-align: center; border-width: 2px;
    118                 vertical-align: top; border-collapse: collapse;
    119         }
    120         table.all, table.full { border-style: solid; border-color: black; }
    121         table.headers, table.none { border-style: none; }
    122         th {
    123                 font-weight: bold; border-color: black;
    124                 border-width: 2px 2px 3px 2px;
    125         }
    126         table.all th, table.full th { border-style: solid; }
    127         table.headers th { border-style: none none solid none; }
    128         table.none th { border-style: none; }
    129         table.all td {
    130                 border-style: solid; border-color: #333;
    131                 border-width: 1px 2px;
    132         }
    133         table.full td, table.headers td, table.none td { border-style: none; }
    134 
    135         hr { height: 1px; }
    136         hr.insert {
    137                 width: 80%; border-style: none; border-width: 0;
    138                 color: #CCC; background-color: #CCC;
    139         }
    140 --></style>
    141 </head>
    142 <body>
    143 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    144 <table summary="layout" width="66%" border="0" cellpadding="0" cellspacing="0"><tr><td><table summary="layout" width="100%" border="0" cellpadding="2" cellspacing="1">
    145 <tr><td class="header">Internet Engineering Task Force</td><td class="header">M. Davis</td></tr>
    146 <tr><td class="header">Internet-Draft</td><td class="header">Google</td></tr>
    147 <tr><td class="header">Intended status: Informational</td><td class="header">A. Phillips</td></tr>
    148 <tr><td class="header">Expires: June 7, 2012</td><td class="header">Lab126</td></tr>
    149 <tr><td class="header">&nbsp;</td><td class="header">Y. Umaoka</td></tr>
    150 <tr><td class="header">&nbsp;</td><td class="header">IBM</td></tr>
    151 <tr><td class="header">&nbsp;</td><td class="header">C. Falk</td></tr>
    152 <tr><td class="header">&nbsp;</td><td class="header">Infinite Automata</td></tr>
    153 <tr><td class="header">&nbsp;</td><td class="header">December 5, 2011</td></tr>
    154 </table></td></tr></table>
    155 <h1><br />BCP 47 Extension T - Transformed Content<br />draft-davis-t-langtag-ext-07</h1>
    156 
    157 <h3>Abstract</h3>
    158 
    159 <p>
    160 				This document specifies an Extension to BCP 47
    161 				which provides
    162 				subtags
    163 				for specifying the source language or script of transformed
    164 				content,
    165 				including content
    166 				that
    167 				has been transliterated, transcribed, or
    168 				translated, or in some other way influenced by the source. It also provides for additional information used for
    169 				identification.
    170 			
    171 </p>
    172 <h3>Status of this Memo</h3>
    173 <p>
    174 This Internet-Draft is submitted  in full
    175 conformance with the provisions of BCP&nbsp;78 and BCP&nbsp;79.</p>
    176 <p>
    177 Internet-Drafts are working documents of the Internet Engineering
    178 Task Force (IETF).  Note that other groups may also distribute
    179 working documents as Internet-Drafts.  The list of current
    180 Internet-Drafts is at http://datatracker.ietf.org/drafts/current/.</p>
    181 <p>
    182 Internet-Drafts are draft documents valid for a maximum of six months
    183 and may be updated, replaced, or obsoleted by other documents at any time.
    184 It is inappropriate to use Internet-Drafts as reference material or to cite
    185 them other than as &ldquo;work in progress.&rdquo;</p>
    186 <p>
    187 This Internet-Draft will expire on June 7, 2012.</p>
    188 
    189 <h3>Copyright Notice</h3>
    190 <p>
    191 Copyright (c) 2011 IETF Trust and the persons identified as the
    192 document authors.  All rights reserved.</p>
    193 <p>
    194 This document is subject to BCP 78 and the IETF Trust's Legal
    195 Provisions Relating to IETF Documents
    196 (http://trustee.ietf.org/license-info) in effect on the date of
    197 publication of this document.  Please review these documents
    198 carefully, as they describe your rights and restrictions with respect
    199 to this document.</p>
    200 <a name="toc"></a><br /><hr />
    201 <h3>Table of Contents</h3>
    202 <p class="toc">
    203 <a href="#anchor1">1.</a>&nbsp;
    204 Introduction<br />
    205 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#anchor2">1.1.</a>&nbsp;
    206 Requirements Language<br />
    207 <a href="#anchor3">2.</a>&nbsp;
    208 BCP47 Required Information<br />
    209 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#anchor4">2.1.</a>&nbsp;
    210 Overview<br />
    211 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#structure">2.2.</a>&nbsp;
    212 Structure<br />
    213 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#canonicalization">2.3.</a>&nbsp;
    214 Canonicalization<br />
    215 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#regform">2.4.</a>&nbsp;
    216 BCP47 Registration Form<br />
    217 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#summary">2.5.</a>&nbsp;
    218 Field Definitions<br />
    219 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#registration">2.6.</a>&nbsp;
    220 Registration of Field Subtags<br />
    221 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#field-registration">2.7.</a>&nbsp;
    222 Registration of Additional Fields<br />
    223 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#committee-responses">2.8.</a>&nbsp;
    224 Committee Responses to Registration Proposals<br />
    225 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#machine-readable">2.9.</a>&nbsp;
    226 Machine-Readable Data<br />
    227 <a href="#Acknowledgements">3.</a>&nbsp;
    228 Acknowledgements<br />
    229 <a href="#IANA">4.</a>&nbsp;
    230 IANA Considerations<br />
    231 <a href="#Security">5.</a>&nbsp;
    232 Security Considerations<br />
    233 <a href="#rfc.references1">6.</a>&nbsp;
    234 References<br />
    235 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#rfc.references1">6.1.</a>&nbsp;
    236 Normative References<br />
    237 &nbsp;&nbsp;&nbsp;&nbsp;<a href="#rfc.references2">6.2.</a>&nbsp;
    238 Informative References<br />
    239 <a href="#rfc.authors">&#167;</a>&nbsp;
    240 Authors' Addresses<br />
    241 </p>
    242 <br clear="all" />
    243 
    244 <a name="anchor1"></a><br /><hr />
    245 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    246 <a name="rfc.section.1"></a><h3>1.&nbsp;
    247 Introduction</h3>
    248 
    249 <p>
    250 				<a class='info' href='#BCP47'>[BCP47]<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a>
    251 				permits the definition and registration of language tag extensions
    252 				"that contain a language component and are compatible with
    253 				applications that
    254 				understand language tags". This document defines an
    255 				extension for
    256 				specifying the source of content that has been transformed,
    257 				including text that has been transliterated, transcribed, or
    258 				translated, or in some other way influenced by the source.
    259 				It may be used in queries to request content that has been
    260 				transformed.
    261 				The "singleton" identifier for this extension is 't'.
    262 			
    263 </p>
    264 <p>
    265 				Language tags, as defined by
    266 				<a class='info' href='#BCP47'>[BCP47]<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a>, are useful for identifying the language of content.
    267 				There are
    268 				mechanisms for specifying variant subtags for special purposes.
    269 				However, these variants are insufficient for specifying content that has
    270 				undergone
    271 				transformations,
    272 				including content that has been
    273 				transliterated,
    274 				transcribed, or
    275 				translated.
    276 				The correct interpretation of the content may depend upon knowledge of the conventions used for the transformation.
    277 			
    278 </p>
    279 <p>
    280 			   Suppose that Italian or Russian
    281 			   cities on a map are transcribed for Japanese users. Each name needs to be
    282 			   transliterated into katakana using rules appropriate for the specific
    283 			   source and target language.   When tagging such data, it is important
    284 			   to be able to indicate not only the resulting content language ("ja"
    285 			   in this case), but also the source language.
    286 </p>
    287 <p>Transforms such as transliterations may vary depending not only on the
     288 			   source and target script, but also on the source and target language.
    289 			   Thus the
    290 			   Russian &lt;U+041F U+0443 U+0442 U+0438 U+043D> (which corresponds to
    291 			   the Cyrillic &lt;PE, U, TE, I, EN>) transliterates into "Putin" in
    292 			   English but "Poutine" in French.  The identifier could be used to indicate
    293 			   a desired mechanical transformation in an API, or could be used to tag
    294 			   data that has been converted (mechanically or by hand) according to a
    295 			   transliteration method.
    296 </p>
    297 <p>
    298 				In addition, many different conventions have arisen for how to transform text, even between the same languages and scripts.
    299                 For example, "Gaddafi" is commonly transliterated from Arabic to English as any of (G/Q/K/Kh)a(d/dh/dd/dhdh/th/zz)af(i/y).
    300 				Some examples of  standardized conventions used for transcribing or transliterating text include:
    301                 </p>
    302 <blockquote class="text"><dl>
    303 <dt>a.</dt>
    304 <dd>United Nations Group of Experts on Geographical Names (UNGEGN)
    305 </dd>
    306 <dt>b.</dt>
    307 <dd>US Library of Congress (LOC)
    308 </dd>
    309 <dt>c.</dt>
    310 <dd>US Board on Geographic Names (BGN)
    311 </dd>
    312 <dt>d.</dt>
    313 <dd>Korean Ministry of Culture, Sports and Tourism (MCST)
    314 </dd>
    315 <dt>e.</dt>
    316 <dd>International Organization for Standardization (ISO)
    317 </dd>
    318 </dl></blockquote><p>
    319 				
    320 </p>
    321 <p>The usage of this extension is not limited to formal transformations, 
    322 				and may include other instances where the content is in some other way influenced by the source. 
    323 				For example, this extension could be used to designate a request for a speech recognizer 
    324 				that is tailored specifically for 2nd-language speakers who are 
    325 				1st-language speakers of a particular language (e.g. a recognizer for "English spoken with a Chinese accent").
    326 </p>
    327 <a name="anchor2"></a><br /><hr />
    328 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    329 <a name="rfc.section.1.1"></a><h3>1.1.&nbsp;
    330 Requirements Language</h3>
    331 
    332 <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL
    333 					NOT",
    334 					"SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL"
    335 					in this
    336 					document are to be interpreted as described in RFC 2119.
    337 </p>
    338 <a name="anchor3"></a><br /><hr />
    339 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    340 <a name="rfc.section.2"></a><h3>2.&nbsp;
    341 BCP47 Required Information</h3>
    342 
    343 <a name="anchor4"></a><br /><hr />
    344 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    345 <a name="rfc.section.2.1"></a><h3>2.1.&nbsp;
    346 Overview</h3>
    347 
    348 <p>
    349 					Identification of transformed content can be done using the 't' extension
    350 					defined in this document.
    351 					This extension is formed by the 't'
    352 					singleton followed by a sequence of subtags that would form a
    353 					language tag as defined by
    354 					<a class='info' href='#BCP47'>[BCP47]<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a>.
    355 					This allows for the source language or script to be specified to
    356 					the degree of precision required.
    357 					There are restrictions on the
    358 					sequence of subtags.
    359 					They MUST form a regular, valid, canonical
    360 					language
    361 					tag, and MUST neither include extensions nor private use
    362 					sequences introduced by the
    363 					singleton
    364 					'x'.
    365 					Where only the script is
    366 					relevant (such as identifying
    367 					a
    368 					script-script
    369 					transliteration) then
    370 					'und' is used for the primary language subtag.
    371 				
    372 </p>
    373 <p>For example:
    374 </p><table class="full" align="center" border="0" cellpadding="2" cellspacing="2">
    375 <col align="left"><col align="left">
    376 <tr><th align="left">Language Tag</th><th align="left">Description</th></tr>
    377 <tr>
    378 <td align="left">ja-t-it</td>
    379 <td align="left">The content is Japanese, transformed from Italian.</td>
    380 </tr>
    381 <tr>
    382 <td align="left">ja-Kana-t-it</td>
    383 <td align="left">The content is Japanese Katakana, transformed from Italian.</td>
    384 </tr>
    385 <tr>
    386 <td align="left">und-Latn-t-und-cyrl</td>
    387 <td align="left">The content is in the Latin script, transformed from the Cyrillic
    388 						script.</td>
    389 </tr>
    390 </table>
    391 <br clear="all" />
    392 
    393 <p>
    394 					Note that the sequence of subtags governed by 't' cannot contain a
    395 					singleton (a single-character subtag), because that would start a
    396 					new extension.
    397 					For example, the tag "ja-t-i-ami"
    398 					does not indicate
    399 					that the source is in "i-ami", because "i-ami" is not a
    400 					regular
    401 					language tag in
    402 					<a class='info' href='#BCP47'>[BCP47]<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a>. That tag would express an empty 't' extension followed by an 'i'
    403 					extension.
    404 				
    405 </p>
    406 <p>The 't' extension is not intended for use in structured data that already provides 
    407 				separate source and target language identifiers.
    408 				For example, this is the case in localization interchange formats such as XLIFF.
    409 				In such cases, it would be inappropriate to use "ja-t-it" for the target language tag because the source language tag
    410 				"it" would already be present in the data. Instead one would use the language tag "ja".
    411 				
    412 </p>
    413 <p>As noted earlier, it is sometimes necessary to indicate additional
    414 					information about a transformation.
    415 					This additional information is optionally supplied after the source in a series of one or more fields,
    416 					where each field consists of a field separator subtag followed by one or more non-separator subtags.
    417 					Each field separator subtag consists of a single letter followed by a single digit.
    418 					
    419 </p>
    420 <p>A transformation mechanism is an optional field that indicates
    421 					the
    422 					specification used for the transformation, such as "UNGEGN" for
    423 					the
     424 					United Nations Group of Experts on
    425 					Geographical
    426 					Names
    427 					transliterations and transcriptions. It uses the 'm0' field separator followed by certain subtags.
    428 				
    429 </p>
    430 <p>For example:
    431 </p><table class="full" align="center" border="0" cellpadding="2" cellspacing="2">
    432 <col align="left"><col align="left">
    433 <tr><th align="left">Language Tag</th><th align="left">Description</th></tr>
    434 <tr>
    435 <td align="left">und-Cyrl-t-und-latn-m0-ungegn-2007</td>
     436 <td align="left">The content is in Cyrillic, transformed from Latin, according
    437 						to a
    438 						UNGEGN specification dated 2007.</td>
    439 </tr>
    440 </table>
    441 <br clear="all" />
    442 
    443 <p>The field separator subtags such as 'm0' were chosen because they are
    444 					short, visually distinctive,
    445 					and cannot occur in a language subtag
    446 					(outside of an extension and
    447 					after 'x'),
    448 					thus eliminating the
    449 					potential for collision or confusion with the
    450 					source language tag.
    451 </p>
    452 <p>
    453 					The field subtags are defined by
    454 					<a href='http://unicode.org/reports/tr35/'>Section 3</a>
    455 					of
    456 					<a class='info' href='#UTS35'>Unicode
    457 						Technical Standard #35: Unicode Locale Data
    458 						Markup Language<span> (</span><span class='info'>Davis, M., &ldquo;Unicode Technical Standard #35: Locale Data 						Markup Language (LDML),&rdquo; December&nbsp;2007.</span><span>)</span></a> [UTS35] (LDML), the main specification for the Unicode
    459                     Common Locale Data Repository (CLDR) project.
    460                     As required by BCP 47, subtags follow the language tag ABNF and
    461 					other rules for the formation of language tags and subtags, are
    462 					restricted to the ASCII letters and digits, are not case sensitive,
    463 					and do not exceed eight characters in length.
    464 				
    465 </p>
    466 <p>
    467 					EDITORIAL NOTE: This new facility has been accepted by the Unicode
    468 				    CLDR committee for incorporation into the next versions of CLDR and LDML, parallel
    469 					with the structure of the 'u' extension
    470 					<a class='info' href='#RFC6067'>[RFC6067]<span> (</span><span class='info'>Davis, M., Ed., Phillips, A., Ed., and Y. Umaoka, Ed., &ldquo;BCP 47 Extension U,&rdquo; September&nbsp;2010.</span><span>)</span></a>,
    471 					for which it is already the maintaining authority.
    472 					The data and
     473 					specification will be available by the time this
     474 					Internet-Draft has
    475 					been
    476 					approved.
    477 				
    478 </p>
    479 <p>The LDML specification is available over the Internet and at no cost, and
    480 					is
    481 					available via a royalty-free license at
    482 					http://unicode.org/copyright.html. LDML is versioned, and each
    483 					version of LDML is numbered, dated, and stable. Extension subtags,
    484 					once
    485 					defined by LDML, are never retracted or substantially changed in meaning. 
    486 </p>
    487 <p>The maintaining authority for the 't' extension is
    488 					the Unicode
    489 					Consortium:
    490 </p><table class="full" align="center" border="0" cellpadding="2" cellspacing="2">
    491 <col align="left"><col align="left">
    492 <tr><th align="left">Item</th><th align="left">Value</th></tr>
    493 <tr>
    494 <td align="left">Name</td>
    495 <td align="left">Unicode Consortium</td>
    496 </tr>
    497 <tr>
    498 <td align="left">Contact Email</td>
     499 <td align="left">cldr-contact@unicode.org</td>
    500 </tr>
    501 <tr>
    502 <td align="left">Discussion List Email</td>
     503 <td align="left">cldr-users@unicode.org</td>
    504 </tr>
    505 <tr>
    506 <td align="left">URL Location</td>
    507 <td align="left">cldr.unicode.org</td>
    508 </tr>
    509 <tr>
    510 <td align="left">Specification</td>
    511 <td align="left">Unicode Technical Standard #35 Unicode Locale Data Markup
    512 						Language (LDML), http://unicode.org/reports/tr35/</td>
    513 </tr>
    514 <tr>
    515 <td align="left">Section</td>
    516 <td align="left">Section 3 Unicode Language and Locale Identifiers</td>
    517 </tr>
    518 </table>
    519 <br clear="all" />
    520 
    521 <a name="structure"></a><br /><hr />
    522 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    523 <a name="rfc.section.2.2"></a><h3>2.2.&nbsp;
    524 Structure</h3>
    525 
    526 <p>The subtags in the 't' extension are of the following form:
    527 </p><div style='display: table; width: 0; margin-left: 3em; margin-right: auto'><pre>
    528 <dfn>t-ext</dfn>=    "<span class='str'>t</span>"                      <em>; Extension</em>
    529           (("<span class='str'>-</span>" <cite class='id'>lang</cite> <span class='rep'>*</span>("<span class='str'>-</span>" <cite class='id'>field</cite>)) <em>; Source + optional field(s)</em>
    530           / <span class='rep'>1*</span>("<span class='str'>-</span>" <cite class='id'>field</cite>))         <em>; Field(s) only (no source)</em>
    531 
    532 <dfn>lang</dfn>=     <cite class='id'>language</cite>                 <em>; BCP47, with restrictions</em>
    533           ["<span class='str'>-</span>" <cite class='id'>script</cite>]
    534           ["<span class='str'>-</span>" <cite class='id'>region</cite>]
    535           <span class='rep'>*</span>("<span class='str'>-</span>" <cite class='id'>variant</cite>)
    536 
    537 <dfn>field</dfn>=    <cite class='id'>sep</cite> <span class='rep'>1*</span>("<span class='str'>-</span>" <span class='rep'>3*8</span><cite class='id'>alphanum</cite>)  <em>; With restrictions</em>
    538 
    539 <dfn>sep</dfn>=      <cite class='key'>ALPHA</cite> <cite class='key'>DIGIT</cite>              <em>; Subtag separators</em>
    540 <dfn>alphanum</dfn>= <cite class='key'>ALPHA</cite> / <cite class='key'>DIGIT</cite>
    541 </pre></div>
    542 <p>where &lt;language>, &lt;script>, &lt;region>, and &lt;variant> rules are specified in <a class='info' href='#BCP47'>[BCP47]<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a>,
     543                 and the &lt;ALPHA> and &lt;DIGIT> rules are specified in <a class='info' href='#RFC5234'>[RFC5234]<span> (</span><span class='info'>Crocker, Ed., &ldquo;Augmented BNF for Syntax Specifications: ABNF,&rdquo; 2008.</span><span>)</span></a>.
    544 </p>
    545 <p>Description and restrictions:
    546 					</p>
    547 <blockquote class="text"><dl>
    548 <dt>a.</dt>
    549 <dd>The 't' extension MUST have at least one subtag.
    550 </dd>
    551 <dt>b.</dt>
    552 <dd>
    553 							The 't' extension normally starts with a source language tag,
    554 							which MUST be a regular, canonical language tag as specified by
    555 							<a class='info' href='#BCP47'>[BCP47]<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a>.
    556 							Tags described by the 'irregular' production in BCP 47 MUST NOT
    557 							be
    558 							used to form the language tag.
    559 							The source language tag MAY be
    560 							omitted: some field values do not
    561 							require it.
    562 						
    563 </dd>
    564 <dt>c.</dt>
    565 <dd>There is optionally a sequence of fields, where each field has a
    566 							separator followed by a sequence of one or more subtags.
    567 							Two identical field
    568 							separators MUST NOT be present in the language tag.
    569 </dd>
    570 <dt>d.</dt>
    571 <dd>
    572 							The order of the fields in a 't' extension is not significant. The order of subtags within a field is significant.
    573 							(See
    574 							<a class='info' href='#canonicalization'>Section&nbsp;2.3<span> (</span><span class='info'>Canonicalization</span><span>)</span></a>
    575 							Canonicalization.)
    576 						
    577 </dd>
    578 <dt>e.</dt>
    579 <dd>
    580 		                    The 't' subtag fields are defined by 
    581 		                    <a href='http://unicode.org/reports/tr35/'>Section 3</a>
    582 		                    of
    583 		                    <a class='info' href='#UTS35'>Unicode
    584 		                        Technical Standard #35: Unicode Locale
    585 		                        Data Markup Language<span> (</span><span class='info'>Davis, M., &ldquo;Unicode Technical Standard #35: Locale Data 						Markup Language (LDML),&rdquo; December&nbsp;2007.</span><span>)</span></a> [UTS35].
    586 		                
    587 </dd>
    588 </dl></blockquote><p>
    589 				
    590 </p>
    591 <a name="canonicalization"></a><br /><hr />
    592 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    593 <a name="rfc.section.2.3"></a><h3>2.3.&nbsp;
    594 Canonicalization</h3>
    595 
    596 <p>As required by
    597                     <a class='info' href='#BCP47'>[BCP47]<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a>, the use of uppercase or lowercase letters is not significant in
    598                     the subtags used in this extension. The canonical form for all
    599                     subtags in the extension is lowercase, with the fields ordered by
    600                     the separators, alphabetically.
    601                     The order of subtags within a field is significant, and MUST NOT be changed in the process of canonicalizing.
    602 </p>
    603 <a name="regform"></a><br /><hr />
    604 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    605 <a name="rfc.section.2.4"></a><h3>2.4.&nbsp;
    606 BCP47 Registration Form</h3>
    607 
    608 <p>
    609                     Per
    610                     <a class='info' href='#BCP47'>RFC 5646, Section 3.7<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a> [BCP47]:
    611                 
    612 </p><div style='display: table; width: 0; margin-left: 3em; margin-right: auto'><pre>
    613 %%
    614 Identifier: t
    615 Description: Specifying Transformed Content
    616 Comments: Subtags for the identification of content that has been
    617 transformed, including but not limited to:
    618 transliteration, transcription, and translation.
    619 Added: 2010-mm-dd
    620 RFC: [TBD]
    621 Authority: Unicode Consortium
    622 Contact_Email: cldr-contact@unicode.org
    623 Mailing_List: cldr-users@unicode.org
    624 URL: http://www.unicode.org/Public/cldr/latest/core.zip
    625 %%</pre></div>
    626 <a name="summary"></a><br /><hr />
    627 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    628 <a name="rfc.section.2.5"></a><h3>2.5.&nbsp;
    629 Field Definitions</h3>
    630 
    631 <p>Assignment of 't' field subtags is determined by the Unicode CLDR
    632                     Technical Committee, in accordance with the policies and procedures
    633                     in
    634                     <a href='http://www.unicode.org/consortium/tc-procedures.html'>http://www.unicode.org/consortium/tc-procedures.html</a>,
    635                     and subject to the Unicode Consortium Policies on
    636                     <a href='http://www.unicode.org/policies/policies.html'>http://www.unicode.org/policies/policies.html</a>.
    637 </p>
    638 <p>
    639                     Assignments that can be made by successive versions of
    640                     <a class='info' href='#UTS35'>LDML<span> (</span><span class='info'>Davis, M., &ldquo;Unicode Technical Standard #35: Locale Data 						Markup Language (LDML),&rdquo; December&nbsp;2007.</span><span>)</span></a> [UTS35]
    641                     by the Unicode Consortium without requiring a new RFC include:
    642                     </p>
    643 <ul class="text">
    644 <li>The
    645                     allocation of new field separator subtags for use after the 't' extension.
    646 </li>
    647 <li>The allocation of subtags valid after a field separator subtag.
    648 </li>
    649 <li>The addition of subtag aliases and descriptions. 
    650 </li>
    651 <li>The modification of subtag descriptions.
    652 </li>
    653 </ul><p>
    654                     Changes to the syntax or meaning of the 't' extension would require a new 
    655                     RFC that obsoletes this document; such an RFC would break stability, and
    656                     would thus be contrary to the policies of the Unicode Consortium.
    657                 
    658 </p>
    659 <p>
    660 				  At the time this document was published, one field was specified in 
    661 				  <a class='info' href='#UTS35'>[UTS35]<span> (</span><span class='info'>Davis, M., &ldquo;Unicode Technical Standard #35: Locale Data 						Markup Language (LDML),&rdquo; December&nbsp;2007.</span><span>)</span></a>: the transform mechanism.
    662                   That field is summarized here:
    663 					</p>
    664 <blockquote class="text"><dl>
    665 <dt>a.</dt>
    666 <dd>
    667 							The transform mechanism consists of a sequence of
    668 							subtags
    669 							starting
    670 							with the 'm0' separator followed by one or more
    671 							mechanism subtags.
    672 							Each mechanism subtag has a length of 3 to 8
    673 							alphanumeric
    674 							characters.
    675 							The sequence as a whole provides an
    676 							identification of the
    677 							specification
    678 							for the transform,
    679 							such as the
    680 							mechanism subtag 'ungegn' in
    681 							"und-Cyrl-t-und-latn-m0-ungegn".
    682 							In
    683 							many cases, only one mechanism subtag is necessary, but
    684 							multiple
    685 							subtags MAY be defined in
    686 							<a class='info' href='#UTS35'>[UTS35]<span> (</span><span class='info'>Davis, M., &ldquo;Unicode Technical Standard #35: Locale Data 						Markup Language (LDML),&rdquo; December&nbsp;2007.</span><span>)</span></a>
    687 							where necessary.
    688 						
    689 </dd>
    690 <dt>b.</dt>
    691 <dd>
    692 							Any purely numeric subtag is a representation of a date in the
    693 							Gregorian calendar.
    694 							It MAY occur in any mechanism field, but it SHOULD only be used where necessary.
    695 							If it does occur:
    696 							
    697 <ul class="text">
    698 <li>it MUST occur as the final subtag in the field
    699 </li>
    700 <li>it MUST NOT be the only subtag in the field
    701 </li>
    702 <li>it MUST only consist of a sequence of digits of the form YYYY,
    703 									YYYYMM, or YYYYMMDD
    704 </li>
    705 <li>it SHOULD be as short as possible
    706 </li>
    707 </ul>
    708 							Note: The format is related to that of <a class='info' href='#RFC3339'>[RFC3339]<span> (</span><span class='info'>Klyne, Ed. and Newman, Ed., &ldquo;Date and Time on the Internet: Timestamps,&rdquo; 2002.</span><span>)</span></a>, but is not the same.
    709 							The RFC 3339 full-date won't work because it uses hyphens. The offset ("Z") is not used
    710 							because the date is a publication date (aka 'floating date'). For more information, see
    711 							 Section 3.3, Floating Time in 
    712 							 <a class='info' href='#W3C-TimeZones'>[W3C&#8209;TimeZones]<span> (</span><span class='info'>Phillips, Ed., &ldquo;W3C Working Group Note: Working with Time Zones,&rdquo; July&nbsp;2011.</span><span>)</span></a>.
    713 							 
    714 </dd>
    715 <dt>c.</dt>
    716 <dd>
    717 							Examples:
    718 							
    719 <ul class="text">
    720 <li>20110623 represents June 23rd, 2011.
    721 </li>
    722 <li>There are 3 dated versions of the UNGEGN transliteration
    723                             specification for Hebrew to Latin. They can be represented by the following language tags:
    724                             
    725 <ul class="text">
    726 <li>und-Hebr-t-und-Latn-m0-ungegn-1972
    727 </li>
    728 <li>und-Hebr-t-und-Latn-m0-ungegn-1977
    729 </li>
    730 <li>und-Hebr-t-und-Latn-m0-ungegn-2007
    731 </li>
    732 </ul>
    733 							
    734 </li>
    735 <li>Suppose that the BGN transliteration
    736 							specification for Cyrillic to Latin had three versions,
    737 							dated
    738 							June 11th, 1999; Dec 30th, 1999; and May 1st, 2011.
    739 							In that
    740 							case, the corresponding first two DATE subtags would require
    741 							months
    742 							to be distinctive (199906 and 199912), but the last
    743 							subtag
    744 							would only
    745 							require the year (2011).
    746 </li>
    747 </ul>
    748 						
    749 </dd>
    750 <dt>d.</dt>
    751 <dd>
    752 							Some mechanisms may use a versioning system that is not
    753 							distinguished by date, or not by date alone.
    754 							In the latter case,
    755 							the version will be of a form specified by
    756 							<a class='info' href='#UTS35'>[UTS35]<span> (</span><span class='info'>Davis, M., &ldquo;Unicode Technical Standard #35: Locale Data 						Markup Language (LDML),&rdquo; December&nbsp;2007.</span><span>)</span></a>
    757 							for that mechanism.
    758 							For example, if the mechanism XXX uses
    759 							versions of the form v21a,
    760 							then a tag could look like
    761 							"ja-t-it-m0-xxx-v21a". If there are
    762 							multiple subversions
    763 							distinguished by date,
    764 							then a tag could look like
    765 							"ja-t-it-m0-xxx-v21a-2007".
    766 						
    767 </dd>
    768 </dl></blockquote><p>
    769 					
    770 				
    771 </p>
    772 <p>A language tag with the 't' extension MAY be used to request a specific transform of content.
    773 				In such a case, the recipient SHOULD return content that corresponds
    774 				as closely as feasible to the requested transform, including the specification of the mechanism.
    775 				For example, if the request is ja-t-it-m0-xxx-v21a-2007,
    776 				and the recipient has content corresponding to both ja-t-it-m0-xxx-v21a and ja-t-it-m0-xxx-v21b-2009, then the v21a version would be preferred.
    777 				As is the case for language matching as discussed in <a class='info' href='#BCP47'>[BCP47]<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a>,
    778 				different implementations MAY have different measures of "closeness".
    779 </p>
    780 <a name="registration"></a><br /><hr />
    781 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    782 <a name="rfc.section.2.6"></a><h3>2.6.&nbsp;
    783 Registration of Field Subtags</h3>
    784 
    785 <p>Registration of transform mechanisms is requested by filing a ticket at
    786 					<a href='http://cldr.unicode.org/'>cldr.unicode.org</a>.
    787 					The proposal in the ticket MUST contain the following information:
    788 </p><table class="full" align="center" border="0" cellpadding="2" cellspacing="2">
    789 <col align="left"><col align="left">
    790 <tr><th align="left">Item</th><th align="left">Description</th></tr>
    791 <tr>
    792 <td align="left">Subtag</td>
    793 <td align="left">The proposed mechanism subtag (or subtag sequence).</td>
    794 </tr>
    795 <tr>
    796 <td align="left">Description</td>
    797 <td align="left">A description of the proposed mechanism; that description MUST be sufficient to distinguish it from other mechanisms in use.</td>
    798 </tr>
    799 <tr>
    800 <td align="left">Version</td>
    801 <td align="left">If versioning for the mechanism is not done according to date, then a description of the versioning conventions used for the mechanism.</td>
    802 </tr>
    803 </table>
    804 <br clear="all" />
    805 
    806 <p>Clarifications of descriptions or additional aliases may also be requested by filing a ticket.
    807 </p>
    808 <p>The committee MAY define a template for submissions that requests more information,
    809                  if it is found that such information would be useful in evaluating proposals.
    810 </p>
    811 <a name="field-registration"></a><br /><hr />
    812 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    813 <a name="rfc.section.2.7"></a><h3>2.7.&nbsp;
    814 Registration of Additional Fields</h3>
    815 
    816 <p>In the event that it proves necessary to add an additional field (such as 'm2'),
    817                 it can be requested by filing a ticket at
    818                     <a href='http://cldr.unicode.org/'>cldr.unicode.org</a>.
    819                     The proposal in the ticket MUST contain a full description of the
    820                     proposed field semantics and subtag syntax,
    821                     and MUST conform to the ABNF syntax for "field" presented in <a class='info' href='#structure'>Section&nbsp;2.2<span> (</span><span class='info'>Structure</span><span>)</span></a>.
    822 </p>
    823 <a name="committee-responses"></a><br /><hr />
    824 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    825 <a name="rfc.section.2.8"></a><h3>2.8.&nbsp;
    826 Committee Responses to Registration Proposals</h3>
    827 
    828 <p>The committee MUST post each proposal publicly within 2 weeks after reception,
    829                 to allow for comments. The committee must respond publicly to each proposal within 4 weeks after reception.
    830 </p>
    831 <p>The response MAY:
    832                     </p>
    833 <ul class="text">
    834 <li>request more information or clarification
    835 </li>
    836 <li>accept the proposal, optionally with modifications to the subtag or description
    837 </li>
    838 <li>reject the proposal, because of significant objections raised on the mailing list or 
    839                         due to problems with constraints in this document or in <a class='info' href='#UTS35'>[UTS35]<span> (</span><span class='info'>Davis, M., &ldquo;Unicode Technical Standard #35: Locale Data 						Markup Language (LDML),&rdquo; December&nbsp;2007.</span><span>)</span></a>
    840 </li>
    841 </ul><p>
    842                 
    843 </p>
    844 <p>Accepted tickets result in a new entry in the machine-readable CLDR BCP47 data,
    845                 or in the case of a clarified description,
    846                 modifications to the description attribute value for an existing entry.
    847 </p>
    848 <a name="machine-readable"></a><br /><hr />
    849 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    850 <a name="rfc.section.2.9"></a><h3>2.9.&nbsp;
    851 Machine-Readable Data</h3>
    852 
    853 <p>
    854 					EDITORIAL NOTE: The following parallels the structure used for the
    855 					'u' extension
    856 					<a class='info' href='#RFC6067'>[RFC6067]<span> (</span><span class='info'>Davis, M., Ed., Phillips, A., Ed., and Y. Umaoka, Ed., &ldquo;BCP 47 Extension U,&rdquo; September&nbsp;2010.</span><span>)</span></a>,
    857 					for which the Unicode Consortium is the maintaining authority.
    858 					The
    859 					data and
    860 					specification will be available by the time this internet
    861 					draft has
    862 					been
    863 					approved. The description field is in the process of being added to CLDR.
    864 				
    865 </p>
    866 <p>
    867 					Beginning with CLDR version 1.7.2, machine-readable files are
    868 					available listing the data defined for BCP47 extensions for each
    869 					successive version of
    870 					<a class='info' href='#UTS35'>[UTS35]<span> (</span><span class='info'>Davis, M., &ldquo;Unicode Technical Standard #35: Locale Data 						Markup Language (LDML),&rdquo; December&nbsp;2007.</span><span>)</span></a>. These releases are listed on
    871 					<a href='http://cldr.unicode.org/index/downloads'>http://cldr.unicode.org/index/downloads</a>.
    872 					Each release has an associated data directory of the form
    873 					"http://unicode.org/Public/cldr/&lt;version&gt;", where
    874 					"&lt;version&gt;" is replaced by the release number. For example,
    875 					for version 1.7.2, the "core.zip" file is located at
    876 					<a href='http://unicode.org/Public/cldr/1.7.2/core.zip'>http://unicode.org/Public/cldr/1.7.2/core.zip</a>.
    877 					The most
    878                     recent version is always identified by the version "latest" and can
    879                     be accessed by the URL in
    880                     <a class='info' href='#regform'>Section&nbsp;2.4<span> (</span><span class='info'>BCP47 Registration Form</span><span>)</span></a>.
    881 </p>
    882 <p>Inside the "core.zip" file, the directory "common/bcp47" contains the
    883 					data files listing the valid attributes, keys, and types for each successive version of <a class='info' href='#UTS35'>[UTS35]<span> (</span><span class='info'>Davis, M., &ldquo;Unicode Technical Standard #35: Locale Data 						Markup Language (LDML),&rdquo; December&nbsp;2007.</span><span>)</span></a>.
    884 					Each data file lists the keys and types relevant to that topic. For example, mechanism.xml contains the subtags (types) for the 't' mechanisms.
    885 </p>
    886 <p>The XML structure lists the keys, such as &lt;key extension="t" name="m0" alias="collation" description="Transliteration extension mechanism"&gt;, with subelements for the types, 
    887 					such as &lt;type name="ungegn" description="United Nations Group of Experts on Geographical Names"/&gt;. The currently defined attributes for the mechanisms include:
    888 </p><table class="full" align="center" border="0" cellpadding="2" cellspacing="2">
    889 <col align="left"><col align="left"><col align="left">
    890 <tr><th align="left">Attribute</th><th align="left">Description</th><th align="left">Examples</th></tr>
    891 <tr>
    892 <td align="left">name</td>
    893 <td align="left">The name of the mechanism, limited to 3-8 characters (or sequences of them).</td>
    894 <td align="left">UNGEGN, ALALOC</td>
    895 </tr>
    896 <tr>
    897 <td align="left">description</td>
    898 <td align="left">A description of the name, with all and only that information necessary to distinguish one name
    899                      from others with which it might be confused.  Descriptions are not intended to provide general background information.</td>
    900 <td align="left">United Nations Group of Experts on Geographical Names; American Library Association-Library of Congress</td>
    901 </tr>
    902 <tr>
    903 <td align="left">since</td>
    904 <td align="left">Indicates the first version of CLDR where the name appears. (Required for new items.)</td>
    905 <td align="left">1.9, 2.0.1</td>
    906 </tr>
    907 <tr>
    908 <td align="left">alias</td>
    909 <td align="left">Alternative name of the key or type, not limited in number of characters. Aliases are intended for backwards compatibility,
    910                     not to provide all possible alternate names or designations. (Optional)</td>
    911 <td align="left">&nbsp;</td>
    912 </tr>
    913 </table>
    914 <br clear="all" />
    915 
    916 <p>The file for the transform extension is "transform.xml".
    917 				The initial version of that file contains the following information.
    918 </p><div style='display: table; width: 0; margin-left: 3em; margin-right: auto'><pre>
    919 &lt;key extension="t" name="m0" description=
    920       "Transliteration extension mechanism"&gt;
    921    &lt;type name="ungegn" description=
    922       "United Nations Group of Experts on Geographical Names"/&gt;
    923    &lt;type name="alaloc" description=
    924       "American Library Association-Library of Congress"/&gt;
    925    &lt;type name="bgn" description=
    926       "US Board on Geographic Names"/&gt;
    927    &lt;type name="mcst" description=
    928       "Korean Ministry of Culture, Sports and Tourism"/&gt;
    929    &lt;type name="iso" description=
    930       "International Organization for Standardization"/&gt;
    931    &lt;type name="din" description=
    932       "Deutsches Institut fuer Normung"/&gt;
    933    &lt;type name="gost" description=
    934       "Euro-Asian Council for Standardization, Metrology
    935        and Certification"/&gt;
    936 &lt;/key&gt;
    937 </pre></div>
    938 <p>
    939 					To get the version information in XML when working with the data
    940 					files, the XML parser must be validating. When the 'core.zip' file
    941 					is unzipped, the 'dtd' directory will be at the same level as the
    942 					'bcp47' directory; that is required for correct validation. For
    943 					each release after CLDR 1.8, types introduced in that release are
    944 					also marked in the data files by the XML attribute "since", such as
    945 					in the following example:
    946 					</p>
    947 <div style='display: table; width: 0; margin-left: 3em; margin-right: auto'><pre>&lt;type name="adp" since="1.9"/&gt;</pre></div><p>
    948 
    949 				
    950 </p>
    951 <p>
    952 					The data is also currently maintained in a source code repository,
    953 					with each release tagged, for viewing directly without unzipping.
    954 					For example, see:
    955 					</p>
    956 <ul class="text">
    957 <li>http://unicode.org/repos/cldr/tags/release-1-7-2/common/bcp47/
    958 </li>
    959 <li>http://unicode.org/repos/cldr/tags/release-1-8/common/bcp47/
    960 </li>
    961 </ul><p>
    962 				
    963 </p>
    964 <p>For more information, see 
    965 				<a href='http://cldr.unicode.org/index/bcp47-extension'>http://cldr.unicode.org/index/bcp47-extension</a>.
    966 </p>
    967 <a name="Acknowledgements"></a><br /><hr />
    968 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    969 <a name="rfc.section.3"></a><h3>3.&nbsp;
    970 Acknowledgements</h3>
    971 
    972 <p>Thanks to John Emmons and the rest of the Unicode
    973 				CLDR Technical
    974 				Committee for their work in developing the BCP 47 subtags
    975 				for LDML.
    976 </p>
    977 <a name="IANA"></a><br /><hr />
    978 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
    979 <a name="rfc.section.4"></a><h3>4.&nbsp;
    980 IANA Considerations</h3>
    981 
    982 <p>
    983 				This document will require IANA to insert the record of
    984 				<a class='info' href='#regform'>Section&nbsp;2.4<span> (</span><span class='info'>BCP47 Registration Form</span><span>)</span></a>
    985 				into the Language Extensions Registry, according to
    986 				Section 3.7,
    987 				Extensions and the Extensions Registry of "Tags for
    988 				Identifying
    989 				Languages" in
    990 				<a class='info' href='#BCP47'>[BCP47]<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a>. Per Section 5.2 of
    991 				<a class='info' href='#BCP47'>[BCP47]<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a>, there might be occasional (rare) requests by the Unicode
    992 				Consortium (the "Authority" listed in the record) for maintenance of
    993 				this record. Changes that can be submitted to IANA without the
    994 				publication of a new RFC are limited to modification of the
    995 				Comments, Contact_Email, Mailing_List, and URL fields. Any such
    996 				requested changes MUST use the domain 'unicode.org' in any new
    997 				addresses or URIs, MUST explicitly cite this document (so that IANA
    998 				can reference these requirements), and MUST originate from the
    999 				'unicode.org' domain. The domain or authority can only be changed
   1000 				via a new RFC.
   1001 			
   1002 </p>
   1003 <p>This document does not require IANA to create or maintain a new
   1004 				registry or otherwise impact IANA.
   1005 </p>
   1006 <a name="Security"></a><br /><hr />
   1007 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
   1008 <a name="rfc.section.5"></a><h3>5.&nbsp;
   1009 Security Considerations</h3>
   1010 
   1011 <p>
   1012 				The security considerations for this extension are the same as those
   1013 				for
   1014 				<a class='info' href='#BCP47'>[BCP47]<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a>. See
   1015 				<a class='info' href='#BCP47'>RFC 5646, Section 6, Security Considerations<span> (</span><span class='info'>Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</span><span>)</span></a> [BCP47].
   1016 			
   1017 </p>
   1018 <a name="rfc.references"></a><br /><hr />
   1019 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
   1020 <a name="rfc.section.6"></a><h3>6.&nbsp;
   1021 References</h3>
   1022 
   1023 <a name="rfc.references1"></a><br /><hr />
   1024 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
   1025 <h3>6.1.&nbsp;Normative References</h3>
   1026 <table width="99%" border="0">
   1027 <tr><td class="author-text" valign="top"><a name="BCP47">[BCP47]</a></td>
   1028 <td class="author-text">Davis, M., Ed. and A. Phillips, Ed., &ldquo;Tags for the Identification of Language (BCP47),&rdquo; September&nbsp;2009.</td></tr>
   1029 <tr><td class="author-text" valign="top"><a name="RFC5234">[RFC5234]</a></td>
   1030 <td class="author-text">Crocker, Ed., &ldquo;Augmented BNF for Syntax Specifications: ABNF,&rdquo; 2008.</td></tr>
   1031 <tr><td class="author-text" valign="top"><a name="RFC6067">[RFC6067]</a></td>
   1032 <td class="author-text">Davis, M., Ed., Phillips, A., Ed., and Y. Umaoka, Ed., &ldquo;BCP 47 Extension U,&rdquo; September&nbsp;2010.</td></tr>
   1033 <tr><td class="author-text" valign="top"><a name="UTS35">[UTS35]</a></td>
   1034 <td class="author-text">Davis, M., &ldquo;<a href="http://www.unicode.org/reports/tr35/">Unicode Technical Standard #35: Locale Data
   1035 						Markup Language (LDML)</a>,&rdquo; December&nbsp;2007.</td></tr>
   1036 </table>
   1037 
   1038 <a name="rfc.references2"></a><br /><hr />
   1039 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
   1040 <h3>6.2.&nbsp;Informative References</h3>
   1041 <table width="99%" border="0">
   1042 <tr><td class="author-text" valign="top"><a name="RFC3339">[RFC3339]</a></td>
   1043 <td class="author-text">Klyne, Ed. and Newman, Ed., &ldquo;Date and Time on the Internet: Timestamps,&rdquo; 2002.</td></tr>
   1044 <tr><td class="author-text" valign="top"><a name="W3C-TimeZones">[W3C-TimeZones]</a></td>
   1045 <td class="author-text">Phillips, Ed., &ldquo;<a href="http://www.w3.org/TR/2011/NOTE-timezone-20110705/">W3C Working Group Note: Working with Time Zones</a>,&rdquo; July&nbsp;2011.</td></tr>
   1046 <tr><td class="author-text" valign="top"><a name="ldml-registry">[ldml-registry]</a></td>
   1047 <td class="author-text">&ldquo;Registry for Common Locale Data Repository tag elements,&rdquo; September&nbsp;2009.</td></tr>
   1048 </table>
   1049 
   1050 <a name="rfc.authors"></a><br /><hr />
   1051 <table summary="layout" cellpadding="0" cellspacing="2" class="TOCbug" align="right"><tr><td class="TOCbug"><a href="#toc">&nbsp;TOC&nbsp;</a></td></tr></table>
   1052 <h3>Authors' Addresses</h3>
   1053 <table width="99%" border="0" cellpadding="0" cellspacing="0">
   1054 <tr><td class="author-text">&nbsp;</td>
   1055 <td class="author-text">Mark Davis</td></tr>
   1056 <tr><td class="author-text">&nbsp;</td>
   1057 <td class="author-text">Google</td></tr>
   1058 <tr><td class="author" align="right">Email:&nbsp;</td>
   1059 <td class="author-text"><a href="mailto:mark@macchiato.com">mark@macchiato.com</a></td></tr>
   1060 <tr cellpadding="3"><td>&nbsp;</td><td>&nbsp;</td></tr>
   1061 <tr><td class="author-text">&nbsp;</td>
   1062 <td class="author-text">Addison Phillips</td></tr>
   1063 <tr><td class="author-text">&nbsp;</td>
   1064 <td class="author-text">Lab126</td></tr>
   1065 <tr><td class="author" align="right">Email:&nbsp;</td>
   1066 <td class="author-text"><a href="mailto:addison@lab126.com">addison@lab126.com</a></td></tr>
   1067 <tr cellpadding="3"><td>&nbsp;</td><td>&nbsp;</td></tr>
   1068 <tr><td class="author-text">&nbsp;</td>
   1069 <td class="author-text">Yoshito Umaoka</td></tr>
   1070 <tr><td class="author-text">&nbsp;</td>
   1071 <td class="author-text">IBM</td></tr>
   1072 <tr><td class="author" align="right">Email:&nbsp;</td>
   1073 <td class="author-text"><a href="mailto:yoshito_umaoka@us.ibm.com">yoshito_umaoka@us.ibm.com</a></td></tr>
   1074 <tr cellpadding="3"><td>&nbsp;</td><td>&nbsp;</td></tr>
   1075 <tr><td class="author-text">&nbsp;</td>
   1076 <td class="author-text">Courtney Falk</td></tr>
   1077 <tr><td class="author-text">&nbsp;</td>
   1078 <td class="author-text">Infinite Automata</td></tr>
   1079 <tr><td class="author" align="right">Email:&nbsp;</td>
   1080 <td class="author-text"><a href="mailto:court@infiauto.com">court@infiauto.com</a></td></tr>
   1081 </table>
   1082 </body></html>
   1083