1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2010-2013, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 package android.icu.impl.locale; 11 12 import java.util.ArrayList; 13 import java.util.Collections; 14 import java.util.HashMap; 15 import java.util.List; 16 import java.util.Map; 17 import java.util.Set; 18 19 /** 20 * @hide Only a subset of ICU is exposed in Android 21 */ 22 public class LanguageTag { 23 private static final boolean JDKIMPL = false; 24 25 // 26 // static fields 27 // 28 public static final String SEP = "-"; 29 public static final String PRIVATEUSE = "x"; 30 public static String UNDETERMINED = "und"; 31 public static final String PRIVUSE_VARIANT_PREFIX = "lvariant"; 32 33 // 34 // Language subtag fields 35 // 36 private String _language = ""; // language subtag 37 private String _script = ""; // script subtag 38 private String _region = ""; // region subtag 39 private String _privateuse = ""; // privateuse 40 41 private List<String> _extlangs = Collections.emptyList(); // extlang subtags 42 private List<String> _variants = Collections.emptyList(); // variant subtags 43 private List<String> _extensions = Collections.emptyList(); // extensions 44 45 // Map contains grandfathered tags and its preferred mappings from 46 // http://www.ietf.org/rfc/rfc5646.txt 47 private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED = 48 new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>(); 49 50 static { 51 // grandfathered = irregular ; non-redundant tags registered 52 // / regular ; during the RFC 3066 era 53 // 54 // irregular = "en-GB-oed" ; irregular tags do not match 55 // / "i-ami" ; the 'langtag' production and 56 // / "i-bnn" ; would not otherwise be 57 // / "i-default" ; considered 'well-formed' 58 // / "i-enochian" ; These tags are all valid, 59 // / "i-hak" ; but most are deprecated 60 // / "i-klingon" ; in favor of more modern 61 // / "i-lux" ; subtags or subtag 62 // / "i-mingo" ; combination 63 // / "i-navajo" 64 // / "i-pwn" 65 // / "i-tao" 66 // / "i-tay" 67 // / "i-tsu" 68 // / "sgn-BE-FR" 69 // / "sgn-BE-NL" 70 // / "sgn-CH-DE" 71 // 72 // regular = "art-lojban" ; these tags match the 'langtag' 73 // / "cel-gaulish" ; production, but their subtags 74 // / "no-bok" ; are not extended language 75 // / "no-nyn" ; or variant subtags: their meaning 76 // / "zh-guoyu" ; is defined by their registration 77 // / "zh-hakka" ; and all of these are deprecated 78 // / "zh-min" ; in favor of a more modern 79 // / "zh-min-nan" ; subtag or sequence of subtags 80 // / "zh-xiang" 81 82 final String[][] entries = { 83 //{"tag", "preferred"}, 84 {"art-lojban", "jbo"}, 85 {"cel-gaulish", "xtg-x-cel-gaulish"}, // fallback 86 {"en-GB-oed", "en-GB-x-oed"}, // fallback 87 {"i-ami", "ami"}, 88 {"i-bnn", "bnn"}, 89 {"i-default", "en-x-i-default"}, // fallback 90 {"i-enochian", "und-x-i-enochian"}, // fallback 91 {"i-hak", "hak"}, 92 {"i-klingon", "tlh"}, 93 {"i-lux", "lb"}, 94 {"i-mingo", "see-x-i-mingo"}, // fallback 95 {"i-navajo", "nv"}, 96 {"i-pwn", "pwn"}, 97 {"i-tao", "tao"}, 98 {"i-tay", "tay"}, 99 {"i-tsu", "tsu"}, 100 {"no-bok", "nb"}, 101 {"no-nyn", "nn"}, 102 {"sgn-BE-FR", "sfb"}, 103 {"sgn-BE-NL", "vgt"}, 104 {"sgn-CH-DE", "sgg"}, 105 {"zh-guoyu", "cmn"}, 106 {"zh-hakka", "hak"}, 107 {"zh-min", "nan-x-zh-min"}, // fallback 108 {"zh-min-nan", "nan"}, 109 {"zh-xiang", "hsn"}, 110 }; 111 for (String[] e : entries) { 112 GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e); 113 } 114 } 115 116 private LanguageTag() { 117 } 118 119 /* 120 * BNF in RFC5464 121 * 122 * Language-Tag = langtag ; normal language tags 123 * / privateuse ; private use tag 124 * / grandfathered ; grandfathered tags 125 * 126 * 127 * langtag = language 128 * ["-" script] 129 * ["-" region] 130 * *("-" variant) 131 * *("-" extension) 132 * ["-" privateuse] 133 * 134 * language = 2*3ALPHA ; shortest ISO 639 code 135 * ["-" extlang] ; sometimes followed by 136 * ; extended language subtags 137 * / 4ALPHA ; or reserved for future use 138 * / 5*8ALPHA ; or registered language subtag 139 * 140 * extlang = 3ALPHA ; selected ISO 639 codes 141 * *2("-" 3ALPHA) ; permanently reserved 142 * 143 * script = 4ALPHA ; ISO 15924 code 144 * 145 * region = 2ALPHA ; ISO 3166-1 code 146 * / 3DIGIT ; UN M.49 code 147 * 148 * variant = 5*8alphanum ; registered variants 149 * / (DIGIT 3alphanum) 150 * 151 * extension = singleton 1*("-" (2*8alphanum)) 152 * 153 * ; Single alphanumerics 154 * ; "x" reserved for private use 155 * singleton = DIGIT ; 0 - 9 156 * / %x41-57 ; A - W 157 * / %x59-5A ; Y - Z 158 * / %x61-77 ; a - w 159 * / %x79-7A ; y - z 160 * 161 * privateuse = "x" 1*("-" (1*8alphanum)) 162 * 163 */ 164 public static LanguageTag parse(String languageTag, ParseStatus sts) { 165 if (sts == null) { 166 sts = new ParseStatus(); 167 } else { 168 sts.reset(); 169 } 170 171 StringTokenIterator itr; 172 boolean isGrandfathered = false; 173 174 // Check if the tag is grandfathered 175 String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag)); 176 if (gfmap != null) { 177 // use preferred mapping 178 itr = new StringTokenIterator(gfmap[1], SEP); 179 isGrandfathered = true; 180 } else { 181 itr = new StringTokenIterator(languageTag, SEP); 182 } 183 184 LanguageTag tag = new LanguageTag(); 185 186 // langtag must start with either language or privateuse 187 if (tag.parseLanguage(itr, sts)) { 188 tag.parseExtlangs(itr, sts); 189 tag.parseScript(itr, sts); 190 tag.parseRegion(itr, sts); 191 tag.parseVariants(itr, sts); 192 tag.parseExtensions(itr, sts); 193 } 194 tag.parsePrivateuse(itr, sts); 195 196 if (isGrandfathered) { 197 // Grandfathered tag is replaced with a well-formed tag above. 198 // However, the parsed length must be the original tag length. 199 assert (itr.isDone()); 200 assert (!sts.isError()); 201 sts._parseLength = languageTag.length(); 202 } else if (!itr.isDone() && !sts.isError()) { 203 String s = itr.current(); 204 sts._errorIndex = itr.currentStart(); 205 if (s.length() == 0) { 206 sts._errorMsg = "Empty subtag"; 207 } else { 208 sts._errorMsg = "Invalid subtag: " + s; 209 } 210 } 211 212 return tag; 213 } 214 215 // 216 // Language subtag parsers 217 // 218 219 private boolean parseLanguage(StringTokenIterator itr, ParseStatus sts) { 220 if (itr.isDone() || sts.isError()) { 221 return false; 222 } 223 224 boolean found = false; 225 226 String s = itr.current(); 227 if (isLanguage(s)) { 228 found = true; 229 _language = s; 230 sts._parseLength = itr.currentEnd(); 231 itr.next(); 232 } 233 234 return found; 235 } 236 237 private boolean parseExtlangs(StringTokenIterator itr, ParseStatus sts) { 238 if (itr.isDone() || sts.isError()) { 239 return false; 240 } 241 242 boolean found = false; 243 244 while (!itr.isDone()) { 245 String s = itr.current(); 246 if (!isExtlang(s)) { 247 break; 248 } 249 found = true; 250 if (_extlangs.isEmpty()) { 251 _extlangs = new ArrayList<String>(3); 252 } 253 _extlangs.add(s); 254 sts._parseLength = itr.currentEnd(); 255 itr.next(); 256 257 if (_extlangs.size() == 3) { 258 // Maximum 3 extlangs 259 break; 260 } 261 } 262 263 return found; 264 } 265 266 private boolean parseScript(StringTokenIterator itr, ParseStatus sts) { 267 if (itr.isDone() || sts.isError()) { 268 return false; 269 } 270 271 boolean found = false; 272 273 String s = itr.current(); 274 if (isScript(s)) { 275 found = true; 276 _script = s; 277 sts._parseLength = itr.currentEnd(); 278 itr.next(); 279 } 280 281 return found; 282 } 283 284 private boolean parseRegion(StringTokenIterator itr, ParseStatus sts) { 285 if (itr.isDone() || sts.isError()) { 286 return false; 287 } 288 289 boolean found = false; 290 291 String s = itr.current(); 292 if (isRegion(s)) { 293 found = true; 294 _region = s; 295 sts._parseLength = itr.currentEnd(); 296 itr.next(); 297 } 298 299 return found; 300 } 301 302 private boolean parseVariants(StringTokenIterator itr, ParseStatus sts) { 303 if (itr.isDone() || sts.isError()) { 304 return false; 305 } 306 307 boolean found = false; 308 309 while (!itr.isDone()) { 310 String s = itr.current(); 311 if (!isVariant(s)) { 312 break; 313 } 314 found = true; 315 if (_variants.isEmpty()) { 316 _variants = new ArrayList<String>(3); 317 } 318 _variants.add(s); 319 sts._parseLength = itr.currentEnd(); 320 itr.next(); 321 } 322 323 return found; 324 } 325 326 private boolean parseExtensions(StringTokenIterator itr, ParseStatus sts) { 327 if (itr.isDone() || sts.isError()) { 328 return false; 329 } 330 331 boolean found = false; 332 333 while (!itr.isDone()) { 334 String s = itr.current(); 335 if (isExtensionSingleton(s)) { 336 int start = itr.currentStart(); 337 String singleton = s; 338 StringBuilder sb = new StringBuilder(singleton); 339 340 itr.next(); 341 while (!itr.isDone()) { 342 s = itr.current(); 343 if (isExtensionSubtag(s)) { 344 sb.append(SEP).append(s); 345 sts._parseLength = itr.currentEnd(); 346 } else { 347 break; 348 } 349 itr.next(); 350 } 351 352 if (sts._parseLength <= start) { 353 sts._errorIndex = start; 354 sts._errorMsg = "Incomplete extension '" + singleton + "'"; 355 break; 356 } 357 358 if (_extensions.size() == 0) { 359 _extensions = new ArrayList<String>(4); 360 } 361 _extensions.add(sb.toString()); 362 found = true; 363 } else { 364 break; 365 } 366 } 367 return found; 368 } 369 370 private boolean parsePrivateuse(StringTokenIterator itr, ParseStatus sts) { 371 if (itr.isDone() || sts.isError()) { 372 return false; 373 } 374 375 boolean found = false; 376 377 String s = itr.current(); 378 if (isPrivateusePrefix(s)) { 379 int start = itr.currentStart(); 380 StringBuilder sb = new StringBuilder(s); 381 382 itr.next(); 383 while (!itr.isDone()) { 384 s = itr.current(); 385 if (!isPrivateuseSubtag(s)) { 386 break; 387 } 388 sb.append(SEP).append(s); 389 sts._parseLength = itr.currentEnd(); 390 391 itr.next(); 392 } 393 394 if (sts._parseLength <= start) { 395 // need at least 1 private subtag 396 sts._errorIndex = start; 397 sts._errorMsg = "Incomplete privateuse"; 398 } else { 399 _privateuse = sb.toString(); 400 found = true; 401 } 402 } 403 404 return found; 405 } 406 407 public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) { 408 LanguageTag tag = new LanguageTag(); 409 410 String language = baseLocale.getLanguage(); 411 String script = baseLocale.getScript(); 412 String region = baseLocale.getRegion(); 413 String variant = baseLocale.getVariant(); 414 415 boolean hasSubtag = false; 416 417 String privuseVar = null; // store ill-formed variant subtags 418 419 if (language.length() > 0 && isLanguage(language)) { 420 // Convert a deprecated language code used by Java to 421 // a new code 422 if (language.equals("iw")) { 423 language = "he"; 424 } else if (language.equals("ji")) { 425 language = "yi"; 426 } else if (language.equals("in")) { 427 language = "id"; 428 } 429 tag._language = language; 430 } 431 432 if (script.length() > 0 && isScript(script)) { 433 tag._script = canonicalizeScript(script); 434 hasSubtag = true; 435 } 436 437 if (region.length() > 0 && isRegion(region)) { 438 tag._region = canonicalizeRegion(region); 439 hasSubtag = true; 440 } 441 442 if (JDKIMPL) { 443 // Special handling for no_NO_NY - use nn_NO for language tag 444 if (tag._language.equals("no") && tag._region.equals("NO") && variant.equals("NY")) { 445 tag._language = "nn"; 446 variant = ""; 447 } 448 } 449 450 if (variant.length() > 0) { 451 List<String> variants = null; 452 StringTokenIterator varitr = new StringTokenIterator(variant, BaseLocale.SEP); 453 while (!varitr.isDone()) { 454 String var = varitr.current(); 455 if (!isVariant(var)) { 456 break; 457 } 458 if (variants == null) { 459 variants = new ArrayList<String>(); 460 } 461 if (JDKIMPL) { 462 variants.add(var); // Do not canonicalize! 463 } else { 464 variants.add(canonicalizeVariant(var)); 465 } 466 varitr.next(); 467 } 468 if (variants != null) { 469 tag._variants = variants; 470 hasSubtag = true; 471 } 472 if (!varitr.isDone()) { 473 // ill-formed variant subtags 474 StringBuilder buf = new StringBuilder(); 475 while (!varitr.isDone()) { 476 String prvv = varitr.current(); 477 if (!isPrivateuseSubtag(prvv)) { 478 // cannot use private use subtag - truncated 479 break; 480 } 481 if (buf.length() > 0) { 482 buf.append(SEP); 483 } 484 if (!JDKIMPL) { 485 prvv = AsciiUtil.toLowerString(prvv); 486 } 487 buf.append(prvv); 488 varitr.next(); 489 } 490 if (buf.length() > 0) { 491 privuseVar = buf.toString(); 492 } 493 } 494 } 495 496 List<String> extensions = null; 497 String privateuse = null; 498 499 Set<Character> locextKeys = localeExtensions.getKeys(); 500 for (Character locextKey : locextKeys) { 501 Extension ext = localeExtensions.getExtension(locextKey); 502 if (isPrivateusePrefixChar(locextKey.charValue())) { 503 privateuse = ext.getValue(); 504 } else { 505 if (extensions == null) { 506 extensions = new ArrayList<String>(); 507 } 508 extensions.add(locextKey.toString() + SEP + ext.getValue()); 509 } 510 } 511 512 if (extensions != null) { 513 tag._extensions = extensions; 514 hasSubtag = true; 515 } 516 517 // append ill-formed variant subtags to private use 518 if (privuseVar != null) { 519 if (privateuse == null) { 520 privateuse = PRIVUSE_VARIANT_PREFIX + SEP + privuseVar; 521 } else { 522 privateuse = privateuse + SEP + PRIVUSE_VARIANT_PREFIX + SEP + privuseVar.replace(BaseLocale.SEP, SEP); 523 } 524 } 525 526 if (privateuse != null) { 527 tag._privateuse = privateuse; 528 } 529 530 if (tag._language.length() == 0 && (hasSubtag || privateuse == null)) { 531 // use lang "und" when 1) no language is available AND 532 // 2) any of other subtags other than private use are available or 533 // no private use tag is available 534 tag._language = UNDETERMINED; 535 } 536 537 return tag; 538 } 539 540 // 541 // Getter methods for language subtag fields 542 // 543 544 public String getLanguage() { 545 return _language; 546 } 547 548 public List<String> getExtlangs() { 549 return Collections.unmodifiableList(_extlangs); 550 } 551 552 public String getScript() { 553 return _script; 554 } 555 556 public String getRegion() { 557 return _region; 558 } 559 560 public List<String> getVariants() { 561 return Collections.unmodifiableList(_variants); 562 } 563 564 public List<String> getExtensions() { 565 return Collections.unmodifiableList(_extensions); 566 } 567 568 public String getPrivateuse() { 569 return _privateuse; 570 } 571 572 // 573 // Language subtag syntax checking methods 574 // 575 576 public static boolean isLanguage(String s) { 577 // language = 2*3ALPHA ; shortest ISO 639 code 578 // ["-" extlang] ; sometimes followed by 579 // ; extended language subtags 580 // / 4ALPHA ; or reserved for future use 581 // / 5*8ALPHA ; or registered language subtag 582 return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaString(s); 583 } 584 585 public static boolean isExtlang(String s) { 586 // extlang = 3ALPHA ; selected ISO 639 codes 587 // *2("-" 3ALPHA) ; permanently reserved 588 return (s.length() == 3) && AsciiUtil.isAlphaString(s); 589 } 590 591 public static boolean isScript(String s) { 592 // script = 4ALPHA ; ISO 15924 code 593 return (s.length() == 4) && AsciiUtil.isAlphaString(s); 594 } 595 596 public static boolean isRegion(String s) { 597 // region = 2ALPHA ; ISO 3166-1 code 598 // / 3DIGIT ; UN M.49 code 599 return ((s.length() == 2) && AsciiUtil.isAlphaString(s)) 600 || ((s.length() == 3) && AsciiUtil.isNumericString(s)); 601 } 602 603 public static boolean isVariant(String s) { 604 // variant = 5*8alphanum ; registered variants 605 // / (DIGIT 3alphanum) 606 int len = s.length(); 607 if (len >= 5 && len <= 8) { 608 return AsciiUtil.isAlphaNumericString(s); 609 } 610 if (len == 4) { 611 return AsciiUtil.isNumeric(s.charAt(0)) 612 && AsciiUtil.isAlphaNumeric(s.charAt(1)) 613 && AsciiUtil.isAlphaNumeric(s.charAt(2)) 614 && AsciiUtil.isAlphaNumeric(s.charAt(3)); 615 } 616 return false; 617 } 618 619 public static boolean isExtensionSingleton(String s) { 620 // singleton = DIGIT ; 0 - 9 621 // / %x41-57 ; A - W 622 // / %x59-5A ; Y - Z 623 // / %x61-77 ; a - w 624 // / %x79-7A ; y - z 625 626 return (s.length() == 1) 627 && AsciiUtil.isAlphaString(s) 628 && !AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); 629 } 630 631 public static boolean isExtensionSingletonChar(char c) { 632 return isExtensionSingleton(String.valueOf(c)); 633 } 634 635 public static boolean isExtensionSubtag(String s) { 636 // extension = singleton 1*("-" (2*8alphanum)) 637 return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); 638 } 639 640 public static boolean isPrivateusePrefix(String s) { 641 // privateuse = "x" 1*("-" (1*8alphanum)) 642 return (s.length() == 1) 643 && AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); 644 } 645 646 public static boolean isPrivateusePrefixChar(char c) { 647 return (AsciiUtil.caseIgnoreMatch(PRIVATEUSE, String.valueOf(c))); 648 } 649 650 public static boolean isPrivateuseSubtag(String s) { 651 // privateuse = "x" 1*("-" (1*8alphanum)) 652 return (s.length() >= 1) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); 653 } 654 655 // 656 // Language subtag canonicalization methods 657 // 658 659 public static String canonicalizeLanguage(String s) { 660 return AsciiUtil.toLowerString(s); 661 } 662 663 public static String canonicalizeExtlang(String s) { 664 return AsciiUtil.toLowerString(s); 665 } 666 667 public static String canonicalizeScript(String s) { 668 return AsciiUtil.toTitleString(s); 669 } 670 671 public static String canonicalizeRegion(String s) { 672 return AsciiUtil.toUpperString(s); 673 } 674 675 public static String canonicalizeVariant(String s) { 676 return AsciiUtil.toLowerString(s); 677 } 678 679 public static String canonicalizeExtension(String s) { 680 return AsciiUtil.toLowerString(s); 681 } 682 683 public static String canonicalizeExtensionSingleton(String s) { 684 return AsciiUtil.toLowerString(s); 685 } 686 687 public static String canonicalizeExtensionSubtag(String s) { 688 return AsciiUtil.toLowerString(s); 689 } 690 691 public static String canonicalizePrivateuse(String s) { 692 return AsciiUtil.toLowerString(s); 693 } 694 695 public static String canonicalizePrivateuseSubtag(String s) { 696 return AsciiUtil.toLowerString(s); 697 } 698 699 @Override 700 public String toString() { 701 StringBuilder sb = new StringBuilder(); 702 703 if (_language.length() > 0) { 704 sb.append(_language); 705 706 for (String extlang : _extlangs) { 707 sb.append(SEP).append(extlang); 708 } 709 710 if (_script.length() > 0) { 711 sb.append(SEP).append(_script); 712 } 713 714 if (_region.length() > 0) { 715 sb.append(SEP).append(_region); 716 } 717 718 for (String variant : _variants) { 719 sb.append(SEP).append(variant); 720 } 721 722 for (String extension : _extensions) { 723 sb.append(SEP).append(extension); 724 } 725 } 726 if (_privateuse.length() > 0) { 727 if (sb.length() > 0) { 728 sb.append(SEP); 729 } 730 sb.append(_privateuse); 731 } 732 733 return sb.toString(); 734 } 735 } 736