// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2009-2011, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.impl.locale;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Mutable builder that accumulates the base locale fields (language, script,
 * region, variant) and extension data (BCP 47 extensions, private use, and
 * Unicode locale attributes/keywords), and produces a {@link BaseLocale} and
 * a {@link LocaleExtensions} from them.
 *
 * <p>Setters validate their input and throw {@link LocaleSyntaxException} on
 * ill-formed values; they return {@code this} so that calls can be chained.
 */
public final class InternalLocaleBuilder {

    private static final boolean JDKIMPL = false;

    private String _language = "";
    private String _script = "";
    private String _region = "";
    private String _variant = "";

    private static final CaseInsensitiveChar PRIVUSE_KEY = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE.charAt(0));

    // All three collections are created lazily; null means "never populated".
    private HashMap<CaseInsensitiveChar, String> _extensions;
    private HashSet<CaseInsensitiveString> _uattributes;
    private HashMap<CaseInsensitiveString, String> _ukeywords;


    public InternalLocaleBuilder() {
    }

    /**
     * Sets the language. A {@code null} or empty value resets it to "".
     *
     * @throws LocaleSyntaxException if the value is rejected by {@code LanguageTag.isLanguage}
     */
    public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException {
        if (language == null || language.length() == 0) {
            _language = "";
        } else {
            if (!LanguageTag.isLanguage(language)) {
                throw new LocaleSyntaxException("Ill-formed language: " + language, 0);
            }
            _language = language;
        }
        return this;
    }

    /**
     * Sets the script. A {@code null} or empty value resets it to "".
     *
     * @throws LocaleSyntaxException if the value is rejected by {@code LanguageTag.isScript}
     */
    public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException {
        if (script == null || script.length() == 0) {
            _script = "";
        } else {
            if (!LanguageTag.isScript(script)) {
                throw new LocaleSyntaxException("Ill-formed script: " + script, 0);
            }
            _script = script;
        }
        return this;
    }

    /**
     * Sets the region. A {@code null} or empty value resets it to "".
     *
     * @throws LocaleSyntaxException if the value is rejected by {@code LanguageTag.isRegion}
     */
    public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException {
        if (region == null || region.length() == 0) {
            _region = "";
        } else {
            if (!LanguageTag.isRegion(region)) {
                throw new LocaleSyntaxException("Ill-formed region: " + region, 0);
            }
            _region = region;
        }
        return this;
    }

    /**
     * Sets the variant. A {@code null} or empty value resets it to "".
     * Language-tag separators in the input are replaced by the base-locale
     * separator before each subtag is validated; the normalized form is stored.
     *
     * @throws LocaleSyntaxException if any variant subtag is ill-formed; the
     *         error index is the start offset of the offending subtag
     */
    public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException {
        if (variant == null || variant.length() == 0) {
            _variant = "";
        } else {
            // normalize separators to "_"
            String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP);
            int errIdx = checkVariants(var, BaseLocale.SEP);
            if (errIdx != -1) {
                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
            }
            _variant = var;
        }
        return this;
    }

    /**
     * Adds a Unicode locale attribute. Attributes are held case-insensitively,
     * so adding the same attribute in a different case has no further effect.
     *
     * @throws LocaleSyntaxException if the attribute is {@code null} or ill-formed
     */
    public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }
        // Use case insensitive string to prevent duplication
        if (_uattributes == null) {
            _uattributes = new HashSet<CaseInsensitiveString>(4);
        }
        _uattributes.add(new CaseInsensitiveString(attribute));
        return this;
    }

    /**
     * Removes a Unicode locale attribute (case-insensitive match), if present.
     *
     * @throws LocaleSyntaxException if the attribute is {@code null} or ill-formed
     */
    public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }
        if (_uattributes != null) {
            _uattributes.remove(new CaseInsensitiveString(attribute));
        }
        return this;
    }

    /**
     * Sets, replaces or removes a Unicode locale keyword.
     *
     * @param key  the keyword key; must satisfy {@code UnicodeLocaleExtension.isKey}
     * @param type the keyword type; {@code null} removes the key, an empty
     *             string is stored without validation, anything else must
     *             consist of valid type subtags
     * @throws LocaleSyntaxException if the key or a type subtag is ill-formed
     */
    public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException {
        if (!UnicodeLocaleExtension.isKey(key)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key);
        }

        CaseInsensitiveString cikey = new CaseInsensitiveString(key);
        if (type == null) {
            if (_ukeywords != null) {
                // a null type is used to remove the key
                _ukeywords.remove(cikey);
            }
        } else {
            if (type.length() != 0) {
                // normalize separator to "-"
                String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
                // validate
                StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP);
                while (!itr.isDone()) {
                    String s = itr.current();
                    if (!UnicodeLocaleExtension.isTypeSubtag(s)) {
                        throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: " + type, itr.currentStart());
                    }
                    itr.next();
                }
            }
            if (_ukeywords == null) {
                _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
            }
            // NOTE(review): the original (un-normalized) type is stored, not the
            // separator-normalized copy used for validation -- confirm this is intended.
            _ukeywords.put(cikey, type);
        }
        return this;
    }

    /**
     * Sets or removes a single extension (or the private use value).
     *
     * @param singleton the extension key, or the private use prefix character
     * @param value     the extension value; {@code null} or empty removes the
     *                  extension. For the Unicode locale extension singleton,
     *                  removal clears all attributes and keywords.
     * @throws LocaleSyntaxException if the key or any value subtag is ill-formed
     */
    public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException {
        // validate key
        boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton);
        if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) {
            throw new LocaleSyntaxException("Ill-formed extension key: " + singleton);
        }

        boolean remove = (value == null || value.length() == 0);
        CaseInsensitiveChar key = new CaseInsensitiveChar(singleton);

        if (remove) {
            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                // clear entire Unicode locale extension
                if (_uattributes != null) {
                    _uattributes.clear();
                }
                if (_ukeywords != null) {
                    _ukeywords.clear();
                }
            } else {
                if (_extensions != null) {
                    // Map.remove is a no-op when the key is absent.
                    _extensions.remove(key);
                }
            }
        } else {
            // validate value
            String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
            StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP);
            while (!itr.isDone()) {
                String s = itr.current();
                boolean validSubtag;
                if (isBcpPrivateuse) {
                    validSubtag = LanguageTag.isPrivateuseSubtag(s);
                } else {
                    validSubtag = LanguageTag.isExtensionSubtag(s);
                }
                if (!validSubtag) {
                    throw new LocaleSyntaxException("Ill-formed extension value: " + s, itr.currentStart());
                }
                itr.next();
            }

            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                setUnicodeLocaleExtension(val);
            } else {
                if (_extensions == null) {
                    _extensions = new HashMap<CaseInsensitiveChar, String>(4);
                }
                _extensions.put(key, val);
            }
        }
        return this;
    }

    /**
     * Sets extension/private use subtags given in a single string
     * representation, replacing all extension state currently held.
     * A {@code null} or empty string just clears the extensions.
     *
     * @throws LocaleSyntaxException if a singleton or the private use prefix
     *         is not followed by at least one valid subtag, or if trailing
     *         subtags cannot be parsed
     */
    public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException {
        if (subtags == null || subtags.length() == 0) {
            clearExtensions();
            return this;
        }
        subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        List<String> extensions = null;
        String privateuse = null;

        // End offset of the last subtag accepted so far.
        int parsed = 0;
        int start;

        // Make a list of extension subtags
        while (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isExtensionSingleton(s)) {
                start = itr.currentStart();
                String singleton = s;
                StringBuilder sb = new StringBuilder(singleton);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (LanguageTag.isExtensionSubtag(s)) {
                        sb.append(LanguageTag.SEP).append(s);
                        parsed = itr.currentEnd();
                    } else {
                        break;
                    }
                    itr.next();
                }

                // "<=" (not "<") so that a bare singleton at offset 0, where
                // parsed == start == 0, is also reported as incomplete instead
                // of failing later when the value part is extracted.
                if (parsed <= start) {
                    throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'", start);
                }

                if (extensions == null) {
                    extensions = new ArrayList<String>(4);
                }
                extensions.add(sb.toString());
            } else {
                break;
            }
        }
        if (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isPrivateusePrefix(s)) {
                start = itr.currentStart();
                StringBuilder sb = new StringBuilder(s);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (!LanguageTag.isPrivateuseSubtag(s)) {
                        break;
                    }
                    sb.append(LanguageTag.SEP).append(s);
                    parsed = itr.currentEnd();

                    itr.next();
                }
                if (parsed <= start) {
                    throw new LocaleSyntaxException("Incomplete privateuse:" + subtags.substring(start), start);
                } else {
                    privateuse = sb.toString();
                }
            }
        }

        if (!itr.isDone()) {
            throw new LocaleSyntaxException("Ill-formed extension subtags:" + subtags.substring(itr.currentStart()), itr.currentStart());
        }

        return setExtensions(extensions, privateuse);
    }

    /*
     * Set a list of BCP47 extensions and private use subtags.
     * BCP47 extensions are already validated and well-formed, but may contain
     * duplicates; only the first occurrence of each singleton is used.
     */
    private InternalLocaleBuilder setExtensions(List<String> bcpExtensions, String privateuse) {
        clearExtensions();

        if (bcpExtensions != null && bcpExtensions.size() > 0) {
            HashSet<CaseInsensitiveChar> processedExtensions = new HashSet<CaseInsensitiveChar>(bcpExtensions.size());
            for (String bcpExt : bcpExtensions) {
                CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt.charAt(0));
                // ignore duplicates
                if (!processedExtensions.contains(key)) {
                    // each extension string contains singleton, e.g. "a-abc-def"
                    if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                        setUnicodeLocaleExtension(bcpExt.substring(2));
                    } else {
                        if (_extensions == null) {
                            _extensions = new HashMap<CaseInsensitiveChar, String>(4);
                        }
                        _extensions.put(key, bcpExt.substring(2));
                    }
                }
                // Record the singleton so later occurrences are skipped.
                processedExtensions.add(key);
            }
        }
        if (privateuse != null && privateuse.length() > 0) {
            // privateuse string contains prefix, e.g. "x-abc-def"
            if (_extensions == null) {
                _extensions = new HashMap<CaseInsensitiveChar, String>(1);
            }
            _extensions.put(new CaseInsensitiveChar(privateuse.charAt(0)), privateuse.substring(2));
        }

        return this;
    }

    /**
     * Resets the builder's internal state with the given language tag.
     * When the tag has extlang subtags, the first one is used as the language;
     * otherwise the tag's language is used unless it equals
     * {@code LanguageTag.UNDETERMINED}. Variants are joined with the
     * base-locale separator.
     */
    public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) {
        clear();
        if (langtag.getExtlangs().size() > 0) {
            _language = langtag.getExtlangs().get(0);
        } else {
            String language = langtag.getLanguage();
            if (!language.equals(LanguageTag.UNDETERMINED)) {
                _language = language;
            }
        }
        _script = langtag.getScript();
        _region = langtag.getRegion();

        List<String> bcpVariants = langtag.getVariants();
        if (bcpVariants.size() > 0) {
            StringBuilder var = new StringBuilder(bcpVariants.get(0));
            for (int i = 1; i < bcpVariants.size(); i++) {
                var.append(BaseLocale.SEP).append(bcpVariants.get(i));
            }
            _variant = var.toString();
        }

        setExtensions(langtag.getExtensions(), langtag.getPrivateuse());

        return this;
    }

    /**
     * Replaces the builder's state with the given base locale and extensions.
     * All base fields are validated before any internal field is modified, so
     * the builder is left unchanged when an exception is thrown.
     *
     * @param base       the base locale supplying language/script/region/variant
     * @param extensions the extensions to copy in; may be {@code null}
     * @throws LocaleSyntaxException if any base locale field is ill-formed
     */
    public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions extensions) throws LocaleSyntaxException {
        String language = base.getLanguage();
        String script = base.getScript();
        String region = base.getRegion();
        String variant = base.getVariant();

        if (JDKIMPL) {
            // Special backward compatibility support

            // Exception 1 - ja_JP_JP
            if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) {
                // When locale ja_JP_JP is created, ca-japanese is always there.
                // The builder ignores the variant "JP"
                assert("japanese".equals(extensions.getUnicodeLocaleType("ca")));
                variant = "";
            }
            // Exception 2 - th_TH_TH
            else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) {
                // When locale th_TH_TH is created, nu-thai is always there.
                // The builder ignores the variant "TH"
                assert("thai".equals(extensions.getUnicodeLocaleType("nu")));
                variant = "";
            }
            // Exception 3 - no_NO_NY
            else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) {
                // no_NO_NY is a valid locale and used by Java 6 or older versions.
                // The builder ignores the variant "NY" and changes the language to "nn".
                language = "nn";
                variant = "";
            }
        }

        // Validate base locale fields before updating internal state.
        // LocaleExtensions always store validated/canonicalized values,
        // so no checks are necessary.
        if (language.length() > 0 && !LanguageTag.isLanguage(language)) {
            throw new LocaleSyntaxException("Ill-formed language: " + language);
        }

        if (script.length() > 0 && !LanguageTag.isScript(script)) {
            throw new LocaleSyntaxException("Ill-formed script: " + script);
        }

        if (region.length() > 0 && !LanguageTag.isRegion(region)) {
            throw new LocaleSyntaxException("Ill-formed region: " + region);
        }

        if (variant.length() > 0) {
            int errIdx = checkVariants(variant, BaseLocale.SEP);
            if (errIdx != -1) {
                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
            }
        }

        // The input locale is validated at this point.
        // Now, updating builder's internal fields.
        _language = language;
        _script = script;
        _region = region;
        _variant = variant;
        clearExtensions();

        Set<Character> extKeys = (extensions == null) ? null : extensions.getKeys();
        if (extKeys != null) {
            // map extensions back to builder's internal format
            for (Character key : extKeys) {
                Extension e = extensions.getExtension(key);
                if (e instanceof UnicodeLocaleExtension) {
                    UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e;
                    for (String uatr : ue.getUnicodeLocaleAttributes()) {
                        if (_uattributes == null) {
                            _uattributes = new HashSet<CaseInsensitiveString>(4);
                        }
                        _uattributes.add(new CaseInsensitiveString(uatr));
                    }
                    for (String ukey : ue.getUnicodeLocaleKeys()) {
                        if (_ukeywords == null) {
                            _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
                        }
                        _ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey));
                    }
                } else {
                    if (_extensions == null) {
                        _extensions = new HashMap<CaseInsensitiveChar, String>(4);
                    }
                    _extensions.put(new CaseInsensitiveChar(key.charValue()), e.getValue());
                }
            }
        }
        return this;
    }

    /**
     * Resets language, script, region and variant to "" and clears all
     * extension data.
     */
    public InternalLocaleBuilder clear() {
        _language = "";
        _script = "";
        _region = "";
        _variant = "";
        clearExtensions();
        return this;
    }

    /**
     * Empties the extension, Unicode attribute and Unicode keyword
     * collections (those that have been created).
     */
    public InternalLocaleBuilder clearExtensions() {
        if (_extensions != null) {
            _extensions.clear();
        }
        if (_uattributes != null) {
            _uattributes.clear();
        }
        if (_ukeywords != null) {
            _ukeywords.clear();
        }
        return this;
    }

    /**
     * Returns a {@link BaseLocale} for the current language, script, region
     * and variant. If the private use value contains the special variant
     * prefix subtag followed by at least one more subtag, everything after
     * the prefix is appended to the variant (with separators converted to the
     * base-locale separator).
     */
    public BaseLocale getBaseLocale() {
        String language = _language;
        String script = _script;
        String region = _region;
        String variant = _variant;

        // Special private use subtag sequence identified by "lvariant" will be
        // interpreted as Java variant.
        if (_extensions != null) {
            String privuse = _extensions.get(PRIVUSE_KEY);
            if (privuse != null) {
                StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP);
                boolean sawPrefix = false;
                int privVarStart = -1;
                while (!itr.isDone()) {
                    if (sawPrefix) {
                        // first subtag after the prefix: the variant part starts here
                        privVarStart = itr.currentStart();
                        break;
                    }
                    if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
                        sawPrefix = true;
                    }
                    itr.next();
                }
                if (privVarStart != -1) {
                    StringBuilder sb = new StringBuilder(variant);
                    if (sb.length() != 0) {
                        sb.append(BaseLocale.SEP);
                    }
                    sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP, BaseLocale.SEP));
                    variant = sb.toString();
                }
            }
        }

        return BaseLocale.getInstance(language, script, region, variant);
    }

    /**
     * Returns the extensions held by this builder, or
     * {@code LocaleExtensions.EMPTY_EXTENSIONS} when all three extension
     * collections are null or empty.
     */
    public LocaleExtensions getLocaleExtensions() {
        if ((_extensions == null || _extensions.size() == 0)
                && (_uattributes == null || _uattributes.size() == 0)
                && (_ukeywords == null || _ukeywords.size() == 0)) {
            return LocaleExtensions.EMPTY_EXTENSIONS;
        }

        return new LocaleExtensions(_extensions, _uattributes, _ukeywords);
    }

    /*
     * Remove special private use subtag sequence identified by "lvariant"
     * and return the rest. Only used by LocaleExtensions.
     * Returns null when the prefix is the very first subtag (nothing is left),
     * and the input unchanged when no subtag follows the prefix.
     */
    static String removePrivateuseVariant(String privuseVal) {
        StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP);

        // Note: privateuse value "abc-lvariant" is unchanged
        // because no subtags after "lvariant".

        int prefixStart = -1;
        boolean sawPrivuseVar = false;
        while (!itr.isDone()) {
            if (prefixStart != -1) {
                // A subtag follows the prefix, so there is a variant part to strip.
                sawPrivuseVar = true;
                break;
            }
            if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
                prefixStart = itr.currentStart();
            }
            itr.next();
        }
        if (!sawPrivuseVar) {
            return privuseVal;
        }

        assert(prefixStart == 0 || prefixStart > 1);
        // prefixStart - 1 drops the separator that precedes the prefix subtag.
        return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart - 1);
    }

    /*
     * Check if the given variant subtags separated by the given
     * separator(s) are valid. Returns the start offset of the first
     * ill-formed subtag, or -1 when all subtags are valid.
     */
    private static int checkVariants(String variants, String sep) {
        StringTokenIterator itr = new StringTokenIterator(variants, sep);
        while (!itr.isDone()) {
            String s = itr.current();
            if (!LanguageTag.isVariant(s)) {
                return itr.currentStart();
            }
            itr.next();
        }
        return -1;
    }

    /*
     * Private methods parsing Unicode Locale Extension subtags.
     * Duplicated attributes/keywords will be ignored.
     * The input must be a valid extension subtags (excluding singleton).
     */
    private void setUnicodeLocaleExtension(String subtags) {
        // wipe out existing attributes/keywords
        if (_uattributes != null) {
            _uattributes.clear();
        }
        if (_ukeywords != null) {
            _ukeywords.clear();
        }

        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        // parse attributes
        while (!itr.isDone()) {
            if (!UnicodeLocaleExtension.isAttribute(itr.current())) {
                break;
            }
            if (_uattributes == null) {
                _uattributes = new HashSet<CaseInsensitiveString>(4);
            }
            _uattributes.add(new CaseInsensitiveString(itr.current()));
            itr.next();
        }

        // parse keywords
        CaseInsensitiveString key = null;
        String type;
        // [typeStart, typeEnd) is the span of the current keyword's type
        // subtags within `subtags`; -1 means no type subtag seen yet.
        int typeStart = -1;
        int typeEnd = -1;
        while (!itr.isDone()) {
            if (key != null) {
                if (UnicodeLocaleExtension.isKey(itr.current())) {
                    // next keyword - emit previous one
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (_ukeywords == null) {
                        _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
                    }
                    _ukeywords.put(key, type);

                    // reset keyword info
                    CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current());
                    key = _ukeywords.containsKey(tmpKey) ? null : tmpKey;
                    typeStart = typeEnd = -1;
                } else {
                    if (typeStart == -1) {
                        typeStart = itr.currentStart();
                    }
                    typeEnd = itr.currentEnd();
                }
            } else if (UnicodeLocaleExtension.isKey(itr.current())) {
                // 1. first keyword or
                // 2. next keyword, but previous one was duplicate
                key = new CaseInsensitiveString(itr.current());
                if (_ukeywords != null && _ukeywords.containsKey(key)) {
                    // duplicate
                    key = null;
                }
            }

            if (!itr.hasNext()) {
                if (key != null) {
                    // last keyword
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (_ukeywords == null) {
                        _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
                    }
                    _ukeywords.put(key, type);
                }
                break;
            }

            itr.next();
        }
    }

    /**
     * String wrapper whose {@code equals}/{@code hashCode} go through
     * {@code AsciiUtil} case-insensitive helpers; {@link #value()} returns
     * the string exactly as supplied.
     */
    static class CaseInsensitiveString {
        private final String _s;

        CaseInsensitiveString(String s) {
            _s = s;
        }

        public String value() {
            return _s;
        }

        @Override
        public int hashCode() {
            return AsciiUtil.toLowerString(_s).hashCode();
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CaseInsensitiveString)) {
                return false;
            }
            return AsciiUtil.caseIgnoreMatch(_s, ((CaseInsensitiveString)obj).value());
        }
    }

    /**
     * Char wrapper whose {@code equals}/{@code hashCode} compare the
     * {@code AsciiUtil.toLower} form of the character; {@link #value()}
     * returns the character exactly as supplied.
     */
    static class CaseInsensitiveChar {
        private final char _c;

        CaseInsensitiveChar(char c) {
            _c = c;
        }

        public char value() {
            return _c;
        }

        @Override
        public int hashCode() {
            return AsciiUtil.toLower(_c);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CaseInsensitiveChar)) {
                return false;
            }
            // Lower-case BOTH sides: comparing the raw _c against the
            // lower-cased other value made equals asymmetric and inconsistent
            // with hashCode when this instance wraps an upper-case character.
            return AsciiUtil.toLower(_c) == AsciiUtil.toLower(((CaseInsensitiveChar)obj).value());
        }

    }
}