1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 17 package com.android.providers.contacts; 18 19 import android.icu.text.AlphabeticIndex; 20 import android.icu.text.AlphabeticIndex.ImmutableIndex; 21 import android.icu.text.Transliterator; 22 import android.os.LocaleList; 23 import android.provider.ContactsContract.FullNameStyle; 24 import android.provider.ContactsContract.PhoneticNameStyle; 25 import android.text.TextUtils; 26 import android.util.ArraySet; 27 import android.util.Log; 28 29 import com.android.providers.contacts.HanziToPinyin.Token; 30 31 import com.google.common.annotations.VisibleForTesting; 32 33 import java.lang.Character.UnicodeBlock; 34 import java.util.ArrayList; 35 import java.util.Collections; 36 import java.util.Iterator; 37 import java.util.List; 38 import java.util.Locale; 39 import java.util.Set; 40 41 42 /** 43 * This utility class provides specialized handling for locale specific 44 * information: labels, name lookup keys. 45 */ 46 public class ContactLocaleUtils { 47 public static final String TAG = "ContactLocale"; 48 49 private static final boolean DEBUG = false; // don't submit with true 50 51 public static final Locale LOCALE_ARABIC = new Locale("ar"); 52 public static final Locale LOCALE_GREEK = new Locale("el"); 53 public static final Locale LOCALE_HEBREW = new Locale("he"); 54 // Serbian and Ukrainian labels are complementary supersets of Russian 55 public static final Locale LOCALE_SERBIAN = new Locale("sr"); 56 public static final Locale LOCALE_UKRAINIAN = new Locale("uk"); 57 public static final Locale LOCALE_THAI = new Locale("th"); 58 59 // -- Note for adding locales to sDefaultLabelLocales -- 60 // 61 // AlphabeticIndex.getBucketLabel() uses a binary search across 62 // the entire label set so care should be taken about growing this 63 // set too large. The following set determines for which locales 64 // we will show labels other than your primary locale. General rules 65 // of thumb for adding a locale: should be a supported locale; and 66 // should not be included if from a name it is not deterministic 67 // which way to label it (so eg Chinese cannot be added because 68 // the labeling of a Chinese character varies between Simplified, 69 // Traditional, and Japanese locales). Use English only for all 70 // Latin based alphabets. Ukrainian and Serbian are chosen for 71 // Cyrillic because their alphabets are complementary supersets 72 // of Russian. 73 private static final Locale[] sDefaultLabelLocales = new Locale[]{ 74 Locale.ENGLISH, 75 Locale.JAPANESE, 76 Locale.KOREAN, 77 LOCALE_THAI, 78 LOCALE_ARABIC, 79 LOCALE_HEBREW, 80 LOCALE_GREEK, 81 LOCALE_UKRAINIAN, 82 LOCALE_SERBIAN, 83 }; 84 85 @VisibleForTesting 86 static void dumpIndex(ImmutableIndex index) { 87 final StringBuilder labels = new StringBuilder(); 88 String sep = ""; 89 for (int i = 0; i < index.getBucketCount(); i++) { 90 labels.append(sep); 91 labels.append(index.getBucket(i).getLabel()); 92 sep = ","; 93 } 94 Log.d(TAG, "Labels=[" + labels + "]"); 95 } 96 97 /** 98 * This class is the default implementation and should be the base class 99 * for other locales. 100 * 101 * sortKey: same as name 102 * nameLookupKeys: none 103 * labels: uses ICU AlphabeticIndex for labels and extends by labeling 104 * phone numbers "#". Eg English labels are: [A-Z], #, " " 105 */ 106 private static class ContactLocaleUtilsBase { 107 private static final String EMPTY_STRING = ""; 108 private static final String NUMBER_STRING = "#"; 109 110 protected final ImmutableIndex mAlphabeticIndex; 111 private final int mAlphabeticIndexBucketCount; 112 private final int mNumberBucketIndex; 113 private final boolean mUsePinyinTransliterator; 114 115 public ContactLocaleUtilsBase(LocaleSet systemLocales) { 116 mUsePinyinTransliterator = systemLocales.shouldPreferSimplifiedChinese(); 117 118 // Build the index buckets based on the current system locale set and 119 // sDefaultLabelLocales. 120 if (DEBUG) { 121 Log.d(TAG, "Building index buckets..."); 122 } 123 final List<Locale> locales = getLocalesForBuckets(systemLocales); 124 125 AlphabeticIndex ai = new AlphabeticIndex(locales.get(0)) 126 .setMaxLabelCount(300); 127 for (int i = 1; i < locales.size(); i++) { 128 ai.addLabels(locales.get(i)); 129 } 130 131 mAlphabeticIndex = ai.buildImmutableIndex(); 132 mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount(); 133 mNumberBucketIndex = mAlphabeticIndexBucketCount - 1; 134 if (DEBUG) { 135 dumpIndex(mAlphabeticIndex); 136 } 137 } 138 139 static List<Locale> getLocalesForBuckets(LocaleSet systemLocales) { 140 141 // Create a list of locales that should be used to generate the index buckets. 142 // - Source: the system locales and sDefaultLabelLocales. 143 // - Rules: 144 // - Don't add the same locale multiple times. 145 // - Also special rules for Chinese (b/31115382): 146 // - Don't add multiple Chinese locales. 147 // - Don't add any Chinese locales after Japanese. 148 149 // First, collect all the locales (allowing duplicates). 150 final LocaleList localeList = systemLocales.getAllLocales(); 151 152 final List<Locale> locales = new ArrayList<>( 153 localeList.size() + sDefaultLabelLocales.length); 154 for (int i = 0; i < localeList.size(); i++) { 155 locales.add(localeList.get(i)); 156 } 157 for (int i = 0; i < sDefaultLabelLocales.length; i++) { 158 locales.add(sDefaultLabelLocales[i]); 159 } 160 161 // Then apply the rules to generate the final list. 162 final List<Locale> ret = new ArrayList<>(locales.size()); 163 boolean allowChinese = true; 164 165 for (int i = 0; i < locales.size(); i++) { 166 final Locale locale = locales.get(i); 167 168 if (ret.contains(locale)) { 169 continue; 170 } 171 if (LocaleSet.isLanguageChinese(locale)) { 172 if (!allowChinese) { 173 continue; 174 } 175 allowChinese = false; 176 } 177 if (LocaleSet.isLanguageJapanese(locale)) { 178 allowChinese = false; 179 } 180 if (DEBUG) { 181 Log.d(TAG, " Adding locale: " + locale.toLanguageTag()); 182 } 183 ret.add(locale); 184 } 185 return ret; 186 } 187 188 public String getSortKey(String name) { 189 return name; 190 } 191 192 public int getNumberBucketIndex() { 193 return mNumberBucketIndex; 194 } 195 196 /** 197 * Returns the bucket index for the specified string. AlphabeticIndex 198 * sorts strings into buckets numbered in order from 0 to N, where the 199 * exact value of N depends on how many representative index labels are 200 * used in a particular locale. This routine adds one additional bucket 201 * for phone numbers. It attempts to detect phone numbers and shifts 202 * the bucket indexes returned by AlphabeticIndex in order to make room 203 * for the new # bucket, so the returned range becomes 0 to N+1. 204 */ 205 public int getBucketIndex(String name) { 206 boolean prefixIsNumeric = false; 207 final int length = name.length(); 208 int offset = 0; 209 while (offset < length) { 210 int codePoint = Character.codePointAt(name, offset); 211 // Ignore standard phone number separators and identify any 212 // string that otherwise starts with a number. 213 if (Character.isDigit(codePoint)) { 214 prefixIsNumeric = true; 215 break; 216 } else if (!Character.isSpaceChar(codePoint) && 217 codePoint != '+' && codePoint != '(' && 218 codePoint != ')' && codePoint != '.' && 219 codePoint != '-' && codePoint != '#') { 220 break; 221 } 222 offset += Character.charCount(codePoint); 223 } 224 if (prefixIsNumeric) { 225 return mNumberBucketIndex; 226 } 227 228 /** 229 * ICU 55 AlphabeticIndex doesn't support Simplified Chinese 230 * as a secondary locale so it is necessary to use the 231 * Pinyin transliterator. We also use this for a Simplified 232 * Chinese primary locale because it gives more accurate letter 233 * buckets. b/19835686 234 */ 235 if (mUsePinyinTransliterator) { 236 name = HanziToPinyin.getInstance().transliterate(name); 237 } 238 final int bucket = mAlphabeticIndex.getBucketIndex(name); 239 if (bucket < 0) { 240 return -1; 241 } 242 if (bucket >= mNumberBucketIndex) { 243 return bucket + 1; 244 } 245 return bucket; 246 } 247 248 /** 249 * Returns the number of buckets in use (one more than AlphabeticIndex 250 * uses, because this class adds a bucket for phone numbers). 251 */ 252 public int getBucketCount() { 253 return mAlphabeticIndexBucketCount + 1; 254 } 255 256 /** 257 * Returns the label for the specified bucket index if a valid index, 258 * otherwise returns an empty string. '#' is returned for the phone 259 * number bucket; for all others, the AlphabeticIndex label is returned. 260 */ 261 public String getBucketLabel(int bucketIndex) { 262 if (bucketIndex < 0 || bucketIndex >= getBucketCount()) { 263 return EMPTY_STRING; 264 } else if (bucketIndex == mNumberBucketIndex) { 265 return NUMBER_STRING; 266 } else if (bucketIndex > mNumberBucketIndex) { 267 --bucketIndex; 268 } 269 return mAlphabeticIndex.getBucket(bucketIndex).getLabel(); 270 } 271 272 @SuppressWarnings("unused") 273 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 274 return null; 275 } 276 277 public ArrayList<String> getLabels() { 278 final int bucketCount = getBucketCount(); 279 final ArrayList<String> labels = new ArrayList<String>(bucketCount); 280 for(int i = 0; i < bucketCount; ++i) { 281 labels.add(getBucketLabel(i)); 282 } 283 return labels; 284 } 285 } 286 287 /** 288 * Japanese specific locale overrides. 289 * 290 * sortKey: unchanged (same as name) 291 * nameLookupKeys: unchanged (none) 292 * labels: extends default labels by labeling unlabeled CJ characters 293 * with the Japanese character ("misc"). Japanese labels are: 294 * , , , , , , , , , , , [A-Z], #, " " 295 */ 296 private static class JapaneseContactUtils extends ContactLocaleUtilsBase { 297 // \u4ed6 is Japanese character ("misc") 298 private static final String JAPANESE_MISC_LABEL = "\u4ed6"; 299 private final int mMiscBucketIndex; 300 301 public JapaneseContactUtils(LocaleSet locales) { 302 super(locales); 303 // Determine which bucket AlphabeticIndex is lumping unclassified 304 // Japanese characters into by looking up the bucket index for 305 // a representative Kanji/CJK unified ideograph (\u65e5 is the 306 // character ''). 307 mMiscBucketIndex = super.getBucketIndex("\u65e5"); 308 } 309 310 // Set of UnicodeBlocks for unified CJK (Chinese) characters and 311 // Japanese characters. This includes all code blocks that might 312 // contain a character used in Japanese (which is why unified CJK 313 // blocks are included but Korean Hangul and jamo are not). 314 private static final Set<Character.UnicodeBlock> CJ_BLOCKS; 315 static { 316 Set<UnicodeBlock> set = new ArraySet<>(); 317 set.add(UnicodeBlock.HIRAGANA); 318 set.add(UnicodeBlock.KATAKANA); 319 set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS); 320 set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS); 321 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS); 322 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A); 323 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B); 324 set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION); 325 set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT); 326 set.add(UnicodeBlock.CJK_COMPATIBILITY); 327 set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS); 328 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS); 329 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT); 330 CJ_BLOCKS = Collections.unmodifiableSet(set); 331 } 332 333 /** 334 * Helper routine to identify unlabeled Chinese or Japanese characters 335 * to put in a 'misc' bucket. 336 * 337 * @return true if the specified Unicode code point is Chinese or 338 * Japanese 339 */ 340 private static boolean isChineseOrJapanese(int codePoint) { 341 return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint)); 342 } 343 344 /** 345 * Returns the bucket index for the specified string. Adds an 346 * additional 'misc' bucket for Kanji characters to the base class set. 347 */ 348 @Override 349 public int getBucketIndex(String name) { 350 final int bucketIndex = super.getBucketIndex(name); 351 if ((bucketIndex == mMiscBucketIndex && 352 !isChineseOrJapanese(Character.codePointAt(name, 0))) || 353 bucketIndex > mMiscBucketIndex) { 354 return bucketIndex + 1; 355 } 356 return bucketIndex; 357 } 358 359 /** 360 * Returns the number of buckets in use (one more than the base class 361 * uses, because this class adds a bucket for Kanji). 362 */ 363 @Override 364 public int getBucketCount() { 365 return super.getBucketCount() + 1; 366 } 367 368 /** 369 * Returns the label for the specified bucket index if a valid index, 370 * otherwise returns an empty string. '' is returned for unclassified 371 * Kanji; for all others, the label determined by the base class is 372 * returned. 373 */ 374 @Override 375 public String getBucketLabel(int bucketIndex) { 376 if (bucketIndex == mMiscBucketIndex) { 377 return JAPANESE_MISC_LABEL; 378 } else if (bucketIndex > mMiscBucketIndex) { 379 --bucketIndex; 380 } 381 return super.getBucketLabel(bucketIndex); 382 } 383 384 @Override 385 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 386 // Hiragana and Katakana will be positively identified as Japanese. 387 if (nameStyle == PhoneticNameStyle.JAPANESE) { 388 return getRomajiNameLookupKeys(name); 389 } 390 return null; 391 } 392 393 private static boolean mInitializedTransliterator; 394 private static Transliterator mJapaneseTransliterator; 395 396 private static Transliterator getJapaneseTransliterator() { 397 synchronized(JapaneseContactUtils.class) { 398 if (!mInitializedTransliterator) { 399 mInitializedTransliterator = true; 400 Transliterator t = null; 401 try { 402 t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;" 403 + " Latin-Ascii"); 404 } catch (IllegalArgumentException e) { 405 Log.w(TAG, "Hiragana/Katakana-Latin transliterator data" 406 + " is missing"); 407 } 408 mJapaneseTransliterator = t; 409 } 410 return mJapaneseTransliterator; 411 } 412 } 413 414 public static Iterator<String> getRomajiNameLookupKeys(String name) { 415 final Transliterator t = getJapaneseTransliterator(); 416 if (t == null) { 417 return null; 418 } 419 final String romajiName = t.transliterate(name); 420 if (TextUtils.isEmpty(romajiName) || 421 TextUtils.equals(name, romajiName)) { 422 return null; 423 } 424 final ArraySet<String> keys = new ArraySet<>(); 425 keys.add(romajiName); 426 return keys.iterator(); 427 } 428 429 /** 430 * Returns the number for "#" bucket index. 431 * Adds an additional 'misc' bucket for Kanji characters to the base class set. 432 */ 433 @Override 434 public int getNumberBucketIndex() { 435 final int numberBucketIndex = super.getNumberBucketIndex(); 436 if (numberBucketIndex > mMiscBucketIndex) { 437 return numberBucketIndex + 1; 438 } 439 return numberBucketIndex; 440 } 441 } 442 443 /** 444 * Simplified Chinese specific locale overrides. Uses ICU Transliterator 445 * for generating pinyin transliteration. 446 * 447 * sortKey: unchanged (same as name) 448 * nameLookupKeys: adds additional name lookup keys 449 * - Chinese character's pinyin and pinyin's initial character. 450 * - Latin word and initial character. 451 * labels: unchanged 452 * Simplified Chinese labels are the same as English: [A-Z], #, " " 453 */ 454 private static class SimplifiedChineseContactUtils 455 extends ContactLocaleUtilsBase { 456 public SimplifiedChineseContactUtils(LocaleSet locales) { 457 super(locales); 458 } 459 460 @Override 461 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 462 if (nameStyle != FullNameStyle.JAPANESE && 463 nameStyle != FullNameStyle.KOREAN) { 464 return getPinyinNameLookupKeys(name); 465 } 466 return null; 467 } 468 469 public static Iterator<String> getPinyinNameLookupKeys(String name) { 470 // TODO : Reduce the object allocation. 471 ArraySet<String> keys = new ArraySet<>(); 472 ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name); 473 final int tokenCount = tokens.size(); 474 final StringBuilder keyPinyin = new StringBuilder(); 475 final StringBuilder keyInitial = new StringBuilder(); 476 // There is no space among the Chinese Characters, the variant name 477 // lookup key wouldn't work for Chinese. The keyOriginal is used to 478 // build the lookup keys for itself. 479 final StringBuilder keyOriginal = new StringBuilder(); 480 for (int i = tokenCount - 1; i >= 0; i--) { 481 final Token token = tokens.get(i); 482 if (Token.UNKNOWN == token.type) { 483 continue; 484 } 485 if (Token.PINYIN == token.type) { 486 keyPinyin.insert(0, token.target); 487 keyInitial.insert(0, token.target.charAt(0)); 488 } else if (Token.LATIN == token.type) { 489 // Avoid adding space at the end of String. 490 if (keyPinyin.length() > 0) { 491 keyPinyin.insert(0, ' '); 492 } 493 if (keyOriginal.length() > 0) { 494 keyOriginal.insert(0, ' '); 495 } 496 keyPinyin.insert(0, token.source); 497 keyInitial.insert(0, token.source.charAt(0)); 498 } 499 keyOriginal.insert(0, token.source); 500 keys.add(keyOriginal.toString()); 501 keys.add(keyPinyin.toString()); 502 keys.add(keyInitial.toString()); 503 } 504 return keys.iterator(); 505 } 506 } 507 508 private static ContactLocaleUtils sSingleton; 509 510 private final LocaleSet mLocales; 511 private final ContactLocaleUtilsBase mUtils; 512 513 private ContactLocaleUtils(LocaleSet locales) { 514 if (locales == null) { 515 mLocales = LocaleSet.newDefault(); 516 } else { 517 mLocales = locales; 518 } 519 if (mLocales.shouldPreferJapanese()) { 520 mUtils = new JapaneseContactUtils(mLocales); 521 } else if (mLocales.shouldPreferSimplifiedChinese()) { 522 mUtils = new SimplifiedChineseContactUtils(mLocales); 523 } else { 524 mUtils = new ContactLocaleUtilsBase(mLocales); 525 } 526 Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: " 527 + getLabels().toString()); 528 } 529 530 public boolean isLocale(LocaleSet locales) { 531 return mLocales.equals(locales); 532 } 533 534 public static synchronized ContactLocaleUtils getInstance() { 535 if (sSingleton == null) { 536 sSingleton = new ContactLocaleUtils(LocaleSet.newDefault()); 537 } 538 return sSingleton; 539 } 540 541 @VisibleForTesting 542 public static ContactLocaleUtils newInstanceForTest(Locale... locales) { 543 return new ContactLocaleUtils(LocaleSet.newForTest(locales)); 544 } 545 546 @VisibleForTesting 547 public static synchronized void setLocaleForTest(Locale... locales) { 548 setLocales(LocaleSet.newForTest(locales)); 549 } 550 551 public static synchronized void setLocales(LocaleSet locales) { 552 if (sSingleton == null || !sSingleton.isLocale(locales)) { 553 if (DEBUG) { 554 Log.d(TAG, "Setting locale(s) to " + locales); 555 } 556 sSingleton = new ContactLocaleUtils(locales); 557 } 558 } 559 560 public String getSortKey(String name, int nameStyle) { 561 return mUtils.getSortKey(name); 562 } 563 564 public int getBucketIndex(String name) { 565 return mUtils.getBucketIndex(name); 566 } 567 568 public int getNumberBucketIndex() { 569 return mUtils.getNumberBucketIndex(); 570 } 571 572 public int getBucketCount() { 573 return mUtils.getBucketCount(); 574 } 575 576 public String getBucketLabel(int bucketIndex) { 577 return mUtils.getBucketLabel(bucketIndex); 578 } 579 580 public String getLabel(String name) { 581 return getBucketLabel(getBucketIndex(name)); 582 } 583 584 public ArrayList<String> getLabels() { 585 return mUtils.getLabels(); 586 } 587 588 /** 589 * Determine which utility should be used for generating NameLookupKey. 590 * (ie, whether we generate Romaji or Pinyin lookup keys or not) 591 * 592 * Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified 593 * and tagged as CJK. For Hiragana/Katakana names, generate Romaji 594 * lookup keys when not in a Chinese or Korean locale. 595 * 596 * Otherwise, use the default behavior of that locale: 597 * a. For Japan, generate Romaji lookup keys for Hiragana/Katakana. 598 * b. For Simplified Chinese locale, generate Pinyin lookup keys. 599 */ 600 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 601 if (!mLocales.isPrimaryLocaleCJK()) { 602 if (mLocales.shouldPreferSimplifiedChinese()) { 603 if (nameStyle == FullNameStyle.CHINESE || 604 nameStyle == FullNameStyle.CJK) { 605 return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name); 606 } 607 } else { 608 if (nameStyle == FullNameStyle.JAPANESE) { 609 return JapaneseContactUtils.getRomajiNameLookupKeys(name); 610 } 611 } 612 } 613 return mUtils.getNameLookupKeys(name, nameStyle); 614 } 615 616 } 617