1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 17 package com.android.providers.contacts; 18 19 import android.icu.text.AlphabeticIndex; 20 import android.icu.text.AlphabeticIndex.ImmutableIndex; 21 import android.icu.text.Transliterator; 22 import android.os.LocaleList; 23 import android.provider.ContactsContract.FullNameStyle; 24 import android.provider.ContactsContract.PhoneticNameStyle; 25 import android.text.TextUtils; 26 import android.util.Log; 27 28 import com.android.providers.contacts.HanziToPinyin.Token; 29 30 import com.google.common.annotations.VisibleForTesting; 31 32 import java.lang.Character.UnicodeBlock; 33 import java.util.ArrayList; 34 import java.util.Collections; 35 import java.util.HashSet; 36 import java.util.Iterator; 37 import java.util.List; 38 import java.util.Locale; 39 import java.util.Set; 40 41 42 /** 43 * This utility class provides specialized handling for locale specific 44 * information: labels, name lookup keys. 45 */ 46 public class ContactLocaleUtils { 47 public static final String TAG = "ContactLocale"; 48 49 private static final boolean DEBUG = false; // don't submit with true 50 51 public static final Locale LOCALE_ARABIC = new Locale("ar"); 52 public static final Locale LOCALE_GREEK = new Locale("el"); 53 public static final Locale LOCALE_HEBREW = new Locale("he"); 54 // Serbian and Ukrainian labels are complementary supersets of Russian 55 public static final Locale LOCALE_SERBIAN = new Locale("sr"); 56 public static final Locale LOCALE_UKRAINIAN = new Locale("uk"); 57 public static final Locale LOCALE_THAI = new Locale("th"); 58 59 // -- Note for adding locales to sDefaultLabelLocales -- 60 // 61 // AlphabeticIndex.getBucketLabel() uses a binary search across 62 // the entire label set so care should be taken about growing this 63 // set too large. The following set determines for which locales 64 // we will show labels other than your primary locale. General rules 65 // of thumb for adding a locale: should be a supported locale; and 66 // should not be included if from a name it is not deterministic 67 // which way to label it (so eg Chinese cannot be added because 68 // the labeling of a Chinese character varies between Simplified, 69 // Traditional, and Japanese locales). Use English only for all 70 // Latin based alphabets. Ukrainian and Serbian are chosen for 71 // Cyrillic because their alphabets are complementary supersets 72 // of Russian. 73 private static final Locale[] sDefaultLabelLocales = new Locale[]{ 74 Locale.ENGLISH, 75 Locale.JAPANESE, 76 Locale.KOREAN, 77 LOCALE_THAI, 78 LOCALE_ARABIC, 79 LOCALE_HEBREW, 80 LOCALE_GREEK, 81 LOCALE_UKRAINIAN, 82 LOCALE_SERBIAN, 83 }; 84 85 @VisibleForTesting 86 static void dumpIndex(ImmutableIndex index) { 87 final StringBuilder labels = new StringBuilder(); 88 String sep = ""; 89 for (int i = 0; i < index.getBucketCount(); i++) { 90 labels.append(sep); 91 labels.append(index.getBucket(i).getLabel()); 92 sep = ","; 93 } 94 Log.d(TAG, "Labels=[" + labels + "]"); 95 } 96 97 /** 98 * This class is the default implementation and should be the base class 99 * for other locales. 100 * 101 * sortKey: same as name 102 * nameLookupKeys: none 103 * labels: uses ICU AlphabeticIndex for labels and extends by labeling 104 * phone numbers "#". Eg English labels are: [A-Z], #, " " 105 */ 106 private static class ContactLocaleUtilsBase { 107 private static final String EMPTY_STRING = ""; 108 private static final String NUMBER_STRING = "#"; 109 110 protected final ImmutableIndex mAlphabeticIndex; 111 private final int mAlphabeticIndexBucketCount; 112 private final int mNumberBucketIndex; 113 private final boolean mUsePinyinTransliterator; 114 115 public ContactLocaleUtilsBase(LocaleSet systemLocales) { 116 mUsePinyinTransliterator = systemLocales.shouldPreferSimplifiedChinese(); 117 118 // Build the index buckets based on the current system locale set and 119 // sDefaultLabelLocales. 120 if (DEBUG) { 121 Log.d(TAG, "Building index buckets..."); 122 } 123 final List<Locale> locales = getLocalesForBuckets(systemLocales); 124 125 AlphabeticIndex ai = new AlphabeticIndex(locales.get(0)) 126 .setMaxLabelCount(300); 127 for (int i = 1; i < locales.size(); i++) { 128 ai.addLabels(locales.get(i)); 129 } 130 131 mAlphabeticIndex = ai.buildImmutableIndex(); 132 mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount(); 133 mNumberBucketIndex = mAlphabeticIndexBucketCount - 1; 134 if (DEBUG) { 135 dumpIndex(mAlphabeticIndex); 136 } 137 } 138 139 static List<Locale> getLocalesForBuckets(LocaleSet systemLocales) { 140 141 // Create a list of locales that should be used to generate the index buckets. 142 // - Source: the system locales and sDefaultLabelLocales. 143 // - Rules: 144 // - Don't add the same locale multiple times. 145 // - Also special rules for Chinese (b/31115382): 146 // - Don't add multiple Chinese locales. 147 // - Don't add any Chinese locales after Japanese. 148 149 // First, collect all the locales (allowing duplicates). 150 final LocaleList localeList = systemLocales.getAllLocales(); 151 152 final List<Locale> locales = new ArrayList<>( 153 localeList.size() + sDefaultLabelLocales.length); 154 for (int i = 0; i < localeList.size(); i++) { 155 locales.add(localeList.get(i)); 156 } 157 for (int i = 0; i < sDefaultLabelLocales.length; i++) { 158 locales.add(sDefaultLabelLocales[i]); 159 } 160 161 // Then apply the rules to generate the final list. 162 final List<Locale> ret = new ArrayList<>(locales.size()); 163 boolean allowChinese = true; 164 165 for (int i = 0; i < locales.size(); i++) { 166 final Locale locale = locales.get(i); 167 168 if (ret.contains(locale)) { 169 continue; 170 } 171 if (LocaleSet.isLanguageChinese(locale)) { 172 if (!allowChinese) { 173 continue; 174 } 175 allowChinese = false; 176 } 177 if (LocaleSet.isLanguageJapanese(locale)) { 178 allowChinese = false; 179 } 180 if (DEBUG) { 181 Log.d(TAG, " Adding locale: " + locale.toLanguageTag()); 182 } 183 ret.add(locale); 184 } 185 return ret; 186 } 187 188 public String getSortKey(String name) { 189 return name; 190 } 191 192 public int getNumberBucketIndex() { 193 return mNumberBucketIndex; 194 } 195 196 /** 197 * Returns the bucket index for the specified string. AlphabeticIndex 198 * sorts strings into buckets numbered in order from 0 to N, where the 199 * exact value of N depends on how many representative index labels are 200 * used in a particular locale. This routine adds one additional bucket 201 * for phone numbers. It attempts to detect phone numbers and shifts 202 * the bucket indexes returned by AlphabeticIndex in order to make room 203 * for the new # bucket, so the returned range becomes 0 to N+1. 204 */ 205 public int getBucketIndex(String name) { 206 boolean prefixIsNumeric = false; 207 final int length = name.length(); 208 int offset = 0; 209 while (offset < length) { 210 int codePoint = Character.codePointAt(name, offset); 211 // Ignore standard phone number separators and identify any 212 // string that otherwise starts with a number. 213 if (Character.isDigit(codePoint)) { 214 prefixIsNumeric = true; 215 break; 216 } else if (!Character.isSpaceChar(codePoint) && 217 codePoint != '+' && codePoint != '(' && 218 codePoint != ')' && codePoint != '.' && 219 codePoint != '-' && codePoint != '#') { 220 break; 221 } 222 offset += Character.charCount(codePoint); 223 } 224 if (prefixIsNumeric) { 225 return mNumberBucketIndex; 226 } 227 228 /** 229 * ICU 55 AlphabeticIndex doesn't support Simplified Chinese 230 * as a secondary locale so it is necessary to use the 231 * Pinyin transliterator. We also use this for a Simplified 232 * Chinese primary locale because it gives more accurate letter 233 * buckets. b/19835686 234 */ 235 if (mUsePinyinTransliterator) { 236 name = HanziToPinyin.getInstance().transliterate(name); 237 } 238 final int bucket = mAlphabeticIndex.getBucketIndex(name); 239 if (bucket < 0) { 240 return -1; 241 } 242 if (bucket >= mNumberBucketIndex) { 243 return bucket + 1; 244 } 245 return bucket; 246 } 247 248 /** 249 * Returns the number of buckets in use (one more than AlphabeticIndex 250 * uses, because this class adds a bucket for phone numbers). 251 */ 252 public int getBucketCount() { 253 return mAlphabeticIndexBucketCount + 1; 254 } 255 256 /** 257 * Returns the label for the specified bucket index if a valid index, 258 * otherwise returns an empty string. '#' is returned for the phone 259 * number bucket; for all others, the AlphabeticIndex label is returned. 260 */ 261 public String getBucketLabel(int bucketIndex) { 262 if (bucketIndex < 0 || bucketIndex >= getBucketCount()) { 263 return EMPTY_STRING; 264 } else if (bucketIndex == mNumberBucketIndex) { 265 return NUMBER_STRING; 266 } else if (bucketIndex > mNumberBucketIndex) { 267 --bucketIndex; 268 } 269 return mAlphabeticIndex.getBucket(bucketIndex).getLabel(); 270 } 271 272 @SuppressWarnings("unused") 273 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 274 return null; 275 } 276 277 public ArrayList<String> getLabels() { 278 final int bucketCount = getBucketCount(); 279 final ArrayList<String> labels = new ArrayList<String>(bucketCount); 280 for(int i = 0; i < bucketCount; ++i) { 281 labels.add(getBucketLabel(i)); 282 } 283 return labels; 284 } 285 } 286 287 /** 288 * Japanese specific locale overrides. 289 * 290 * sortKey: unchanged (same as name) 291 * nameLookupKeys: unchanged (none) 292 * labels: extends default labels by labeling unlabeled CJ characters 293 * with the Japanese character ("misc"). Japanese labels are: 294 * , , , , , , , , , , , [A-Z], #, " " 295 */ 296 private static class JapaneseContactUtils extends ContactLocaleUtilsBase { 297 // \u4ed6 is Japanese character ("misc") 298 private static final String JAPANESE_MISC_LABEL = "\u4ed6"; 299 private final int mMiscBucketIndex; 300 301 public JapaneseContactUtils(LocaleSet locales) { 302 super(locales); 303 // Determine which bucket AlphabeticIndex is lumping unclassified 304 // Japanese characters into by looking up the bucket index for 305 // a representative Kanji/CJK unified ideograph (\u65e5 is the 306 // character ''). 307 mMiscBucketIndex = super.getBucketIndex("\u65e5"); 308 } 309 310 // Set of UnicodeBlocks for unified CJK (Chinese) characters and 311 // Japanese characters. This includes all code blocks that might 312 // contain a character used in Japanese (which is why unified CJK 313 // blocks are included but Korean Hangul and jamo are not). 314 private static final Set<Character.UnicodeBlock> CJ_BLOCKS; 315 static { 316 Set<UnicodeBlock> set = new HashSet<UnicodeBlock>(); 317 set.add(UnicodeBlock.HIRAGANA); 318 set.add(UnicodeBlock.KATAKANA); 319 set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS); 320 set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS); 321 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS); 322 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A); 323 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B); 324 set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION); 325 set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT); 326 set.add(UnicodeBlock.CJK_COMPATIBILITY); 327 set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS); 328 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS); 329 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT); 330 CJ_BLOCKS = Collections.unmodifiableSet(set); 331 } 332 333 /** 334 * Helper routine to identify unlabeled Chinese or Japanese characters 335 * to put in a 'misc' bucket. 336 * 337 * @return true if the specified Unicode code point is Chinese or 338 * Japanese 339 */ 340 private static boolean isChineseOrJapanese(int codePoint) { 341 return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint)); 342 } 343 344 /** 345 * Returns the bucket index for the specified string. Adds an 346 * additional 'misc' bucket for Kanji characters to the base class set. 347 */ 348 @Override 349 public int getBucketIndex(String name) { 350 final int bucketIndex = super.getBucketIndex(name); 351 if ((bucketIndex == mMiscBucketIndex && 352 !isChineseOrJapanese(Character.codePointAt(name, 0))) || 353 bucketIndex > mMiscBucketIndex) { 354 return bucketIndex + 1; 355 } 356 return bucketIndex; 357 } 358 359 /** 360 * Returns the number of buckets in use (one more than the base class 361 * uses, because this class adds a bucket for Kanji). 362 */ 363 @Override 364 public int getBucketCount() { 365 return super.getBucketCount() + 1; 366 } 367 368 /** 369 * Returns the label for the specified bucket index if a valid index, 370 * otherwise returns an empty string. '' is returned for unclassified 371 * Kanji; for all others, the label determined by the base class is 372 * returned. 373 */ 374 @Override 375 public String getBucketLabel(int bucketIndex) { 376 if (bucketIndex == mMiscBucketIndex) { 377 return JAPANESE_MISC_LABEL; 378 } else if (bucketIndex > mMiscBucketIndex) { 379 --bucketIndex; 380 } 381 return super.getBucketLabel(bucketIndex); 382 } 383 384 @Override 385 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 386 // Hiragana and Katakana will be positively identified as Japanese. 387 if (nameStyle == PhoneticNameStyle.JAPANESE) { 388 return getRomajiNameLookupKeys(name); 389 } 390 return null; 391 } 392 393 private static boolean mInitializedTransliterator; 394 private static Transliterator mJapaneseTransliterator; 395 396 private static Transliterator getJapaneseTransliterator() { 397 synchronized(JapaneseContactUtils.class) { 398 if (!mInitializedTransliterator) { 399 mInitializedTransliterator = true; 400 Transliterator t = null; 401 try { 402 t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;" 403 + " Latin-Ascii"); 404 } catch (IllegalArgumentException e) { 405 Log.w(TAG, "Hiragana/Katakana-Latin transliterator data" 406 + " is missing"); 407 } 408 mJapaneseTransliterator = t; 409 } 410 return mJapaneseTransliterator; 411 } 412 } 413 414 public static Iterator<String> getRomajiNameLookupKeys(String name) { 415 final Transliterator t = getJapaneseTransliterator(); 416 if (t == null) { 417 return null; 418 } 419 final String romajiName = t.transliterate(name); 420 if (TextUtils.isEmpty(romajiName) || 421 TextUtils.equals(name, romajiName)) { 422 return null; 423 } 424 final HashSet<String> keys = new HashSet<String>(); 425 keys.add(romajiName); 426 return keys.iterator(); 427 } 428 } 429 430 /** 431 * Simplified Chinese specific locale overrides. Uses ICU Transliterator 432 * for generating pinyin transliteration. 433 * 434 * sortKey: unchanged (same as name) 435 * nameLookupKeys: adds additional name lookup keys 436 * - Chinese character's pinyin and pinyin's initial character. 437 * - Latin word and initial character. 438 * labels: unchanged 439 * Simplified Chinese labels are the same as English: [A-Z], #, " " 440 */ 441 private static class SimplifiedChineseContactUtils 442 extends ContactLocaleUtilsBase { 443 public SimplifiedChineseContactUtils(LocaleSet locales) { 444 super(locales); 445 } 446 447 @Override 448 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 449 if (nameStyle != FullNameStyle.JAPANESE && 450 nameStyle != FullNameStyle.KOREAN) { 451 return getPinyinNameLookupKeys(name); 452 } 453 return null; 454 } 455 456 public static Iterator<String> getPinyinNameLookupKeys(String name) { 457 // TODO : Reduce the object allocation. 458 HashSet<String> keys = new HashSet<String>(); 459 ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name); 460 final int tokenCount = tokens.size(); 461 final StringBuilder keyPinyin = new StringBuilder(); 462 final StringBuilder keyInitial = new StringBuilder(); 463 // There is no space among the Chinese Characters, the variant name 464 // lookup key wouldn't work for Chinese. The keyOriginal is used to 465 // build the lookup keys for itself. 466 final StringBuilder keyOriginal = new StringBuilder(); 467 for (int i = tokenCount - 1; i >= 0; i--) { 468 final Token token = tokens.get(i); 469 if (Token.UNKNOWN == token.type) { 470 continue; 471 } 472 if (Token.PINYIN == token.type) { 473 keyPinyin.insert(0, token.target); 474 keyInitial.insert(0, token.target.charAt(0)); 475 } else if (Token.LATIN == token.type) { 476 // Avoid adding space at the end of String. 477 if (keyPinyin.length() > 0) { 478 keyPinyin.insert(0, ' '); 479 } 480 if (keyOriginal.length() > 0) { 481 keyOriginal.insert(0, ' '); 482 } 483 keyPinyin.insert(0, token.source); 484 keyInitial.insert(0, token.source.charAt(0)); 485 } 486 keyOriginal.insert(0, token.source); 487 keys.add(keyOriginal.toString()); 488 keys.add(keyPinyin.toString()); 489 keys.add(keyInitial.toString()); 490 } 491 return keys.iterator(); 492 } 493 } 494 495 private static ContactLocaleUtils sSingleton; 496 497 private final LocaleSet mLocales; 498 private final ContactLocaleUtilsBase mUtils; 499 500 private ContactLocaleUtils(LocaleSet locales) { 501 if (locales == null) { 502 mLocales = LocaleSet.newDefault(); 503 } else { 504 mLocales = locales; 505 } 506 if (mLocales.shouldPreferJapanese()) { 507 mUtils = new JapaneseContactUtils(mLocales); 508 } else if (mLocales.shouldPreferSimplifiedChinese()) { 509 mUtils = new SimplifiedChineseContactUtils(mLocales); 510 } else { 511 mUtils = new ContactLocaleUtilsBase(mLocales); 512 } 513 Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: " 514 + getLabels().toString()); 515 } 516 517 public boolean isLocale(LocaleSet locales) { 518 return mLocales.equals(locales); 519 } 520 521 public static synchronized ContactLocaleUtils getInstance() { 522 if (sSingleton == null) { 523 sSingleton = new ContactLocaleUtils(LocaleSet.newDefault()); 524 } 525 return sSingleton; 526 } 527 528 @VisibleForTesting 529 public static ContactLocaleUtils newInstanceForTest(Locale... locales) { 530 return new ContactLocaleUtils(LocaleSet.newForTest(locales)); 531 } 532 533 @VisibleForTesting 534 public static synchronized void setLocaleForTest(Locale... locales) { 535 setLocales(LocaleSet.newForTest(locales)); 536 } 537 538 public static synchronized void setLocales(LocaleSet locales) { 539 if (sSingleton == null || !sSingleton.isLocale(locales)) { 540 if (DEBUG) { 541 Log.d(TAG, "Setting locale(s) to " + locales); 542 } 543 sSingleton = new ContactLocaleUtils(locales); 544 } 545 } 546 547 public String getSortKey(String name, int nameStyle) { 548 return mUtils.getSortKey(name); 549 } 550 551 public int getBucketIndex(String name) { 552 return mUtils.getBucketIndex(name); 553 } 554 555 public int getNumberBucketIndex() { 556 return mUtils.getNumberBucketIndex(); 557 } 558 559 public int getBucketCount() { 560 return mUtils.getBucketCount(); 561 } 562 563 public String getBucketLabel(int bucketIndex) { 564 return mUtils.getBucketLabel(bucketIndex); 565 } 566 567 public String getLabel(String name) { 568 return getBucketLabel(getBucketIndex(name)); 569 } 570 571 public ArrayList<String> getLabels() { 572 return mUtils.getLabels(); 573 } 574 575 /** 576 * Determine which utility should be used for generating NameLookupKey. 577 * (ie, whether we generate Romaji or Pinyin lookup keys or not) 578 * 579 * Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified 580 * and tagged as CJK. For Hiragana/Katakana names, generate Romaji 581 * lookup keys when not in a Chinese or Korean locale. 582 * 583 * Otherwise, use the default behavior of that locale: 584 * a. For Japan, generate Romaji lookup keys for Hiragana/Katakana. 585 * b. For Simplified Chinese locale, generate Pinyin lookup keys. 586 */ 587 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 588 if (!mLocales.isPrimaryLocaleCJK()) { 589 if (mLocales.shouldPreferSimplifiedChinese()) { 590 if (nameStyle == FullNameStyle.CHINESE || 591 nameStyle == FullNameStyle.CJK) { 592 return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name); 593 } 594 } else { 595 if (nameStyle == FullNameStyle.JAPANESE) { 596 return JapaneseContactUtils.getRomajiNameLookupKeys(name); 597 } 598 } 599 } 600 return mUtils.getNameLookupKeys(name, nameStyle); 601 } 602 603 } 604