1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 17 package com.android.providers.contacts; 18 19 import android.provider.ContactsContract.FullNameStyle; 20 import android.provider.ContactsContract.PhoneticNameStyle; 21 import android.text.TextUtils; 22 import android.util.Log; 23 24 import com.android.providers.contacts.HanziToPinyin.Token; 25 import com.google.common.annotations.VisibleForTesting; 26 27 import java.lang.Character.UnicodeBlock; 28 import java.util.Arrays; 29 import java.util.ArrayList; 30 import java.util.Collections; 31 import java.util.HashMap; 32 import java.util.HashSet; 33 import java.util.Iterator; 34 import java.util.List; 35 import java.util.Locale; 36 import java.util.Map; 37 import java.util.Set; 38 39 import libcore.icu.AlphabeticIndex; 40 import libcore.icu.AlphabeticIndex.ImmutableIndex; 41 import libcore.icu.Transliterator; 42 43 /** 44 * This utility class provides specialized handling for locale specific 45 * information: labels, name lookup keys. 46 */ 47 public class ContactLocaleUtils { 48 public static final String TAG = "ContactLocale"; 49 50 public static final Locale LOCALE_ARABIC = new Locale("ar"); 51 public static final Locale LOCALE_GREEK = new Locale("el"); 52 public static final Locale LOCALE_HEBREW = new Locale("he"); 53 // Serbian and Ukrainian labels are complementary supersets of Russian 54 public static final Locale LOCALE_SERBIAN = new Locale("sr"); 55 public static final Locale LOCALE_UKRAINIAN = new Locale("uk"); 56 public static final Locale LOCALE_THAI = new Locale("th"); 57 58 /** 59 * This class is the default implementation and should be the base class 60 * for other locales. 61 * 62 * sortKey: same as name 63 * nameLookupKeys: none 64 * labels: uses ICU AlphabeticIndex for labels and extends by labeling 65 * phone numbers "#". Eg English labels are: [A-Z], #, " " 66 */ 67 private static class ContactLocaleUtilsBase { 68 private static final String EMPTY_STRING = ""; 69 private static final String NUMBER_STRING = "#"; 70 71 protected final ImmutableIndex mAlphabeticIndex; 72 private final int mAlphabeticIndexBucketCount; 73 private final int mNumberBucketIndex; 74 private final boolean mEnableSecondaryLocalePinyin; 75 76 public ContactLocaleUtilsBase(LocaleSet locales) { 77 // AlphabeticIndex.getBucketLabel() uses a binary search across 78 // the entire label set so care should be taken about growing this 79 // set too large. The following set determines for which locales 80 // we will show labels other than your primary locale. General rules 81 // of thumb for adding a locale: should be a supported locale; and 82 // should not be included if from a name it is not deterministic 83 // which way to label it (so eg Chinese cannot be added because 84 // the labeling of a Chinese character varies between Simplified, 85 // Traditional, and Japanese locales). Use English only for all 86 // Latin based alphabets. Ukrainian and Serbian are chosen for 87 // Cyrillic because their alphabets are complementary supersets 88 // of Russian. 89 final Locale secondaryLocale = locales.getSecondaryLocale(); 90 mEnableSecondaryLocalePinyin = locales.isSecondaryLocaleSimplifiedChinese(); 91 AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale()) 92 .setMaxLabelCount(300); 93 if (secondaryLocale != null) { 94 ai.addLabels(secondaryLocale); 95 } 96 mAlphabeticIndex = ai.addLabels(Locale.ENGLISH) 97 .addLabels(Locale.JAPANESE) 98 .addLabels(Locale.KOREAN) 99 .addLabels(LOCALE_THAI) 100 .addLabels(LOCALE_ARABIC) 101 .addLabels(LOCALE_HEBREW) 102 .addLabels(LOCALE_GREEK) 103 .addLabels(LOCALE_UKRAINIAN) 104 .addLabels(LOCALE_SERBIAN) 105 .getImmutableIndex(); 106 mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount(); 107 mNumberBucketIndex = mAlphabeticIndexBucketCount - 1; 108 } 109 110 public String getSortKey(String name) { 111 return name; 112 } 113 114 /** 115 * Returns the bucket index for the specified string. AlphabeticIndex 116 * sorts strings into buckets numbered in order from 0 to N, where the 117 * exact value of N depends on how many representative index labels are 118 * used in a particular locale. This routine adds one additional bucket 119 * for phone numbers. It attempts to detect phone numbers and shifts 120 * the bucket indexes returned by AlphabeticIndex in order to make room 121 * for the new # bucket, so the returned range becomes 0 to N+1. 122 */ 123 public int getBucketIndex(String name) { 124 boolean prefixIsNumeric = false; 125 final int length = name.length(); 126 int offset = 0; 127 while (offset < length) { 128 int codePoint = Character.codePointAt(name, offset); 129 // Ignore standard phone number separators and identify any 130 // string that otherwise starts with a number. 131 if (Character.isDigit(codePoint)) { 132 prefixIsNumeric = true; 133 break; 134 } else if (!Character.isSpaceChar(codePoint) && 135 codePoint != '+' && codePoint != '(' && 136 codePoint != ')' && codePoint != '.' && 137 codePoint != '-' && codePoint != '#') { 138 break; 139 } 140 offset += Character.charCount(codePoint); 141 } 142 if (prefixIsNumeric) { 143 return mNumberBucketIndex; 144 } 145 146 /** 147 * TODO: ICU 52 AlphabeticIndex doesn't support Simplified Chinese 148 * as a secondary locale. Remove the following if that is added. 149 */ 150 if (mEnableSecondaryLocalePinyin) { 151 name = HanziToPinyin.getInstance().transliterate(name); 152 } 153 final int bucket = mAlphabeticIndex.getBucketIndex(name); 154 if (bucket < 0) { 155 return -1; 156 } 157 if (bucket >= mNumberBucketIndex) { 158 return bucket + 1; 159 } 160 return bucket; 161 } 162 163 /** 164 * Returns the number of buckets in use (one more than AlphabeticIndex 165 * uses, because this class adds a bucket for phone numbers). 166 */ 167 public int getBucketCount() { 168 return mAlphabeticIndexBucketCount + 1; 169 } 170 171 /** 172 * Returns the label for the specified bucket index if a valid index, 173 * otherwise returns an empty string. '#' is returned for the phone 174 * number bucket; for all others, the AlphabeticIndex label is returned. 175 */ 176 public String getBucketLabel(int bucketIndex) { 177 if (bucketIndex < 0 || bucketIndex >= getBucketCount()) { 178 return EMPTY_STRING; 179 } else if (bucketIndex == mNumberBucketIndex) { 180 return NUMBER_STRING; 181 } else if (bucketIndex > mNumberBucketIndex) { 182 --bucketIndex; 183 } 184 return mAlphabeticIndex.getBucketLabel(bucketIndex); 185 } 186 187 @SuppressWarnings("unused") 188 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 189 return null; 190 } 191 192 public ArrayList<String> getLabels() { 193 final int bucketCount = getBucketCount(); 194 final ArrayList<String> labels = new ArrayList<String>(bucketCount); 195 for(int i = 0; i < bucketCount; ++i) { 196 labels.add(getBucketLabel(i)); 197 } 198 return labels; 199 } 200 } 201 202 /** 203 * Japanese specific locale overrides. 204 * 205 * sortKey: unchanged (same as name) 206 * nameLookupKeys: unchanged (none) 207 * labels: extends default labels by labeling unlabeled CJ characters 208 * with the Japanese character ("misc"). Japanese labels are: 209 * , , , , , , , , , , , [A-Z], #, " " 210 */ 211 private static class JapaneseContactUtils extends ContactLocaleUtilsBase { 212 // \u4ed6 is Japanese character ("misc") 213 private static final String JAPANESE_MISC_LABEL = "\u4ed6"; 214 private final int mMiscBucketIndex; 215 216 public JapaneseContactUtils(LocaleSet locales) { 217 super(locales); 218 // Determine which bucket AlphabeticIndex is lumping unclassified 219 // Japanese characters into by looking up the bucket index for 220 // a representative Kanji/CJK unified ideograph (\u65e5 is the 221 // character ''). 222 mMiscBucketIndex = super.getBucketIndex("\u65e5"); 223 } 224 225 // Set of UnicodeBlocks for unified CJK (Chinese) characters and 226 // Japanese characters. This includes all code blocks that might 227 // contain a character used in Japanese (which is why unified CJK 228 // blocks are included but Korean Hangul and jamo are not). 229 private static final Set<Character.UnicodeBlock> CJ_BLOCKS; 230 static { 231 Set<UnicodeBlock> set = new HashSet<UnicodeBlock>(); 232 set.add(UnicodeBlock.HIRAGANA); 233 set.add(UnicodeBlock.KATAKANA); 234 set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS); 235 set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS); 236 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS); 237 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A); 238 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B); 239 set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION); 240 set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT); 241 set.add(UnicodeBlock.CJK_COMPATIBILITY); 242 set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS); 243 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS); 244 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT); 245 CJ_BLOCKS = Collections.unmodifiableSet(set); 246 } 247 248 /** 249 * Helper routine to identify unlabeled Chinese or Japanese characters 250 * to put in a 'misc' bucket. 251 * 252 * @return true if the specified Unicode code point is Chinese or 253 * Japanese 254 */ 255 private static boolean isChineseOrJapanese(int codePoint) { 256 return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint)); 257 } 258 259 /** 260 * Returns the bucket index for the specified string. Adds an 261 * additional 'misc' bucket for Kanji characters to the base class set. 262 */ 263 @Override 264 public int getBucketIndex(String name) { 265 final int bucketIndex = super.getBucketIndex(name); 266 if ((bucketIndex == mMiscBucketIndex && 267 !isChineseOrJapanese(Character.codePointAt(name, 0))) || 268 bucketIndex > mMiscBucketIndex) { 269 return bucketIndex + 1; 270 } 271 return bucketIndex; 272 } 273 274 /** 275 * Returns the number of buckets in use (one more than the base class 276 * uses, because this class adds a bucket for Kanji). 277 */ 278 @Override 279 public int getBucketCount() { 280 return super.getBucketCount() + 1; 281 } 282 283 /** 284 * Returns the label for the specified bucket index if a valid index, 285 * otherwise returns an empty string. '' is returned for unclassified 286 * Kanji; for all others, the label determined by the base class is 287 * returned. 288 */ 289 @Override 290 public String getBucketLabel(int bucketIndex) { 291 if (bucketIndex == mMiscBucketIndex) { 292 return JAPANESE_MISC_LABEL; 293 } else if (bucketIndex > mMiscBucketIndex) { 294 --bucketIndex; 295 } 296 return super.getBucketLabel(bucketIndex); 297 } 298 299 @Override 300 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 301 // Hiragana and Katakana will be positively identified as Japanese. 302 if (nameStyle == PhoneticNameStyle.JAPANESE) { 303 return getRomajiNameLookupKeys(name); 304 } 305 return null; 306 } 307 308 private static boolean mInitializedTransliterator; 309 private static Transliterator mJapaneseTransliterator; 310 311 private static Transliterator getJapaneseTransliterator() { 312 synchronized(JapaneseContactUtils.class) { 313 if (!mInitializedTransliterator) { 314 mInitializedTransliterator = true; 315 Transliterator t = null; 316 try { 317 t = new Transliterator("Hiragana-Latin; Katakana-Latin;" 318 + " Latin-Ascii"); 319 } catch (RuntimeException e) { 320 Log.w(TAG, "Hiragana/Katakana-Latin transliterator data" 321 + " is missing"); 322 } 323 mJapaneseTransliterator = t; 324 } 325 return mJapaneseTransliterator; 326 } 327 } 328 329 public static Iterator<String> getRomajiNameLookupKeys(String name) { 330 final Transliterator t = getJapaneseTransliterator(); 331 if (t == null) { 332 return null; 333 } 334 final String romajiName = t.transliterate(name); 335 if (TextUtils.isEmpty(romajiName) || 336 TextUtils.equals(name, romajiName)) { 337 return null; 338 } 339 final HashSet<String> keys = new HashSet<String>(); 340 keys.add(romajiName); 341 return keys.iterator(); 342 } 343 } 344 345 /** 346 * Simplified Chinese specific locale overrides. Uses ICU Transliterator 347 * for generating pinyin transliteration. 348 * 349 * sortKey: unchanged (same as name) 350 * nameLookupKeys: adds additional name lookup keys 351 * - Chinese character's pinyin and pinyin's initial character. 352 * - Latin word and initial character. 353 * labels: unchanged 354 * Simplified Chinese labels are the same as English: [A-Z], #, " " 355 */ 356 private static class SimplifiedChineseContactUtils 357 extends ContactLocaleUtilsBase { 358 public SimplifiedChineseContactUtils(LocaleSet locales) { 359 super(locales); 360 } 361 362 @Override 363 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 364 if (nameStyle != FullNameStyle.JAPANESE && 365 nameStyle != FullNameStyle.KOREAN) { 366 return getPinyinNameLookupKeys(name); 367 } 368 return null; 369 } 370 371 public static Iterator<String> getPinyinNameLookupKeys(String name) { 372 // TODO : Reduce the object allocation. 373 HashSet<String> keys = new HashSet<String>(); 374 ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name); 375 final int tokenCount = tokens.size(); 376 final StringBuilder keyPinyin = new StringBuilder(); 377 final StringBuilder keyInitial = new StringBuilder(); 378 // There is no space among the Chinese Characters, the variant name 379 // lookup key wouldn't work for Chinese. The keyOriginal is used to 380 // build the lookup keys for itself. 381 final StringBuilder keyOriginal = new StringBuilder(); 382 for (int i = tokenCount - 1; i >= 0; i--) { 383 final Token token = tokens.get(i); 384 if (Token.UNKNOWN == token.type) { 385 continue; 386 } 387 if (Token.PINYIN == token.type) { 388 keyPinyin.insert(0, token.target); 389 keyInitial.insert(0, token.target.charAt(0)); 390 } else if (Token.LATIN == token.type) { 391 // Avoid adding space at the end of String. 392 if (keyPinyin.length() > 0) { 393 keyPinyin.insert(0, ' '); 394 } 395 if (keyOriginal.length() > 0) { 396 keyOriginal.insert(0, ' '); 397 } 398 keyPinyin.insert(0, token.source); 399 keyInitial.insert(0, token.source.charAt(0)); 400 } 401 keyOriginal.insert(0, token.source); 402 keys.add(keyOriginal.toString()); 403 keys.add(keyPinyin.toString()); 404 keys.add(keyInitial.toString()); 405 } 406 return keys.iterator(); 407 } 408 } 409 410 private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase(); 411 412 private static ContactLocaleUtils sSingleton; 413 414 private final LocaleSet mLocales; 415 private final ContactLocaleUtilsBase mUtils; 416 417 private ContactLocaleUtils(LocaleSet locales) { 418 if (locales == null) { 419 mLocales = LocaleSet.getDefault(); 420 } else { 421 mLocales = locales; 422 } 423 if (mLocales.isPrimaryLanguage(JAPANESE_LANGUAGE)) { 424 mUtils = new JapaneseContactUtils(mLocales); 425 } else if (mLocales.isPrimaryLocaleSimplifiedChinese()) { 426 mUtils = new SimplifiedChineseContactUtils(mLocales); 427 } else { 428 mUtils = new ContactLocaleUtilsBase(mLocales); 429 } 430 Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: " 431 + getLabels().toString()); 432 } 433 434 public boolean isLocale(LocaleSet locales) { 435 return mLocales.equals(locales); 436 } 437 438 public static synchronized ContactLocaleUtils getInstance() { 439 if (sSingleton == null) { 440 sSingleton = new ContactLocaleUtils(LocaleSet.getDefault()); 441 } 442 return sSingleton; 443 } 444 445 @VisibleForTesting 446 public static synchronized void setLocale(Locale locale) { 447 setLocales(new LocaleSet(locale)); 448 } 449 450 public static synchronized void setLocales(LocaleSet locales) { 451 if (sSingleton == null || !sSingleton.isLocale(locales)) { 452 sSingleton = new ContactLocaleUtils(locales); 453 } 454 } 455 456 public String getSortKey(String name, int nameStyle) { 457 return mUtils.getSortKey(name); 458 } 459 460 public int getBucketIndex(String name) { 461 return mUtils.getBucketIndex(name); 462 } 463 464 public int getBucketCount() { 465 return mUtils.getBucketCount(); 466 } 467 468 public String getBucketLabel(int bucketIndex) { 469 return mUtils.getBucketLabel(bucketIndex); 470 } 471 472 public String getLabel(String name) { 473 return getBucketLabel(getBucketIndex(name)); 474 } 475 476 public ArrayList<String> getLabels() { 477 return mUtils.getLabels(); 478 } 479 480 /** 481 * Determine which utility should be used for generating NameLookupKey. 482 * (ie, whether we generate Romaji or Pinyin lookup keys or not) 483 * 484 * Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified 485 * and tagged as CJK. For Hiragana/Katakana names, generate Romaji 486 * lookup keys when not in a Chinese or Korean locale. 487 * 488 * Otherwise, use the default behavior of that locale: 489 * a. For Japan, generate Romaji lookup keys for Hiragana/Katakana. 490 * b. For Simplified Chinese locale, generate Pinyin lookup keys. 491 */ 492 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 493 if (!mLocales.isPrimaryLocaleCJK()) { 494 if (mLocales.isSecondaryLocaleSimplifiedChinese()) { 495 if (nameStyle == FullNameStyle.CHINESE || 496 nameStyle == FullNameStyle.CJK) { 497 return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name); 498 } 499 } else { 500 if (nameStyle == FullNameStyle.JAPANESE) { 501 return JapaneseContactUtils.getRomajiNameLookupKeys(name); 502 } 503 } 504 } 505 return mUtils.getNameLookupKeys(name, nameStyle); 506 } 507 508 } 509