1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 17 package com.android.providers.contacts; 18 19 import android.provider.ContactsContract.FullNameStyle; 20 import android.provider.ContactsContract.PhoneticNameStyle; 21 import android.text.TextUtils; 22 import android.util.Log; 23 24 import com.android.providers.contacts.HanziToPinyin.Token; 25 26 import java.lang.Character.UnicodeBlock; 27 import java.util.Arrays; 28 import java.util.ArrayList; 29 import java.util.Collections; 30 import java.util.HashMap; 31 import java.util.HashSet; 32 import java.util.Iterator; 33 import java.util.List; 34 import java.util.Locale; 35 import java.util.Map; 36 import java.util.Set; 37 38 import libcore.icu.AlphabeticIndex; 39 import libcore.icu.AlphabeticIndex.ImmutableIndex; 40 import libcore.icu.Transliterator; 41 42 /** 43 * This utility class provides specialized handling for locale specific 44 * information: labels, name lookup keys. 45 */ 46 public class ContactLocaleUtils { 47 public static final String TAG = "ContactLocale"; 48 49 public static final Locale LOCALE_ARABIC = new Locale("ar"); 50 public static final Locale LOCALE_GREEK = new Locale("el"); 51 public static final Locale LOCALE_HEBREW = new Locale("he"); 52 // Ukrainian labels are superset of Russian 53 public static final Locale LOCALE_UKRAINIAN = new Locale("uk"); 54 public static final Locale LOCALE_THAI = new Locale("th"); 55 56 /** 57 * This class is the default implementation and should be the base class 58 * for other locales. 59 * 60 * sortKey: same as name 61 * nameLookupKeys: none 62 * labels: uses ICU AlphabeticIndex for labels and extends by labeling 63 * phone numbers "#". Eg English labels are: [A-Z], #, " " 64 */ 65 private static class ContactLocaleUtilsBase { 66 private static final String EMPTY_STRING = ""; 67 private static final String NUMBER_STRING = "#"; 68 69 protected final ImmutableIndex mAlphabeticIndex; 70 private final int mAlphabeticIndexBucketCount; 71 private final int mNumberBucketIndex; 72 73 public ContactLocaleUtilsBase(Locale locale) { 74 // AlphabeticIndex.getBucketLabel() uses a binary search across 75 // the entire label set so care should be taken about growing this 76 // set too large. The following set determines for which locales 77 // we will show labels other than your primary locale. General rules 78 // of thumb for adding a locale: should be a supported locale; and 79 // should not be included if from a name it is not deterministic 80 // which way to label it (so eg Chinese cannot be added because 81 // the labeling of a Chinese character varies between Simplified, 82 // Traditional, and Japanese locales). Use English only for all 83 // Latin based alphabets. Ukrainian is chosen for Cyrillic because 84 // its alphabet is a superset of Russian. 85 mAlphabeticIndex = new AlphabeticIndex(locale) 86 .setMaxLabelCount(300) 87 .addLabels(Locale.ENGLISH) 88 .addLabels(Locale.JAPANESE) 89 .addLabels(Locale.KOREAN) 90 .addLabels(LOCALE_THAI) 91 .addLabels(LOCALE_ARABIC) 92 .addLabels(LOCALE_HEBREW) 93 .addLabels(LOCALE_GREEK) 94 .addLabels(LOCALE_UKRAINIAN) 95 .getImmutableIndex(); 96 mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount(); 97 mNumberBucketIndex = mAlphabeticIndexBucketCount - 1; 98 } 99 100 public String getSortKey(String name) { 101 return name; 102 } 103 104 /** 105 * Returns the bucket index for the specified string. AlphabeticIndex 106 * sorts strings into buckets numbered in order from 0 to N, where the 107 * exact value of N depends on how many representative index labels are 108 * used in a particular locale. This routine adds one additional bucket 109 * for phone numbers. It attempts to detect phone numbers and shifts 110 * the bucket indexes returned by AlphabeticIndex in order to make room 111 * for the new # bucket, so the returned range becomes 0 to N+1. 112 */ 113 public int getBucketIndex(String name) { 114 boolean prefixIsNumeric = false; 115 final int length = name.length(); 116 int offset = 0; 117 while (offset < length) { 118 int codePoint = Character.codePointAt(name, offset); 119 // Ignore standard phone number separators and identify any 120 // string that otherwise starts with a number. 121 if (Character.isDigit(codePoint)) { 122 prefixIsNumeric = true; 123 break; 124 } else if (!Character.isSpaceChar(codePoint) && 125 codePoint != '+' && codePoint != '(' && 126 codePoint != ')' && codePoint != '.' && 127 codePoint != '-' && codePoint != '#') { 128 break; 129 } 130 offset += Character.charCount(codePoint); 131 } 132 if (prefixIsNumeric) { 133 return mNumberBucketIndex; 134 } 135 136 final int bucket = mAlphabeticIndex.getBucketIndex(name); 137 if (bucket < 0) { 138 return -1; 139 } 140 if (bucket >= mNumberBucketIndex) { 141 return bucket + 1; 142 } 143 return bucket; 144 } 145 146 /** 147 * Returns the number of buckets in use (one more than AlphabeticIndex 148 * uses, because this class adds a bucket for phone numbers). 149 */ 150 public int getBucketCount() { 151 return mAlphabeticIndexBucketCount + 1; 152 } 153 154 /** 155 * Returns the label for the specified bucket index if a valid index, 156 * otherwise returns an empty string. '#' is returned for the phone 157 * number bucket; for all others, the AlphabeticIndex label is returned. 158 */ 159 public String getBucketLabel(int bucketIndex) { 160 if (bucketIndex < 0 || bucketIndex >= getBucketCount()) { 161 return EMPTY_STRING; 162 } else if (bucketIndex == mNumberBucketIndex) { 163 return NUMBER_STRING; 164 } else if (bucketIndex > mNumberBucketIndex) { 165 --bucketIndex; 166 } 167 return mAlphabeticIndex.getBucketLabel(bucketIndex); 168 } 169 170 @SuppressWarnings("unused") 171 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 172 return null; 173 } 174 175 public ArrayList<String> getLabels() { 176 final int bucketCount = getBucketCount(); 177 final ArrayList<String> labels = new ArrayList<String>(bucketCount); 178 for(int i = 0; i < bucketCount; ++i) { 179 labels.add(getBucketLabel(i)); 180 } 181 return labels; 182 } 183 } 184 185 /** 186 * Japanese specific locale overrides. 187 * 188 * sortKey: unchanged (same as name) 189 * nameLookupKeys: unchanged (none) 190 * labels: extends default labels by labeling unlabeled CJ characters 191 * with the Japanese character ("misc"). Japanese labels are: 192 * , , , , , , , , , , , [A-Z], #, " " 193 */ 194 private static class JapaneseContactUtils extends ContactLocaleUtilsBase { 195 // \u4ed6 is Japanese character ("misc") 196 private static final String JAPANESE_MISC_LABEL = "\u4ed6"; 197 private final int mMiscBucketIndex; 198 199 public JapaneseContactUtils(Locale locale) { 200 super(locale); 201 // Determine which bucket AlphabeticIndex is lumping unclassified 202 // Japanese characters into by looking up the bucket index for 203 // a representative Kanji/CJK unified ideograph (\u65e5 is the 204 // character ''). 205 mMiscBucketIndex = super.getBucketIndex("\u65e5"); 206 } 207 208 // Set of UnicodeBlocks for unified CJK (Chinese) characters and 209 // Japanese characters. This includes all code blocks that might 210 // contain a character used in Japanese (which is why unified CJK 211 // blocks are included but Korean Hangul and jamo are not). 212 private static final Set<Character.UnicodeBlock> CJ_BLOCKS; 213 static { 214 Set<UnicodeBlock> set = new HashSet<UnicodeBlock>(); 215 set.add(UnicodeBlock.HIRAGANA); 216 set.add(UnicodeBlock.KATAKANA); 217 set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS); 218 set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS); 219 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS); 220 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A); 221 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B); 222 set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION); 223 set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT); 224 set.add(UnicodeBlock.CJK_COMPATIBILITY); 225 set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS); 226 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS); 227 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT); 228 CJ_BLOCKS = Collections.unmodifiableSet(set); 229 } 230 231 /** 232 * Helper routine to identify unlabeled Chinese or Japanese characters 233 * to put in a 'misc' bucket. 234 * 235 * @return true if the specified Unicode code point is Chinese or 236 * Japanese 237 */ 238 private static boolean isChineseOrJapanese(int codePoint) { 239 return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint)); 240 } 241 242 /** 243 * Returns the bucket index for the specified string. Adds an 244 * additional 'misc' bucket for Kanji characters to the base class set. 245 */ 246 @Override 247 public int getBucketIndex(String name) { 248 final int bucketIndex = super.getBucketIndex(name); 249 if ((bucketIndex == mMiscBucketIndex && 250 !isChineseOrJapanese(Character.codePointAt(name, 0))) || 251 bucketIndex > mMiscBucketIndex) { 252 return bucketIndex + 1; 253 } 254 return bucketIndex; 255 } 256 257 /** 258 * Returns the number of buckets in use (one more than the base class 259 * uses, because this class adds a bucket for Kanji). 260 */ 261 @Override 262 public int getBucketCount() { 263 return super.getBucketCount() + 1; 264 } 265 266 /** 267 * Returns the label for the specified bucket index if a valid index, 268 * otherwise returns an empty string. '' is returned for unclassified 269 * Kanji; for all others, the label determined by the base class is 270 * returned. 271 */ 272 @Override 273 public String getBucketLabel(int bucketIndex) { 274 if (bucketIndex == mMiscBucketIndex) { 275 return JAPANESE_MISC_LABEL; 276 } else if (bucketIndex > mMiscBucketIndex) { 277 --bucketIndex; 278 } 279 return super.getBucketLabel(bucketIndex); 280 } 281 282 @Override 283 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 284 // Hiragana and Katakana will be positively identified as Japanese. 285 if (nameStyle == PhoneticNameStyle.JAPANESE) { 286 return getRomajiNameLookupKeys(name); 287 } 288 return null; 289 } 290 291 private static boolean mInitializedTransliterator; 292 private static Transliterator mJapaneseTransliterator; 293 294 private static Transliterator getJapaneseTransliterator() { 295 synchronized(JapaneseContactUtils.class) { 296 if (!mInitializedTransliterator) { 297 mInitializedTransliterator = true; 298 Transliterator t = null; 299 try { 300 t = new Transliterator("Hiragana-Latin; Katakana-Latin;" 301 + " Latin-Ascii"); 302 } catch (RuntimeException e) { 303 Log.w(TAG, "Hiragana/Katakana-Latin transliterator data" 304 + " is missing"); 305 } 306 mJapaneseTransliterator = t; 307 } 308 return mJapaneseTransliterator; 309 } 310 } 311 312 public static Iterator<String> getRomajiNameLookupKeys(String name) { 313 final Transliterator t = getJapaneseTransliterator(); 314 if (t == null) { 315 return null; 316 } 317 final String romajiName = t.transliterate(name); 318 if (TextUtils.isEmpty(romajiName) || 319 TextUtils.equals(name, romajiName)) { 320 return null; 321 } 322 final HashSet<String> keys = new HashSet<String>(); 323 keys.add(romajiName); 324 return keys.iterator(); 325 } 326 } 327 328 /** 329 * Simplified Chinese specific locale overrides. Uses ICU Transliterator 330 * for generating pinyin transliteration. 331 * 332 * sortKey: unchanged (same as name) 333 * nameLookupKeys: adds additional name lookup keys 334 * - Chinese character's pinyin and pinyin's initial character. 335 * - Latin word and initial character. 336 * labels: unchanged 337 * Simplified Chinese labels are the same as English: [A-Z], #, " " 338 */ 339 private static class SimplifiedChineseContactUtils 340 extends ContactLocaleUtilsBase { 341 public SimplifiedChineseContactUtils(Locale locale) { 342 super(locale); 343 } 344 345 @Override 346 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 347 if (nameStyle != FullNameStyle.JAPANESE && 348 nameStyle != FullNameStyle.KOREAN) { 349 return getPinyinNameLookupKeys(name); 350 } 351 return null; 352 } 353 354 public static Iterator<String> getPinyinNameLookupKeys(String name) { 355 // TODO : Reduce the object allocation. 356 HashSet<String> keys = new HashSet<String>(); 357 ArrayList<Token> tokens = HanziToPinyin.getInstance().get(name); 358 final int tokenCount = tokens.size(); 359 final StringBuilder keyPinyin = new StringBuilder(); 360 final StringBuilder keyInitial = new StringBuilder(); 361 // There is no space among the Chinese Characters, the variant name 362 // lookup key wouldn't work for Chinese. The keyOriginal is used to 363 // build the lookup keys for itself. 364 final StringBuilder keyOriginal = new StringBuilder(); 365 for (int i = tokenCount - 1; i >= 0; i--) { 366 final Token token = tokens.get(i); 367 if (Token.UNKNOWN == token.type) { 368 continue; 369 } 370 if (Token.PINYIN == token.type) { 371 keyPinyin.insert(0, token.target); 372 keyInitial.insert(0, token.target.charAt(0)); 373 } else if (Token.LATIN == token.type) { 374 // Avoid adding space at the end of String. 375 if (keyPinyin.length() > 0) { 376 keyPinyin.insert(0, ' '); 377 } 378 if (keyOriginal.length() > 0) { 379 keyOriginal.insert(0, ' '); 380 } 381 keyPinyin.insert(0, token.source); 382 keyInitial.insert(0, token.source.charAt(0)); 383 } 384 keyOriginal.insert(0, token.source); 385 keys.add(keyOriginal.toString()); 386 keys.add(keyPinyin.toString()); 387 keys.add(keyInitial.toString()); 388 } 389 return keys.iterator(); 390 } 391 } 392 393 private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase(); 394 private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase(); 395 private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase(); 396 397 private static ContactLocaleUtils sSingleton; 398 399 private final Locale mLocale; 400 private final String mLanguage; 401 private final ContactLocaleUtilsBase mUtils; 402 403 private ContactLocaleUtils(Locale locale) { 404 if (locale == null) { 405 mLocale = Locale.getDefault(); 406 } else { 407 mLocale = locale; 408 } 409 mLanguage = mLocale.getLanguage().toLowerCase(); 410 if (mLanguage.equals(JAPANESE_LANGUAGE)) { 411 mUtils = new JapaneseContactUtils(mLocale); 412 } else if (mLocale.equals(Locale.CHINA)) { 413 mUtils = new SimplifiedChineseContactUtils(mLocale); 414 } else { 415 mUtils = new ContactLocaleUtilsBase(mLocale); 416 } 417 Log.i(TAG, "AddressBook Labels [" + mLocale.toString() + "]: " 418 + getLabels().toString()); 419 } 420 421 public boolean isLocale(Locale locale) { 422 return mLocale.equals(locale); 423 } 424 425 public static synchronized ContactLocaleUtils getInstance() { 426 if (sSingleton == null) { 427 sSingleton = new ContactLocaleUtils(null); 428 } 429 return sSingleton; 430 } 431 432 public static synchronized void setLocale(Locale locale) { 433 if (sSingleton == null || !sSingleton.isLocale(locale)) { 434 sSingleton = new ContactLocaleUtils(locale); 435 } 436 } 437 438 public String getSortKey(String name, int nameStyle) { 439 return mUtils.getSortKey(name); 440 } 441 442 public int getBucketIndex(String name) { 443 return mUtils.getBucketIndex(name); 444 } 445 446 public int getBucketCount() { 447 return mUtils.getBucketCount(); 448 } 449 450 public String getBucketLabel(int bucketIndex) { 451 return mUtils.getBucketLabel(bucketIndex); 452 } 453 454 public String getLabel(String name) { 455 return getBucketLabel(getBucketIndex(name)); 456 } 457 458 public ArrayList<String> getLabels() { 459 return mUtils.getLabels(); 460 } 461 462 /** 463 * Determine which utility should be used for generating NameLookupKey. 464 * (ie, whether we generate Pinyin lookup keys or not) 465 * 466 * Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified 467 * and tagged as CJK. For Hiragana/Katakana names, generate Romaji 468 * lookup keys when not in a Chinese or Korean locale. 469 * 470 * Otherwise, use the default behavior of that locale: 471 * a. For Japan, generate Romaji lookup keys for Hiragana/Katakana. 472 * b. For Simplified Chinese locale, generate Pinyin lookup keys. 473 */ 474 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 475 if (nameStyle == FullNameStyle.JAPANESE && 476 !CHINESE_LANGUAGE.equals(mLanguage) && 477 !KOREAN_LANGUAGE.equals(mLanguage)) { 478 return JapaneseContactUtils.getRomajiNameLookupKeys(name); 479 } 480 return mUtils.getNameLookupKeys(name, nameStyle); 481 } 482 483 } 484