Home | History | Annotate | Download | only in contacts
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 
     17 package com.android.providers.contacts;
     18 
     19 import android.icu.text.AlphabeticIndex;
     20 import android.icu.text.AlphabeticIndex.ImmutableIndex;
     21 import android.icu.text.Transliterator;
     22 import android.os.LocaleList;
     23 import android.provider.ContactsContract.FullNameStyle;
     24 import android.provider.ContactsContract.PhoneticNameStyle;
     25 import android.text.TextUtils;
     26 import android.util.Log;
     27 
     28 import com.android.providers.contacts.HanziToPinyin.Token;
     29 
     30 import com.google.common.annotations.VisibleForTesting;
     31 
     32 import java.lang.Character.UnicodeBlock;
     33 import java.util.ArrayList;
     34 import java.util.Collections;
     35 import java.util.HashSet;
     36 import java.util.Iterator;
     37 import java.util.List;
     38 import java.util.Locale;
     39 import java.util.Set;
     40 
     41 
     42 /**
     43  * This utility class provides specialized handling for locale specific
     44  * information: labels, name lookup keys.
     45  */
     46 public class ContactLocaleUtils {
    /** Tag used for every log message emitted by this class. */
    public static final String TAG = "ContactLocale";

    /** Guards the verbose Log.d output in this class. */
    private static final boolean DEBUG = false; // don't submit with true

    // Locales for which java.util.Locale offers no predefined constant.
    public static final Locale LOCALE_ARABIC = new Locale("ar");
    public static final Locale LOCALE_GREEK = new Locale("el");
    public static final Locale LOCALE_HEBREW = new Locale("he");
    // Serbian and Ukrainian labels are complementary supersets of Russian
    public static final Locale LOCALE_SERBIAN = new Locale("sr");
    public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
    public static final Locale LOCALE_THAI = new Locale("th");

    // -- Note for adding locales to sDefaultLabelLocales --
    //
    // AlphabeticIndex.getBucketLabel() uses a binary search across
    // the entire label set so care should be taken about growing this
    // set too large. The following set determines for which locales
    // we will show labels other than your primary locale. General rules
    // of thumb for adding a locale: should be a supported locale; and
    // should not be included if from a name it is not deterministic
    // which way to label it (so eg Chinese cannot be added because
    // the labeling of a Chinese character varies between Simplified,
    // Traditional, and Japanese locales). Use English only for all
    // Latin based alphabets. Ukrainian and Serbian are chosen for
    // Cyrillic because their alphabets are complementary supersets
    // of Russian.
    //
    // These locales are appended after the system locales when the
    // index buckets are built (see getLocalesForBuckets()).
    private static final Locale[] sDefaultLabelLocales = new Locale[]{
            Locale.ENGLISH,
            Locale.JAPANESE,
            Locale.KOREAN,
            LOCALE_THAI,
            LOCALE_ARABIC,
            LOCALE_HEBREW,
            LOCALE_GREEK,
            LOCALE_UKRAINIAN,
            LOCALE_SERBIAN,
    };
     84 
     85     @VisibleForTesting
     86     static void dumpIndex(ImmutableIndex index) {
     87         final StringBuilder labels = new StringBuilder();
     88         String sep = "";
     89         for (int i = 0; i < index.getBucketCount(); i++) {
     90             labels.append(sep);
     91             labels.append(index.getBucket(i).getLabel());
     92             sep = ",";
     93         }
     94         Log.d(TAG, "Labels=[" + labels + "]");
     95     }
     96 
     97     /**
     98      * This class is the default implementation and should be the base class
     99      * for other locales.
    100      *
    101      * sortKey: same as name
    102      * nameLookupKeys: none
    103      * labels: uses ICU AlphabeticIndex for labels and extends by labeling
    104      *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
    105      */
    106     private static class ContactLocaleUtilsBase {
        // Label returned by getBucketLabel() for an out-of-range bucket index.
        private static final String EMPTY_STRING = "";
        // Label returned by getBucketLabel() for the phone number bucket.
        private static final String NUMBER_STRING = "#";

        // Immutable index built in the constructor from the bucket locales.
        protected final ImmutableIndex mAlphabeticIndex;
        // Cached value of mAlphabeticIndex.getBucketCount().
        private final int mAlphabeticIndexBucketCount;
        // Position of the "#" bucket; set to mAlphabeticIndexBucketCount - 1.
        private final int mNumberBucketIndex;
        // When true, getBucketIndex() runs names through HanziToPinyin before
        // asking mAlphabeticIndex for a bucket.
        private final boolean mUsePinyinTransliterator;
    114 
    115         public ContactLocaleUtilsBase(LocaleSet systemLocales) {
    116             mUsePinyinTransliterator = systemLocales.shouldPreferSimplifiedChinese();
    117 
    118             // Build the index buckets based on the current system locale set and
    119             // sDefaultLabelLocales.
    120             if (DEBUG) {
    121                 Log.d(TAG, "Building index buckets...");
    122             }
    123             final List<Locale> locales = getLocalesForBuckets(systemLocales);
    124 
    125             AlphabeticIndex ai = new AlphabeticIndex(locales.get(0))
    126                     .setMaxLabelCount(300);
    127             for (int i = 1; i < locales.size(); i++) {
    128                 ai.addLabels(locales.get(i));
    129             }
    130 
    131             mAlphabeticIndex = ai.buildImmutableIndex();
    132             mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
    133             mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
    134             if (DEBUG) {
    135                 dumpIndex(mAlphabeticIndex);
    136             }
    137         }
    138 
    139         static List<Locale> getLocalesForBuckets(LocaleSet systemLocales) {
    140 
    141             // Create a list of locales that should be used to generate the index buckets.
    142             // - Source: the system locales and sDefaultLabelLocales.
    143             // - Rules:
    144             //   - Don't add the same locale multiple times.
    145             //   - Also special rules for Chinese (b/31115382):
    146             //     - Don't add multiple Chinese locales.
    147             //     - Don't add any Chinese locales after Japanese.
    148 
    149             // First, collect all the locales (allowing duplicates).
    150             final LocaleList localeList = systemLocales.getAllLocales();
    151 
    152             final List<Locale> locales = new ArrayList<>(
    153                     localeList.size() + sDefaultLabelLocales.length);
    154             for (int i = 0; i < localeList.size(); i++) {
    155                 locales.add(localeList.get(i));
    156             }
    157             for (int i = 0; i < sDefaultLabelLocales.length; i++) {
    158                 locales.add(sDefaultLabelLocales[i]);
    159             }
    160 
    161             // Then apply the rules to generate the final list.
    162             final List<Locale> ret = new ArrayList<>(locales.size());
    163             boolean allowChinese = true;
    164 
    165             for (int i = 0; i < locales.size(); i++) {
    166                 final Locale locale = locales.get(i);
    167 
    168                 if (ret.contains(locale)) {
    169                     continue;
    170                 }
    171                 if (LocaleSet.isLanguageChinese(locale)) {
    172                     if (!allowChinese) {
    173                         continue;
    174                     }
    175                     allowChinese = false;
    176                 }
    177                 if (LocaleSet.isLanguageJapanese(locale)) {
    178                     allowChinese = false;
    179                 }
    180                 if (DEBUG) {
    181                     Log.d(TAG, "  Adding locale: " + locale.toLanguageTag());
    182                 }
    183                 ret.add(locale);
    184             }
    185             return ret;
    186         }
    187 
        /**
         * Returns the sort key for a name; in this base class the key is the
         * name itself, unchanged.
         */
        public String getSortKey(String name) {
            return name;
        }
    191 
        /**
         * Returns the index of the "#" bucket that getBucketIndex() assigns
         * to names with a numeric prefix.
         */
        public int getNumberBucketIndex() {
            return mNumberBucketIndex;
        }
    195 
    196         /**
    197          * Returns the bucket index for the specified string. AlphabeticIndex
    198          * sorts strings into buckets numbered in order from 0 to N, where the
    199          * exact value of N depends on how many representative index labels are
    200          * used in a particular locale. This routine adds one additional bucket
    201          * for phone numbers. It attempts to detect phone numbers and shifts
    202          * the bucket indexes returned by AlphabeticIndex in order to make room
    203          * for the new # bucket, so the returned range becomes 0 to N+1.
    204          */
    205         public int getBucketIndex(String name) {
    206             boolean prefixIsNumeric = false;
    207             final int length = name.length();
    208             int offset = 0;
    209             while (offset < length) {
    210                 int codePoint = Character.codePointAt(name, offset);
    211                 // Ignore standard phone number separators and identify any
    212                 // string that otherwise starts with a number.
    213                 if (Character.isDigit(codePoint)) {
    214                     prefixIsNumeric = true;
    215                     break;
    216                 } else if (!Character.isSpaceChar(codePoint) &&
    217                            codePoint != '+' && codePoint != '(' &&
    218                            codePoint != ')' && codePoint != '.' &&
    219                            codePoint != '-' && codePoint != '#') {
    220                     break;
    221                 }
    222                 offset += Character.charCount(codePoint);
    223             }
    224             if (prefixIsNumeric) {
    225                 return mNumberBucketIndex;
    226             }
    227 
    228             /**
    229              * ICU 55 AlphabeticIndex doesn't support Simplified Chinese
    230              * as a secondary locale so it is necessary to use the
    231              * Pinyin transliterator. We also use this for a Simplified
    232              * Chinese primary locale because it gives more accurate letter
    233              * buckets. b/19835686
    234              */
    235             if (mUsePinyinTransliterator) {
    236                 name = HanziToPinyin.getInstance().transliterate(name);
    237             }
    238             final int bucket = mAlphabeticIndex.getBucketIndex(name);
    239             if (bucket < 0) {
    240                 return -1;
    241             }
    242             if (bucket >= mNumberBucketIndex) {
    243                 return bucket + 1;
    244             }
    245             return bucket;
    246         }
    247 
        /**
         * Returns the number of buckets in use (one more than AlphabeticIndex
         * uses, because this class adds a bucket for phone numbers).
         *
         * @return the cached AlphabeticIndex bucket count plus one
         */
        public int getBucketCount() {
            return mAlphabeticIndexBucketCount + 1;
        }
    255 
    256         /**
    257          * Returns the label for the specified bucket index if a valid index,
    258          * otherwise returns an empty string. '#' is returned for the phone
    259          * number bucket; for all others, the AlphabeticIndex label is returned.
    260          */
    261         public String getBucketLabel(int bucketIndex) {
    262             if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
    263                 return EMPTY_STRING;
    264             } else if (bucketIndex == mNumberBucketIndex) {
    265                 return NUMBER_STRING;
    266             } else if (bucketIndex > mNumberBucketIndex) {
    267                 --bucketIndex;
    268             }
    269             return mAlphabeticIndex.getBucket(bucketIndex).getLabel();
    270         }
    271 
        /**
         * Returns additional name lookup keys for the given name. This base
         * implementation produces none and always returns null; both
         * parameters are ignored.
         */
        @SuppressWarnings("unused")
        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
            return null;
        }
    276 
    277         public ArrayList<String> getLabels() {
    278             final int bucketCount = getBucketCount();
    279             final ArrayList<String> labels = new ArrayList<String>(bucketCount);
    280             for(int i = 0; i < bucketCount; ++i) {
    281                 labels.add(getBucketLabel(i));
    282             }
    283             return labels;
    284         }
    285     }
    286 
    287     /**
    288      * Japanese specific locale overrides.
    289      *
    290      * sortKey: unchanged (same as name)
    291      * nameLookupKeys: unchanged (none)
    292      * labels: extends default labels by labeling unlabeled CJ characters
     *     with the Japanese character 他 ("misc"). Japanese labels are:
     *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
    295      */
    296     private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
        // \u4ed6 is Japanese character 他 ("misc")
        private static final String JAPANESE_MISC_LABEL = "\u4ed6";
        // Index of the added 'misc' bucket; computed once in the constructor.
        private final int mMiscBucketIndex;
    300 
        /**
         * Builds the base index for the given locales and records which
         * bucket becomes the Japanese 'misc' bucket.
         */
        public JapaneseContactUtils(LocaleSet locales) {
            super(locales);
            // Determine which bucket AlphabeticIndex is lumping unclassified
            // Japanese characters into by looking up the bucket index for
            // a representative Kanji/CJK unified ideograph (\u65e5 is the
            // character '日').
            mMiscBucketIndex = super.getBucketIndex("\u65e5");
        }
    309 
    310         // Set of UnicodeBlocks for unified CJK (Chinese) characters and
    311         // Japanese characters. This includes all code blocks that might
    312         // contain a character used in Japanese (which is why unified CJK
    313         // blocks are included but Korean Hangul and jamo are not).
    314         private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
    315         static {
    316             Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
    317             set.add(UnicodeBlock.HIRAGANA);
    318             set.add(UnicodeBlock.KATAKANA);
    319             set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
    320             set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
    321             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
    322             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
    323             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
    324             set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
    325             set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
    326             set.add(UnicodeBlock.CJK_COMPATIBILITY);
    327             set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
    328             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
    329             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
    330             CJ_BLOCKS = Collections.unmodifiableSet(set);
    331         }
    332 
    333         /**
    334          * Helper routine to identify unlabeled Chinese or Japanese characters
    335          * to put in a 'misc' bucket.
    336          *
    337          * @return true if the specified Unicode code point is Chinese or
    338          *              Japanese
    339          */
    340         private static boolean isChineseOrJapanese(int codePoint) {
    341             return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
    342         }
    343 
    344         /**
    345          * Returns the bucket index for the specified string. Adds an
    346          * additional 'misc' bucket for Kanji characters to the base class set.
    347          */
    348         @Override
    349         public int getBucketIndex(String name) {
    350             final int bucketIndex = super.getBucketIndex(name);
    351             if ((bucketIndex == mMiscBucketIndex &&
    352                  !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
    353                 bucketIndex > mMiscBucketIndex) {
    354                 return bucketIndex + 1;
    355             }
    356             return bucketIndex;
    357         }
    358 
        /**
         * Returns the number of buckets in use (one more than the base class
         * uses, because this class adds a bucket for Kanji).
         *
         * @return the base class bucket count plus one
         */
        @Override
        public int getBucketCount() {
            return super.getBucketCount() + 1;
        }
    367 
    368         /**
    369          * Returns the label for the specified bucket index if a valid index,
    370          * otherwise returns an empty string. '' is returned for unclassified
    371          * Kanji; for all others, the label determined by the base class is
    372          * returned.
    373          */
    374         @Override
    375         public String getBucketLabel(int bucketIndex) {
    376             if (bucketIndex == mMiscBucketIndex) {
    377                 return JAPANESE_MISC_LABEL;
    378             } else if (bucketIndex > mMiscBucketIndex) {
    379                 --bucketIndex;
    380             }
    381             return super.getBucketLabel(bucketIndex);
    382         }
    383 
    384         @Override
    385         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    386             // Hiragana and Katakana will be positively identified as Japanese.
    387             if (nameStyle == PhoneticNameStyle.JAPANESE) {
    388                 return getRomajiNameLookupKeys(name);
    389             }
    390             return null;
    391         }
    392 
    393         private static boolean mInitializedTransliterator;
    394         private static Transliterator mJapaneseTransliterator;
    395 
    396         private static Transliterator getJapaneseTransliterator() {
    397             synchronized(JapaneseContactUtils.class) {
    398                 if (!mInitializedTransliterator) {
    399                     mInitializedTransliterator = true;
    400                     Transliterator t = null;
    401                     try {
    402                         t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;"
    403                                 + " Latin-Ascii");
    404                     } catch (IllegalArgumentException e) {
    405                         Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
    406                                 + " is missing");
    407                     }
    408                     mJapaneseTransliterator = t;
    409                 }
    410                 return mJapaneseTransliterator;
    411             }
    412         }
    413 
    414         public static Iterator<String> getRomajiNameLookupKeys(String name) {
    415             final Transliterator t = getJapaneseTransliterator();
    416             if (t == null) {
    417                 return null;
    418             }
    419             final String romajiName = t.transliterate(name);
    420             if (TextUtils.isEmpty(romajiName) ||
    421                     TextUtils.equals(name, romajiName)) {
    422                 return null;
    423             }
    424             final HashSet<String> keys = new HashSet<String>();
    425             keys.add(romajiName);
    426             return keys.iterator();
    427         }
    428     }
    429 
    430     /**
    431      * Simplified Chinese specific locale overrides. Uses ICU Transliterator
    432      * for generating pinyin transliteration.
    433      *
    434      * sortKey: unchanged (same as name)
    435      * nameLookupKeys: adds additional name lookup keys
    436      *     - Chinese character's pinyin and pinyin's initial character.
    437      *     - Latin word and initial character.
    438      * labels: unchanged
    439      *     Simplified Chinese labels are the same as English: [A-Z], #, " "
    440      */
    441     private static class SimplifiedChineseContactUtils
    442         extends ContactLocaleUtilsBase {
        /** Delegates all construction work to the base class. */
        public SimplifiedChineseContactUtils(LocaleSet locales) {
            super(locales);
        }
    446 
    447         @Override
    448         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    449             if (nameStyle != FullNameStyle.JAPANESE &&
    450                     nameStyle != FullNameStyle.KOREAN) {
    451                 return getPinyinNameLookupKeys(name);
    452             }
    453             return null;
    454         }
    455 
    456         public static Iterator<String> getPinyinNameLookupKeys(String name) {
    457             // TODO : Reduce the object allocation.
    458             HashSet<String> keys = new HashSet<String>();
    459             ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
    460             final int tokenCount = tokens.size();
    461             final StringBuilder keyPinyin = new StringBuilder();
    462             final StringBuilder keyInitial = new StringBuilder();
    463             // There is no space among the Chinese Characters, the variant name
    464             // lookup key wouldn't work for Chinese. The keyOriginal is used to
    465             // build the lookup keys for itself.
    466             final StringBuilder keyOriginal = new StringBuilder();
    467             for (int i = tokenCount - 1; i >= 0; i--) {
    468                 final Token token = tokens.get(i);
    469                 if (Token.UNKNOWN == token.type) {
    470                     continue;
    471                 }
    472                 if (Token.PINYIN == token.type) {
    473                     keyPinyin.insert(0, token.target);
    474                     keyInitial.insert(0, token.target.charAt(0));
    475                 } else if (Token.LATIN == token.type) {
    476                     // Avoid adding space at the end of String.
    477                     if (keyPinyin.length() > 0) {
    478                         keyPinyin.insert(0, ' ');
    479                     }
    480                     if (keyOriginal.length() > 0) {
    481                         keyOriginal.insert(0, ' ');
    482                     }
    483                     keyPinyin.insert(0, token.source);
    484                     keyInitial.insert(0, token.source.charAt(0));
    485                 }
    486                 keyOriginal.insert(0, token.source);
    487                 keys.add(keyOriginal.toString());
    488                 keys.add(keyPinyin.toString());
    489                 keys.add(keyInitial.toString());
    490             }
    491             return keys.iterator();
    492         }
    493     }
    494 
    // Shared instance; created in getInstance() and replaced in setLocales().
    private static ContactLocaleUtils sSingleton;

    // Locale set this instance was built for.
    private final LocaleSet mLocales;
    // Locale-specific implementation chosen in the constructor.
    private final ContactLocaleUtilsBase mUtils;
    499 
    /**
     * Creates the utilities for the given locale set, picking the Japanese,
     * Simplified Chinese, or default implementation, and logs the resulting
     * labels.
     *
     * @param locales the locale set to use; null means LocaleSet.newDefault()
     */
    private ContactLocaleUtils(LocaleSet locales) {
        mLocales = (locales != null) ? locales : LocaleSet.newDefault();
        mUtils = createUtilsFor(mLocales);
        Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
                + getLabels().toString());
    }

    /** Picks the implementation for a locale set; Japanese takes precedence. */
    private static ContactLocaleUtilsBase createUtilsFor(LocaleSet locales) {
        if (locales.shouldPreferJapanese()) {
            return new JapaneseContactUtils(locales);
        }
        if (locales.shouldPreferSimplifiedChinese()) {
            return new SimplifiedChineseContactUtils(locales);
        }
        return new ContactLocaleUtilsBase(locales);
    }
    516 
    /**
     * Returns true if this instance was built for a locale set equal to the
     * given one, as decided by LocaleSet.equals().
     */
    public boolean isLocale(LocaleSet locales) {
        return mLocales.equals(locales);
    }
    520 
    /**
     * Returns the shared instance, creating it for LocaleSet.newDefault()
     * on first use. Synchronized on the class, like setLocales().
     */
    public static synchronized ContactLocaleUtils getInstance() {
        if (sSingleton == null) {
            sSingleton = new ContactLocaleUtils(LocaleSet.newDefault());
        }
        return sSingleton;
    }
    527 
    /**
     * Creates a fresh instance for the given locales without touching the
     * shared singleton.
     */
    @VisibleForTesting
    public static ContactLocaleUtils newInstanceForTest(Locale... locales) {
        return new ContactLocaleUtils(LocaleSet.newForTest(locales));
    }
    532 
    /**
     * Test helper: wraps the given locales with LocaleSet.newForTest() and
     * passes the result to setLocales().
     */
    @VisibleForTesting
    public static synchronized void setLocaleForTest(Locale... locales) {
        setLocales(LocaleSet.newForTest(locales));
    }
    537 
    538     public static synchronized void setLocales(LocaleSet locales) {
    539         if (sSingleton == null || !sSingleton.isLocale(locales)) {
    540             if (DEBUG) {
    541                 Log.d(TAG, "Setting locale(s) to " + locales);
    542             }
    543             sSingleton = new ContactLocaleUtils(locales);
    544         }
    545     }
    546 
    /**
     * Returns the sort key for a name from the locale-specific implementation.
     * The nameStyle argument is not used here.
     */
    public String getSortKey(String name, int nameStyle) {
        return mUtils.getSortKey(name);
    }
    550 
    /** Returns the bucket index for a name from the locale-specific implementation. */
    public int getBucketIndex(String name) {
        return mUtils.getBucketIndex(name);
    }
    554 
    /** Returns the index of the "#" bucket from the locale-specific implementation. */
    public int getNumberBucketIndex() {
        return mUtils.getNumberBucketIndex();
    }
    558 
    /** Returns the number of buckets from the locale-specific implementation. */
    public int getBucketCount() {
        return mUtils.getBucketCount();
    }
    562 
    /** Returns the label for a bucket index from the locale-specific implementation. */
    public String getBucketLabel(int bucketIndex) {
        return mUtils.getBucketLabel(bucketIndex);
    }
    566 
    /**
     * Returns the label of the bucket a name falls into, by chaining
     * getBucketIndex() and getBucketLabel().
     */
    public String getLabel(String name) {
        return getBucketLabel(getBucketIndex(name));
    }
    570 
    /** Returns the labels of all buckets from the locale-specific implementation. */
    public ArrayList<String> getLabels() {
        return mUtils.getLabels();
    }
    574 
    575     /**
    576      *  Determine which utility should be used for generating NameLookupKey.
    577      *  (ie, whether we generate Romaji or Pinyin lookup keys or not)
    578      *
    579      *  Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified
    580      *  and tagged as CJK. For Hiragana/Katakana names, generate Romaji
    581      *  lookup keys when not in a Chinese or Korean locale.
    582      *
    583      *  Otherwise, use the default behavior of that locale:
    584      *  a. For Japan, generate Romaji lookup keys for Hiragana/Katakana.
    585      *  b. For Simplified Chinese locale, generate Pinyin lookup keys.
    586      */
    587     public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    588         if (!mLocales.isPrimaryLocaleCJK()) {
    589             if (mLocales.shouldPreferSimplifiedChinese()) {
    590                 if (nameStyle == FullNameStyle.CHINESE ||
    591                         nameStyle == FullNameStyle.CJK) {
    592                     return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name);
    593                 }
    594             } else {
    595                 if (nameStyle == FullNameStyle.JAPANESE) {
    596                     return JapaneseContactUtils.getRomajiNameLookupKeys(name);
    597                 }
    598             }
    599         }
    600         return mUtils.getNameLookupKeys(name, nameStyle);
    601     }
    602 
    603 }
    604