Home | History | Annotate | Download | only in contacts
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 
     17 package com.android.providers.contacts;
     18 
     19 import android.icu.text.AlphabeticIndex;
     20 import android.icu.text.AlphabeticIndex.ImmutableIndex;
     21 import android.icu.text.Transliterator;
     22 import android.provider.ContactsContract.FullNameStyle;
     23 import android.provider.ContactsContract.PhoneticNameStyle;
     24 import android.os.LocaleList;
     25 import android.text.TextUtils;
     26 import android.util.ArraySet;
     27 import android.util.Log;
     28 
     29 import com.android.providers.contacts.HanziToPinyin.Token;
     30 import com.google.common.annotations.VisibleForTesting;
     31 
     32 import java.lang.Character.UnicodeBlock;
     33 import java.util.ArrayList;
     34 import java.util.Collections;
     35 import java.util.HashSet;
     36 import java.util.Iterator;
     37 import java.util.Locale;
     38 import java.util.Set;
     39 
     40 
     41 /**
     42  * This utility class provides specialized handling for locale specific
     43  * information: labels, name lookup keys.
     44  */
     45 public class ContactLocaleUtils {
    /** Tag used for the log messages written by this class. */
    public static final String TAG = "ContactLocale";

    // Locale constants for the languages listed in sDefaultLabelLocales
    // below that are referenced there by these names.
    public static final Locale LOCALE_ARABIC = new Locale("ar");
    public static final Locale LOCALE_GREEK = new Locale("el");
    public static final Locale LOCALE_HEBREW = new Locale("he");
    // Serbian and Ukrainian labels are complementary supersets of Russian
    public static final Locale LOCALE_SERBIAN = new Locale("sr");
    public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
    public static final Locale LOCALE_THAI = new Locale("th");

    // -- Note for adding locales to sDefaultLabelLocales --
    //
    // AlphabeticIndex.getBucketLabel() uses a binary search across
    // the entire label set so care should be taken about growing this
    // set too large. The following set determines for which locales
    // we will show labels other than your primary locale. General rules
    // of thumb for adding a locale: should be a supported locale; and
    // should not be included if from a name it is not deterministic
    // which way to label it (so eg Chinese cannot be added because
    // the labeling of a Chinese character varies between Simplified,
    // Traditional, and Japanese locales). Use English only for all
    // Latin based alphabets. Ukrainian and Serbian are chosen for
    // Cyrillic because their alphabets are complementary supersets
    // of Russian.
    //
    // These locales are added to the index after the primary locale and
    // the user's locale list; a locale that was already added is skipped.
    private static final Locale[] sDefaultLabelLocales = new Locale[]{
            Locale.ENGLISH,
            Locale.JAPANESE,
            Locale.KOREAN,
            LOCALE_THAI,
            LOCALE_ARABIC,
            LOCALE_HEBREW,
            LOCALE_GREEK,
            LOCALE_UKRAINIAN,
            LOCALE_SERBIAN,
    };
     81 
     82     /**
     83      * This class is the default implementation and should be the base class
     84      * for other locales.
     85      *
     86      * sortKey: same as name
     87      * nameLookupKeys: none
     88      * labels: uses ICU AlphabeticIndex for labels and extends by labeling
     89      *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
     90      */
     91     private static class ContactLocaleUtilsBase {
     92         private static final String EMPTY_STRING = "";
     93         private static final String NUMBER_STRING = "#";
     94 
     95         protected final ImmutableIndex mAlphabeticIndex;
     96         private final int mAlphabeticIndexBucketCount;
     97         private final int mNumberBucketIndex;
     98         private final boolean mUsePinyinTransliterator;
     99 
    100         public ContactLocaleUtilsBase(LocaleSet locales) {
    101             mUsePinyinTransliterator = locales.shouldPreferSimplifiedChinese();
    102 
    103             final ArraySet<Locale> addedLocales = new ArraySet<>();
    104 
    105             // First, add from the primary locale (which may not be the first locale in the locale
    106             // list).
    107             AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
    108                     .setMaxLabelCount(300);
    109             addedLocales.add(locales.getPrimaryLocale());
    110 
    111             // Next, add all locale form the locale list.
    112             final LocaleList localeList = locales.getAllLocales();
    113             for (int i = 0; i < localeList.size(); i++) {
    114                 addLabels(ai, localeList.get(i), addedLocales);
    115             }
    116             // Then add the default locales.
    117             for (int i = 0; i < sDefaultLabelLocales.length; i++) {
    118                 addLabels(ai, sDefaultLabelLocales[i], addedLocales);
    119             }
    120             mAlphabeticIndex = ai.buildImmutableIndex();
    121             mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
    122             mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
    123         }
    124 
    125         private static void addLabels(
    126                 AlphabeticIndex ai, Locale locale, ArraySet<Locale> addedLocales) {
    127             if (addedLocales.contains(locale)) {
    128                 return;
    129             }
    130             ai.addLabels(locale);
    131             addedLocales.add(locale);
    132         }
    133 
    134         public String getSortKey(String name) {
    135             return name;
    136         }
    137 
    138         public int getNumberBucketIndex() {
    139             return mNumberBucketIndex;
    140         }
    141 
    142         /**
    143          * Returns the bucket index for the specified string. AlphabeticIndex
    144          * sorts strings into buckets numbered in order from 0 to N, where the
    145          * exact value of N depends on how many representative index labels are
    146          * used in a particular locale. This routine adds one additional bucket
    147          * for phone numbers. It attempts to detect phone numbers and shifts
    148          * the bucket indexes returned by AlphabeticIndex in order to make room
    149          * for the new # bucket, so the returned range becomes 0 to N+1.
    150          */
    151         public int getBucketIndex(String name) {
    152             boolean prefixIsNumeric = false;
    153             final int length = name.length();
    154             int offset = 0;
    155             while (offset < length) {
    156                 int codePoint = Character.codePointAt(name, offset);
    157                 // Ignore standard phone number separators and identify any
    158                 // string that otherwise starts with a number.
    159                 if (Character.isDigit(codePoint)) {
    160                     prefixIsNumeric = true;
    161                     break;
    162                 } else if (!Character.isSpaceChar(codePoint) &&
    163                            codePoint != '+' && codePoint != '(' &&
    164                            codePoint != ')' && codePoint != '.' &&
    165                            codePoint != '-' && codePoint != '#') {
    166                     break;
    167                 }
    168                 offset += Character.charCount(codePoint);
    169             }
    170             if (prefixIsNumeric) {
    171                 return mNumberBucketIndex;
    172             }
    173 
    174             /**
    175              * ICU 55 AlphabeticIndex doesn't support Simplified Chinese
    176              * as a secondary locale so it is necessary to use the
    177              * Pinyin transliterator. We also use this for a Simplified
    178              * Chinese primary locale because it gives more accurate letter
    179              * buckets. b/19835686
    180              */
    181             if (mUsePinyinTransliterator) {
    182                 name = HanziToPinyin.getInstance().transliterate(name);
    183             }
    184             final int bucket = mAlphabeticIndex.getBucketIndex(name);
    185             if (bucket < 0) {
    186                 return -1;
    187             }
    188             if (bucket >= mNumberBucketIndex) {
    189                 return bucket + 1;
    190             }
    191             return bucket;
    192         }
    193 
    194         /**
    195          * Returns the number of buckets in use (one more than AlphabeticIndex
    196          * uses, because this class adds a bucket for phone numbers).
    197          */
    198         public int getBucketCount() {
    199             return mAlphabeticIndexBucketCount + 1;
    200         }
    201 
    202         /**
    203          * Returns the label for the specified bucket index if a valid index,
    204          * otherwise returns an empty string. '#' is returned for the phone
    205          * number bucket; for all others, the AlphabeticIndex label is returned.
    206          */
    207         public String getBucketLabel(int bucketIndex) {
    208             if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
    209                 return EMPTY_STRING;
    210             } else if (bucketIndex == mNumberBucketIndex) {
    211                 return NUMBER_STRING;
    212             } else if (bucketIndex > mNumberBucketIndex) {
    213                 --bucketIndex;
    214             }
    215             return mAlphabeticIndex.getBucket(bucketIndex).getLabel();
    216         }
    217 
    218         @SuppressWarnings("unused")
    219         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    220             return null;
    221         }
    222 
    223         public ArrayList<String> getLabels() {
    224             final int bucketCount = getBucketCount();
    225             final ArrayList<String> labels = new ArrayList<String>(bucketCount);
    226             for(int i = 0; i < bucketCount; ++i) {
    227                 labels.add(getBucketLabel(i));
    228             }
    229             return labels;
    230         }
    231     }
    232 
    233     /**
    234      * Japanese specific locale overrides.
    235      *
    236      * sortKey: unchanged (same as name)
    237      * nameLookupKeys: unchanged (none)
    238      * labels: extends default labels by labeling unlabeled CJ characters
     *     with the Japanese character 他 ("misc"). Japanese labels are:
     *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
    241      */
    242     private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
    243         // \u4ed6 is Japanese character  ("misc")
    244         private static final String JAPANESE_MISC_LABEL = "\u4ed6";
    245         private final int mMiscBucketIndex;
    246 
    247         public JapaneseContactUtils(LocaleSet locales) {
    248             super(locales);
    249             // Determine which bucket AlphabeticIndex is lumping unclassified
    250             // Japanese characters into by looking up the bucket index for
    251             // a representative Kanji/CJK unified ideograph (\u65e5 is the
    252             // character '').
    253             mMiscBucketIndex = super.getBucketIndex("\u65e5");
    254         }
    255 
    256         // Set of UnicodeBlocks for unified CJK (Chinese) characters and
    257         // Japanese characters. This includes all code blocks that might
    258         // contain a character used in Japanese (which is why unified CJK
    259         // blocks are included but Korean Hangul and jamo are not).
    260         private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
    261         static {
    262             Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
    263             set.add(UnicodeBlock.HIRAGANA);
    264             set.add(UnicodeBlock.KATAKANA);
    265             set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
    266             set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
    267             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
    268             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
    269             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
    270             set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
    271             set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
    272             set.add(UnicodeBlock.CJK_COMPATIBILITY);
    273             set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
    274             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
    275             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
    276             CJ_BLOCKS = Collections.unmodifiableSet(set);
    277         }
    278 
    279         /**
    280          * Helper routine to identify unlabeled Chinese or Japanese characters
    281          * to put in a 'misc' bucket.
    282          *
    283          * @return true if the specified Unicode code point is Chinese or
    284          *              Japanese
    285          */
    286         private static boolean isChineseOrJapanese(int codePoint) {
    287             return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
    288         }
    289 
    290         /**
    291          * Returns the bucket index for the specified string. Adds an
    292          * additional 'misc' bucket for Kanji characters to the base class set.
    293          */
    294         @Override
    295         public int getBucketIndex(String name) {
    296             final int bucketIndex = super.getBucketIndex(name);
    297             if ((bucketIndex == mMiscBucketIndex &&
    298                  !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
    299                 bucketIndex > mMiscBucketIndex) {
    300                 return bucketIndex + 1;
    301             }
    302             return bucketIndex;
    303         }
    304 
    305         /**
    306          * Returns the number of buckets in use (one more than the base class
    307          * uses, because this class adds a bucket for Kanji).
    308          */
    309         @Override
    310         public int getBucketCount() {
    311             return super.getBucketCount() + 1;
    312         }
    313 
    314         /**
    315          * Returns the label for the specified bucket index if a valid index,
    316          * otherwise returns an empty string. '' is returned for unclassified
    317          * Kanji; for all others, the label determined by the base class is
    318          * returned.
    319          */
    320         @Override
    321         public String getBucketLabel(int bucketIndex) {
    322             if (bucketIndex == mMiscBucketIndex) {
    323                 return JAPANESE_MISC_LABEL;
    324             } else if (bucketIndex > mMiscBucketIndex) {
    325                 --bucketIndex;
    326             }
    327             return super.getBucketLabel(bucketIndex);
    328         }
    329 
    330         @Override
    331         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    332             // Hiragana and Katakana will be positively identified as Japanese.
    333             if (nameStyle == PhoneticNameStyle.JAPANESE) {
    334                 return getRomajiNameLookupKeys(name);
    335             }
    336             return null;
    337         }
    338 
    339         private static boolean mInitializedTransliterator;
    340         private static Transliterator mJapaneseTransliterator;
    341 
    342         private static Transliterator getJapaneseTransliterator() {
    343             synchronized(JapaneseContactUtils.class) {
    344                 if (!mInitializedTransliterator) {
    345                     mInitializedTransliterator = true;
    346                     Transliterator t = null;
    347                     try {
    348                         t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;"
    349                                 + " Latin-Ascii");
    350                     } catch (IllegalArgumentException e) {
    351                         Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
    352                                 + " is missing");
    353                     }
    354                     mJapaneseTransliterator = t;
    355                 }
    356                 return mJapaneseTransliterator;
    357             }
    358         }
    359 
    360         public static Iterator<String> getRomajiNameLookupKeys(String name) {
    361             final Transliterator t = getJapaneseTransliterator();
    362             if (t == null) {
    363                 return null;
    364             }
    365             final String romajiName = t.transliterate(name);
    366             if (TextUtils.isEmpty(romajiName) ||
    367                     TextUtils.equals(name, romajiName)) {
    368                 return null;
    369             }
    370             final HashSet<String> keys = new HashSet<String>();
    371             keys.add(romajiName);
    372             return keys.iterator();
    373         }
    374     }
    375 
    376     /**
    377      * Simplified Chinese specific locale overrides. Uses ICU Transliterator
    378      * for generating pinyin transliteration.
    379      *
    380      * sortKey: unchanged (same as name)
    381      * nameLookupKeys: adds additional name lookup keys
    382      *     - Chinese character's pinyin and pinyin's initial character.
    383      *     - Latin word and initial character.
    384      * labels: unchanged
    385      *     Simplified Chinese labels are the same as English: [A-Z], #, " "
    386      */
    387     private static class SimplifiedChineseContactUtils
    388         extends ContactLocaleUtilsBase {
    389         public SimplifiedChineseContactUtils(LocaleSet locales) {
    390             super(locales);
    391         }
    392 
    393         @Override
    394         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    395             if (nameStyle != FullNameStyle.JAPANESE &&
    396                     nameStyle != FullNameStyle.KOREAN) {
    397                 return getPinyinNameLookupKeys(name);
    398             }
    399             return null;
    400         }
    401 
    402         public static Iterator<String> getPinyinNameLookupKeys(String name) {
    403             // TODO : Reduce the object allocation.
    404             HashSet<String> keys = new HashSet<String>();
    405             ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
    406             final int tokenCount = tokens.size();
    407             final StringBuilder keyPinyin = new StringBuilder();
    408             final StringBuilder keyInitial = new StringBuilder();
    409             // There is no space among the Chinese Characters, the variant name
    410             // lookup key wouldn't work for Chinese. The keyOriginal is used to
    411             // build the lookup keys for itself.
    412             final StringBuilder keyOriginal = new StringBuilder();
    413             for (int i = tokenCount - 1; i >= 0; i--) {
    414                 final Token token = tokens.get(i);
    415                 if (Token.UNKNOWN == token.type) {
    416                     continue;
    417                 }
    418                 if (Token.PINYIN == token.type) {
    419                     keyPinyin.insert(0, token.target);
    420                     keyInitial.insert(0, token.target.charAt(0));
    421                 } else if (Token.LATIN == token.type) {
    422                     // Avoid adding space at the end of String.
    423                     if (keyPinyin.length() > 0) {
    424                         keyPinyin.insert(0, ' ');
    425                     }
    426                     if (keyOriginal.length() > 0) {
    427                         keyOriginal.insert(0, ' ');
    428                     }
    429                     keyPinyin.insert(0, token.source);
    430                     keyInitial.insert(0, token.source.charAt(0));
    431                 }
    432                 keyOriginal.insert(0, token.source);
    433                 keys.add(keyOriginal.toString());
    434                 keys.add(keyPinyin.toString());
    435                 keys.add(keyInitial.toString());
    436             }
    437             return keys.iterator();
    438         }
    439     }
    440 
    // Process-wide instance; read and replaced only inside the static
    // synchronized methods of this class.
    private static ContactLocaleUtils sSingleton;

    // Locale set this instance was built for.
    private final LocaleSet mLocales;
    // Locale-specific implementation chosen in the constructor.
    private final ContactLocaleUtilsBase mUtils;
    445 
    446     private ContactLocaleUtils(LocaleSet locales) {
    447         if (locales == null) {
    448             mLocales = LocaleSet.newDefault();
    449         } else {
    450             mLocales = locales;
    451         }
    452         if (mLocales.shouldPreferJapanese()) {
    453             mUtils = new JapaneseContactUtils(mLocales);
    454         } else if (mLocales.shouldPreferSimplifiedChinese()) {
    455             mUtils = new SimplifiedChineseContactUtils(mLocales);
    456         } else {
    457             mUtils = new ContactLocaleUtilsBase(mLocales);
    458         }
    459         Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
    460                 + getLabels().toString());
    461     }
    462 
    /**
     * Returns true if this instance was built for a locale set equal to
     * {@code locales}, as decided by {@code LocaleSet.equals}.
     */
    public boolean isLocale(LocaleSet locales) {
        return mLocales.equals(locales);
    }
    466 
    467     public static synchronized ContactLocaleUtils getInstance() {
    468         if (sSingleton == null) {
    469             sSingleton = new ContactLocaleUtils(LocaleSet.newDefault());
    470         }
    471         return sSingleton;
    472     }
    473 
    /**
     * Test hook: wraps {@code locales} with {@code LocaleSet.newForTest} and
     * passes the result to {@link #setLocales(LocaleSet)}.
     */
    @VisibleForTesting
    public static synchronized void setLocaleForTest(Locale... locales) {
        setLocales(LocaleSet.newForTest(locales));
    }
    478 
    479     public static synchronized void setLocales(LocaleSet locales) {
    480         if (sSingleton == null || !sSingleton.isLocale(locales)) {
    481             sSingleton = new ContactLocaleUtils(locales);
    482         }
    483     }
    484 
    /**
     * Returns the sort key for {@code name} from the locale-specific
     * implementation. {@code nameStyle} is not used.
     */
    public String getSortKey(String name, int nameStyle) {
        return mUtils.getSortKey(name);
    }
    488 
    /**
     * Returns the index of the label bucket that {@code name} belongs to, as
     * computed by the locale-specific implementation.
     */
    public int getBucketIndex(String name) {
        return mUtils.getBucketIndex(name);
    }
    492 
    /** Returns the number bucket index reported by the locale-specific implementation. */
    public int getNumberBucketIndex() {
        return mUtils.getNumberBucketIndex();
    }
    496 
    /** Returns the number of label buckets reported by the locale-specific implementation. */
    public int getBucketCount() {
        return mUtils.getBucketCount();
    }
    500 
    /**
     * Returns the label of the bucket at {@code bucketIndex}, as provided by
     * the locale-specific implementation.
     */
    public String getBucketLabel(int bucketIndex) {
        return mUtils.getBucketLabel(bucketIndex);
    }
    504 
    505     public String getLabel(String name) {
    506         return getBucketLabel(getBucketIndex(name));
    507     }
    508 
    /** Returns the list of bucket labels produced by the locale-specific implementation. */
    public ArrayList<String> getLabels() {
        return mUtils.getLabels();
    }
    512 
    513     /**
    514      *  Determine which utility should be used for generating NameLookupKey.
    515      *  (ie, whether we generate Romaji or Pinyin lookup keys or not)
    516      *
    517      *  Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified
    518      *  and tagged as CJK. For Hiragana/Katakana names, generate Romaji
    519      *  lookup keys when not in a Chinese or Korean locale.
    520      *
    521      *  Otherwise, use the default behavior of that locale:
    522      *  a. For Japan, generate Romaji lookup keys for Hiragana/Katakana.
    523      *  b. For Simplified Chinese locale, generate Pinyin lookup keys.
    524      */
    525     public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    526         if (!mLocales.isPrimaryLocaleCJK()) {
    527             if (mLocales.shouldPreferSimplifiedChinese()) {
    528                 if (nameStyle == FullNameStyle.CHINESE ||
    529                         nameStyle == FullNameStyle.CJK) {
    530                     return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name);
    531                 }
    532             } else {
    533                 if (nameStyle == FullNameStyle.JAPANESE) {
    534                     return JapaneseContactUtils.getRomajiNameLookupKeys(name);
    535                 }
    536             }
    537         }
    538         return mUtils.getNameLookupKeys(name, nameStyle);
    539     }
    540 
    541 }
    542