Home | History | Annotate | Download | only in contacts
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 
     17 package com.android.providers.contacts;
     18 
     19 import android.provider.ContactsContract.FullNameStyle;
     20 import android.provider.ContactsContract.PhoneticNameStyle;
     21 import android.text.TextUtils;
     22 import android.util.Log;
     23 
     24 import com.android.providers.contacts.HanziToPinyin.Token;
     25 import com.google.common.annotations.VisibleForTesting;
     26 
     27 import java.lang.Character.UnicodeBlock;
     28 import java.util.Arrays;
     29 import java.util.ArrayList;
     30 import java.util.Collections;
     31 import java.util.HashMap;
     32 import java.util.HashSet;
     33 import java.util.Iterator;
     34 import java.util.List;
     35 import java.util.Locale;
     36 import java.util.Map;
     37 import java.util.Set;
     38 
     39 import libcore.icu.AlphabeticIndex;
     40 import libcore.icu.AlphabeticIndex.ImmutableIndex;
     41 import libcore.icu.Transliterator;
     42 
     43 /**
     44  * This utility class provides specialized handling for locale specific
     45  * information: labels, name lookup keys.
     46  */
     47 public class ContactLocaleUtils {
     48     public static final String TAG = "ContactLocale";
     49 
     50     public static final Locale LOCALE_ARABIC = new Locale("ar");
     51     public static final Locale LOCALE_GREEK = new Locale("el");
     52     public static final Locale LOCALE_HEBREW = new Locale("he");
     53     // Serbian and Ukrainian labels are complementary supersets of Russian
     54     public static final Locale LOCALE_SERBIAN = new Locale("sr");
     55     public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
     56     public static final Locale LOCALE_THAI = new Locale("th");
     57 
     58     /**
     59      * This class is the default implementation and should be the base class
     60      * for other locales.
     61      *
     62      * sortKey: same as name
     63      * nameLookupKeys: none
     64      * labels: uses ICU AlphabeticIndex for labels and extends by labeling
     65      *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
     66      */
     67     private static class ContactLocaleUtilsBase {
     68         private static final String EMPTY_STRING = "";
     69         private static final String NUMBER_STRING = "#";
     70 
     71         protected final ImmutableIndex mAlphabeticIndex;
     72         private final int mAlphabeticIndexBucketCount;
     73         private final int mNumberBucketIndex;
     74         private final boolean mEnableSecondaryLocalePinyin;
     75 
     76         public ContactLocaleUtilsBase(LocaleSet locales) {
     77             // AlphabeticIndex.getBucketLabel() uses a binary search across
     78             // the entire label set so care should be taken about growing this
     79             // set too large. The following set determines for which locales
     80             // we will show labels other than your primary locale. General rules
     81             // of thumb for adding a locale: should be a supported locale; and
     82             // should not be included if from a name it is not deterministic
     83             // which way to label it (so eg Chinese cannot be added because
     84             // the labeling of a Chinese character varies between Simplified,
     85             // Traditional, and Japanese locales). Use English only for all
     86             // Latin based alphabets. Ukrainian and Serbian are chosen for
     87             // Cyrillic because their alphabets are complementary supersets
     88             // of Russian.
     89             final Locale secondaryLocale = locales.getSecondaryLocale();
     90             mEnableSecondaryLocalePinyin = locales.isSecondaryLocaleSimplifiedChinese();
     91             AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
     92                 .setMaxLabelCount(300);
     93             if (secondaryLocale != null) {
     94                 ai.addLabels(secondaryLocale);
     95             }
     96             mAlphabeticIndex = ai.addLabels(Locale.ENGLISH)
     97                 .addLabels(Locale.JAPANESE)
     98                 .addLabels(Locale.KOREAN)
     99                 .addLabels(LOCALE_THAI)
    100                 .addLabels(LOCALE_ARABIC)
    101                 .addLabels(LOCALE_HEBREW)
    102                 .addLabels(LOCALE_GREEK)
    103                 .addLabels(LOCALE_UKRAINIAN)
    104                 .addLabels(LOCALE_SERBIAN)
    105                 .getImmutableIndex();
    106             mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
    107             mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
    108         }
    109 
    110         public String getSortKey(String name) {
    111             return name;
    112         }
    113 
    114         /**
    115          * Returns the bucket index for the specified string. AlphabeticIndex
    116          * sorts strings into buckets numbered in order from 0 to N, where the
    117          * exact value of N depends on how many representative index labels are
    118          * used in a particular locale. This routine adds one additional bucket
    119          * for phone numbers. It attempts to detect phone numbers and shifts
    120          * the bucket indexes returned by AlphabeticIndex in order to make room
    121          * for the new # bucket, so the returned range becomes 0 to N+1.
    122          */
    123         public int getBucketIndex(String name) {
    124             boolean prefixIsNumeric = false;
    125             final int length = name.length();
    126             int offset = 0;
    127             while (offset < length) {
    128                 int codePoint = Character.codePointAt(name, offset);
    129                 // Ignore standard phone number separators and identify any
    130                 // string that otherwise starts with a number.
    131                 if (Character.isDigit(codePoint)) {
    132                     prefixIsNumeric = true;
    133                     break;
    134                 } else if (!Character.isSpaceChar(codePoint) &&
    135                            codePoint != '+' && codePoint != '(' &&
    136                            codePoint != ')' && codePoint != '.' &&
    137                            codePoint != '-' && codePoint != '#') {
    138                     break;
    139                 }
    140                 offset += Character.charCount(codePoint);
    141             }
    142             if (prefixIsNumeric) {
    143                 return mNumberBucketIndex;
    144             }
    145 
    146             /**
    147              * TODO: ICU 52 AlphabeticIndex doesn't support Simplified Chinese
    148              * as a secondary locale. Remove the following if that is added.
    149              */
    150             if (mEnableSecondaryLocalePinyin) {
    151                 name = HanziToPinyin.getInstance().transliterate(name);
    152             }
    153             final int bucket = mAlphabeticIndex.getBucketIndex(name);
    154             if (bucket < 0) {
    155                 return -1;
    156             }
    157             if (bucket >= mNumberBucketIndex) {
    158                 return bucket + 1;
    159             }
    160             return bucket;
    161         }
    162 
    163         /**
    164          * Returns the number of buckets in use (one more than AlphabeticIndex
    165          * uses, because this class adds a bucket for phone numbers).
    166          */
    167         public int getBucketCount() {
    168             return mAlphabeticIndexBucketCount + 1;
    169         }
    170 
    171         /**
    172          * Returns the label for the specified bucket index if a valid index,
    173          * otherwise returns an empty string. '#' is returned for the phone
    174          * number bucket; for all others, the AlphabeticIndex label is returned.
    175          */
    176         public String getBucketLabel(int bucketIndex) {
    177             if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
    178                 return EMPTY_STRING;
    179             } else if (bucketIndex == mNumberBucketIndex) {
    180                 return NUMBER_STRING;
    181             } else if (bucketIndex > mNumberBucketIndex) {
    182                 --bucketIndex;
    183             }
    184             return mAlphabeticIndex.getBucketLabel(bucketIndex);
    185         }
    186 
    187         @SuppressWarnings("unused")
    188         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    189             return null;
    190         }
    191 
    192         public ArrayList<String> getLabels() {
    193             final int bucketCount = getBucketCount();
    194             final ArrayList<String> labels = new ArrayList<String>(bucketCount);
    195             for(int i = 0; i < bucketCount; ++i) {
    196                 labels.add(getBucketLabel(i));
    197             }
    198             return labels;
    199         }
    200     }
    201 
    202     /**
    203      * Japanese specific locale overrides.
    204      *
    205      * sortKey: unchanged (same as name)
    206      * nameLookupKeys: unchanged (none)
    207      * labels: extends default labels by labeling unlabeled CJ characters
    208      *     with the Japanese character  ("misc"). Japanese labels are:
    209      *     , , , , , , , , , , , [A-Z], #, " "
    210      */
    211     private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
    212         // \u4ed6 is Japanese character  ("misc")
    213         private static final String JAPANESE_MISC_LABEL = "\u4ed6";
    214         private final int mMiscBucketIndex;
    215 
    216         public JapaneseContactUtils(LocaleSet locales) {
    217             super(locales);
    218             // Determine which bucket AlphabeticIndex is lumping unclassified
    219             // Japanese characters into by looking up the bucket index for
    220             // a representative Kanji/CJK unified ideograph (\u65e5 is the
    221             // character '').
    222             mMiscBucketIndex = super.getBucketIndex("\u65e5");
    223         }
    224 
    225         // Set of UnicodeBlocks for unified CJK (Chinese) characters and
    226         // Japanese characters. This includes all code blocks that might
    227         // contain a character used in Japanese (which is why unified CJK
    228         // blocks are included but Korean Hangul and jamo are not).
    229         private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
    230         static {
    231             Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
    232             set.add(UnicodeBlock.HIRAGANA);
    233             set.add(UnicodeBlock.KATAKANA);
    234             set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
    235             set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
    236             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
    237             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
    238             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
    239             set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
    240             set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
    241             set.add(UnicodeBlock.CJK_COMPATIBILITY);
    242             set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
    243             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
    244             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
    245             CJ_BLOCKS = Collections.unmodifiableSet(set);
    246         }
    247 
    248         /**
    249          * Helper routine to identify unlabeled Chinese or Japanese characters
    250          * to put in a 'misc' bucket.
    251          *
    252          * @return true if the specified Unicode code point is Chinese or
    253          *              Japanese
    254          */
    255         private static boolean isChineseOrJapanese(int codePoint) {
    256             return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
    257         }
    258 
    259         /**
    260          * Returns the bucket index for the specified string. Adds an
    261          * additional 'misc' bucket for Kanji characters to the base class set.
    262          */
    263         @Override
    264         public int getBucketIndex(String name) {
    265             final int bucketIndex = super.getBucketIndex(name);
    266             if ((bucketIndex == mMiscBucketIndex &&
    267                  !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
    268                 bucketIndex > mMiscBucketIndex) {
    269                 return bucketIndex + 1;
    270             }
    271             return bucketIndex;
    272         }
    273 
    274         /**
    275          * Returns the number of buckets in use (one more than the base class
    276          * uses, because this class adds a bucket for Kanji).
    277          */
    278         @Override
    279         public int getBucketCount() {
    280             return super.getBucketCount() + 1;
    281         }
    282 
    283         /**
    284          * Returns the label for the specified bucket index if a valid index,
    285          * otherwise returns an empty string. '' is returned for unclassified
    286          * Kanji; for all others, the label determined by the base class is
    287          * returned.
    288          */
    289         @Override
    290         public String getBucketLabel(int bucketIndex) {
    291             if (bucketIndex == mMiscBucketIndex) {
    292                 return JAPANESE_MISC_LABEL;
    293             } else if (bucketIndex > mMiscBucketIndex) {
    294                 --bucketIndex;
    295             }
    296             return super.getBucketLabel(bucketIndex);
    297         }
    298 
    299         @Override
    300         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    301             // Hiragana and Katakana will be positively identified as Japanese.
    302             if (nameStyle == PhoneticNameStyle.JAPANESE) {
    303                 return getRomajiNameLookupKeys(name);
    304             }
    305             return null;
    306         }
    307 
    308         private static boolean mInitializedTransliterator;
    309         private static Transliterator mJapaneseTransliterator;
    310 
    311         private static Transliterator getJapaneseTransliterator() {
    312             synchronized(JapaneseContactUtils.class) {
    313                 if (!mInitializedTransliterator) {
    314                     mInitializedTransliterator = true;
    315                     Transliterator t = null;
    316                     try {
    317                         t = new Transliterator("Hiragana-Latin; Katakana-Latin;"
    318                                 + " Latin-Ascii");
    319                     } catch (RuntimeException e) {
    320                         Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
    321                                 + " is missing");
    322                     }
    323                     mJapaneseTransliterator = t;
    324                 }
    325                 return mJapaneseTransliterator;
    326             }
    327         }
    328 
    329         public static Iterator<String> getRomajiNameLookupKeys(String name) {
    330             final Transliterator t = getJapaneseTransliterator();
    331             if (t == null) {
    332                 return null;
    333             }
    334             final String romajiName = t.transliterate(name);
    335             if (TextUtils.isEmpty(romajiName) ||
    336                     TextUtils.equals(name, romajiName)) {
    337                 return null;
    338             }
    339             final HashSet<String> keys = new HashSet<String>();
    340             keys.add(romajiName);
    341             return keys.iterator();
    342         }
    343     }
    344 
    345     /**
    346      * Simplified Chinese specific locale overrides. Uses ICU Transliterator
    347      * for generating pinyin transliteration.
    348      *
    349      * sortKey: unchanged (same as name)
    350      * nameLookupKeys: adds additional name lookup keys
    351      *     - Chinese character's pinyin and pinyin's initial character.
    352      *     - Latin word and initial character.
    353      * labels: unchanged
    354      *     Simplified Chinese labels are the same as English: [A-Z], #, " "
    355      */
    356     private static class SimplifiedChineseContactUtils
    357         extends ContactLocaleUtilsBase {
    358         public SimplifiedChineseContactUtils(LocaleSet locales) {
    359             super(locales);
    360         }
    361 
    362         @Override
    363         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    364             if (nameStyle != FullNameStyle.JAPANESE &&
    365                     nameStyle != FullNameStyle.KOREAN) {
    366                 return getPinyinNameLookupKeys(name);
    367             }
    368             return null;
    369         }
    370 
    371         public static Iterator<String> getPinyinNameLookupKeys(String name) {
    372             // TODO : Reduce the object allocation.
    373             HashSet<String> keys = new HashSet<String>();
    374             ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
    375             final int tokenCount = tokens.size();
    376             final StringBuilder keyPinyin = new StringBuilder();
    377             final StringBuilder keyInitial = new StringBuilder();
    378             // There is no space among the Chinese Characters, the variant name
    379             // lookup key wouldn't work for Chinese. The keyOriginal is used to
    380             // build the lookup keys for itself.
    381             final StringBuilder keyOriginal = new StringBuilder();
    382             for (int i = tokenCount - 1; i >= 0; i--) {
    383                 final Token token = tokens.get(i);
    384                 if (Token.UNKNOWN == token.type) {
    385                     continue;
    386                 }
    387                 if (Token.PINYIN == token.type) {
    388                     keyPinyin.insert(0, token.target);
    389                     keyInitial.insert(0, token.target.charAt(0));
    390                 } else if (Token.LATIN == token.type) {
    391                     // Avoid adding space at the end of String.
    392                     if (keyPinyin.length() > 0) {
    393                         keyPinyin.insert(0, ' ');
    394                     }
    395                     if (keyOriginal.length() > 0) {
    396                         keyOriginal.insert(0, ' ');
    397                     }
    398                     keyPinyin.insert(0, token.source);
    399                     keyInitial.insert(0, token.source.charAt(0));
    400                 }
    401                 keyOriginal.insert(0, token.source);
    402                 keys.add(keyOriginal.toString());
    403                 keys.add(keyPinyin.toString());
    404                 keys.add(keyInitial.toString());
    405             }
    406             return keys.iterator();
    407         }
    408     }
    409 
    410     private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
    411 
    412     private static ContactLocaleUtils sSingleton;
    413 
    414     private final LocaleSet mLocales;
    415     private final ContactLocaleUtilsBase mUtils;
    416 
    417     private ContactLocaleUtils(LocaleSet locales) {
    418         if (locales == null) {
    419             mLocales = LocaleSet.getDefault();
    420         } else {
    421             mLocales = locales;
    422         }
    423         if (mLocales.isPrimaryLanguage(JAPANESE_LANGUAGE)) {
    424             mUtils = new JapaneseContactUtils(mLocales);
    425         } else if (mLocales.isPrimaryLocaleSimplifiedChinese()) {
    426             mUtils = new SimplifiedChineseContactUtils(mLocales);
    427         } else {
    428             mUtils = new ContactLocaleUtilsBase(mLocales);
    429         }
    430         Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
    431                 + getLabels().toString());
    432     }
    433 
    434     public boolean isLocale(LocaleSet locales) {
    435         return mLocales.equals(locales);
    436     }
    437 
    438     public static synchronized ContactLocaleUtils getInstance() {
    439         if (sSingleton == null) {
    440             sSingleton = new ContactLocaleUtils(LocaleSet.getDefault());
    441         }
    442         return sSingleton;
    443     }
    444 
    445     @VisibleForTesting
    446     public static synchronized void setLocale(Locale locale) {
    447         setLocales(new LocaleSet(locale));
    448     }
    449 
    450     public static synchronized void setLocales(LocaleSet locales) {
    451         if (sSingleton == null || !sSingleton.isLocale(locales)) {
    452             sSingleton = new ContactLocaleUtils(locales);
    453         }
    454     }
    455 
    456     public String getSortKey(String name, int nameStyle) {
    457         return mUtils.getSortKey(name);
    458     }
    459 
    460     public int getBucketIndex(String name) {
    461         return mUtils.getBucketIndex(name);
    462     }
    463 
    464     public int getBucketCount() {
    465         return mUtils.getBucketCount();
    466     }
    467 
    468     public String getBucketLabel(int bucketIndex) {
    469         return mUtils.getBucketLabel(bucketIndex);
    470     }
    471 
    472     public String getLabel(String name) {
    473         return getBucketLabel(getBucketIndex(name));
    474     }
    475 
    476     public ArrayList<String> getLabels() {
    477         return mUtils.getLabels();
    478     }
    479 
    480     /**
    481      *  Determine which utility should be used for generating NameLookupKey.
    482      *  (ie, whether we generate Romaji or Pinyin lookup keys or not)
    483      *
    484      *  Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified
    485      *  and tagged as CJK. For Hiragana/Katakana names, generate Romaji
    486      *  lookup keys when not in a Chinese or Korean locale.
    487      *
    488      *  Otherwise, use the default behavior of that locale:
    489      *  a. For Japan, generate Romaji lookup keys for Hiragana/Katakana.
    490      *  b. For Simplified Chinese locale, generate Pinyin lookup keys.
    491      */
    492     public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    493         if (!mLocales.isPrimaryLocaleCJK()) {
    494             if (mLocales.isSecondaryLocaleSimplifiedChinese()) {
    495                 if (nameStyle == FullNameStyle.CHINESE ||
    496                         nameStyle == FullNameStyle.CJK) {
    497                     return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name);
    498                 }
    499             } else {
    500                 if (nameStyle == FullNameStyle.JAPANESE) {
    501                     return JapaneseContactUtils.getRomajiNameLookupKeys(name);
    502                 }
    503             }
    504         }
    505         return mUtils.getNameLookupKeys(name, nameStyle);
    506     }
    507 
    508 }
    509