Home | History | Annotate | Download | only in contacts
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 
     17 package com.android.providers.contacts;
     18 
     19 import android.provider.ContactsContract.FullNameStyle;
     20 import android.provider.ContactsContract.PhoneticNameStyle;
     21 import android.text.TextUtils;
     22 import android.util.Log;
     23 
     24 import com.android.providers.contacts.HanziToPinyin.Token;
     25 
     26 import java.lang.Character.UnicodeBlock;
     27 import java.util.Arrays;
     28 import java.util.ArrayList;
     29 import java.util.Collections;
     30 import java.util.HashMap;
     31 import java.util.HashSet;
     32 import java.util.Iterator;
     33 import java.util.List;
     34 import java.util.Locale;
     35 import java.util.Map;
     36 import java.util.Set;
     37 
     38 import libcore.icu.AlphabeticIndex;
     39 import libcore.icu.AlphabeticIndex.ImmutableIndex;
     40 import libcore.icu.Transliterator;
     41 
     42 /**
     43  * This utility class provides specialized handling for locale specific
     44  * information: labels, name lookup keys.
     45  */
     46 public class ContactLocaleUtils {
    // Tag passed to android.util.Log for every message emitted by this class.
    public static final String TAG = "ContactLocale";

    // Locales whose index labels ContactLocaleUtilsBase adds to its
    // AlphabeticIndex in addition to the labels of the primary locale.
    public static final Locale LOCALE_ARABIC = new Locale("ar");
    public static final Locale LOCALE_GREEK = new Locale("el");
    public static final Locale LOCALE_HEBREW = new Locale("he");
    // Ukrainian labels are a superset of the Russian labels, so Ukrainian is
    // the locale used to cover Cyrillic.
    public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
    public static final Locale LOCALE_THAI = new Locale("th");
     55 
     56     /**
     57      * This class is the default implementation and should be the base class
     58      * for other locales.
     59      *
     60      * sortKey: same as name
     61      * nameLookupKeys: none
     62      * labels: uses ICU AlphabeticIndex for labels and extends by labeling
     63      *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
     64      */
     65     private static class ContactLocaleUtilsBase {
     66         private static final String EMPTY_STRING = "";
     67         private static final String NUMBER_STRING = "#";
     68 
     69         protected final ImmutableIndex mAlphabeticIndex;
     70         private final int mAlphabeticIndexBucketCount;
     71         private final int mNumberBucketIndex;
     72 
     73         public ContactLocaleUtilsBase(Locale locale) {
     74             // AlphabeticIndex.getBucketLabel() uses a binary search across
     75             // the entire label set so care should be taken about growing this
     76             // set too large. The following set determines for which locales
     77             // we will show labels other than your primary locale. General rules
     78             // of thumb for adding a locale: should be a supported locale; and
     79             // should not be included if from a name it is not deterministic
     80             // which way to label it (so eg Chinese cannot be added because
     81             // the labeling of a Chinese character varies between Simplified,
     82             // Traditional, and Japanese locales). Use English only for all
     83             // Latin based alphabets. Ukrainian is chosen for Cyrillic because
     84             // its alphabet is a superset of Russian.
     85             mAlphabeticIndex = new AlphabeticIndex(locale)
     86                 .setMaxLabelCount(300)
     87                 .addLabels(Locale.ENGLISH)
     88                 .addLabels(Locale.JAPANESE)
     89                 .addLabels(Locale.KOREAN)
     90                 .addLabels(LOCALE_THAI)
     91                 .addLabels(LOCALE_ARABIC)
     92                 .addLabels(LOCALE_HEBREW)
     93                 .addLabels(LOCALE_GREEK)
     94                 .addLabels(LOCALE_UKRAINIAN)
     95                 .getImmutableIndex();
     96             mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
     97             mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
     98         }
     99 
    100         public String getSortKey(String name) {
    101             return name;
    102         }
    103 
    104         /**
    105          * Returns the bucket index for the specified string. AlphabeticIndex
    106          * sorts strings into buckets numbered in order from 0 to N, where the
    107          * exact value of N depends on how many representative index labels are
    108          * used in a particular locale. This routine adds one additional bucket
    109          * for phone numbers. It attempts to detect phone numbers and shifts
    110          * the bucket indexes returned by AlphabeticIndex in order to make room
    111          * for the new # bucket, so the returned range becomes 0 to N+1.
    112          */
    113         public int getBucketIndex(String name) {
    114             boolean prefixIsNumeric = false;
    115             final int length = name.length();
    116             int offset = 0;
    117             while (offset < length) {
    118                 int codePoint = Character.codePointAt(name, offset);
    119                 // Ignore standard phone number separators and identify any
    120                 // string that otherwise starts with a number.
    121                 if (Character.isDigit(codePoint)) {
    122                     prefixIsNumeric = true;
    123                     break;
    124                 } else if (!Character.isSpaceChar(codePoint) &&
    125                            codePoint != '+' && codePoint != '(' &&
    126                            codePoint != ')' && codePoint != '.' &&
    127                            codePoint != '-' && codePoint != '#') {
    128                     break;
    129                 }
    130                 offset += Character.charCount(codePoint);
    131             }
    132             if (prefixIsNumeric) {
    133                 return mNumberBucketIndex;
    134             }
    135 
    136             final int bucket = mAlphabeticIndex.getBucketIndex(name);
    137             if (bucket < 0) {
    138                 return -1;
    139             }
    140             if (bucket >= mNumberBucketIndex) {
    141                 return bucket + 1;
    142             }
    143             return bucket;
    144         }
    145 
    146         /**
    147          * Returns the number of buckets in use (one more than AlphabeticIndex
    148          * uses, because this class adds a bucket for phone numbers).
    149          */
    150         public int getBucketCount() {
    151             return mAlphabeticIndexBucketCount + 1;
    152         }
    153 
    154         /**
    155          * Returns the label for the specified bucket index if a valid index,
    156          * otherwise returns an empty string. '#' is returned for the phone
    157          * number bucket; for all others, the AlphabeticIndex label is returned.
    158          */
    159         public String getBucketLabel(int bucketIndex) {
    160             if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
    161                 return EMPTY_STRING;
    162             } else if (bucketIndex == mNumberBucketIndex) {
    163                 return NUMBER_STRING;
    164             } else if (bucketIndex > mNumberBucketIndex) {
    165                 --bucketIndex;
    166             }
    167             return mAlphabeticIndex.getBucketLabel(bucketIndex);
    168         }
    169 
    170         @SuppressWarnings("unused")
    171         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    172             return null;
    173         }
    174 
    175         public ArrayList<String> getLabels() {
    176             final int bucketCount = getBucketCount();
    177             final ArrayList<String> labels = new ArrayList<String>(bucketCount);
    178             for(int i = 0; i < bucketCount; ++i) {
    179                 labels.add(getBucketLabel(i));
    180             }
    181             return labels;
    182         }
    183     }
    184 
    185     /**
    186      * Japanese specific locale overrides.
    187      *
    188      * sortKey: unchanged (same as name)
    189      * nameLookupKeys: unchanged (none)
    190      * labels: extends default labels by labeling unlabeled CJ characters
    191      *     with the Japanese character  ("misc"). Japanese labels are:
    192      *     , , , , , , , , , , , [A-Z], #, " "
    193      */
    194     private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
    195         // \u4ed6 is Japanese character  ("misc")
    196         private static final String JAPANESE_MISC_LABEL = "\u4ed6";
    197         private final int mMiscBucketIndex;
    198 
    199         public JapaneseContactUtils(Locale locale) {
    200             super(locale);
    201             // Determine which bucket AlphabeticIndex is lumping unclassified
    202             // Japanese characters into by looking up the bucket index for
    203             // a representative Kanji/CJK unified ideograph (\u65e5 is the
    204             // character '').
    205             mMiscBucketIndex = super.getBucketIndex("\u65e5");
    206         }
    207 
    208         // Set of UnicodeBlocks for unified CJK (Chinese) characters and
    209         // Japanese characters. This includes all code blocks that might
    210         // contain a character used in Japanese (which is why unified CJK
    211         // blocks are included but Korean Hangul and jamo are not).
    212         private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
    213         static {
    214             Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
    215             set.add(UnicodeBlock.HIRAGANA);
    216             set.add(UnicodeBlock.KATAKANA);
    217             set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
    218             set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
    219             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
    220             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
    221             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
    222             set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
    223             set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
    224             set.add(UnicodeBlock.CJK_COMPATIBILITY);
    225             set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
    226             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
    227             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
    228             CJ_BLOCKS = Collections.unmodifiableSet(set);
    229         }
    230 
    231         /**
    232          * Helper routine to identify unlabeled Chinese or Japanese characters
    233          * to put in a 'misc' bucket.
    234          *
    235          * @return true if the specified Unicode code point is Chinese or
    236          *              Japanese
    237          */
    238         private static boolean isChineseOrJapanese(int codePoint) {
    239             return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
    240         }
    241 
    242         /**
    243          * Returns the bucket index for the specified string. Adds an
    244          * additional 'misc' bucket for Kanji characters to the base class set.
    245          */
    246         @Override
    247         public int getBucketIndex(String name) {
    248             final int bucketIndex = super.getBucketIndex(name);
    249             if ((bucketIndex == mMiscBucketIndex &&
    250                  !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
    251                 bucketIndex > mMiscBucketIndex) {
    252                 return bucketIndex + 1;
    253             }
    254             return bucketIndex;
    255         }
    256 
    257         /**
    258          * Returns the number of buckets in use (one more than the base class
    259          * uses, because this class adds a bucket for Kanji).
    260          */
    261         @Override
    262         public int getBucketCount() {
    263             return super.getBucketCount() + 1;
    264         }
    265 
    266         /**
    267          * Returns the label for the specified bucket index if a valid index,
    268          * otherwise returns an empty string. '' is returned for unclassified
    269          * Kanji; for all others, the label determined by the base class is
    270          * returned.
    271          */
    272         @Override
    273         public String getBucketLabel(int bucketIndex) {
    274             if (bucketIndex == mMiscBucketIndex) {
    275                 return JAPANESE_MISC_LABEL;
    276             } else if (bucketIndex > mMiscBucketIndex) {
    277                 --bucketIndex;
    278             }
    279             return super.getBucketLabel(bucketIndex);
    280         }
    281 
    282         @Override
    283         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    284             // Hiragana and Katakana will be positively identified as Japanese.
    285             if (nameStyle == PhoneticNameStyle.JAPANESE) {
    286                 return getRomajiNameLookupKeys(name);
    287             }
    288             return null;
    289         }
    290 
    291         private static boolean mInitializedTransliterator;
    292         private static Transliterator mJapaneseTransliterator;
    293 
    294         private static Transliterator getJapaneseTransliterator() {
    295             synchronized(JapaneseContactUtils.class) {
    296                 if (!mInitializedTransliterator) {
    297                     mInitializedTransliterator = true;
    298                     Transliterator t = null;
    299                     try {
    300                         t = new Transliterator("Hiragana-Latin; Katakana-Latin;"
    301                                 + " Latin-Ascii");
    302                     } catch (RuntimeException e) {
    303                         Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
    304                                 + " is missing");
    305                     }
    306                     mJapaneseTransliterator = t;
    307                 }
    308                 return mJapaneseTransliterator;
    309             }
    310         }
    311 
    312         public static Iterator<String> getRomajiNameLookupKeys(String name) {
    313             final Transliterator t = getJapaneseTransliterator();
    314             if (t == null) {
    315                 return null;
    316             }
    317             final String romajiName = t.transliterate(name);
    318             if (TextUtils.isEmpty(romajiName) ||
    319                     TextUtils.equals(name, romajiName)) {
    320                 return null;
    321             }
    322             final HashSet<String> keys = new HashSet<String>();
    323             keys.add(romajiName);
    324             return keys.iterator();
    325         }
    326     }
    327 
    328     /**
    329      * Simplified Chinese specific locale overrides. Uses ICU Transliterator
    330      * for generating pinyin transliteration.
    331      *
    332      * sortKey: unchanged (same as name)
    333      * nameLookupKeys: adds additional name lookup keys
    334      *     - Chinese character's pinyin and pinyin's initial character.
    335      *     - Latin word and initial character.
    336      * labels: unchanged
    337      *     Simplified Chinese labels are the same as English: [A-Z], #, " "
    338      */
    339     private static class SimplifiedChineseContactUtils
    340         extends ContactLocaleUtilsBase {
    341         public SimplifiedChineseContactUtils(Locale locale) {
    342             super(locale);
    343         }
    344 
    345         @Override
    346         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    347             if (nameStyle != FullNameStyle.JAPANESE &&
    348                     nameStyle != FullNameStyle.KOREAN) {
    349                 return getPinyinNameLookupKeys(name);
    350             }
    351             return null;
    352         }
    353 
    354         public static Iterator<String> getPinyinNameLookupKeys(String name) {
    355             // TODO : Reduce the object allocation.
    356             HashSet<String> keys = new HashSet<String>();
    357             ArrayList<Token> tokens = HanziToPinyin.getInstance().get(name);
    358             final int tokenCount = tokens.size();
    359             final StringBuilder keyPinyin = new StringBuilder();
    360             final StringBuilder keyInitial = new StringBuilder();
    361             // There is no space among the Chinese Characters, the variant name
    362             // lookup key wouldn't work for Chinese. The keyOriginal is used to
    363             // build the lookup keys for itself.
    364             final StringBuilder keyOriginal = new StringBuilder();
    365             for (int i = tokenCount - 1; i >= 0; i--) {
    366                 final Token token = tokens.get(i);
    367                 if (Token.UNKNOWN == token.type) {
    368                     continue;
    369                 }
    370                 if (Token.PINYIN == token.type) {
    371                     keyPinyin.insert(0, token.target);
    372                     keyInitial.insert(0, token.target.charAt(0));
    373                 } else if (Token.LATIN == token.type) {
    374                     // Avoid adding space at the end of String.
    375                     if (keyPinyin.length() > 0) {
    376                         keyPinyin.insert(0, ' ');
    377                     }
    378                     if (keyOriginal.length() > 0) {
    379                         keyOriginal.insert(0, ' ');
    380                     }
    381                     keyPinyin.insert(0, token.source);
    382                     keyInitial.insert(0, token.source.charAt(0));
    383                 }
    384                 keyOriginal.insert(0, token.source);
    385                 keys.add(keyOriginal.toString());
    386                 keys.add(keyPinyin.toString());
    387                 keys.add(keyInitial.toString());
    388             }
    389             return keys.iterator();
    390         }
    391     }
    392 
    // Lower-cased language codes compared against mLanguage. Locale.ROOT
    // keeps the lower-casing independent of the device's default locale.
    private static final String CHINESE_LANGUAGE =
            Locale.CHINESE.getLanguage().toLowerCase(Locale.ROOT);
    private static final String JAPANESE_LANGUAGE =
            Locale.JAPANESE.getLanguage().toLowerCase(Locale.ROOT);
    private static final String KOREAN_LANGUAGE =
            Locale.KOREAN.getLanguage().toLowerCase(Locale.ROOT);

    // Instance for the current locale; accessed through the synchronized
    // static methods getInstance() and setLocale().
    private static ContactLocaleUtils sSingleton;

    private final Locale mLocale;
    // Lower-cased language code of mLocale.
    private final String mLanguage;
    // Locale specific implementation that most public methods delegate to.
    private final ContactLocaleUtilsBase mUtils;

    /**
     * Creates the utilities for a locale and logs the resulting labels.
     *
     * @param locale the locale to use, or null for the default locale
     */
    private ContactLocaleUtils(Locale locale) {
        mLocale = (locale != null) ? locale : Locale.getDefault();
        mLanguage = mLocale.getLanguage().toLowerCase(Locale.ROOT);
        if (mLanguage.equals(JAPANESE_LANGUAGE)) {
            // Any locale whose language is Japanese.
            mUtils = new JapaneseContactUtils(mLocale);
        } else if (mLocale.equals(Locale.CHINA)) {
            // Only the exact Locale.CHINA locale gets pinyin lookup keys.
            mUtils = new SimplifiedChineseContactUtils(mLocale);
        } else {
            mUtils = new ContactLocaleUtilsBase(mLocale);
        }
        Log.i(TAG, "AddressBook Labels [" + mLocale.toString() + "]: "
              + getLabels().toString());
    }
    420 
    421     public boolean isLocale(Locale locale) {
    422         return mLocale.equals(locale);
    423     }
    424 
    425     public static synchronized ContactLocaleUtils getInstance() {
    426         if (sSingleton == null) {
    427             sSingleton = new ContactLocaleUtils(null);
    428         }
    429         return sSingleton;
    430     }
    431 
    432     public static synchronized void setLocale(Locale locale) {
    433         if (sSingleton == null || !sSingleton.isLocale(locale)) {
    434             sSingleton = new ContactLocaleUtils(locale);
    435         }
    436     }
    437 
    438     public String getSortKey(String name, int nameStyle) {
    439         return mUtils.getSortKey(name);
    440     }
    441 
    442     public int getBucketIndex(String name) {
    443         return mUtils.getBucketIndex(name);
    444     }
    445 
    446     public int getBucketCount() {
    447         return mUtils.getBucketCount();
    448     }
    449 
    450     public String getBucketLabel(int bucketIndex) {
    451         return mUtils.getBucketLabel(bucketIndex);
    452     }
    453 
    454     public String getLabel(String name) {
    455         return getBucketLabel(getBucketIndex(name));
    456     }
    457 
    458     public ArrayList<String> getLabels() {
    459         return mUtils.getLabels();
    460     }
    461 
    462     /**
    463      *  Determine which utility should be used for generating NameLookupKey.
    464      *  (ie, whether we generate Pinyin lookup keys or not)
    465      *
    466      *  Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified
    467      *  and tagged as CJK. For Hiragana/Katakana names, generate Romaji
    468      *  lookup keys when not in a Chinese or Korean locale.
    469      *
    470      *  Otherwise, use the default behavior of that locale:
    471      *  a. For Japan, generate Romaji lookup keys for Hiragana/Katakana.
    472      *  b. For Simplified Chinese locale, generate Pinyin lookup keys.
    473      */
    474     public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    475         if (nameStyle == FullNameStyle.JAPANESE &&
    476                 !CHINESE_LANGUAGE.equals(mLanguage) &&
    477                 !KOREAN_LANGUAGE.equals(mLanguage)) {
    478             return JapaneseContactUtils.getRomajiNameLookupKeys(name);
    479         }
    480         return mUtils.getNameLookupKeys(name, nameStyle);
    481     }
    482 
    483 }
    484