Home | History | Annotate | Download | only in contacts
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 
     17 package com.android.providers.contacts;
     18 
     19 import android.provider.ContactsContract.FullNameStyle;
     20 
     21 import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupType;
     22 import com.android.providers.contacts.SearchIndexManager.IndexBuilder;
     23 
     24 import java.util.Arrays;
     25 import java.util.Comparator;
     26 import java.util.Iterator;
     27 
     28 /**
     29  * Given a full name, constructs all possible variants of the name.
     30  */
     31 public abstract class NameLookupBuilder {
     32 
     33     private static final int MAX_NAME_TOKENS = 4;
     34 
     35     private final NameSplitter mSplitter;
     36     private String[][] mNicknameClusters = new String[MAX_NAME_TOKENS][];
     37     private StringBuilder mStringBuilder = new StringBuilder();
     38     private String[] mNames = new String[NameSplitter.MAX_TOKENS];
     39 
     40     private static final int[] KOREAN_JAUM_CONVERT_MAP = {
     41         // JAUM in Hangul Compatibility Jamo area 0x3131 ~ 0x314E to
     42         // in Hangul Jamo area 0x1100 ~ 0x1112
     43         0x1100, // 0x3131 HANGUL LETTER KIYEOK
     44         0x1101, // 0x3132 HANGUL LETTER SSANGKIYEOK
     45         0x00,   // 0x3133 HANGUL LETTER KIYEOKSIOS (Ignored)
     46         0x1102, // 0x3134 HANGUL LETTER NIEUN
     47         0x00,   // 0x3135 HANGUL LETTER NIEUNCIEUC (Ignored)
     48         0x00,   // 0x3136 HANGUL LETTER NIEUNHIEUH (Ignored)
     49         0x1103, // 0x3137 HANGUL LETTER TIKEUT
     50         0x1104, // 0x3138 HANGUL LETTER SSANGTIKEUT
     51         0x1105, // 0x3139 HANGUL LETTER RIEUL
     52         0x00,   // 0x313A HANGUL LETTER RIEULKIYEOK (Ignored)
     53         0x00,   // 0x313B HANGUL LETTER RIEULMIEUM (Ignored)
     54         0x00,   // 0x313C HANGUL LETTER RIEULPIEUP (Ignored)
     55         0x00,   // 0x313D HANGUL LETTER RIEULSIOS (Ignored)
     56         0x00,   // 0x313E HANGUL LETTER RIEULTHIEUTH (Ignored)
     57         0x00,   // 0x313F HANGUL LETTER RIEULPHIEUPH (Ignored)
     58         0x00,   // 0x3140 HANGUL LETTER RIEULHIEUH (Ignored)
     59         0x1106, // 0x3141 HANGUL LETTER MIEUM
     60         0x1107, // 0x3142 HANGUL LETTER PIEUP
     61         0x1108, // 0x3143 HANGUL LETTER SSANGPIEUP
     62         0x00,   // 0x3144 HANGUL LETTER PIEUPSIOS (Ignored)
     63         0x1109, // 0x3145 HANGUL LETTER SIOS
     64         0x110A, // 0x3146 HANGUL LETTER SSANGSIOS
     65         0x110B, // 0x3147 HANGUL LETTER IEUNG
     66         0x110C, // 0x3148 HANGUL LETTER CIEUC
     67         0x110D, // 0x3149 HANGUL LETTER SSANGCIEUC
     68         0x110E, // 0x314A HANGUL LETTER CHIEUCH
     69         0x110F, // 0x314B HANGUL LETTER KHIEUKH
     70         0x1110, // 0x314C HANGUL LETTER THIEUTH
     71         0x1111, // 0x314D HANGUL LETTER PHIEUPH
     72         0x1112  // 0x314E HANGUL LETTER HIEUH
     73     };
     74 
     75     public NameLookupBuilder(NameSplitter splitter) {
     76         mSplitter = splitter;
     77     }
     78 
     79     /**
     80      * Inserts a name lookup record with the supplied column values.
     81      */
     82     protected abstract void insertNameLookup(long rawContactId, long dataId, int lookupType,
     83             String string);
     84 
     85     /**
     86      * Returns common nickname cluster IDs for a given name. For example, it
     87      * will return the same value for "Robert", "Bob" and "Rob". Some names belong to multiple
     88      * clusters, e.g. Leo could be Leonard or Leopold.
     89      *
     90      * May return null.
     91      *
     92      * @param normalizedName A normalized first name, see {@link NameNormalizer#normalize}.
     93      */
     94     protected abstract String[] getCommonNicknameClusters(String normalizedName);
     95 
     96     /**
     97      * Inserts name lookup records for the given structured name.
     98      */
     99     public void insertNameLookup(long rawContactId, long dataId, String name, int fullNameStyle) {
    100         int tokenCount = mSplitter.tokenize(mNames, name);
    101         if (tokenCount == 0) {
    102             return;
    103         }
    104 
    105         for (int i = 0; i < tokenCount; i++) {
    106             mNames[i] = normalizeName(mNames[i]);
    107         }
    108 
    109         boolean tooManyTokens = tokenCount > MAX_NAME_TOKENS;
    110         if (tooManyTokens) {
    111             insertNameVariant(rawContactId, dataId, tokenCount, NameLookupType.NAME_EXACT, true);
    112 
    113             // Favor longer parts of the name
    114             Arrays.sort(mNames, 0, tokenCount, new Comparator<String>() {
    115 
    116                 public int compare(String s1, String s2) {
    117                     return s2.length() - s1.length();
    118                 }
    119             });
    120 
    121             // Insert a collation key for each extra word - useful for contact filtering
    122             // and suggestions
    123             String firstToken = mNames[0];
    124             for (int i = MAX_NAME_TOKENS; i < tokenCount; i++) {
    125                 mNames[0] = mNames[i];
    126                 insertCollationKey(rawContactId, dataId, MAX_NAME_TOKENS);
    127             }
    128             mNames[0] = firstToken;
    129 
    130             tokenCount = MAX_NAME_TOKENS;
    131         }
    132 
    133         // Phase I: insert all variants not involving nickname clusters
    134         for (int i = 0; i < tokenCount; i++) {
    135             mNicknameClusters[i] = getCommonNicknameClusters(mNames[i]);
    136         }
    137 
    138         insertNameVariants(rawContactId, dataId, 0, tokenCount, !tooManyTokens, true);
    139         insertNicknamePermutations(rawContactId, dataId, 0, tokenCount);
    140     }
    141 
    142     public void appendToSearchIndex(IndexBuilder builder, String name, int fullNameStyle) {
    143         int tokenCount = mSplitter.tokenize(mNames, name);
    144         if (tokenCount == 0) {
    145             return;
    146         }
    147 
    148         for (int i = 0; i < tokenCount; i++) {
    149             builder.appendName(mNames[i]);
    150         }
    151 
    152         appendNameShorthandLookup(builder, name, fullNameStyle);
    153         appendNameLookupForLocaleBasedName(builder, name, fullNameStyle);
    154     }
    155 
    156     /**
    157      * Insert more name indexes according to locale specifies.
    158      */
    159     private void appendNameLookupForLocaleBasedName(IndexBuilder builder,
    160             String fullName, int fullNameStyle) {
    161         if (fullNameStyle == FullNameStyle.KOREAN) {
    162             NameSplitter.Name name = new NameSplitter.Name();
    163             mSplitter.split(name, fullName, fullNameStyle);
    164             if (name.givenNames != null) {
    165                 builder.appendName(name.givenNames);
    166                 appendKoreanNameConsonantsLookup(builder, name.givenNames);
    167             }
    168             appendKoreanNameConsonantsLookup(builder, fullName);
    169         }
    170     }
    171 
    172     /**
    173      * Inserts Korean lead consonants records of name for the given structured name.
    174      */
    175     private void appendKoreanNameConsonantsLookup(IndexBuilder builder, String name) {
    176         int position = 0;
    177         int consonantLength = 0;
    178         int character;
    179 
    180         final int stringLength = name.length();
    181         mStringBuilder.setLength(0);
    182         do {
    183             character = name.codePointAt(position++);
    184             if ((character == 0x20) || (character == 0x2c) || (character == 0x2E)) {
    185                 // Skip spaces, commas and periods.
    186                 continue;
    187             }
    188             // Exclude characters that are not in Korean leading consonants area
    189             // and Korean characters area.
    190             if ((character < 0x1100) || (character > 0x1112 && character < 0x3131) ||
    191                     (character > 0x314E && character < 0xAC00) ||
    192                     (character > 0xD7A3)) {
    193                 break;
    194             }
    195             // Decompose and take a only lead-consonant for composed Korean characters.
    196             if (character >= 0xAC00) {
    197                 // Lead consonant = "Lead consonant base" +
    198                 //      (character - "Korean Character base") /
    199                 //          ("Lead consonant count" * "middle Vowel count")
    200                 character = 0x1100 + (character - 0xAC00) / 588;
    201             } else if (character >= 0x3131) {
    202                 // Hangul Compatibility Jamo area 0x3131 ~ 0x314E :
    203                 // Convert to Hangul Jamo area 0x1100 ~ 0x1112
    204                 if (character - 0x3131 >= KOREAN_JAUM_CONVERT_MAP.length) {
    205                     // This is not lead-consonant
    206                     break;
    207                 }
    208                 character = KOREAN_JAUM_CONVERT_MAP[character - 0x3131];
    209                 if (character == 0) {
    210                     // This is not lead-consonant
    211                     break;
    212                 }
    213             }
    214             mStringBuilder.appendCodePoint(character);
    215             consonantLength++;
    216         } while (position < stringLength);
    217 
    218         // At least, insert consonants when Korean characters are two or more.
    219         // Only one character cases are covered by NAME_COLLATION_KEY
    220         if (consonantLength > 1) {
    221             builder.appendName(mStringBuilder.toString());
    222         }
    223     }
    224 
    225     protected String normalizeName(String name) {
    226         return NameNormalizer.normalize(name);
    227     }
    228 
    229     /**
    230      * Inserts all name variants based on permutations of tokens between
    231      * fromIndex and toIndex
    232      *
    233      * @param initiallyExact true if the name without permutations is the exact
    234      *            original name
    235      * @param buildCollationKey true if a collation key makes sense for these
    236      *            permutations (false if at least one of the tokens is a
    237      *            nickname cluster key)
    238      */
    239     private void insertNameVariants(long rawContactId, long dataId, int fromIndex, int toIndex,
    240             boolean initiallyExact, boolean buildCollationKey) {
    241         if (fromIndex == toIndex) {
    242             insertNameVariant(rawContactId, dataId, toIndex,
    243                     initiallyExact ? NameLookupType.NAME_EXACT : NameLookupType.NAME_VARIANT,
    244                     buildCollationKey);
    245             return;
    246         }
    247 
    248         // Swap the first token with each other token (including itself, which is a no-op)
    249         // and recursively insert all permutations for the remaining tokens
    250         String firstToken = mNames[fromIndex];
    251         for (int i = fromIndex; i < toIndex; i++) {
    252             mNames[fromIndex] = mNames[i];
    253             mNames[i] = firstToken;
    254 
    255             insertNameVariants(rawContactId, dataId, fromIndex + 1, toIndex,
    256                     initiallyExact && i == fromIndex, buildCollationKey);
    257 
    258             mNames[i] = mNames[fromIndex];
    259             mNames[fromIndex] = firstToken;
    260         }
    261     }
    262 
    263     /**
    264      * Inserts a single name variant and optionally its collation key counterpart.
    265      */
    266     private void insertNameVariant(long rawContactId, long dataId, int tokenCount,
    267             int lookupType, boolean buildCollationKey) {
    268         mStringBuilder.setLength(0);
    269 
    270         for (int i = 0; i < tokenCount; i++) {
    271             if (i != 0) {
    272                 mStringBuilder.append('.');
    273             }
    274             mStringBuilder.append(mNames[i]);
    275         }
    276 
    277         insertNameLookup(rawContactId, dataId, lookupType, mStringBuilder.toString());
    278 
    279         if (buildCollationKey) {
    280             insertCollationKey(rawContactId, dataId, tokenCount);
    281         }
    282     }
    283 
    284     /**
    285      * Inserts a collation key for the current contents of {@link #mNames}.
    286      */
    287     private void insertCollationKey(long rawContactId, long dataId, int tokenCount) {
    288         mStringBuilder.setLength(0);
    289 
    290         for (int i = 0; i < tokenCount; i++) {
    291             mStringBuilder.append(mNames[i]);
    292         }
    293 
    294         insertNameLookup(rawContactId, dataId, NameLookupType.NAME_COLLATION_KEY,
    295                 mStringBuilder.toString());
    296     }
    297 
    298     /**
    299      * For all tokens that correspond to nickname clusters, substitutes each cluster key
    300      * and inserts all permutations with that key.
    301      */
    302     private void insertNicknamePermutations(long rawContactId, long dataId, int fromIndex,
    303             int tokenCount) {
    304         for (int i = fromIndex; i < tokenCount; i++) {
    305             String[] clusters = mNicknameClusters[i];
    306             if (clusters != null) {
    307                 String token = mNames[i];
    308                 for (int j = 0; j < clusters.length; j++) {
    309                     mNames[i] = clusters[j];
    310 
    311                     // Insert all permutations with this nickname cluster
    312                     insertNameVariants(rawContactId, dataId, 0, tokenCount, false, false);
    313 
    314                     // Repeat recursively for other nickname clusters
    315                     insertNicknamePermutations(rawContactId, dataId, i + 1, tokenCount);
    316                 }
    317                 mNames[i] = token;
    318             }
    319         }
    320     }
    321 
    322     /**
    323      * Insert more name indexes according to locale specifies for those locales
    324      * for which we have alternative shorthand name methods (eg, Pinyin for
    325      * Chinese, Romaji for Japanese).
    326      */
    327     public void appendNameShorthandLookup(IndexBuilder builder, String name, int fullNameStyle) {
    328         Iterator<String> it =
    329                 ContactLocaleUtils.getInstance().getNameLookupKeys(name, fullNameStyle);
    330         if (it != null) {
    331             while (it.hasNext()) {
    332                 builder.appendName(it.next());
    333             }
    334         }
    335     }
    336 }
    337