Home | History | Annotate | Download | only in contacts
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 
     17 package com.android.providers.contacts;
     18 
     19 import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupType;
     20 
     21 import android.provider.ContactsContract.FullNameStyle;
     22 
     23 import java.util.Arrays;
     24 import java.util.Comparator;
     25 import java.util.Iterator;
     26 
     27 /**
     28  * Given a full name, constructs all possible variants of the name.
     29  */
     30 public abstract class NameLookupBuilder {
     31 
     32     private static final int MAX_NAME_TOKENS = 4;
     33 
     34     private final NameSplitter mSplitter;
     35     private String[][] mNicknameClusters = new String[MAX_NAME_TOKENS][];
     36     private StringBuilder mStringBuilder = new StringBuilder();
     37     private String[] mNames = new String[NameSplitter.MAX_TOKENS];
     38 
     39     private static int[] KOREAN_JAUM_CONVERT_MAP = {
     40         // JAUM in Hangul Compatibility Jamo area 0x3131 ~ 0x314E to
     41         // in Hangul Jamo area 0x1100 ~ 0x1112
     42         0x1100, // 0x3131 HANGUL LETTER KIYEOK
     43         0x1101, // 0x3132 HANGUL LETTER SSANGKIYEOK
     44         0x00,   // 0x3133 HANGUL LETTER KIYEOKSIOS (Ignored)
     45         0x1102, // 0x3134 HANGUL LETTER NIEUN
     46         0x00,   // 0x3135 HANGUL LETTER NIEUNCIEUC (Ignored)
     47         0x00,   // 0x3136 HANGUL LETTER NIEUNHIEUH (Ignored)
     48         0x1103, // 0x3137 HANGUL LETTER TIKEUT
     49         0x1104, // 0x3138 HANGUL LETTER SSANGTIKEUT
     50         0x1105, // 0x3139 HANGUL LETTER RIEUL
     51         0x00,   // 0x313A HANGUL LETTER RIEULKIYEOK (Ignored)
     52         0x00,   // 0x313B HANGUL LETTER RIEULMIEUM (Ignored)
     53         0x00,   // 0x313C HANGUL LETTER RIEULPIEUP (Ignored)
     54         0x00,   // 0x313D HANGUL LETTER RIEULSIOS (Ignored)
     55         0x00,   // 0x313E HANGUL LETTER RIEULTHIEUTH (Ignored)
     56         0x00,   // 0x313F HANGUL LETTER RIEULPHIEUPH (Ignored)
     57         0x00,   // 0x3140 HANGUL LETTER RIEULHIEUH (Ignored)
     58         0x1106, // 0x3141 HANGUL LETTER MIEUM
     59         0x1107, // 0x3142 HANGUL LETTER PIEUP
     60         0x1108, // 0x3143 HANGUL LETTER SSANGPIEUP
     61         0x00,   // 0x3144 HANGUL LETTER PIEUPSIOS (Ignored)
     62         0x1109, // 0x3145 HANGUL LETTER SIOS
     63         0x110A, // 0x3146 HANGUL LETTER SSANGSIOS
     64         0x110B, // 0x3147 HANGUL LETTER IEUNG
     65         0x110C, // 0x3148 HANGUL LETTER CIEUC
     66         0x110D, // 0x3149 HANGUL LETTER SSANGCIEUC
     67         0x110E, // 0x314A HANGUL LETTER CHIEUCH
     68         0x110F, // 0x314B HANGUL LETTER KHIEUKH
     69         0x1110, // 0x314C HANGUL LETTER THIEUTH
     70         0x1111, // 0x314D HANGUL LETTER PHIEUPH
     71         0x1112  // 0x314E HANGUL LETTER HIEUH
     72     };
     73     private static int KOREAN_JAUM_CONVERT_MAP_COUNT = 30;
     74 
     75 
     76     public NameLookupBuilder(NameSplitter splitter) {
     77         mSplitter = splitter;
     78     }
     79 
     80     /**
     81      * Inserts a name lookup record with the supplied column values.
     82      */
     83     protected abstract void insertNameLookup(long rawContactId, long dataId, int lookupType,
     84             String string);
     85 
     86     /**
     87      * Returns common nickname cluster IDs for a given name. For example, it
     88      * will return the same value for "Robert", "Bob" and "Rob". Some names belong to multiple
     89      * clusters, e.g. Leo could be Leonard or Leopold.
     90      *
     91      * May return null.
     92      *
     93      * @param normalizedName A normalized first name, see {@link NameNormalizer#normalize}.
     94      */
     95     protected abstract String[] getCommonNicknameClusters(String normalizedName);
     96 
     97     /**
     98      * Inserts name lookup records for the given structured name.
     99      */
    100     public void insertNameLookup(long rawContactId, long dataId, String name, int fullNameStyle) {
    101         int tokenCount = mSplitter.tokenize(mNames, name);
    102         if (tokenCount == 0) {
    103             return;
    104         }
    105 
    106         for (int i = 0; i < tokenCount; i++) {
    107             mNames[i] = normalizeName(mNames[i]);
    108         }
    109 
    110         boolean tooManyTokens = tokenCount > MAX_NAME_TOKENS;
    111         if (tooManyTokens) {
    112             insertNameVariant(rawContactId, dataId, tokenCount, NameLookupType.NAME_EXACT, true);
    113 
    114             // Favor longer parts of the name
    115             Arrays.sort(mNames, 0, tokenCount, new Comparator<String>() {
    116 
    117                 public int compare(String s1, String s2) {
    118                     return s2.length() - s1.length();
    119                 }
    120             });
    121 
    122             // Insert a collation key for each extra word - useful for contact filtering
    123             // and suggestions
    124             String firstToken = mNames[0];
    125             for (int i = MAX_NAME_TOKENS; i < tokenCount; i++) {
    126                 mNames[0] = mNames[i];
    127                 insertCollationKey(rawContactId, dataId, MAX_NAME_TOKENS);
    128             }
    129             mNames[0] = firstToken;
    130 
    131             tokenCount = MAX_NAME_TOKENS;
    132         }
    133 
    134         // Phase I: insert all variants not involving nickname clusters
    135         for (int i = 0; i < tokenCount; i++) {
    136             mNicknameClusters[i] = getCommonNicknameClusters(mNames[i]);
    137         }
    138 
    139         insertNameVariants(rawContactId, dataId, 0, tokenCount, !tooManyTokens, true);
    140         insertNicknamePermutations(rawContactId, dataId, 0, tokenCount);
    141         insertNameShorthandLookup(rawContactId, dataId, name, fullNameStyle);
    142         insertLocaleBasedSpecificLookup(rawContactId, dataId, name, fullNameStyle);
    143     }
    144 
    145     private void insertLocaleBasedSpecificLookup(long rawContactId, long dataId, String name,
    146             int fullNameStyle) {
    147         if (fullNameStyle == FullNameStyle.KOREAN) {
    148             insertKoreanNameConsonantsLookup(rawContactId, dataId, name);
    149         }
    150     }
    151 
    152     /**
    153      * Inserts Korean lead consonants records of name for the given structured name.
    154      */
    155     private void insertKoreanNameConsonantsLookup(long rawContactId, long dataId, String name) {
    156         int position = 0;
    157         int consonantLength = 0;
    158         int character;
    159 
    160         final int stringLength = name.length();
    161         mStringBuilder.setLength(0);
    162         do {
    163             character = name.codePointAt(position++);
    164             if (character == 0x20) {
    165                 // Skip spaces.
    166                 continue;
    167             }
    168             // Exclude characters that are not in Korean leading consonants area
    169             // and Korean characters area.
    170             if ((character < 0x1100) || (character > 0x1112 && character < 0x3131) ||
    171                     (character > 0x314E && character < 0xAC00) ||
    172                     (character > 0xD7A3)) {
    173                 break;
    174             }
    175             // Decompose and take a only lead-consonant for composed Korean characters.
    176             if (character >= 0xAC00) {
    177                 // Lead consonant = "Lead consonant base" +
    178                 //      (character - "Korean Character base") /
    179                 //          ("Lead consonant count" * "middle Vowel count")
    180                 character = 0x1100 + (character - 0xAC00) / 588;
    181             } else if (character >= 0x3131) {
    182                 // Hangul Compatibility Jamo area 0x3131 ~ 0x314E :
    183                 // Convert to Hangul Jamo area 0x1100 ~ 0x1112
    184                 if (character - 0x3131 >= KOREAN_JAUM_CONVERT_MAP_COUNT) {
    185                     // This is not lead-consonant
    186                     break;
    187                 }
    188                 character = KOREAN_JAUM_CONVERT_MAP[character - 0x3131];
    189                 if (character == 0) {
    190                     // This is not lead-consonant
    191                     break;
    192                 }
    193             }
    194             mStringBuilder.appendCodePoint(character);
    195             consonantLength++;
    196         } while (position < stringLength);
    197 
    198         // At least, insert consonants when Korean characters are two or more.
    199         // Only one character cases are covered by NAME_COLLATION_KEY
    200         if (consonantLength > 1) {
    201             insertNameLookup(rawContactId, dataId, NameLookupType.NAME_CONSONANTS,
    202                     normalizeName(mStringBuilder.toString()));
    203         }
    204     }
    205 
    206     protected String normalizeName(String name) {
    207         return NameNormalizer.normalize(name);
    208     }
    209 
    210     /**
    211      * Inserts all name variants based on permutations of tokens between
    212      * fromIndex and toIndex
    213      *
    214      * @param initiallyExact true if the name without permutations is the exact
    215      *            original name
    216      * @param buildCollationKey true if a collation key makes sense for these
    217      *            permutations (false if at least one of the tokens is a
    218      *            nickname cluster key)
    219      */
    220     private void insertNameVariants(long rawContactId, long dataId, int fromIndex, int toIndex,
    221             boolean initiallyExact, boolean buildCollationKey) {
    222         if (fromIndex == toIndex) {
    223             insertNameVariant(rawContactId, dataId, toIndex,
    224                     initiallyExact ? NameLookupType.NAME_EXACT : NameLookupType.NAME_VARIANT,
    225                     buildCollationKey);
    226             return;
    227         }
    228 
    229         // Swap the first token with each other token (including itself, which is a no-op)
    230         // and recursively insert all permutations for the remaining tokens
    231         String firstToken = mNames[fromIndex];
    232         for (int i = fromIndex; i < toIndex; i++) {
    233             mNames[fromIndex] = mNames[i];
    234             mNames[i] = firstToken;
    235 
    236             insertNameVariants(rawContactId, dataId, fromIndex + 1, toIndex,
    237                     initiallyExact && i == fromIndex, buildCollationKey);
    238 
    239             mNames[i] = mNames[fromIndex];
    240             mNames[fromIndex] = firstToken;
    241         }
    242     }
    243 
    244     /**
    245      * Inserts a single name variant and optionally its collation key counterpart.
    246      */
    247     private void insertNameVariant(long rawContactId, long dataId, int tokenCount,
    248             int lookupType, boolean buildCollationKey) {
    249         mStringBuilder.setLength(0);
    250 
    251         for (int i = 0; i < tokenCount; i++) {
    252             if (i != 0) {
    253                 mStringBuilder.append('.');
    254             }
    255             mStringBuilder.append(mNames[i]);
    256         }
    257 
    258         insertNameLookup(rawContactId, dataId, lookupType, mStringBuilder.toString());
    259 
    260         if (buildCollationKey) {
    261             insertCollationKey(rawContactId, dataId, tokenCount);
    262         }
    263     }
    264 
    265     /**
    266      * Inserts a collation key for the current contents of {@link #mNames}.
    267      */
    268     private void insertCollationKey(long rawContactId, long dataId, int tokenCount) {
    269         mStringBuilder.setLength(0);
    270 
    271         for (int i = 0; i < tokenCount; i++) {
    272             mStringBuilder.append(mNames[i]);
    273         }
    274 
    275         insertNameLookup(rawContactId, dataId, NameLookupType.NAME_COLLATION_KEY,
    276                 mStringBuilder.toString());
    277     }
    278 
    279     /**
    280      * For all tokens that correspond to nickname clusters, substitutes each cluster key
    281      * and inserts all permutations with that key.
    282      */
    283     private void insertNicknamePermutations(long rawContactId, long dataId, int fromIndex,
    284             int tokenCount) {
    285         for (int i = fromIndex; i < tokenCount; i++) {
    286             String[] clusters = mNicknameClusters[i];
    287             if (clusters != null) {
    288                 String token = mNames[i];
    289                 for (int j = 0; j < clusters.length; j++) {
    290                     mNames[i] = clusters[j];
    291 
    292                     // Insert all permutations with this nickname cluster
    293                     insertNameVariants(rawContactId, dataId, 0, tokenCount, false, false);
    294 
    295                     // Repeat recursively for other nickname clusters
    296                     insertNicknamePermutations(rawContactId, dataId, i + 1, tokenCount);
    297                 }
    298                 mNames[i] = token;
    299             }
    300         }
    301     }
    302 
    303     private void insertNameShorthandLookup(long rawContactId, long dataId, String name,
    304             int fullNameStyle) {
    305         Iterator<String> it =
    306                 ContactLocaleUtils.getIntance().getNameLookupKeys(name, fullNameStyle);
    307         if (it != null) {
    308             while (it.hasNext()) {
    309                 String key = it.next();
    310                 insertNameLookup(rawContactId, dataId, NameLookupType.NAME_SHORTHAND,
    311                         normalizeName(key));
    312             }
    313         }
    314     }
    315 }
    316