1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 17 package com.android.providers.contacts; 18 19 import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupType; 20 21 import android.provider.ContactsContract.FullNameStyle; 22 23 import java.util.Arrays; 24 import java.util.Comparator; 25 import java.util.Iterator; 26 27 /** 28 * Given a full name, constructs all possible variants of the name. 29 */ 30 public abstract class NameLookupBuilder { 31 32 private static final int MAX_NAME_TOKENS = 4; 33 34 private final NameSplitter mSplitter; 35 private String[][] mNicknameClusters = new String[MAX_NAME_TOKENS][]; 36 private StringBuilder mStringBuilder = new StringBuilder(); 37 private String[] mNames = new String[NameSplitter.MAX_TOKENS]; 38 39 private static int[] KOREAN_JAUM_CONVERT_MAP = { 40 // JAUM in Hangul Compatibility Jamo area 0x3131 ~ 0x314E to 41 // in Hangul Jamo area 0x1100 ~ 0x1112 42 0x1100, // 0x3131 HANGUL LETTER KIYEOK 43 0x1101, // 0x3132 HANGUL LETTER SSANGKIYEOK 44 0x00, // 0x3133 HANGUL LETTER KIYEOKSIOS (Ignored) 45 0x1102, // 0x3134 HANGUL LETTER NIEUN 46 0x00, // 0x3135 HANGUL LETTER NIEUNCIEUC (Ignored) 47 0x00, // 0x3136 HANGUL LETTER NIEUNHIEUH (Ignored) 48 0x1103, // 0x3137 HANGUL LETTER TIKEUT 49 0x1104, // 0x3138 HANGUL LETTER SSANGTIKEUT 50 0x1105, // 0x3139 HANGUL LETTER RIEUL 51 0x00, // 0x313A HANGUL LETTER RIEULKIYEOK (Ignored) 52 0x00, // 0x313B HANGUL LETTER RIEULMIEUM (Ignored) 53 0x00, // 0x313C HANGUL LETTER RIEULPIEUP (Ignored) 54 0x00, // 0x313D HANGUL LETTER RIEULSIOS (Ignored) 55 0x00, // 0x313E HANGUL LETTER RIEULTHIEUTH (Ignored) 56 0x00, // 0x313F HANGUL LETTER RIEULPHIEUPH (Ignored) 57 0x00, // 0x3140 HANGUL LETTER RIEULHIEUH (Ignored) 58 0x1106, // 0x3141 HANGUL LETTER MIEUM 59 0x1107, // 0x3142 HANGUL LETTER PIEUP 60 0x1108, // 0x3143 HANGUL LETTER SSANGPIEUP 61 0x00, // 0x3144 HANGUL LETTER PIEUPSIOS (Ignored) 62 0x1109, // 0x3145 HANGUL LETTER SIOS 63 0x110A, // 0x3146 HANGUL LETTER SSANGSIOS 64 0x110B, // 0x3147 HANGUL LETTER IEUNG 65 0x110C, // 0x3148 HANGUL LETTER CIEUC 66 0x110D, // 0x3149 HANGUL LETTER SSANGCIEUC 67 0x110E, // 0x314A HANGUL LETTER CHIEUCH 68 0x110F, // 0x314B HANGUL LETTER KHIEUKH 69 0x1110, // 0x314C HANGUL LETTER THIEUTH 70 0x1111, // 0x314D HANGUL LETTER PHIEUPH 71 0x1112 // 0x314E HANGUL LETTER HIEUH 72 }; 73 private static int KOREAN_JAUM_CONVERT_MAP_COUNT = 30; 74 75 76 public NameLookupBuilder(NameSplitter splitter) { 77 mSplitter = splitter; 78 } 79 80 /** 81 * Inserts a name lookup record with the supplied column values. 82 */ 83 protected abstract void insertNameLookup(long rawContactId, long dataId, int lookupType, 84 String string); 85 86 /** 87 * Returns common nickname cluster IDs for a given name. For example, it 88 * will return the same value for "Robert", "Bob" and "Rob". Some names belong to multiple 89 * clusters, e.g. Leo could be Leonard or Leopold. 90 * 91 * May return null. 92 * 93 * @param normalizedName A normalized first name, see {@link NameNormalizer#normalize}. 94 */ 95 protected abstract String[] getCommonNicknameClusters(String normalizedName); 96 97 /** 98 * Inserts name lookup records for the given structured name. 99 */ 100 public void insertNameLookup(long rawContactId, long dataId, String name, int fullNameStyle) { 101 int tokenCount = mSplitter.tokenize(mNames, name); 102 if (tokenCount == 0) { 103 return; 104 } 105 106 for (int i = 0; i < tokenCount; i++) { 107 mNames[i] = normalizeName(mNames[i]); 108 } 109 110 boolean tooManyTokens = tokenCount > MAX_NAME_TOKENS; 111 if (tooManyTokens) { 112 insertNameVariant(rawContactId, dataId, tokenCount, NameLookupType.NAME_EXACT, true); 113 114 // Favor longer parts of the name 115 Arrays.sort(mNames, 0, tokenCount, new Comparator<String>() { 116 117 public int compare(String s1, String s2) { 118 return s2.length() - s1.length(); 119 } 120 }); 121 122 // Insert a collation key for each extra word - useful for contact filtering 123 // and suggestions 124 String firstToken = mNames[0]; 125 for (int i = MAX_NAME_TOKENS; i < tokenCount; i++) { 126 mNames[0] = mNames[i]; 127 insertCollationKey(rawContactId, dataId, MAX_NAME_TOKENS); 128 } 129 mNames[0] = firstToken; 130 131 tokenCount = MAX_NAME_TOKENS; 132 } 133 134 // Phase I: insert all variants not involving nickname clusters 135 for (int i = 0; i < tokenCount; i++) { 136 mNicknameClusters[i] = getCommonNicknameClusters(mNames[i]); 137 } 138 139 insertNameVariants(rawContactId, dataId, 0, tokenCount, !tooManyTokens, true); 140 insertNicknamePermutations(rawContactId, dataId, 0, tokenCount); 141 insertNameShorthandLookup(rawContactId, dataId, name, fullNameStyle); 142 insertLocaleBasedSpecificLookup(rawContactId, dataId, name, fullNameStyle); 143 } 144 145 private void insertLocaleBasedSpecificLookup(long rawContactId, long dataId, String name, 146 int fullNameStyle) { 147 if (fullNameStyle == FullNameStyle.KOREAN) { 148 insertKoreanNameConsonantsLookup(rawContactId, dataId, name); 149 } 150 } 151 152 /** 153 * Inserts Korean lead consonants records of name for the given structured name. 154 */ 155 private void insertKoreanNameConsonantsLookup(long rawContactId, long dataId, String name) { 156 int position = 0; 157 int consonantLength = 0; 158 int character; 159 160 final int stringLength = name.length(); 161 mStringBuilder.setLength(0); 162 do { 163 character = name.codePointAt(position++); 164 if (character == 0x20) { 165 // Skip spaces. 166 continue; 167 } 168 // Exclude characters that are not in Korean leading consonants area 169 // and Korean characters area. 170 if ((character < 0x1100) || (character > 0x1112 && character < 0x3131) || 171 (character > 0x314E && character < 0xAC00) || 172 (character > 0xD7A3)) { 173 break; 174 } 175 // Decompose and take a only lead-consonant for composed Korean characters. 176 if (character >= 0xAC00) { 177 // Lead consonant = "Lead consonant base" + 178 // (character - "Korean Character base") / 179 // ("Lead consonant count" * "middle Vowel count") 180 character = 0x1100 + (character - 0xAC00) / 588; 181 } else if (character >= 0x3131) { 182 // Hangul Compatibility Jamo area 0x3131 ~ 0x314E : 183 // Convert to Hangul Jamo area 0x1100 ~ 0x1112 184 if (character - 0x3131 >= KOREAN_JAUM_CONVERT_MAP_COUNT) { 185 // This is not lead-consonant 186 break; 187 } 188 character = KOREAN_JAUM_CONVERT_MAP[character - 0x3131]; 189 if (character == 0) { 190 // This is not lead-consonant 191 break; 192 } 193 } 194 mStringBuilder.appendCodePoint(character); 195 consonantLength++; 196 } while (position < stringLength); 197 198 // At least, insert consonants when Korean characters are two or more. 199 // Only one character cases are covered by NAME_COLLATION_KEY 200 if (consonantLength > 1) { 201 insertNameLookup(rawContactId, dataId, NameLookupType.NAME_CONSONANTS, 202 normalizeName(mStringBuilder.toString())); 203 } 204 } 205 206 protected String normalizeName(String name) { 207 return NameNormalizer.normalize(name); 208 } 209 210 /** 211 * Inserts all name variants based on permutations of tokens between 212 * fromIndex and toIndex 213 * 214 * @param initiallyExact true if the name without permutations is the exact 215 * original name 216 * @param buildCollationKey true if a collation key makes sense for these 217 * permutations (false if at least one of the tokens is a 218 * nickname cluster key) 219 */ 220 private void insertNameVariants(long rawContactId, long dataId, int fromIndex, int toIndex, 221 boolean initiallyExact, boolean buildCollationKey) { 222 if (fromIndex == toIndex) { 223 insertNameVariant(rawContactId, dataId, toIndex, 224 initiallyExact ? NameLookupType.NAME_EXACT : NameLookupType.NAME_VARIANT, 225 buildCollationKey); 226 return; 227 } 228 229 // Swap the first token with each other token (including itself, which is a no-op) 230 // and recursively insert all permutations for the remaining tokens 231 String firstToken = mNames[fromIndex]; 232 for (int i = fromIndex; i < toIndex; i++) { 233 mNames[fromIndex] = mNames[i]; 234 mNames[i] = firstToken; 235 236 insertNameVariants(rawContactId, dataId, fromIndex + 1, toIndex, 237 initiallyExact && i == fromIndex, buildCollationKey); 238 239 mNames[i] = mNames[fromIndex]; 240 mNames[fromIndex] = firstToken; 241 } 242 } 243 244 /** 245 * Inserts a single name variant and optionally its collation key counterpart. 246 */ 247 private void insertNameVariant(long rawContactId, long dataId, int tokenCount, 248 int lookupType, boolean buildCollationKey) { 249 mStringBuilder.setLength(0); 250 251 for (int i = 0; i < tokenCount; i++) { 252 if (i != 0) { 253 mStringBuilder.append('.'); 254 } 255 mStringBuilder.append(mNames[i]); 256 } 257 258 insertNameLookup(rawContactId, dataId, lookupType, mStringBuilder.toString()); 259 260 if (buildCollationKey) { 261 insertCollationKey(rawContactId, dataId, tokenCount); 262 } 263 } 264 265 /** 266 * Inserts a collation key for the current contents of {@link #mNames}. 267 */ 268 private void insertCollationKey(long rawContactId, long dataId, int tokenCount) { 269 mStringBuilder.setLength(0); 270 271 for (int i = 0; i < tokenCount; i++) { 272 mStringBuilder.append(mNames[i]); 273 } 274 275 insertNameLookup(rawContactId, dataId, NameLookupType.NAME_COLLATION_KEY, 276 mStringBuilder.toString()); 277 } 278 279 /** 280 * For all tokens that correspond to nickname clusters, substitutes each cluster key 281 * and inserts all permutations with that key. 282 */ 283 private void insertNicknamePermutations(long rawContactId, long dataId, int fromIndex, 284 int tokenCount) { 285 for (int i = fromIndex; i < tokenCount; i++) { 286 String[] clusters = mNicknameClusters[i]; 287 if (clusters != null) { 288 String token = mNames[i]; 289 for (int j = 0; j < clusters.length; j++) { 290 mNames[i] = clusters[j]; 291 292 // Insert all permutations with this nickname cluster 293 insertNameVariants(rawContactId, dataId, 0, tokenCount, false, false); 294 295 // Repeat recursively for other nickname clusters 296 insertNicknamePermutations(rawContactId, dataId, i + 1, tokenCount); 297 } 298 mNames[i] = token; 299 } 300 } 301 } 302 303 private void insertNameShorthandLookup(long rawContactId, long dataId, String name, 304 int fullNameStyle) { 305 Iterator<String> it = 306 ContactLocaleUtils.getIntance().getNameLookupKeys(name, fullNameStyle); 307 if (it != null) { 308 while (it.hasNext()) { 309 String key = it.next(); 310 insertNameLookup(rawContactId, dataId, NameLookupType.NAME_SHORTHAND, 311 normalizeName(key)); 312 } 313 } 314 } 315 } 316