/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 */
package com.android.providers.contacts;

import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns;
import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns;
import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns;
import com.android.providers.contacts.ContactsDatabaseHelper.Tables;

import android.content.ContentValues;
import android.database.Cursor;
import android.database.sqlite.SQLiteDatabase;
import android.os.SystemClock;
import android.provider.ContactsContract.CommonDataKinds.Email;
import android.provider.ContactsContract.CommonDataKinds.Nickname;
import android.provider.ContactsContract.CommonDataKinds.Organization;
import android.provider.ContactsContract.CommonDataKinds.StructuredPostal;
import android.provider.ContactsContract.Data;
import android.provider.ContactsContract.ProviderStatus;
import android.text.TextUtils;
import android.util.Log;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Maintains a search index for comprehensive contact search.
 *
 * <p>For every contact, the searchable data rows are folded into three text fields (content,
 * name and tokens) which are stored in {@link Tables#SEARCH_INDEX}. This class also provides
 * {@link #getFtsMatchQuery} to turn a user query into a match expression over those fields.
 *
 * <p>Instances reuse internal scratch buffers between calls and are therefore not safe for
 * concurrent use without external synchronization.
 */
public class SearchIndexManager {
    private static final String TAG = "ContactsFTS";

    /** Name of the database property that stores the version of the built index. */
    public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index";

    /** Version the stored property is compared against; a mismatch triggers a rebuild. */
    private static final int SEARCH_INDEX_VERSION = 1;

    /** Projection used when reading data rows to index, plus the positions that are read by index. */
    private static final class ContactIndexQuery {
        public static final String[] COLUMNS = {
                Data.CONTACT_ID,
                MimetypesColumns.MIMETYPE,
                Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5,
                Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11,
                Data.DATA12, Data.DATA13, Data.DATA14
        };

        public static final int CONTACT_ID = 0;
        public static final int MIMETYPE = 1;
    }

    /**
     * Accumulates the content, name and tokens text for the contact currently being indexed.
     *
     * <p>Content is assembled one element at a time: the {@code appendContent*} methods add to
     * a pending element, and {@link #commit()} moves that element into the content text unless
     * an identical element was already committed since the last {@link #reset()}.
     */
    public static class IndexBuilder {
        public static final int SEPARATOR_SPACE = 0;
        public static final int SEPARATOR_PARENTHESES = 1;
        public static final int SEPARATOR_SLASH = 2;
        public static final int SEPARATOR_COMMA = 3;

        private static final Pattern PATTERN_HYPHEN = Pattern.compile("\\-");

        private final StringBuilder mSbContent = new StringBuilder();
        private final StringBuilder mSbName = new StringBuilder();
        private final StringBuilder mSbTokens = new StringBuilder();
        private final StringBuilder mSbElementContent = new StringBuilder();
        private final HashSet<String> mUniqueElements = new HashSet<String>();
        private Cursor mCursor;

        /** Sets the cursor that {@link #getString} and {@link #getInt} read from. */
        void setCursor(Cursor cursor) {
            this.mCursor = cursor;
        }

        /** Discards all accumulated text, including any pending, uncommitted element. */
        void reset() {
            mSbContent.setLength(0);
            mSbTokens.setLength(0);
            mSbName.setLength(0);
            mSbElementContent.setLength(0);
            mUniqueElements.clear();
        }

        /** @return the committed content elements, one per line, or null if there are none */
        public String getContent() {
            return mSbContent.length() == 0 ? null : mSbContent.toString();
        }

        /** @return the space-separated normalized names, or null if there are none */
        public String getName() {
            return mSbName.length() == 0 ? null : mSbName.toString();
        }

        /** @return the space-separated tokens, or null if there are none */
        public String getTokens() {
            return mSbTokens.length() == 0 ? null : mSbTokens.toString();
        }

        /** Reads the named column of the cursor's current row as a string. */
        public String getString(String columnName) {
            return mCursor.getString(mCursor.getColumnIndex(columnName));
        }

        /** Reads the named column of the cursor's current row as an int. */
        public int getInt(String columnName) {
            return mCursor.getInt(mCursor.getColumnIndex(columnName));
        }

        @Override
        public String toString() {
            // The "Name" entry reports the name buffer (it previously repeated the tokens).
            return "Content: " + mSbContent + "\n Name: " + mSbName + "\n Tokens: " + mSbTokens;
        }

        /**
         * Moves the pending element into the content text, on a line of its own, unless the
         * same element was already committed. The pending element is cleared either way.
         */
        public void commit() {
            if (mSbElementContent.length() == 0) {
                return;
            }

            // Elements are separated by newlines, so an element must not contain one itself.
            String content = mSbElementContent.toString().replace('\n', ' ');
            // add() returns false for a duplicate, which is then skipped.
            if (mUniqueElements.add(content)) {
                if (mSbContent.length() != 0) {
                    mSbContent.append('\n');
                }
                mSbContent.append(content);
            }
            mSbElementContent.setLength(0);
        }

        /** Appends the named column's value to the pending element, separated by a space. */
        public void appendContentFromColumn(String columnName) {
            appendContentFromColumn(columnName, SEPARATOR_SPACE);
        }

        /**
         * Appends the named column's value to the pending element.
         *
         * @param format one of the {@code SEPARATOR_*} constants
         */
        public void appendContentFromColumn(String columnName, int format) {
            appendContent(getString(columnName), format);
        }

        /** Appends the value to the pending element, separated by a space. */
        public void appendContent(String value) {
            appendContent(value, SEPARATOR_SPACE);
        }

        /**
         * Appends the value to the pending element using the given separator style. Null or
         * empty values are ignored, as are unrecognized formats.
         *
         * @param format one of the {@code SEPARATOR_*} constants
         */
        public void appendContent(String value, int format) {
            if (TextUtils.isEmpty(value)) {
                return;
            }

            switch (format) {
                case SEPARATOR_SPACE:
                    if (mSbElementContent.length() > 0) {
                        mSbElementContent.append(' ');
                    }
                    mSbElementContent.append(value);
                    break;

                case SEPARATOR_SLASH:
                    // Unlike the other styles, the slash is emitted even for the first value.
                    mSbElementContent.append('/').append(value);
                    break;

                case SEPARATOR_PARENTHESES:
                    if (mSbElementContent.length() > 0) {
                        mSbElementContent.append(' ');
                    }
                    mSbElementContent.append('(').append(value).append(')');
                    break;

                case SEPARATOR_COMMA:
                    if (mSbElementContent.length() > 0) {
                        mSbElementContent.append(", ");
                    }
                    mSbElementContent.append(value);
                    break;

                default:
                    break;
            }
        }

        /** Adds a token to the space-separated tokens text. Null or empty tokens are ignored. */
        public void appendToken(String token) {
            if (TextUtils.isEmpty(token)) {
                return;
            }

            if (mSbTokens.length() != 0) {
                mSbTokens.append(' ');
            }
            mSbTokens.append(token);
        }

        /**
         * Adds a name to the names text after normalizing it. A hyphenated name is split at
         * the hyphens and each non-empty part is added separately.
         */
        public void appendName(String name) {
            if (TextUtils.isEmpty(name)) {
                return;
            }
            if (name.indexOf('-') < 0) {
                // Common case -- no hyphens in it.
                appendNameInternal(name);
            } else {
                // In order to make hyphenated names searchable, let's split names with '-'.
                for (String namePart : PATTERN_HYPHEN.split(name)) {
                    if (!TextUtils.isEmpty(namePart)) {
                        appendNameInternal(namePart);
                    }
                }
            }
        }

        private void appendNameInternal(String name) {
            if (mSbName.length() != 0) {
                mSbName.append(' ');
            }
            mSbName.append(NameNormalizer.normalize(name));
        }
    }

    private final ContactsProvider2 mContactsProvider;
    private final ContactsDatabaseHelper mDbHelper;

    // Scratch objects that are reused across calls.
    private final StringBuilder mSb = new StringBuilder();
    private final IndexBuilder mIndexBuilder = new IndexBuilder();
    private final ContentValues mValues = new ContentValues();
    private final String[] mSelectionArgs1 = new String[1];

    public SearchIndexManager(ContactsProvider2 contactsProvider) {
        this.mContactsProvider = contactsProvider;
        mDbHelper = (ContactsDatabaseHelper) mContactsProvider.getDatabaseHelper();
    }

    /**
     * Rebuilds the whole index if the stored index version differs from
     * {@link #SEARCH_INDEX_VERSION}; does nothing otherwise. The rebuild and the version
     * update happen in a single transaction.
     */
    public void updateIndex() {
        if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) {
            return;
        }
        SQLiteDatabase db = mDbHelper.getWritableDatabase();
        db.beginTransaction();
        try {
            // Checked again inside the transaction -- presumably in case another caller
            // finished a rebuild in the meantime.
            if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) {
                rebuildIndex(db);
                setSearchIndexVersion(SEARCH_INDEX_VERSION);
                db.setTransactionSuccessful();
            }
        } finally {
            db.endTransaction();
        }
    }

    /**
     * Recreates the search index table and indexes all contacts. The provider status is set
     * to {@link ProviderStatus#STATUS_UPGRADING} for the duration and restored to
     * {@link ProviderStatus#STATUS_NORMAL} afterwards, even if indexing fails.
     */
    private void rebuildIndex(SQLiteDatabase db) {
        mContactsProvider.setProviderStatus(ProviderStatus.STATUS_UPGRADING);
        long start = SystemClock.currentThreadTimeMillis();
        int count = 0;
        try {
            mDbHelper.createSearchIndexTable(db);
            count = buildIndex(db, null, false);
        } finally {
            mContactsProvider.setProviderStatus(ProviderStatus.STATUS_NORMAL);

            long end = SystemClock.currentThreadTimeMillis();
            Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, "
                    + count + " contacts");
        }
    }

    /**
     * Re-indexes the data rows whose contact id is in {@code contactIds} or whose raw contact
     * id is in {@code rawContactIds}, replacing existing index rows for the affected contacts.
     * Does nothing if both sets are empty.
     */
    public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) {
        if (contactIds.isEmpty() && rawContactIds.isEmpty()) {
            // Nothing to index. Without this guard the selection below would be "()", which
            // is not a valid SQL expression.
            return;
        }

        mSb.setLength(0);
        mSb.append("(");
        if (!contactIds.isEmpty()) {
            mSb.append(Data.CONTACT_ID + " IN (");
            for (Long contactId : contactIds) {
                mSb.append(contactId).append(",");
            }
            // Drop the trailing comma.
            mSb.setLength(mSb.length() - 1);
            mSb.append(')');
        }

        if (!rawContactIds.isEmpty()) {
            if (!contactIds.isEmpty()) {
                mSb.append(" OR ");
            }
            mSb.append(Data.RAW_CONTACT_ID + " IN (");
            for (Long rawContactId : rawContactIds) {
                mSb.append(rawContactId).append(",");
            }
            // Drop the trailing comma.
            mSb.setLength(mSb.length() - 1);
            mSb.append(')');
        }

        mSb.append(")");

        // Take a snapshot of the selection first: buildIndex() reuses mSb.
        String selection = mSb.toString();
        buildIndex(mDbHelper.getWritableDatabase(), selection, true);
    }

    /**
     * Indexes the contacts whose data rows match the selection.
     *
     * @param selection SQL selection over the joined data table, or null for all rows
     * @param replace if true, an existing index row for a contact is updated in place (and
     *         inserted only if none exists); if false, rows are always inserted
     * @return the number of contacts written to the index
     */
    private int buildIndex(SQLiteDatabase db, String selection, boolean replace) {
        String sortOrder = buildSortOrder();

        int count = 0;
        Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS,
                selection, null, null, null, sortOrder);
        mIndexBuilder.setCursor(cursor);
        mIndexBuilder.reset();
        try {
            // Rows are sorted by contact id, so a change of id marks the end of a contact.
            long currentContactId = -1;
            while (cursor.moveToNext()) {
                long contactId = cursor.getLong(ContactIndexQuery.CONTACT_ID);
                if (contactId != currentContactId) {
                    if (currentContactId != -1) {
                        saveContactIndex(db, currentContactId, mIndexBuilder, replace);
                        count++;
                    }
                    currentContactId = contactId;
                    mIndexBuilder.reset();
                }
                String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE);
                DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype);
                if (dataRowHandler.hasSearchableData()) {
                    dataRowHandler.appendSearchableData(mIndexBuilder);
                    mIndexBuilder.commit();
                }
            }
            // Flush the last contact, which has no following row to trigger the save.
            if (currentContactId != -1) {
                saveContactIndex(db, currentContactId, mIndexBuilder, replace);
                count++;
            }
        } finally {
            cursor.close();
        }
        return count;
    }

    /**
     * Builds the sort order for the indexing query: rows are grouped by contact and, within
     * a contact, nickname, organization, postal address and email rows (in that order) sort
     * ahead of all other mimetypes, followed by the super-primary flag and the data row id.
     */
    private String buildSortOrder() {
        mSb.setLength(0);
        mSb.append(Data.CONTACT_ID + ", ");
        mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "=");
        mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE));
        mSb.append(" THEN -4 ");
        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
        mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE));
        mSb.append(" THEN -3 ");
        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
        mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE));
        mSb.append(" THEN -2");
        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
        mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE));
        mSb.append(" THEN -1");
        mSb.append(" ELSE " + DataColumns.MIMETYPE_ID);
        mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID);
        return mSb.toString();
    }

    /**
     * Writes the builder's content, name and tokens to the index row of the given contact.
     *
     * @param replace if true, try to update an existing row first and insert only when no
     *         row was updated; if false, always insert
     */
    private void saveContactIndex(
            SQLiteDatabase db, long contactId, IndexBuilder builder, boolean replace) {
        mValues.clear();
        mValues.put(SearchIndexColumns.CONTENT, builder.getContent());
        mValues.put(SearchIndexColumns.NAME, builder.getName());
        mValues.put(SearchIndexColumns.TOKENS, builder.getTokens());
        if (replace) {
            mSelectionArgs1[0] = String.valueOf(contactId);
            int count = db.update(Tables.SEARCH_INDEX, mValues,
                    SearchIndexColumns.CONTACT_ID + "=CAST(? AS int)", mSelectionArgs1);
            if (count == 0) {
                mValues.put(SearchIndexColumns.CONTACT_ID, contactId);
                db.insert(Tables.SEARCH_INDEX, null, mValues);
            }
        } else {
            mValues.put(SearchIndexColumns.CONTACT_ID, contactId);
            db.insert(Tables.SEARCH_INDEX, null, mValues);
        }
    }

    /** @return the stored index version, or 0 if the property has not been set */
    private int getSearchIndexVersion() {
        return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0"));
    }

    private void setSearchIndexVersion(int version) {
        mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version));
    }

    /**
     * Tokenizes the query and builds a match expression from its tokens. The tokenizer uses
     * the same rules as SQLite's "simple" tokenizer. Each token is handed, in order, to
     * {@link FtsQueryBuilder#addToken}, which appends its contribution to the expression.
     *
     * @param query the raw query text; must not be null
     * @param ftsQueryBuilder decides how each token is rendered
     * @return the match expression, which is empty if the query contains no token characters
     * @see FtsQueryBuilder#UNSCOPED_NORMALIZING
     * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING
     */
    public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) {
        final int length = query.length();
        final StringBuilder result = new StringBuilder();
        int tokenStart = -1;
        // Iterate one position past the end so that a token ending the query is flushed too.
        for (int i = 0; i <= length; i++) {
            final boolean isChar = i < length && isTokenChar(query.charAt(i));
            if (isChar) {
                if (tokenStart == -1) {
                    tokenStart = i;
                }
            } else if (tokenStart != -1) {
                final String token = query.substring(tokenStart, i);
                ftsQueryBuilder.addToken(result, token);
                tokenStart = -1;
            }
        }
        return result.toString();
    }

    /**
     * Returns whether the character is part of a token. SQLite's "simple" tokenizer uses the
     * following rules to detect characters:
     * - Unicode codepoints >= 128: Everything
     * - Unicode codepoints < 128: Alphanumeric and "_"
     * Everything else is a separator of tokens.
     */
    private static boolean isTokenChar(char ch) {
        return ch >= 128 || Character.isLetterOrDigit(ch) || ch == '_';
    }

    /** Strategy that renders one query token into the match expression being built. */
    public static abstract class FtsQueryBuilder {
        /**
         * Appends the expression for a single token to the builder.
         *
         * @param builder the expression built so far; empty for the first token
         * @param token the raw token as it appeared in the query
         */
        public abstract void addToken(StringBuilder builder, String token);

        /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */
        public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder();

        /**
         * Scopes each token to a column and normalizes the name.
         * Example: "content:foo* OR name:a1b2c1* OR tokens:foo* content:bar* OR name:a2b3c2*
         * OR tokens:bar*"
         */
        public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING =
                new ScopedNameNormalizingBuilder();

        /**
         * Scopes each token to the content column and, after normalization, to the name
         * column. Also adds a user-defined expression to each token. This allows common
         * criteria to be concatenated to each token.
         *
         * <p>Each token is rendered as {@code content:<token>* }, followed by
         * {@code  OR name:<normalized token>*} when the normalized token is not empty,
         * followed by {@code commonCriteria} verbatim. Example for the token "650" and
         * commonCriteria=" OR tokens:123*", where "xyz" stands for the normalized form of "650":
         * "content:650*  OR name:xyz* OR tokens:123*"
         */
        public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) {
            return new FtsQueryBuilder() {
                @Override
                public void addToken(StringBuilder builder, String token) {
                    if (builder.length() != 0) {
                        builder.append(' ');
                    }

                    builder.append("content:");
                    builder.append(token);
                    builder.append("* ");

                    final String normalizedToken = NameNormalizer.normalize(token);
                    if (!TextUtils.isEmpty(normalizedToken)) {
                        builder.append(" OR name:");
                        builder.append(normalizedToken);
                        builder.append('*');
                    }

                    builder.append(commonCriteria);
                }
            };
        }
    }

    /** Emits each token as its normalized form followed by '*', without a column scope. */
    private static class UnscopedNormalizingBuilder extends FtsQueryBuilder {
        @Override
        public void addToken(StringBuilder builder, String token) {
            if (builder.length() != 0) {
                builder.append(' ');
            }

            // the token could be empty (if the search query was "_"). we should still emit it
            // here, as we otherwise risk to end up with an empty MATCH-expression MATCH ""
            builder.append(NameNormalizer.normalize(token));
            builder.append('*');
        }
    }

    /**
     * Emits each token scoped to the content and tokens columns as-is and, when its
     * normalized form is not empty, to the name column in normalized form.
     */
    private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder {
        @Override
        public void addToken(StringBuilder builder, String token) {
            if (builder.length() != 0) {
                builder.append(' ');
            }

            builder.append("content:");
            builder.append(token);
            builder.append('*');

            final String normalizedToken = NameNormalizer.normalize(token);
            if (!TextUtils.isEmpty(normalizedToken)) {
                builder.append(" OR name:");
                builder.append(normalizedToken);
                builder.append('*');
            }

            builder.append(" OR tokens:");
            builder.append(token);
            builder.append("*");
        }
    }
}