Home | History | Annotate | Download | only in contacts
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 package com.android.providers.contacts;
     17 
     18 import android.content.ContentValues;
     19 import android.database.Cursor;
     20 import android.database.sqlite.SQLiteDatabase;
     21 import android.os.SystemClock;
     22 import android.provider.ContactsContract.CommonDataKinds.Email;
     23 import android.provider.ContactsContract.CommonDataKinds.Nickname;
     24 import android.provider.ContactsContract.CommonDataKinds.Organization;
     25 import android.provider.ContactsContract.CommonDataKinds.StructuredPostal;
     26 import android.provider.ContactsContract.Data;
     27 import android.provider.ContactsContract.ProviderStatus;
     28 import android.provider.ContactsContract.RawContacts;
     29 import android.text.TextUtils;
     30 import android.util.Log;
     31 
     32 import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns;
     33 import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns;
     34 import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns;
     35 import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns;
     36 import com.android.providers.contacts.ContactsDatabaseHelper.Tables;
     37 import com.google.android.collect.Lists;
     38 import com.google.common.annotations.VisibleForTesting;
     39 
     40 import java.util.ArrayList;
     41 import java.util.HashSet;
     42 import java.util.List;
     43 import java.util.Set;
     44 import java.util.regex.Pattern;
     45 
     46 /**
     47  * Maintains a search index for comprehensive contact search.
     48  */
     49 public class SearchIndexManager {
     50     private static final String TAG = "ContactsFTS";
     51 
     52     private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE);
     53 
     54     public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index";
     55     private static final int SEARCH_INDEX_VERSION = 1;
     56 
     57     private static final class ContactIndexQuery {
     58         public static final String[] COLUMNS = {
     59                 Data.CONTACT_ID,
     60                 MimetypesColumns.MIMETYPE,
     61                 Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5,
     62                 Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11,
     63                 Data.DATA12, Data.DATA13, Data.DATA14
     64         };
     65 
     66         public static final int MIMETYPE = 1;
     67     }
     68 
     69     public static class IndexBuilder {
     70         public static final int SEPARATOR_SPACE = 0;
     71         public static final int SEPARATOR_PARENTHESES = 1;
     72         public static final int SEPARATOR_SLASH = 2;
     73         public static final int SEPARATOR_COMMA = 3;
     74 
     75         private StringBuilder mSbContent = new StringBuilder();
     76         private StringBuilder mSbName = new StringBuilder();
     77         private StringBuilder mSbTokens = new StringBuilder();
     78         private StringBuilder mSbElementContent = new StringBuilder();
     79         private HashSet<String> mUniqueElements = new HashSet<String>();
     80         private Cursor mCursor;
     81 
     82         void setCursor(Cursor cursor) {
     83             this.mCursor = cursor;
     84         }
     85 
     86         void reset() {
     87             mSbContent.setLength(0);
     88             mSbTokens.setLength(0);
     89             mSbName.setLength(0);
     90             mSbElementContent.setLength(0);
     91             mUniqueElements.clear();
     92         }
     93 
     94         public String getContent() {
     95             return mSbContent.length() == 0 ? null : mSbContent.toString();
     96         }
     97 
     98         public String getName() {
     99             return mSbName.length() == 0 ? null : mSbName.toString();
    100         }
    101 
    102         public String getTokens() {
    103             return mSbTokens.length() == 0 ? null : mSbTokens.toString();
    104         }
    105 
    106         public String getString(String columnName) {
    107             return mCursor.getString(mCursor.getColumnIndex(columnName));
    108         }
    109 
    110         public int getInt(String columnName) {
    111             return mCursor.getInt(mCursor.getColumnIndex(columnName));
    112         }
    113 
    114         @Override
    115         public String toString() {
    116             return "Content: " + mSbContent + "\n Name: " + mSbTokens + "\n Tokens: " + mSbTokens;
    117         }
    118 
    119         public void commit() {
    120             if (mSbElementContent.length() != 0) {
    121                 String content = mSbElementContent.toString().replace('\n', ' ');
    122                 if (!mUniqueElements.contains(content)) {
    123                     if (mSbContent.length() != 0) {
    124                         mSbContent.append('\n');
    125                     }
    126                     mSbContent.append(content);
    127                     mUniqueElements.add(content);
    128                 }
    129                 mSbElementContent.setLength(0);
    130             }
    131         }
    132 
    133         public void appendContentFromColumn(String columnName) {
    134             appendContentFromColumn(columnName, SEPARATOR_SPACE);
    135         }
    136 
    137         public void appendContentFromColumn(String columnName, int format) {
    138             appendContent(getString(columnName), format);
    139         }
    140 
    141         public void appendContent(String value) {
    142             appendContent(value, SEPARATOR_SPACE);
    143         }
    144 
    145         private void appendContent(String value, int format) {
    146             if (TextUtils.isEmpty(value)) {
    147                 return;
    148             }
    149 
    150             switch (format) {
    151                 case SEPARATOR_SPACE:
    152                     if (mSbElementContent.length() > 0) {
    153                         mSbElementContent.append(' ');
    154                     }
    155                     mSbElementContent.append(value);
    156                     break;
    157 
    158                 case SEPARATOR_SLASH:
    159                     mSbElementContent.append('/').append(value);
    160                     break;
    161 
    162                 case SEPARATOR_PARENTHESES:
    163                     if (mSbElementContent.length() > 0) {
    164                         mSbElementContent.append(' ');
    165                     }
    166                     mSbElementContent.append('(').append(value).append(')');
    167                     break;
    168 
    169                 case SEPARATOR_COMMA:
    170                     if (mSbElementContent.length() > 0) {
    171                         mSbElementContent.append(", ");
    172                     }
    173                     mSbElementContent.append(value);
    174                     break;
    175             }
    176         }
    177 
    178         public void appendToken(String token) {
    179             if (TextUtils.isEmpty(token)) {
    180                 return;
    181             }
    182 
    183             if (mSbTokens.length() != 0) {
    184                 mSbTokens.append(' ');
    185             }
    186             mSbTokens.append(token);
    187         }
    188 
    189         public void appendNameFromColumn(String columnName) {
    190             appendName(getString(columnName));
    191         }
    192 
    193         public void appendName(String name) {
    194             if (TextUtils.isEmpty(name)) {
    195                 return;
    196             }
    197             // First, put the original name.
    198             appendNameInternal(name);
    199 
    200             // Then, if the name contains more than one FTS token, put each token into the index
    201             // too.
    202             //
    203             // This is to make names with special characters searchable, such as "double-barrelled"
    204             // "L'Image".
    205             //
    206             // Here's how it works:
    207             // Because we "normalize" names when putting into the index, if we only put
    208             // "double-barrelled", the index will only contain "doublebarrelled".
    209             // Now, if the user searches for "double-barrelled", the searcher tokenizes it into
    210             // two tokens, "double" and "barrelled".  The first one matches "doublebarrelled"
    211             // but the second one doesn't (because we only do the prefix match), so
    212             // "doublebarrelled" doesn't match.
    213             // So, here, we put each token in a name into the index too.  In the case above,
    214             // we put also "double" and "barrelled".
    215             // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled"
    216             // will all match "double-barrelled".
    217             final List<String> nameParts = splitIntoFtsTokens(name);
    218             if (nameParts.size() > 1) {
    219                 for (String namePart : nameParts) {
    220                     if (!TextUtils.isEmpty(namePart)) {
    221                         appendNameInternal(namePart);
    222                     }
    223                 }
    224             }
    225         }
    226 
    227         /**
    228          * Normalize a name and add to {@link #mSbName}
    229          */
    230         private void appendNameInternal(String name) {
    231             if (mSbName.length() != 0) {
    232                 mSbName.append(' ');
    233             }
    234             mSbName.append(NameNormalizer.normalize(name));
    235         }
    236     }
    237 
    238     private final ContactsProvider2 mContactsProvider;
    239     private final ContactsDatabaseHelper mDbHelper;
    240     private StringBuilder mSb = new StringBuilder();
    241     private IndexBuilder mIndexBuilder = new IndexBuilder();
    242     private ContentValues mValues = new ContentValues();
    243     private String[] mSelectionArgs1 = new String[1];
    244 
    /**
     * Creates a manager bound to the given provider and to that provider's database helper.
     */
    public SearchIndexManager(ContactsProvider2 contactsProvider) {
        mContactsProvider = contactsProvider;
        mDbHelper = (ContactsDatabaseHelper) contactsProvider.getDatabaseHelper();
    }
    249 
    250     public void updateIndex(boolean force) {
    251         if (force) {
    252             setSearchIndexVersion(0);
    253         } else {
    254             if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) {
    255                 return;
    256             }
    257         }
    258         SQLiteDatabase db = mDbHelper.getWritableDatabase();
    259         db.beginTransaction();
    260         try {
    261             // We do a version check again, because the version might have been modified after
    262             // the first check.  We need to do the check again in a transaction to make sure.
    263             if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) {
    264                 rebuildIndex(db);
    265                 setSearchIndexVersion(SEARCH_INDEX_VERSION);
    266                 db.setTransactionSuccessful();
    267             }
    268         } finally {
    269             db.endTransaction();
    270         }
    271     }
    272 
    273     private void rebuildIndex(SQLiteDatabase db) {
    274         mContactsProvider.setProviderStatus(ProviderStatus.STATUS_UPGRADING);
    275         final long start = SystemClock.elapsedRealtime();
    276         int count = 0;
    277         try {
    278             mDbHelper.createSearchIndexTable(db, true);
    279             count = buildAndInsertIndex(db, null);
    280         } finally {
    281             mContactsProvider.setProviderStatus(ProviderStatus.STATUS_NORMAL);
    282 
    283             final long end = SystemClock.elapsedRealtime();
    284             Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, "
    285                     + count + " contacts");
    286         }
    287     }
    288 
    289     public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) {
    290         if (VERBOSE_LOGGING) {
    291             Log.v(TAG, "Updating search index for " + contactIds.size() +
    292                     " contacts / " + rawContactIds.size() + " raw contacts");
    293         }
    294         StringBuilder sb = new StringBuilder();
    295         sb.append("(");
    296         if (!contactIds.isEmpty()) {
    297             sb.append(RawContacts.CONTACT_ID + " IN (");
    298             for (Long contactId : contactIds) {
    299                 sb.append(contactId).append(",");
    300             }
    301             sb.setLength(sb.length() - 1);
    302             sb.append(')');
    303         }
    304 
    305         if (!rawContactIds.isEmpty()) {
    306             if (!contactIds.isEmpty()) {
    307                 sb.append(" OR ");
    308             }
    309             sb.append(RawContactsColumns.CONCRETE_ID + " IN (");
    310             for (Long rawContactId : rawContactIds) {
    311                 sb.append(rawContactId).append(",");
    312             }
    313             sb.setLength(sb.length() - 1);
    314             sb.append(')');
    315         }
    316 
    317         sb.append(")");
    318 
    319         // The selection to select raw_contacts.
    320         final String rawContactsSelection = sb.toString();
    321 
    322         // Remove affected search_index rows.
    323         final SQLiteDatabase db = mDbHelper.getWritableDatabase();
    324         final int deleted = db.delete(Tables.SEARCH_INDEX,
    325                 SearchIndexColumns.CONTACT_ID + " IN (SELECT " +
    326                     RawContacts.CONTACT_ID +
    327                     " FROM " + Tables.RAW_CONTACTS +
    328                     " WHERE " + rawContactsSelection +
    329                     ")"
    330                 , null);
    331 
    332         // Then rebuild index for them.
    333         final int count = buildAndInsertIndex(db, rawContactsSelection);
    334         if (VERBOSE_LOGGING) {
    335             Log.v(TAG, "Updated search index for " + count + " contacts");
    336         }
    337     }
    338 
    339     private int buildAndInsertIndex(SQLiteDatabase db, String selection) {
    340         mSb.setLength(0);
    341         mSb.append(Data.CONTACT_ID + ", ");
    342         mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "=");
    343         mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE));
    344         mSb.append(" THEN -4 ");
    345         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
    346         mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE));
    347         mSb.append(" THEN -3 ");
    348         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
    349         mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE));
    350         mSb.append(" THEN -2");
    351         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
    352         mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE));
    353         mSb.append(" THEN -1");
    354         mSb.append(" ELSE " + DataColumns.MIMETYPE_ID);
    355         mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID);
    356 
    357         int count = 0;
    358         Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS,
    359                 selection, null, null, null, mSb.toString());
    360         mIndexBuilder.setCursor(cursor);
    361         mIndexBuilder.reset();
    362         try {
    363             long currentContactId = -1;
    364             while (cursor.moveToNext()) {
    365                 long contactId = cursor.getLong(0);
    366                 if (contactId != currentContactId) {
    367                     if (currentContactId != -1) {
    368                         insertIndexRow(db, currentContactId, mIndexBuilder);
    369                         count++;
    370                     }
    371                     currentContactId = contactId;
    372                     mIndexBuilder.reset();
    373                 }
    374                 String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE);
    375                 DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype);
    376                 if (dataRowHandler.hasSearchableData()) {
    377                     dataRowHandler.appendSearchableData(mIndexBuilder);
    378                     mIndexBuilder.commit();
    379                 }
    380             }
    381             if (currentContactId != -1) {
    382                 insertIndexRow(db, currentContactId, mIndexBuilder);
    383                 count++;
    384             }
    385         } finally {
    386             cursor.close();
    387         }
    388         return count;
    389     }
    390 
    391     private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) {
    392         mValues.clear();
    393         mValues.put(SearchIndexColumns.CONTENT, builder.getContent());
    394         mValues.put(SearchIndexColumns.NAME, builder.getName());
    395         mValues.put(SearchIndexColumns.TOKENS, builder.getTokens());
    396         mValues.put(SearchIndexColumns.CONTACT_ID, contactId);
    397         db.insert(Tables.SEARCH_INDEX, null, mValues);
    398     }
    399     private int getSearchIndexVersion() {
    400         return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0"));
    401     }
    402 
    403     private void setSearchIndexVersion(int version) {
    404         mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version));
    405     }
    406 
    407     /**
    408      * Token separator that matches SQLite's "simple" tokenizer.
    409      * - Unicode codepoints >= 128: Everything
    410      * - Unicode codepoints < 128: Alphanumeric and "_"
    411      * - Everything else is a separator of tokens
    412      */
    413     private static final Pattern FTS_TOKEN_SEPARATOR_RE =
    414             Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]");
    415 
    416     /**
    417      * Tokenize a string in the way as that of SQLite's "simple" tokenizer.
    418      */
    419     @VisibleForTesting
    420     static List<String> splitIntoFtsTokens(String s) {
    421         final ArrayList<String> ret = Lists.newArrayList();
    422         for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) {
    423             if (!TextUtils.isEmpty(token)) {
    424                 ret.add(token);
    425             }
    426         }
    427         return ret;
    428     }
    429 
    430     /**
    431      * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same
    432      * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then
    433      * returned as a String.
    434      * @see FtsQueryBuilder#UNSCOPED_NORMALIZING
    435      * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING
    436      */
    437     public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) {
    438         final StringBuilder result = new StringBuilder();
    439         for (String token : splitIntoFtsTokens(query)) {
    440             ftsQueryBuilder.addToken(result, token);
    441         }
    442         return result.toString();
    443     }
    444 
    445     public static abstract class FtsQueryBuilder {
    446         public abstract void addToken(StringBuilder builder, String token);
    447 
    448         /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */
    449         public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder();
    450 
    451         /**
    452          * Scopes each token to a column and normalizes the name.
    453          * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*"
    454          */
    455         public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING =
    456                 new ScopedNameNormalizingBuilder();
    457 
    458         /**
    459          * Scopes each token to a the content column and also for name with normalization.
    460          * Also adds a user-defined expression to each token. This allows common criteria to be
    461          * concatenated to each token.
    462          * Example (commonCriteria=" OR tokens:123*"):
    463          * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*"
    464          */
    465         public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) {
    466             return new FtsQueryBuilder() {
    467                 @Override
    468                 public void addToken(StringBuilder builder, String token) {
    469                     if (builder.length() != 0) builder.append(' ');
    470 
    471                     builder.append("content:");
    472                     builder.append(token);
    473                     builder.append("* ");
    474 
    475                     final String normalizedToken = NameNormalizer.normalize(token);
    476                     if (!TextUtils.isEmpty(normalizedToken)) {
    477                         builder.append(" OR name:");
    478                         builder.append(normalizedToken);
    479                         builder.append('*');
    480                     }
    481 
    482                     builder.append(commonCriteria);
    483                 }
    484             };
    485         }
    486     }
    487 
    488     private static class UnscopedNormalizingBuilder extends FtsQueryBuilder {
    489         @Override
    490         public void addToken(StringBuilder builder, String token) {
    491             if (builder.length() != 0) builder.append(' ');
    492 
    493             // the token could be empty (if the search query was "_"). we should still emit it
    494             // here, as we otherwise risk to end up with an empty MATCH-expression MATCH ""
    495             builder.append(NameNormalizer.normalize(token));
    496             builder.append('*');
    497         }
    498     }
    499 
    500     private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder {
    501         @Override
    502         public void addToken(StringBuilder builder, String token) {
    503             if (builder.length() != 0) builder.append(' ');
    504 
    505             builder.append("content:");
    506             builder.append(token);
    507             builder.append('*');
    508 
    509             final String normalizedToken = NameNormalizer.normalize(token);
    510             if (!TextUtils.isEmpty(normalizedToken)) {
    511                 builder.append(" OR name:");
    512                 builder.append(normalizedToken);
    513                 builder.append('*');
    514             }
    515 
    516             builder.append(" OR tokens:");
    517             builder.append(token);
    518             builder.append("*");
    519         }
    520     }
    521 }
    522