Home | History | Annotate | Download | only in contacts
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 package com.android.providers.contacts;
     17 
     18 import android.content.ContentValues;
     19 import android.database.Cursor;
     20 import android.database.sqlite.SQLiteDatabase;
     21 import android.os.SystemClock;
     22 import android.provider.ContactsContract.CommonDataKinds.Email;
     23 import android.provider.ContactsContract.CommonDataKinds.Nickname;
     24 import android.provider.ContactsContract.CommonDataKinds.Organization;
     25 import android.provider.ContactsContract.CommonDataKinds.StructuredPostal;
     26 import android.provider.ContactsContract.Data;
     27 import android.provider.ContactsContract.ProviderStatus;
     28 import android.provider.ContactsContract.RawContacts;
     29 import android.text.TextUtils;
     30 import android.util.Log;
     31 
     32 import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns;
     33 import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns;
     34 import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns;
     35 import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns;
     36 import com.android.providers.contacts.ContactsDatabaseHelper.Tables;
     37 import com.google.android.collect.Lists;
     38 import com.google.common.annotations.VisibleForTesting;
     39 
     40 import java.util.ArrayList;
     41 import java.util.HashSet;
     42 import java.util.List;
     43 import java.util.Set;
     44 import java.util.regex.Pattern;
     45 
     46 /**
     47  * Maintains a search index for comprehensive contact search.
     48  */
     49 public class SearchIndexManager {
     50     private static final String TAG = "ContactsFTS";
     51 
     52     private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE);
     53 
     54     public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index";
     55     private static final int SEARCH_INDEX_VERSION = 1;
     56 
     57     private static final class ContactIndexQuery {
     58         public static final String[] COLUMNS = {
     59                 Data.CONTACT_ID,
     60                 MimetypesColumns.MIMETYPE,
     61                 Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5,
     62                 Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11,
     63                 Data.DATA12, Data.DATA13, Data.DATA14
     64         };
     65 
     66         public static final int MIMETYPE = 1;
     67     }
     68 
     69     public static class IndexBuilder {
     70         public static final int SEPARATOR_SPACE = 0;
     71         public static final int SEPARATOR_PARENTHESES = 1;
     72         public static final int SEPARATOR_SLASH = 2;
     73         public static final int SEPARATOR_COMMA = 3;
     74 
     75         private StringBuilder mSbContent = new StringBuilder();
     76         private StringBuilder mSbName = new StringBuilder();
     77         private StringBuilder mSbTokens = new StringBuilder();
     78         private StringBuilder mSbElementContent = new StringBuilder();
     79         private HashSet<String> mUniqueElements = new HashSet<String>();
     80         private Cursor mCursor;
     81 
     82         void setCursor(Cursor cursor) {
     83             this.mCursor = cursor;
     84         }
     85 
     86         void reset() {
     87             mSbContent.setLength(0);
     88             mSbTokens.setLength(0);
     89             mSbName.setLength(0);
     90             mSbElementContent.setLength(0);
     91             mUniqueElements.clear();
     92         }
     93 
     94         public String getContent() {
     95             return mSbContent.length() == 0 ? null : mSbContent.toString();
     96         }
     97 
     98         public String getName() {
     99             return mSbName.length() == 0 ? null : mSbName.toString();
    100         }
    101 
    102         public String getTokens() {
    103             return mSbTokens.length() == 0 ? null : mSbTokens.toString();
    104         }
    105 
    106         public String getString(String columnName) {
    107             return mCursor.getString(mCursor.getColumnIndex(columnName));
    108         }
    109 
    110         public int getInt(String columnName) {
    111             return mCursor.getInt(mCursor.getColumnIndex(columnName));
    112         }
    113 
    114         @Override
    115         public String toString() {
    116             return "Content: " + mSbContent + "\n Name: " + mSbTokens + "\n Tokens: " + mSbTokens;
    117         }
    118 
    119         public void commit() {
    120             if (mSbElementContent.length() != 0) {
    121                 String content = mSbElementContent.toString().replace('\n', ' ');
    122                 if (!mUniqueElements.contains(content)) {
    123                     if (mSbContent.length() != 0) {
    124                         mSbContent.append('\n');
    125                     }
    126                     mSbContent.append(content);
    127                     mUniqueElements.add(content);
    128                 }
    129                 mSbElementContent.setLength(0);
    130             }
    131         }
    132 
    133         public void appendContentFromColumn(String columnName) {
    134             appendContentFromColumn(columnName, SEPARATOR_SPACE);
    135         }
    136 
    137         public void appendContentFromColumn(String columnName, int format) {
    138             appendContent(getString(columnName), format);
    139         }
    140 
    141         public void appendContent(String value) {
    142             appendContent(value, SEPARATOR_SPACE);
    143         }
    144 
    145         private void appendContent(String value, int format) {
    146             if (TextUtils.isEmpty(value)) {
    147                 return;
    148             }
    149 
    150             switch (format) {
    151                 case SEPARATOR_SPACE:
    152                     if (mSbElementContent.length() > 0) {
    153                         mSbElementContent.append(' ');
    154                     }
    155                     mSbElementContent.append(value);
    156                     break;
    157 
    158                 case SEPARATOR_SLASH:
    159                     mSbElementContent.append('/').append(value);
    160                     break;
    161 
    162                 case SEPARATOR_PARENTHESES:
    163                     if (mSbElementContent.length() > 0) {
    164                         mSbElementContent.append(' ');
    165                     }
    166                     mSbElementContent.append('(').append(value).append(')');
    167                     break;
    168 
    169                 case SEPARATOR_COMMA:
    170                     if (mSbElementContent.length() > 0) {
    171                         mSbElementContent.append(", ");
    172                     }
    173                     mSbElementContent.append(value);
    174                     break;
    175             }
    176         }
    177 
    178         public void appendToken(String token) {
    179             if (TextUtils.isEmpty(token)) {
    180                 return;
    181             }
    182 
    183             if (mSbTokens.length() != 0) {
    184                 mSbTokens.append(' ');
    185             }
    186             mSbTokens.append(token);
    187         }
    188 
    189         public void appendNameFromColumn(String columnName) {
    190             appendName(getString(columnName));
    191         }
    192 
    193         public void appendName(String name) {
    194             if (TextUtils.isEmpty(name)) {
    195                 return;
    196             }
    197             // First, put the original name.
    198             appendNameInternal(name);
    199 
    200             // Then, if the name contains more than one FTS token, put each token into the index
    201             // too.
    202             //
    203             // This is to make names with special characters searchable, such as "double-barrelled"
    204             // "L'Image".
    205             //
    206             // Here's how it works:
    207             // Because we "normalize" names when putting into the index, if we only put
    208             // "double-barrelled", the index will only contain "doublebarrelled".
    209             // Now, if the user searches for "double-barrelled", the searcher tokenizes it into
    210             // two tokens, "double" and "barrelled".  The first one matches "doublebarrelled"
    211             // but the second one doesn't (because we only do the prefix match), so
    212             // "doublebarrelled" doesn't match.
    213             // So, here, we put each token in a name into the index too.  In the case above,
    214             // we put also "double" and "barrelled".
    215             // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled"
    216             // will all match "double-barrelled".
    217             final List<String> nameParts = splitIntoFtsTokens(name);
    218             if (nameParts.size() > 1) {
    219                 for (String namePart : nameParts) {
    220                     if (!TextUtils.isEmpty(namePart)) {
    221                         appendNameInternal(namePart);
    222                     }
    223                 }
    224             }
    225         }
    226 
    227         /**
    228          * Normalize a name and add to {@link #mSbName}
    229          */
    230         private void appendNameInternal(String name) {
    231             if (mSbName.length() != 0) {
    232                 mSbName.append(' ');
    233             }
    234             mSbName.append(NameNormalizer.normalize(name));
    235         }
    236     }
    237 
    238     private final ContactsProvider2 mContactsProvider;
    239     private final ContactsDatabaseHelper mDbHelper;
    240     private StringBuilder mSb = new StringBuilder();
    241     private IndexBuilder mIndexBuilder = new IndexBuilder();
    242     private ContentValues mValues = new ContentValues();
    243     private String[] mSelectionArgs1 = new String[1];
    244 
    /**
     * Creates a manager bound to the given provider and to that provider's database helper.
     *
     * @param contactsProvider provider whose database holds the search index
     */
    public SearchIndexManager(ContactsProvider2 contactsProvider) {
        mContactsProvider = contactsProvider;
        mDbHelper = (ContactsDatabaseHelper) contactsProvider.getDatabaseHelper();
    }
    249 
    250     public void updateIndex(boolean force) {
    251         if (force) {
    252             setSearchIndexVersion(0);
    253         } else {
    254             if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) {
    255                 return;
    256             }
    257         }
    258         SQLiteDatabase db = mDbHelper.getWritableDatabase();
    259         db.beginTransaction();
    260         try {
    261             // We do a version check again, because the version might have been modified after
    262             // the first check.  We need to do the check again in a transaction to make sure.
    263             if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) {
    264                 rebuildIndex(db);
    265                 setSearchIndexVersion(SEARCH_INDEX_VERSION);
    266                 db.setTransactionSuccessful();
    267             }
    268         } finally {
    269             db.endTransaction();
    270         }
    271     }
    272 
    273     private void rebuildIndex(SQLiteDatabase db) {
    274         mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_UPGRADING);
    275         final long start = SystemClock.elapsedRealtime();
    276         int count = 0;
    277         try {
    278             mDbHelper.createSearchIndexTable(db, true);
    279             count = buildAndInsertIndex(db, null);
    280         } finally {
    281             mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_NORMAL);
    282 
    283             final long end = SystemClock.elapsedRealtime();
    284             Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, "
    285                     + count + " contacts");
    286         }
    287     }
    288 
    289     public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) {
    290         if (VERBOSE_LOGGING) {
    291             Log.v(TAG, "Updating search index for " + contactIds.size() +
    292                     " contacts / " + rawContactIds.size() + " raw contacts");
    293         }
    294         StringBuilder sb = new StringBuilder();
    295         sb.append("(");
    296         if (!contactIds.isEmpty()) {
    297             // Select all raw contacts that belong to all contacts in contactIds
    298             sb.append(RawContacts.CONTACT_ID + " IN (");
    299             sb.append(TextUtils.join(",", contactIds));
    300             sb.append(')');
    301         }
    302         if (!rawContactIds.isEmpty()) {
    303             if (!contactIds.isEmpty()) {
    304                 sb.append(" OR ");
    305             }
    306             // Select all raw contacts that belong to the same contact as all raw contacts
    307             // in rawContactIds. For every raw contact in rawContactIds that we are updating
    308             // the index for, we need to rebuild the search index for all raw contacts belonging
    309             // to the same contact, because we can only update the search index on a per-contact
    310             // basis.
    311             sb.append(RawContacts.CONTACT_ID + " IN " +
    312                     "(SELECT " + RawContacts.CONTACT_ID + " FROM " + Tables.RAW_CONTACTS +
    313                     " WHERE " + RawContactsColumns.CONCRETE_ID + " IN (");
    314             sb.append(TextUtils.join(",", rawContactIds));
    315             sb.append("))");
    316         }
    317 
    318         sb.append(")");
    319 
    320         // The selection to select raw_contacts.
    321         final String rawContactsSelection = sb.toString();
    322 
    323         // Remove affected search_index rows.
    324         final SQLiteDatabase db = mDbHelper.getWritableDatabase();
    325         final int deleted = db.delete(Tables.SEARCH_INDEX,
    326                 SearchIndexColumns.CONTACT_ID + " IN (SELECT " +
    327                     RawContacts.CONTACT_ID +
    328                     " FROM " + Tables.RAW_CONTACTS +
    329                     " WHERE " + rawContactsSelection +
    330                     ")"
    331                 , null);
    332 
    333         // Then rebuild index for them.
    334         final int count = buildAndInsertIndex(db, rawContactsSelection);
    335 
    336         if (VERBOSE_LOGGING) {
    337             Log.v(TAG, "Updated search index for " + count + " contacts");
    338         }
    339     }
    340 
    341     private int buildAndInsertIndex(SQLiteDatabase db, String selection) {
    342         mSb.setLength(0);
    343         mSb.append(Data.CONTACT_ID + ", ");
    344         mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "=");
    345         mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE));
    346         mSb.append(" THEN -4 ");
    347         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
    348         mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE));
    349         mSb.append(" THEN -3 ");
    350         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
    351         mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE));
    352         mSb.append(" THEN -2");
    353         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
    354         mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE));
    355         mSb.append(" THEN -1");
    356         mSb.append(" ELSE " + DataColumns.MIMETYPE_ID);
    357         mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID);
    358 
    359         int count = 0;
    360         Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS,
    361                 selection, null, null, null, mSb.toString());
    362         mIndexBuilder.setCursor(cursor);
    363         mIndexBuilder.reset();
    364         try {
    365             long currentContactId = -1;
    366             while (cursor.moveToNext()) {
    367                 long contactId = cursor.getLong(0);
    368                 if (contactId != currentContactId) {
    369                     if (currentContactId != -1) {
    370                         insertIndexRow(db, currentContactId, mIndexBuilder);
    371                         count++;
    372                     }
    373                     currentContactId = contactId;
    374                     mIndexBuilder.reset();
    375                 }
    376                 String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE);
    377                 DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype);
    378                 if (dataRowHandler.hasSearchableData()) {
    379                     dataRowHandler.appendSearchableData(mIndexBuilder);
    380                     mIndexBuilder.commit();
    381                 }
    382             }
    383             if (currentContactId != -1) {
    384                 insertIndexRow(db, currentContactId, mIndexBuilder);
    385                 count++;
    386             }
    387         } finally {
    388             cursor.close();
    389         }
    390         return count;
    391     }
    392 
    393     private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) {
    394         mValues.clear();
    395         mValues.put(SearchIndexColumns.CONTENT, builder.getContent());
    396         mValues.put(SearchIndexColumns.NAME, builder.getName());
    397         mValues.put(SearchIndexColumns.TOKENS, builder.getTokens());
    398         mValues.put(SearchIndexColumns.CONTACT_ID, contactId);
    399         db.insert(Tables.SEARCH_INDEX, null, mValues);
    400     }
    401     private int getSearchIndexVersion() {
    402         return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0"));
    403     }
    404 
    405     private void setSearchIndexVersion(int version) {
    406         mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version));
    407     }
    408 
    409     /**
    410      * Token separator that matches SQLite's "simple" tokenizer.
    411      * - Unicode codepoints >= 128: Everything
    412      * - Unicode codepoints < 128: Alphanumeric and "_"
    413      * - Everything else is a separator of tokens
    414      */
    415     private static final Pattern FTS_TOKEN_SEPARATOR_RE =
    416             Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]");
    417 
    418     /**
    419      * Tokenize a string in the way as that of SQLite's "simple" tokenizer.
    420      */
    421     @VisibleForTesting
    422     static List<String> splitIntoFtsTokens(String s) {
    423         final ArrayList<String> ret = Lists.newArrayList();
    424         for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) {
    425             if (!TextUtils.isEmpty(token)) {
    426                 ret.add(token);
    427             }
    428         }
    429         return ret;
    430     }
    431 
    432     /**
    433      * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same
    434      * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then
    435      * returned as a String.
    436      * @see FtsQueryBuilder#UNSCOPED_NORMALIZING
    437      * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING
    438      */
    439     public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) {
    440         final StringBuilder result = new StringBuilder();
    441         for (String token : splitIntoFtsTokens(query)) {
    442             ftsQueryBuilder.addToken(result, token);
    443         }
    444         return result.toString();
    445     }
    446 
    447     public static abstract class FtsQueryBuilder {
    448         public abstract void addToken(StringBuilder builder, String token);
    449 
    450         /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */
    451         public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder();
    452 
    453         /**
    454          * Scopes each token to a column and normalizes the name.
    455          * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*"
    456          */
    457         public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING =
    458                 new ScopedNameNormalizingBuilder();
    459 
    460         /**
    461          * Scopes each token to a the content column and also for name with normalization.
    462          * Also adds a user-defined expression to each token. This allows common criteria to be
    463          * concatenated to each token.
    464          * Example (commonCriteria=" OR tokens:123*"):
    465          * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*"
    466          */
    467         public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) {
    468             return new FtsQueryBuilder() {
    469                 @Override
    470                 public void addToken(StringBuilder builder, String token) {
    471                     if (builder.length() != 0) builder.append(' ');
    472 
    473                     builder.append("content:");
    474                     builder.append(token);
    475                     builder.append("* ");
    476 
    477                     final String normalizedToken = NameNormalizer.normalize(token);
    478                     if (!TextUtils.isEmpty(normalizedToken)) {
    479                         builder.append(" OR name:");
    480                         builder.append(normalizedToken);
    481                         builder.append('*');
    482                     }
    483 
    484                     builder.append(commonCriteria);
    485                 }
    486             };
    487         }
    488     }
    489 
    490     private static class UnscopedNormalizingBuilder extends FtsQueryBuilder {
    491         @Override
    492         public void addToken(StringBuilder builder, String token) {
    493             if (builder.length() != 0) builder.append(' ');
    494 
    495             // the token could be empty (if the search query was "_"). we should still emit it
    496             // here, as we otherwise risk to end up with an empty MATCH-expression MATCH ""
    497             builder.append(NameNormalizer.normalize(token));
    498             builder.append('*');
    499         }
    500     }
    501 
    502     private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder {
    503         @Override
    504         public void addToken(StringBuilder builder, String token) {
    505             if (builder.length() != 0) builder.append(' ');
    506 
    507             builder.append("content:");
    508             builder.append(token);
    509             builder.append('*');
    510 
    511             final String normalizedToken = NameNormalizer.normalize(token);
    512             if (!TextUtils.isEmpty(normalizedToken)) {
    513                 builder.append(" OR name:");
    514                 builder.append(normalizedToken);
    515                 builder.append('*');
    516             }
    517 
    518             builder.append(" OR tokens:");
    519             builder.append(token);
    520             builder.append("*");
    521         }
    522     }
    523 }
    524