Home | History | Annotate | Download | only in aggregation
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 
     17 package com.android.providers.contacts.aggregation;
     18 
     19 import android.database.Cursor;
     20 import android.database.DatabaseUtils;
     21 import android.database.sqlite.SQLiteDatabase;
     22 import android.provider.ContactsContract.AggregationExceptions;
     23 import android.provider.ContactsContract.CommonDataKinds.Identity;
     24 import android.provider.ContactsContract.Contacts.AggregationSuggestions;
     25 import android.provider.ContactsContract.Data;
     26 import android.provider.ContactsContract.RawContacts;
     27 import android.text.TextUtils;
     28 import android.util.Log;
     29 import com.android.providers.contacts.ContactsDatabaseHelper;
     30 import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns;
     31 import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupColumns;
     32 import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupType;
     33 import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns;
     34 import com.android.providers.contacts.ContactsDatabaseHelper.Tables;
     35 import com.android.providers.contacts.ContactsProvider2;
     36 import com.android.providers.contacts.NameSplitter;
     37 import com.android.providers.contacts.PhotoPriorityResolver;
     38 import com.android.providers.contacts.TransactionContext;
     39 import com.android.providers.contacts.aggregation.util.CommonNicknameCache;
     40 import com.android.providers.contacts.aggregation.util.ContactMatcher;
     41 import com.android.providers.contacts.aggregation.util.MatchScore;
     42 import com.android.providers.contacts.database.ContactsTableUtil;
     43 import com.google.android.collect.Sets;
     44 
     45 import java.util.ArrayList;
     46 import java.util.HashSet;
     47 import java.util.List;
     48 import java.util.Set;
     49 
     50 /**
     51  * ContactAggregator deals with aggregating contact information coming from different sources.
     52  * Two John Doe contacts from two disjoint sources are presumed to be the same
     53  * person unless the user declares otherwise.
     54  */
     55 public class ContactAggregator extends AbstractContactAggregator {
     56 
    // Action codes returned by canJoinIntoContact(). aggregateContact() uses them to decide
    // what to do with the best matching contact it found for a raw contact.
    // JOIN: the raw contact may be added to the matching contact.
    private static final int JOIN = 1;
    // KEEP_SEPARATE: the match is discarded (aggregateContact() resets its contact ID to -1).
    private static final int KEEP_SEPARATE = 0;
    // RE_AGGREGATE: the raw contact and the raw contacts of the matching contact have to be
    // re-aggregated (see reAggregateRawContacts()).
    private static final int RE_AGGREGATE = -1;

    // Matcher used by updateAggregationAfterVisibilityChange(), which clears it before each
    // scoring pass. aggregateContact() creates its own local matcher instead of using this one.
    private final ContactMatcher mMatcher = new ContactMatcher();
     63 
    /**
     * Creates an aggregator. This constructor adds no state of its own: every argument is
     * forwarded, unchanged and in the same order, to the {@link AbstractContactAggregator}
     * constructor.
     *
     * @param contactsProvider forwarded to the superclass
     * @param contactsDatabaseHelper forwarded to the superclass
     * @param photoPriorityResolver forwarded to the superclass
     * @param nameSplitter forwarded to the superclass
     * @param commonNicknameCache forwarded to the superclass
     */
    public ContactAggregator(ContactsProvider2 contactsProvider,
            ContactsDatabaseHelper contactsDatabaseHelper,
            PhotoPriorityResolver photoPriorityResolver, NameSplitter nameSplitter,
            CommonNicknameCache commonNicknameCache) {
        super(contactsProvider, contactsDatabaseHelper, photoPriorityResolver, nameSplitter,
                commonNicknameCache);
    }
     74 
    /**
     * Given a specific raw contact, finds all matching aggregate contacts and chooses the one
     * with the highest match score.  If no such contact is found, creates a new contact.
     * <p>
     * The aggregation mode is read (and removed) from {@code mRawContactsMarkedForAggregation};
     * a raw contact without an entry there is handled as
     * {@link RawContacts#AGGREGATION_MODE_DEFAULT}.
     * <ul>
     * <li>DEFAULT: a best match is looked up, first through aggregation exceptions and, if that
     * yields nothing and the raw contact is new or its current contact is in the default
     * directory, through data matching. A match that differs from the current contact is then
     * vetted with the size limit and {@code canJoinIntoContact}.</li>
     * <li>DISABLED: the method returns without touching the database.</li>
     * <li>Any other mode: no matching is attempted; only the "reuse the current contact" rule
     * below can assign a contact.</li>
     * </ul>
     * The method then takes exactly one of four actions: leave the aggregation unchanged,
     * create a new contact, re-aggregate the raw contact together with the matching contact's
     * raw contacts, or join the raw contact into the matching contact.
     *
     * @param txContext transaction context handed to the helpers that create or update contacts
     * @param db database the queries and updates are run against
     * @param rawContactId ID of the raw contact being aggregated
     * @param accountId account ID of that raw contact; compared with the account IDs of the raw
     *         contacts in the candidate contact
     * @param currentContactId contact the raw contact belongs to at the moment; 0 is treated as
     *         "no current contact"
     * @param candidates scratch list; cleared here and passed to
     *         {@code pickBestMatchBasedOnData}
     */
    synchronized void aggregateContact(TransactionContext txContext, SQLiteDatabase db,
            long rawContactId, long accountId, long currentContactId,
            MatchCandidateList candidates) {

        if (VERBOSE_LOGGING) {
            Log.v(TAG, "aggregateContact: rid=" + rawContactId + " cid=" + currentContactId);
        }

        int aggregationMode = RawContacts.AGGREGATION_MODE_DEFAULT;

        // Consume the pending aggregation request for this raw contact, if there is one.
        Integer aggModeObject = mRawContactsMarkedForAggregation.remove(rawContactId);
        if (aggModeObject != null) {
            aggregationMode = aggModeObject;
        }

        long contactId = -1; // Best matching contact ID; -1 means "no match".
        boolean needReaggregate = false;

        final ContactMatcher matcher = new ContactMatcher();
        // Raw contacts of the best matching contact, split by whether they live in the same
        // account as [rawContactId]. They stay empty unless a candidate contact is examined.
        final Set<Long> rawContactIdsInSameAccount = new HashSet<Long>();
        final Set<Long> rawContactIdsInOtherAccount = new HashSet<Long>();
        if (aggregationMode == RawContacts.AGGREGATION_MODE_DEFAULT) {
            candidates.clear();
            matcher.clear();

            // User-entered aggregation exceptions take precedence over data matching.
            contactId = pickBestMatchBasedOnExceptions(db, rawContactId, matcher);
            if (contactId == -1) {

                // If this is a newly inserted contact or a visible contact, look for
                // data matches.
                if (currentContactId == 0
                        || mDbHelper.isContactInDefaultDirectory(db, currentContactId)) {
                    contactId = pickBestMatchBasedOnData(db, rawContactId, candidates, matcher);
                }

                // If we found a best matching contact, find out whether the raw contact can be
                // joined into it.
                if (contactId != -1 && contactId != currentContactId) {
                    // List all raw contact ID and their account ID mappings in contact
                    // [contactId] excluding raw_contact [rawContactId].

                    // Based on the mapping, fill the two sets of raw contact IDs: those in the
                    // account [accountId] and those in any other account.
                    mSelectionArgs2[0] = String.valueOf(contactId);
                    mSelectionArgs2[1] = String.valueOf(rawContactId);
                    final Cursor rawContactsToAccountsCursor = db.rawQuery(
                            "SELECT " + RawContacts._ID + ", " + RawContactsColumns.ACCOUNT_ID +
                                    " FROM " + Tables.RAW_CONTACTS +
                                    " WHERE " + RawContacts.CONTACT_ID + "=?" +
                                    " AND " + RawContacts._ID + "!=?",
                            mSelectionArgs2);
                    try {
                        rawContactsToAccountsCursor.moveToPosition(-1);
                        while (rawContactsToAccountsCursor.moveToNext()) {
                            final long rcId = rawContactsToAccountsCursor.getLong(0);
                            final long rc_accountId = rawContactsToAccountsCursor.getLong(1);
                            if (rc_accountId == accountId) {
                                rawContactIdsInSameAccount.add(rcId);
                            } else {
                                rawContactIdsInOtherAccount.add(rcId);
                            }
                        }
                    } finally {
                        rawContactsToAccountsCursor.close();
                    }
                    final int actionCode;
                    final int totalNumOfRawContactsInCandidate = rawContactIdsInSameAccount.size()
                            + rawContactIdsInOtherAccount.size();
                    // A candidate that already holds too many raw contacts is never joined.
                    if (totalNumOfRawContactsInCandidate >= AGGREGATION_CONTACT_SIZE_LIMIT) {
                        if (VERBOSE_LOGGING) {
                            Log.v(TAG, "Too many raw contacts (" + totalNumOfRawContactsInCandidate
                                    + ") in the best matching contact, so skip aggregation");
                        }
                        actionCode = KEEP_SEPARATE;
                    } else {
                        actionCode = canJoinIntoContact(db, rawContactId,
                                rawContactIdsInSameAccount, rawContactIdsInOtherAccount);
                    }
                    // JOIN needs no handling here: contactId already holds the target contact.
                    if (actionCode == KEEP_SEPARATE) {
                        contactId = -1;
                    } else if (actionCode == RE_AGGREGATE) {
                        needReaggregate = true;
                    }
                }
            }
        } else if (aggregationMode == RawContacts.AGGREGATION_MODE_DISABLED) {
            return;
        }

        // # of raw_contacts in the [currentContactId] contact excluding the [rawContactId]
        // raw_contact. Stays 0 when there is no current contact.
        long currentContactContentsCount = 0;

        if (currentContactId != 0) {
            mRawContactCountQuery.bindLong(1, currentContactId);
            mRawContactCountQuery.bindLong(2, rawContactId);
            currentContactContentsCount = mRawContactCountQuery.simpleQueryForLong();
        }

        // If there are no other raw contacts in the current aggregate, we might as well reuse it.
        // Also, if the aggregation mode is SUSPENDED, we must reuse the same aggregate.
        if (contactId == -1
                && currentContactId != 0
                && (currentContactContentsCount == 0
                        || aggregationMode == RawContacts.AGGREGATION_MODE_SUSPENDED)) {
            contactId = currentContactId;
        }

        if (contactId == currentContactId) {
            // Aggregation unchanged
            markAggregated(db, String.valueOf(rawContactId));
            if (VERBOSE_LOGGING) {
                Log.v(TAG, "Aggregation unchanged");
            }
        } else if (contactId == -1) {
            // create new contact for [rawContactId]
            createContactForRawContacts(db, txContext, Sets.newHashSet(rawContactId), null);
            // The old aggregate still has other members, so refresh its aggregate data now
            // that [rawContactId] has left it.
            if (currentContactContentsCount > 0) {
                updateAggregateData(txContext, currentContactId);
            }
            if (VERBOSE_LOGGING) {
                Log.v(TAG, "create new contact for rid=" + rawContactId);
            }
        } else if (needReaggregate) {
            // re-aggregate
            final Set<Long> allRawContactIdSet = new HashSet<Long>();
            allRawContactIdSet.addAll(rawContactIdsInSameAccount);
            allRawContactIdSet.addAll(rawContactIdsInOtherAccount);
            // If no other raw contacts are currently aggregated with the given raw contact,
            // we might as well reuse its contact; otherwise pass 0 so that it is not reused.
            currentContactId = (currentContactId != 0 && currentContactContentsCount == 0)
                    ? currentContactId : 0;
            reAggregateRawContacts(txContext, db, contactId, currentContactId, rawContactId,
                    allRawContactIdSet);
            if (VERBOSE_LOGGING) {
                Log.v(TAG, "Re-aggregating rid=" + rawContactId + " and cid=" + contactId);
            }
        } else {
            // Joining with an existing aggregate
            // NOTE(review): currentContactContentsCount is also 0 when currentContactId == 0
            // (newly inserted raw contact), so this branch then runs with contact ID 0.
            // Presumably harmless because no such contact exists -- confirm against
            // ContactsTableUtil.deleteContact().
            if (currentContactContentsCount == 0) {
                // Delete a previous aggregate if it only contained this raw contact
                ContactsTableUtil.deleteContact(db, currentContactId);

                mAggregatedPresenceDelete.bindLong(1, currentContactId);
                mAggregatedPresenceDelete.execute();
            }

            // Drop super-primary flags that would conflict once the raw contact has joined,
            // move the raw contact over, then recompute the target contact's aggregate data.
            clearSuperPrimarySetting(db, contactId, rawContactId);
            setContactIdAndMarkAggregated(rawContactId, contactId);
            computeAggregateData(db, contactId, mContactUpdate);
            mContactUpdate.bindLong(ContactReplaceSqlStatement.CONTACT_ID, contactId);
            mContactUpdate.execute();
            mDbHelper.updateContactVisible(txContext, contactId);
            updateAggregatedStatusUpdate(contactId);
            // Make sure the raw contact does not contribute to the current contact
            if (currentContactId != 0) {
                updateAggregateData(txContext, currentContactId);
            }
            if (VERBOSE_LOGGING) {
                Log.v(TAG, "Join rid=" + rawContactId + " with cid=" + contactId);
            }
        }
    }
    243 
    244     /**
    245      * Find out which mime-types are shared by raw contact of {@code rawContactId} and raw contacts
    246      * of {@code contactId}. Clear the is_super_primary settings for these mime-types.
    247      */
    248     private void clearSuperPrimarySetting(SQLiteDatabase db, long contactId, long rawContactId) {
    249         final String[] args = {String.valueOf(contactId), String.valueOf(rawContactId)};
    250 
    251         // Find out which mime-types exist with is_super_primary=true on both the raw contact of
    252         // rawContactId and raw contacts of contactId
    253         int index = 0;
    254         final StringBuilder mimeTypeCondition = new StringBuilder();
    255         mimeTypeCondition.append(" AND " + DataColumns.MIMETYPE_ID + " IN (");
    256 
    257         final Cursor c = db.rawQuery(
    258                 "SELECT DISTINCT(a." + DataColumns.MIMETYPE_ID + ")" +
    259                 " FROM (SELECT " + DataColumns.MIMETYPE_ID + " FROM " + Tables.DATA + " WHERE " +
    260                         Data.IS_SUPER_PRIMARY + " =1 AND " +
    261                         Data.RAW_CONTACT_ID + " IN (SELECT " + RawContacts._ID + " FROM " +
    262                         Tables.RAW_CONTACTS + " WHERE " + RawContacts.CONTACT_ID + "=?1)) AS a" +
    263                 " JOIN  (SELECT " + DataColumns.MIMETYPE_ID + " FROM " + Tables.DATA + " WHERE " +
    264                         Data.IS_SUPER_PRIMARY + " =1 AND " +
    265                         Data.RAW_CONTACT_ID + "=?2) AS b" +
    266                 " ON a." + DataColumns.MIMETYPE_ID + "=b." + DataColumns.MIMETYPE_ID,
    267                 args);
    268         try {
    269             c.moveToPosition(-1);
    270             while (c.moveToNext()) {
    271                 if (index > 0) {
    272                     mimeTypeCondition.append(',');
    273                 }
    274                 mimeTypeCondition.append(c.getLong((0)));
    275                 index++;
    276             }
    277         } finally {
    278             c.close();
    279         }
    280 
    281         if (index == 0) {
    282             return;
    283         }
    284 
    285         // Clear is_super_primary setting for all the mime-types with is_super_primary=true
    286         // in both raw contact of rawContactId and raw contacts of contactId
    287         String superPrimaryUpdateSql = "UPDATE " + Tables.DATA +
    288                 " SET " + Data.IS_SUPER_PRIMARY + "=0" +
    289                 " WHERE (" +  Data.RAW_CONTACT_ID +
    290                         " IN (SELECT " + RawContacts._ID +  " FROM " + Tables.RAW_CONTACTS +
    291                         " WHERE " + RawContacts.CONTACT_ID + "=?1)" +
    292                         " OR " +  Data.RAW_CONTACT_ID + "=?2)";
    293 
    294         mimeTypeCondition.append(')');
    295         superPrimaryUpdateSql += mimeTypeCondition.toString();
    296         db.execSQL(superPrimaryUpdateSql, args);
    297     }
    298 
    299     /**
    300      * @return JOIN if the raw contact of {@code rawContactId} can be joined into the existing
    301      * contact of {@code contactId}. KEEP_SEPARATE if the raw contact of {@code rawContactId}
    302      * cannot be joined into the existing contact of {@code contactId}. RE_AGGREGATE if raw contact
    303      * of {@code rawContactId} and all the raw contacts of contact of {@code contactId} need to be
    304      * re-aggregated.
    305      *
    306      * If contact of {@code contactId} doesn't contain any raw contacts from the same account as
    307      * raw contact of {@code rawContactId}, join raw contact with contact if there is no identity
    308      * mismatch between them on the same namespace, otherwise, keep them separate.
    309      *
    310      * If contact of {@code contactId} contains raw contacts from the same account as raw contact of
    311      * {@code rawContactId}, join raw contact with contact if there's at least one raw contact in
    312      * those raw contacts that shares at least one email address, phone number, or identity;
    313      * otherwise, re-aggregate raw contact and all the raw contacts of contact.
    314      */
    315     private int canJoinIntoContact(SQLiteDatabase db, long rawContactId,
    316             Set<Long> rawContactIdsInSameAccount, Set<Long> rawContactIdsInOtherAccount ) {
    317 
    318         if (rawContactIdsInSameAccount.isEmpty()) {
    319             final String rid = String.valueOf(rawContactId);
    320             final String ridsInOtherAccts = TextUtils.join(",", rawContactIdsInOtherAccount);
    321             // If there is no identity match between raw contact of [rawContactId] and
    322             // any raw contact in other accounts on the same namespace, and there is at least
    323             // one identity mismatch exist, keep raw contact separate from contact.
    324             if (DatabaseUtils.longForQuery(db, buildIdentityMatchingSql(rid, ridsInOtherAccts,
    325                     /* isIdentityMatching =*/ true, /* countOnly =*/ true), null) == 0 &&
    326                     DatabaseUtils.longForQuery(db, buildIdentityMatchingSql(rid, ridsInOtherAccts,
    327                             /* isIdentityMatching =*/ false, /* countOnly =*/ true), null) > 0) {
    328                 if (VERBOSE_LOGGING) {
    329                     Log.v(TAG, "canJoinIntoContact: no duplicates, but has no matching identity " +
    330                             "and has mis-matching identity on the same namespace between rid=" +
    331                             rid + " and ridsInOtherAccts=" + ridsInOtherAccts);
    332                 }
    333                 return KEEP_SEPARATE; // has identity and identity doesn't match
    334             } else {
    335                 if (VERBOSE_LOGGING) {
    336                     Log.v(TAG, "canJoinIntoContact: can join the first raw contact from the same " +
    337                             "account without any identity mismatch.");
    338                 }
    339                 return JOIN; // no identity or identity match
    340             }
    341         }
    342         if (VERBOSE_LOGGING) {
    343             Log.v(TAG, "canJoinIntoContact: " + rawContactIdsInSameAccount.size() +
    344                     " duplicate(s) found");
    345         }
    346 
    347 
    348         final Set<Long> rawContactIdSet = new HashSet<Long>();
    349         rawContactIdSet.add(rawContactId);
    350         if (rawContactIdsInSameAccount.size() > 0 &&
    351                 isDataMaching(db, rawContactIdSet, rawContactIdsInSameAccount)) {
    352             if (VERBOSE_LOGGING) {
    353                 Log.v(TAG, "canJoinIntoContact: join if there is a data matching found in the " +
    354                         "same account");
    355             }
    356             return JOIN;
    357         } else {
    358             if (VERBOSE_LOGGING) {
    359                 Log.v(TAG, "canJoinIntoContact: re-aggregate rid=" + rawContactId +
    360                         " with its best matching contact to connected component");
    361             }
    362             return RE_AGGREGATE;
    363         }
    364     }
    365 
    366     /**
    367      * If there's any identity, email address or a phone number matching between two raw contact
    368      * sets.
    369      */
    370     private boolean isDataMaching(SQLiteDatabase db, Set<Long> rawContactIdSet1,
    371             Set<Long> rawContactIdSet2) {
    372         final String rawContactIds1 = TextUtils.join(",", rawContactIdSet1);
    373         final String rawContactIds2 = TextUtils.join(",", rawContactIdSet2);
    374         // First, check for the identity
    375         if (isFirstColumnGreaterThanZero(db, buildIdentityMatchingSql(
    376                 rawContactIds1, rawContactIds2,  /* isIdentityMatching =*/ true,
    377                 /* countOnly =*/true))) {
    378             if (VERBOSE_LOGGING) {
    379                 Log.v(TAG, "canJoinIntoContact: identity match found between " + rawContactIds1 +
    380                         " and " + rawContactIds2);
    381             }
    382             return true;
    383         }
    384 
    385         // Next, check for the email address.
    386         if (isFirstColumnGreaterThanZero(db,
    387                 buildEmailMatchingSql(rawContactIds1, rawContactIds2, true))) {
    388             if (VERBOSE_LOGGING) {
    389                 Log.v(TAG, "canJoinIntoContact: email match found between " + rawContactIds1 +
    390                         " and " + rawContactIds2);
    391             }
    392             return true;
    393         }
    394 
    395         // Lastly, the phone number.
    396         if (isFirstColumnGreaterThanZero(db,
    397                 buildPhoneMatchingSql(rawContactIds1, rawContactIds2, true))) {
    398             if (VERBOSE_LOGGING) {
    399                 Log.v(TAG, "canJoinIntoContact: phone match found between " + rawContactIds1 +
    400                         " and " + rawContactIds2);
    401             }
    402             return true;
    403         }
    404         return false;
    405     }
    406 
    407     /**
    408      * Re-aggregate rawContact of {@code rawContactId} and all the raw contacts of
    409      * {@code existingRawContactIds} into connected components. This only happens when a given
    410      * raw contacts cannot be joined with its best matching contacts directly.
    411      *
    412      *  Two raw contacts are considered connected if they share at least one email address, phone
    413      *  number or identity. Create new contact for each connected component except the very first
    414      *  one that doesn't contain rawContactId of {@code rawContactId}.
    415      */
    416     private void reAggregateRawContacts(TransactionContext txContext, SQLiteDatabase db,
    417             long contactId, long currentContactId, long rawContactId,
    418             Set<Long> existingRawContactIds) {
    419         // Find the connected component based on the aggregation exceptions or
    420         // identity/email/phone matching for all the raw contacts of [contactId] and the give
    421         // raw contact.
    422         final Set<Long> allIds = new HashSet<Long>();
    423         allIds.add(rawContactId);
    424         allIds.addAll(existingRawContactIds);
    425         final Set<Set<Long>> connectedRawContactSets = findConnectedRawContacts(db, allIds);
    426 
    427         if (connectedRawContactSets.size() == 1) {
    428             // If everything is connected, create one contact with [contactId]
    429             createContactForRawContacts(db, txContext, connectedRawContactSets.iterator().next(),
    430                     contactId);
    431         } else {
    432             for (Set<Long> connectedRawContactIds : connectedRawContactSets) {
    433                 if (connectedRawContactIds.contains(rawContactId)) {
    434                     // crate contact for connect component containing [rawContactId], reuse
    435                     // [currentContactId] if possible.
    436                     createContactForRawContacts(db, txContext, connectedRawContactIds,
    437                             currentContactId == 0 ? null : currentContactId);
    438                     connectedRawContactSets.remove(connectedRawContactIds);
    439                     break;
    440                 }
    441             }
    442             // Create new contact for each connected component except the last one. The last one
    443             // will reuse [contactId]. Only the last one can reuse [contactId] when all other raw
    444             // contacts has already been assigned new contact Id, so that the contact aggregation
    445             // stats could be updated correctly.
    446             int index = connectedRawContactSets.size();
    447             for (Set<Long> connectedRawContactIds : connectedRawContactSets) {
    448                 if (index > 1) {
    449                     createContactForRawContacts(db, txContext, connectedRawContactIds, null);
    450                     index--;
    451                 } else {
    452                     createContactForRawContacts(db, txContext, connectedRawContactIds, contactId);
    453                 }
    454             }
    455         }
    456     }
    457 
    458     /**
    459      * Ensures that automatic aggregation rules are followed after a contact
    460      * becomes visible or invisible. Specifically, consider this case: there are
    461      * three contacts named Foo. Two of them come from account A1 and one comes
    462      * from account A2. The aggregation rules say that in this case none of the
    463      * three Foo's should be aggregated: two of them are in the same account, so
    464      * they don't get aggregated; the third has two affinities, so it does not
    465      * join either of them.
    466      * <p>
    467      * Consider what happens if one of the "Foo"s from account A1 becomes
    468      * invisible. Nothing stands in the way of aggregating the other two
    469      * anymore, so they should get joined.
    470      * <p>
    471      * What if the invisible "Foo" becomes visible after that? We should split the
    472      * aggregate between the other two.
    473      */
    474     public void updateAggregationAfterVisibilityChange(long contactId) {
    475         SQLiteDatabase db = mDbHelper.getWritableDatabase();
    476         boolean visible = mDbHelper.isContactInDefaultDirectory(db, contactId);
    477         if (visible) {
    478             markContactForAggregation(db, contactId);
    479         } else {
    480             // Find all contacts that _could be_ aggregated with this one and
    481             // rerun aggregation for all of them
    482             mSelectionArgs1[0] = String.valueOf(contactId);
    483             Cursor cursor = db.query(RawContactIdQuery.TABLE, RawContactIdQuery.COLUMNS,
    484                     RawContactIdQuery.SELECTION, mSelectionArgs1, null, null, null);
    485             try {
    486                 while (cursor.moveToNext()) {
    487                     long rawContactId = cursor.getLong(RawContactIdQuery.RAW_CONTACT_ID);
    488                     mMatcher.clear();
    489 
    490                     updateMatchScoresBasedOnIdentityMatch(db, rawContactId, mMatcher);
    491                     updateMatchScoresBasedOnNameMatches(db, rawContactId, mMatcher);
    492                     List<MatchScore> bestMatches =
    493                             mMatcher.pickBestMatches(ContactMatcher.SCORE_THRESHOLD_PRIMARY);
    494                     for (MatchScore matchScore : bestMatches) {
    495                         markContactForAggregation(db, matchScore.getContactId());
    496                     }
    497 
    498                     mMatcher.clear();
    499                     updateMatchScoresBasedOnEmailMatches(db, rawContactId, mMatcher);
    500                     updateMatchScoresBasedOnPhoneMatches(db, rawContactId, mMatcher);
    501                     bestMatches =
    502                             mMatcher.pickBestMatches(ContactMatcher.SCORE_THRESHOLD_SECONDARY);
    503                     for (MatchScore matchScore : bestMatches) {
    504                         markContactForAggregation(db, matchScore.getContactId());
    505                     }
    506                 }
    507             } finally {
    508                 cursor.close();
    509             }
    510         }
    511     }
    512 
    /**
     * Updates the contact ID for the specified raw contact and marks the raw contact as
     * aggregated, by running the {@code mContactIdAndMarkAggregatedUpdate} statement.
     *
     * @param rawContactId raw contact to update (bound as parameter 2)
     * @param contactId contact ID to assign to it (bound as parameter 1)
     */
    private void setContactIdAndMarkAggregated(long rawContactId, long contactId) {
        // Note the bind order: the contact ID comes first, the raw contact ID second.
        mContactIdAndMarkAggregatedUpdate.bindLong(1, contactId);
        mContactIdAndMarkAggregatedUpdate.bindLong(2, rawContactId);
        mContactIdAndMarkAggregatedUpdate.execute();
    }
    521 
    /**
     * Query definition used by pickBestMatchBasedOnExceptions(): aggregation exceptions joined
     * twice with raw_contacts, once for each side of the exception.
     */
    interface AggregateExceptionQuery {
        // raw_contacts1 / raw_contacts2 are the raw contacts referenced by raw_contact_id1 /
        // raw_contact_id2 of the exception row.
        // NOTE(review): the ON clauses use the literal table name "agg_exceptions"; this
        // presumably equals Tables.AGGREGATION_EXCEPTIONS -- confirm.
        String TABLE = Tables.AGGREGATION_EXCEPTIONS
            + " JOIN raw_contacts raw_contacts1 "
                    + " ON (agg_exceptions.raw_contact_id1 = raw_contacts1._id) "
            + " JOIN raw_contacts raw_contacts2 "
                    + " ON (agg_exceptions.raw_contact_id2 = raw_contacts2._id) ";

        // Projection; the order must stay in sync with the index constants below.
        String[] COLUMNS = {
            AggregationExceptions.TYPE,
            AggregationExceptions.RAW_CONTACT_ID1,
            "raw_contacts1." + RawContacts.CONTACT_ID,
            "raw_contacts1." + RawContactsColumns.AGGREGATION_NEEDED,
            "raw_contacts2." + RawContacts.CONTACT_ID,
            "raw_contacts2." + RawContactsColumns.AGGREGATION_NEEDED,
        };

        // Column indexes into COLUMNS.
        int TYPE = 0;
        int RAW_CONTACT_ID1 = 1;
        int CONTACT_ID1 = 2;
        int AGGREGATION_NEEDED_1 = 3;
        int CONTACT_ID2 = 4;
        int AGGREGATION_NEEDED_2 = 5;
    }
    545 
    546     /**
    547      * Computes match scores based on exceptions entered by the user: always match and never match.
    548      * Returns the aggregate contact with the always match exception if any.
    549      */
    550     private long pickBestMatchBasedOnExceptions(SQLiteDatabase db, long rawContactId,
    551             ContactMatcher matcher) {
    552         if (!mAggregationExceptionIdsValid) {
    553             prefetchAggregationExceptionIds(db);
    554         }
    555 
    556         // If there are no aggregation exceptions involving this raw contact, there is no need to
    557         // run a query and we can just return -1, which stands for "nothing found"
    558         if (!mAggregationExceptionIds.contains(rawContactId)) {
    559             return -1;
    560         }
    561 
    562         final Cursor c = db.query(AggregateExceptionQuery.TABLE,
    563                 AggregateExceptionQuery.COLUMNS,
    564                 AggregationExceptions.RAW_CONTACT_ID1 + "=" + rawContactId
    565                         + " OR " + AggregationExceptions.RAW_CONTACT_ID2 + "=" + rawContactId,
    566                 null, null, null, null);
    567 
    568         try {
    569             while (c.moveToNext()) {
    570                 int type = c.getInt(AggregateExceptionQuery.TYPE);
    571                 long rawContactId1 = c.getLong(AggregateExceptionQuery.RAW_CONTACT_ID1);
    572                 long contactId = -1;
    573                 if (rawContactId == rawContactId1) {
    574                     if (c.getInt(AggregateExceptionQuery.AGGREGATION_NEEDED_2) == 0
    575                             && !c.isNull(AggregateExceptionQuery.CONTACT_ID2)) {
    576                         contactId = c.getLong(AggregateExceptionQuery.CONTACT_ID2);
    577                     }
    578                 } else {
    579                     if (c.getInt(AggregateExceptionQuery.AGGREGATION_NEEDED_1) == 0
    580                             && !c.isNull(AggregateExceptionQuery.CONTACT_ID1)) {
    581                         contactId = c.getLong(AggregateExceptionQuery.CONTACT_ID1);
    582                     }
    583                 }
    584                 if (contactId != -1) {
    585                     if (type == AggregationExceptions.TYPE_KEEP_TOGETHER) {
    586                         matcher.keepIn(contactId);
    587                     } else {
    588                         matcher.keepOut(contactId);
    589                     }
    590                 }
    591             }
    592         } finally {
    593             c.close();
    594         }
    595 
    596         return matcher.pickBestMatch(MatchScore.MAX_SCORE, true);
    597     }
    598 
    599     /**
    600      * Picks the best matching contact based on matches between data elements.  It considers
    601      * name match to be primary and phone, email etc matches to be secondary.  A good primary
    602      * match triggers aggregation, while a good secondary match only triggers aggregation in
    603      * the absence of a strong primary mismatch.
    604      * <p>
    605      * Consider these examples:
    606      * <p>
    607      * John Doe with phone number 111-111-1111 and Jon Doe with phone number 111-111-1111 should
    608      * be aggregated (same number, similar names).
    609      * <p>
    610      * John Doe with phone number 111-111-1111 and Deborah Doe with phone number 111-111-1111 should
    611      * not be aggregated (same number, different names).
    612      */
    613     private long pickBestMatchBasedOnData(SQLiteDatabase db, long rawContactId,
    614             MatchCandidateList candidates, ContactMatcher matcher) {
    615 
    616         // Find good matches based on name alone
    617         long bestMatch = updateMatchScoresBasedOnDataMatches(db, rawContactId, matcher);
    618         if (bestMatch == ContactMatcher.MULTIPLE_MATCHES) {
    619             // We found multiple matches on the name - do not aggregate because of the ambiguity
    620             return -1;
    621         } else if (bestMatch == -1) {
    622             // We haven't found a good match on name, see if we have any matches on phone, email etc
    623             bestMatch = pickBestMatchBasedOnSecondaryData(db, rawContactId, candidates, matcher);
    624             if (bestMatch == ContactMatcher.MULTIPLE_MATCHES) {
    625                 return -1;
    626             }
    627         }
    628 
    629         return bestMatch;
    630     }
    631 
    632 
    633     /**
    634      * Picks the best matching contact based on secondary data matches.  The method loads
    635      * structured names for all candidate contacts and recomputes match scores using approximate
    636      * matching.
    637      */
    638     private long pickBestMatchBasedOnSecondaryData(SQLiteDatabase db,
    639             long rawContactId, MatchCandidateList candidates, ContactMatcher matcher) {
    640         List<Long> secondaryContactIds = matcher.prepareSecondaryMatchCandidates(
    641                 ContactMatcher.SCORE_THRESHOLD_PRIMARY);
    642         if (secondaryContactIds == null || secondaryContactIds.size() > SECONDARY_HIT_LIMIT) {
    643             return -1;
    644         }
    645 
    646         loadNameMatchCandidates(db, rawContactId, candidates, true);
    647 
    648         mSb.setLength(0);
    649         mSb.append(RawContacts.CONTACT_ID).append(" IN (");
    650         for (int i = 0; i < secondaryContactIds.size(); i++) {
    651             if (i != 0) {
    652                 mSb.append(',');
    653             }
    654             mSb.append(secondaryContactIds.get(i));
    655         }
    656 
    657         // We only want to compare structured names to structured names
    658         // at this stage, we need to ignore all other sources of name lookup data.
    659         mSb.append(") AND " + STRUCTURED_NAME_BASED_LOOKUP_SQL);
    660 
    661         matchAllCandidates(db, mSb.toString(), candidates, matcher,
    662                 ContactMatcher.MATCHING_ALGORITHM_CONSERVATIVE, null);
    663 
    664         return matcher.pickBestMatch(ContactMatcher.SCORE_THRESHOLD_SECONDARY, false);
    665     }
    666 
    667     /**
    668      * Computes scores for contacts that have matching data rows.
    669      */
    670     private long updateMatchScoresBasedOnDataMatches(SQLiteDatabase db, long rawContactId,
    671             ContactMatcher matcher) {
    672 
    673         updateMatchScoresBasedOnIdentityMatch(db, rawContactId, matcher);
    674         updateMatchScoresBasedOnNameMatches(db, rawContactId, matcher);
    675         long bestMatch = matcher.pickBestMatch(ContactMatcher.SCORE_THRESHOLD_PRIMARY, false);
    676         if (bestMatch != -1) {
    677             return bestMatch;
    678         }
    679 
    680         updateMatchScoresBasedOnEmailMatches(db, rawContactId, matcher);
    681         updateMatchScoresBasedOnPhoneMatches(db, rawContactId, matcher);
    682 
    683         return -1;
    684     }
    685 
    686     private interface IdentityLookupMatchQuery {
    687         final String TABLE = Tables.DATA + " dataA"
    688                 + " JOIN " + Tables.DATA + " dataB" +
    689                 " ON (dataA." + Identity.NAMESPACE + "=dataB." + Identity.NAMESPACE +
    690                 " AND dataA." + Identity.IDENTITY + "=dataB." + Identity.IDENTITY + ")"
    691                 + " JOIN " + Tables.RAW_CONTACTS +
    692                 " ON (dataB." + Data.RAW_CONTACT_ID + " = "
    693                 + Tables.RAW_CONTACTS + "." + RawContacts._ID + ")";
    694 
    695         final String SELECTION = "dataA." + Data.RAW_CONTACT_ID + "=?1"
    696                 + " AND dataA." + DataColumns.MIMETYPE_ID + "=?2"
    697                 + " AND dataA." + Identity.NAMESPACE + " NOT NULL"
    698                 + " AND dataA." + Identity.IDENTITY + " NOT NULL"
    699                 + " AND dataB." + DataColumns.MIMETYPE_ID + "=?2"
    700                 + " AND " + RawContactsColumns.AGGREGATION_NEEDED + "=0"
    701                 + " AND " + RawContacts.CONTACT_ID + " IN " + Tables.DEFAULT_DIRECTORY;
    702 
    703         final String[] COLUMNS = new String[] {
    704             RawContacts.CONTACT_ID
    705         };
    706 
    707         int CONTACT_ID = 0;
    708     }
    709 
    710     /**
    711      * Finds contacts with exact identity matches to the the specified raw contact.
    712      */
    713     private void updateMatchScoresBasedOnIdentityMatch(SQLiteDatabase db, long rawContactId,
    714             ContactMatcher matcher) {
    715         mSelectionArgs2[0] = String.valueOf(rawContactId);
    716         mSelectionArgs2[1] = String.valueOf(mMimeTypeIdIdentity);
    717         Cursor c = db.query(IdentityLookupMatchQuery.TABLE, IdentityLookupMatchQuery.COLUMNS,
    718                 IdentityLookupMatchQuery.SELECTION,
    719                 mSelectionArgs2, RawContacts.CONTACT_ID, null, null);
    720         try {
    721             while (c.moveToNext()) {
    722                 final long contactId = c.getLong(IdentityLookupMatchQuery.CONTACT_ID);
    723                 matcher.matchIdentity(contactId);
    724             }
    725         } finally {
    726             c.close();
    727         }
    728 
    729     }
    730 
    731     private interface NameLookupMatchQuery {
    732         String TABLE = Tables.NAME_LOOKUP + " nameA"
    733                 + " JOIN " + Tables.NAME_LOOKUP + " nameB" +
    734                 " ON (" + "nameA." + NameLookupColumns.NORMALIZED_NAME + "="
    735                         + "nameB." + NameLookupColumns.NORMALIZED_NAME + ")"
    736                 + " JOIN " + Tables.RAW_CONTACTS +
    737                 " ON (nameB." + NameLookupColumns.RAW_CONTACT_ID + " = "
    738                         + Tables.RAW_CONTACTS + "." + RawContacts._ID + ")";
    739 
    740         String SELECTION = "nameA." + NameLookupColumns.RAW_CONTACT_ID + "=?"
    741                 + " AND " + RawContactsColumns.AGGREGATION_NEEDED + "=0"
    742                 + " AND " + RawContacts.CONTACT_ID + " IN " + Tables.DEFAULT_DIRECTORY;
    743 
    744         String[] COLUMNS = new String[] {
    745             RawContacts.CONTACT_ID,
    746             "nameA." + NameLookupColumns.NORMALIZED_NAME,
    747             "nameA." + NameLookupColumns.NAME_TYPE,
    748             "nameB." + NameLookupColumns.NAME_TYPE,
    749         };
    750 
    751         int CONTACT_ID = 0;
    752         int NAME = 1;
    753         int NAME_TYPE_A = 2;
    754         int NAME_TYPE_B = 3;
    755     }
    756 
    757     /**
    758      * Finds contacts with names matching the name of the specified raw contact.
    759      */
    760     private void updateMatchScoresBasedOnNameMatches(SQLiteDatabase db, long rawContactId,
    761             ContactMatcher matcher) {
    762         mSelectionArgs1[0] = String.valueOf(rawContactId);
    763         Cursor c = db.query(NameLookupMatchQuery.TABLE, NameLookupMatchQuery.COLUMNS,
    764                 NameLookupMatchQuery.SELECTION,
    765                 mSelectionArgs1, null, null, null, PRIMARY_HIT_LIMIT_STRING);
    766         try {
    767             while (c.moveToNext()) {
    768                 long contactId = c.getLong(NameLookupMatchQuery.CONTACT_ID);
    769                 String name = c.getString(NameLookupMatchQuery.NAME);
    770                 int nameTypeA = c.getInt(NameLookupMatchQuery.NAME_TYPE_A);
    771                 int nameTypeB = c.getInt(NameLookupMatchQuery.NAME_TYPE_B);
    772                 matcher.matchName(contactId, nameTypeA, name,
    773                         nameTypeB, name, ContactMatcher.MATCHING_ALGORITHM_EXACT);
    774                 if (nameTypeA == NameLookupType.NICKNAME &&
    775                         nameTypeB == NameLookupType.NICKNAME) {
    776                     matcher.updateScoreWithNicknameMatch(contactId);
    777                 }
    778             }
    779         } finally {
    780             c.close();
    781         }
    782     }
    783 
    784     private void updateMatchScoresBasedOnEmailMatches(SQLiteDatabase db, long rawContactId,
    785             ContactMatcher matcher) {
    786         mSelectionArgs2[0] = String.valueOf(rawContactId);
    787         mSelectionArgs2[1] = String.valueOf(mMimeTypeIdEmail);
    788         Cursor c = db.query(EmailLookupQuery.TABLE, EmailLookupQuery.COLUMNS,
    789                 EmailLookupQuery.SELECTION,
    790                 mSelectionArgs2, null, null, null, SECONDARY_HIT_LIMIT_STRING);
    791         try {
    792             while (c.moveToNext()) {
    793                 long contactId = c.getLong(EmailLookupQuery.CONTACT_ID);
    794                 matcher.updateScoreWithEmailMatch(contactId);
    795             }
    796         } finally {
    797             c.close();
    798         }
    799     }
    800 
    801     private void updateMatchScoresBasedOnPhoneMatches(SQLiteDatabase db, long rawContactId,
    802             ContactMatcher matcher) {
    803         mSelectionArgs2[0] = String.valueOf(rawContactId);
    804         mSelectionArgs2[1] = mDbHelper.getUseStrictPhoneNumberComparisonParameter();
    805         Cursor c = db.query(PhoneLookupQuery.TABLE, PhoneLookupQuery.COLUMNS,
    806                 PhoneLookupQuery.SELECTION,
    807                 mSelectionArgs2, null, null, null, SECONDARY_HIT_LIMIT_STRING);
    808         try {
    809             while (c.moveToNext()) {
    810                 long contactId = c.getLong(PhoneLookupQuery.CONTACT_ID);
    811                 matcher.updateScoreWithPhoneNumberMatch(contactId);
    812             }
    813         } finally {
    814             c.close();
    815         }
    816     }
    817 
    818     /**
    819      * Loads name lookup rows for approximate name matching and updates match scores based on that
    820      * data.
    821      */
    822     private void lookupApproximateNameMatches(SQLiteDatabase db, MatchCandidateList candidates,
    823             ContactMatcher matcher) {
    824         HashSet<String> firstLetters = new HashSet<String>();
    825         for (int i = 0; i < candidates.mCount; i++) {
    826             final NameMatchCandidate candidate = candidates.mList.get(i);
    827             if (candidate.mName.length() >= 2) {
    828                 String firstLetter = candidate.mName.substring(0, 2);
    829                 if (!firstLetters.contains(firstLetter)) {
    830                     firstLetters.add(firstLetter);
    831                     final String selection = "(" + NameLookupColumns.NORMALIZED_NAME + " GLOB '"
    832                             + firstLetter + "*') AND "
    833                             + "(" + NameLookupColumns.NAME_TYPE + " IN("
    834                                     + NameLookupType.NAME_COLLATION_KEY + ","
    835                                     + NameLookupType.EMAIL_BASED_NICKNAME + ","
    836                                     + NameLookupType.NICKNAME + ")) AND "
    837                             + RawContacts.CONTACT_ID + " IN " + Tables.DEFAULT_DIRECTORY;
    838                     matchAllCandidates(db, selection, candidates, matcher,
    839                             ContactMatcher.MATCHING_ALGORITHM_APPROXIMATE,
    840                             String.valueOf(FIRST_LETTER_SUGGESTION_HIT_LIMIT));
    841                 }
    842             }
    843         }
    844     }
    845 
    846     private interface ContactNameLookupQuery {
    847         String TABLE = Tables.NAME_LOOKUP_JOIN_RAW_CONTACTS;
    848 
    849         String[] COLUMNS = new String[] {
    850                 RawContacts.CONTACT_ID,
    851                 NameLookupColumns.NORMALIZED_NAME,
    852                 NameLookupColumns.NAME_TYPE
    853         };
    854 
    855         int CONTACT_ID = 0;
    856         int NORMALIZED_NAME = 1;
    857         int NAME_TYPE = 2;
    858     }
    859 
    860     /**
    861      * Loads all candidate rows from the name lookup table and updates match scores based
    862      * on that data.
    863      */
    864     private void matchAllCandidates(SQLiteDatabase db, String selection,
    865             MatchCandidateList candidates, ContactMatcher matcher, int algorithm, String limit) {
    866         final Cursor c = db.query(ContactNameLookupQuery.TABLE, ContactNameLookupQuery.COLUMNS,
    867                 selection, null, null, null, null, limit);
    868 
    869         try {
    870             while (c.moveToNext()) {
    871                 Long contactId = c.getLong(ContactNameLookupQuery.CONTACT_ID);
    872                 String name = c.getString(ContactNameLookupQuery.NORMALIZED_NAME);
    873                 int nameType = c.getInt(ContactNameLookupQuery.NAME_TYPE);
    874 
    875                 // Note the N^2 complexity of the following fragment. This is not a huge concern
    876                 // since the number of candidates is very small and in general secondary hits
    877                 // in the absence of primary hits are rare.
    878                 for (int i = 0; i < candidates.mCount; i++) {
    879                     NameMatchCandidate candidate = candidates.mList.get(i);
    880                     matcher.matchName(contactId, candidate.mLookupType, candidate.mName,
    881                             nameType, name, algorithm);
    882                 }
    883             }
    884         } finally {
    885             c.close();
    886         }
    887     }
    888 
    889     /**
    890      * Finds contacts with data matches and returns a list of {@link MatchScore}'s in the
    891      * descending order of match score.
    892      * @param parameters
    893      */
    894      protected List<MatchScore> findMatchingContacts(final SQLiteDatabase db, long contactId,
    895             ArrayList<AggregationSuggestionParameter> parameters) {
    896 
    897         MatchCandidateList candidates = new MatchCandidateList();
    898         ContactMatcher matcher = new ContactMatcher();
    899 
    900         // Don't aggregate a contact with itself
    901         matcher.keepOut(contactId);
    902 
    903         if (parameters == null || parameters.size() == 0) {
    904             final Cursor c = db.query(RawContactIdQuery.TABLE, RawContactIdQuery.COLUMNS,
    905                     RawContacts.CONTACT_ID + "=" + contactId, null, null, null, null);
    906             try {
    907                 while (c.moveToNext()) {
    908                     long rawContactId = c.getLong(RawContactIdQuery.RAW_CONTACT_ID);
    909                     updateMatchScoresForSuggestionsBasedOnDataMatches(db, rawContactId, candidates,
    910                             matcher);
    911                 }
    912             } finally {
    913                 c.close();
    914             }
    915         } else {
    916             updateMatchScoresForSuggestionsBasedOnDataMatches(db, candidates,
    917                     matcher, parameters);
    918         }
    919 
    920         return matcher.pickBestMatches(ContactMatcher.SCORE_THRESHOLD_SUGGEST);
    921     }
    922 
    923     /**
    924      * Computes scores for contacts that have matching data rows.
    925      */
    926     private void updateMatchScoresForSuggestionsBasedOnDataMatches(SQLiteDatabase db,
    927             long rawContactId, MatchCandidateList candidates, ContactMatcher matcher) {
    928 
    929         updateMatchScoresBasedOnIdentityMatch(db, rawContactId, matcher);
    930         updateMatchScoresBasedOnNameMatches(db, rawContactId, matcher);
    931         updateMatchScoresBasedOnEmailMatches(db, rawContactId, matcher);
    932         updateMatchScoresBasedOnPhoneMatches(db, rawContactId, matcher);
    933         loadNameMatchCandidates(db, rawContactId, candidates, false);
    934         lookupApproximateNameMatches(db, candidates, matcher);
    935     }
    936 
    937     private void updateMatchScoresForSuggestionsBasedOnDataMatches(SQLiteDatabase db,
    938             MatchCandidateList candidates, ContactMatcher matcher,
    939             ArrayList<AggregationSuggestionParameter> parameters) {
    940         for (AggregationSuggestionParameter parameter : parameters) {
    941             if (AggregationSuggestions.PARAMETER_MATCH_NAME.equals(parameter.kind)) {
    942                 updateMatchScoresBasedOnNameMatches(db, parameter.value, candidates, matcher);
    943             }
    944 
    945             // TODO: add support for other parameter kinds
    946         }
    947     }
    948 }
    949