1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 17 package com.android.providers.contacts.aggregation; 18 19 import android.database.Cursor; 20 import android.database.DatabaseUtils; 21 import android.database.sqlite.SQLiteDatabase; 22 import android.provider.ContactsContract.AggregationExceptions; 23 import android.provider.ContactsContract.CommonDataKinds.Identity; 24 import android.provider.ContactsContract.Contacts.AggregationSuggestions; 25 import android.provider.ContactsContract.Data; 26 import android.provider.ContactsContract.RawContacts; 27 import android.text.TextUtils; 28 import android.util.Log; 29 import com.android.providers.contacts.ContactsDatabaseHelper; 30 import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns; 31 import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupColumns; 32 import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupType; 33 import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns; 34 import com.android.providers.contacts.ContactsDatabaseHelper.Tables; 35 import com.android.providers.contacts.ContactsProvider2; 36 import com.android.providers.contacts.NameSplitter; 37 import com.android.providers.contacts.PhotoPriorityResolver; 38 import com.android.providers.contacts.TransactionContext; 39 import com.android.providers.contacts.aggregation.util.CommonNicknameCache; 40 import com.android.providers.contacts.aggregation.util.ContactMatcher; 41 import com.android.providers.contacts.aggregation.util.MatchScore; 42 import com.android.providers.contacts.database.ContactsTableUtil; 43 import com.google.android.collect.Sets; 44 45 import java.util.ArrayList; 46 import java.util.HashSet; 47 import java.util.List; 48 import java.util.Set; 49 50 /** 51 * ContactAggregator deals with aggregating contact information coming from different sources. 52 * Two John Doe contacts from two disjoint sources are presumed to be the same 53 * person unless the user declares otherwise. 54 */ 55 public class ContactAggregator extends AbstractContactAggregator { 56 57 // Return code for the canJoinIntoContact method. 58 private static final int JOIN = 1; 59 private static final int KEEP_SEPARATE = 0; 60 private static final int RE_AGGREGATE = -1; 61 62 private final ContactMatcher mMatcher = new ContactMatcher(); 63 64 /** 65 * Constructor. 66 */ 67 public ContactAggregator(ContactsProvider2 contactsProvider, 68 ContactsDatabaseHelper contactsDatabaseHelper, 69 PhotoPriorityResolver photoPriorityResolver, NameSplitter nameSplitter, 70 CommonNicknameCache commonNicknameCache) { 71 super(contactsProvider, contactsDatabaseHelper, photoPriorityResolver, nameSplitter, 72 commonNicknameCache); 73 } 74 75 /** 76 * Given a specific raw contact, finds all matching aggregate contacts and chooses the one 77 * with the highest match score. If no such contact is found, creates a new contact. 78 */ 79 synchronized void aggregateContact(TransactionContext txContext, SQLiteDatabase db, 80 long rawContactId, long accountId, long currentContactId, 81 MatchCandidateList candidates) { 82 83 if (VERBOSE_LOGGING) { 84 Log.v(TAG, "aggregateContact: rid=" + rawContactId + " cid=" + currentContactId); 85 } 86 87 int aggregationMode = RawContacts.AGGREGATION_MODE_DEFAULT; 88 89 Integer aggModeObject = mRawContactsMarkedForAggregation.remove(rawContactId); 90 if (aggModeObject != null) { 91 aggregationMode = aggModeObject; 92 } 93 94 long contactId = -1; // Best matching contact ID. 95 boolean needReaggregate = false; 96 97 final ContactMatcher matcher = new ContactMatcher(); 98 final Set<Long> rawContactIdsInSameAccount = new HashSet<Long>(); 99 final Set<Long> rawContactIdsInOtherAccount = new HashSet<Long>(); 100 if (aggregationMode == RawContacts.AGGREGATION_MODE_DEFAULT) { 101 candidates.clear(); 102 matcher.clear(); 103 104 contactId = pickBestMatchBasedOnExceptions(db, rawContactId, matcher); 105 if (contactId == -1) { 106 107 // If this is a newly inserted contact or a visible contact, look for 108 // data matches. 109 if (currentContactId == 0 110 || mDbHelper.isContactInDefaultDirectory(db, currentContactId)) { 111 contactId = pickBestMatchBasedOnData(db, rawContactId, candidates, matcher); 112 } 113 114 // If we found an best matched contact, find out if the raw contact can be joined 115 // into it 116 if (contactId != -1 && contactId != currentContactId) { 117 // List all raw contact ID and their account ID mappings in contact 118 // [contactId] excluding raw_contact [rawContactId]. 119 120 // Based on the mapping, create two sets of raw contact IDs in 121 // [rawContactAccountId] and not in [rawContactAccountId]. We don't always 122 // need them, so lazily initialize them. 123 mSelectionArgs2[0] = String.valueOf(contactId); 124 mSelectionArgs2[1] = String.valueOf(rawContactId); 125 final Cursor rawContactsToAccountsCursor = db.rawQuery( 126 "SELECT " + RawContacts._ID + ", " + RawContactsColumns.ACCOUNT_ID + 127 " FROM " + Tables.RAW_CONTACTS + 128 " WHERE " + RawContacts.CONTACT_ID + "=?" + 129 " AND " + RawContacts._ID + "!=?", 130 mSelectionArgs2); 131 try { 132 rawContactsToAccountsCursor.moveToPosition(-1); 133 while (rawContactsToAccountsCursor.moveToNext()) { 134 final long rcId = rawContactsToAccountsCursor.getLong(0); 135 final long rc_accountId = rawContactsToAccountsCursor.getLong(1); 136 if (rc_accountId == accountId) { 137 rawContactIdsInSameAccount.add(rcId); 138 } else { 139 rawContactIdsInOtherAccount.add(rcId); 140 } 141 } 142 } finally { 143 rawContactsToAccountsCursor.close(); 144 } 145 final int actionCode; 146 final int totalNumOfRawContactsInCandidate = rawContactIdsInSameAccount.size() 147 + rawContactIdsInOtherAccount.size(); 148 if (totalNumOfRawContactsInCandidate >= AGGREGATION_CONTACT_SIZE_LIMIT) { 149 if (VERBOSE_LOGGING) { 150 Log.v(TAG, "Too many raw contacts (" + totalNumOfRawContactsInCandidate 151 + ") in the best matching contact, so skip aggregation"); 152 } 153 actionCode = KEEP_SEPARATE; 154 } else { 155 actionCode = canJoinIntoContact(db, rawContactId, 156 rawContactIdsInSameAccount, rawContactIdsInOtherAccount); 157 } 158 if (actionCode == KEEP_SEPARATE) { 159 contactId = -1; 160 } else if (actionCode == RE_AGGREGATE) { 161 needReaggregate = true; 162 } 163 } 164 } 165 } else if (aggregationMode == RawContacts.AGGREGATION_MODE_DISABLED) { 166 return; 167 } 168 169 // # of raw_contacts in the [currentContactId] contact excluding the [rawContactId] 170 // raw_contact. 171 long currentContactContentsCount = 0; 172 173 if (currentContactId != 0) { 174 mRawContactCountQuery.bindLong(1, currentContactId); 175 mRawContactCountQuery.bindLong(2, rawContactId); 176 currentContactContentsCount = mRawContactCountQuery.simpleQueryForLong(); 177 } 178 179 // If there are no other raw contacts in the current aggregate, we might as well reuse it. 180 // Also, if the aggregation mode is SUSPENDED, we must reuse the same aggregate. 181 if (contactId == -1 182 && currentContactId != 0 183 && (currentContactContentsCount == 0 184 || aggregationMode == RawContacts.AGGREGATION_MODE_SUSPENDED)) { 185 contactId = currentContactId; 186 } 187 188 if (contactId == currentContactId) { 189 // Aggregation unchanged 190 markAggregated(db, String.valueOf(rawContactId)); 191 if (VERBOSE_LOGGING) { 192 Log.v(TAG, "Aggregation unchanged"); 193 } 194 } else if (contactId == -1) { 195 // create new contact for [rawContactId] 196 createContactForRawContacts(db, txContext, Sets.newHashSet(rawContactId), null); 197 if (currentContactContentsCount > 0) { 198 updateAggregateData(txContext, currentContactId); 199 } 200 if (VERBOSE_LOGGING) { 201 Log.v(TAG, "create new contact for rid=" + rawContactId); 202 } 203 } else if (needReaggregate) { 204 // re-aggregate 205 final Set<Long> allRawContactIdSet = new HashSet<Long>(); 206 allRawContactIdSet.addAll(rawContactIdsInSameAccount); 207 allRawContactIdSet.addAll(rawContactIdsInOtherAccount); 208 // If there is no other raw contacts aggregated with the given raw contact currently, 209 // we might as well reuse it. 210 currentContactId = (currentContactId != 0 && currentContactContentsCount == 0) 211 ? currentContactId : 0; 212 reAggregateRawContacts(txContext, db, contactId, currentContactId, rawContactId, 213 allRawContactIdSet); 214 if (VERBOSE_LOGGING) { 215 Log.v(TAG, "Re-aggregating rid=" + rawContactId + " and cid=" + contactId); 216 } 217 } else { 218 // Joining with an existing aggregate 219 if (currentContactContentsCount == 0) { 220 // Delete a previous aggregate if it only contained this raw contact 221 ContactsTableUtil.deleteContact(db, currentContactId); 222 223 mAggregatedPresenceDelete.bindLong(1, currentContactId); 224 mAggregatedPresenceDelete.execute(); 225 } 226 227 clearSuperPrimarySetting(db, contactId, rawContactId); 228 setContactIdAndMarkAggregated(rawContactId, contactId); 229 computeAggregateData(db, contactId, mContactUpdate); 230 mContactUpdate.bindLong(ContactReplaceSqlStatement.CONTACT_ID, contactId); 231 mContactUpdate.execute(); 232 mDbHelper.updateContactVisible(txContext, contactId); 233 updateAggregatedStatusUpdate(contactId); 234 // Make sure the raw contact does not contribute to the current contact 235 if (currentContactId != 0) { 236 updateAggregateData(txContext, currentContactId); 237 } 238 if (VERBOSE_LOGGING) { 239 Log.v(TAG, "Join rid=" + rawContactId + " with cid=" + contactId); 240 } 241 } 242 } 243 244 /** 245 * Find out which mime-types are shared by raw contact of {@code rawContactId} and raw contacts 246 * of {@code contactId}. Clear the is_super_primary settings for these mime-types. 247 */ 248 private void clearSuperPrimarySetting(SQLiteDatabase db, long contactId, long rawContactId) { 249 final String[] args = {String.valueOf(contactId), String.valueOf(rawContactId)}; 250 251 // Find out which mime-types exist with is_super_primary=true on both the raw contact of 252 // rawContactId and raw contacts of contactId 253 int index = 0; 254 final StringBuilder mimeTypeCondition = new StringBuilder(); 255 mimeTypeCondition.append(" AND " + DataColumns.MIMETYPE_ID + " IN ("); 256 257 final Cursor c = db.rawQuery( 258 "SELECT DISTINCT(a." + DataColumns.MIMETYPE_ID + ")" + 259 " FROM (SELECT " + DataColumns.MIMETYPE_ID + " FROM " + Tables.DATA + " WHERE " + 260 Data.IS_SUPER_PRIMARY + " =1 AND " + 261 Data.RAW_CONTACT_ID + " IN (SELECT " + RawContacts._ID + " FROM " + 262 Tables.RAW_CONTACTS + " WHERE " + RawContacts.CONTACT_ID + "=?1)) AS a" + 263 " JOIN (SELECT " + DataColumns.MIMETYPE_ID + " FROM " + Tables.DATA + " WHERE " + 264 Data.IS_SUPER_PRIMARY + " =1 AND " + 265 Data.RAW_CONTACT_ID + "=?2) AS b" + 266 " ON a." + DataColumns.MIMETYPE_ID + "=b." + DataColumns.MIMETYPE_ID, 267 args); 268 try { 269 c.moveToPosition(-1); 270 while (c.moveToNext()) { 271 if (index > 0) { 272 mimeTypeCondition.append(','); 273 } 274 mimeTypeCondition.append(c.getLong((0))); 275 index++; 276 } 277 } finally { 278 c.close(); 279 } 280 281 if (index == 0) { 282 return; 283 } 284 285 // Clear is_super_primary setting for all the mime-types with is_super_primary=true 286 // in both raw contact of rawContactId and raw contacts of contactId 287 String superPrimaryUpdateSql = "UPDATE " + Tables.DATA + 288 " SET " + Data.IS_SUPER_PRIMARY + "=0" + 289 " WHERE (" + Data.RAW_CONTACT_ID + 290 " IN (SELECT " + RawContacts._ID + " FROM " + Tables.RAW_CONTACTS + 291 " WHERE " + RawContacts.CONTACT_ID + "=?1)" + 292 " OR " + Data.RAW_CONTACT_ID + "=?2)"; 293 294 mimeTypeCondition.append(')'); 295 superPrimaryUpdateSql += mimeTypeCondition.toString(); 296 db.execSQL(superPrimaryUpdateSql, args); 297 } 298 299 /** 300 * @return JOIN if the raw contact of {@code rawContactId} can be joined into the existing 301 * contact of {@code contactId}. KEEP_SEPARATE if the raw contact of {@code rawContactId} 302 * cannot be joined into the existing contact of {@code contactId}. RE_AGGREGATE if raw contact 303 * of {@code rawContactId} and all the raw contacts of contact of {@code contactId} need to be 304 * re-aggregated. 305 * 306 * If contact of {@code contactId} doesn't contain any raw contacts from the same account as 307 * raw contact of {@code rawContactId}, join raw contact with contact if there is no identity 308 * mismatch between them on the same namespace, otherwise, keep them separate. 309 * 310 * If contact of {@code contactId} contains raw contacts from the same account as raw contact of 311 * {@code rawContactId}, join raw contact with contact if there's at least one raw contact in 312 * those raw contacts that shares at least one email address, phone number, or identity; 313 * otherwise, re-aggregate raw contact and all the raw contacts of contact. 314 */ 315 private int canJoinIntoContact(SQLiteDatabase db, long rawContactId, 316 Set<Long> rawContactIdsInSameAccount, Set<Long> rawContactIdsInOtherAccount ) { 317 318 if (rawContactIdsInSameAccount.isEmpty()) { 319 final String rid = String.valueOf(rawContactId); 320 final String ridsInOtherAccts = TextUtils.join(",", rawContactIdsInOtherAccount); 321 // If there is no identity match between raw contact of [rawContactId] and 322 // any raw contact in other accounts on the same namespace, and there is at least 323 // one identity mismatch exist, keep raw contact separate from contact. 324 if (DatabaseUtils.longForQuery(db, buildIdentityMatchingSql(rid, ridsInOtherAccts, 325 /* isIdentityMatching =*/ true, /* countOnly =*/ true), null) == 0 && 326 DatabaseUtils.longForQuery(db, buildIdentityMatchingSql(rid, ridsInOtherAccts, 327 /* isIdentityMatching =*/ false, /* countOnly =*/ true), null) > 0) { 328 if (VERBOSE_LOGGING) { 329 Log.v(TAG, "canJoinIntoContact: no duplicates, but has no matching identity " + 330 "and has mis-matching identity on the same namespace between rid=" + 331 rid + " and ridsInOtherAccts=" + ridsInOtherAccts); 332 } 333 return KEEP_SEPARATE; // has identity and identity doesn't match 334 } else { 335 if (VERBOSE_LOGGING) { 336 Log.v(TAG, "canJoinIntoContact: can join the first raw contact from the same " + 337 "account without any identity mismatch."); 338 } 339 return JOIN; // no identity or identity match 340 } 341 } 342 if (VERBOSE_LOGGING) { 343 Log.v(TAG, "canJoinIntoContact: " + rawContactIdsInSameAccount.size() + 344 " duplicate(s) found"); 345 } 346 347 348 final Set<Long> rawContactIdSet = new HashSet<Long>(); 349 rawContactIdSet.add(rawContactId); 350 if (rawContactIdsInSameAccount.size() > 0 && 351 isDataMaching(db, rawContactIdSet, rawContactIdsInSameAccount)) { 352 if (VERBOSE_LOGGING) { 353 Log.v(TAG, "canJoinIntoContact: join if there is a data matching found in the " + 354 "same account"); 355 } 356 return JOIN; 357 } else { 358 if (VERBOSE_LOGGING) { 359 Log.v(TAG, "canJoinIntoContact: re-aggregate rid=" + rawContactId + 360 " with its best matching contact to connected component"); 361 } 362 return RE_AGGREGATE; 363 } 364 } 365 366 /** 367 * If there's any identity, email address or a phone number matching between two raw contact 368 * sets. 369 */ 370 private boolean isDataMaching(SQLiteDatabase db, Set<Long> rawContactIdSet1, 371 Set<Long> rawContactIdSet2) { 372 final String rawContactIds1 = TextUtils.join(",", rawContactIdSet1); 373 final String rawContactIds2 = TextUtils.join(",", rawContactIdSet2); 374 // First, check for the identity 375 if (isFirstColumnGreaterThanZero(db, buildIdentityMatchingSql( 376 rawContactIds1, rawContactIds2, /* isIdentityMatching =*/ true, 377 /* countOnly =*/true))) { 378 if (VERBOSE_LOGGING) { 379 Log.v(TAG, "canJoinIntoContact: identity match found between " + rawContactIds1 + 380 " and " + rawContactIds2); 381 } 382 return true; 383 } 384 385 // Next, check for the email address. 386 if (isFirstColumnGreaterThanZero(db, 387 buildEmailMatchingSql(rawContactIds1, rawContactIds2, true))) { 388 if (VERBOSE_LOGGING) { 389 Log.v(TAG, "canJoinIntoContact: email match found between " + rawContactIds1 + 390 " and " + rawContactIds2); 391 } 392 return true; 393 } 394 395 // Lastly, the phone number. 396 if (isFirstColumnGreaterThanZero(db, 397 buildPhoneMatchingSql(rawContactIds1, rawContactIds2, true))) { 398 if (VERBOSE_LOGGING) { 399 Log.v(TAG, "canJoinIntoContact: phone match found between " + rawContactIds1 + 400 " and " + rawContactIds2); 401 } 402 return true; 403 } 404 return false; 405 } 406 407 /** 408 * Re-aggregate rawContact of {@code rawContactId} and all the raw contacts of 409 * {@code existingRawContactIds} into connected components. This only happens when a given 410 * raw contacts cannot be joined with its best matching contacts directly. 411 * 412 * Two raw contacts are considered connected if they share at least one email address, phone 413 * number or identity. Create new contact for each connected component except the very first 414 * one that doesn't contain rawContactId of {@code rawContactId}. 415 */ 416 private void reAggregateRawContacts(TransactionContext txContext, SQLiteDatabase db, 417 long contactId, long currentContactId, long rawContactId, 418 Set<Long> existingRawContactIds) { 419 // Find the connected component based on the aggregation exceptions or 420 // identity/email/phone matching for all the raw contacts of [contactId] and the give 421 // raw contact. 422 final Set<Long> allIds = new HashSet<Long>(); 423 allIds.add(rawContactId); 424 allIds.addAll(existingRawContactIds); 425 final Set<Set<Long>> connectedRawContactSets = findConnectedRawContacts(db, allIds); 426 427 if (connectedRawContactSets.size() == 1) { 428 // If everything is connected, create one contact with [contactId] 429 createContactForRawContacts(db, txContext, connectedRawContactSets.iterator().next(), 430 contactId); 431 } else { 432 for (Set<Long> connectedRawContactIds : connectedRawContactSets) { 433 if (connectedRawContactIds.contains(rawContactId)) { 434 // crate contact for connect component containing [rawContactId], reuse 435 // [currentContactId] if possible. 436 createContactForRawContacts(db, txContext, connectedRawContactIds, 437 currentContactId == 0 ? null : currentContactId); 438 connectedRawContactSets.remove(connectedRawContactIds); 439 break; 440 } 441 } 442 // Create new contact for each connected component except the last one. The last one 443 // will reuse [contactId]. Only the last one can reuse [contactId] when all other raw 444 // contacts has already been assigned new contact Id, so that the contact aggregation 445 // stats could be updated correctly. 446 int index = connectedRawContactSets.size(); 447 for (Set<Long> connectedRawContactIds : connectedRawContactSets) { 448 if (index > 1) { 449 createContactForRawContacts(db, txContext, connectedRawContactIds, null); 450 index--; 451 } else { 452 createContactForRawContacts(db, txContext, connectedRawContactIds, contactId); 453 } 454 } 455 } 456 } 457 458 /** 459 * Ensures that automatic aggregation rules are followed after a contact 460 * becomes visible or invisible. Specifically, consider this case: there are 461 * three contacts named Foo. Two of them come from account A1 and one comes 462 * from account A2. The aggregation rules say that in this case none of the 463 * three Foo's should be aggregated: two of them are in the same account, so 464 * they don't get aggregated; the third has two affinities, so it does not 465 * join either of them. 466 * <p> 467 * Consider what happens if one of the "Foo"s from account A1 becomes 468 * invisible. Nothing stands in the way of aggregating the other two 469 * anymore, so they should get joined. 470 * <p> 471 * What if the invisible "Foo" becomes visible after that? We should split the 472 * aggregate between the other two. 473 */ 474 public void updateAggregationAfterVisibilityChange(long contactId) { 475 SQLiteDatabase db = mDbHelper.getWritableDatabase(); 476 boolean visible = mDbHelper.isContactInDefaultDirectory(db, contactId); 477 if (visible) { 478 markContactForAggregation(db, contactId); 479 } else { 480 // Find all contacts that _could be_ aggregated with this one and 481 // rerun aggregation for all of them 482 mSelectionArgs1[0] = String.valueOf(contactId); 483 Cursor cursor = db.query(RawContactIdQuery.TABLE, RawContactIdQuery.COLUMNS, 484 RawContactIdQuery.SELECTION, mSelectionArgs1, null, null, null); 485 try { 486 while (cursor.moveToNext()) { 487 long rawContactId = cursor.getLong(RawContactIdQuery.RAW_CONTACT_ID); 488 mMatcher.clear(); 489 490 updateMatchScoresBasedOnIdentityMatch(db, rawContactId, mMatcher); 491 updateMatchScoresBasedOnNameMatches(db, rawContactId, mMatcher); 492 List<MatchScore> bestMatches = 493 mMatcher.pickBestMatches(ContactMatcher.SCORE_THRESHOLD_PRIMARY); 494 for (MatchScore matchScore : bestMatches) { 495 markContactForAggregation(db, matchScore.getContactId()); 496 } 497 498 mMatcher.clear(); 499 updateMatchScoresBasedOnEmailMatches(db, rawContactId, mMatcher); 500 updateMatchScoresBasedOnPhoneMatches(db, rawContactId, mMatcher); 501 bestMatches = 502 mMatcher.pickBestMatches(ContactMatcher.SCORE_THRESHOLD_SECONDARY); 503 for (MatchScore matchScore : bestMatches) { 504 markContactForAggregation(db, matchScore.getContactId()); 505 } 506 } 507 } finally { 508 cursor.close(); 509 } 510 } 511 } 512 513 /** 514 * Updates the contact ID for the specified contact and marks the raw contact as aggregated. 515 */ 516 private void setContactIdAndMarkAggregated(long rawContactId, long contactId) { 517 mContactIdAndMarkAggregatedUpdate.bindLong(1, contactId); 518 mContactIdAndMarkAggregatedUpdate.bindLong(2, rawContactId); 519 mContactIdAndMarkAggregatedUpdate.execute(); 520 } 521 522 interface AggregateExceptionQuery { 523 String TABLE = Tables.AGGREGATION_EXCEPTIONS 524 + " JOIN raw_contacts raw_contacts1 " 525 + " ON (agg_exceptions.raw_contact_id1 = raw_contacts1._id) " 526 + " JOIN raw_contacts raw_contacts2 " 527 + " ON (agg_exceptions.raw_contact_id2 = raw_contacts2._id) "; 528 529 String[] COLUMNS = { 530 AggregationExceptions.TYPE, 531 AggregationExceptions.RAW_CONTACT_ID1, 532 "raw_contacts1." + RawContacts.CONTACT_ID, 533 "raw_contacts1." + RawContactsColumns.AGGREGATION_NEEDED, 534 "raw_contacts2." + RawContacts.CONTACT_ID, 535 "raw_contacts2." + RawContactsColumns.AGGREGATION_NEEDED, 536 }; 537 538 int TYPE = 0; 539 int RAW_CONTACT_ID1 = 1; 540 int CONTACT_ID1 = 2; 541 int AGGREGATION_NEEDED_1 = 3; 542 int CONTACT_ID2 = 4; 543 int AGGREGATION_NEEDED_2 = 5; 544 } 545 546 /** 547 * Computes match scores based on exceptions entered by the user: always match and never match. 548 * Returns the aggregate contact with the always match exception if any. 549 */ 550 private long pickBestMatchBasedOnExceptions(SQLiteDatabase db, long rawContactId, 551 ContactMatcher matcher) { 552 if (!mAggregationExceptionIdsValid) { 553 prefetchAggregationExceptionIds(db); 554 } 555 556 // If there are no aggregation exceptions involving this raw contact, there is no need to 557 // run a query and we can just return -1, which stands for "nothing found" 558 if (!mAggregationExceptionIds.contains(rawContactId)) { 559 return -1; 560 } 561 562 final Cursor c = db.query(AggregateExceptionQuery.TABLE, 563 AggregateExceptionQuery.COLUMNS, 564 AggregationExceptions.RAW_CONTACT_ID1 + "=" + rawContactId 565 + " OR " + AggregationExceptions.RAW_CONTACT_ID2 + "=" + rawContactId, 566 null, null, null, null); 567 568 try { 569 while (c.moveToNext()) { 570 int type = c.getInt(AggregateExceptionQuery.TYPE); 571 long rawContactId1 = c.getLong(AggregateExceptionQuery.RAW_CONTACT_ID1); 572 long contactId = -1; 573 if (rawContactId == rawContactId1) { 574 if (c.getInt(AggregateExceptionQuery.AGGREGATION_NEEDED_2) == 0 575 && !c.isNull(AggregateExceptionQuery.CONTACT_ID2)) { 576 contactId = c.getLong(AggregateExceptionQuery.CONTACT_ID2); 577 } 578 } else { 579 if (c.getInt(AggregateExceptionQuery.AGGREGATION_NEEDED_1) == 0 580 && !c.isNull(AggregateExceptionQuery.CONTACT_ID1)) { 581 contactId = c.getLong(AggregateExceptionQuery.CONTACT_ID1); 582 } 583 } 584 if (contactId != -1) { 585 if (type == AggregationExceptions.TYPE_KEEP_TOGETHER) { 586 matcher.keepIn(contactId); 587 } else { 588 matcher.keepOut(contactId); 589 } 590 } 591 } 592 } finally { 593 c.close(); 594 } 595 596 return matcher.pickBestMatch(MatchScore.MAX_SCORE, true); 597 } 598 599 /** 600 * Picks the best matching contact based on matches between data elements. It considers 601 * name match to be primary and phone, email etc matches to be secondary. A good primary 602 * match triggers aggregation, while a good secondary match only triggers aggregation in 603 * the absence of a strong primary mismatch. 604 * <p> 605 * Consider these examples: 606 * <p> 607 * John Doe with phone number 111-111-1111 and Jon Doe with phone number 111-111-1111 should 608 * be aggregated (same number, similar names). 609 * <p> 610 * John Doe with phone number 111-111-1111 and Deborah Doe with phone number 111-111-1111 should 611 * not be aggregated (same number, different names). 612 */ 613 private long pickBestMatchBasedOnData(SQLiteDatabase db, long rawContactId, 614 MatchCandidateList candidates, ContactMatcher matcher) { 615 616 // Find good matches based on name alone 617 long bestMatch = updateMatchScoresBasedOnDataMatches(db, rawContactId, matcher); 618 if (bestMatch == ContactMatcher.MULTIPLE_MATCHES) { 619 // We found multiple matches on the name - do not aggregate because of the ambiguity 620 return -1; 621 } else if (bestMatch == -1) { 622 // We haven't found a good match on name, see if we have any matches on phone, email etc 623 bestMatch = pickBestMatchBasedOnSecondaryData(db, rawContactId, candidates, matcher); 624 if (bestMatch == ContactMatcher.MULTIPLE_MATCHES) { 625 return -1; 626 } 627 } 628 629 return bestMatch; 630 } 631 632 633 /** 634 * Picks the best matching contact based on secondary data matches. The method loads 635 * structured names for all candidate contacts and recomputes match scores using approximate 636 * matching. 637 */ 638 private long pickBestMatchBasedOnSecondaryData(SQLiteDatabase db, 639 long rawContactId, MatchCandidateList candidates, ContactMatcher matcher) { 640 List<Long> secondaryContactIds = matcher.prepareSecondaryMatchCandidates( 641 ContactMatcher.SCORE_THRESHOLD_PRIMARY); 642 if (secondaryContactIds == null || secondaryContactIds.size() > SECONDARY_HIT_LIMIT) { 643 return -1; 644 } 645 646 loadNameMatchCandidates(db, rawContactId, candidates, true); 647 648 mSb.setLength(0); 649 mSb.append(RawContacts.CONTACT_ID).append(" IN ("); 650 for (int i = 0; i < secondaryContactIds.size(); i++) { 651 if (i != 0) { 652 mSb.append(','); 653 } 654 mSb.append(secondaryContactIds.get(i)); 655 } 656 657 // We only want to compare structured names to structured names 658 // at this stage, we need to ignore all other sources of name lookup data. 659 mSb.append(") AND " + STRUCTURED_NAME_BASED_LOOKUP_SQL); 660 661 matchAllCandidates(db, mSb.toString(), candidates, matcher, 662 ContactMatcher.MATCHING_ALGORITHM_CONSERVATIVE, null); 663 664 return matcher.pickBestMatch(ContactMatcher.SCORE_THRESHOLD_SECONDARY, false); 665 } 666 667 /** 668 * Computes scores for contacts that have matching data rows. 669 */ 670 private long updateMatchScoresBasedOnDataMatches(SQLiteDatabase db, long rawContactId, 671 ContactMatcher matcher) { 672 673 updateMatchScoresBasedOnIdentityMatch(db, rawContactId, matcher); 674 updateMatchScoresBasedOnNameMatches(db, rawContactId, matcher); 675 long bestMatch = matcher.pickBestMatch(ContactMatcher.SCORE_THRESHOLD_PRIMARY, false); 676 if (bestMatch != -1) { 677 return bestMatch; 678 } 679 680 updateMatchScoresBasedOnEmailMatches(db, rawContactId, matcher); 681 updateMatchScoresBasedOnPhoneMatches(db, rawContactId, matcher); 682 683 return -1; 684 } 685 686 private interface IdentityLookupMatchQuery { 687 final String TABLE = Tables.DATA + " dataA" 688 + " JOIN " + Tables.DATA + " dataB" + 689 " ON (dataA." + Identity.NAMESPACE + "=dataB." + Identity.NAMESPACE + 690 " AND dataA." + Identity.IDENTITY + "=dataB." + Identity.IDENTITY + ")" 691 + " JOIN " + Tables.RAW_CONTACTS + 692 " ON (dataB." + Data.RAW_CONTACT_ID + " = " 693 + Tables.RAW_CONTACTS + "." + RawContacts._ID + ")"; 694 695 final String SELECTION = "dataA." + Data.RAW_CONTACT_ID + "=?1" 696 + " AND dataA." + DataColumns.MIMETYPE_ID + "=?2" 697 + " AND dataA." + Identity.NAMESPACE + " NOT NULL" 698 + " AND dataA." + Identity.IDENTITY + " NOT NULL" 699 + " AND dataB." + DataColumns.MIMETYPE_ID + "=?2" 700 + " AND " + RawContactsColumns.AGGREGATION_NEEDED + "=0" 701 + " AND " + RawContacts.CONTACT_ID + " IN " + Tables.DEFAULT_DIRECTORY; 702 703 final String[] COLUMNS = new String[] { 704 RawContacts.CONTACT_ID 705 }; 706 707 int CONTACT_ID = 0; 708 } 709 710 /** 711 * Finds contacts with exact identity matches to the the specified raw contact. 712 */ 713 private void updateMatchScoresBasedOnIdentityMatch(SQLiteDatabase db, long rawContactId, 714 ContactMatcher matcher) { 715 mSelectionArgs2[0] = String.valueOf(rawContactId); 716 mSelectionArgs2[1] = String.valueOf(mMimeTypeIdIdentity); 717 Cursor c = db.query(IdentityLookupMatchQuery.TABLE, IdentityLookupMatchQuery.COLUMNS, 718 IdentityLookupMatchQuery.SELECTION, 719 mSelectionArgs2, RawContacts.CONTACT_ID, null, null); 720 try { 721 while (c.moveToNext()) { 722 final long contactId = c.getLong(IdentityLookupMatchQuery.CONTACT_ID); 723 matcher.matchIdentity(contactId); 724 } 725 } finally { 726 c.close(); 727 } 728 729 } 730 731 private interface NameLookupMatchQuery { 732 String TABLE = Tables.NAME_LOOKUP + " nameA" 733 + " JOIN " + Tables.NAME_LOOKUP + " nameB" + 734 " ON (" + "nameA." + NameLookupColumns.NORMALIZED_NAME + "=" 735 + "nameB." + NameLookupColumns.NORMALIZED_NAME + ")" 736 + " JOIN " + Tables.RAW_CONTACTS + 737 " ON (nameB." + NameLookupColumns.RAW_CONTACT_ID + " = " 738 + Tables.RAW_CONTACTS + "." + RawContacts._ID + ")"; 739 740 String SELECTION = "nameA." + NameLookupColumns.RAW_CONTACT_ID + "=?" 741 + " AND " + RawContactsColumns.AGGREGATION_NEEDED + "=0" 742 + " AND " + RawContacts.CONTACT_ID + " IN " + Tables.DEFAULT_DIRECTORY; 743 744 String[] COLUMNS = new String[] { 745 RawContacts.CONTACT_ID, 746 "nameA." + NameLookupColumns.NORMALIZED_NAME, 747 "nameA." + NameLookupColumns.NAME_TYPE, 748 "nameB." + NameLookupColumns.NAME_TYPE, 749 }; 750 751 int CONTACT_ID = 0; 752 int NAME = 1; 753 int NAME_TYPE_A = 2; 754 int NAME_TYPE_B = 3; 755 } 756 757 /** 758 * Finds contacts with names matching the name of the specified raw contact. 759 */ 760 private void updateMatchScoresBasedOnNameMatches(SQLiteDatabase db, long rawContactId, 761 ContactMatcher matcher) { 762 mSelectionArgs1[0] = String.valueOf(rawContactId); 763 Cursor c = db.query(NameLookupMatchQuery.TABLE, NameLookupMatchQuery.COLUMNS, 764 NameLookupMatchQuery.SELECTION, 765 mSelectionArgs1, null, null, null, PRIMARY_HIT_LIMIT_STRING); 766 try { 767 while (c.moveToNext()) { 768 long contactId = c.getLong(NameLookupMatchQuery.CONTACT_ID); 769 String name = c.getString(NameLookupMatchQuery.NAME); 770 int nameTypeA = c.getInt(NameLookupMatchQuery.NAME_TYPE_A); 771 int nameTypeB = c.getInt(NameLookupMatchQuery.NAME_TYPE_B); 772 matcher.matchName(contactId, nameTypeA, name, 773 nameTypeB, name, ContactMatcher.MATCHING_ALGORITHM_EXACT); 774 if (nameTypeA == NameLookupType.NICKNAME && 775 nameTypeB == NameLookupType.NICKNAME) { 776 matcher.updateScoreWithNicknameMatch(contactId); 777 } 778 } 779 } finally { 780 c.close(); 781 } 782 } 783 784 private void updateMatchScoresBasedOnEmailMatches(SQLiteDatabase db, long rawContactId, 785 ContactMatcher matcher) { 786 mSelectionArgs2[0] = String.valueOf(rawContactId); 787 mSelectionArgs2[1] = String.valueOf(mMimeTypeIdEmail); 788 Cursor c = db.query(EmailLookupQuery.TABLE, EmailLookupQuery.COLUMNS, 789 EmailLookupQuery.SELECTION, 790 mSelectionArgs2, null, null, null, SECONDARY_HIT_LIMIT_STRING); 791 try { 792 while (c.moveToNext()) { 793 long contactId = c.getLong(EmailLookupQuery.CONTACT_ID); 794 matcher.updateScoreWithEmailMatch(contactId); 795 } 796 } finally { 797 c.close(); 798 } 799 } 800 801 private void updateMatchScoresBasedOnPhoneMatches(SQLiteDatabase db, long rawContactId, 802 ContactMatcher matcher) { 803 mSelectionArgs2[0] = String.valueOf(rawContactId); 804 mSelectionArgs2[1] = mDbHelper.getUseStrictPhoneNumberComparisonParameter(); 805 Cursor c = db.query(PhoneLookupQuery.TABLE, PhoneLookupQuery.COLUMNS, 806 PhoneLookupQuery.SELECTION, 807 mSelectionArgs2, null, null, null, SECONDARY_HIT_LIMIT_STRING); 808 try { 809 while (c.moveToNext()) { 810 long contactId = c.getLong(PhoneLookupQuery.CONTACT_ID); 811 matcher.updateScoreWithPhoneNumberMatch(contactId); 812 } 813 } finally { 814 c.close(); 815 } 816 } 817 818 /** 819 * Loads name lookup rows for approximate name matching and updates match scores based on that 820 * data. 821 */ 822 private void lookupApproximateNameMatches(SQLiteDatabase db, MatchCandidateList candidates, 823 ContactMatcher matcher) { 824 HashSet<String> firstLetters = new HashSet<String>(); 825 for (int i = 0; i < candidates.mCount; i++) { 826 final NameMatchCandidate candidate = candidates.mList.get(i); 827 if (candidate.mName.length() >= 2) { 828 String firstLetter = candidate.mName.substring(0, 2); 829 if (!firstLetters.contains(firstLetter)) { 830 firstLetters.add(firstLetter); 831 final String selection = "(" + NameLookupColumns.NORMALIZED_NAME + " GLOB '" 832 + firstLetter + "*') AND " 833 + "(" + NameLookupColumns.NAME_TYPE + " IN(" 834 + NameLookupType.NAME_COLLATION_KEY + "," 835 + NameLookupType.EMAIL_BASED_NICKNAME + "," 836 + NameLookupType.NICKNAME + ")) AND " 837 + RawContacts.CONTACT_ID + " IN " + Tables.DEFAULT_DIRECTORY; 838 matchAllCandidates(db, selection, candidates, matcher, 839 ContactMatcher.MATCHING_ALGORITHM_APPROXIMATE, 840 String.valueOf(FIRST_LETTER_SUGGESTION_HIT_LIMIT)); 841 } 842 } 843 } 844 } 845 846 private interface ContactNameLookupQuery { 847 String TABLE = Tables.NAME_LOOKUP_JOIN_RAW_CONTACTS; 848 849 String[] COLUMNS = new String[] { 850 RawContacts.CONTACT_ID, 851 NameLookupColumns.NORMALIZED_NAME, 852 NameLookupColumns.NAME_TYPE 853 }; 854 855 int CONTACT_ID = 0; 856 int NORMALIZED_NAME = 1; 857 int NAME_TYPE = 2; 858 } 859 860 /** 861 * Loads all candidate rows from the name lookup table and updates match scores based 862 * on that data. 863 */ 864 private void matchAllCandidates(SQLiteDatabase db, String selection, 865 MatchCandidateList candidates, ContactMatcher matcher, int algorithm, String limit) { 866 final Cursor c = db.query(ContactNameLookupQuery.TABLE, ContactNameLookupQuery.COLUMNS, 867 selection, null, null, null, null, limit); 868 869 try { 870 while (c.moveToNext()) { 871 Long contactId = c.getLong(ContactNameLookupQuery.CONTACT_ID); 872 String name = c.getString(ContactNameLookupQuery.NORMALIZED_NAME); 873 int nameType = c.getInt(ContactNameLookupQuery.NAME_TYPE); 874 875 // Note the N^2 complexity of the following fragment. This is not a huge concern 876 // since the number of candidates is very small and in general secondary hits 877 // in the absence of primary hits are rare. 878 for (int i = 0; i < candidates.mCount; i++) { 879 NameMatchCandidate candidate = candidates.mList.get(i); 880 matcher.matchName(contactId, candidate.mLookupType, candidate.mName, 881 nameType, name, algorithm); 882 } 883 } 884 } finally { 885 c.close(); 886 } 887 } 888 889 /** 890 * Finds contacts with data matches and returns a list of {@link MatchScore}'s in the 891 * descending order of match score. 892 * @param parameters 893 */ 894 protected List<MatchScore> findMatchingContacts(final SQLiteDatabase db, long contactId, 895 ArrayList<AggregationSuggestionParameter> parameters) { 896 897 MatchCandidateList candidates = new MatchCandidateList(); 898 ContactMatcher matcher = new ContactMatcher(); 899 900 // Don't aggregate a contact with itself 901 matcher.keepOut(contactId); 902 903 if (parameters == null || parameters.size() == 0) { 904 final Cursor c = db.query(RawContactIdQuery.TABLE, RawContactIdQuery.COLUMNS, 905 RawContacts.CONTACT_ID + "=" + contactId, null, null, null, null); 906 try { 907 while (c.moveToNext()) { 908 long rawContactId = c.getLong(RawContactIdQuery.RAW_CONTACT_ID); 909 updateMatchScoresForSuggestionsBasedOnDataMatches(db, rawContactId, candidates, 910 matcher); 911 } 912 } finally { 913 c.close(); 914 } 915 } else { 916 updateMatchScoresForSuggestionsBasedOnDataMatches(db, candidates, 917 matcher, parameters); 918 } 919 920 return matcher.pickBestMatches(ContactMatcher.SCORE_THRESHOLD_SUGGEST); 921 } 922 923 /** 924 * Computes scores for contacts that have matching data rows. 925 */ 926 private void updateMatchScoresForSuggestionsBasedOnDataMatches(SQLiteDatabase db, 927 long rawContactId, MatchCandidateList candidates, ContactMatcher matcher) { 928 929 updateMatchScoresBasedOnIdentityMatch(db, rawContactId, matcher); 930 updateMatchScoresBasedOnNameMatches(db, rawContactId, matcher); 931 updateMatchScoresBasedOnEmailMatches(db, rawContactId, matcher); 932 updateMatchScoresBasedOnPhoneMatches(db, rawContactId, matcher); 933 loadNameMatchCandidates(db, rawContactId, candidates, false); 934 lookupApproximateNameMatches(db, candidates, matcher); 935 } 936 937 private void updateMatchScoresForSuggestionsBasedOnDataMatches(SQLiteDatabase db, 938 MatchCandidateList candidates, ContactMatcher matcher, 939 ArrayList<AggregationSuggestionParameter> parameters) { 940 for (AggregationSuggestionParameter parameter : parameters) { 941 if (AggregationSuggestions.PARAMETER_MATCH_NAME.equals(parameter.kind)) { 942 updateMatchScoresBasedOnNameMatches(db, parameter.value, candidates, matcher); 943 } 944 945 // TODO: add support for other parameter kinds 946 } 947 } 948 } 949