1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/predictors/autocomplete_action_predictor.h" 6 7 #include <math.h> 8 9 #include <vector> 10 11 #include "base/bind.h" 12 #include "base/guid.h" 13 #include "base/i18n/case_conversion.h" 14 #include "base/metrics/histogram.h" 15 #include "base/strings/string_util.h" 16 #include "base/strings/stringprintf.h" 17 #include "base/strings/utf_string_conversions.h" 18 #include "chrome/browser/autocomplete/autocomplete_match.h" 19 #include "chrome/browser/autocomplete/autocomplete_result.h" 20 #include "chrome/browser/chrome_notification_types.h" 21 #include "chrome/browser/history/history_notifications.h" 22 #include "chrome/browser/history/history_service.h" 23 #include "chrome/browser/history/history_service_factory.h" 24 #include "chrome/browser/history/in_memory_database.h" 25 #include "chrome/browser/omnibox/omnibox_log.h" 26 #include "chrome/browser/predictors/autocomplete_action_predictor_factory.h" 27 #include "chrome/browser/predictors/predictor_database.h" 28 #include "chrome/browser/predictors/predictor_database_factory.h" 29 #include "chrome/browser/prerender/prerender_field_trial.h" 30 #include "chrome/browser/prerender/prerender_handle.h" 31 #include "chrome/browser/prerender/prerender_manager.h" 32 #include "chrome/browser/prerender/prerender_manager_factory.h" 33 #include "chrome/browser/profiles/profile.h" 34 #include "content/public/browser/browser_thread.h" 35 #include "content/public/browser/notification_details.h" 36 #include "content/public/browser/notification_service.h" 37 #include "content/public/browser/notification_source.h" 38 39 namespace { 40 41 const float kConfidenceCutoff[] = { 42 0.8f, 43 0.5f 44 }; 45 46 COMPILE_ASSERT(arraysize(kConfidenceCutoff) == 47 predictors::AutocompleteActionPredictor::LAST_PREDICT_ACTION, 48 ConfidenceCutoff_count_mismatch); 49 50 const size_t kMinimumUserTextLength = 1; 51 const int kMinimumNumberOfHits = 3; 52 53 enum DatabaseAction { 54 DATABASE_ACTION_ADD, 55 DATABASE_ACTION_UPDATE, 56 DATABASE_ACTION_DELETE_SOME, 57 DATABASE_ACTION_DELETE_ALL, 58 DATABASE_ACTION_COUNT 59 }; 60 61 } // namespace 62 63 namespace predictors { 64 65 const int AutocompleteActionPredictor::kMaximumDaysToKeepEntry = 14; 66 67 AutocompleteActionPredictor::AutocompleteActionPredictor(Profile* profile) 68 : profile_(profile), 69 main_profile_predictor_(NULL), 70 incognito_predictor_(NULL), 71 initialized_(false) { 72 if (profile_->IsOffTheRecord()) { 73 main_profile_predictor_ = AutocompleteActionPredictorFactory::GetForProfile( 74 profile_->GetOriginalProfile()); 75 DCHECK(main_profile_predictor_); 76 main_profile_predictor_->incognito_predictor_ = this; 77 if (main_profile_predictor_->initialized_) 78 CopyFromMainProfile(); 79 } else { 80 // Request the in-memory database from the history to force it to load so 81 // it's available as soon as possible. 82 HistoryService* history_service = HistoryServiceFactory::GetForProfile( 83 profile_, Profile::EXPLICIT_ACCESS); 84 if (history_service) 85 history_service->InMemoryDatabase(); 86 87 table_ = 88 PredictorDatabaseFactory::GetForProfile(profile_)->autocomplete_table(); 89 90 // Observe all main frame loads so we can wait for the first to complete 91 // before accessing DB and IO threads to build the local cache. 92 notification_registrar_.Add(this, 93 content::NOTIFICATION_LOAD_COMPLETED_MAIN_FRAME, 94 content::NotificationService::AllSources()); 95 } 96 } 97 98 AutocompleteActionPredictor::~AutocompleteActionPredictor() { 99 if (main_profile_predictor_) 100 main_profile_predictor_->incognito_predictor_ = NULL; 101 else if (incognito_predictor_) 102 incognito_predictor_->main_profile_predictor_ = NULL; 103 if (prerender_handle_.get()) 104 prerender_handle_->OnCancel(); 105 } 106 107 void AutocompleteActionPredictor::RegisterTransitionalMatches( 108 const base::string16& user_text, 109 const AutocompleteResult& result) { 110 if (user_text.length() < kMinimumUserTextLength) 111 return; 112 const base::string16 lower_user_text(base::i18n::ToLower(user_text)); 113 114 // Merge this in to an existing match if we already saw |user_text| 115 std::vector<TransitionalMatch>::iterator match_it = 116 std::find(transitional_matches_.begin(), transitional_matches_.end(), 117 lower_user_text); 118 119 if (match_it == transitional_matches_.end()) { 120 TransitionalMatch transitional_match; 121 transitional_match.user_text = lower_user_text; 122 match_it = transitional_matches_.insert(transitional_matches_.end(), 123 transitional_match); 124 } 125 126 for (AutocompleteResult::const_iterator i(result.begin()); i != result.end(); 127 ++i) { 128 if (std::find(match_it->urls.begin(), match_it->urls.end(), 129 i->destination_url) == match_it->urls.end()) { 130 match_it->urls.push_back(i->destination_url); 131 } 132 } 133 } 134 135 void AutocompleteActionPredictor::ClearTransitionalMatches() { 136 transitional_matches_.clear(); 137 } 138 139 void AutocompleteActionPredictor::StartPrerendering( 140 const GURL& url, 141 const content::SessionStorageNamespaceMap& session_storage_namespace_map, 142 const gfx::Size& size) { 143 // Only cancel the old prerender after starting the new one, so if the URLs 144 // are the same, the underlying prerender will be reused. 145 scoped_ptr<prerender::PrerenderHandle> old_prerender_handle( 146 prerender_handle_.release()); 147 if (prerender::PrerenderManager* prerender_manager = 148 prerender::PrerenderManagerFactory::GetForProfile(profile_)) { 149 content::SessionStorageNamespace* session_storage_namespace = NULL; 150 content::SessionStorageNamespaceMap::const_iterator it = 151 session_storage_namespace_map.find(std::string()); 152 if (it != session_storage_namespace_map.end()) 153 session_storage_namespace = it->second.get(); 154 prerender_handle_.reset(prerender_manager->AddPrerenderFromOmnibox( 155 url, session_storage_namespace, size)); 156 } 157 if (old_prerender_handle) 158 old_prerender_handle->OnCancel(); 159 } 160 161 // Given a match, return a recommended action. 162 AutocompleteActionPredictor::Action 163 AutocompleteActionPredictor::RecommendAction( 164 const base::string16& user_text, 165 const AutocompleteMatch& match) const { 166 bool is_in_db = false; 167 const double confidence = CalculateConfidence(user_text, match, &is_in_db); 168 DCHECK(confidence >= 0.0 && confidence <= 1.0); 169 170 UMA_HISTOGRAM_BOOLEAN("AutocompleteActionPredictor.MatchIsInDb", is_in_db); 171 172 if (is_in_db) { 173 // Multiple enties with the same URL are fine as the confidence may be 174 // different. 175 tracked_urls_.push_back(std::make_pair(match.destination_url, confidence)); 176 UMA_HISTOGRAM_COUNTS_100("AutocompleteActionPredictor.Confidence", 177 confidence * 100); 178 } 179 180 // Map the confidence to an action. 181 Action action = ACTION_NONE; 182 for (int i = 0; i < LAST_PREDICT_ACTION; ++i) { 183 if (confidence >= kConfidenceCutoff[i]) { 184 action = static_cast<Action>(i); 185 break; 186 } 187 } 188 189 // Downgrade prerender to preconnect if this is a search match or if omnibox 190 // prerendering is disabled. There are cases when Instant will not handle a 191 // search suggestion and in those cases it would be good to prerender the 192 // search results, however search engines have not been set up to correctly 193 // handle being prerendered and until they are we should avoid it. 194 // http://crbug.com/117495 195 if (action == ACTION_PRERENDER && 196 (AutocompleteMatch::IsSearchType(match.type) || 197 !prerender::IsOmniboxEnabled(profile_))) { 198 action = ACTION_PRECONNECT; 199 } 200 201 return action; 202 } 203 204 // Return true if the suggestion type warrants a TCP/IP preconnection. 205 // i.e., it is now quite likely that the user will select the related domain. 206 // static 207 bool AutocompleteActionPredictor::IsPreconnectable( 208 const AutocompleteMatch& match) { 209 return AutocompleteMatch::IsSearchType(match.type); 210 } 211 212 void AutocompleteActionPredictor::Observe( 213 int type, 214 const content::NotificationSource& source, 215 const content::NotificationDetails& details) { 216 switch (type) { 217 case content::NOTIFICATION_LOAD_COMPLETED_MAIN_FRAME: 218 CreateLocalCachesFromDatabase(); 219 notification_registrar_.Remove( 220 this, 221 content::NOTIFICATION_LOAD_COMPLETED_MAIN_FRAME, 222 content::NotificationService::AllSources()); 223 break; 224 225 case chrome::NOTIFICATION_HISTORY_URLS_DELETED: { 226 DCHECK(initialized_); 227 const content::Details<const history::URLsDeletedDetails> 228 urls_deleted_details = 229 content::Details<const history::URLsDeletedDetails>(details); 230 if (urls_deleted_details->all_history) 231 DeleteAllRows(); 232 else 233 DeleteRowsWithURLs(urls_deleted_details->rows); 234 break; 235 } 236 237 // This notification does not catch all instances of the user navigating 238 // from the Omnibox, but it does catch the cases where the dropdown is open 239 // and those are the events we're most interested in. 240 case chrome::NOTIFICATION_OMNIBOX_OPENED_URL: { 241 DCHECK(initialized_); 242 243 // TODO(dominich): This doesn't need to be synchronous. Investigate 244 // posting it as a task to be run later. 245 OnOmniboxOpenedUrl(*content::Details<OmniboxLog>(details).ptr()); 246 break; 247 } 248 249 case chrome::NOTIFICATION_HISTORY_LOADED: { 250 TryDeleteOldEntries(content::Details<HistoryService>(details).ptr()); 251 252 notification_registrar_.Remove(this, 253 chrome::NOTIFICATION_HISTORY_LOADED, 254 content::Source<Profile>(profile_)); 255 break; 256 } 257 258 default: 259 NOTREACHED() << "Unexpected notification observed."; 260 break; 261 } 262 } 263 264 void AutocompleteActionPredictor::CreateLocalCachesFromDatabase() { 265 // Create local caches using the database as loaded. We will garbage collect 266 // rows from the caches and the database once the history service is 267 // available. 268 std::vector<AutocompleteActionPredictorTable::Row>* rows = 269 new std::vector<AutocompleteActionPredictorTable::Row>(); 270 content::BrowserThread::PostTaskAndReply(content::BrowserThread::DB, 271 FROM_HERE, 272 base::Bind(&AutocompleteActionPredictorTable::GetAllRows, table_, rows), 273 base::Bind(&AutocompleteActionPredictor::CreateCaches, AsWeakPtr(), 274 base::Owned(rows))); 275 } 276 277 void AutocompleteActionPredictor::DeleteAllRows() { 278 if (!initialized_) 279 return; 280 281 db_cache_.clear(); 282 db_id_cache_.clear(); 283 284 if (table_.get()) { 285 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, 286 base::Bind(&AutocompleteActionPredictorTable::DeleteAllRows, 287 table_)); 288 } 289 290 UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction", 291 DATABASE_ACTION_DELETE_ALL, DATABASE_ACTION_COUNT); 292 } 293 294 void AutocompleteActionPredictor::DeleteRowsWithURLs( 295 const history::URLRows& rows) { 296 if (!initialized_) 297 return; 298 299 std::vector<AutocompleteActionPredictorTable::Row::Id> id_list; 300 301 for (DBCacheMap::iterator it = db_cache_.begin(); it != db_cache_.end();) { 302 if (std::find_if(rows.begin(), rows.end(), 303 history::URLRow::URLRowHasURL(it->first.url)) != rows.end()) { 304 const DBIdCacheMap::iterator id_it = db_id_cache_.find(it->first); 305 DCHECK(id_it != db_id_cache_.end()); 306 id_list.push_back(id_it->second); 307 db_id_cache_.erase(id_it); 308 db_cache_.erase(it++); 309 } else { 310 ++it; 311 } 312 } 313 314 if (table_.get()) { 315 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, 316 base::Bind(&AutocompleteActionPredictorTable::DeleteRows, table_, 317 id_list)); 318 } 319 320 UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction", 321 DATABASE_ACTION_DELETE_SOME, DATABASE_ACTION_COUNT); 322 } 323 324 void AutocompleteActionPredictor::OnOmniboxOpenedUrl(const OmniboxLog& log) { 325 if (log.text.length() < kMinimumUserTextLength) 326 return; 327 328 const AutocompleteMatch& match = log.result.match_at(log.selected_index); 329 330 UMA_HISTOGRAM_BOOLEAN( 331 base::StringPrintf("Prerender.OmniboxNavigationsCouldPrerender%s", 332 prerender::PrerenderManager::GetModeString()).c_str(), 333 prerender::IsOmniboxEnabled(profile_)); 334 335 const GURL& opened_url = match.destination_url; 336 const base::string16 lower_user_text(base::i18n::ToLower(log.text)); 337 338 // Traverse transitional matches for those that have a user_text that is a 339 // prefix of |lower_user_text|. 340 std::vector<AutocompleteActionPredictorTable::Row> rows_to_add; 341 std::vector<AutocompleteActionPredictorTable::Row> rows_to_update; 342 343 for (std::vector<TransitionalMatch>::const_iterator it = 344 transitional_matches_.begin(); it != transitional_matches_.end(); 345 ++it) { 346 if (!StartsWith(lower_user_text, it->user_text, true)) 347 continue; 348 349 // Add entries to the database for those matches. 350 for (std::vector<GURL>::const_iterator url_it = it->urls.begin(); 351 url_it != it->urls.end(); ++url_it) { 352 DCHECK(it->user_text.length() >= kMinimumUserTextLength); 353 const DBCacheKey key = { it->user_text, *url_it }; 354 const bool is_hit = (*url_it == opened_url); 355 356 AutocompleteActionPredictorTable::Row row; 357 row.user_text = key.user_text; 358 row.url = key.url; 359 360 DBCacheMap::iterator it = db_cache_.find(key); 361 if (it == db_cache_.end()) { 362 row.id = base::GenerateGUID(); 363 row.number_of_hits = is_hit ? 1 : 0; 364 row.number_of_misses = is_hit ? 0 : 1; 365 366 rows_to_add.push_back(row); 367 } else { 368 DCHECK(db_id_cache_.find(key) != db_id_cache_.end()); 369 row.id = db_id_cache_.find(key)->second; 370 row.number_of_hits = it->second.number_of_hits + (is_hit ? 1 : 0); 371 row.number_of_misses = it->second.number_of_misses + (is_hit ? 0 : 1); 372 373 rows_to_update.push_back(row); 374 } 375 } 376 } 377 if (rows_to_add.size() > 0 || rows_to_update.size() > 0) 378 AddAndUpdateRows(rows_to_add, rows_to_update); 379 380 ClearTransitionalMatches(); 381 382 // Check against tracked urls and log accuracy for the confidence we 383 // predicted. 384 for (std::vector<std::pair<GURL, double> >::const_iterator it = 385 tracked_urls_.begin(); it != tracked_urls_.end(); 386 ++it) { 387 if (opened_url == it->first) { 388 UMA_HISTOGRAM_COUNTS_100("AutocompleteActionPredictor.AccurateCount", 389 it->second * 100); 390 } 391 } 392 tracked_urls_.clear(); 393 } 394 395 void AutocompleteActionPredictor::AddAndUpdateRows( 396 const AutocompleteActionPredictorTable::Rows& rows_to_add, 397 const AutocompleteActionPredictorTable::Rows& rows_to_update) { 398 if (!initialized_) 399 return; 400 401 for (AutocompleteActionPredictorTable::Rows::const_iterator it = 402 rows_to_add.begin(); it != rows_to_add.end(); ++it) { 403 const DBCacheKey key = { it->user_text, it->url }; 404 DBCacheValue value = { it->number_of_hits, it->number_of_misses }; 405 406 DCHECK(db_cache_.find(key) == db_cache_.end()); 407 408 db_cache_[key] = value; 409 db_id_cache_[key] = it->id; 410 UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction", 411 DATABASE_ACTION_ADD, DATABASE_ACTION_COUNT); 412 } 413 for (AutocompleteActionPredictorTable::Rows::const_iterator it = 414 rows_to_update.begin(); it != rows_to_update.end(); ++it) { 415 const DBCacheKey key = { it->user_text, it->url }; 416 417 DBCacheMap::iterator db_it = db_cache_.find(key); 418 DCHECK(db_it != db_cache_.end()); 419 DCHECK(db_id_cache_.find(key) != db_id_cache_.end()); 420 421 db_it->second.number_of_hits = it->number_of_hits; 422 db_it->second.number_of_misses = it->number_of_misses; 423 UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction", 424 DATABASE_ACTION_UPDATE, DATABASE_ACTION_COUNT); 425 } 426 427 if (table_.get()) { 428 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, 429 base::Bind(&AutocompleteActionPredictorTable::AddAndUpdateRows, 430 table_, rows_to_add, rows_to_update)); 431 } 432 } 433 434 void AutocompleteActionPredictor::CreateCaches( 435 std::vector<AutocompleteActionPredictorTable::Row>* rows) { 436 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); 437 DCHECK(!profile_->IsOffTheRecord()); 438 DCHECK(!initialized_); 439 DCHECK(db_cache_.empty()); 440 DCHECK(db_id_cache_.empty()); 441 442 for (std::vector<AutocompleteActionPredictorTable::Row>::const_iterator it = 443 rows->begin(); it != rows->end(); ++it) { 444 const DBCacheKey key = { it->user_text, it->url }; 445 const DBCacheValue value = { it->number_of_hits, it->number_of_misses }; 446 db_cache_[key] = value; 447 db_id_cache_[key] = it->id; 448 } 449 450 // If the history service is ready, delete any old or invalid entries. 451 HistoryService* history_service = 452 HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS); 453 if (!TryDeleteOldEntries(history_service)) { 454 // Wait for the notification that the history service is ready and the URL 455 // DB is loaded. 456 notification_registrar_.Add(this, chrome::NOTIFICATION_HISTORY_LOADED, 457 content::Source<Profile>(profile_)); 458 } 459 } 460 461 bool AutocompleteActionPredictor::TryDeleteOldEntries(HistoryService* service) { 462 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); 463 DCHECK(!profile_->IsOffTheRecord()); 464 DCHECK(!initialized_); 465 466 if (!service) 467 return false; 468 469 history::URLDatabase* url_db = service->InMemoryDatabase(); 470 if (!url_db) 471 return false; 472 473 DeleteOldEntries(url_db); 474 return true; 475 } 476 477 void AutocompleteActionPredictor::DeleteOldEntries( 478 history::URLDatabase* url_db) { 479 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); 480 DCHECK(!profile_->IsOffTheRecord()); 481 DCHECK(!initialized_); 482 DCHECK(table_.get()); 483 484 std::vector<AutocompleteActionPredictorTable::Row::Id> ids_to_delete; 485 DeleteOldIdsFromCaches(url_db, &ids_to_delete); 486 487 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, 488 base::Bind(&AutocompleteActionPredictorTable::DeleteRows, table_, 489 ids_to_delete)); 490 491 FinishInitialization(); 492 if (incognito_predictor_) 493 incognito_predictor_->CopyFromMainProfile(); 494 } 495 496 void AutocompleteActionPredictor::DeleteOldIdsFromCaches( 497 history::URLDatabase* url_db, 498 std::vector<AutocompleteActionPredictorTable::Row::Id>* id_list) { 499 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); 500 DCHECK(!profile_->IsOffTheRecord()); 501 DCHECK(!initialized_); 502 DCHECK(url_db); 503 DCHECK(id_list); 504 505 id_list->clear(); 506 for (DBCacheMap::iterator it = db_cache_.begin(); it != db_cache_.end();) { 507 history::URLRow url_row; 508 509 if ((url_db->GetRowForURL(it->first.url, &url_row) == 0) || 510 ((base::Time::Now() - url_row.last_visit()).InDays() > 511 kMaximumDaysToKeepEntry)) { 512 const DBIdCacheMap::iterator id_it = db_id_cache_.find(it->first); 513 DCHECK(id_it != db_id_cache_.end()); 514 id_list->push_back(id_it->second); 515 db_id_cache_.erase(id_it); 516 db_cache_.erase(it++); 517 } else { 518 ++it; 519 } 520 } 521 } 522 523 void AutocompleteActionPredictor::CopyFromMainProfile() { 524 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); 525 DCHECK(profile_->IsOffTheRecord()); 526 DCHECK(!initialized_); 527 DCHECK(main_profile_predictor_); 528 DCHECK(main_profile_predictor_->initialized_); 529 530 db_cache_ = main_profile_predictor_->db_cache_; 531 db_id_cache_ = main_profile_predictor_->db_id_cache_; 532 FinishInitialization(); 533 } 534 535 void AutocompleteActionPredictor::FinishInitialization() { 536 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); 537 DCHECK(!initialized_); 538 539 // Incognito and normal profiles should listen only to omnibox notifications 540 // from their own profile, but both should listen to history deletions from 541 // the main profile, since opening the history page in either case actually 542 // opens the non-incognito history (and lets users delete from there). 543 notification_registrar_.Add(this, chrome::NOTIFICATION_OMNIBOX_OPENED_URL, 544 content::Source<Profile>(profile_)); 545 notification_registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, 546 content::Source<Profile>(profile_->GetOriginalProfile())); 547 initialized_ = true; 548 } 549 550 double AutocompleteActionPredictor::CalculateConfidence( 551 const base::string16& user_text, 552 const AutocompleteMatch& match, 553 bool* is_in_db) const { 554 const DBCacheKey key = { user_text, match.destination_url }; 555 556 *is_in_db = false; 557 if (user_text.length() < kMinimumUserTextLength) 558 return 0.0; 559 560 const DBCacheMap::const_iterator iter = db_cache_.find(key); 561 if (iter == db_cache_.end()) 562 return 0.0; 563 564 *is_in_db = true; 565 return CalculateConfidenceForDbEntry(iter); 566 } 567 568 double AutocompleteActionPredictor::CalculateConfidenceForDbEntry( 569 DBCacheMap::const_iterator iter) const { 570 const DBCacheValue& value = iter->second; 571 if (value.number_of_hits < kMinimumNumberOfHits) 572 return 0.0; 573 574 const double number_of_hits = static_cast<double>(value.number_of_hits); 575 return number_of_hits / (number_of_hits + value.number_of_misses); 576 } 577 578 AutocompleteActionPredictor::TransitionalMatch::TransitionalMatch() { 579 } 580 581 AutocompleteActionPredictor::TransitionalMatch::~TransitionalMatch() { 582 } 583 584 } // namespace predictors 585