1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/history/typed_url_syncable_service.h" 6 7 #include "base/auto_reset.h" 8 #include "base/logging.h" 9 #include "base/metrics/histogram.h" 10 #include "base/strings/utf_string_conversions.h" 11 #include "chrome/browser/history/history_backend.h" 12 #include "net/base/net_util.h" 13 #include "sync/protocol/sync.pb.h" 14 #include "sync/protocol/typed_url_specifics.pb.h" 15 16 namespace { 17 18 // The server backend can't handle arbitrarily large node sizes, so to keep 19 // the size under control we limit the visit array. 20 static const int kMaxTypedUrlVisits = 100; 21 22 // There's no limit on how many visits the history DB could have for a given 23 // typed URL, so we limit how many we fetch from the DB to avoid crashes due to 24 // running out of memory (http://crbug.com/89793). This value is different 25 // from kMaxTypedUrlVisits, as some of the visits fetched from the DB may be 26 // RELOAD visits, which will be stripped. 27 static const int kMaxVisitsToFetch = 1000; 28 29 // This is the threshold at which we start throttling sync updates for typed 30 // URLs - any URLs with a typed_count >= this threshold will be throttled. 31 static const int kTypedUrlVisitThrottleThreshold = 10; 32 33 // This is the multiple we use when throttling sync updates. If the multiple is 34 // N, we sync up every Nth update (i.e. when typed_count % N == 0). 35 static const int kTypedUrlVisitThrottleMultiple = 10; 36 37 } // namespace 38 39 namespace history { 40 41 const char kTypedUrlTag[] = "google_chrome_typed_urls"; 42 43 static bool CheckVisitOrdering(const VisitVector& visits) { 44 int64 previous_visit_time = 0; 45 for (VisitVector::const_iterator visit = visits.begin(); 46 visit != visits.end(); ++visit) { 47 if (visit != visits.begin()) { 48 // We allow duplicate visits here - they shouldn't really be allowed, but 49 // they still seem to show up sometimes and we haven't figured out the 50 // source, so we just log an error instead of failing an assertion. 51 // (http://crbug.com/91473). 52 if (previous_visit_time == visit->visit_time.ToInternalValue()) 53 DVLOG(1) << "Duplicate visit time encountered"; 54 else if (previous_visit_time > visit->visit_time.ToInternalValue()) 55 return false; 56 } 57 58 previous_visit_time = visit->visit_time.ToInternalValue(); 59 } 60 return true; 61 } 62 63 TypedUrlSyncableService::TypedUrlSyncableService( 64 HistoryBackend* history_backend) 65 : history_backend_(history_backend), 66 processing_syncer_changes_(false), 67 expected_loop_(base::MessageLoop::current()) { 68 DCHECK(history_backend_); 69 DCHECK(expected_loop_ == base::MessageLoop::current()); 70 } 71 72 TypedUrlSyncableService::~TypedUrlSyncableService() { 73 DCHECK(expected_loop_ == base::MessageLoop::current()); 74 } 75 76 syncer::SyncMergeResult TypedUrlSyncableService::MergeDataAndStartSyncing( 77 syncer::ModelType type, 78 const syncer::SyncDataList& initial_sync_data, 79 scoped_ptr<syncer::SyncChangeProcessor> sync_processor, 80 scoped_ptr<syncer::SyncErrorFactory> error_handler) { 81 DCHECK(expected_loop_ == base::MessageLoop::current()); 82 DCHECK(!sync_processor_.get()); 83 DCHECK(sync_processor.get()); 84 DCHECK(error_handler.get()); 85 DCHECK_EQ(type, syncer::TYPED_URLS); 86 87 syncer::SyncMergeResult merge_result(type); 88 sync_processor_ = sync_processor.Pass(); 89 sync_error_handler_ = error_handler.Pass(); 90 91 // TODO(mgist): Add implementation 92 93 return merge_result; 94 } 95 96 void TypedUrlSyncableService::StopSyncing(syncer::ModelType type) { 97 DCHECK(expected_loop_ == base::MessageLoop::current()); 98 DCHECK_EQ(type, syncer::TYPED_URLS); 99 100 sync_processor_.reset(); 101 sync_error_handler_.reset(); 102 } 103 104 syncer::SyncDataList TypedUrlSyncableService::GetAllSyncData( 105 syncer::ModelType type) const { 106 DCHECK(expected_loop_ == base::MessageLoop::current()); 107 syncer::SyncDataList list; 108 109 // TODO(mgist): Add implementation 110 111 return list; 112 } 113 114 syncer::SyncError TypedUrlSyncableService::ProcessSyncChanges( 115 const tracked_objects::Location& from_here, 116 const syncer::SyncChangeList& change_list) { 117 DCHECK(expected_loop_ == base::MessageLoop::current()); 118 119 // TODO(mgist): Add implementation 120 121 return syncer::SyncError(FROM_HERE, 122 syncer::SyncError::DATATYPE_ERROR, 123 "Typed url syncable service is not implemented.", 124 syncer::TYPED_URLS); 125 } 126 127 void TypedUrlSyncableService::OnUrlsModified(URLRows* changed_urls) { 128 DCHECK(expected_loop_ == base::MessageLoop::current()); 129 DCHECK(changed_urls); 130 131 if (processing_syncer_changes_) 132 return; // These are changes originating from us, ignore. 133 if (!sync_processor_.get()) 134 return; // Sync processor not yet initialized, don't sync. 135 136 // Create SyncChangeList. 137 syncer::SyncChangeList changes; 138 139 for (URLRows::iterator url = changed_urls->begin(); 140 url != changed_urls->end(); ++url) { 141 // Only care if the modified URL is typed. 142 if (url->typed_count() > 0) { 143 // If there were any errors updating the sync node, just ignore them and 144 // continue on to process the next URL. 145 CreateOrUpdateSyncNode(*url, &changes); 146 } 147 } 148 149 // Send SyncChangeList to server if there are any changes. 150 if (changes.size() > 0) 151 sync_processor_->ProcessSyncChanges(FROM_HERE, changes); 152 } 153 154 void TypedUrlSyncableService::OnUrlVisited(content::PageTransition transition, 155 URLRow* row) { 156 DCHECK(expected_loop_ == base::MessageLoop::current()); 157 DCHECK(row); 158 159 if (processing_syncer_changes_) 160 return; // These are changes originating from us, ignore. 161 if (!sync_processor_.get()) 162 return; // Sync processor not yet initialized, don't sync. 163 if (!ShouldSyncVisit(transition, row)) 164 return; 165 166 // Create SyncChangeList. 167 syncer::SyncChangeList changes; 168 169 CreateOrUpdateSyncNode(*row, &changes); 170 171 // Send SyncChangeList to server if there are any changes. 172 if (changes.size() > 0) 173 sync_processor_->ProcessSyncChanges(FROM_HERE, changes); 174 } 175 176 void TypedUrlSyncableService::OnUrlsDeleted(bool all_history, 177 bool expired, 178 URLRows* rows) { 179 DCHECK(expected_loop_ == base::MessageLoop::current()); 180 181 if (processing_syncer_changes_) 182 return; // These are changes originating from us, ignore. 183 if (!sync_processor_.get()) 184 return; // Sync processor not yet initialized, don't sync. 185 186 // Ignore URLs expired due to old age (we don't want to sync them as deletions 187 // to avoid extra traffic up to the server, and also to make sure that a 188 // client with a bad clock setting won't go on an expiration rampage and 189 // delete all history from every client). The server will gracefully age out 190 // the sync DB entries when they've been idle for long enough. 191 if (expired) 192 return; 193 194 // Create SyncChangeList. 195 syncer::SyncChangeList changes; 196 197 if (all_history) { 198 // Delete all synced typed urls. 199 for (std::set<GURL>::const_iterator url = synced_typed_urls_.begin(); 200 url != synced_typed_urls_.end(); ++url) { 201 VisitVector visits; 202 URLRow row(*url); 203 AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE, 204 row, visits, url->spec(), &changes); 205 } 206 // Clear cache of server state. 207 synced_typed_urls_.clear(); 208 } else { 209 DCHECK(rows); 210 // Delete rows. 211 for (URLRows::const_iterator row = rows->begin(); 212 row != rows->end(); ++row) { 213 // Add specifics to change list for all synced urls that were deleted. 214 if (synced_typed_urls_.find(row->url()) != synced_typed_urls_.end()) { 215 VisitVector visits; 216 AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE, 217 *row, visits, row->url().spec(), &changes); 218 // Delete typed url from cache. 219 synced_typed_urls_.erase(row->url()); 220 } 221 } 222 } 223 224 // Send SyncChangeList to server if there are any changes. 225 if (changes.size() > 0) 226 sync_processor_->ProcessSyncChanges(FROM_HERE, changes); 227 } 228 229 bool TypedUrlSyncableService::ShouldIgnoreUrl(const GURL& url) { 230 // Ignore empty URLs. Not sure how this can happen (maybe import from other 231 // busted browsers, or misuse of the history API, or just plain bugs) but we 232 // can't deal with them. 233 if (url.spec().empty()) 234 return true; 235 236 // Ignore local file URLs. 237 if (url.SchemeIsFile()) 238 return true; 239 240 // Ignore localhost URLs. 241 if (net::IsLocalhost(url.host())) 242 return true; 243 244 return false; 245 } 246 247 bool TypedUrlSyncableService::ShouldSyncVisit( 248 content::PageTransition page_transition, 249 URLRow* row) { 250 if (!row) 251 return false; 252 int typed_count = row->typed_count(); 253 content::PageTransition transition = static_cast<content::PageTransition>( 254 page_transition & content::PAGE_TRANSITION_CORE_MASK); 255 256 // Just use an ad-hoc criteria to determine whether to ignore this 257 // notification. For most users, the distribution of visits is roughly a bell 258 // curve with a long tail - there are lots of URLs with < 5 visits so we want 259 // to make sure we sync up every visit to ensure the proper ordering of 260 // suggestions. But there are relatively few URLs with > 10 visits, and those 261 // tend to be more broadly distributed such that there's no need to sync up 262 // every visit to preserve their relative ordering. 263 return (transition == content::PAGE_TRANSITION_TYPED && 264 typed_count > 0 && 265 (typed_count < kTypedUrlVisitThrottleThreshold || 266 (typed_count % kTypedUrlVisitThrottleMultiple) == 0)); 267 } 268 269 bool TypedUrlSyncableService::CreateOrUpdateSyncNode( 270 URLRow url, 271 syncer::SyncChangeList* changes) { 272 DCHECK_GT(url.typed_count(), 0); 273 274 if (ShouldIgnoreUrl(url.url())) 275 return true; 276 277 // Get the visits for this node. 278 VisitVector visit_vector; 279 if (!FixupURLAndGetVisits(&url, &visit_vector)) { 280 DLOG(ERROR) << "Could not load visits for url: " << url.url(); 281 return false; 282 } 283 DCHECK(!visit_vector.empty()); 284 285 std::string title = url.url().spec(); 286 syncer::SyncChange::SyncChangeType change_type; 287 288 // If server already has URL, then send a sync update, else add it. 289 change_type = 290 (synced_typed_urls_.find(url.url()) != synced_typed_urls_.end()) ? 291 syncer::SyncChange::ACTION_UPDATE : 292 syncer::SyncChange::ACTION_ADD; 293 294 // Ensure cache of server state is up to date. 295 synced_typed_urls_.insert(url.url()); 296 297 AddTypedUrlToChangeList(change_type, url, visit_vector, title, changes); 298 299 return true; 300 } 301 302 void TypedUrlSyncableService::AddTypedUrlToChangeList( 303 syncer::SyncChange::SyncChangeType change_type, 304 const URLRow& row, 305 const VisitVector& visits, 306 std::string title, 307 syncer::SyncChangeList* change_list) { 308 sync_pb::EntitySpecifics entity_specifics; 309 sync_pb::TypedUrlSpecifics* typed_url = entity_specifics.mutable_typed_url(); 310 311 if (change_type == syncer::SyncChange::ACTION_DELETE) { 312 typed_url->set_url(row.url().spec()); 313 } else { 314 WriteToTypedUrlSpecifics(row, visits, typed_url); 315 } 316 317 change_list->push_back( 318 syncer::SyncChange(FROM_HERE, change_type, 319 syncer::SyncData::CreateLocalData( 320 kTypedUrlTag, title, entity_specifics))); 321 } 322 323 void TypedUrlSyncableService::WriteToTypedUrlSpecifics( 324 const URLRow& url, 325 const VisitVector& visits, 326 sync_pb::TypedUrlSpecifics* typed_url) { 327 328 DCHECK(!url.last_visit().is_null()); 329 DCHECK(!visits.empty()); 330 DCHECK_EQ(url.last_visit().ToInternalValue(), 331 visits.back().visit_time.ToInternalValue()); 332 333 typed_url->set_url(url.url().spec()); 334 typed_url->set_title(base::UTF16ToUTF8(url.title())); 335 typed_url->set_hidden(url.hidden()); 336 337 DCHECK(CheckVisitOrdering(visits)); 338 339 bool only_typed = false; 340 int skip_count = 0; 341 342 if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) { 343 int typed_count = 0; 344 int total = 0; 345 // Walk the passed-in visit vector and count the # of typed visits. 346 for (VisitVector::const_iterator visit = visits.begin(); 347 visit != visits.end(); ++visit) { 348 content::PageTransition transition = content::PageTransitionFromInt( 349 visit->transition & content::PAGE_TRANSITION_CORE_MASK); 350 // We ignore reload visits. 351 if (transition == content::PAGE_TRANSITION_RELOAD) 352 continue; 353 ++total; 354 if (transition == content::PAGE_TRANSITION_TYPED) 355 ++typed_count; 356 } 357 // We should have at least one typed visit. This can sometimes happen if 358 // the history DB has an inaccurate count for some reason (there's been 359 // bugs in the history code in the past which has left users in the wild 360 // with incorrect counts - http://crbug.com/84258). 361 DCHECK(typed_count > 0); 362 363 if (typed_count > kMaxTypedUrlVisits) { 364 only_typed = true; 365 skip_count = typed_count - kMaxTypedUrlVisits; 366 } else if (total > kMaxTypedUrlVisits) { 367 skip_count = total - kMaxTypedUrlVisits; 368 } 369 } 370 371 for (VisitVector::const_iterator visit = visits.begin(); 372 visit != visits.end(); ++visit) { 373 content::PageTransition transition = content::PageTransitionFromInt( 374 visit->transition & content::PAGE_TRANSITION_CORE_MASK); 375 // Skip reload visits. 376 if (transition == content::PAGE_TRANSITION_RELOAD) 377 continue; 378 379 // If we only have room for typed visits, then only add typed visits. 380 if (only_typed && transition != content::PAGE_TRANSITION_TYPED) 381 continue; 382 383 if (skip_count > 0) { 384 // We have too many entries to fit, so we need to skip the oldest ones. 385 // Only skip typed URLs if there are too many typed URLs to fit. 386 if (only_typed || transition != content::PAGE_TRANSITION_TYPED) { 387 --skip_count; 388 continue; 389 } 390 } 391 typed_url->add_visits(visit->visit_time.ToInternalValue()); 392 typed_url->add_visit_transitions(visit->transition); 393 } 394 DCHECK_EQ(skip_count, 0); 395 396 if (typed_url->visits_size() == 0) { 397 // If we get here, it's because we don't actually have any TYPED visits 398 // even though the visit's typed_count > 0 (corrupted typed_count). So 399 // let's go ahead and add a RELOAD visit at the most recent visit since 400 // it's not legal to have an empty visit array (yet another workaround 401 // for http://crbug.com/84258). 402 typed_url->add_visits(url.last_visit().ToInternalValue()); 403 typed_url->add_visit_transitions(content::PAGE_TRANSITION_RELOAD); 404 } 405 CHECK_GT(typed_url->visits_size(), 0); 406 CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits); 407 CHECK_EQ(typed_url->visits_size(), typed_url->visit_transitions_size()); 408 } 409 410 bool TypedUrlSyncableService::FixupURLAndGetVisits( 411 URLRow* url, 412 VisitVector* visits) { 413 ++num_db_accesses_; 414 CHECK(history_backend_); 415 if (!history_backend_->GetMostRecentVisitsForURL( 416 url->id(), kMaxVisitsToFetch, visits)) { 417 ++num_db_errors_; 418 return false; 419 } 420 421 // Sometimes (due to a bug elsewhere in the history or sync code, or due to 422 // a crash between adding a URL to the history database and updating the 423 // visit DB) the visit vector for a URL can be empty. If this happens, just 424 // create a new visit whose timestamp is the same as the last_visit time. 425 // This is a workaround for http://crbug.com/84258. 426 if (visits->empty()) { 427 DVLOG(1) << "Found empty visits for URL: " << url->url(); 428 VisitRow visit( 429 url->id(), url->last_visit(), 0, content::PAGE_TRANSITION_TYPED, 0); 430 visits->push_back(visit); 431 } 432 433 // GetMostRecentVisitsForURL() returns the data in the opposite order that 434 // we need it, so reverse it. 435 std::reverse(visits->begin(), visits->end()); 436 437 // Sometimes, the last_visit field in the URL doesn't match the timestamp of 438 // the last visit in our visit array (they come from different tables, so 439 // crashes/bugs can cause them to mismatch), so just set it here. 440 url->set_last_visit(visits->back().visit_time); 441 DCHECK(CheckVisitOrdering(*visits)); 442 return true; 443 } 444 445 } // namespace history 446