Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/history/typed_url_syncable_service.h"
      6 
      7 #include "base/auto_reset.h"
      8 #include "base/logging.h"
      9 #include "base/metrics/histogram.h"
     10 #include "base/strings/utf_string_conversions.h"
     11 #include "chrome/browser/history/history_backend.h"
     12 #include "net/base/net_util.h"
     13 #include "sync/protocol/sync.pb.h"
     14 #include "sync/protocol/typed_url_specifics.pb.h"
     15 
     16 namespace {
     17 
     18 // The server backend can't handle arbitrarily large node sizes, so to keep
     19 // the size under control we limit the visit array.
     20 static const int kMaxTypedUrlVisits = 100;
     21 
     22 // There's no limit on how many visits the history DB could have for a given
     23 // typed URL, so we limit how many we fetch from the DB to avoid crashes due to
     24 // running out of memory (http://crbug.com/89793). This value is different
     25 // from kMaxTypedUrlVisits, as some of the visits fetched from the DB may be
     26 // RELOAD visits, which will be stripped.
     27 static const int kMaxVisitsToFetch = 1000;
     28 
     29 // This is the threshold at which we start throttling sync updates for typed
     30 // URLs - any URLs with a typed_count >= this threshold will be throttled.
     31 static const int kTypedUrlVisitThrottleThreshold = 10;
     32 
     33 // This is the multiple we use when throttling sync updates. If the multiple is
     34 // N, we sync up every Nth update (i.e. when typed_count % N == 0).
     35 static const int kTypedUrlVisitThrottleMultiple = 10;
     36 
     37 }  // namespace
     38 
     39 namespace history {
     40 
// Tag handed to syncer::SyncData::CreateLocalData() as its first argument for
// every typed-URL change built by AddTypedUrlToChangeList().
const char kTypedUrlTag[] = "google_chrome_typed_urls";
     42 
     43 static bool CheckVisitOrdering(const VisitVector& visits) {
     44   int64 previous_visit_time = 0;
     45   for (VisitVector::const_iterator visit = visits.begin();
     46        visit != visits.end(); ++visit) {
     47     if (visit != visits.begin()) {
     48       // We allow duplicate visits here - they shouldn't really be allowed, but
     49       // they still seem to show up sometimes and we haven't figured out the
     50       // source, so we just log an error instead of failing an assertion.
     51       // (http://crbug.com/91473).
     52       if (previous_visit_time == visit->visit_time.ToInternalValue())
     53         DVLOG(1) << "Duplicate visit time encountered";
     54       else if (previous_visit_time > visit->visit_time.ToInternalValue())
     55         return false;
     56     }
     57 
     58     previous_visit_time = visit->visit_time.ToInternalValue();
     59   }
     60   return true;
     61 }
     62 
     63 TypedUrlSyncableService::TypedUrlSyncableService(
     64     HistoryBackend* history_backend)
     65     : history_backend_(history_backend),
     66       processing_syncer_changes_(false),
     67       expected_loop_(base::MessageLoop::current()) {
     68   DCHECK(history_backend_);
     69   DCHECK(expected_loop_ == base::MessageLoop::current());
     70 }
     71 
     72 TypedUrlSyncableService::~TypedUrlSyncableService() {
     73   DCHECK(expected_loop_ == base::MessageLoop::current());
     74 }
     75 
     76 syncer::SyncMergeResult TypedUrlSyncableService::MergeDataAndStartSyncing(
     77     syncer::ModelType type,
     78     const syncer::SyncDataList& initial_sync_data,
     79     scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
     80     scoped_ptr<syncer::SyncErrorFactory> error_handler) {
     81   DCHECK(expected_loop_ == base::MessageLoop::current());
     82   DCHECK(!sync_processor_.get());
     83   DCHECK(sync_processor.get());
     84   DCHECK(error_handler.get());
     85   DCHECK_EQ(type, syncer::TYPED_URLS);
     86 
     87   syncer::SyncMergeResult merge_result(type);
     88   sync_processor_ = sync_processor.Pass();
     89   sync_error_handler_ = error_handler.Pass();
     90 
     91   // TODO(mgist): Add implementation
     92 
     93   return merge_result;
     94 }
     95 
     96 void TypedUrlSyncableService::StopSyncing(syncer::ModelType type) {
     97   DCHECK(expected_loop_ == base::MessageLoop::current());
     98   DCHECK_EQ(type, syncer::TYPED_URLS);
     99 
    100   sync_processor_.reset();
    101   sync_error_handler_.reset();
    102 }
    103 
    104 syncer::SyncDataList TypedUrlSyncableService::GetAllSyncData(
    105     syncer::ModelType type) const {
    106   DCHECK(expected_loop_ == base::MessageLoop::current());
    107   syncer::SyncDataList list;
    108 
    109   // TODO(mgist): Add implementation
    110 
    111   return list;
    112 }
    113 
    114 syncer::SyncError TypedUrlSyncableService::ProcessSyncChanges(
    115     const tracked_objects::Location& from_here,
    116     const syncer::SyncChangeList& change_list) {
    117   DCHECK(expected_loop_ == base::MessageLoop::current());
    118 
    119   // TODO(mgist): Add implementation
    120 
    121   return syncer::SyncError(FROM_HERE,
    122                            syncer::SyncError::DATATYPE_ERROR,
    123                            "Typed url syncable service is not implemented.",
    124                            syncer::TYPED_URLS);
    125 }
    126 
    127 void TypedUrlSyncableService::OnUrlsModified(URLRows* changed_urls) {
    128   DCHECK(expected_loop_ == base::MessageLoop::current());
    129   DCHECK(changed_urls);
    130 
    131   if (processing_syncer_changes_)
    132     return;  // These are changes originating from us, ignore.
    133   if (!sync_processor_.get())
    134     return;  // Sync processor not yet initialized, don't sync.
    135 
    136   // Create SyncChangeList.
    137   syncer::SyncChangeList changes;
    138 
    139   for (URLRows::iterator url = changed_urls->begin();
    140        url != changed_urls->end(); ++url) {
    141     // Only care if the modified URL is typed.
    142     if (url->typed_count() > 0) {
    143       // If there were any errors updating the sync node, just ignore them and
    144       // continue on to process the next URL.
    145       CreateOrUpdateSyncNode(*url, &changes);
    146     }
    147   }
    148 
    149   // Send SyncChangeList to server if there are any changes.
    150   if (changes.size() > 0)
    151     sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
    152 }
    153 
    154 void TypedUrlSyncableService::OnUrlVisited(ui::PageTransition transition,
    155                                            URLRow* row) {
    156   DCHECK(expected_loop_ == base::MessageLoop::current());
    157   DCHECK(row);
    158 
    159   if (processing_syncer_changes_)
    160     return;  // These are changes originating from us, ignore.
    161   if (!sync_processor_.get())
    162     return;  // Sync processor not yet initialized, don't sync.
    163   if (!ShouldSyncVisit(transition, row))
    164     return;
    165 
    166   // Create SyncChangeList.
    167   syncer::SyncChangeList changes;
    168 
    169   CreateOrUpdateSyncNode(*row, &changes);
    170 
    171   // Send SyncChangeList to server if there are any changes.
    172   if (changes.size() > 0)
    173     sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
    174 }
    175 
    176 void TypedUrlSyncableService::OnUrlsDeleted(bool all_history,
    177                                             bool expired,
    178                                             URLRows* rows) {
    179   DCHECK(expected_loop_ == base::MessageLoop::current());
    180 
    181   if (processing_syncer_changes_)
    182     return;  // These are changes originating from us, ignore.
    183   if (!sync_processor_.get())
    184     return;  // Sync processor not yet initialized, don't sync.
    185 
    186   // Ignore URLs expired due to old age (we don't want to sync them as deletions
    187   // to avoid extra traffic up to the server, and also to make sure that a
    188   // client with a bad clock setting won't go on an expiration rampage and
    189   // delete all history from every client). The server will gracefully age out
    190   // the sync DB entries when they've been idle for long enough.
    191   if (expired)
    192     return;
    193 
    194   // Create SyncChangeList.
    195   syncer::SyncChangeList changes;
    196 
    197   if (all_history) {
    198     // Delete all synced typed urls.
    199     for (std::set<GURL>::const_iterator url = synced_typed_urls_.begin();
    200          url != synced_typed_urls_.end(); ++url) {
    201       VisitVector visits;
    202       URLRow row(*url);
    203       AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE,
    204                               row, visits, url->spec(), &changes);
    205     }
    206     // Clear cache of server state.
    207     synced_typed_urls_.clear();
    208   } else {
    209     DCHECK(rows);
    210     // Delete rows.
    211     for (URLRows::const_iterator row = rows->begin();
    212          row != rows->end(); ++row) {
    213       // Add specifics to change list for all synced urls that were deleted.
    214       if (synced_typed_urls_.find(row->url()) != synced_typed_urls_.end()) {
    215         VisitVector visits;
    216         AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE,
    217                                 *row, visits, row->url().spec(), &changes);
    218         // Delete typed url from cache.
    219         synced_typed_urls_.erase(row->url());
    220       }
    221     }
    222   }
    223 
    224   // Send SyncChangeList to server if there are any changes.
    225   if (changes.size() > 0)
    226     sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
    227 }
    228 
    229 bool TypedUrlSyncableService::ShouldIgnoreUrl(const GURL& url) {
    230   // Ignore empty URLs. Not sure how this can happen (maybe import from other
    231   // busted browsers, or misuse of the history API, or just plain bugs) but we
    232   // can't deal with them.
    233   if (url.spec().empty())
    234     return true;
    235 
    236   // Ignore local file URLs.
    237   if (url.SchemeIsFile())
    238     return true;
    239 
    240   // Ignore localhost URLs.
    241   if (net::IsLocalhost(url.host()))
    242     return true;
    243 
    244   return false;
    245 }
    246 
    247 bool TypedUrlSyncableService::ShouldSyncVisit(
    248     ui::PageTransition page_transition,
    249     URLRow* row) {
    250   if (!row)
    251     return false;
    252   int typed_count = row->typed_count();
    253   ui::PageTransition transition = ui::PageTransitionFromInt(
    254       page_transition & ui::PAGE_TRANSITION_CORE_MASK);
    255 
    256   // Just use an ad-hoc criteria to determine whether to ignore this
    257   // notification. For most users, the distribution of visits is roughly a bell
    258   // curve with a long tail - there are lots of URLs with < 5 visits so we want
    259   // to make sure we sync up every visit to ensure the proper ordering of
    260   // suggestions. But there are relatively few URLs with > 10 visits, and those
    261   // tend to be more broadly distributed such that there's no need to sync up
    262   // every visit to preserve their relative ordering.
    263   return (transition == ui::PAGE_TRANSITION_TYPED &&
    264           typed_count > 0 &&
    265           (typed_count < kTypedUrlVisitThrottleThreshold ||
    266            (typed_count % kTypedUrlVisitThrottleMultiple) == 0));
    267 }
    268 
    269 bool TypedUrlSyncableService::CreateOrUpdateSyncNode(
    270     URLRow url,
    271     syncer::SyncChangeList* changes) {
    272   DCHECK_GT(url.typed_count(), 0);
    273 
    274   if (ShouldIgnoreUrl(url.url()))
    275     return true;
    276 
    277   // Get the visits for this node.
    278   VisitVector visit_vector;
    279   if (!FixupURLAndGetVisits(&url, &visit_vector)) {
    280     DLOG(ERROR) << "Could not load visits for url: " << url.url();
    281     return false;
    282   }
    283   DCHECK(!visit_vector.empty());
    284 
    285   std::string title = url.url().spec();
    286   syncer::SyncChange::SyncChangeType change_type;
    287 
    288   // If server already has URL, then send a sync update, else add it.
    289   change_type =
    290       (synced_typed_urls_.find(url.url()) != synced_typed_urls_.end()) ?
    291       syncer::SyncChange::ACTION_UPDATE :
    292       syncer::SyncChange::ACTION_ADD;
    293 
    294   // Ensure cache of server state is up to date.
    295   synced_typed_urls_.insert(url.url());
    296 
    297   AddTypedUrlToChangeList(change_type, url, visit_vector, title, changes);
    298 
    299   return true;
    300 }
    301 
    302 void TypedUrlSyncableService::AddTypedUrlToChangeList(
    303     syncer::SyncChange::SyncChangeType change_type,
    304     const URLRow& row,
    305     const VisitVector& visits,
    306     std::string title,
    307     syncer::SyncChangeList* change_list) {
    308   sync_pb::EntitySpecifics entity_specifics;
    309   sync_pb::TypedUrlSpecifics* typed_url = entity_specifics.mutable_typed_url();
    310 
    311   if (change_type == syncer::SyncChange::ACTION_DELETE) {
    312     typed_url->set_url(row.url().spec());
    313   } else {
    314     WriteToTypedUrlSpecifics(row, visits, typed_url);
    315   }
    316 
    317   change_list->push_back(
    318       syncer::SyncChange(FROM_HERE, change_type,
    319                          syncer::SyncData::CreateLocalData(
    320                              kTypedUrlTag, title, entity_specifics)));
    321 }
    322 
    323 void TypedUrlSyncableService::WriteToTypedUrlSpecifics(
    324     const URLRow& url,
    325     const VisitVector& visits,
    326     sync_pb::TypedUrlSpecifics* typed_url) {
    327 
    328   DCHECK(!url.last_visit().is_null());
    329   DCHECK(!visits.empty());
    330   DCHECK_EQ(url.last_visit().ToInternalValue(),
    331             visits.back().visit_time.ToInternalValue());
    332 
    333   typed_url->set_url(url.url().spec());
    334   typed_url->set_title(base::UTF16ToUTF8(url.title()));
    335   typed_url->set_hidden(url.hidden());
    336 
    337   DCHECK(CheckVisitOrdering(visits));
    338 
    339   bool only_typed = false;
    340   int skip_count = 0;
    341 
    342   if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) {
    343     int typed_count = 0;
    344     int total = 0;
    345     // Walk the passed-in visit vector and count the # of typed visits.
    346     for (VisitVector::const_iterator visit = visits.begin();
    347          visit != visits.end(); ++visit) {
    348       ui::PageTransition transition = ui::PageTransitionFromInt(
    349           visit->transition & ui::PAGE_TRANSITION_CORE_MASK);
    350       // We ignore reload visits.
    351       if (transition == ui::PAGE_TRANSITION_RELOAD)
    352         continue;
    353       ++total;
    354       if (transition == ui::PAGE_TRANSITION_TYPED)
    355         ++typed_count;
    356     }
    357     // We should have at least one typed visit. This can sometimes happen if
    358     // the history DB has an inaccurate count for some reason (there's been
    359     // bugs in the history code in the past which has left users in the wild
    360     // with incorrect counts - http://crbug.com/84258).
    361     DCHECK(typed_count > 0);
    362 
    363     if (typed_count > kMaxTypedUrlVisits) {
    364       only_typed = true;
    365       skip_count = typed_count - kMaxTypedUrlVisits;
    366     } else if (total > kMaxTypedUrlVisits) {
    367       skip_count = total - kMaxTypedUrlVisits;
    368     }
    369   }
    370 
    371   for (VisitVector::const_iterator visit = visits.begin();
    372        visit != visits.end(); ++visit) {
    373     ui::PageTransition transition =
    374         ui::PageTransitionStripQualifier(visit->transition);
    375     // Skip reload visits.
    376     if (transition == ui::PAGE_TRANSITION_RELOAD)
    377       continue;
    378 
    379     // If we only have room for typed visits, then only add typed visits.
    380     if (only_typed && transition != ui::PAGE_TRANSITION_TYPED)
    381       continue;
    382 
    383     if (skip_count > 0) {
    384       // We have too many entries to fit, so we need to skip the oldest ones.
    385       // Only skip typed URLs if there are too many typed URLs to fit.
    386       if (only_typed || transition != ui::PAGE_TRANSITION_TYPED) {
    387         --skip_count;
    388         continue;
    389       }
    390     }
    391     typed_url->add_visits(visit->visit_time.ToInternalValue());
    392     typed_url->add_visit_transitions(visit->transition);
    393   }
    394   DCHECK_EQ(skip_count, 0);
    395 
    396   if (typed_url->visits_size() == 0) {
    397     // If we get here, it's because we don't actually have any TYPED visits
    398     // even though the visit's typed_count > 0 (corrupted typed_count). So
    399     // let's go ahead and add a RELOAD visit at the most recent visit since
    400     // it's not legal to have an empty visit array (yet another workaround
    401     // for http://crbug.com/84258).
    402     typed_url->add_visits(url.last_visit().ToInternalValue());
    403     typed_url->add_visit_transitions(ui::PAGE_TRANSITION_RELOAD);
    404   }
    405   CHECK_GT(typed_url->visits_size(), 0);
    406   CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits);
    407   CHECK_EQ(typed_url->visits_size(), typed_url->visit_transitions_size());
    408 }
    409 
    410 bool TypedUrlSyncableService::FixupURLAndGetVisits(
    411     URLRow* url,
    412     VisitVector* visits) {
    413   ++num_db_accesses_;
    414   CHECK(history_backend_);
    415   if (!history_backend_->GetMostRecentVisitsForURL(
    416           url->id(), kMaxVisitsToFetch, visits)) {
    417     ++num_db_errors_;
    418     return false;
    419   }
    420 
    421   // Sometimes (due to a bug elsewhere in the history or sync code, or due to
    422   // a crash between adding a URL to the history database and updating the
    423   // visit DB) the visit vector for a URL can be empty. If this happens, just
    424   // create a new visit whose timestamp is the same as the last_visit time.
    425   // This is a workaround for http://crbug.com/84258.
    426   if (visits->empty()) {
    427     DVLOG(1) << "Found empty visits for URL: " << url->url();
    428     VisitRow visit(
    429         url->id(), url->last_visit(), 0, ui::PAGE_TRANSITION_TYPED, 0);
    430     visits->push_back(visit);
    431   }
    432 
    433   // GetMostRecentVisitsForURL() returns the data in the opposite order that
    434   // we need it, so reverse it.
    435   std::reverse(visits->begin(), visits->end());
    436 
    437   // Sometimes, the last_visit field in the URL doesn't match the timestamp of
    438   // the last visit in our visit array (they come from different tables, so
    439   // crashes/bugs can cause them to mismatch), so just set it here.
    440   url->set_last_visit(visits->back().visit_time);
    441   DCHECK(CheckVisitOrdering(*visits));
    442   return true;
    443 }
    444 
    445 }  // namespace history
    446