Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
      6 
      7 #include <algorithm>
      8 
      9 #include "base/logging.h"
     10 #include "base/metrics/histogram.h"
     11 
     12 namespace {
     13 
     14 // Return |true| if the range is sorted by the given comparator.
     15 template <typename CTI, typename LESS>
     16 bool sorted(CTI beg, CTI end, LESS less) {
     17   while ((end - beg) > 2) {
     18     CTI n = beg++;
     19     if (less(*beg, *n))
     20       return false;
     21   }
     22   return true;
     23 }
     24 
     25 // Find items matching between |subs| and |adds|, and remove them.  To minimize
     26 // copies, the inputs are processing in parallel, so |subs| and |adds| should be
     27 // compatibly ordered (either by SBAddPrefixLess or SBAddPrefixHashLess).
     28 //
     29 // |predAddSub| provides add < sub, |predSubAdd| provides sub < add, for the
     30 // tightest compare appropriate (see calls in SBProcessSubs).
     31 template <typename SubsT, typename AddsT,
     32           typename PredAddSubT, typename PredSubAddT>
     33 void KnockoutSubs(SubsT* subs, AddsT* adds,
     34                   PredAddSubT predAddSub, PredSubAddT predSubAdd) {
     35   // Keep a pair of output iterators for writing kept items.  Due to
     36   // deletions, these may lag the main iterators.  Using erase() on
     37   // individual items would result in O(N^2) copies.  Using std::list
     38   // would work around that, at double or triple the memory cost.
     39   typename AddsT::iterator add_out = adds->begin();
     40   typename SubsT::iterator sub_out = subs->begin();
     41 
     42   // Current location in containers.
     43   // TODO(shess): I want these to be const_iterator, but then
     44   // std::copy() gets confused.  Could snag a const_iterator add_end,
     45   // or write an inline std::copy(), but it seems like I'm doing
     46   // something wrong.
     47   typename AddsT::iterator add_iter = adds->begin();
     48   typename SubsT::iterator sub_iter = subs->begin();
     49 
     50   while (add_iter != adds->end() && sub_iter != subs->end()) {
     51     // If |*sub_iter| < |*add_iter|, retain the sub.
     52     if (predSubAdd(*sub_iter, *add_iter)) {
     53       *sub_out = *sub_iter;
     54       ++sub_out;
     55       ++sub_iter;
     56 
     57       // If |*add_iter| < |*sub_iter|, retain the add.
     58     } else if (predAddSub(*add_iter, *sub_iter)) {
     59       *add_out = *add_iter;
     60       ++add_out;
     61       ++add_iter;
     62 
     63       // Drop equal items.
     64     } else {
     65       ++add_iter;
     66       ++sub_iter;
     67     }
     68   }
     69 
     70   // Erase any leftover gap.
     71   adds->erase(add_out, add_iter);
     72   subs->erase(sub_out, sub_iter);
     73 }
     74 
     75 // Remove deleted items (|chunk_id| in |del_set|) from the container.
     76 template <typename ItemsT>
     77 void RemoveDeleted(ItemsT* items, const base::hash_set<int32>& del_set) {
     78   DCHECK(items);
     79 
     80   // Move items from |iter| to |end_iter|, skipping items in |del_set|.
     81   typename ItemsT::iterator end_iter = items->begin();
     82   for (typename ItemsT::iterator iter = end_iter;
     83        iter != items->end(); ++iter) {
     84     if (del_set.count(iter->chunk_id) == 0) {
     85       *end_iter = *iter;
     86       ++end_iter;
     87     }
     88   }
     89   items->erase(end_iter, items->end());
     90 }
     91 
     92 // Remove prefixes which are in the same chunk as their fullhash.  This was a
     93 // mistake in earlier implementations.
     94 template <typename HashesT, typename PrefixesT>
     95 size_t KnockoutPrefixVolunteers(const HashesT& full_hashes,
     96                                 PrefixesT* prefixes) {
     97   typename PrefixesT::iterator prefixes_process = prefixes->begin();
     98   typename PrefixesT::iterator prefixes_out = prefixes->begin();
     99   typename HashesT::const_iterator hashes_process = full_hashes.begin();
    100 
    101   size_t skipped_count = 0;
    102 
    103   while (hashes_process != full_hashes.end()) {
    104     // Scan prefixes forward until an item is not less than the current hash.
    105     while (prefixes_process != prefixes->end() &&
    106            SBAddPrefixLess(*prefixes_process, *hashes_process)) {
    107       if (prefixes_process != prefixes_out) {
    108         *prefixes_out = *prefixes_process;
    109       }
    110       prefixes_out++;
    111       prefixes_process++;
    112     }
    113 
    114     // If the current hash is also not less than the prefix, that implies they
    115     // are equal.  Skip the prefix.
    116     if (prefixes_process != prefixes->end() &&
    117         !SBAddPrefixLess(*hashes_process, *prefixes_process)) {
    118       skipped_count++;
    119       prefixes_process++;
    120     }
    121 
    122     hashes_process++;
    123   }
    124 
    125   // If any prefixes were skipped, copy over the tail and erase the excess.
    126   if (prefixes_process != prefixes_out) {
    127     prefixes_out = std::copy(prefixes_process, prefixes->end(), prefixes_out);
    128     prefixes->erase(prefixes_out, prefixes->end());
    129   }
    130 
    131   return skipped_count;
    132 }
    133 
    134 }  // namespace
    135 
    136 void SBProcessSubs(SBAddPrefixes* add_prefixes,
    137                    SBSubPrefixes* sub_prefixes,
    138                    std::vector<SBAddFullHash>* add_full_hashes,
    139                    std::vector<SBSubFullHash>* sub_full_hashes,
    140                    const base::hash_set<int32>& add_chunks_deleted,
    141                    const base::hash_set<int32>& sub_chunks_deleted) {
    142   // It is possible to structure templates and template
    143   // specializations such that the following calls work without having
    144   // to qualify things.  It becomes very arbitrary, though, and less
    145   // clear how things are working.
    146 
    147   // Make sure things are sorted appropriately.
    148   DCHECK(sorted(add_prefixes->begin(), add_prefixes->end(),
    149                 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>));
    150   DCHECK(sorted(sub_prefixes->begin(), sub_prefixes->end(),
    151                 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>));
    152   DCHECK(sorted(add_full_hashes->begin(), add_full_hashes->end(),
    153                 SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>));
    154   DCHECK(sorted(sub_full_hashes->begin(), sub_full_hashes->end(),
    155                 SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>));
    156 
    157   // Earlier database code added prefixes when it saw fullhashes.  The protocol
    158   // should never send a chunk of mixed prefixes and fullhashes, the following
    159   // removes any such cases which are seen.
    160   // TODO(shess): Remove this code once most databases have been processed.
    161   // Chunk churn should clean up anyone left over.  This only takes a few ms to
    162   // run through my current database, so it doesn't seem worthwhile to do much
    163   // more than that.
    164   size_t skipped = KnockoutPrefixVolunteers(*add_full_hashes, add_prefixes);
    165   skipped += KnockoutPrefixVolunteers(*sub_full_hashes, sub_prefixes);
    166   UMA_HISTOGRAM_COUNTS("SB2.VolunteerPrefixesRemoved", skipped);
    167 
    168   // Factor out the prefix subs.
    169   KnockoutSubs(sub_prefixes, add_prefixes,
    170                SBAddPrefixLess<SBAddPrefix,SBSubPrefix>,
    171                SBAddPrefixLess<SBSubPrefix,SBAddPrefix>);
    172 
    173   // Factor out the full-hash subs.
    174   KnockoutSubs(sub_full_hashes, add_full_hashes,
    175                SBAddPrefixHashLess<SBAddFullHash,SBSubFullHash>,
    176                SBAddPrefixHashLess<SBSubFullHash,SBAddFullHash>);
    177 
    178   // Remove items from the deleted chunks.  This is done after other
    179   // processing to allow subs to knock out adds (and be removed) even
    180   // if the add's chunk is deleted.
    181   RemoveDeleted(add_prefixes, add_chunks_deleted);
    182   RemoveDeleted(sub_prefixes, sub_chunks_deleted);
    183   RemoveDeleted(add_full_hashes, add_chunks_deleted);
    184   RemoveDeleted(sub_full_hashes, sub_chunks_deleted);
    185 }
    186