1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/safe_browsing/safe_browsing_store.h" 6 7 #include <algorithm> 8 9 #include "base/logging.h" 10 #include "base/metrics/histogram.h" 11 12 namespace { 13 14 // Return |true| if the range is sorted by the given comparator. 15 template <typename CTI, typename LESS> 16 bool sorted(CTI beg, CTI end, LESS less) { 17 while ((end - beg) > 2) { 18 CTI n = beg++; 19 if (less(*beg, *n)) 20 return false; 21 } 22 return true; 23 } 24 25 // Find items matching between |subs| and |adds|, and remove them. To minimize 26 // copies, the inputs are processing in parallel, so |subs| and |adds| should be 27 // compatibly ordered (either by SBAddPrefixLess or SBAddPrefixHashLess). 28 // 29 // |predAddSub| provides add < sub, |predSubAdd| provides sub < add, for the 30 // tightest compare appropriate (see calls in SBProcessSubs). 31 template <typename SubsT, typename AddsT, 32 typename PredAddSubT, typename PredSubAddT> 33 void KnockoutSubs(SubsT* subs, AddsT* adds, 34 PredAddSubT predAddSub, PredSubAddT predSubAdd) { 35 // Keep a pair of output iterators for writing kept items. Due to 36 // deletions, these may lag the main iterators. Using erase() on 37 // individual items would result in O(N^2) copies. Using std::list 38 // would work around that, at double or triple the memory cost. 39 typename AddsT::iterator add_out = adds->begin(); 40 typename SubsT::iterator sub_out = subs->begin(); 41 42 // Current location in containers. 43 // TODO(shess): I want these to be const_iterator, but then 44 // std::copy() gets confused. Could snag a const_iterator add_end, 45 // or write an inline std::copy(), but it seems like I'm doing 46 // something wrong. 47 typename AddsT::iterator add_iter = adds->begin(); 48 typename SubsT::iterator sub_iter = subs->begin(); 49 50 while (add_iter != adds->end() && sub_iter != subs->end()) { 51 // If |*sub_iter| < |*add_iter|, retain the sub. 52 if (predSubAdd(*sub_iter, *add_iter)) { 53 *sub_out = *sub_iter; 54 ++sub_out; 55 ++sub_iter; 56 57 // If |*add_iter| < |*sub_iter|, retain the add. 58 } else if (predAddSub(*add_iter, *sub_iter)) { 59 *add_out = *add_iter; 60 ++add_out; 61 ++add_iter; 62 63 // Drop equal items. 64 } else { 65 ++add_iter; 66 ++sub_iter; 67 } 68 } 69 70 // Erase any leftover gap. 71 adds->erase(add_out, add_iter); 72 subs->erase(sub_out, sub_iter); 73 } 74 75 // Remove deleted items (|chunk_id| in |del_set|) from the container. 76 template <typename ItemsT> 77 void RemoveDeleted(ItemsT* items, const base::hash_set<int32>& del_set) { 78 DCHECK(items); 79 80 // Move items from |iter| to |end_iter|, skipping items in |del_set|. 81 typename ItemsT::iterator end_iter = items->begin(); 82 for (typename ItemsT::iterator iter = end_iter; 83 iter != items->end(); ++iter) { 84 if (del_set.count(iter->chunk_id) == 0) { 85 *end_iter = *iter; 86 ++end_iter; 87 } 88 } 89 items->erase(end_iter, items->end()); 90 } 91 92 // Remove prefixes which are in the same chunk as their fullhash. This was a 93 // mistake in earlier implementations. 94 template <typename HashesT, typename PrefixesT> 95 size_t KnockoutPrefixVolunteers(const HashesT& full_hashes, 96 PrefixesT* prefixes) { 97 typename PrefixesT::iterator prefixes_process = prefixes->begin(); 98 typename PrefixesT::iterator prefixes_out = prefixes->begin(); 99 typename HashesT::const_iterator hashes_process = full_hashes.begin(); 100 101 size_t skipped_count = 0; 102 103 while (hashes_process != full_hashes.end()) { 104 // Scan prefixes forward until an item is not less than the current hash. 105 while (prefixes_process != prefixes->end() && 106 SBAddPrefixLess(*prefixes_process, *hashes_process)) { 107 if (prefixes_process != prefixes_out) { 108 *prefixes_out = *prefixes_process; 109 } 110 prefixes_out++; 111 prefixes_process++; 112 } 113 114 // If the current hash is also not less than the prefix, that implies they 115 // are equal. Skip the prefix. 116 if (prefixes_process != prefixes->end() && 117 !SBAddPrefixLess(*hashes_process, *prefixes_process)) { 118 skipped_count++; 119 prefixes_process++; 120 } 121 122 hashes_process++; 123 } 124 125 // If any prefixes were skipped, copy over the tail and erase the excess. 126 if (prefixes_process != prefixes_out) { 127 prefixes_out = std::copy(prefixes_process, prefixes->end(), prefixes_out); 128 prefixes->erase(prefixes_out, prefixes->end()); 129 } 130 131 return skipped_count; 132 } 133 134 } // namespace 135 136 void SBProcessSubs(SBAddPrefixes* add_prefixes, 137 SBSubPrefixes* sub_prefixes, 138 std::vector<SBAddFullHash>* add_full_hashes, 139 std::vector<SBSubFullHash>* sub_full_hashes, 140 const base::hash_set<int32>& add_chunks_deleted, 141 const base::hash_set<int32>& sub_chunks_deleted) { 142 // It is possible to structure templates and template 143 // specializations such that the following calls work without having 144 // to qualify things. It becomes very arbitrary, though, and less 145 // clear how things are working. 146 147 // Make sure things are sorted appropriately. 148 DCHECK(sorted(add_prefixes->begin(), add_prefixes->end(), 149 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>)); 150 DCHECK(sorted(sub_prefixes->begin(), sub_prefixes->end(), 151 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>)); 152 DCHECK(sorted(add_full_hashes->begin(), add_full_hashes->end(), 153 SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>)); 154 DCHECK(sorted(sub_full_hashes->begin(), sub_full_hashes->end(), 155 SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>)); 156 157 // Earlier database code added prefixes when it saw fullhashes. The protocol 158 // should never send a chunk of mixed prefixes and fullhashes, the following 159 // removes any such cases which are seen. 160 // TODO(shess): Remove this code once most databases have been processed. 161 // Chunk churn should clean up anyone left over. This only takes a few ms to 162 // run through my current database, so it doesn't seem worthwhile to do much 163 // more than that. 164 size_t skipped = KnockoutPrefixVolunteers(*add_full_hashes, add_prefixes); 165 skipped += KnockoutPrefixVolunteers(*sub_full_hashes, sub_prefixes); 166 UMA_HISTOGRAM_COUNTS("SB2.VolunteerPrefixesRemoved", skipped); 167 168 // Factor out the prefix subs. 169 KnockoutSubs(sub_prefixes, add_prefixes, 170 SBAddPrefixLess<SBAddPrefix,SBSubPrefix>, 171 SBAddPrefixLess<SBSubPrefix,SBAddPrefix>); 172 173 // Factor out the full-hash subs. 174 KnockoutSubs(sub_full_hashes, add_full_hashes, 175 SBAddPrefixHashLess<SBAddFullHash,SBSubFullHash>, 176 SBAddPrefixHashLess<SBSubFullHash,SBAddFullHash>); 177 178 // Remove items from the deleted chunks. This is done after other 179 // processing to allow subs to knock out adds (and be removed) even 180 // if the add's chunk is deleted. 181 RemoveDeleted(add_prefixes, add_chunks_deleted); 182 RemoveDeleted(sub_prefixes, sub_chunks_deleted); 183 RemoveDeleted(add_full_hashes, add_chunks_deleted); 184 RemoveDeleted(sub_full_hashes, sub_chunks_deleted); 185 } 186