1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/bookmarks/bookmark_index.h" 6 7 #include <algorithm> 8 #include <iterator> 9 #include <list> 10 11 #include "base/string16.h" 12 #include "chrome/browser/bookmarks/bookmark_model.h" 13 #include "chrome/browser/bookmarks/bookmark_utils.h" 14 #include "chrome/browser/history/history_database.h" 15 #include "chrome/browser/history/query_parser.h" 16 #include "chrome/browser/profiles/profile.h" 17 #include "ui/base/l10n/l10n_util.h" 18 19 // Used when finding the set of bookmarks that match a query. Each match 20 // represents a set of terms (as an interator into the Index) matching the 21 // query as well as the set of nodes that contain those terms in their titles. 22 struct BookmarkIndex::Match { 23 // List of terms matching the query. 24 std::list<Index::const_iterator> terms; 25 26 // The set of nodes matching the terms. As an optimization this is empty 27 // when we match only one term, and is filled in when we get more than one 28 // term. We can do this as when we have only one matching term we know 29 // the set of matching nodes is terms.front()->second. 30 // 31 // Use nodes_begin() and nodes_end() to get an iterator over the set as 32 // it handles the necessary switching between nodes and terms.front(). 33 NodeSet nodes; 34 35 // Returns an iterator to the beginning of the matching nodes. See 36 // description of nodes for why this should be used over nodes.begin(). 37 NodeSet::const_iterator nodes_begin() const; 38 39 // Returns an iterator to the beginning of the matching nodes. See 40 // description of nodes for why this should be used over nodes.end(). 41 NodeSet::const_iterator nodes_end() const; 42 }; 43 44 BookmarkIndex::NodeSet::const_iterator 45 BookmarkIndex::Match::nodes_begin() const { 46 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); 47 } 48 49 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { 50 return nodes.empty() ? terms.front()->second.end() : nodes.end(); 51 } 52 53 BookmarkIndex::BookmarkIndex(Profile* profile) : profile_(profile) { 54 } 55 56 BookmarkIndex::~BookmarkIndex() { 57 } 58 59 void BookmarkIndex::Add(const BookmarkNode* node) { 60 if (!node->is_url()) 61 return; 62 std::vector<string16> terms = ExtractQueryWords(node->GetTitle()); 63 for (size_t i = 0; i < terms.size(); ++i) 64 RegisterNode(terms[i], node); 65 } 66 67 void BookmarkIndex::Remove(const BookmarkNode* node) { 68 if (!node->is_url()) 69 return; 70 71 std::vector<string16> terms = ExtractQueryWords(node->GetTitle()); 72 for (size_t i = 0; i < terms.size(); ++i) 73 UnregisterNode(terms[i], node); 74 } 75 76 void BookmarkIndex::GetBookmarksWithTitlesMatching( 77 const string16& query, 78 size_t max_count, 79 std::vector<bookmark_utils::TitleMatch>* results) { 80 std::vector<string16> terms = ExtractQueryWords(query); 81 if (terms.empty()) 82 return; 83 84 Matches matches; 85 for (size_t i = 0; i < terms.size(); ++i) { 86 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) 87 return; 88 } 89 90 NodeTypedCountPairs node_typed_counts; 91 SortMatches(matches, &node_typed_counts); 92 93 // We use a QueryParser to fill in match positions for us. It's not the most 94 // efficient way to go about this, but by the time we get here we know what 95 // matches and so this shouldn't be performance critical. 96 QueryParser parser; 97 ScopedVector<QueryNode> query_nodes; 98 parser.ParseQuery(query, &query_nodes.get()); 99 100 // The highest typed counts should be at the beginning of the results vector 101 // so that the best matches will always be included in the results. The loop 102 // that calculates result relevance in HistoryContentsProvider::ConvertResults 103 // will run backwards to assure higher relevance will be attributed to the 104 // best matches. 105 for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin(); 106 i != node_typed_counts.end() && results->size() < max_count; ++i) 107 AddMatchToResults(i->first, &parser, query_nodes.get(), results); 108 } 109 110 void BookmarkIndex::SortMatches(const Matches& matches, 111 NodeTypedCountPairs* node_typed_counts) const { 112 HistoryService* const history_service = profile_ ? 113 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS) : NULL; 114 115 history::URLDatabase* url_db = history_service ? 116 history_service->InMemoryDatabase() : NULL; 117 118 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) 119 ExtractBookmarkNodePairs(url_db, *i, node_typed_counts); 120 121 std::sort(node_typed_counts->begin(), node_typed_counts->end(), 122 &NodeTypedCountPairSortFunc); 123 } 124 125 void BookmarkIndex::ExtractBookmarkNodePairs( 126 history::URLDatabase* url_db, 127 const Match& match, 128 NodeTypedCountPairs* node_typed_counts) const { 129 130 for (NodeSet::const_iterator i = match.nodes_begin(); 131 i != match.nodes_end(); ++i) { 132 history::URLRow url; 133 if (url_db) 134 url_db->GetRowForURL((*i)->GetURL(), &url); 135 NodeTypedCountPair pair(*i, url.typed_count()); 136 node_typed_counts->push_back(pair); 137 } 138 } 139 140 void BookmarkIndex::AddMatchToResults( 141 const BookmarkNode* node, 142 QueryParser* parser, 143 const std::vector<QueryNode*>& query_nodes, 144 std::vector<bookmark_utils::TitleMatch>* results) { 145 bookmark_utils::TitleMatch title_match; 146 // Check that the result matches the query. The previous search 147 // was a simple per-word search, while the more complex matching 148 // of QueryParser may filter it out. For example, the query 149 // ["thi"] will match the bookmark titled [Thinking], but since 150 // ["thi"] is quoted we don't want to do a prefix match. 151 if (parser->DoesQueryMatch(node->GetTitle(), query_nodes, 152 &(title_match.match_positions))) { 153 title_match.node = node; 154 results->push_back(title_match); 155 } 156 } 157 158 bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const string16& term, 159 bool first_term, 160 Matches* matches) { 161 Index::const_iterator i = index_.lower_bound(term); 162 if (i == index_.end()) 163 return false; 164 165 if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) { 166 // Term is too short for prefix match, compare using exact match. 167 if (i->first != term) 168 return false; // No bookmarks with this term. 169 170 if (first_term) { 171 Match match; 172 match.terms.push_back(i); 173 matches->push_back(match); 174 return true; 175 } 176 CombineMatchesInPlace(i, matches); 177 } else if (first_term) { 178 // This is the first term and we're doing a prefix match. Loop through 179 // index adding all entries that start with term to matches. 180 while (i != index_.end() && 181 i->first.size() >= term.size() && 182 term.compare(0, term.size(), i->first, 0, term.size()) == 0) { 183 Match match; 184 match.terms.push_back(i); 185 matches->push_back(match); 186 ++i; 187 } 188 } else { 189 // Prefix match and not the first term. Loop through index combining 190 // current matches in matches with term, placing result in result. 191 Matches result; 192 while (i != index_.end() && 193 i->first.size() >= term.size() && 194 term.compare(0, term.size(), i->first, 0, term.size()) == 0) { 195 CombineMatches(i, *matches, &result); 196 ++i; 197 } 198 matches->swap(result); 199 } 200 return !matches->empty(); 201 } 202 203 void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i, 204 Matches* matches) { 205 for (size_t i = 0; i < matches->size(); ) { 206 Match* match = &((*matches)[i]); 207 NodeSet intersection; 208 std::set_intersection(match->nodes_begin(), match->nodes_end(), 209 index_i->second.begin(), index_i->second.end(), 210 std::inserter(intersection, intersection.begin())); 211 if (intersection.empty()) { 212 matches->erase(matches->begin() + i); 213 } else { 214 match->terms.push_back(index_i); 215 match->nodes.swap(intersection); 216 ++i; 217 } 218 } 219 } 220 221 void BookmarkIndex::CombineMatches(const Index::const_iterator& index_i, 222 const Matches& current_matches, 223 Matches* result) { 224 for (size_t i = 0; i < current_matches.size(); ++i) { 225 const Match& match = current_matches[i]; 226 NodeSet intersection; 227 std::set_intersection(match.nodes_begin(), match.nodes_end(), 228 index_i->second.begin(), index_i->second.end(), 229 std::inserter(intersection, intersection.begin())); 230 if (!intersection.empty()) { 231 result->push_back(Match()); 232 Match& combined_match = result->back(); 233 combined_match.terms = match.terms; 234 combined_match.terms.push_back(index_i); 235 combined_match.nodes.swap(intersection); 236 } 237 } 238 } 239 240 std::vector<string16> BookmarkIndex::ExtractQueryWords(const string16& query) { 241 std::vector<string16> terms; 242 if (query.empty()) 243 return std::vector<string16>(); 244 QueryParser parser; 245 // TODO: use ICU normalization. 246 parser.ExtractQueryWords(l10n_util::ToLower(query), &terms); 247 return terms; 248 } 249 250 void BookmarkIndex::RegisterNode(const string16& term, 251 const BookmarkNode* node) { 252 if (std::find(index_[term].begin(), index_[term].end(), node) != 253 index_[term].end()) { 254 // We've already added node for term. 255 return; 256 } 257 index_[term].insert(node); 258 } 259 260 void BookmarkIndex::UnregisterNode(const string16& term, 261 const BookmarkNode* node) { 262 Index::iterator i = index_.find(term); 263 if (i == index_.end()) { 264 // We can get here if the node has the same term more than once. For 265 // example, a bookmark with the title 'foo foo' would end up here. 266 return; 267 } 268 i->second.erase(node); 269 if (i->second.empty()) 270 index_.erase(i); 271 } 272