Home | History | Annotate | Download | only in bookmarks
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/bookmarks/bookmark_index.h"
      6 
      7 #include <algorithm>
      8 #include <iterator>
      9 #include <list>
     10 
     11 #include "base/string16.h"
     12 #include "chrome/browser/bookmarks/bookmark_model.h"
     13 #include "chrome/browser/bookmarks/bookmark_utils.h"
     14 #include "chrome/browser/history/history_database.h"
     15 #include "chrome/browser/history/query_parser.h"
     16 #include "chrome/browser/profiles/profile.h"
     17 #include "ui/base/l10n/l10n_util.h"
     18 
     19 // Used when finding the set of bookmarks that match a query. Each match
     20 // represents a set of terms (as an interator into the Index) matching the
     21 // query as well as the set of nodes that contain those terms in their titles.
     22 struct BookmarkIndex::Match {
     23   // List of terms matching the query.
     24   std::list<Index::const_iterator> terms;
     25 
     26   // The set of nodes matching the terms. As an optimization this is empty
     27   // when we match only one term, and is filled in when we get more than one
     28   // term. We can do this as when we have only one matching term we know
     29   // the set of matching nodes is terms.front()->second.
     30   //
     31   // Use nodes_begin() and nodes_end() to get an iterator over the set as
     32   // it handles the necessary switching between nodes and terms.front().
     33   NodeSet nodes;
     34 
     35   // Returns an iterator to the beginning of the matching nodes. See
     36   // description of nodes for why this should be used over nodes.begin().
     37   NodeSet::const_iterator nodes_begin() const;
     38 
     39   // Returns an iterator to the beginning of the matching nodes. See
     40   // description of nodes for why this should be used over nodes.end().
     41   NodeSet::const_iterator nodes_end() const;
     42 };
     43 
     44 BookmarkIndex::NodeSet::const_iterator
     45     BookmarkIndex::Match::nodes_begin() const {
     46   return nodes.empty() ? terms.front()->second.begin() : nodes.begin();
     47 }
     48 
     49 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {
     50   return nodes.empty() ? terms.front()->second.end() : nodes.end();
     51 }
     52 
     53 BookmarkIndex::BookmarkIndex(Profile* profile) : profile_(profile) {
     54 }
     55 
     56 BookmarkIndex::~BookmarkIndex() {
     57 }
     58 
     59 void BookmarkIndex::Add(const BookmarkNode* node) {
     60   if (!node->is_url())
     61     return;
     62   std::vector<string16> terms = ExtractQueryWords(node->GetTitle());
     63   for (size_t i = 0; i < terms.size(); ++i)
     64     RegisterNode(terms[i], node);
     65 }
     66 
     67 void BookmarkIndex::Remove(const BookmarkNode* node) {
     68   if (!node->is_url())
     69     return;
     70 
     71   std::vector<string16> terms = ExtractQueryWords(node->GetTitle());
     72   for (size_t i = 0; i < terms.size(); ++i)
     73     UnregisterNode(terms[i], node);
     74 }
     75 
     76 void BookmarkIndex::GetBookmarksWithTitlesMatching(
     77     const string16& query,
     78     size_t max_count,
     79     std::vector<bookmark_utils::TitleMatch>* results) {
     80   std::vector<string16> terms = ExtractQueryWords(query);
     81   if (terms.empty())
     82     return;
     83 
     84   Matches matches;
     85   for (size_t i = 0; i < terms.size(); ++i) {
     86     if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches))
     87       return;
     88   }
     89 
     90   NodeTypedCountPairs node_typed_counts;
     91   SortMatches(matches, &node_typed_counts);
     92 
     93   // We use a QueryParser to fill in match positions for us. It's not the most
     94   // efficient way to go about this, but by the time we get here we know what
     95   // matches and so this shouldn't be performance critical.
     96   QueryParser parser;
     97   ScopedVector<QueryNode> query_nodes;
     98   parser.ParseQuery(query, &query_nodes.get());
     99 
    100   // The highest typed counts should be at the beginning of the results vector
    101   // so that the best matches will always be included in the results. The loop
    102   // that calculates result relevance in HistoryContentsProvider::ConvertResults
    103   // will run backwards to assure higher relevance will be attributed to the
    104   // best matches.
    105   for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin();
    106        i != node_typed_counts.end() && results->size() < max_count; ++i)
    107     AddMatchToResults(i->first, &parser, query_nodes.get(), results);
    108 }
    109 
    110 void BookmarkIndex::SortMatches(const Matches& matches,
    111                                 NodeTypedCountPairs* node_typed_counts) const {
    112   HistoryService* const history_service = profile_ ?
    113       profile_->GetHistoryService(Profile::EXPLICIT_ACCESS) : NULL;
    114 
    115   history::URLDatabase* url_db = history_service ?
    116       history_service->InMemoryDatabase() : NULL;
    117 
    118   for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i)
    119     ExtractBookmarkNodePairs(url_db, *i, node_typed_counts);
    120 
    121   std::sort(node_typed_counts->begin(), node_typed_counts->end(),
    122             &NodeTypedCountPairSortFunc);
    123 }
    124 
    125 void BookmarkIndex::ExtractBookmarkNodePairs(
    126     history::URLDatabase* url_db,
    127     const Match& match,
    128     NodeTypedCountPairs* node_typed_counts) const {
    129 
    130   for (NodeSet::const_iterator i = match.nodes_begin();
    131        i != match.nodes_end(); ++i) {
    132     history::URLRow url;
    133     if (url_db)
    134       url_db->GetRowForURL((*i)->GetURL(), &url);
    135     NodeTypedCountPair pair(*i, url.typed_count());
    136     node_typed_counts->push_back(pair);
    137   }
    138 }
    139 
    140 void BookmarkIndex::AddMatchToResults(
    141     const BookmarkNode* node,
    142     QueryParser* parser,
    143     const std::vector<QueryNode*>& query_nodes,
    144     std::vector<bookmark_utils::TitleMatch>* results) {
    145   bookmark_utils::TitleMatch title_match;
    146   // Check that the result matches the query.  The previous search
    147   // was a simple per-word search, while the more complex matching
    148   // of QueryParser may filter it out.  For example, the query
    149   // ["thi"] will match the bookmark titled [Thinking], but since
    150   // ["thi"] is quoted we don't want to do a prefix match.
    151   if (parser->DoesQueryMatch(node->GetTitle(), query_nodes,
    152                              &(title_match.match_positions))) {
    153     title_match.node = node;
    154     results->push_back(title_match);
    155   }
    156 }
    157 
    158 bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const string16& term,
    159                                                       bool first_term,
    160                                                       Matches* matches) {
    161   Index::const_iterator i = index_.lower_bound(term);
    162   if (i == index_.end())
    163     return false;
    164 
    165   if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) {
    166     // Term is too short for prefix match, compare using exact match.
    167     if (i->first != term)
    168       return false;  // No bookmarks with this term.
    169 
    170     if (first_term) {
    171       Match match;
    172       match.terms.push_back(i);
    173       matches->push_back(match);
    174       return true;
    175     }
    176     CombineMatchesInPlace(i, matches);
    177   } else if (first_term) {
    178     // This is the first term and we're doing a prefix match. Loop through
    179     // index adding all entries that start with term to matches.
    180     while (i != index_.end() &&
    181            i->first.size() >= term.size() &&
    182            term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
    183       Match match;
    184       match.terms.push_back(i);
    185       matches->push_back(match);
    186       ++i;
    187     }
    188   } else {
    189     // Prefix match and not the first term. Loop through index combining
    190     // current matches in matches with term, placing result in result.
    191     Matches result;
    192     while (i != index_.end() &&
    193            i->first.size() >= term.size() &&
    194            term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
    195       CombineMatches(i, *matches, &result);
    196       ++i;
    197     }
    198     matches->swap(result);
    199   }
    200   return !matches->empty();
    201 }
    202 
    203 void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i,
    204                                           Matches* matches) {
    205   for (size_t i = 0; i < matches->size(); ) {
    206     Match* match = &((*matches)[i]);
    207     NodeSet intersection;
    208     std::set_intersection(match->nodes_begin(), match->nodes_end(),
    209                           index_i->second.begin(), index_i->second.end(),
    210                           std::inserter(intersection, intersection.begin()));
    211     if (intersection.empty()) {
    212       matches->erase(matches->begin() + i);
    213     } else {
    214       match->terms.push_back(index_i);
    215       match->nodes.swap(intersection);
    216       ++i;
    217     }
    218   }
    219 }
    220 
    221 void BookmarkIndex::CombineMatches(const Index::const_iterator& index_i,
    222                                    const Matches& current_matches,
    223                                    Matches* result) {
    224   for (size_t i = 0; i < current_matches.size(); ++i) {
    225     const Match& match = current_matches[i];
    226     NodeSet intersection;
    227     std::set_intersection(match.nodes_begin(), match.nodes_end(),
    228                           index_i->second.begin(), index_i->second.end(),
    229                           std::inserter(intersection, intersection.begin()));
    230     if (!intersection.empty()) {
    231       result->push_back(Match());
    232       Match& combined_match = result->back();
    233       combined_match.terms = match.terms;
    234       combined_match.terms.push_back(index_i);
    235       combined_match.nodes.swap(intersection);
    236     }
    237   }
    238 }
    239 
    240 std::vector<string16> BookmarkIndex::ExtractQueryWords(const string16& query) {
    241   std::vector<string16> terms;
    242   if (query.empty())
    243     return std::vector<string16>();
    244   QueryParser parser;
    245   // TODO: use ICU normalization.
    246   parser.ExtractQueryWords(l10n_util::ToLower(query), &terms);
    247   return terms;
    248 }
    249 
    250 void BookmarkIndex::RegisterNode(const string16& term,
    251                                  const BookmarkNode* node) {
    252   if (std::find(index_[term].begin(), index_[term].end(), node) !=
    253       index_[term].end()) {
    254     // We've already added node for term.
    255     return;
    256   }
    257   index_[term].insert(node);
    258 }
    259 
    260 void BookmarkIndex::UnregisterNode(const string16& term,
    261                                    const BookmarkNode* node) {
    262   Index::iterator i = index_.find(term);
    263   if (i == index_.end()) {
    264     // We can get here if the node has the same term more than once. For
    265     // example, a bookmark with the title 'foo foo' would end up here.
    266     return;
    267   }
    268   i->second.erase(node);
    269   if (i->second.empty())
    270     index_.erase(i);
    271 }
    272