Home | History | Annotate | Download | only in bookmarks
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/bookmarks/bookmark_index.h"
      6 
      7 #include <algorithm>
      8 #include <iterator>
      9 #include <list>
     10 
     11 #include "base/i18n/case_conversion.h"
     12 #include "base/strings/string16.h"
     13 #include "chrome/browser/bookmarks/bookmark_model.h"
     14 #include "chrome/browser/bookmarks/bookmark_title_match.h"
     15 #include "chrome/browser/history/history_service.h"
     16 #include "chrome/browser/history/history_service_factory.h"
     17 #include "chrome/browser/history/query_parser.h"
     18 #include "chrome/browser/history/url_database.h"
     19 
     20 // Used when finding the set of bookmarks that match a query. Each match
     21 // represents a set of terms (as an interator into the Index) matching the
     22 // query as well as the set of nodes that contain those terms in their titles.
     23 struct BookmarkIndex::Match {
     24   // List of terms matching the query.
     25   std::list<Index::const_iterator> terms;
     26 
     27   // The set of nodes matching the terms. As an optimization this is empty
     28   // when we match only one term, and is filled in when we get more than one
     29   // term. We can do this as when we have only one matching term we know
     30   // the set of matching nodes is terms.front()->second.
     31   //
     32   // Use nodes_begin() and nodes_end() to get an iterator over the set as
     33   // it handles the necessary switching between nodes and terms.front().
     34   NodeSet nodes;
     35 
     36   // Returns an iterator to the beginning of the matching nodes. See
     37   // description of nodes for why this should be used over nodes.begin().
     38   NodeSet::const_iterator nodes_begin() const;
     39 
     40   // Returns an iterator to the beginning of the matching nodes. See
     41   // description of nodes for why this should be used over nodes.end().
     42   NodeSet::const_iterator nodes_end() const;
     43 };
     44 
     45 BookmarkIndex::NodeSet::const_iterator
     46     BookmarkIndex::Match::nodes_begin() const {
     47   return nodes.empty() ? terms.front()->second.begin() : nodes.begin();
     48 }
     49 
     50 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {
     51   return nodes.empty() ? terms.front()->second.end() : nodes.end();
     52 }
     53 
     54 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context)
     55     : browser_context_(browser_context) {
     56 }
     57 
     58 BookmarkIndex::~BookmarkIndex() {
     59 }
     60 
     61 void BookmarkIndex::Add(const BookmarkNode* node) {
     62   if (!node->is_url())
     63     return;
     64   std::vector<string16> terms = ExtractQueryWords(node->GetTitle());
     65   for (size_t i = 0; i < terms.size(); ++i)
     66     RegisterNode(terms[i], node);
     67 }
     68 
     69 void BookmarkIndex::Remove(const BookmarkNode* node) {
     70   if (!node->is_url())
     71     return;
     72 
     73   std::vector<string16> terms = ExtractQueryWords(node->GetTitle());
     74   for (size_t i = 0; i < terms.size(); ++i)
     75     UnregisterNode(terms[i], node);
     76 }
     77 
     78 void BookmarkIndex::GetBookmarksWithTitlesMatching(
     79     const string16& query,
     80     size_t max_count,
     81     std::vector<BookmarkTitleMatch>* results) {
     82   std::vector<string16> terms = ExtractQueryWords(query);
     83   if (terms.empty())
     84     return;
     85 
     86   Matches matches;
     87   for (size_t i = 0; i < terms.size(); ++i) {
     88     if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches))
     89       return;
     90   }
     91 
     92   NodeTypedCountPairs node_typed_counts;
     93   SortMatches(matches, &node_typed_counts);
     94 
     95   // We use a QueryParser to fill in match positions for us. It's not the most
     96   // efficient way to go about this, but by the time we get here we know what
     97   // matches and so this shouldn't be performance critical.
     98   QueryParser parser;
     99   ScopedVector<QueryNode> query_nodes;
    100   parser.ParseQueryNodes(query, &query_nodes.get());
    101 
    102   // The highest typed counts should be at the beginning of the results vector
    103   // so that the best matches will always be included in the results. The loop
    104   // that calculates result relevance in HistoryContentsProvider::ConvertResults
    105   // will run backwards to assure higher relevance will be attributed to the
    106   // best matches.
    107   for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin();
    108        i != node_typed_counts.end() && results->size() < max_count; ++i)
    109     AddMatchToResults(i->first, &parser, query_nodes.get(), results);
    110 }
    111 
    112 void BookmarkIndex::SortMatches(const Matches& matches,
    113                                 NodeTypedCountPairs* node_typed_counts) const {
    114   HistoryService* const history_service = browser_context_ ?
    115       HistoryServiceFactory::GetForProfile(
    116           Profile::FromBrowserContext(browser_context_),
    117           Profile::EXPLICIT_ACCESS) : NULL;
    118 
    119   history::URLDatabase* url_db = history_service ?
    120       history_service->InMemoryDatabase() : NULL;
    121 
    122   for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i)
    123     ExtractBookmarkNodePairs(url_db, *i, node_typed_counts);
    124 
    125   std::sort(node_typed_counts->begin(), node_typed_counts->end(),
    126             &NodeTypedCountPairSortFunc);
    127   // Eliminate duplicates.
    128   node_typed_counts->erase(std::unique(node_typed_counts->begin(),
    129                                        node_typed_counts->end()),
    130                            node_typed_counts->end());
    131 }
    132 
    133 void BookmarkIndex::ExtractBookmarkNodePairs(
    134     history::URLDatabase* url_db,
    135     const Match& match,
    136     NodeTypedCountPairs* node_typed_counts) const {
    137 
    138   for (NodeSet::const_iterator i = match.nodes_begin();
    139        i != match.nodes_end(); ++i) {
    140     history::URLRow url;
    141     if (url_db)
    142       url_db->GetRowForURL((*i)->url(), &url);
    143     NodeTypedCountPair pair(*i, url.typed_count());
    144     node_typed_counts->push_back(pair);
    145   }
    146 }
    147 
    148 void BookmarkIndex::AddMatchToResults(
    149     const BookmarkNode* node,
    150     QueryParser* parser,
    151     const std::vector<QueryNode*>& query_nodes,
    152     std::vector<BookmarkTitleMatch>* results) {
    153   BookmarkTitleMatch title_match;
    154   // Check that the result matches the query.  The previous search
    155   // was a simple per-word search, while the more complex matching
    156   // of QueryParser may filter it out.  For example, the query
    157   // ["thi"] will match the bookmark titled [Thinking], but since
    158   // ["thi"] is quoted we don't want to do a prefix match.
    159   if (parser->DoesQueryMatch(node->GetTitle(), query_nodes,
    160                              &(title_match.match_positions))) {
    161     title_match.node = node;
    162     results->push_back(title_match);
    163   }
    164 }
    165 
    166 bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const string16& term,
    167                                                       bool first_term,
    168                                                       Matches* matches) {
    169   Index::const_iterator i = index_.lower_bound(term);
    170   if (i == index_.end())
    171     return false;
    172 
    173   if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) {
    174     // Term is too short for prefix match, compare using exact match.
    175     if (i->first != term)
    176       return false;  // No bookmarks with this term.
    177 
    178     if (first_term) {
    179       Match match;
    180       match.terms.push_back(i);
    181       matches->push_back(match);
    182       return true;
    183     }
    184     CombineMatchesInPlace(i, matches);
    185   } else if (first_term) {
    186     // This is the first term and we're doing a prefix match. Loop through
    187     // index adding all entries that start with term to matches.
    188     while (i != index_.end() &&
    189            i->first.size() >= term.size() &&
    190            term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
    191       Match match;
    192       match.terms.push_back(i);
    193       matches->push_back(match);
    194       ++i;
    195     }
    196   } else {
    197     // Prefix match and not the first term. Loop through index combining
    198     // current matches in matches with term, placing result in result.
    199     Matches result;
    200     while (i != index_.end() &&
    201            i->first.size() >= term.size() &&
    202            term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
    203       CombineMatches(i, *matches, &result);
    204       ++i;
    205     }
    206     matches->swap(result);
    207   }
    208   return !matches->empty();
    209 }
    210 
    211 void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i,
    212                                           Matches* matches) {
    213   for (size_t i = 0; i < matches->size(); ) {
    214     Match* match = &((*matches)[i]);
    215     NodeSet intersection;
    216     std::set_intersection(match->nodes_begin(), match->nodes_end(),
    217                           index_i->second.begin(), index_i->second.end(),
    218                           std::inserter(intersection, intersection.begin()));
    219     if (intersection.empty()) {
    220       matches->erase(matches->begin() + i);
    221     } else {
    222       match->terms.push_back(index_i);
    223       match->nodes.swap(intersection);
    224       ++i;
    225     }
    226   }
    227 }
    228 
    229 void BookmarkIndex::CombineMatches(const Index::const_iterator& index_i,
    230                                    const Matches& current_matches,
    231                                    Matches* result) {
    232   for (size_t i = 0; i < current_matches.size(); ++i) {
    233     const Match& match = current_matches[i];
    234     NodeSet intersection;
    235     std::set_intersection(match.nodes_begin(), match.nodes_end(),
    236                           index_i->second.begin(), index_i->second.end(),
    237                           std::inserter(intersection, intersection.begin()));
    238     if (!intersection.empty()) {
    239       result->push_back(Match());
    240       Match& combined_match = result->back();
    241       combined_match.terms = match.terms;
    242       combined_match.terms.push_back(index_i);
    243       combined_match.nodes.swap(intersection);
    244     }
    245   }
    246 }
    247 
    248 std::vector<string16> BookmarkIndex::ExtractQueryWords(const string16& query) {
    249   std::vector<string16> terms;
    250   if (query.empty())
    251     return std::vector<string16>();
    252   QueryParser parser;
    253   // TODO(brettw): use ICU normalization:
    254   // http://userguide.icu-project.org/transforms/normalization
    255   parser.ParseQueryWords(base::i18n::ToLower(query), &terms);
    256   return terms;
    257 }
    258 
    259 void BookmarkIndex::RegisterNode(const string16& term,
    260                                  const BookmarkNode* node) {
    261   index_[term].insert(node);
    262 }
    263 
    264 void BookmarkIndex::UnregisterNode(const string16& term,
    265                                    const BookmarkNode* node) {
    266   Index::iterator i = index_.find(term);
    267   if (i == index_.end()) {
    268     // We can get here if the node has the same term more than once. For
    269     // example, a bookmark with the title 'foo foo' would end up here.
    270     return;
    271   }
    272   i->second.erase(node);
    273   if (i->second.empty())
    274     index_.erase(i);
    275 }
    276