Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/history/in_memory_url_index_types.h"
      6 
      7 #include <algorithm>
      8 #include <functional>
      9 #include <iterator>
     10 #include <numeric>
     11 #include <set>
     12 
     13 #include "base/i18n/break_iterator.h"
     14 #include "base/i18n/case_conversion.h"
     15 #include "base/strings/string_util.h"
     16 #include "net/base/escape.h"
     17 #include "net/base/net_util.h"
     18 
     19 namespace history {
     20 
     21 // Matches within URL and Title Strings ----------------------------------------
     22 
     23 TermMatches MatchTermInString(const base::string16& term,
     24                               const base::string16& cleaned_string,
     25                               int term_num) {
     26   const size_t kMaxCompareLength = 2048;
     27   const base::string16& short_string =
     28       (cleaned_string.length() > kMaxCompareLength) ?
     29       cleaned_string.substr(0, kMaxCompareLength) : cleaned_string;
     30   TermMatches matches;
     31   for (size_t location = short_string.find(term);
     32        location != base::string16::npos;
     33        location = short_string.find(term, location + 1))
     34     matches.push_back(TermMatch(term_num, location, term.length()));
     35   return matches;
     36 }
     37 
     38 // Comparison function for sorting TermMatches by their offsets.
     39 bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
     40   return m1.offset < m2.offset;
     41 }
     42 
     43 TermMatches SortAndDeoverlapMatches(const TermMatches& matches) {
     44   if (matches.empty())
     45     return matches;
     46   TermMatches sorted_matches = matches;
     47   std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);
     48   TermMatches clean_matches;
     49   TermMatch last_match;
     50   for (TermMatches::const_iterator iter = sorted_matches.begin();
     51        iter != sorted_matches.end(); ++iter) {
     52     if (iter->offset >= last_match.offset + last_match.length) {
     53       last_match = *iter;
     54       clean_matches.push_back(last_match);
     55     }
     56   }
     57   return clean_matches;
     58 }
     59 
     60 std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches) {
     61   std::vector<size_t> offsets;
     62   for (TermMatches::const_iterator i = matches.begin(); i != matches.end();
     63        ++i) {
     64     offsets.push_back(i->offset);
     65     offsets.push_back(i->offset + i->length);
     66   }
     67   return offsets;
     68 }
     69 
     70 TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches,
     71                                         const std::vector<size_t>& offsets) {
     72   DCHECK_EQ(2 * matches.size(), offsets.size());
     73   TermMatches new_matches;
     74   std::vector<size_t>::const_iterator offset_iter = offsets.begin();
     75   for (TermMatches::const_iterator term_iter = matches.begin();
     76        term_iter != matches.end(); ++term_iter, ++offset_iter) {
     77     const size_t starting_offset = *offset_iter;
     78     ++offset_iter;
     79     const size_t ending_offset = *offset_iter;
     80     if ((starting_offset != base::string16::npos) &&
     81         (ending_offset != base::string16::npos) &&
     82         (starting_offset != ending_offset)) {
     83       TermMatch new_match(*term_iter);
     84       new_match.offset = starting_offset;
     85       new_match.length = ending_offset - starting_offset;
     86       new_matches.push_back(new_match);
     87     }
     88   }
     89   return new_matches;
     90 }
     91 
     92 // Utility Functions -----------------------------------------------------------
     93 
     94 String16Set String16SetFromString16(const base::string16& cleaned_uni_string,
     95                                     WordStarts* word_starts) {
     96   String16Vector words =
     97       String16VectorFromString16(cleaned_uni_string, false, word_starts);
     98   String16Set word_set;
     99   for (String16Vector::const_iterator iter = words.begin(); iter != words.end();
    100        ++iter)
    101     word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxSignificantChars));
    102   return word_set;
    103 }
    104 
    105 String16Vector String16VectorFromString16(
    106     const base::string16& cleaned_uni_string,
    107     bool break_on_space,
    108     WordStarts* word_starts) {
    109   if (word_starts)
    110     word_starts->clear();
    111   base::i18n::BreakIterator iter(cleaned_uni_string,
    112       break_on_space ? base::i18n::BreakIterator::BREAK_SPACE :
    113           base::i18n::BreakIterator::BREAK_WORD);
    114   String16Vector words;
    115   if (!iter.Init())
    116     return words;
    117   while (iter.Advance()) {
    118     if (break_on_space || iter.IsWord()) {
    119       base::string16 word(iter.GetString());
    120       size_t initial_whitespace = 0;
    121       if (break_on_space) {
    122         base::string16 trimmed_word;
    123         base::TrimWhitespace(word, base::TRIM_LEADING, &trimmed_word);
    124         initial_whitespace = word.length() - trimmed_word.length();
    125         base::TrimWhitespace(trimmed_word, base::TRIM_TRAILING, &word);
    126       }
    127       if (word.empty())
    128         continue;
    129       words.push_back(word);
    130       if (!word_starts)
    131         continue;
    132       size_t word_start = iter.prev() + initial_whitespace;
    133       if (word_start < kMaxSignificantChars)
    134         word_starts->push_back(word_start);
    135     }
    136   }
    137   return words;
    138 }
    139 
    140 Char16Set Char16SetFromString16(const base::string16& term) {
    141   Char16Set characters;
    142   for (base::string16::const_iterator iter = term.begin(); iter != term.end();
    143        ++iter)
    144     characters.insert(*iter);
    145   return characters;
    146 }
    147 
    148 // HistoryInfoMapValue ---------------------------------------------------------
    149 
    150 HistoryInfoMapValue::HistoryInfoMapValue() {}
    151 HistoryInfoMapValue::~HistoryInfoMapValue() {}
    152 
    153 // RowWordStarts ---------------------------------------------------------------
    154 
    155 RowWordStarts::RowWordStarts() {}
    156 RowWordStarts::~RowWordStarts() {}
    157 
    158 void RowWordStarts::Clear() {
    159   url_word_starts_.clear();
    160   title_word_starts_.clear();
    161 }
    162 
    163 }  // namespace history
    164