1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "ui/app_list/search/tokenized_string_match.h" 6 7 #include "base/i18n/string_search.h" 8 #include "base/logging.h" 9 #include "ui/app_list/search/tokenized_string_char_iterator.h" 10 11 namespace app_list { 12 13 namespace { 14 15 // The factors below are applied when the current char of query matches 16 // the current char of the text to be matched. Different factors are chosen 17 // based on where the match happens. kIsPrefixMultiplier is used when the 18 // matched portion is a prefix of both the query and the text, which implies 19 // that the matched chars are at the same position in query and text. This is 20 // the most preferred case thus it has the highest score. When the current char 21 // of the query and the text does not match, the algorithm moves to the next 22 // token in the text and try to match from there. kIsFrontOfWordMultipler will 23 // be used if the first char of the token matches the current char of the query. 24 // Otherwise, the match is considered as weak and kIsWeakHitMultiplier is 25 // used. 26 // Examples: 27 // Suppose the text to be matched is 'Google Chrome'. 28 // Query 'go' would yield kIsPrefixMultiplier for each char. 29 // Query 'gc' would use kIsPrefixMultiplier for 'g' and 30 // kIsFrontOfWordMultipler for 'c'. 31 // Query 'ch' would use kIsFrontOfWordMultipler for 'c' and 32 // kIsWeakHitMultiplier for 'h'. 33 // Query 'oo' does not match any prefix and would use the substring match 34 // fallback, thus kIsSubstringMultiplier is used for each char. 35 const double kIsPrefixMultiplier = 1.0; 36 const double kIsFrontOfWordMultipler = 0.8; 37 const double kIsWeakHitMultiplier = 0.6; 38 const double kIsSubstringMultiplier = 0.4; 39 40 // A relevance score that represents no match. 41 const double kNoMatchScore = 0.0; 42 43 // PrefixMatcher matches the chars of a given query as prefix of tokens in 44 // a given text or as prefix of the acronyms of those text tokens. 45 class PrefixMatcher { 46 public: 47 PrefixMatcher(const TokenizedString& query, 48 const TokenizedString& text) 49 : query_iter_(query), 50 text_iter_(text), 51 current_match_(gfx::Range::InvalidRange()), 52 current_relevance_(kNoMatchScore) { 53 } 54 55 // Invokes RunMatch to perform prefix match. Use |states_| as a stack to 56 // perform DFS (depth first search) so that all possible matches are 57 // attempted. Stops on the first full match and returns true. Otherwise, 58 // returns false to indicate no match. 59 bool Match() { 60 while (!RunMatch()) { 61 // No match found and no more states to try. Bail out. 62 if (states_.empty()) { 63 current_relevance_ = kNoMatchScore; 64 current_hits_.clear(); 65 return false; 66 } 67 68 PopState(); 69 70 // Skip restored match to try other possibilites. 71 AdvanceToNextTextToken(); 72 } 73 74 if (current_match_.IsValid()) 75 current_hits_.push_back(current_match_); 76 77 return true; 78 } 79 80 double relevance() const { return current_relevance_; } 81 const TokenizedStringMatch::Hits& hits() const { return current_hits_; } 82 83 private: 84 // Context record of a match. 85 struct State { 86 State() : relevance(kNoMatchScore) {} 87 State(double relevance, 88 const gfx::Range& current_match, 89 const TokenizedStringMatch::Hits& hits, 90 const TokenizedStringCharIterator& query_iter, 91 const TokenizedStringCharIterator& text_iter) 92 : relevance(relevance), 93 current_match(current_match), 94 hits(hits.begin(), hits.end()), 95 query_iter_state(query_iter.GetState()), 96 text_iter_state(text_iter.GetState()) {} 97 98 // The current score of the processed query chars. 99 double relevance; 100 101 // Current matching range. 102 gfx::Range current_match; 103 104 // Completed matching ranges of the processed query chars. 105 TokenizedStringMatch::Hits hits; 106 107 // States of the processed query and text chars. 108 TokenizedStringCharIterator::State query_iter_state; 109 TokenizedStringCharIterator::State text_iter_state; 110 }; 111 typedef std::vector<State> States; 112 113 // Match chars from the query and text one by one. For each matching char, 114 // calculate relevance and matching ranges. And the current stats is 115 // recorded so that the match could be skipped later to try other 116 // possiblities. Repeat until any of the iterators run out. Return true if 117 // query iterator runs out, i.e. all chars in query are matched. 118 bool RunMatch() { 119 while (!query_iter_.end() && !text_iter_.end()) { 120 if (query_iter_.Get() == text_iter_.Get()) { 121 PushState(); 122 123 if (query_iter_.GetArrayPos() == text_iter_.GetArrayPos()) 124 current_relevance_ += kIsPrefixMultiplier; 125 else if (text_iter_.IsFirstCharOfToken()) 126 current_relevance_ += kIsFrontOfWordMultipler; 127 else 128 current_relevance_ += kIsWeakHitMultiplier; 129 130 if (!current_match_.IsValid()) 131 current_match_.set_start(text_iter_.GetArrayPos()); 132 current_match_.set_end(text_iter_.GetArrayPos() + 133 text_iter_.GetCharSize()); 134 135 query_iter_.NextChar(); 136 text_iter_.NextChar(); 137 } else { 138 AdvanceToNextTextToken(); 139 } 140 } 141 142 return query_iter_.end(); 143 } 144 145 // Skip to the next text token and close current match. Invoked when a 146 // mismatch happens or to skip a restored match. 147 void AdvanceToNextTextToken() { 148 if (current_match_.IsValid()) { 149 current_hits_.push_back(current_match_); 150 current_match_ = gfx::Range::InvalidRange(); 151 } 152 153 text_iter_.NextToken(); 154 } 155 156 void PushState() { 157 states_.push_back(State(current_relevance_, 158 current_match_, 159 current_hits_, 160 query_iter_, 161 text_iter_)); 162 } 163 164 void PopState() { 165 DCHECK(!states_.empty()); 166 167 State& last_match = states_.back(); 168 current_relevance_ = last_match.relevance; 169 current_match_ = last_match.current_match; 170 current_hits_.swap(last_match.hits); 171 query_iter_.SetState(last_match.query_iter_state); 172 text_iter_.SetState(last_match.text_iter_state); 173 174 states_.pop_back(); 175 } 176 177 TokenizedStringCharIterator query_iter_; 178 TokenizedStringCharIterator text_iter_; 179 180 States states_; 181 gfx::Range current_match_; 182 183 double current_relevance_; 184 TokenizedStringMatch::Hits current_hits_; 185 186 DISALLOW_COPY_AND_ASSIGN(PrefixMatcher); 187 }; 188 189 } // namespace 190 191 TokenizedStringMatch::TokenizedStringMatch() 192 : relevance_(kNoMatchScore) {} 193 194 TokenizedStringMatch::~TokenizedStringMatch() {} 195 196 bool TokenizedStringMatch::Calculate(const TokenizedString& query, 197 const TokenizedString& text) { 198 relevance_ = kNoMatchScore; 199 hits_.clear(); 200 201 PrefixMatcher matcher(query, text); 202 if (matcher.Match()) { 203 relevance_ = matcher.relevance(); 204 hits_.assign(matcher.hits().begin(), matcher.hits().end()); 205 } 206 207 // Substring match as a fallback. 208 if (relevance_ == kNoMatchScore) { 209 size_t substr_match_start = 0; 210 size_t substr_match_length = 0; 211 if (base::i18n::StringSearchIgnoringCaseAndAccents(query.text(), 212 text.text(), 213 &substr_match_start, 214 &substr_match_length)) { 215 relevance_ = kIsSubstringMultiplier * substr_match_length; 216 hits_.push_back(gfx::Range(substr_match_start, 217 substr_match_start + substr_match_length)); 218 } 219 } 220 221 // Using length() for normalizing is not 100% correct but should be good 222 // enough compared with using real char count of the text. 223 if (text.text().length()) 224 relevance_ /= text.text().length(); 225 226 return relevance_ > kNoMatchScore; 227 } 228 229 bool TokenizedStringMatch::Calculate(const base::string16& query, 230 const base::string16& text) { 231 const TokenizedString tokenized_query(query); 232 const TokenizedString tokenized_text(text); 233 return Calculate(tokenized_query, tokenized_text); 234 } 235 236 } // namespace app_list 237