1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_ 6 #define COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_ 7 8 #include <vector> 9 10 #include "base/basictypes.h" 11 #include "base/strings/string16.h" 12 #include "components/query_parser/snippet.h" 13 14 namespace query_parser { 15 16 class QueryNodeList; 17 18 // Used by HasMatchIn. 19 struct QueryWord { 20 // The work to match against. 21 base::string16 word; 22 23 // The starting position of the word in the original text. 24 size_t position; 25 }; 26 27 typedef std::vector<query_parser::QueryWord> QueryWordVector; 28 29 // QueryNode is used by QueryParser to represent the elements that constitute a 30 // query. While QueryNode is exposed by way of ParseQuery, it really isn't meant 31 // for external usage. 32 class QueryNode { 33 public: 34 virtual ~QueryNode() {} 35 36 // Serialize ourselves out to a string that can be passed to SQLite. Returns 37 // the number of words in this node. 38 virtual int AppendToSQLiteQuery(base::string16* query) const = 0; 39 40 // Return true if this is a QueryNodeWord, false if it's a QueryNodeList. 41 virtual bool IsWord() const = 0; 42 43 // Returns true if this node matches |word|. If |exact| is true, the string 44 // must exactly match. Otherwise, this uses a starts with comparison. 45 virtual bool Matches(const base::string16& word, bool exact) const = 0; 46 47 // Returns true if this node matches at least one of the words in |words|. An 48 // entry is added to |match_positions| for all matching words giving the 49 // matching regions. 50 virtual bool HasMatchIn(const QueryWordVector& words, 51 Snippet::MatchPositions* match_positions) const = 0; 52 53 // Returns true if this node matches at least one of the words in |words|. 54 virtual bool HasMatchIn(const QueryWordVector& words) const = 0; 55 56 // Appends the words that make up this node in |words|. 57 virtual void AppendWords(std::vector<base::string16>* words) const = 0; 58 }; 59 60 typedef std::vector<query_parser::QueryNode*> QueryNodeStarVector; 61 62 // This class is used to parse queries entered into the history search into more 63 // normalized queries that can be passed to the SQLite backend. 64 class QueryParser { 65 public: 66 QueryParser(); 67 68 // For CJK ideographs and Korean Hangul, even a single character 69 // can be useful in prefix matching, but that may give us too many 70 // false positives. Moreover, the current ICU word breaker gives us 71 // back every single Chinese character as a word so that there's no 72 // point doing anything for them and we only adjust the minimum length 73 // to 2 for Korean Hangul while using 3 for others. This is a temporary 74 // hack until we have a segmentation support. 75 static bool IsWordLongEnoughForPrefixSearch(const base::string16& word); 76 77 // Parse a query into a SQLite query. The resulting query is placed in 78 // |sqlite_query| and the number of words is returned. 79 int ParseQuery(const base::string16& query, base::string16* sqlite_query); 80 81 // Parses |query|, returning the words that make up it. Any words in quotes 82 // are put in |words| without the quotes. For example, the query text 83 // "foo bar" results in two entries being added to words, one for foo and one 84 // for bar. 85 void ParseQueryWords(const base::string16& query, 86 std::vector<base::string16>* words); 87 88 // Parses |query|, returning the nodes that constitute the valid words in the 89 // query. This is intended for later usage with DoesQueryMatch. Ownership of 90 // the nodes passes to the caller. 91 void ParseQueryNodes(const base::string16& query, 92 QueryNodeStarVector* nodes); 93 94 // Returns true if the string text matches the query nodes created by a call 95 // to ParseQuery. If the query does match, each of the matching positions in 96 // the text is added to |match_positions|. 97 bool DoesQueryMatch(const base::string16& text, 98 const QueryNodeStarVector& nodes, 99 Snippet::MatchPositions* match_positions); 100 101 // Returns true if all of the |words| match the query |nodes| created by a 102 // call to ParseQuery. 103 bool DoesQueryMatch(const QueryWordVector& words, 104 const QueryNodeStarVector& nodes); 105 106 // Extracts the words from |text|, placing each word into |words|. 107 void ExtractQueryWords(const base::string16& text, 108 QueryWordVector* words); 109 110 // Sorts the match positions in |matches| by their first index, then 111 // coalesces any match positions that intersect each other. 112 static void SortAndCoalesceMatchPositions(Snippet::MatchPositions* matches); 113 114 private: 115 // Does the work of parsing |query|; creates nodes in |root| as appropriate. 116 // This is invoked from both of the ParseQuery methods. 117 bool ParseQueryImpl(const base::string16& query, QueryNodeList* root); 118 119 DISALLOW_COPY_AND_ASSIGN(QueryParser); 120 }; 121 122 } // namespace query_parser 123 124 #endif // COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_ 125