Home | History | Annotate | Download | only in query_parser
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
      6 #define COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
      7 
      8 #include <vector>
      9 
     10 #include "base/basictypes.h"
     11 #include "base/strings/string16.h"
     12 #include "components/query_parser/snippet.h"
     13 
     14 namespace query_parser {
     15 
     16 class QueryNodeList;  // Forward declaration; used only by pointer below.
     17 
     18 // Used by HasMatchIn.
        //
        // A single word taken from a piece of text, paired with where it starts in
        // that text. Sequences of these (QueryWordVector) are the |words| argument
        // of QueryNode::HasMatchIn and the output of QueryParser::ExtractQueryWords.
        //
        // The struct declares no constructor, so |position| is not initialized by
        // default-construction (e.g. `QueryWord w;`); set it explicitly.
     19 struct QueryWord {
     20   // The word to match against.
     21   base::string16 word;
     22 
     23   // The starting position of the word in the original text.
          // NOTE(review): presumably an index into the string16 text; the unit is
          // not stated in this header -- confirm against ExtractQueryWords.
     24   size_t position;
     25 };
     26 
        // Sequence of QueryWords. This is the type taken as |words| by
        // QueryNode::HasMatchIn and filled in by QueryParser::ExtractQueryWords.
     27 typedef std::vector<query_parser::QueryWord> QueryWordVector;
     28 
     29 // QueryNode is used by QueryParser to represent the elements that constitute a
     30 // query. While QueryNode is exposed by way of ParseQueryNodes (which fills a
     31 // QueryNodeStarVector), it really isn't meant for external usage.
        //
        // This is a pure interface: every operation is pure virtual. The comment on
        // IsWord() names two kinds of node, QueryNodeWord and QueryNodeList; neither
        // is defined in this header.
     32 class QueryNode {
     33  public:
          // Virtual so that a node can be destroyed through a QueryNode*, which is
          // how nodes are stored in QueryNodeStarVector.
     34   virtual ~QueryNode() {}
     35 
     36   // Serializes this node into |query| in a form that can be passed to SQLite.
     37   // Returns the number of words in this node.
     38   virtual int AppendToSQLiteQuery(base::string16* query) const = 0;
     39 
     40   // Returns true if this is a QueryNodeWord, false if it's a QueryNodeList.
     41   virtual bool IsWord() const = 0;
     42 
     43   // Returns true if this node matches |word|. If |exact| is true, the string
     44   // must exactly match. Otherwise, this uses a starts-with (prefix) comparison.
     45   virtual bool Matches(const base::string16& word, bool exact) const = 0;
     46 
     47   // Returns true if this node matches at least one of the words in |words|. An
     48   // entry is added to |match_positions| for all matching words giving the
     49   // matching regions.
     50   virtual bool HasMatchIn(const QueryWordVector& words,
     51                           Snippet::MatchPositions* match_positions) const = 0;
     52 
     53   // Returns true if this node matches at least one of the words in |words|.
          // Same question as the overload above, but without recording positions.
     54   virtual bool HasMatchIn(const QueryWordVector& words) const = 0;
     55 
     56   // Appends the words that make up this node to |words|.
     57   virtual void AppendWords(std::vector<base::string16>* words) const = 0;
     58 };
     59 
        // Vector of raw QueryNode pointers. The vector type itself does not manage
        // the lifetime of the nodes; QueryParser::ParseQueryNodes fills one of these
        // and documents that ownership of the nodes passes to the caller.
     60 typedef std::vector<query_parser::QueryNode*> QueryNodeStarVector;
     61 
     62 // This class is used to parse queries entered into the history search into more
     63 // normalized queries that can be passed to the SQLite backend.
        //
        // Besides producing SQLite query strings (ParseQuery), it can break a query
        // into plain words (ParseQueryWords) or into QueryNodes (ParseQueryNodes),
        // and test text or pre-extracted words against those nodes (DoesQueryMatch).
     64 class QueryParser {
     65  public:
     66   QueryParser();
     67 
          // Returns whether |word| is long enough to be used for prefix matching.
          //
     68   // For CJK ideographs and Korean Hangul, even a single character
     69   // can be useful in prefix matching, but that may give us too many
     70   // false positives. Moreover, the current ICU word breaker gives us
     71   // back every single Chinese character as a word so that there's no
     72   // point doing anything for them and we only adjust the minimum length
     73   // to 2 for Korean Hangul while using 3 for others. This is a temporary
     74   // hack until we have segmentation support.
     75   static bool IsWordLongEnoughForPrefixSearch(const base::string16& word);
     76 
     77   // Parses |query| into a SQLite query. The resulting query is placed in
     78   // |sqlite_query| and the number of words is returned.
     79   int ParseQuery(const base::string16& query, base::string16* sqlite_query);
     80 
     81   // Parses |query|, returning the words that make it up. Any words in quotes
     82   // are put in |words| without the quotes. For example, the query text
     83   // "foo bar" results in two entries being added to words, one for foo and one
     84   // for bar.
     85   void ParseQueryWords(const base::string16& query,
     86                        std::vector<base::string16>* words);
     87 
     88   // Parses |query|, returning the nodes that constitute the valid words in the
     89   // query. This is intended for later usage with DoesQueryMatch. Ownership of
     90   // the nodes passes to the caller.
          // |nodes| holds raw QueryNode pointers, so the vector will not free them.
     91   void ParseQueryNodes(const base::string16& query,
     92                        QueryNodeStarVector* nodes);
     93 
     94   // Returns true if the string |text| matches the query |nodes| created by a
     95   // call to ParseQueryNodes. If the query does match, each of the matching
     96   // positions in the text is added to |match_positions|.
     97   bool DoesQueryMatch(const base::string16& text,
     98                       const QueryNodeStarVector& nodes,
     99                       Snippet::MatchPositions* match_positions);
    100 
    101   // Returns true if all of the |words| match the query |nodes| created by a
    102   // call to ParseQueryNodes.
    103   bool DoesQueryMatch(const QueryWordVector& words,
    104                       const QueryNodeStarVector& nodes);
    105 
    106   // Extracts the words from |text|, placing each word into |words|.
    107   void ExtractQueryWords(const base::string16& text,
    108                          QueryWordVector* words);
    109 
    110   // Sorts the match positions in |matches| by their first index, then
    111   // coalesces any match positions that intersect each other.
          // |matches| is modified in place.
    112   static void SortAndCoalesceMatchPositions(Snippet::MatchPositions* matches);
    113 
    114  private:
    115   // Does the work of parsing |query|; creates nodes in |root| as appropriate.
    116   // This is invoked from both of the ParseQuery methods.
          // NOTE(review): three public ParseQuery* methods are declared above
          // (ParseQuery, ParseQueryWords, ParseQueryNodes); confirm in the
          // implementation which of them call this, and what the bool return value
          // signifies -- neither is stated in this header.
    117   bool ParseQueryImpl(const base::string16& query, QueryNodeList* root);
    118 
          // Disables copy construction and assignment, as the macro name indicates.
    119   DISALLOW_COPY_AND_ASSIGN(QueryParser);
    120 };
    121 
    122 }  // namespace query_parser
    123 
    124 #endif  // COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
    125