Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // The query parser is used to parse queries entered into the history search
      6 // into more normalized queries that can be passed to the SQLite backend.
      7 
      8 #ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
      9 #define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
     10 #pragma once
     11 
     12 #include <vector>
     13 
     14 #include "base/string16.h"
     15 #include "chrome/browser/history/snippet.h"
     16 
     17 class QueryNodeList;
     18 
     19 // Used by HasMatchIn. Pairs a single word with the position at which it
        // starts in the text it came from. The private
        // QueryParser::ExtractQueryWords overload fills a vector of these.
     20 struct QueryWord {
     21   // The word to match against.
     22   string16 word;
     23 
     24   // The starting position of the word in the original text.
          // NOTE(review): presumably an index into the string16 text; the unit is
          // not stated in this header -- confirm against the implementation.
     25   size_t position;
     26 };
     27 
     28 // QueryNode is used by QueryParser to represent the elements that
     29 // constitute a query. While QueryNode is exposed by way of ParseQuery, it
     30 // really isn't meant for external usage.
        //
        // This is a pure interface: a node is either a single word or a
        // QueryNodeList (see IsWord).
     31 class QueryNode {
     32  public:
          // Virtual because QueryParser::ParseQuery hands callers owning
          // QueryNode* pointers, so nodes must be deletable through this type.
     33   virtual ~QueryNode() {}
     34 
     35   // Serializes this node into |query| in a form that can be passed to
     36   // SQLite. Returns the number of words in this node.
     37   virtual int AppendToSQLiteQuery(string16* query) const = 0;
     38 
     39   // Returns true if this is a word node, false if it's a QueryNodeList.
     40   virtual bool IsWord() const = 0;
     41 
     42   // Returns true if this node matches the specified |word|. If |exact| is
     43   // true, the string must exactly match. Otherwise, this uses a starts-with
     44   // (prefix) comparison.
     45   virtual bool Matches(const string16& word, bool exact) const = 0;
     46 
     47   // Returns true if this node matches at least one of the words in |words|.
     48   // If the node matches at least one word, an entry is added to
     49   // |match_positions| giving the matching region.
     50   virtual bool HasMatchIn(const std::vector<QueryWord>& words,
     51                           Snippet::MatchPositions* match_positions) const = 0;
     52 
     53   // Appends the words that make up this node to |words|.
     54   virtual void AppendWords(std::vector<string16>* words) const = 0;
     55 };
     56 
     57 
     58 class QueryParser {
     59  public:
     60   QueryParser();
     61 
          // Returns whether |word| meets the minimum length for prefix matching.
          //
     62   // For CJK ideographs and Korean Hangul, even a single character
     63   // can be useful in prefix matching, but that may give us too many
     64   // false positives. Moreover, the current ICU word breaker gives us
     65   // back every single Chinese character as a word so that there's no
     66   // point doing anything for them and we only adjust the minimum length
     67   // to 2 for Korean Hangul while using 3 for others. This is a temporary
     68   // hack until we have a segmentation support.
     69   static bool IsWordLongEnoughForPrefixSearch(const string16& word);
     70 
     71   // Parses |query| into a SQLite query. The resulting query is placed in
     72   // |sqlite_query| and the number of words is returned.
     73   int ParseQuery(const string16& query,
     74                  string16* sqlite_query);
     75 
     76   // Parses the query words in |query|, returning in |nodes| the nodes that
     77   // constitute the valid words in the query. This is intended for later
     78   // usage with DoesQueryMatch.
     79   // Ownership of the nodes passes to the caller.
     80   void ParseQuery(const string16& query,
     81                   std::vector<QueryNode*>* nodes);
     82 
     83   // Parses a query returning the words that make up the query. Any words in
     84   // quotes are put in |words| without the quotes. For example, the query text
     85   // "foo bar" results in two entries being added to |words|, one for foo and
     86   // one for bar.
     87   void ExtractQueryWords(const string16& query,
     88                          std::vector<string16>* words);
     89 
     90   // Returns true if the string |text| matches the query |nodes| created by a
     91   // call to ParseQuery. If the query does match, each of the matching
     92   // positions in the text is added to |match_positions|.
     93   bool DoesQueryMatch(const string16& text,
     94                       const std::vector<QueryNode*>& nodes,
     95                       Snippet::MatchPositions* match_positions);
     96 
     97  private:
     98   // Does the work of parsing a query; creates nodes in |root| as
     99   // appropriate. This is invoked from both of the ParseQuery methods.
          // NOTE(review): the meaning of the bool result is not documented in this
          // header -- confirm against the implementation.
    100   bool ParseQueryImpl(const string16& query,
    101                       QueryNodeList* root);
    102 
    103   // Extracts the words from |text|, placing each word into |words|. Unlike
          // the public overload, the entries are QueryWords, which also carry each
          // word's starting position.
    104   void ExtractQueryWords(const string16& text,
    105                          std::vector<QueryWord>* words);
    106 };
    107 
    108 #endif  // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
    109