Home | History | Annotate | Download | only in autocomplete
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_INPUT_H_
      6 #define CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_INPUT_H_
      7 
      8 #include <string>
      9 
     10 #include "base/basictypes.h"
     11 #include "base/gtest_prod_util.h"
     12 #include "base/strings/string16.h"
     13 #include "components/metrics/proto/omnibox_event.pb.h"
     14 #include "components/metrics/proto/omnibox_input_type.pb.h"
     15 #include "url/gurl.h"
     16 #include "url/url_parse.h"
     17 
     18 // The user input for an autocomplete query.  Allows copying.
      19 class AutocompleteInput {
      20  public:
      21   AutocompleteInput();  // Null-constructed state; see Clear().
      22   // |text| and |cursor_position| represent the input query and the location
      23   // of the cursor within the query, respectively.  |cursor_position| may be
      24   // set to base::string16::npos if the input |text| doesn't come directly
      25   // from the user's typing.
      26   //
      27   // |desired_tld| is the user's desired TLD, if one is not already present in
      28   // the text to autocomplete.  When this is non-empty, it also implies that
      29   // "www." should be prepended to the domain where possible. The |desired_tld|
      30   // should not contain a leading '.' (use "com" instead of ".com").
      31   //
      32   // If |current_url| is set to a valid search result page URL, providers can
      33   // use it to perform query refinement. For example, if it is set to an image
      34   // search result page, the search provider may generate an image search URL.
      35   // Query refinement is only used by mobile ports, so only these set
      36   // |current_url| to a non-empty string.
      37   //
      38   // |current_page_classification| represents the type of page the user is
      39   // viewing and manner in which the user is accessing the omnibox; it's
      40   // more than simply the URL.  It includes, for example, whether the page
      41   // is a search result page doing search term replacement or not.
      42   //
      43   // |prevent_inline_autocomplete| is true if the generated result set should
      44   // not require inline autocomplete for the default match.  This is difficult
      45   // to explain in the abstract; the practical use case is that after the user
      46   // deletes text in the edit, the HistoryURLProvider should make sure not to
      47   // promote a match requiring inline autocomplete too highly.
      48   //
      49   // |prefer_keyword| should be true when the keyword UI is onscreen; this will
      50   // bias the autocomplete result set toward the keyword provider when the input
      51   // string is a bare keyword.
      52   //
      53   // |allow_exact_keyword_match| should be false when triggering keyword mode on
      54   // the input string would be surprising or wrong, e.g. when highlighting text
      55   // in a page and telling the browser to search for it or navigate to it. This
      56   // parameter only applies to substituting keywords.
      57   //
      58   // |want_asynchronous_matches| should be false when the caller only wants
      59   // matches which are synchronously available; providers are then not allowed
      60   // to make asynchronous requests while processing this input.
      61   AutocompleteInput(const base::string16& text,
      62                     size_t cursor_position,
      63                     const base::string16& desired_tld,
      64                     const GURL& current_url,
      65                     metrics::OmniboxEventProto::PageClassification
      66                         current_page_classification,
      67                     bool prevent_inline_autocomplete,
      68                     bool prefer_keyword,
      69                     bool allow_exact_keyword_match,
      70                     bool want_asynchronous_matches);
      71   ~AutocompleteInput();
      72 
      73   // If type is |FORCED_QUERY| and |text| starts with '?', it is removed.
      74   // Returns the number of leading characters removed.
      75   static size_t RemoveForcedQueryStringIfNecessary(
      76       metrics::OmniboxInputType::Type type,
      77       base::string16* text);
      78 
      79   // Converts |type| to a string representation.  Used in logging.
      80   static std::string TypeToString(metrics::OmniboxInputType::Type type);
      81 
      82   // Parses |text| and returns the type of input this will be interpreted as.
      83   // The components of the input are stored in the output parameter |parts|, if
      84   // it is non-NULL. The scheme is stored in |scheme| if it is non-NULL. The
      85   // canonicalized URL is stored in |canonicalized_url|; however, this URL is
      86   // not guaranteed to be valid, especially if the parsed type is, e.g., QUERY.
      87   static metrics::OmniboxInputType::Type Parse(
      88       const base::string16& text,
      89       const base::string16& desired_tld,
      90       url::Parsed* parts,
      91       base::string16* scheme,
      92       GURL* canonicalized_url);
      93 
      94   // Parses |text| and fills |scheme| and |host| with the positions of those
      95   // components.  The results are almost the same as those of Parse(), but if
      96   // the scheme is view-source, this function returns the positions of the
      97   // scheme and host in the URL qualified by the "view-source:" prefix.
      98   static void ParseForEmphasizeComponents(const base::string16& text,
      99                                           url::Component* scheme,
     100                                           url::Component* host);
     101 
     102   // Code that wants to format URLs with a format flag including
     103   // net::kFormatUrlOmitTrailingSlashOnBareHostname risks changing the meaning
     104   // if the result is then parsed as AutocompleteInput.  Such code can call this
     105   // function with the URL and its formatted string, and it will return a
     106   // formatted string with the same meaning as the original URL (i.e. it will
     107   // re-append a slash if necessary).
     108   static base::string16 FormattedStringWithEquivalentMeaning(
     109       const GURL& url,
     110       const base::string16& formatted_url);
     111 
     112   // Returns the number of non-empty components in |parts| besides the host.
     113   static int NumNonHostComponents(const url::Parsed& parts);
     114 
     115   // Returns whether |text| begins with "http:" or "view-source:http:".
     116   static bool HasHTTPScheme(const base::string16& text);
     117 
     118   // User-provided text to be completed.
     119   const base::string16& text() const { return text_; }
     120 
     121   // Returns 0-based cursor position within |text_| or base::string16::npos if
     122   // not used.
     123   size_t cursor_position() const { return cursor_position_; }
     124 
     125   // Use of this setter is risky, since no other internal state is updated
     126   // besides |text_|, |cursor_position_| and |parts_|.  Only callers who know
     127   // that they're not changing the type/scheme/etc. should use this.
     128   void UpdateText(const base::string16& text,
     129                   size_t cursor_position,
     130                   const url::Parsed& parts);
     131 
     132   // The current URL, or an invalid GURL if query refinement is not desired.
     133   const GURL& current_url() const { return current_url_; }
     134 
     135   // The type of page that is currently being displayed and how it is
     136   // displayed (e.g., with search term replacement or without).
     137   metrics::OmniboxEventProto::PageClassification current_page_classification()
     138       const {
     139     return current_page_classification_;
     140   }
     141 
     142   // The type of input supplied.
     143   metrics::OmniboxInputType::Type type() const { return type_; }
     144 
     145   // Returns parsed URL components.
     146   const url::Parsed& parts() const { return parts_; }
     147 
     148   // The scheme parsed from the provided text; only meaningful when type_ is
     149   // URL.
     150   const base::string16& scheme() const { return scheme_; }
     151 
     152   // The input as a URL to navigate to, if possible.
     153   const GURL& canonicalized_url() const { return canonicalized_url_; }
     154 
     155   // Returns whether inline autocompletion should be prevented.
     156   bool prevent_inline_autocomplete() const {
     157     return prevent_inline_autocomplete_;
     158   }
     159 
     160   // Returns whether, given an input string consisting solely of a substituting
     161   // keyword, we should score it like a non-substituting keyword.
     162   bool prefer_keyword() const { return prefer_keyword_; }
     163 
     164   // Returns whether this input is allowed to be treated as an exact
     165   // keyword match.  If not, the default result is guaranteed not to be a
     166   // keyword search, even if the input is "<keyword> <search string>".
     167   bool allow_exact_keyword_match() const { return allow_exact_keyword_match_; }
     168 
     169   // Returns whether providers should be allowed to make asynchronous requests
     170   // when processing this input.
     171   bool want_asynchronous_matches() const { return want_asynchronous_matches_; }
     172 
     173   // Resets all internal variables to the null-constructed state.
     174   void Clear();
     175 
     176  private:
     177   FRIEND_TEST_ALL_PREFIXES(AutocompleteProviderTest, GetDestinationURL);  // Test-only friend.
     178 
     179   // NOTE: Whenever adding a new field here, please make sure to update the
     180   // Clear() method as well.
     181   base::string16 text_;
     182   size_t cursor_position_;
     183   GURL current_url_;
     184   metrics::OmniboxEventProto::PageClassification current_page_classification_;
     185   metrics::OmniboxInputType::Type type_;
     186   url::Parsed parts_;
     187   base::string16 scheme_;
     188   GURL canonicalized_url_;
     189   bool prevent_inline_autocomplete_;
     190   bool prefer_keyword_;
     191   bool allow_exact_keyword_match_;
     192   bool want_asynchronous_matches_;
     193 };
    194 
    195 #endif  // CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_INPUT_H_
    196