Home | History | Annotate | Download | only in autocomplete
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_INPUT_H_
      6 #define CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_INPUT_H_
      7 
      8 #include <string>
      9 
     10 #include "base/basictypes.h"
     11 #include "base/gtest_prod_util.h"
     12 #include "base/strings/string16.h"
     13 #include "url/gurl.h"
     14 #include "url/url_parse.h"
     15 
     16 // The user input for an autocomplete query.  Allows copying.
     17 class AutocompleteInput {
     18  public:
     19   // Note that the type below may be misleading.  For example, "http:/" alone
     20   // cannot be opened as a URL, so it is marked as a QUERY; yet the user
     21   // probably intends to type more and have it eventually become a URL, so we
     22   // need to make sure we still run it through inline autocomplete.
     23   enum Type {
     24     INVALID,        // Empty input
     25     UNKNOWN,        // Valid input whose type cannot be determined
     26     URL,            // Input autodetected as a URL
     27     QUERY,          // Input autodetected as a query
     28     FORCED_QUERY,   // Input forced to be a query by an initial '?'
     29   };
     30 
     31   // Enumeration of the possible match query types. Callers who only need some
     32   // of the matches for a particular input can get answers more quickly by
     33   // specifying that upfront.
     34   enum MatchesRequested {
     35     // Only the best match in the whole result set matters.  Providers should at
     36     // most return synchronously-available matches, and if possible do even less
     37     // work, so that it's safe to ask for these repeatedly in the course of one
     38     // higher-level "synchronous" query.
     39     BEST_MATCH,
     40 
     41     // Only synchronous matches should be returned.
     42     SYNCHRONOUS_MATCHES,
     43 
     44     // All matches should be fetched.
     45     ALL_MATCHES,
     46   };
     47 
     48   // The type of page currently displayed.
     49   // Note: when adding an element to this enum, please add it at the end
     50   // and update omnibox_event.proto::PageClassification and
     51   // omnibox_edit_model.cc::ClassifyPage() too.
     52   enum PageClassification {
     53     INVALID_SPEC = 0,   // invalid URI; shouldn't happen
     54     NEW_TAB_PAGE = 1,   // chrome://newtab/
     55     // Note that chrome://newtab/ doesn't have to be the built-in
     56     // version; it could be replaced by an extension.
     57     BLANK = 2,          // about:blank
     58     HOMEPAGE = 3,       // user switched settings to "open this page" mode.
     59     // Note that if the homepage is set to the new tab page or about blank,
     60     // then we'll classify the web page into those categories, not HOMEPAGE.
     61     OTHER = 4,          // everything not included somewhere else on this list
     62     // NOTE(review): value 5 is unused here -- presumably retired; confirm.
     63     // The user is on a search result page that's doing search term replacement:
     64     // before editing, the omnibox should've shown search terms, not the URL.
     65     SEARCH_RESULT_PAGE_DOING_SEARCH_TERM_REPLACEMENT = 6,
     66     // The new tab page in which this omnibox interaction first started
     67     // with the user having focus in the omnibox.
     68     INSTANT_NEW_TAB_PAGE_WITH_OMNIBOX_AS_STARTING_FOCUS = 7,
     69     // The new tab page in which this omnibox interaction first started
     70     // with the user having focus in the fakebox.
     71     INSTANT_NEW_TAB_PAGE_WITH_FAKEBOX_AS_STARTING_FOCUS = 8,
     72     // The user is on a search result page that's not doing search term
     73     // replacement, meaning the URL of the page should've appeared in the
     74     // omnibox before the user started editing it, not the search terms.
     75     SEARCH_RESULT_PAGE_NO_SEARCH_TERM_REPLACEMENT = 9
     76   };
     77 
     78   AutocompleteInput();
     79   // |text| and |cursor_position| represent the input query and the location
     80   // of the cursor within the query, respectively.  |cursor_position| may be
     81   // set to string16::npos if the input |text| doesn't come directly from the
     82   // user's typing.
     83   //
     84   // |desired_tld| is the user's desired TLD, if one is not already present in
     85   // the text to autocomplete.  When this is non-empty, it also implies that
     86   // "www." should be prepended to the domain where possible. The |desired_tld|
     87   // should not contain a leading '.' (use "com" instead of ".com").
     88   //
     89   // If |current_url| is set to a valid search result page URL, providers can
     90   // use it to perform query refinement. For example, if it is set to an image
     91   // search result page, the search provider may generate an image search URL.
     92   // Query refinement is only used by mobile ports, so only these set
     93   // |current_url| to a non-empty string.
     94   //
     95   // |current_page_classification| represents the type of page the user is
     96   // viewing and manner in which the user is accessing the omnibox; it's
     97   // more than simply the URL.  It includes, for example, whether the page
     98   // is a search result page doing search term replacement or not.
     99   //
    100   // |prevent_inline_autocomplete| is true if the generated result set should
    101   // not require inline autocomplete for the default match.  This is difficult
    102   // to explain in the abstract; the practical use case is that after the user
    103   // deletes text in the edit, the HistoryURLProvider should make sure not to
    104   // promote a match requiring inline autocomplete too highly.
    105   //
    106   // |prefer_keyword| should be true when the keyword UI is onscreen; this will
    107   // bias the autocomplete result set toward the keyword provider when the input
    108   // string is a bare keyword.
    109   //
    110   // |allow_exact_keyword_match| should be false when triggering keyword mode on
    111   // the input string would be surprising or wrong, e.g. when highlighting text
    112   // in a page and telling the browser to search for it or navigate to it. This
    113   // parameter only applies to substituting keywords.
    114   //
    115   // If |matches_requested| is BEST_MATCH or SYNCHRONOUS_MATCHES the controller
    116   // asks the providers to only return matches which are synchronously
    117   // available, which should mean that all providers will be done immediately.
    118   AutocompleteInput(const string16& text,
    119                     size_t cursor_position,
    120                     const string16& desired_tld,
    121                     const GURL& current_url,
    122                     PageClassification current_page_classification,
    123                     bool prevent_inline_autocomplete,
    124                     bool prefer_keyword,
    125                     bool allow_exact_keyword_match,
    126                     MatchesRequested matches_requested);
    127   ~AutocompleteInput();
    128 
    129   // If |type| is FORCED_QUERY and |text| starts with '?', it is removed.
    130   // Returns number of leading characters removed.
    131   static size_t RemoveForcedQueryStringIfNecessary(Type type, string16* text);
    132 
    133   // Converts |type| to a string representation.  Used in logging.
    134   static std::string TypeToString(Type type);
    135 
    136   // Parses |text| and returns the type of input this will be interpreted as.
    137   // The components of the input are stored in the output parameter |parts|, if
    138   // it is non-NULL. The scheme is stored in |scheme| if it is non-NULL. The
    139   // canonicalized URL is stored in |canonicalized_url|; however, this URL is
    140   // not guaranteed to be valid, especially if the parsed type is, e.g., QUERY.
    141   static Type Parse(const string16& text,
    142                     const string16& desired_tld,
    143                     url_parse::Parsed* parts,
    144                     string16* scheme,
    145                     GURL* canonicalized_url);
    146 
    147   // Parses |text| and fills |scheme| and |host| with the positions of those
    148   // components.  The results are almost the same as those of Parse(), but if
    149   // the scheme is view-source, this function returns the positions of the
    150   // scheme and host in the URL qualified by the "view-source:" prefix.
    151   static void ParseForEmphasizeComponents(const string16& text,
    152                                           url_parse::Component* scheme,
    153                                           url_parse::Component* host);
    154 
    155   // Code that wants to format URLs with a format flag including
    156   // net::kFormatUrlOmitTrailingSlashOnBareHostname risks changing the meaning
    157   // if the result is then parsed as AutocompleteInput.  Such code can call
    158   // this function with the URL and its formatted string, and it will return a
    159   // formatted string with the same meaning as the original URL (i.e. it will
    160   // re-append a slash if necessary).
    161   static string16 FormattedStringWithEquivalentMeaning(
    162       const GURL& url,
    163       const string16& formatted_url);
    164 
    165   // Returns the number of non-empty components in |parts| besides the host.
    166   static int NumNonHostComponents(const url_parse::Parsed& parts);
    167 
    168   // User-provided text to be completed.
    169   const string16& text() const { return text_; }
    170 
    171   // Returns 0-based cursor position within |text_| or string16::npos if not
    172   // used.
    173   size_t cursor_position() const { return cursor_position_; }
    174 
    175   // Use of this setter is risky, since no other internal state is updated
    176   // besides |text_|, |cursor_position_| and |parts_|.  Only callers who know
    177   // that they're not changing the type/scheme/etc. should use this.
    178   void UpdateText(const string16& text,
    179                   size_t cursor_position,
    180                   const url_parse::Parsed& parts);
    181 
    182   // The current URL, or an invalid GURL if query refinement is not desired.
    183   const GURL& current_url() const { return current_url_; }
    184 
    185   // The type of page that is currently being displayed and how it is
    186   // displayed (e.g., with search term replacement or without).
    187   AutocompleteInput::PageClassification current_page_classification() const {
    188     return current_page_classification_;
    189   }
    190 
    191   // The type of input supplied.
    192   Type type() const { return type_; }
    193 
    194   // Returns parsed URL components.
    195   const url_parse::Parsed& parts() const { return parts_; }
    196 
    197   // The scheme parsed from the provided text; only meaningful when type_ is
    198   // URL.
    199   const string16& scheme() const { return scheme_; }
    200 
    201   // The input as a URL to navigate to, if possible.
    202   const GURL& canonicalized_url() const { return canonicalized_url_; }
    203 
    204   // Returns whether inline autocompletion should be prevented.
    205   bool prevent_inline_autocomplete() const {
    206     return prevent_inline_autocomplete_;
    207   }
    208 
    209   // Returns whether, given an input string consisting solely of a substituting
    210   // keyword, we should score it like a non-substituting keyword.
    211   bool prefer_keyword() const { return prefer_keyword_; }
    212 
    213   // Returns whether this input is allowed to be treated as an exact
    214   // keyword match.  If not, the default result is guaranteed not to be a
    215   // keyword search, even if the input is "<keyword> <search string>".
    216   bool allow_exact_keyword_match() const { return allow_exact_keyword_match_; }
    217 
    218   // See description of enum for details.
    219   MatchesRequested matches_requested() const { return matches_requested_; }
    220 
    221   // Resets all internal variables to the null-constructed state.
    222   void Clear();
    223 
    224  private:
    225   FRIEND_TEST_ALL_PREFIXES(AutocompleteProviderTest, GetDestinationURL);
    226 
    227   // NOTE: Whenever adding a new field here, please make sure to update the
    228   // Clear() method.
    229   string16 text_;
    230   size_t cursor_position_;
    231   GURL current_url_;
    232   AutocompleteInput::PageClassification current_page_classification_;
    233   Type type_;
    234   url_parse::Parsed parts_;
    235   string16 scheme_;
    236   GURL canonicalized_url_;
    237   bool prevent_inline_autocomplete_;
    238   bool prefer_keyword_;
    239   bool allow_exact_keyword_match_;
    240   MatchesRequested matches_requested_;
    241 };
    242 
    243 #endif  // CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_INPUT_H_
    244