Home | History | Annotate | Download | only in autocomplete
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_INPUT_H_
      6 #define CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_INPUT_H_
      7 
      8 #include <string>
      9 
     10 #include "base/basictypes.h"
     11 #include "base/gtest_prod_util.h"
     12 #include "base/strings/string16.h"
     13 #include "url/gurl.h"
     14 #include "url/url_parse.h"
     15 
     16 // The user input for an autocomplete query.  Allows copying.
     17 class AutocompleteInput {
     18  public:
     19   // Note that the type below may be misleading.  For example, "http:/" alone
     20   // cannot be opened as a URL, so it is marked as a QUERY; yet the user
     21   // probably intends to type more and have it eventually become a URL, so we
     22   // need to make sure we still run it through inline autocomplete.
     23   enum Type {
     24     INVALID,        // Empty input
     25     UNKNOWN,        // Valid input whose type cannot be determined
     26     URL,            // Input autodetected as a URL
     27     QUERY,          // Input autodetected as a query
     28     FORCED_QUERY,   // Input forced to be a query by an initial '?'
     29   };
     30 
     31   // Enumeration of the possible match query types. Callers who only need some
     32   // of the matches for a particular input can get answers more quickly by
     33   // specifying that upfront.
     34   enum MatchesRequested {
     35     // Only the best match in the whole result set matters.  Providers should at
     36     // most return synchronously-available matches, and if possible do even less
     37     // work, so that it's safe to ask for these repeatedly in the course of one
     38     // higher-level "synchronous" query.
     39     BEST_MATCH,
     40 
     41     // Only synchronous matches should be returned.
     42     SYNCHRONOUS_MATCHES,
     43 
     44     // All matches should be fetched.
     45     ALL_MATCHES,
     46   };
     47 
     48   // The type of page currently displayed.
     49   // Note: when adding an element to this enum, please add it at the end
     50   // and update omnibox_event.proto::PageClassification and
     51   // omnibox_edit_model.cc::ClassifyPage() too.
     52   enum PageClassification {
     53     // An invalid URL; shouldn't happen.
     54     INVALID_SPEC = 0,
     55 
     56     // chrome://newtab/.  This can be either the built-in version or a
     57     // replacement new tab page from an extension.  Note that when Instant
     58     // Extended is enabled, the new tab page will be reported as either
     59     // INSTANT_NTP_WITH_OMNIBOX_AS_STARTING_FOCUS or
     60     // INSTANT_NTP_WITH_FAKEBOX_AS_STARTING_FOCUS below,
     61     // unless an extension is replacing the new tab page, in which case
     62     // it will still be reported as NTP.
     63     NTP = 1,
     64 
     65     // about:blank.
     66     BLANK = 2,
     67 
     68     // The user's home page.  Note that if the home page is set to any
     69     // of the new tab page versions or to about:blank, then we'll
     70     // classify the page into those categories, not HOME_PAGE.
     71     HOME_PAGE = 3,
     72 
     73     // The catch-all entry for everything not included somewhere else on this
     74     // list.  NOTE(review): value 5 is skipped below; presumably retired--confirm.
     75     OTHER = 4,
     76 
     77     // The user is on a search result page that's doing search term
     78     // replacement, meaning the search terms should've appeared in the omnibox
     79     // before the user started editing it, not the URL of the page.
     80     SEARCH_RESULT_PAGE_DOING_SEARCH_TERM_REPLACEMENT = 6,
     81 
     82     // The new tab page in which this omnibox interaction first started
     83     // with the user having focus in the omnibox.
     84     INSTANT_NTP_WITH_OMNIBOX_AS_STARTING_FOCUS = 7,
     85 
     86     // The new tab page in which this omnibox interaction first started
     87     // with the user having focus in the fakebox.
     88     INSTANT_NTP_WITH_FAKEBOX_AS_STARTING_FOCUS = 8,
     89 
     90     // The user is on a search result page that's not doing search term
     91     // replacement, meaning the URL of the page should've appeared in the
     92     // omnibox before the user started editing it, not the search terms.
     93     SEARCH_RESULT_PAGE_NO_SEARCH_TERM_REPLACEMENT = 9
     94   };
     95 
     96   AutocompleteInput();
     97   // |text| and |cursor_position| represent the input query and the location
     98   // of the cursor within the query, respectively.  |cursor_position| may be
     99   // set to base::string16::npos if the input |text| doesn't come directly
    100   // from the user's typing.
    101   //
    102   // |desired_tld| is the user's desired TLD, if one is not already present in
    103   // the text to autocomplete.  When this is non-empty, it also implies that
    104   // "www." should be prepended to the domain where possible. The |desired_tld|
    105   // should not contain a leading '.' (use "com" instead of ".com").
    106   //
    107   // If |current_url| is set to a valid search result page URL, providers can
    108   // use it to perform query refinement. For example, if it is set to an image
    109   // search result page, the search provider may generate an image search URL.
    110   // Query refinement is only used by mobile ports, so only these set
    111   // |current_url| to a non-empty string.
    112   //
    113   // |current_page_classification| represents the type of page the user is
    114   // viewing and manner in which the user is accessing the omnibox; it's
    115   // more than simply the URL.  It includes, for example, whether the page
    116   // is a search result page doing search term replacement or not.
    117   //
    118   // |prevent_inline_autocomplete| is true if the generated result set should
    119   // not require inline autocomplete for the default match.  This is difficult
    120   // to explain in the abstract; the practical use case is that after the user
    121   // deletes text in the edit, the HistoryURLProvider should make sure not to
    122   // promote a match requiring inline autocomplete too highly.
    123   //
    124   // |prefer_keyword| should be true when the keyword UI is onscreen; this will
    125   // bias the autocomplete result set toward the keyword provider when the input
    126   // string is a bare keyword.
    127   //
    128   // |allow_exact_keyword_match| should be false when triggering keyword mode on
    129   // the input string would be surprising or wrong, e.g. when highlighting text
    130   // in a page and telling the browser to search for it or navigate to it. This
    131   // parameter only applies to substituting keywords.
    132   //
    133   // If |matches_requested| is BEST_MATCH or SYNCHRONOUS_MATCHES the controller
    134   // asks the providers to only return matches which are synchronously
    135   // available, which should mean that all providers will be done immediately.
    136   AutocompleteInput(const base::string16& text,
    137                     size_t cursor_position,
    138                     const base::string16& desired_tld,
    139                     const GURL& current_url,
    140                     PageClassification current_page_classification,
    141                     bool prevent_inline_autocomplete,
    142                     bool prefer_keyword,
    143                     bool allow_exact_keyword_match,
    144                     MatchesRequested matches_requested);
    145   ~AutocompleteInput();
    146 
    147   // If |type| is FORCED_QUERY and |text| starts with '?', the '?' is removed.
    148   // Returns the number of leading characters removed.
    149   static size_t RemoveForcedQueryStringIfNecessary(Type type,
    150                                                    base::string16* text);
    151 
    152   // Converts |type| to a string representation.  Used in logging.
    153   static std::string TypeToString(Type type);
    154 
    155   // Parses |text| and returns the type of input this will be interpreted as.
    156   // The components of the input are stored in the output parameter |parts|, if
    157   // it is non-NULL. The scheme is stored in |scheme| if it is non-NULL. The
    158   // canonicalized URL is stored in |canonicalized_url|; however, this URL is
    159   // not guaranteed to be valid, especially if the parsed type is, e.g., QUERY.
    160   static Type Parse(const base::string16& text,
    161                     const base::string16& desired_tld,
    162                     url_parse::Parsed* parts,
    163                     base::string16* scheme,
    164                     GURL* canonicalized_url);
    165 
    166   // Parses |text| and fills |scheme| and |host| with the positions of those
    167   // components.  The results are almost the same as those of Parse(), but if
    168   // the scheme is view-source, this function returns the positions of the
    169   // scheme and host in the URL qualified by the "view-source:" prefix.
    170   static void ParseForEmphasizeComponents(const base::string16& text,
    171                                           url_parse::Component* scheme,
    172                                           url_parse::Component* host);
    173 
    174   // Code that wants to format URLs with a format flag including
    175   // net::kFormatUrlOmitTrailingSlashOnBareHostname risks changing the meaning
    176   // if the result is then parsed as AutocompleteInput.  Such code can call
    177   // this function with the URL and its formatted string, and it will return a
    178   // formatted string with the same meaning as the original URL (i.e. it will
    179   // re-append a slash if necessary).
    180   static base::string16 FormattedStringWithEquivalentMeaning(
    181       const GURL& url,
    182       const base::string16& formatted_url);
    183 
    184   // Returns the number of non-empty components in |parts| besides the host.
    185   static int NumNonHostComponents(const url_parse::Parsed& parts);
    186 
    187   // Returns whether |text| begins with "http:" or "view-source:http:".
    188   static bool HasHTTPScheme(const base::string16& text);
    189 
    190   // User-provided text to be completed.
    191   const base::string16& text() const { return text_; }
    192 
    193   // Returns 0-based cursor position within |text_| or base::string16::npos if
    194   // not used.
    195   size_t cursor_position() const { return cursor_position_; }
    196 
    197   // Use of this setter is risky, since no other internal state is updated
    198   // besides |text_|, |cursor_position_| and |parts_|.  Only callers who know
    199   // that they're not changing the type/scheme/etc. should use this.
    200   void UpdateText(const base::string16& text,
    201                   size_t cursor_position,
    202                   const url_parse::Parsed& parts);
    203 
    204   // The current URL, or an invalid GURL if query refinement is not desired.
    205   const GURL& current_url() const { return current_url_; }
    206 
    207   // The type of page that is currently being displayed and how it is
    208   // displayed (e.g., with search term replacement or without).
    209   AutocompleteInput::PageClassification current_page_classification() const {
    210     return current_page_classification_;
    211   }
    212 
    213   // The type of input supplied.
    214   Type type() const { return type_; }
    215 
    216   // Returns parsed URL components.
    217   const url_parse::Parsed& parts() const { return parts_; }
    218 
    219   // The scheme parsed from the provided text; only meaningful when type_ is
    220   // URL.
    221   const base::string16& scheme() const { return scheme_; }
    222 
    223   // The input as a URL to navigate to, if possible.
    224   const GURL& canonicalized_url() const { return canonicalized_url_; }
    225 
    226   // Returns whether inline autocompletion should be prevented.
    227   bool prevent_inline_autocomplete() const {
    228     return prevent_inline_autocomplete_;
    229   }
    230 
    231   // Returns whether, given an input string consisting solely of a substituting
    232   // keyword, we should score it like a non-substituting keyword.
    233   bool prefer_keyword() const { return prefer_keyword_; }
    234 
    235   // Returns whether this input is allowed to be treated as an exact
    236   // keyword match.  If not, the default result is guaranteed not to be a
    237   // keyword search, even if the input is "<keyword> <search string>".
    238   bool allow_exact_keyword_match() const { return allow_exact_keyword_match_; }
    239 
    240   // See description of enum for details.
    241   MatchesRequested matches_requested() const { return matches_requested_; }
    242 
    243   // Resets all internal variables to the null-constructed state.
    244   void Clear();
    245 
    246  private:
    247   FRIEND_TEST_ALL_PREFIXES(AutocompleteProviderTest, GetDestinationURL);
    248 
    249   // NOTE: Whenever adding a new field here, please make sure to update the
    250   // Clear() method.
    251   base::string16 text_;
    252   size_t cursor_position_;
    253   GURL current_url_;
    254   AutocompleteInput::PageClassification current_page_classification_;
    255   Type type_;
    256   url_parse::Parsed parts_;
    257   base::string16 scheme_;
    258   GURL canonicalized_url_;
    259   bool prevent_inline_autocomplete_;
    260   bool prefer_keyword_;
    261   bool allow_exact_keyword_match_;
    262   MatchesRequested matches_requested_;
    263 };
    264 
    265 #endif  // CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_INPUT_H_
    266