Home | History | Annotate | Download | only in autocomplete
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_MATCH_H_
      6 #define CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_MATCH_H_
      7 
      8 #include <map>
      9 #include <string>
     10 #include <vector>
     11 
     12 #include "base/memory/scoped_ptr.h"
     13 #include "chrome/browser/search_engines/template_url.h"
     14 #include "chrome/common/autocomplete_match_type.h"
     15 #include "content/public/common/page_transition_types.h"
     16 #include "url/gurl.h"
     17 
     18 class AutocompleteProvider;
     19 class Profile;
     20 class TemplateURL;
     21 
     22 namespace base {
     23 class Time;
     24 }  // namespace base
     25 
        // Property-name strings.  NOTE(review): presumably used as |property| keys
        // for AutocompleteMatch::RecordAdditionalInfo() -- confirm against callers.
     26 const char kACMatchPropertyInputText[] = "input text";
     27 const char kACMatchPropertyContentsPrefix[] = "match contents prefix";
     28 const char kACMatchPropertyContentsStartIndex[] = "match contents start index";
     29 
     30 // AutocompleteMatch ----------------------------------------------------------
     31 
     32 // A single result line with classified spans.  The autocomplete popup displays
     33 // the 'contents' and the 'description' (the description is optional) in the
     34 // autocomplete dropdown, and fills in 'fill_into_edit' into the textbox when
     35 // that line is selected.  fill_into_edit may be the same as 'description' for
     36 // things like URLs, but may be different for searches or other providers.  For
     37 // example, a search result may say "Search for asdf" as the description, but
     38 // "asdf" should appear in the box.
     39 struct AutocompleteMatch {
     40   // Autocomplete matches contain strings that are classified according to a
     41   // separate vector of styles.  This vector associates flags with particular
     42   // string segments, and must be in sorted order.  All text must be associated
     43   // with some kind of classification.  Even if a match has no distinct
     44   // segments, its vector should contain an entry at offset 0 with no flags.
     45   //
     46   // Example: The user typed "goog"
     47   //   http://www.google.com/        Google
     48   //   ^          ^   ^              ^   ^
     49   //   0,         |   15,            |   4,
     50   //              11,match           0,match
     51   //
     52   // This structure holds the classification information for each span.
     53   struct ACMatchClassification {
     54     // The values in here are not mutually exclusive -- use them like a
     55     // bitfield.  This also means we use "int" instead of this enum type when
     56     // passing the values around, so the compiler doesn't complain.
     57     enum Style {
     58       NONE  = 0,
     59       URL   = 1 << 0,  // A URL
     60       MATCH = 1 << 1,  // A match for the user's search term
     61       DIM   = 1 << 2,  // "Helper text"
     62     };
     63 
     64     ACMatchClassification(size_t offset, int style)
     65         : offset(offset),
     66           style(style) {
     67     }
     68 
     69     // Offset within the string at which this classification starts.
     70     size_t offset;
     71 
     72     int style;  // Bitwise OR of Style values for this span.
     73   };
     74 
     75   typedef std::vector<ACMatchClassification> ACMatchClassifications;
     76 
     77   // Type used by providers to attach additional, optional information to
     78   // an AutocompleteMatch.
     79   typedef std::map<std::string, std::string> AdditionalInfo;
     80 
     81   // The type of this match.
     82   typedef AutocompleteMatchType::Type Type;
     83 
     84   // Null-terminated array of characters that are not valid within |contents|
     85   // and |description| strings.
     86   static const base::char16 kInvalidChars[];
     87 
     88   AutocompleteMatch();
     89   AutocompleteMatch(AutocompleteProvider* provider,
     90                     int relevance,
     91                     bool deletable,
     92                     Type type);
     93   AutocompleteMatch(const AutocompleteMatch& match);
     94   ~AutocompleteMatch();
     95 
     96   // Copy assignment operator (declared explicitly, like the copy constructor).
     97   AutocompleteMatch& operator=(const AutocompleteMatch& match);
     98 
     99   // Converts |type| to a resource identifier for the appropriate icon for this
    100   // type to show in the completion popup.
    101   static int TypeToIcon(Type type);
    102 
    103   // Converts |type| to a resource identifier for the appropriate icon for this
    104   // type to show in the location bar.
    105   static int TypeToLocationBarIcon(Type type);
    106 
    107   // Comparison function for determining when one match is better than another.
    108   static bool MoreRelevant(const AutocompleteMatch& elem1,
    109                            const AutocompleteMatch& elem2);
    110 
    111   // Comparison function for removing matches with duplicate destinations.
    112   // Destinations are compared using |stripped_destination_url|.  Pairs of
    113   // matches with empty destinations are treated as differing, since empty
    114   // destinations are expected for non-navigable matches.
    115   static bool DestinationsEqual(const AutocompleteMatch& elem1,
    116                                 const AutocompleteMatch& elem2);
    117 
    118   // Helper functions for classes creating matches:
    119   // Fills in the classifications for |text|, using |style| as the base style
    120   // and marking the first instance of |find_text| as a match.  (This match
    121   // will also not be dimmed, if |style| has DIM set.)
    122   static void ClassifyMatchInString(const base::string16& find_text,
    123                                     const base::string16& text,
    124                                     int style,
    125                                     ACMatchClassifications* classifications);
    126 
    127   // Similar to ClassifyMatchInString(), but for cases where the range to mark
    128   // as matching is already known (avoids calling find()).  This can be helpful
    129   // when find() would be misleading (e.g. you want to mark the second match in
    130   // a string instead of the first).
    131   static void ClassifyLocationInString(size_t match_location,
    132                                        size_t match_length,
    133                                        size_t overall_length,
    134                                        int style,
    135                                        ACMatchClassifications* classifications);
    136 
    137   // Returns a new vector of classifications containing the merged contents of
    138   // |classifications1| and |classifications2|.
    139   static ACMatchClassifications MergeClassifications(
    140       const ACMatchClassifications& classifications1,
    141       const ACMatchClassifications& classifications2);
    142 
    143   // Converts classifications to and from a serialized string representation
    144   // (using comma-separated integers to sequentially list positions and styles).
    145   static std::string ClassificationsToString(
    146       const ACMatchClassifications& classifications);
    147   static ACMatchClassifications ClassificationsFromString(
    148       const std::string& serialized_classifications);
    149 
    150   // Adds a classification to the end of |classifications| iff its style is
    151   // different from the last existing classification.  |offset| must be larger
    152   // than the offset of the last classification in |classifications|.
    153   static void AddLastClassificationIfNecessary(
    154       ACMatchClassifications* classifications,
    155       size_t offset,
    156       int style);
    157 
    158   // Removes invalid characters from |text|. Should be called on strings coming
    159   // from external sources (such as extensions) before assigning to |contents|
    160   // or |description|.
    161   static base::string16 SanitizeString(const base::string16& text);
    162 
    163   // Convenience function to check if |type| is a search (as opposed to a URL or
    164   // an extension).
    165   static bool IsSearchType(Type type);
    166 
    167   // Convenience function to check if |type| is a special search suggest type -
    168   // like entity, personalized, profile or postfix.
    169   static bool IsSpecializedSearchType(Type type);
    170 
    171   // Copies the destination_url with "www." stripped off to
    172   // |stripped_destination_url| and also converts https protocol to
    173   // http.  These two conversions are merely to allow comparisons to
    174   // remove likely duplicates; these URLs are not used as actual
    175   // destination URLs.  This method is invoked internally by the
    176   // AutocompleteResult and does not normally need to be invoked.
    177   // If |profile| is not NULL, it is used to get a template URL corresponding
    178   // to this match.  The template is used to strip off query args other than
    179   // the search terms themselves that would otherwise prevent proper
    180   // deduping.
    181   void ComputeStrippedDestinationURL(Profile* profile);
    182 
    183   // Gets data relevant to whether there should be any special keyword-related
    184   // UI shown for this match.  If this match represents a selected keyword, i.e.
    185   // the UI should be "in keyword mode", |keyword| will be set to the keyword
    186   // and |is_keyword_hint| will be set to false.  If this match has a non-NULL
    187   // |associated_keyword|, i.e. we should show a "Press [tab] to search ___"
    188   // hint and allow the user to toggle into keyword mode, |keyword| will be set
    189   // to the associated keyword and |is_keyword_hint| will be set to true.  Note
    190   // that only one of these states can be in effect at once.  In all other
    191   // cases, |keyword| will be cleared, even when our member variable |keyword|
    192   // is non-empty -- such as with non-substituting keywords or matches that
    193   // represent searches using the default search engine.  See also
    194   // GetSubstitutingExplicitlyInvokedKeyword().
    195   void GetKeywordUIState(Profile* profile,
    196                          base::string16* keyword,
    197                          bool* is_keyword_hint) const;
    198 
    199   // Returns |keyword|, but only if it represents a substituting keyword that
    200   // the user has explicitly invoked.  If for example this match represents a
    201   // search with the default search engine (and the user didn't explicitly
    202   // invoke its keyword), this returns the empty string.  The result is that
    203   // this function returns a non-empty string in the same cases as when the UI
    204   // should show up as being "in keyword mode".
    205   base::string16 GetSubstitutingExplicitlyInvokedKeyword(
    206       Profile* profile) const;
    207 
    208   // Returns the TemplateURL associated with this match.  This may be NULL if
    209   // the match has no keyword OR if the keyword no longer corresponds to a valid
    210   // TemplateURL.  See comments on |keyword| below.
    211   // If |allow_fallback_to_destination_host| is true and the keyword does
    212   // not map to a valid TemplateURL, we'll then check for a TemplateURL that
    213   // corresponds to the destination_url's hostname.
    214   TemplateURL* GetTemplateURL(Profile* profile,
    215                               bool allow_fallback_to_destination_host) const;
    216 
    217   // Adds optional information to the |additional_info| dictionary.
    218   void RecordAdditionalInfo(const std::string& property,
    219                             const std::string& value);
    220   void RecordAdditionalInfo(const std::string& property, int value);
    221   void RecordAdditionalInfo(const std::string& property,
    222                             const base::Time& value);
    223 
    224   // Returns the value recorded for |property| in the |additional_info|
    225   // dictionary.  Returns the empty string if no such value exists.
    226   std::string GetAdditionalInfo(const std::string& property) const;
    227 
    228   // Returns whether this match is a "verbatim" match: a URL navigation directly
    229   // to the user's input, a search for the user's input with the default search
    230   // engine, or a "keyword mode" search for the query portion of the user's
    231   // input.  Note that rare or unusual types that could be considered verbatim,
    232   // such as keyword engine matches or extension-provided matches, aren't
    233   // detected by IsVerbatimType(), as the user will not be able to infer
    234   // what will happen when they press enter in those cases if the match
    235   // is not shown.
    236   bool IsVerbatimType() const;
    237 
    238   // Returns whether this match or any duplicate of this match can be deleted.
    239   // This is used to decide whether we should call DeleteMatch().
    240   bool SupportsDeletion() const;
    241 
    242   // The provider of this match, used to remember which provider the user had
    243   // selected when the input changes. This may be NULL, in which case there is
    244   // no provider (or memory of the user's selection).
    245   AutocompleteProvider* provider;
    246 
    247   // The relevance of this match. See table in autocomplete.h for scores
    248   // returned by various providers. This is used to rank matches among all
    249   // responding providers, so different providers must be carefully tuned to
    250   // supply matches with appropriate relevance.
    251   //
    252   // TODO(pkasting): http://b/1111299 This should be calculated algorithmically,
    253   // rather than being a fairly fixed value defined by that table.
    254   int relevance;
    255 
    256   // How many times this result was typed in / selected from the omnibox.
    257   // Only set for some providers and result_types.  If it is not set,
    258   // its value is -1.  At the time of writing this comment, it is only
    259   // set for matches from HistoryURL and HistoryQuickProvider.
    260   int typed_count;
    261 
    262   // True if the user should be able to delete this match.
    263   bool deletable;
    264 
    265   // This string is loaded into the location bar when the item is selected
    266   // by pressing the arrow keys. This may be different than a URL; for example,
    267   // for search suggestions, this would just be the search terms.
    268   base::string16 fill_into_edit;
    269 
    270   // The inline autocompletion to display after the user's typing in the
    271   // omnibox, if this match becomes the default match.  It may be empty.
    272   base::string16 inline_autocompletion;
    273 
    274   // If false, the omnibox should prevent this match from being the
    275   // default match.  Providers should set this to true only if the
    276   // user's input, plus any inline autocompletion on this match, would
    277   // lead the user to expect a navigation to this match's destination.
    278   // For example, with input "foo", a search for "bar" or navigation
    279   // to "bar.com" should not set this flag; a navigation to "foo.com"
    280   // should only set this flag if ".com" will be inline autocompleted;
    281   // and a navigation to "foo/" (an intranet host) or search for "foo"
    282   // should set this flag.
    283   bool allowed_to_be_default_match;
    284 
    285   // The URL to actually load when the autocomplete item is selected. This URL
    286   // should be canonical so we can compare URLs with strcmp to avoid dupes.
    287   // It may be empty if there is no possible navigation.
    288   GURL destination_url;
    289 
    290   // Set by ComputeStrippedDestinationURL(); used only to find duplicates.
    291   GURL stripped_destination_url;
    292 
    293   // The main text displayed in the address bar dropdown.
    294   base::string16 contents;
    295   ACMatchClassifications contents_class;
    296 
    297   // Additional helper text for each entry, such as a title or description.
    298   base::string16 description;
    299   ACMatchClassifications description_class;
    300 
    301   // A rich-format version of the display for the dropdown.
    302   base::string16 answer_contents;
    303   base::string16 answer_type;
    304 
    305   // The transition type to use when the user opens this match.  By default
    306   // this is TYPED.  Providers whose matches do not look like URLs should set
    307   // it to GENERATED.
    308   content::PageTransition transition;
    309 
    310   // True when this match is the "what you typed" match from the history
    311   // system.
    312   bool is_history_what_you_typed_match;
    313 
    314   // Type of this match.
    315   Type type;
    316 
    317   // Set with a keyword provider match if this match can show a keyword hint.
    318   // For example, if this is a SearchProvider match for "www.amazon.com",
    319   // |associated_keyword| could be a KeywordProvider match for "amazon.com".
    320   scoped_ptr<AutocompleteMatch> associated_keyword;
    321 
    322   // The keyword of the TemplateURL the match originated from.  This is nonempty
    323   // for both explicit "keyword mode" matches as well as matches for the default
    324   // search provider (so, any match for which we're doing substitution); it
    325   // doesn't imply (alone) that the UI is going to show a keyword hint or
    326   // keyword mode.  For that, see GetKeywordUIState() or
    327   // GetSubstitutingExplicitlyInvokedKeyword().
    328   //
    329   // CAUTION: The TemplateURL associated with this keyword may be deleted or
    330   // modified while the AutocompleteMatch is alive.  This means anyone who
    331   // accesses it must perform any necessary sanity checks before blindly using
    332   // it!
    333   base::string16 keyword;
    334 
    335   // True if the user has starred the destination URL.
    336   bool starred;
    337 
    338   // True if this match is from a previous result.
    339   bool from_previous;
    340 
    341   // Optional search terms args.  If present,
    342   // AutocompleteController::UpdateAssistedQueryStats() will incorporate this
    343   // data with additional data it calculates and pass the completed struct to
    344   // TemplateURLRef::ReplaceSearchTerms() to reset the match's |destination_url|
    345   // after the complete set of matches in the AutocompleteResult has been chosen
    346   // and sorted.  Most providers will leave this as NULL, which will cause the
    347   // AutocompleteController to do no additional transformations.
    348   scoped_ptr<TemplateURLRef::SearchTermsArgs> search_terms_args;
    349 
    350   // Information dictionary into which each provider can optionally record a
    351   // property and associated value and which is presented in chrome://omnibox.
    352   AdditionalInfo additional_info;
    353 
    354   // A list of matches culled during de-duplication process, retained to
    355   // ensure if a match is deleted, the duplicates are deleted as well.
    356   std::vector<AutocompleteMatch> duplicate_matches;
    357 
    358 #ifndef NDEBUG
    359   // Does a data integrity check on this match.
    360   void Validate() const;
    361 
    362   // Checks one text/classifications pair for valid values.
    363   void ValidateClassifications(
    364       const base::string16& text,
    365       const ACMatchClassifications& classifications) const;
    366 #endif
    367 };
    368 
        // Namespace-scope shorthands, so users of these types need not qualify them
        // with AutocompleteMatch::.  ACMatches is a list of matches.
    369 typedef AutocompleteMatch::ACMatchClassification ACMatchClassification;
    370 typedef std::vector<ACMatchClassification> ACMatchClassifications;
    371 typedef std::vector<AutocompleteMatch> ACMatches;
    372 
    373 #endif  // CHROME_BROWSER_AUTOCOMPLETE_AUTOCOMPLETE_MATCH_H_
    374