Home | History | Annotate | Download | only in search_engines
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
      6 #define CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
      7 #pragma once
      8 
      9 #include <string>
     10 #include <vector>
     11 
     12 #include "base/gtest_prod_util.h"
     13 #include "base/time.h"
     14 #include "chrome/browser/search_engines/search_engine_type.h"
     15 #include "chrome/browser/search_engines/template_url_id.h"
     16 #include "googleurl/src/gurl.h"
     17 
     18 class PrefService;
     19 class SearchTermsData;
     20 class TemplateURL;
     21 class WebDataService;
     22 struct WDKeywordsResult;
     23 
     24 // TemplateURL represents the relevant portions of the Open Search Description
     25 // Document (http://www.opensearch.org/Specifications/OpenSearch).
     26 // The main use case for TemplateURL is to use the TemplateURLRef returned by
     27 // suggestions_url or url for keyword/suggestion expansion:
     28 // . suggestions_url describes a URL that is ideal for as-you-type suggestions.
     29 //   The returned results are in the mime type application/x-suggestions+json.
     30 // . url describes a URL that may be used as a shortcut. Returned results are
     31 //   text/html.
     32 // Before using either one, make sure it's non-NULL, and if you intend to use
     33 // it to replace search terms, make sure SupportsReplacement returns true.
     34 // To use either URL invoke the ReplaceSearchTerms method on the corresponding
     35 // TemplateURLRef.
     36 //
     37 // For files parsed from the Web, be sure to invoke IsValid. IsValid returns
     38 // true if the URL could be parsed.
     39 //
     40 // Both TemplateURL and TemplateURLRef have value semantics. This allows the
     41 // UI to create a copy while the user modifies the values.
     42 class TemplateURLRef {
          // A TemplateURLRef holds one raw, parameterized URL (see url()) together
          // with its index and page offsets. The parsed form of the URL and the
          // list of replaceable terms are cached in the mutable members below and
          // are computed on demand (see ParseIfNecessary()).
     43  public:
     44   // Magic numbers to pass to ReplaceSearchTerms() for the |accepted_suggestion|
     45   // parameter.  Most callers aren't using Suggest capabilities and should just
     46   // pass NO_SUGGESTIONS_AVAILABLE.
     47   // NOTE: Because positive values are meaningful, make sure these are negative!
     48   enum AcceptedSuggestion {
     49     NO_SUGGESTION_CHOSEN = -1,
     50     NO_SUGGESTIONS_AVAILABLE = -2,
     51   };
     52 
     53   TemplateURLRef();
     54 
          // |url| is the raw (unparsed) URL; |index_offset| and |page_offset| are
          // the values later reported by index_offset() and page_offset().
          // NOTE(review): the constructor body is not visible in this header --
          // confirm that the arguments are stored verbatim.
     55   TemplateURLRef(const std::string& url, int index_offset, int page_offset);
     56 
     57   ~TemplateURLRef();
     58 
     59   // Returns true if this URL supports replacement.
     60   bool SupportsReplacement() const;
     61 
     62   // Like SupportsReplacement but usable on threads other than the UI thread.
     63   bool SupportsReplacementUsingTermsData(
     64       const SearchTermsData& search_terms_data) const;
     65 
     66   // Returns a string that is the result of replacing the search terms in
     67   // the url with the specified value.
     68   //
     69   // If this TemplateURLRef does not support replacement (SupportsReplacement
     70   // returns false), an empty string is returned.
     71   //
     72   // The TemplateURL is used to determine the input encoding for the term.
          // |accepted_suggestion| is either one of the AcceptedSuggestion values or
          // a non-negative value (positive values are meaningful, see the enum).
     73   std::string ReplaceSearchTerms(
     74       const TemplateURL& host,
     75       const string16& terms,
     76       int accepted_suggestion,
     77       const string16& original_query_for_suggestion) const;
     78 
     79   // Just like ReplaceSearchTerms except that it takes SearchTermsData to supply
     80   // the data for some search terms. Most of the time ReplaceSearchTerms should
     81   // be called.
     82   std::string ReplaceSearchTermsUsingTermsData(
     83       const TemplateURL& host,
     84       const string16& terms,
     85       int accepted_suggestion,
     86       const string16& original_query_for_suggestion,
     87       const SearchTermsData& search_terms_data) const;
     88 
     89   // Returns the raw URL. None of the parameters will have been replaced.
     90   const std::string& url() const { return url_; }
     91 
     92   // Returns the index number of the first search result.
     93   int index_offset() const { return index_offset_; }
     94 
     95   // Returns the page number of the first search result.
     96   int page_offset() const { return page_offset_; }
     97 
     98   // Returns true if the TemplateURLRef is valid. An invalid TemplateURLRef is
     99   // one that contains unknown terms, or invalid characters.
    100   bool IsValid() const;
    101 
    102   // Like IsValid but usable on threads other than the UI thread.
    103   bool IsValidUsingTermsData(const SearchTermsData& search_terms_data) const;
    104 
    105   // Returns a string representation of this TemplateURLRef suitable for
    106   // display. The display format is the same as the format used by Firefox.
    107   string16 DisplayURL() const;
    108 
    109   // Converts a string as returned by DisplayURL back into a string as
    110   // understood by TemplateURLRef.
    111   static std::string DisplayURLToURLRef(const string16& display_url);
    112 
    113   // If this TemplateURLRef is valid and contains one search term, this returns
    114   // the host/path of the URL, otherwise this returns an empty string.
    115   const std::string& GetHost() const;
    116   const std::string& GetPath() const;
    117 
    118   // If this TemplateURLRef is valid and contains one search term, this returns
    119   // the key of the search term, otherwise this returns an empty string.
    120   const std::string& GetSearchTermKey() const;
    121 
    122   // Converts the specified term in the encoding of the host TemplateURL to a
    123   // string16.
    124   string16 SearchTermToString16(const TemplateURL& host,
    125                                 const std::string& term) const;
    126 
    127   // Returns true if this TemplateURLRef has a replacement term of
    128   // {google:baseURL} or {google:baseSuggestURL}.
    129   bool HasGoogleBaseURLs() const;
    130 
    131   // Returns true if both refs are NULL or have the same values.
    132   static bool SameUrlRefs(const TemplateURLRef* ref1,
    133                           const TemplateURLRef* ref2);
    134 
    135   // Collects metrics on whether searches through Google are sent with an RLZ
    136   // string.
    136   void CollectRLZMetrics() const;
    137 
    138  private:
    139   friend class SearchHostToURLsMapTest;
    140   friend class TemplateURL;
    141   friend class TemplateURLModelTestUtil;
    142   friend class TemplateURLTest;
          // Grants the listed unit tests access to the private members.
    143   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterKnown);
    144   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterUnknown);
    145   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLEmpty);
    146   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoTemplateEnd);
    147   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoKnownParameters);
    148   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLTwoParameters);
    149   FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNestedParameter);
    150 
    151   // Enumeration of the known types.
    152   enum ReplacementType {
    153     ENCODING,
    154     GOOGLE_ACCEPTED_SUGGESTION,
    155     GOOGLE_BASE_URL,
    156     GOOGLE_BASE_SUGGEST_URL,
    157     GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION,
    158     GOOGLE_RLZ,
    159     GOOGLE_UNESCAPED_SEARCH_TERMS,
    160     LANGUAGE,
    161     SEARCH_TERMS,
    162   };
    163 
    164   // Used to identify an element of the raw url that can be replaced.
          // |type| is the kind of term and |index| is its position in the string.
    165   struct Replacement {
    166     Replacement(ReplacementType type, size_t index)
    167         : type(type), index(index) {}
    168     ReplacementType type;
    169     size_t index;
    170   };
    171 
    172   // The list of elements to replace.
    173   typedef std::vector<struct Replacement> Replacements;
    174 
    175   // TemplateURLRef internally caches values to make replacement quick. This
    176   // method invalidates any cached values.
    177   void InvalidateCachedValues() const;
    178 
    179   // Resets the url.
    180   void Set(const std::string& url, int index_offset, int page_offset);
    181 
    182   // Parses the parameter in url at the specified offset. start/end specify the
    183   // range of the parameter in the url, including the braces. If the parameter
    184   // is valid, url is updated to reflect the appropriate parameter. If
    185   // the parameter is one of the known parameters an element is added to
    186   // replacements indicating the type and range of the element. The original
    187   // parameter is erased from the url.
    188   //
    189   // If the parameter is not a known parameter, it's not erased and false is
    190   // returned.
    191   bool ParseParameter(size_t start,
    192                       size_t end,
    193                       std::string* url,
    194                       Replacements* replacements) const;
    195 
    196   // Parses the specified url, replacing parameters as necessary. If
    197   // successful, valid is set to true, and the parsed url is returned. For all
    198   // known parameters that are encountered an entry is added to replacements.
    199   // If there is an error parsing the url, valid is set to false, and an empty
    200   // string is returned.
    201   std::string ParseURL(const std::string& url,
    202                        Replacements* replacements,
    203                        bool* valid) const;
    204 
    205   // If the url has not yet been parsed, ParseURL is invoked.
    206   // NOTE: While this is const, it modifies mutable cached state such as
    207   // parsed_, valid_ and parsed_url_.
    208   void ParseIfNecessary() const;
    209 
    210   // Like ParseIfNecessary but usable on threads other than the UI thread.
    211   void ParseIfNecessaryUsingTermsData(
    212       const SearchTermsData& search_terms_data) const;
    213 
    214   // Extracts the query key and host from the url.
    215   void ParseHostAndSearchTermKey(
    216       const SearchTermsData& search_terms_data) const;
    217 
    218   // Used by tests to set the value for the Google base url. This takes
    219   // ownership of the given std::string.
    220   static void SetGoogleBaseURL(std::string* google_base_url);
    221 
    222   // The raw URL. Whereas this contains all the terms (such as {searchTerms}),
    223   // parsed_url_ has them all stripped out.
    224   std::string url_;
    225 
    226   // indexOffset defined for the Url element.
    227   int index_offset_;
    228 
    229   // pageOffset defined for the Url element.
    230   int page_offset_;
    231 
    232   // Whether the URL has been parsed.
    233   mutable bool parsed_;
    234 
    235   // Whether the url was successfully parsed.
    236   mutable bool valid_;
    237 
    238   // The parsed URL. All terms have been stripped out of this with
    239   // replacements_ giving the index of the terms to replace.
    240   mutable std::string parsed_url_;
    241 
    242   // Do we support replacement?
    243   mutable bool supports_replacements_;
    244 
    245   // The replaceable parts of url (parsed_url_). These are ordered by index
    246   // into the string, and may be empty.
    247   mutable Replacements replacements_;
    248 
    249   // Host, path and key of the search term. These are only set if the url
    250   // contains one search term.
    251   mutable std::string host_;
    252   mutable std::string path_;
    253   mutable std::string search_term_key_;
    254 };
    255 
    256 // Describes the relevant portions of a single OSD document.
    257 class TemplateURL {
          // Holds the descriptive data for one entry (names, keyword, images,
          // languages, encodings, usage data) plus up to three TemplateURLRefs: the
          // search url, the suggestions url and the instant url. A ref whose raw
          // url is empty is reported to callers as NULL by the accessors below.
    258  public:
    259   // Describes a single image reference. Each TemplateURL may have
    260   // any number (including 0) of ImageRefs.
    261   //
    262   // If a TemplateURL has no images, the favicon for the generated URL
    263   // should be used.
    264   struct ImageRef {
            // Creates an ImageRef without a URL; |url| is default-constructed.
    265     ImageRef(const std::string& type, int width, int height)
    266         : type(type), width(width), height(height) {
    267     }
    268 
    269     ImageRef(const std::string& type, int width, int height, const GURL& url)
    270       : type(type), width(width), height(height), url(url) {
    271     }
    272 
    273     // Mime type for the image.
    274     // ICO image will have the format: image/x-icon or image/vnd.microsoft.icon
    275     std::string type;
    276 
    277     // Size of the image
    278     int width;
    279     int height;
    280 
    281     // URL of the image.
    282     GURL url;
    283   };
    284 
    285   // Generates a favicon URL from the specified url.
    286   static GURL GenerateFaviconURL(const GURL& url);
    287 
    288   // Returns true if |turl| is non-null and has a search URL that supports
    289   // replacement.
    290   static bool SupportsReplacement(const TemplateURL* turl);
    291 
    292   // Like SupportsReplacement but usable on threads other than the UI thread.
    293   static bool SupportsReplacementUsingTermsData(
    294       const TemplateURL* turl,
    295       const SearchTermsData& search_terms_data);
    296 
    297   TemplateURL();
    298   ~TemplateURL();
    299 
    300   // A short description of the template. This is the name we show to the user
    301   // in various places that use keywords. For example, the location bar shows
    302   // this when the user selects the keyword.
    303   void set_short_name(const string16& short_name) {
    304     short_name_ = short_name;
    305   }
    306   string16 short_name() const { return short_name_; }
    307 
    308   // An accessor for the short_name, but adjusted so it can be appropriately
    309   // displayed even if it is LTR and the UI is RTL.
    310   string16 AdjustedShortNameForLocaleDirection() const;
    311 
    312   // A description of the template; this may be empty.
    313   void set_description(const string16& description) {
    314     description_ = description;
    315   }
    316   string16 description() const { return description_; }
    317 
    318   // URL providing JSON results. This is typically used to provide suggestions
    319   // as you type. If NULL, this url does not support suggestions.
    320   // Be sure to check the resulting TemplateURLRef for SupportsReplacement
    321   // before using.
    322   void SetSuggestionsURL(const std::string& suggestions_url,
    323                          int index_offset,
    324                          int page_offset);
          // Returns NULL if the suggestions url is empty.
    325   const TemplateURLRef* suggestions_url() const {
    326     return suggestions_url_.url().empty() ? NULL : &suggestions_url_;
    327   }
    328 
    329   // Parameterized URL for providing the results. This may be NULL.
    330   // Be sure to check the resulting TemplateURLRef for SupportsReplacement
    331   // before using.
    332   void SetURL(const std::string& url, int index_offset, int page_offset);
    333   // Returns the TemplateURLRef that may be used for search results. This
    334   // returns NULL if a url element was not specified.
    335   const TemplateURLRef* url() const {
    336     return url_.url().empty() ? NULL : &url_;
    337   }
    338 
    339   // Parameterized URL for instant results. This may be NULL.  Be sure to check
    340   // the resulting TemplateURLRef for SupportsReplacement before using. See
    341   // TemplateURLRef for a description of |index_offset| and |page_offset|.
    342   void SetInstantURL(const std::string& url, int index_offset, int page_offset);
    343   // Returns the TemplateURLRef that may be used for instant results. This
    344   // returns NULL if the instant url is empty.
    345   const TemplateURLRef* instant_url() const {
    346     return instant_url_.url().empty() ? NULL : &instant_url_;
    347   }
    348 
    349   // URL to the OSD file this came from. May be empty.
    350   void set_originating_url(const GURL& url) {
    351     originating_url_ = url;
    352   }
    353   const GURL& originating_url() const { return originating_url_; }
    354 
    355   // The shortcut for this template url. May be empty.
    356   void set_keyword(const string16& keyword);
    357   string16 keyword() const;
    358 
    359   // Whether to autogenerate a keyword from the url() in keyword().  Most
    360   // consumers should not need this.
    361   // NOTE: Calling set_keyword() turns this back off.  Manual and automatic
    362   // keywords are mutually exclusive.
    363   void set_autogenerate_keyword(bool autogenerate_keyword) {
    364     autogenerate_keyword_ = autogenerate_keyword;
    365     if (autogenerate_keyword_) {
              // Drop any stored keyword and clear the "already generated" flag so
              // that generation can be attempted afresh.
    366       keyword_.clear();
    367       keyword_generated_ = false;
    368     }
    369   }
    370   bool autogenerate_keyword() const {
    371     return autogenerate_keyword_;
    372   }
    373 
    374   // Ensures that the keyword is generated.  Most consumers should not need this
    375   // because it is done automatically.  Use this method on the UI thread, so
    376   // the keyword may be accessed on another thread.
    377   void EnsureKeyword() const;
    378 
    379   // Whether this keyword is shown in the default list of search providers. This
    380   // is just a property and does not indicate whether this TemplateURL has
    381   // a TemplateURLRef that supports replacement. Use ShowInDefaultList to
    382   // test both.
    383   // The default value is false.
    384   void set_show_in_default_list(bool show_in_default_list) {
    385     show_in_default_list_ = show_in_default_list;
    386   }
    387   bool show_in_default_list() const { return show_in_default_list_; }
    388 
    389   // Returns true if show_in_default_list() is true and this TemplateURL has a
    390   // TemplateURLRef that supports replacement.
    391   bool ShowInDefaultList() const;
    392 
    393   // Whether it's safe for auto-modification code (the autogenerator and the
    394   // code that imports data from other browsers) to replace the TemplateURL.
    395   // This should be set to false for any keyword the user edits, or any keyword
    396   // that the user clearly manually edited in the past, like a bookmark keyword
    397   // from another browser.
    398   void set_safe_for_autoreplace(bool safe_for_autoreplace) {
    399     safe_for_autoreplace_ = safe_for_autoreplace;
    400   }
    401   bool safe_for_autoreplace() const { return safe_for_autoreplace_; }
    402 
    403   // Images for this URL. May be empty.
    404   void add_image_ref(const ImageRef& ref) { image_refs_.push_back(ref); }
    405   const std::vector<ImageRef>& image_refs() const { return image_refs_; }
    406 
    407   // Convenience methods for getting/setting an ImageRef that points to a
    408   // favicon. A TemplateURL need not have an ImageRef for a favicon. In such
    409   // a situation GetFaviconURL returns an invalid url.
    410   //
    411   // If url is empty and there is an image ref for a favicon, it is removed.
    412   void SetFaviconURL(const GURL& url);
    413   GURL GetFaviconURL() const;
    414 
    415   // Set of languages supported. This may be empty.
          // NOTE: unlike image_refs() and input_encodings(), languages() returns a
          // copy of the list rather than a reference.
    416   void add_language(const string16& language) {
    417     languages_.push_back(language);
    418   }
    419   std::vector<string16> languages() const { return languages_; }
    420 
    421   // Date this keyword was created.
    422   //
    423   // NOTE: this may be 0, which indicates the keyword was created before we
    424   // started tracking creation time.
    425   void set_date_created(base::Time time) { date_created_ = time; }
    426   base::Time date_created() const { return date_created_; }
    427 
    428   // True if this TemplateURL was automatically created by the administrator via
    429   // group policy.
    430   void set_created_by_policy(bool created_by_policy) {
    431      created_by_policy_ = created_by_policy;
    432   }
    433   bool created_by_policy() const { return created_by_policy_; }
    434 
    435   // Number of times this keyword has been explicitly used to load a URL.  We
    436   // don't increment this for uses as the "default search engine" since that's
    437   // not really "explicit" usage and incrementing would result in pinning the
    438   // user's default search engine(s) to the top of the list of searches on the
    439   // New Tab page, de-emphasizing the omnibox as "where you go to search".
    440   void set_usage_count(int count) { usage_count_ = count; }
    441   int usage_count() const { return usage_count_; }
    442 
    443   // The list of supported encodings for the search terms. This may be empty,
    444   // which indicates the terms should be encoded with UTF-8.
    445   void set_input_encodings(const std::vector<std::string>& encodings) {
    446     input_encodings_ = encodings;
    447   }
    448   void add_input_encoding(const std::string& encoding) {
    449     input_encodings_.push_back(encoding);
    450   }
    451   const std::vector<std::string>& input_encodings() const {
    452     return input_encodings_;
    453   }
    454 
          // Plain accessors for the SearchEngineType recorded for this TemplateURL.
    455   void set_search_engine_type(SearchEngineType search_engine_type) {
    456     search_engine_type_ = search_engine_type;
    457   }
    458   SearchEngineType search_engine_type() const {
    459     return search_engine_type_;
    460   }
    461 
          // Plain accessors for the logo id.
          // NOTE(review): what the id refers to is not visible in this header --
          // confirm against the callers.
    462   void set_logo_id(int logo_id) { logo_id_ = logo_id; }
    463   int logo_id() const { return logo_id_; }
    464 
    465   // Returns the unique identifier of this TemplateURL. The unique ID is set
    466   // by the TemplateURLModel when the TemplateURL is added to it.
    467   TemplateURLID id() const { return id_; }
    468 
    469   // If this TemplateURL comes from prepopulated data the prepopulate_id is > 0.
    470   void set_prepopulate_id(int id) { prepopulate_id_ = id; }
    471   int prepopulate_id() const { return prepopulate_id_; }
    472 
          // NOTE(review): extension-keyword helpers. Their definitions are not
          // visible in this header, so the exact contract needs confirming.
    473   std::string GetExtensionId() const;
    474   bool IsExtensionKeyword() const;
    475 
    476  private:
    477   friend void MergeEnginesFromPrepopulateData(
    478       PrefService* prefs,
    479       WebDataService* service,
    480       std::vector<TemplateURL*>* template_urls,
    481       const TemplateURL** default_search_provider);
    482   friend class KeywordTable;
    483   friend class KeywordTableTest;
    484   friend class SearchHostToURLsMap;
    485   friend class TemplateURLModel;
    486 
    487   // Invalidates cached values on this object and its child TemplateURLRefs.
    488   void InvalidateCachedValues() const;
    489 
    490   // Unique identifier, used when archived to the database.
          // Private: reachable only from this class and the friends listed above.
    491   void set_id(TemplateURLID id) { id_ = id; }
    492 
    493   string16 short_name_;
    494   string16 description_;
    495   TemplateURLRef suggestions_url_;
    496   TemplateURLRef url_;
    497   TemplateURLRef instant_url_;
    498   GURL originating_url_;
    499   mutable string16 keyword_;
    500   bool autogenerate_keyword_;  // If this is set, |keyword_| holds the cached
    501                                // generated keyword if available.
    502   mutable bool keyword_generated_;  // True if the keyword was generated. This
    503                                     // is used to avoid multiple attempts if
    504                                     // generating a keyword failed.
    505   bool show_in_default_list_;
    506   bool safe_for_autoreplace_;
    507   std::vector<ImageRef> image_refs_;
    508   std::vector<string16> languages_;
    509   // List of supported input encodings.
    510   std::vector<std::string> input_encodings_;
    511   TemplateURLID id_;
    512   base::Time date_created_;
    513   bool created_by_policy_;
    514   int usage_count_;
    515   SearchEngineType search_engine_type_;
    516   int logo_id_;
    517   int prepopulate_id_;
    518 
    519   // TODO(sky): Add date last parsed OSD file.
    520 };
    521 
    522 #endif  // CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
    523