Home | History | Annotate | Download | only in browser
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/google/core/browser/google_util.h"
      6 
      7 #include <string>
      8 #include <vector>
      9 
     10 #include "base/command_line.h"
     11 #include "base/strings/string16.h"
     12 #include "base/strings/string_number_conversions.h"
     13 #include "base/strings/string_split.h"
     14 #include "base/strings/string_util.h"
     15 #include "base/strings/utf_string_conversions.h"
     16 #include "components/google/core/browser/google_switches.h"
     17 #include "components/google/core/browser/google_url_tracker.h"
     18 #include "components/url_fixer/url_fixer.h"
     19 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
     20 #include "net/base/url_util.h"
     21 #include "url/gurl.h"
     22 
     23 // Only use Link Doctor on official builds.  It uses an API key, too, but
     24 // seems best to just disable it, for more responsive error pages and to reduce
     25 // server load.
     26 #if defined(GOOGLE_CHROME_BUILD)
     27 #define LINKDOCTOR_SERVER_REQUEST_URL "https://www.googleapis.com/rpc"
     28 #else
     29 #define LINKDOCTOR_SERVER_REQUEST_URL ""
     30 #endif
     31 
     32 
     33 // Helpers --------------------------------------------------------------------
     34 
     35 namespace {
     36 
// Test-only toggle.  It starts out false and is set to true by
// SetMockLinkDoctorBaseURLForTesting(); while true, LinkDoctorBaseURL()
// returns a fixed mock URL instead of LINKDOCTOR_SERVER_REQUEST_URL.
bool gUseMockLinkDoctorBaseURLForTesting = false;
     38 
     39 bool IsPathHomePageBase(const std::string& path) {
     40   return (path == "/") || (path == "/webhp");
     41 }
     42 
     43 // True if |host| is "[www.]<domain_in_lower_case>.<TLD>" with a valid TLD. If
     44 // |subdomain_permission| is ALLOW_SUBDOMAIN, we check against host
     45 // "*.<domain_in_lower_case>.<TLD>" instead.
     46 bool IsValidHostName(const std::string& host,
     47                      const std::string& domain_in_lower_case,
     48                      google_util::SubdomainPermission subdomain_permission) {
     49   size_t tld_length = net::registry_controlled_domains::GetRegistryLength(
     50       host,
     51       net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
     52       net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
     53   if ((tld_length == 0) || (tld_length == std::string::npos))
     54     return false;
     55   // Removes the tld and the preceding dot.
     56   std::string host_minus_tld(host, 0, host.length() - tld_length - 1);
     57   if (LowerCaseEqualsASCII(host_minus_tld, domain_in_lower_case.c_str()))
     58     return true;
     59   if (subdomain_permission == google_util::ALLOW_SUBDOMAIN)
     60     return EndsWith(host_minus_tld, "." + domain_in_lower_case, false);
     61   return LowerCaseEqualsASCII(host_minus_tld,
     62                               ("www." + domain_in_lower_case).c_str());
     63 }
     64 
     65 // True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission|
     66 // is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard
     67 // port for its scheme (80 for HTTP, 443 for HTTPS).
     68 bool IsValidURL(const GURL& url, google_util::PortPermission port_permission) {
     69   return url.is_valid() && url.SchemeIsHTTPOrHTTPS() &&
     70       (url.port().empty() ||
     71        (port_permission == google_util::ALLOW_NON_STANDARD_PORTS));
     72 }
     73 
     74 }  // namespace
     75 
     76 
     77 namespace google_util {
     78 
     79 // Global functions -----------------------------------------------------------
     80 
     81 bool HasGoogleSearchQueryParam(const std::string& str) {
     82   url::Component query(0, str.length()), key, value;
     83   while (url::ExtractQueryKeyValue(str.c_str(), &query, &key, &value)) {
     84     if ((key.len == 1) && (str[key.begin] == 'q') && value.is_nonempty())
     85       return true;
     86   }
     87   return false;
     88 }
     89 
     90 GURL LinkDoctorBaseURL() {
     91   if (gUseMockLinkDoctorBaseURLForTesting)
     92     return GURL("http://mock.linkdoctor.url/for?testing");
     93   return GURL(LINKDOCTOR_SERVER_REQUEST_URL);
     94 }
     95 
     96 void SetMockLinkDoctorBaseURLForTesting() {
     97   gUseMockLinkDoctorBaseURLForTesting = true;
     98 }
     99 
    100 std::string GetGoogleLocale(const std::string& application_locale) {
    101   // Google does not recognize "nb" for Norwegian Bokmal; it uses "no".
    102   return (application_locale == "nb") ? "no" : application_locale;
    103 }
    104 
    105 GURL AppendGoogleLocaleParam(const GURL& url,
    106                              const std::string& application_locale) {
    107   return net::AppendQueryParameter(
    108       url, "hl", GetGoogleLocale(application_locale));
    109 }
    110 
    111 std::string GetGoogleCountryCode(GURL google_homepage_url) {
    112   const std::string google_hostname = google_homepage_url.host();
    113   const size_t last_dot = google_hostname.find_last_of('.');
    114   if (last_dot == std::string::npos) {
    115     NOTREACHED();
    116   }
    117   std::string country_code = google_hostname.substr(last_dot + 1);
    118   // Assume the com TLD implies the US.
    119   if (country_code == "com")
    120     return "us";
    121   // Google uses the Unicode Common Locale Data Repository (CLDR), and the CLDR
    122   // code for the UK is "gb".
    123   if (country_code == "uk")
    124     return "gb";
    125   // Catalonia does not have a CLDR country code, since it's a region in Spain,
    126   // so use Spain instead.
    127   if (country_code == "cat")
    128     return "es";
    129   return country_code;
    130 }
    131 
    132 GURL GetGoogleSearchURL(GURL google_homepage_url) {
    133   // To transform the homepage URL into the corresponding search URL, add the
    134   // "search" and the "q=" query string.
    135   std::string search_path = "search";
    136   std::string query_string = "q=";
    137   GURL::Replacements replacements;
    138   replacements.SetPathStr(search_path);
    139   replacements.SetQueryStr(query_string);
    140   return google_homepage_url.ReplaceComponents(replacements);
    141 }
    142 
    143 GURL CommandLineGoogleBaseURL() {
    144   // Unit tests may add command-line flags after the first call to this
    145   // function, so we don't simply initialize a static |base_url| directly and
    146   // then unconditionally return it.
    147   CR_DEFINE_STATIC_LOCAL(std::string, switch_value, ());
    148   CR_DEFINE_STATIC_LOCAL(GURL, base_url, ());
    149   std::string current_switch_value(
    150       CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
    151           switches::kGoogleBaseURL));
    152   if (current_switch_value != switch_value) {
    153     switch_value = current_switch_value;
    154     base_url = url_fixer::FixupURL(switch_value, std::string());
    155     if (!base_url.is_valid() || base_url.has_query() || base_url.has_ref())
    156       base_url = GURL();
    157   }
    158   return base_url;
    159 }
    160 
    161 bool StartsWithCommandLineGoogleBaseURL(const GURL& url) {
    162   GURL base_url(CommandLineGoogleBaseURL());
    163   return base_url.is_valid() &&
    164       StartsWithASCII(url.possibly_invalid_spec(), base_url.spec(), true);
    165 }
    166 
    167 bool IsGoogleHostname(const std::string& host,
    168                       SubdomainPermission subdomain_permission) {
    169   GURL base_url(CommandLineGoogleBaseURL());
    170   if (base_url.is_valid() && (host == base_url.host()))
    171     return true;
    172 
    173   return IsValidHostName(host, "google", subdomain_permission);
    174 }
    175 
    176 bool IsGoogleDomainUrl(const GURL& url,
    177                        SubdomainPermission subdomain_permission,
    178                        PortPermission port_permission) {
    179   return IsValidURL(url, port_permission) &&
    180       IsGoogleHostname(url.host(), subdomain_permission);
    181 }
    182 
    183 bool IsGoogleHomePageUrl(const GURL& url) {
    184   // First check to see if this has a Google domain.
    185   if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS))
    186     return false;
    187 
    188   // Make sure the path is a known home page path.
    189   std::string path(url.path());
    190   return IsPathHomePageBase(path) || StartsWithASCII(path, "/ig", false);
    191 }
    192 
    193 bool IsGoogleSearchUrl(const GURL& url) {
    194   // First check to see if this has a Google domain.
    195   if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS))
    196     return false;
    197 
    198   // Make sure the path is a known search path.
    199   std::string path(url.path());
    200   bool is_home_page_base = IsPathHomePageBase(path);
    201   if (!is_home_page_base && (path != "/search"))
    202     return false;
    203 
    204   // Check for query parameter in URL parameter and hash fragment, depending on
    205   // the path type.
    206   return HasGoogleSearchQueryParam(url.ref()) ||
    207       (!is_home_page_base && HasGoogleSearchQueryParam(url.query()));
    208 }
    209 
    210 bool IsYoutubeDomainUrl(const GURL& url,
    211                         SubdomainPermission subdomain_permission,
    212                         PortPermission port_permission) {
    213   return IsValidURL(url, port_permission) &&
    214       IsValidHostName(url.host(), "youtube", subdomain_permission);
    215 }
    216 
    217 }  // namespace google_util
    218