1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "components/google/core/browser/google_util.h" 6 7 #include <string> 8 #include <vector> 9 10 #include "base/command_line.h" 11 #include "base/strings/string16.h" 12 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_split.h" 14 #include "base/strings/string_util.h" 15 #include "base/strings/utf_string_conversions.h" 16 #include "components/google/core/browser/google_switches.h" 17 #include "components/google/core/browser/google_url_tracker.h" 18 #include "components/url_fixer/url_fixer.h" 19 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 20 #include "net/base/url_util.h" 21 #include "url/gurl.h" 22 23 // Only use Link Doctor on official builds. It uses an API key, too, but 24 // seems best to just disable it, for more responsive error pages and to reduce 25 // server load. 26 #if defined(GOOGLE_CHROME_BUILD) 27 #define LINKDOCTOR_SERVER_REQUEST_URL "https://www.googleapis.com/rpc" 28 #else 29 #define LINKDOCTOR_SERVER_REQUEST_URL "" 30 #endif 31 32 33 // Helpers -------------------------------------------------------------------- 34 35 namespace { 36 37 bool gUseMockLinkDoctorBaseURLForTesting = false; 38 39 bool IsPathHomePageBase(const std::string& path) { 40 return (path == "/") || (path == "/webhp"); 41 } 42 43 // True if |host| is "[www.]<domain_in_lower_case>.<TLD>" with a valid TLD. If 44 // |subdomain_permission| is ALLOW_SUBDOMAIN, we check against host 45 // "*.<domain_in_lower_case>.<TLD>" instead. 46 bool IsValidHostName(const std::string& host, 47 const std::string& domain_in_lower_case, 48 google_util::SubdomainPermission subdomain_permission) { 49 size_t tld_length = net::registry_controlled_domains::GetRegistryLength( 50 host, 51 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 52 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 53 if ((tld_length == 0) || (tld_length == std::string::npos)) 54 return false; 55 // Removes the tld and the preceding dot. 56 std::string host_minus_tld(host, 0, host.length() - tld_length - 1); 57 if (LowerCaseEqualsASCII(host_minus_tld, domain_in_lower_case.c_str())) 58 return true; 59 if (subdomain_permission == google_util::ALLOW_SUBDOMAIN) 60 return EndsWith(host_minus_tld, "." + domain_in_lower_case, false); 61 return LowerCaseEqualsASCII(host_minus_tld, 62 ("www." + domain_in_lower_case).c_str()); 63 } 64 65 // True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission| 66 // is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard 67 // port for its scheme (80 for HTTP, 443 for HTTPS). 68 bool IsValidURL(const GURL& url, google_util::PortPermission port_permission) { 69 return url.is_valid() && url.SchemeIsHTTPOrHTTPS() && 70 (url.port().empty() || 71 (port_permission == google_util::ALLOW_NON_STANDARD_PORTS)); 72 } 73 74 } // namespace 75 76 77 namespace google_util { 78 79 // Global functions ----------------------------------------------------------- 80 81 bool HasGoogleSearchQueryParam(const std::string& str) { 82 url::Component query(0, str.length()), key, value; 83 while (url::ExtractQueryKeyValue(str.c_str(), &query, &key, &value)) { 84 if ((key.len == 1) && (str[key.begin] == 'q') && value.is_nonempty()) 85 return true; 86 } 87 return false; 88 } 89 90 GURL LinkDoctorBaseURL() { 91 if (gUseMockLinkDoctorBaseURLForTesting) 92 return GURL("http://mock.linkdoctor.url/for?testing"); 93 return GURL(LINKDOCTOR_SERVER_REQUEST_URL); 94 } 95 96 void SetMockLinkDoctorBaseURLForTesting() { 97 gUseMockLinkDoctorBaseURLForTesting = true; 98 } 99 100 std::string GetGoogleLocale(const std::string& application_locale) { 101 // Google does not recognize "nb" for Norwegian Bokmal; it uses "no". 102 return (application_locale == "nb") ? "no" : application_locale; 103 } 104 105 GURL AppendGoogleLocaleParam(const GURL& url, 106 const std::string& application_locale) { 107 return net::AppendQueryParameter( 108 url, "hl", GetGoogleLocale(application_locale)); 109 } 110 111 std::string GetGoogleCountryCode(GURL google_homepage_url) { 112 const std::string google_hostname = google_homepage_url.host(); 113 const size_t last_dot = google_hostname.find_last_of('.'); 114 if (last_dot == std::string::npos) { 115 NOTREACHED(); 116 } 117 std::string country_code = google_hostname.substr(last_dot + 1); 118 // Assume the com TLD implies the US. 119 if (country_code == "com") 120 return "us"; 121 // Google uses the Unicode Common Locale Data Repository (CLDR), and the CLDR 122 // code for the UK is "gb". 123 if (country_code == "uk") 124 return "gb"; 125 // Catalonia does not have a CLDR country code, since it's a region in Spain, 126 // so use Spain instead. 127 if (country_code == "cat") 128 return "es"; 129 return country_code; 130 } 131 132 GURL GetGoogleSearchURL(GURL google_homepage_url) { 133 // To transform the homepage URL into the corresponding search URL, add the 134 // "search" and the "q=" query string. 135 std::string search_path = "search"; 136 std::string query_string = "q="; 137 GURL::Replacements replacements; 138 replacements.SetPathStr(search_path); 139 replacements.SetQueryStr(query_string); 140 return google_homepage_url.ReplaceComponents(replacements); 141 } 142 143 GURL CommandLineGoogleBaseURL() { 144 // Unit tests may add command-line flags after the first call to this 145 // function, so we don't simply initialize a static |base_url| directly and 146 // then unconditionally return it. 147 CR_DEFINE_STATIC_LOCAL(std::string, switch_value, ()); 148 CR_DEFINE_STATIC_LOCAL(GURL, base_url, ()); 149 std::string current_switch_value( 150 CommandLine::ForCurrentProcess()->GetSwitchValueASCII( 151 switches::kGoogleBaseURL)); 152 if (current_switch_value != switch_value) { 153 switch_value = current_switch_value; 154 base_url = url_fixer::FixupURL(switch_value, std::string()); 155 if (!base_url.is_valid() || base_url.has_query() || base_url.has_ref()) 156 base_url = GURL(); 157 } 158 return base_url; 159 } 160 161 bool StartsWithCommandLineGoogleBaseURL(const GURL& url) { 162 GURL base_url(CommandLineGoogleBaseURL()); 163 return base_url.is_valid() && 164 StartsWithASCII(url.possibly_invalid_spec(), base_url.spec(), true); 165 } 166 167 bool IsGoogleHostname(const std::string& host, 168 SubdomainPermission subdomain_permission) { 169 GURL base_url(CommandLineGoogleBaseURL()); 170 if (base_url.is_valid() && (host == base_url.host())) 171 return true; 172 173 return IsValidHostName(host, "google", subdomain_permission); 174 } 175 176 bool IsGoogleDomainUrl(const GURL& url, 177 SubdomainPermission subdomain_permission, 178 PortPermission port_permission) { 179 return IsValidURL(url, port_permission) && 180 IsGoogleHostname(url.host(), subdomain_permission); 181 } 182 183 bool IsGoogleHomePageUrl(const GURL& url) { 184 // First check to see if this has a Google domain. 185 if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS)) 186 return false; 187 188 // Make sure the path is a known home page path. 189 std::string path(url.path()); 190 return IsPathHomePageBase(path) || StartsWithASCII(path, "/ig", false); 191 } 192 193 bool IsGoogleSearchUrl(const GURL& url) { 194 // First check to see if this has a Google domain. 195 if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS)) 196 return false; 197 198 // Make sure the path is a known search path. 199 std::string path(url.path()); 200 bool is_home_page_base = IsPathHomePageBase(path); 201 if (!is_home_page_base && (path != "/search")) 202 return false; 203 204 // Check for query parameter in URL parameter and hash fragment, depending on 205 // the path type. 206 return HasGoogleSearchQueryParam(url.ref()) || 207 (!is_home_page_base && HasGoogleSearchQueryParam(url.query())); 208 } 209 210 bool IsYoutubeDomainUrl(const GURL& url, 211 SubdomainPermission subdomain_permission, 212 PortPermission port_permission) { 213 return IsValidURL(url, port_permission) && 214 IsValidHostName(url.host(), "youtube", subdomain_permission); 215 } 216 217 } // namespace google_util 218