1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, 6 // later modified by others), but almost entirely rewritten for Chrome. 7 // (netwerk/dns/src/nsEffectiveTLDService.cpp) 8 /* ***** BEGIN LICENSE BLOCK ***** 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 10 * 11 * The contents of this file are subject to the Mozilla Public License Version 12 * 1.1 (the "License"); you may not use this file except in compliance with 13 * the License. You may obtain a copy of the License at 14 * http://www.mozilla.org/MPL/ 15 * 16 * Software distributed under the License is distributed on an "AS IS" basis, 17 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 18 * for the specific language governing rights and limitations under the 19 * License. 20 * 21 * The Original Code is Mozilla Effective-TLD Service 22 * 23 * The Initial Developer of the Original Code is 24 * Google Inc. 25 * Portions created by the Initial Developer are Copyright (C) 2006 26 * the Initial Developer. All Rights Reserved. 27 * 28 * Contributor(s): 29 * Pamela Greene <pamg.bugs (at) gmail.com> (original author) 30 * Daniel Witte <dwitte (at) stanford.edu> 31 * 32 * Alternatively, the contents of this file may be used under the terms of 33 * either the GNU General Public License Version 2 or later (the "GPL"), or 34 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 35 * in which case the provisions of the GPL or the LGPL are applicable instead 36 * of those above. If you wish to allow use of your version of this file only 37 * under the terms of either the GPL or the LGPL, and not to allow others to 38 * use your version of this file under the terms of the MPL, indicate your 39 * decision by deleting the provisions above and replace them with the notice 40 * and other provisions required by the GPL or the LGPL. If you do not delete 41 * the provisions above, a recipient may use your version of this file under 42 * the terms of any one of the MPL, the GPL or the LGPL. 43 * 44 * ***** END LICENSE BLOCK ***** */ 45 46 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 47 48 #include "base/logging.h" 49 #include "base/strings/string_util.h" 50 #include "base/strings/utf_string_conversions.h" 51 #include "net/base/net_module.h" 52 #include "net/base/net_util.h" 53 #include "url/gurl.h" 54 #include "url/url_parse.h" 55 56 #include "effective_tld_names.cc" 57 58 namespace net { 59 namespace registry_controlled_domains { 60 61 namespace { 62 63 const int kExceptionRule = 1; 64 const int kWildcardRule = 2; 65 const int kPrivateRule = 4; 66 67 const FindDomainPtr kDefaultFindDomainFunction = Perfect_Hash::FindDomain; 68 69 // 'stringpool' is defined as a macro by the gperf-generated 70 // "effective_tld_names.cc". Provide a real constant value for it instead. 71 const char* const kDefaultStringPool = stringpool; 72 #undef stringpool 73 74 FindDomainPtr g_find_domain_function = kDefaultFindDomainFunction; 75 const char* g_stringpool = kDefaultStringPool; 76 77 size_t GetRegistryLengthImpl( 78 const std::string& host, 79 UnknownRegistryFilter unknown_filter, 80 PrivateRegistryFilter private_filter) { 81 DCHECK(!host.empty()); 82 83 // Skip leading dots. 84 const size_t host_check_begin = host.find_first_not_of('.'); 85 if (host_check_begin == std::string::npos) 86 return 0; // Host is only dots. 87 88 // A single trailing dot isn't relevant in this determination, but does need 89 // to be included in the final returned length. 90 size_t host_check_len = host.length(); 91 if (host[host_check_len - 1] == '.') { 92 --host_check_len; 93 DCHECK(host_check_len > 0); // If this weren't true, the host would be ".", 94 // and we'd have already returned above. 95 if (host[host_check_len - 1] == '.') 96 return 0; // Multiple trailing dots. 97 } 98 99 // Walk up the domain tree, most specific to least specific, 100 // looking for matches at each level. 101 size_t prev_start = std::string::npos; 102 size_t curr_start = host_check_begin; 103 size_t next_dot = host.find('.', curr_start); 104 if (next_dot >= host_check_len) // Catches std::string::npos as well. 105 return 0; // This can't have a registry + domain. 106 while (1) { 107 const char* domain_str = host.data() + curr_start; 108 int domain_length = host_check_len - curr_start; 109 const DomainRule* rule = g_find_domain_function(domain_str, domain_length); 110 111 // We need to compare the string after finding a match because the 112 // no-collisions of perfect hashing only refers to items in the set. Since 113 // we're searching for arbitrary domains, there could be collisions. 114 // Furthermore, if the apparent match is a private registry and we're not 115 // including those, it can't be an actual match. 116 if (rule) { 117 bool do_check = !(rule->type & kPrivateRule) || 118 private_filter == INCLUDE_PRIVATE_REGISTRIES; 119 if (do_check && base::strncasecmp(domain_str, 120 g_stringpool + rule->name_offset, 121 domain_length) == 0) { 122 // Exception rules override wildcard rules when the domain is an exact 123 // match, but wildcards take precedence when there's a subdomain. 124 if (rule->type & kWildcardRule && (prev_start != std::string::npos)) { 125 // If prev_start == host_check_begin, then the host is the registry 126 // itself, so return 0. 127 return (prev_start == host_check_begin) ? 128 0 : (host.length() - prev_start); 129 } 130 131 if (rule->type & kExceptionRule) { 132 if (next_dot == std::string::npos) { 133 // If we get here, we had an exception rule with no dots (e.g. 134 // "!foo"). This would only be valid if we had a corresponding 135 // wildcard rule, which would have to be "*". But we explicitly 136 // disallow that case, so this kind of rule is invalid. 137 NOTREACHED() << "Invalid exception rule"; 138 return 0; 139 } 140 return host.length() - next_dot - 1; 141 } 142 143 // If curr_start == host_check_begin, then the host is the registry 144 // itself, so return 0. 145 return (curr_start == host_check_begin) ? 146 0 : (host.length() - curr_start); 147 } 148 } 149 150 if (next_dot >= host_check_len) // Catches std::string::npos as well. 151 break; 152 153 prev_start = curr_start; 154 curr_start = next_dot + 1; 155 next_dot = host.find('.', curr_start); 156 } 157 158 // No rule found in the registry. curr_start now points to the first 159 // character of the last subcomponent of the host, so if we allow unknown 160 // registries, return the length of this subcomponent. 161 return unknown_filter == INCLUDE_UNKNOWN_REGISTRIES ? 162 (host.length() - curr_start) : 0; 163 } 164 165 std::string GetDomainAndRegistryImpl( 166 const std::string& host, PrivateRegistryFilter private_filter) { 167 DCHECK(!host.empty()); 168 169 // Find the length of the registry for this host. 170 const size_t registry_length = 171 GetRegistryLengthImpl(host, INCLUDE_UNKNOWN_REGISTRIES, private_filter); 172 if ((registry_length == std::string::npos) || (registry_length == 0)) 173 return std::string(); // No registry. 174 // The "2" in this next line is 1 for the dot, plus a 1-char minimum preceding 175 // subcomponent length. 176 DCHECK(host.length() >= 2); 177 if (registry_length > (host.length() - 2)) { 178 NOTREACHED() << 179 "Host does not have at least one subcomponent before registry!"; 180 return std::string(); 181 } 182 183 // Move past the dot preceding the registry, and search for the next previous 184 // dot. Return the host from after that dot, or the whole host when there is 185 // no dot. 186 const size_t dot = host.rfind('.', host.length() - registry_length - 2); 187 if (dot == std::string::npos) 188 return host; 189 return host.substr(dot + 1); 190 } 191 192 } // namespace 193 194 std::string GetDomainAndRegistry( 195 const GURL& gurl, 196 PrivateRegistryFilter filter) { 197 const url_parse::Component host = 198 gurl.parsed_for_possibly_invalid_spec().host; 199 if ((host.len <= 0) || gurl.HostIsIPAddress()) 200 return std::string(); 201 return GetDomainAndRegistryImpl(std::string( 202 gurl.possibly_invalid_spec().data() + host.begin, host.len), filter); 203 } 204 205 std::string GetDomainAndRegistry( 206 const std::string& host, 207 PrivateRegistryFilter filter) { 208 url_canon::CanonHostInfo host_info; 209 const std::string canon_host(CanonicalizeHost(host, &host_info)); 210 if (canon_host.empty() || host_info.IsIPAddress()) 211 return std::string(); 212 return GetDomainAndRegistryImpl(canon_host, filter); 213 } 214 215 bool SameDomainOrHost( 216 const GURL& gurl1, 217 const GURL& gurl2, 218 PrivateRegistryFilter filter) { 219 // See if both URLs have a known domain + registry, and those values are the 220 // same. 221 const std::string domain1(GetDomainAndRegistry(gurl1, filter)); 222 const std::string domain2(GetDomainAndRegistry(gurl2, filter)); 223 if (!domain1.empty() || !domain2.empty()) 224 return domain1 == domain2; 225 226 // No domains. See if the hosts are identical. 227 const url_parse::Component host1 = 228 gurl1.parsed_for_possibly_invalid_spec().host; 229 const url_parse::Component host2 = 230 gurl2.parsed_for_possibly_invalid_spec().host; 231 if ((host1.len <= 0) || (host1.len != host2.len)) 232 return false; 233 return !strncmp(gurl1.possibly_invalid_spec().data() + host1.begin, 234 gurl2.possibly_invalid_spec().data() + host2.begin, 235 host1.len); 236 } 237 238 size_t GetRegistryLength( 239 const GURL& gurl, 240 UnknownRegistryFilter unknown_filter, 241 PrivateRegistryFilter private_filter) { 242 const url_parse::Component host = 243 gurl.parsed_for_possibly_invalid_spec().host; 244 if (host.len <= 0) 245 return std::string::npos; 246 if (gurl.HostIsIPAddress()) 247 return 0; 248 return GetRegistryLengthImpl( 249 std::string(gurl.possibly_invalid_spec().data() + host.begin, host.len), 250 unknown_filter, 251 private_filter); 252 } 253 254 size_t GetRegistryLength( 255 const std::string& host, 256 UnknownRegistryFilter unknown_filter, 257 PrivateRegistryFilter private_filter) { 258 url_canon::CanonHostInfo host_info; 259 const std::string canon_host(CanonicalizeHost(host, &host_info)); 260 if (canon_host.empty()) 261 return std::string::npos; 262 if (host_info.IsIPAddress()) 263 return 0; 264 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter); 265 } 266 267 void SetFindDomainFunctionAndStringPoolForTesting(FindDomainPtr function, 268 const char* stringpool) { 269 g_find_domain_function = function ? function : kDefaultFindDomainFunction; 270 g_stringpool = stringpool ? stringpool : kDefaultStringPool; 271 } 272 273 } // namespace registry_controlled_domains 274 } // namespace net 275