1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/base/net_util.h" 6 7 #include <algorithm> 8 #include <iterator> 9 #include <map> 10 11 #include "build/build_config.h" 12 13 #if defined(OS_WIN) 14 #include <windows.h> 15 #include <iphlpapi.h> 16 #include <winsock2.h> 17 #pragma comment(lib, "iphlpapi.lib") 18 #elif defined(OS_POSIX) 19 #include <fcntl.h> 20 #if !defined(OS_ANDROID) 21 #include <ifaddrs.h> 22 #endif 23 #include <net/if.h> 24 #include <netdb.h> 25 #include <netinet/in.h> 26 #endif 27 28 #include "base/basictypes.h" 29 #include "base/file_util.h" 30 #include "base/files/file_path.h" 31 #include "base/i18n/file_util_icu.h" 32 #include "base/i18n/icu_string_conversions.h" 33 #include "base/i18n/time_formatting.h" 34 #include "base/json/string_escape.h" 35 #include "base/lazy_instance.h" 36 #include "base/logging.h" 37 #include "base/memory/singleton.h" 38 #include "base/message_loop/message_loop.h" 39 #include "base/metrics/histogram.h" 40 #include "base/path_service.h" 41 #include "base/stl_util.h" 42 #include "base/strings/string_number_conversions.h" 43 #include "base/strings/string_piece.h" 44 #include "base/strings/string_split.h" 45 #include "base/strings/string_tokenizer.h" 46 #include "base/strings/string_util.h" 47 #include "base/strings/stringprintf.h" 48 #include "base/strings/sys_string_conversions.h" 49 #include "base/strings/utf_offset_string_conversions.h" 50 #include "base/strings/utf_string_conversions.h" 51 #include "base/synchronization/lock.h" 52 #include "base/sys_byteorder.h" 53 #include "base/time/time.h" 54 #include "base/values.h" 55 #include "grit/net_resources.h" 56 #include "url/gurl.h" 57 #include "url/url_canon.h" 58 #include "url/url_canon_ip.h" 59 #include "url/url_parse.h" 60 #if defined(OS_ANDROID) 61 #include "net/android/network_library.h" 62 #endif 63 #include 
"net/base/dns_util.h" 64 #include "net/base/escape.h" 65 #include "net/base/mime_util.h" 66 #include "net/base/net_module.h" 67 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 68 #if defined(OS_WIN) 69 #include "net/base/winsock_init.h" 70 #endif 71 #include "net/http/http_content_disposition.h" 72 #include "third_party/icu/source/common/unicode/uidna.h" 73 #include "third_party/icu/source/common/unicode/uniset.h" 74 #include "third_party/icu/source/common/unicode/uscript.h" 75 #include "third_party/icu/source/common/unicode/uset.h" 76 #include "third_party/icu/source/i18n/unicode/datefmt.h" 77 #include "third_party/icu/source/i18n/unicode/regex.h" 78 #include "third_party/icu/source/i18n/unicode/ulocdata.h" 79 80 using base::Time; 81 82 namespace net { 83 84 namespace { 85 86 typedef std::vector<size_t> Offsets; 87 88 // what we prepend to get a file URL 89 static const base::FilePath::CharType kFileURLPrefix[] = 90 FILE_PATH_LITERAL("file:///"); 91 92 // The general list of blocked ports. Will be blocked unless a specific 93 // protocol overrides it. 
// (Ex: ftp can use ports 20 and 21)
// NOTE(review): the example above says "20 and 21", but kAllowedFtpPorts below
// lists 21 and 22 -- confirm which one is intended.
//
// Each entry is a TCP port number; the trailing comment names the service
// conventionally associated with it.
static const int kRestrictedPorts[] = {
  1,    // tcpmux
  7,    // echo
  9,    // discard
  11,   // systat
  13,   // daytime
  15,   // netstat
  17,   // qotd
  19,   // chargen
  20,   // ftp data
  21,   // ftp access
  22,   // ssh
  23,   // telnet
  25,   // smtp
  37,   // time
  42,   // name
  43,   // nicname
  53,   // domain
  77,   // priv-rjs
  79,   // finger
  87,   // ttylink
  95,   // supdup
  101,  // hostname
  102,  // iso-tsap
  103,  // gppitnp
  104,  // acr-nema
  109,  // pop2
  110,  // pop3
  111,  // sunrpc
  113,  // auth
  115,  // sftp
  117,  // uucp-path
  119,  // nntp
  123,  // NTP
  135,  // loc-srv /epmap
  139,  // netbios
  143,  // imap2
  179,  // BGP
  389,  // ldap
  465,  // smtp+ssl
  512,  // print / exec
  513,  // login
  514,  // shell
  515,  // printer
  526,  // tempo
  530,  // courier
  531,  // chat
  532,  // netnews
  540,  // uucp
  556,  // remotefs
  563,  // nntp+ssl
  587,  // smtp?
  601,  // ??
  636,  // ldap+ssl
  993,  // imap+ssl
  995,  // pop3+ssl
  2049, // nfs
  3659, // apple-sasl / PasswordServer
  4045, // lockd
  6000, // X11
  6665, // Alternate IRC [Apple addition]
  6666, // Alternate IRC [Apple addition]
  6667, // Standard IRC [Apple addition]
  6668, // Alternate IRC [Apple addition]
  6669, // Alternate IRC [Apple addition]
  0xFFFF, // Used to block all invalid port numbers (see
          // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port())
};

// FTP overrides the following restricted ports.
static const int kAllowedFtpPorts[] = {
  21,   // ftp access (same label as the entry for 21 in kRestrictedPorts)
  22,   // ssh
};

// Does some simple normalization of scripts so we can allow certain scripts
// to exist together.
// TODO(brettw) bug 880223: we should allow some other languages to be
// combined such as Chinese and Latin.
We will probably need a more 173 // complicated system of language pairs to have more fine-grained control. 174 UScriptCode NormalizeScript(UScriptCode code) { 175 switch (code) { 176 case USCRIPT_KATAKANA: 177 case USCRIPT_HIRAGANA: 178 case USCRIPT_KATAKANA_OR_HIRAGANA: 179 case USCRIPT_HANGUL: // This one is arguable. 180 return USCRIPT_HAN; 181 default: 182 return code; 183 } 184 } 185 186 bool IsIDNComponentInSingleScript(const base::char16* str, int str_len) { 187 UScriptCode first_script = USCRIPT_INVALID_CODE; 188 bool is_first = true; 189 190 int i = 0; 191 while (i < str_len) { 192 unsigned code_point; 193 U16_NEXT(str, i, str_len, code_point); 194 195 UErrorCode err = U_ZERO_ERROR; 196 UScriptCode cur_script = uscript_getScript(code_point, &err); 197 if (err != U_ZERO_ERROR) 198 return false; // Report mixed on error. 199 cur_script = NormalizeScript(cur_script); 200 201 // TODO(brettw) We may have to check for USCRIPT_INHERENT as well. 202 if (is_first && cur_script != USCRIPT_COMMON) { 203 first_script = cur_script; 204 is_first = false; 205 } else { 206 if (cur_script != USCRIPT_COMMON && cur_script != first_script) 207 return false; 208 } 209 } 210 return true; 211 } 212 213 // Check if the script of a language can be 'safely' mixed with 214 // Latin letters in the ASCII range. 215 bool IsCompatibleWithASCIILetters(const std::string& lang) { 216 // For now, just list Chinese, Japanese and Korean (positive list). 217 // An alternative is negative-listing (languages using Greek and 218 // Cyrillic letters), but it can be more dangerous. 
219 return !lang.substr(0, 2).compare("zh") || 220 !lang.substr(0, 2).compare("ja") || 221 !lang.substr(0, 2).compare("ko"); 222 } 223 224 typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap; 225 226 class LangToExemplarSet { 227 public: 228 static LangToExemplarSet* GetInstance() { 229 return Singleton<LangToExemplarSet>::get(); 230 } 231 232 private: 233 LangToExemplarSetMap map; 234 LangToExemplarSet() { } 235 ~LangToExemplarSet() { 236 STLDeleteContainerPairSecondPointers(map.begin(), map.end()); 237 } 238 239 friend class Singleton<LangToExemplarSet>; 240 friend struct DefaultSingletonTraits<LangToExemplarSet>; 241 friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**); 242 friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*); 243 244 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet); 245 }; 246 247 bool GetExemplarSetForLang(const std::string& lang, 248 icu::UnicodeSet** lang_set) { 249 const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; 250 LangToExemplarSetMap::const_iterator pos = map.find(lang); 251 if (pos != map.end()) { 252 *lang_set = pos->second; 253 return true; 254 } 255 return false; 256 } 257 258 void SetExemplarSetForLang(const std::string& lang, 259 icu::UnicodeSet* lang_set) { 260 LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; 261 map.insert(std::make_pair(lang, lang_set)); 262 } 263 264 static base::LazyInstance<base::Lock>::Leaky 265 g_lang_set_lock = LAZY_INSTANCE_INITIALIZER; 266 267 // Returns true if all the characters in component_characters are used by 268 // the language |lang|. 269 bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters, 270 const std::string& lang) { 271 CR_DEFINE_STATIC_LOCAL( 272 const icu::UnicodeSet, kASCIILetters, ('a', 'z')); 273 icu::UnicodeSet* lang_set = NULL; 274 // We're called from both the UI thread and the history thread. 
275 { 276 base::AutoLock lock(g_lang_set_lock.Get()); 277 if (!GetExemplarSetForLang(lang, &lang_set)) { 278 UErrorCode status = U_ZERO_ERROR; 279 ULocaleData* uld = ulocdata_open(lang.c_str(), &status); 280 // TODO(jungshik) Turn this check on when the ICU data file is 281 // rebuilt with the minimal subset of locale data for languages 282 // to which Chrome is not localized but which we offer in the list 283 // of languages selectable for Accept-Languages. With the rebuilt ICU 284 // data, ulocdata_open never should fall back to the default locale. 285 // (issue 2078) 286 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING); 287 if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) { 288 lang_set = reinterpret_cast<icu::UnicodeSet *>( 289 ulocdata_getExemplarSet(uld, NULL, 0, 290 ULOCDATA_ES_STANDARD, &status)); 291 // If |lang| is compatible with ASCII Latin letters, add them. 292 if (IsCompatibleWithASCIILetters(lang)) 293 lang_set->addAll(kASCIILetters); 294 } else { 295 lang_set = new icu::UnicodeSet(1, 0); 296 } 297 lang_set->freeze(); 298 SetExemplarSetForLang(lang, lang_set); 299 ulocdata_close(uld); 300 } 301 } 302 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); 303 } 304 305 // Returns true if the given Unicode host component is safe to display to the 306 // user. 307 bool IsIDNComponentSafe(const base::char16* str, 308 int str_len, 309 const std::string& languages) { 310 // Most common cases (non-IDN) do not reach here so that we don't 311 // need a fast return path. 312 // TODO(jungshik) : Check if there's any character inappropriate 313 // (although allowed) for domain names. 314 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and 315 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt 316 // For now, we borrow the list from Mozilla and tweaked it slightly. 317 // (e.g. 
Characters like U+00A0, U+3000, U+3002 are omitted because 318 // they're gonna be canonicalized to U+0020 and full stop before 319 // reaching here.) 320 // The original list is available at 321 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and 322 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 323 324 UErrorCode status = U_ZERO_ERROR; 325 #ifdef U_WCHAR_IS_UTF16 326 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 327 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" 328 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" 329 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" 330 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" 331 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" 332 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" 333 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" 334 L"[\ufffa-\ufffd]]"), status); 335 DCHECK(U_SUCCESS(status)); 336 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 337 // Lone katakana no, so, or n 338 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" 339 // Repeating Japanese accent characters 340 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), 341 0, status); 342 #else 343 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 344 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" 345 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" 346 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" 347 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" 348 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" 349 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" 350 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" 351 "[\\ufffa-\\ufffd]]", -1, US_INV), status); 352 DCHECK(U_SUCCESS(status)); 353 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 354 // Lone katakana no, so, or n 355 
"[^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]" 356 // Repeating Japanese accent characters 357 "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), 358 0, status); 359 #endif 360 DCHECK(U_SUCCESS(status)); 361 icu::UnicodeSet component_characters; 362 icu::UnicodeString component_string(str, str_len); 363 component_characters.addAll(component_string); 364 if (dangerous_characters.containsSome(component_characters)) 365 return false; 366 367 DCHECK(U_SUCCESS(status)); 368 dangerous_patterns.reset(component_string); 369 if (dangerous_patterns.find()) 370 return false; 371 372 // If the language list is empty, the result is completely determined 373 // by whether a component is a single script or not. This will block 374 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are 375 // allowed with |languages| (while it blocks Chinese + Latin letters with 376 // an accent as should be the case), but we want to err on the safe side 377 // when |languages| is empty. 378 if (languages.empty()) 379 return IsIDNComponentInSingleScript(str, str_len); 380 381 // |common_characters| is made up of ASCII numbers, hyphen, plus and 382 // underscore that are used across scripts and allowed in domain names. 383 // (sync'd with characters allowed in url_canon_host with square 384 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. 385 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), 386 status); 387 DCHECK(U_SUCCESS(status)); 388 // Subtract common characters because they're always allowed so that 389 // we just have to check if a language-specific set contains 390 // the remainder. 
391 component_characters.removeAll(common_characters); 392 393 base::StringTokenizer t(languages, ","); 394 while (t.GetNext()) { 395 if (IsComponentCoveredByLang(component_characters, t.token())) 396 return true; 397 } 398 return false; 399 } 400 401 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to 402 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46(). 403 // 404 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with 405 // the backward compatibility in mind. What it does: 406 // 407 // 1. Use the up-to-date Unicode data. 408 // 2. Define a case folding/mapping with the up-to-date Unicode data as 409 // in IDNA 2003. 410 // 3. Use transitional mechanism for 4 deviation characters (sharp-s, 411 // final sigma, ZWJ and ZWNJ) for now. 412 // 4. Continue to allow symbols and punctuations. 413 // 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDI rules. 414 // 6. Do not apply STD3 rules 415 // 7. Do not allow unassigned code points. 416 // 417 // It also closely matches what IE 10 does except for the BiDi check ( 418 // http://goo.gl/3XBhqw ). 419 // See http://http://unicode.org/reports/tr46/ and references therein 420 // for more details. 421 struct UIDNAWrapper { 422 UIDNAWrapper() { 423 UErrorCode err = U_ZERO_ERROR; 424 // TODO(jungshik): Change options as different parties (browsers, 425 // registrars, search engines) converge toward a consensus. 426 value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err); 427 if (U_FAILURE(err)) 428 value = NULL; 429 } 430 431 UIDNA* value; 432 }; 433 434 static base::LazyInstance<UIDNAWrapper>::Leaky 435 g_uidna = LAZY_INSTANCE_INITIALIZER; 436 437 // Converts one component of a host (between dots) to IDN if safe. The result 438 // will be APPENDED to the given output string and will be the same as the input 439 // if it is not IDN or the IDN is unsafe to display. Returns whether any 440 // conversion was performed. 
441 bool IDNToUnicodeOneComponent(const base::char16* comp, 442 size_t comp_len, 443 const std::string& languages, 444 base::string16* out) { 445 DCHECK(out); 446 if (comp_len == 0) 447 return false; 448 449 // Only transform if the input can be an IDN component. 450 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; 451 if ((comp_len > arraysize(kIdnPrefix)) && 452 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { 453 UIDNA* uidna = g_uidna.Get().value; 454 DCHECK(uidna != NULL); 455 size_t original_length = out->length(); 456 int output_length = 64; 457 UIDNAInfo info = UIDNA_INFO_INITIALIZER; 458 UErrorCode status; 459 do { 460 out->resize(original_length + output_length); 461 status = U_ZERO_ERROR; 462 // This returns the actual length required. If this is more than 64 463 // code units, |status| will be U_BUFFER_OVERFLOW_ERROR and we'll try 464 // the conversion again, but with a sufficiently large buffer. 465 output_length = uidna_labelToUnicode( 466 uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length], 467 output_length, &info, &status); 468 } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0)); 469 470 if (U_SUCCESS(status) && info.errors == 0) { 471 // Converted successfully. Ensure that the converted component 472 // can be safely displayed to the user. 473 out->resize(original_length + output_length); 474 if (IsIDNComponentSafe(out->data() + original_length, output_length, 475 languages)) 476 return true; 477 } 478 479 // Something went wrong. Revert to original string. 480 out->resize(original_length); 481 } 482 483 // We get here with no IDN or on error, in which case we just append the 484 // literal input. 485 out->append(comp, comp_len); 486 return false; 487 } 488 489 // Clamps the offsets in |offsets_for_adjustment| to the length of |str|. 
490 void LimitOffsets(const base::string16& str, Offsets* offsets_for_adjustment) { 491 if (offsets_for_adjustment) { 492 std::for_each(offsets_for_adjustment->begin(), 493 offsets_for_adjustment->end(), 494 base::LimitOffset<base::string16>(str.length())); 495 } 496 } 497 498 // TODO(brettw) bug 734373: check the scripts for each host component and 499 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for 500 // scripts that the user has installed. For now, just put the entire 501 // path through IDN. Maybe this feature can be implemented in ICU itself? 502 // 503 // We may want to skip this step in the case of file URLs to allow unicode 504 // UNC hostnames regardless of encodings. 505 base::string16 IDNToUnicodeWithOffsets(const std::string& host, 506 const std::string& languages, 507 Offsets* offsets_for_adjustment) { 508 // Convert the ASCII input to a base::string16 for ICU. 509 base::string16 input16; 510 input16.reserve(host.length()); 511 input16.insert(input16.end(), host.begin(), host.end()); 512 513 // Do each component of the host separately, since we enforce script matching 514 // on a per-component basis. 515 base::string16 out16; 516 { 517 base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); 518 for (size_t component_start = 0, component_end; 519 component_start < input16.length(); 520 component_start = component_end + 1) { 521 // Find the end of the component. 522 component_end = input16.find('.', component_start); 523 if (component_end == base::string16::npos) 524 component_end = input16.length(); // For getting the last component. 525 size_t component_length = component_end - component_start; 526 size_t new_component_start = out16.length(); 527 bool converted_idn = false; 528 if (component_end > component_start) { 529 // Add the substring that we just found. 
530 converted_idn = IDNToUnicodeOneComponent( 531 input16.data() + component_start, component_length, languages, 532 &out16); 533 } 534 size_t new_component_length = out16.length() - new_component_start; 535 536 if (converted_idn && offsets_for_adjustment) { 537 offset_adjuster.Add(base::OffsetAdjuster::Adjustment(component_start, 538 component_length, new_component_length)); 539 } 540 541 // Need to add the dot we just found (if we found one). 542 if (component_end < input16.length()) 543 out16.push_back('.'); 544 } 545 } 546 547 LimitOffsets(out16, offsets_for_adjustment); 548 return out16; 549 } 550 551 // Called after transforming a component to set all affected elements in 552 // |offsets_for_adjustment| to the correct new values. |original_offsets| 553 // represents the offsets before the transform; |original_component_begin| and 554 // |original_component_end| represent the pre-transform boundaries of the 555 // affected component. |transformed_offsets| should be a vector created by 556 // adjusting |original_offsets| to be relative to the beginning of the component 557 // in question (via an OffsetAdjuster) and then transformed along with the 558 // component. Note that any elements in this vector which didn't originally 559 // point into the component may contain arbitrary values and should be ignored. 560 // |transformed_component_begin| and |transformed_component_end| are the 561 // endpoints of the transformed component and are used in combination with the 562 // two offset vectors to calculate the resulting absolute offsets, which are 563 // stored in |offsets_for_adjustment|. 564 void AdjustForComponentTransform(const Offsets& original_offsets, 565 size_t original_component_begin, 566 size_t original_component_end, 567 const Offsets& transformed_offsets, 568 size_t transformed_component_begin, 569 size_t transformed_component_end, 570 Offsets* offsets_for_adjustment) { 571 if (!offsets_for_adjustment) 572 return; // Nothing to do. 
573 574 for (size_t i = 0; i < original_offsets.size(); ++i) { 575 size_t original_offset = original_offsets[i]; 576 if ((original_offset >= original_component_begin) && 577 (original_offset < original_component_end)) { 578 // This offset originally pointed into the transformed component. 579 // Adjust the transformed relative offset by the new beginning point of 580 // the transformed component. 581 size_t transformed_offset = transformed_offsets[i]; 582 (*offsets_for_adjustment)[i] = 583 (transformed_offset == base::string16::npos) ? 584 base::string16::npos : 585 (transformed_offset + transformed_component_begin); 586 } else if ((original_offset >= original_component_end) && 587 (original_offset != std::string::npos)) { 588 // This offset pointed after the transformed component. Adjust the 589 // original absolute offset by the difference between the new and old 590 // component lengths. 591 (*offsets_for_adjustment)[i] = 592 original_offset - original_component_end + transformed_component_end; 593 } 594 } 595 } 596 597 // If |component| is valid, its begin is incremented by |delta|. 598 void AdjustComponent(int delta, url_parse::Component* component) { 599 if (!component->is_valid()) 600 return; 601 602 DCHECK(delta >= 0 || component->begin >= -delta); 603 component->begin += delta; 604 } 605 606 // Adjusts all the components of |parsed| by |delta|, except for the scheme. 607 void AdjustAllComponentsButScheme(int delta, url_parse::Parsed* parsed) { 608 AdjustComponent(delta, &(parsed->username)); 609 AdjustComponent(delta, &(parsed->password)); 610 AdjustComponent(delta, &(parsed->host)); 611 AdjustComponent(delta, &(parsed->port)); 612 AdjustComponent(delta, &(parsed->path)); 613 AdjustComponent(delta, &(parsed->query)); 614 AdjustComponent(delta, &(parsed->ref)); 615 } 616 617 // Helper for FormatUrlWithOffsets(). 
618 base::string16 FormatViewSourceUrl(const GURL& url, 619 const Offsets& original_offsets, 620 const std::string& languages, 621 FormatUrlTypes format_types, 622 UnescapeRule::Type unescape_rules, 623 url_parse::Parsed* new_parsed, 624 size_t* prefix_end, 625 Offsets* offsets_for_adjustment) { 626 DCHECK(new_parsed); 627 const char kViewSource[] = "view-source:"; 628 const size_t kViewSourceLength = arraysize(kViewSource) - 1; 629 630 // Format the underlying URL and adjust offsets. 631 const std::string& url_str(url.possibly_invalid_spec()); 632 Offsets offsets_into_underlying_url(original_offsets); 633 { 634 base::OffsetAdjuster adjuster(&offsets_into_underlying_url); 635 adjuster.Add(base::OffsetAdjuster::Adjustment(0, kViewSourceLength, 0)); 636 } 637 base::string16 result(ASCIIToUTF16(kViewSource) + 638 FormatUrlWithOffsets(GURL(url_str.substr(kViewSourceLength)), languages, 639 format_types, unescape_rules, new_parsed, prefix_end, 640 &offsets_into_underlying_url)); 641 AdjustForComponentTransform(original_offsets, kViewSourceLength, 642 url_str.length(), offsets_into_underlying_url, 643 kViewSourceLength, result.length(), 644 offsets_for_adjustment); 645 LimitOffsets(result, offsets_for_adjustment); 646 647 // Adjust positions of the parsed components. 648 if (new_parsed->scheme.is_nonempty()) { 649 // Assume "view-source:real-scheme" as a scheme. 
650 new_parsed->scheme.len += kViewSourceLength; 651 } else { 652 new_parsed->scheme.begin = 0; 653 new_parsed->scheme.len = kViewSourceLength - 1; 654 } 655 AdjustAllComponentsButScheme(kViewSourceLength, new_parsed); 656 657 if (prefix_end) 658 *prefix_end += kViewSourceLength; 659 660 return result; 661 } 662 663 class AppendComponentTransform { 664 public: 665 AppendComponentTransform() {} 666 virtual ~AppendComponentTransform() {} 667 668 virtual base::string16 Execute(const std::string& component_text, 669 Offsets* offsets_into_component) const = 0; 670 671 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an 672 // accessible copy constructor in order to call AppendFormattedComponent() 673 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). 674 }; 675 676 class HostComponentTransform : public AppendComponentTransform { 677 public: 678 explicit HostComponentTransform(const std::string& languages) 679 : languages_(languages) { 680 } 681 682 private: 683 virtual base::string16 Execute( 684 const std::string& component_text, 685 Offsets* offsets_into_component) const OVERRIDE { 686 return IDNToUnicodeWithOffsets(component_text, languages_, 687 offsets_into_component); 688 } 689 690 const std::string& languages_; 691 }; 692 693 class NonHostComponentTransform : public AppendComponentTransform { 694 public: 695 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules) 696 : unescape_rules_(unescape_rules) { 697 } 698 699 private: 700 virtual base::string16 Execute( 701 const std::string& component_text, 702 Offsets* offsets_into_component) const OVERRIDE { 703 return (unescape_rules_ == UnescapeRule::NONE) ? 
704 base::UTF8ToUTF16AndAdjustOffsets(component_text, 705 offsets_into_component) : 706 UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text, 707 unescape_rules_, offsets_into_component); 708 } 709 710 const UnescapeRule::Type unescape_rules_; 711 }; 712 713 // Transforms the portion of |spec| covered by |original_component| according to 714 // |transform|. Appends the result to |output|. If |output_component| is 715 // non-NULL, its start and length are set to the transformed component's new 716 // start and length. For each element in |original_offsets| which is at least 717 // as large as original_component.begin, the corresponding element of 718 // |offsets_for_adjustment| is transformed appropriately. 719 void AppendFormattedComponent(const std::string& spec, 720 const url_parse::Component& original_component, 721 const Offsets& original_offsets, 722 const AppendComponentTransform& transform, 723 base::string16* output, 724 url_parse::Component* output_component, 725 Offsets* offsets_for_adjustment) { 726 DCHECK(output); 727 if (original_component.is_nonempty()) { 728 size_t original_component_begin = 729 static_cast<size_t>(original_component.begin); 730 size_t output_component_begin = output->length(); 731 std::string component_str(spec, original_component_begin, 732 static_cast<size_t>(original_component.len)); 733 734 // Transform |component_str| and adjust the offsets accordingly. 735 Offsets offsets_into_component(original_offsets); 736 { 737 base::OffsetAdjuster adjuster(&offsets_into_component); 738 adjuster.Add(base::OffsetAdjuster::Adjustment(0, original_component_begin, 739 0)); 740 } 741 output->append(transform.Execute(component_str, &offsets_into_component)); 742 AdjustForComponentTransform(original_offsets, original_component_begin, 743 static_cast<size_t>(original_component.end()), 744 offsets_into_component, output_component_begin, 745 output->length(), offsets_for_adjustment); 746 747 // Set positions of the parsed component. 
748 if (output_component) { 749 output_component->begin = static_cast<int>(output_component_begin); 750 output_component->len = 751 static_cast<int>(output->length() - output_component_begin); 752 } 753 } else if (output_component) { 754 output_component->reset(); 755 } 756 } 757 758 void SanitizeGeneratedFileName(base::FilePath::StringType* filename, 759 bool replace_trailing) { 760 const base::FilePath::CharType kReplace[] = FILE_PATH_LITERAL("-"); 761 if (filename->empty()) 762 return; 763 if (replace_trailing) { 764 // Handle CreateFile() stripping trailing dots and spaces on filenames 765 // http://support.microsoft.com/kb/115827 766 size_t length = filename->size(); 767 size_t pos = filename->find_last_not_of(FILE_PATH_LITERAL(" .")); 768 filename->resize((pos == std::string::npos) ? 0 : (pos + 1)); 769 TrimWhitespace(*filename, TRIM_TRAILING, filename); 770 if (filename->empty()) 771 return; 772 size_t trimmed = length - filename->size(); 773 if (trimmed) 774 filename->insert(filename->end(), trimmed, kReplace[0]); 775 } 776 base::TrimString(*filename, FILE_PATH_LITERAL("."), filename); 777 if (filename->empty()) 778 return; 779 // Replace any path information by changing path separators. 780 ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("/"), kReplace); 781 ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("\\"), kReplace); 782 } 783 784 // Returns the filename determined from the last component of the path portion 785 // of the URL. Returns an empty string if the URL doesn't have a path or is 786 // invalid. If the generated filename is not reliable, 787 // |should_overwrite_extension| will be set to true, in which case a better 788 // extension should be determined based on the content type. 789 std::string GetFileNameFromURL(const GURL& url, 790 const std::string& referrer_charset, 791 bool* should_overwrite_extension) { 792 // about: and data: URLs don't have file names, but esp. 
data: URLs may 793 // contain parts that look like ones (i.e., contain a slash). Therefore we 794 // don't attempt to divine a file name out of them. 795 if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data")) 796 return std::string(); 797 798 const std::string unescaped_url_filename = UnescapeURLComponent( 799 url.ExtractFileName(), 800 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); 801 802 // The URL's path should be escaped UTF-8, but may not be. 803 std::string decoded_filename = unescaped_url_filename; 804 if (!IsStringUTF8(decoded_filename)) { 805 // TODO(jshin): this is probably not robust enough. To be sure, we need 806 // encoding detection. 807 base::string16 utf16_output; 808 if (!referrer_charset.empty() && 809 base::CodepageToUTF16(unescaped_url_filename, 810 referrer_charset.c_str(), 811 base::OnStringConversionError::FAIL, 812 &utf16_output)) { 813 decoded_filename = UTF16ToUTF8(utf16_output); 814 } else { 815 decoded_filename = WideToUTF8( 816 base::SysNativeMBToWide(unescaped_url_filename)); 817 } 818 } 819 // If the URL contains a (possibly empty) query, assume it is a generator, and 820 // allow the determined extension to be overwritten. 821 *should_overwrite_extension = !decoded_filename.empty() && url.has_query(); 822 823 return decoded_filename; 824 } 825 826 // Returns whether the specified extension is automatically integrated into the 827 // windows shell. 828 bool IsShellIntegratedExtension(const base::FilePath::StringType& extension) { 829 base::FilePath::StringType extension_lower = StringToLowerASCII(extension); 830 831 // http://msdn.microsoft.com/en-us/library/ms811694.aspx 832 // Right-clicking on shortcuts can be magical. 833 if ((extension_lower == FILE_PATH_LITERAL("local")) || 834 (extension_lower == FILE_PATH_LITERAL("lnk"))) 835 return true; 836 837 // http://www.juniper.net/security/auto/vulnerabilities/vuln2612.html 838 // Files become magical if they end in a CLSID, so block such extensions. 
839 if (!extension_lower.empty() && 840 (extension_lower[0] == FILE_PATH_LITERAL('{')) && 841 (extension_lower[extension_lower.length() - 1] == FILE_PATH_LITERAL('}'))) 842 return true; 843 return false; 844 } 845 846 // Returns whether the specified file name is a reserved name on windows. 847 // This includes names like "com2.zip" (which correspond to devices) and 848 // desktop.ini and thumbs.db which have special meaning to the windows shell. 849 bool IsReservedName(const base::FilePath::StringType& filename) { 850 // This list is taken from the MSDN article "Naming a file" 851 // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx 852 // I also added clock$ because GetSaveFileName seems to consider it as a 853 // reserved name too. 854 static const char* const known_devices[] = { 855 "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5", 856 "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4", 857 "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "clock$" 858 }; 859 #if defined(OS_WIN) 860 std::string filename_lower = StringToLowerASCII(WideToUTF8(filename)); 861 #elif defined(OS_POSIX) 862 std::string filename_lower = StringToLowerASCII(filename); 863 #endif 864 865 for (size_t i = 0; i < arraysize(known_devices); ++i) { 866 // Exact match. 867 if (filename_lower == known_devices[i]) 868 return true; 869 // Starts with "DEVICE.". 870 if (filename_lower.find(std::string(known_devices[i]) + ".") == 0) 871 return true; 872 } 873 874 static const char* const magic_names[] = { 875 // These file names are used by the "Customize folder" feature of the shell. 876 "desktop.ini", 877 "thumbs.db", 878 }; 879 880 for (size_t i = 0; i < arraysize(magic_names); ++i) { 881 if (filename_lower == magic_names[i]) 882 return true; 883 } 884 885 return false; 886 } 887 888 // Examines the current extension in |file_name| and modifies it if necessary in 889 // order to ensure the filename is safe. 
If |file_name| doesn't contain an 890 // extension or if |ignore_extension| is true, then a new extension will be 891 // constructed based on the |mime_type|. 892 // 893 // We're addressing two things here: 894 // 895 // 1) Usability. If there is no reliable file extension, we want to guess a 896 // reasonable file extension based on the content type. 897 // 898 // 2) Shell integration. Some file extensions automatically integrate with the 899 // shell. We block these extensions to prevent a malicious web site from 900 // integrating with the user's shell. 901 void EnsureSafeExtension(const std::string& mime_type, 902 bool ignore_extension, 903 base::FilePath* file_name) { 904 // See if our file name already contains an extension. 905 base::FilePath::StringType extension = file_name->Extension(); 906 if (!extension.empty()) 907 extension.erase(extension.begin()); // Erase preceding '.'. 908 909 if ((ignore_extension || extension.empty()) && !mime_type.empty()) { 910 base::FilePath::StringType preferred_mime_extension; 911 std::vector<base::FilePath::StringType> all_mime_extensions; 912 // The GetPreferredExtensionForMimeType call will end up going to disk. Do 913 // this on another thread to avoid slowing the IO thread. 914 // http://crbug.com/61827 915 // TODO(asanka): Remove this ScopedAllowIO once all callers have switched 916 // over to IO safe threads. 917 base::ThreadRestrictions::ScopedAllowIO allow_io; 918 net::GetPreferredExtensionForMimeType(mime_type, &preferred_mime_extension); 919 net::GetExtensionsForMimeType(mime_type, &all_mime_extensions); 920 // If the existing extension is in the list of valid extensions for the 921 // given type, use it. This avoids doing things like pointlessly renaming 922 // "foo.jpg" to "foo.jpeg". 
923 if (std::find(all_mime_extensions.begin(), 924 all_mime_extensions.end(), 925 extension) != all_mime_extensions.end()) { 926 // leave |extension| alone 927 } else if (!preferred_mime_extension.empty()) { 928 extension = preferred_mime_extension; 929 } 930 } 931 932 #if defined(OS_WIN) 933 static const base::FilePath::CharType default_extension[] = 934 FILE_PATH_LITERAL("download"); 935 936 // Rename shell-integrated extensions. 937 // TODO(asanka): Consider stripping out the bad extension and replacing it 938 // with the preferred extension for the MIME type if one is available. 939 if (IsShellIntegratedExtension(extension)) 940 extension.assign(default_extension); 941 #endif 942 943 *file_name = file_name->ReplaceExtension(extension); 944 } 945 946 bool FilePathToString16(const base::FilePath& path, base::string16* converted) { 947 #if defined(OS_WIN) 948 return WideToUTF16(path.value().c_str(), path.value().size(), converted); 949 #elif defined(OS_POSIX) 950 std::string component8 = path.AsUTF8Unsafe(); 951 return !component8.empty() && 952 UTF8ToUTF16(component8.c_str(), component8.size(), converted); 953 #endif 954 } 955 956 bool IPNumberPrefixCheck(const IPAddressNumber& ip_number, 957 const unsigned char* ip_prefix, 958 size_t prefix_length_in_bits) { 959 // Compare all the bytes that fall entirely within the prefix. 960 int num_entire_bytes_in_prefix = prefix_length_in_bits / 8; 961 for (int i = 0; i < num_entire_bytes_in_prefix; ++i) { 962 if (ip_number[i] != ip_prefix[i]) 963 return false; 964 } 965 966 // In case the prefix was not a multiple of 8, there will be 1 byte 967 // which is only partially masked. 
968 int remaining_bits = prefix_length_in_bits % 8; 969 if (remaining_bits != 0) { 970 unsigned char mask = 0xFF << (8 - remaining_bits); 971 int i = num_entire_bytes_in_prefix; 972 if ((ip_number[i] & mask) != (ip_prefix[i] & mask)) 973 return false; 974 } 975 return true; 976 } 977 978 } // namespace 979 980 const FormatUrlType kFormatUrlOmitNothing = 0; 981 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; 982 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; 983 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; 984 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | 985 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; 986 987 static base::LazyInstance<std::multiset<int> >::Leaky 988 g_explicitly_allowed_ports = LAZY_INSTANCE_INITIALIZER; 989 990 size_t GetCountOfExplicitlyAllowedPorts() { 991 return g_explicitly_allowed_ports.Get().size(); 992 } 993 994 GURL FilePathToFileURL(const base::FilePath& path) { 995 // Produce a URL like "file:///C:/foo" for a regular file, or 996 // "file://///server/path" for UNC. The URL canonicalizer will fix up the 997 // latter case to be the canonical UNC form: "file://server/path" 998 base::FilePath::StringType url_string(kFileURLPrefix); 999 url_string.append(path.value()); 1000 1001 // Now do replacement of some characters. Since we assume the input is a 1002 // literal filename, anything the URL parser might consider special should 1003 // be escaped here. 
1004 1005 // must be the first substitution since others will introduce percents as the 1006 // escape character 1007 ReplaceSubstringsAfterOffset(&url_string, 0, 1008 FILE_PATH_LITERAL("%"), FILE_PATH_LITERAL("%25")); 1009 1010 // semicolon is supposed to be some kind of separator according to RFC 2396 1011 ReplaceSubstringsAfterOffset(&url_string, 0, 1012 FILE_PATH_LITERAL(";"), FILE_PATH_LITERAL("%3B")); 1013 1014 ReplaceSubstringsAfterOffset(&url_string, 0, 1015 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); 1016 1017 ReplaceSubstringsAfterOffset(&url_string, 0, 1018 FILE_PATH_LITERAL("?"), FILE_PATH_LITERAL("%3F")); 1019 1020 #if defined(OS_POSIX) 1021 ReplaceSubstringsAfterOffset(&url_string, 0, 1022 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); 1023 #endif 1024 1025 return GURL(url_string); 1026 } 1027 1028 std::string GetSpecificHeader(const std::string& headers, 1029 const std::string& name) { 1030 // We want to grab the Value from the "Key: Value" pairs in the headers, 1031 // which should look like this (no leading spaces, \n-separated) (we format 1032 // them this way in url_request_inet.cc): 1033 // HTTP/1.1 200 OK\n 1034 // ETag: "6d0b8-947-24f35ec0"\n 1035 // Content-Length: 2375\n 1036 // Content-Type: text/html; charset=UTF-8\n 1037 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n 1038 if (headers.empty()) 1039 return std::string(); 1040 1041 std::string match('\n' + name + ':'); 1042 1043 std::string::const_iterator begin = 1044 std::search(headers.begin(), headers.end(), match.begin(), match.end(), 1045 base::CaseInsensitiveCompareASCII<char>()); 1046 1047 if (begin == headers.end()) 1048 return std::string(); 1049 1050 begin += match.length(); 1051 1052 std::string ret; 1053 TrimWhitespace(std::string(begin, std::find(begin, headers.end(), '\n')), 1054 TRIM_ALL, &ret); 1055 return ret; 1056 } 1057 1058 base::string16 IDNToUnicode(const std::string& host, 1059 const std::string& languages) { 1060 return IDNToUnicodeWithOffsets(host, 
languages, NULL); 1061 } 1062 1063 std::string CanonicalizeHost(const std::string& host, 1064 url_canon::CanonHostInfo* host_info) { 1065 // Try to canonicalize the host. 1066 const url_parse::Component raw_host_component( 1067 0, static_cast<int>(host.length())); 1068 std::string canon_host; 1069 url_canon::StdStringCanonOutput canon_host_output(&canon_host); 1070 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, 1071 &canon_host_output, host_info); 1072 1073 if (host_info->out_host.is_nonempty() && 1074 host_info->family != url_canon::CanonHostInfo::BROKEN) { 1075 // Success! Assert that there's no extra garbage. 1076 canon_host_output.Complete(); 1077 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); 1078 } else { 1079 // Empty host, or canonicalization failed. We'll return empty. 1080 canon_host.clear(); 1081 } 1082 1083 return canon_host; 1084 } 1085 1086 std::string GetDirectoryListingHeader(const base::string16& title) { 1087 static const base::StringPiece header( 1088 NetModule::GetResource(IDR_DIR_HEADER_HTML)); 1089 // This can be null in unit tests. 1090 DLOG_IF(WARNING, header.empty()) << 1091 "Missing resource: directory listing header"; 1092 1093 std::string result; 1094 if (!header.empty()) 1095 result.assign(header.data(), header.size()); 1096 1097 result.append("<script>start("); 1098 base::EscapeJSONString(title, true, &result); 1099 result.append(");</script>\n"); 1100 1101 return result; 1102 } 1103 1104 inline bool IsHostCharAlphanumeric(char c) { 1105 // We can just check lowercase because uppercase characters have already been 1106 // normalized. 
1107 return ((c >= 'a') && (c <= 'z')) || ((c >= '0') && (c <= '9')); 1108 } 1109 1110 bool IsCanonicalizedHostCompliant(const std::string& host, 1111 const std::string& desired_tld) { 1112 if (host.empty()) 1113 return false; 1114 1115 bool in_component = false; 1116 bool most_recent_component_started_alphanumeric = false; 1117 bool last_char_was_underscore = false; 1118 1119 for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) { 1120 const char c = *i; 1121 if (!in_component) { 1122 most_recent_component_started_alphanumeric = IsHostCharAlphanumeric(c); 1123 if (!most_recent_component_started_alphanumeric && (c != '-')) 1124 return false; 1125 in_component = true; 1126 } else { 1127 if (c == '.') { 1128 if (last_char_was_underscore) 1129 return false; 1130 in_component = false; 1131 } else if (IsHostCharAlphanumeric(c) || (c == '-')) { 1132 last_char_was_underscore = false; 1133 } else if (c == '_') { 1134 last_char_was_underscore = true; 1135 } else { 1136 return false; 1137 } 1138 } 1139 } 1140 1141 return most_recent_component_started_alphanumeric || 1142 (!desired_tld.empty() && IsHostCharAlphanumeric(desired_tld[0])); 1143 } 1144 1145 std::string GetDirectoryListingEntry(const base::string16& name, 1146 const std::string& raw_bytes, 1147 bool is_dir, 1148 int64 size, 1149 Time modified) { 1150 std::string result; 1151 result.append("<script>addRow("); 1152 base::EscapeJSONString(name, true, &result); 1153 result.append(","); 1154 if (raw_bytes.empty()) { 1155 base::EscapeJSONString(EscapePath(UTF16ToUTF8(name)), true, &result); 1156 } else { 1157 base::EscapeJSONString(EscapePath(raw_bytes), true, &result); 1158 } 1159 if (is_dir) { 1160 result.append(",1,"); 1161 } else { 1162 result.append(",0,"); 1163 } 1164 1165 // Negative size means unknown or not applicable (e.g. directory). 
1166 base::string16 size_string; 1167 if (size >= 0) 1168 size_string = FormatBytesUnlocalized(size); 1169 base::EscapeJSONString(size_string, true, &result); 1170 1171 result.append(","); 1172 1173 base::string16 modified_str; 1174 // |modified| can be NULL in FTP listings. 1175 if (!modified.is_null()) { 1176 modified_str = base::TimeFormatShortDateAndTime(modified); 1177 } 1178 base::EscapeJSONString(modified_str, true, &result); 1179 1180 result.append(");</script>\n"); 1181 1182 return result; 1183 } 1184 1185 base::string16 StripWWW(const base::string16& text) { 1186 const base::string16 www(ASCIIToUTF16("www.")); 1187 return StartsWith(text, www, true) ? text.substr(www.length()) : text; 1188 } 1189 1190 base::string16 StripWWWFromHost(const GURL& url) { 1191 DCHECK(url.is_valid()); 1192 return StripWWW(ASCIIToUTF16(url.host())); 1193 } 1194 1195 bool IsSafePortablePathComponent(const base::FilePath& component) { 1196 base::string16 component16; 1197 base::FilePath::StringType sanitized = component.value(); 1198 SanitizeGeneratedFileName(&sanitized, true); 1199 base::FilePath::StringType extension = component.Extension(); 1200 if (!extension.empty()) 1201 extension.erase(extension.begin()); // Erase preceding '.'. 
1202 return !component.empty() && 1203 (component == component.BaseName()) && 1204 (component == component.StripTrailingSeparators()) && 1205 FilePathToString16(component, &component16) && 1206 file_util::IsFilenameLegal(component16) && 1207 !IsShellIntegratedExtension(extension) && 1208 (sanitized == component.value()) && 1209 !IsReservedName(component.value()); 1210 } 1211 1212 bool IsSafePortableRelativePath(const base::FilePath& path) { 1213 if (path.empty() || path.IsAbsolute() || path.EndsWithSeparator()) 1214 return false; 1215 std::vector<base::FilePath::StringType> components; 1216 path.GetComponents(&components); 1217 if (components.empty()) 1218 return false; 1219 for (size_t i = 0; i < components.size() - 1; ++i) { 1220 if (!IsSafePortablePathComponent(base::FilePath(components[i]))) 1221 return false; 1222 } 1223 return IsSafePortablePathComponent(path.BaseName()); 1224 } 1225 1226 void GenerateSafeFileName(const std::string& mime_type, 1227 bool ignore_extension, 1228 base::FilePath* file_path) { 1229 // Make sure we get the right file extension 1230 EnsureSafeExtension(mime_type, ignore_extension, file_path); 1231 1232 #if defined(OS_WIN) 1233 // Prepend "_" to the file name if it's a reserved name 1234 base::FilePath::StringType leaf_name = file_path->BaseName().value(); 1235 DCHECK(!leaf_name.empty()); 1236 if (IsReservedName(leaf_name)) { 1237 leaf_name = base::FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name; 1238 *file_path = file_path->DirName(); 1239 if (file_path->value() == base::FilePath::kCurrentDirectory) { 1240 *file_path = base::FilePath(leaf_name); 1241 } else { 1242 *file_path = file_path->Append(leaf_name); 1243 } 1244 } 1245 #endif 1246 } 1247 1248 base::string16 GetSuggestedFilename(const GURL& url, 1249 const std::string& content_disposition, 1250 const std::string& referrer_charset, 1251 const std::string& suggested_name, 1252 const std::string& mime_type, 1253 const std::string& default_name) { 1254 // TODO: this 
function to be updated to match the httpbis recommendations. 1255 // Talk to abarth for the latest news. 1256 1257 // We don't translate this fallback string, "download". If localization is 1258 // needed, the caller should provide localized fallback in |default_name|. 1259 static const base::FilePath::CharType kFinalFallbackName[] = 1260 FILE_PATH_LITERAL("download"); 1261 std::string filename; // In UTF-8 1262 bool overwrite_extension = false; 1263 1264 // Try to extract a filename from content-disposition first. 1265 if (!content_disposition.empty()) { 1266 HttpContentDisposition header(content_disposition, referrer_charset); 1267 filename = header.filename(); 1268 } 1269 1270 // Then try to use the suggested name. 1271 if (filename.empty() && !suggested_name.empty()) 1272 filename = suggested_name; 1273 1274 // Now try extracting the filename from the URL. GetFileNameFromURL() only 1275 // looks at the last component of the URL and doesn't return the hostname as a 1276 // failover. 1277 if (filename.empty()) 1278 filename = GetFileNameFromURL(url, referrer_charset, &overwrite_extension); 1279 1280 // Finally try the URL hostname, but only if there's no default specified in 1281 // |default_name|. Some schemes (e.g.: file:, about:, data:) do not have a 1282 // host name. 1283 if (filename.empty() && 1284 default_name.empty() && 1285 url.is_valid() && 1286 !url.host().empty()) { 1287 // TODO(jungshik) : Decode a 'punycoded' IDN hostname. 
(bug 1264451) 1288 filename = url.host(); 1289 } 1290 1291 bool replace_trailing = false; 1292 base::FilePath::StringType result_str, default_name_str; 1293 #if defined(OS_WIN) 1294 replace_trailing = true; 1295 result_str = UTF8ToUTF16(filename); 1296 default_name_str = UTF8ToUTF16(default_name); 1297 #else 1298 result_str = filename; 1299 default_name_str = default_name; 1300 #endif 1301 SanitizeGeneratedFileName(&result_str, replace_trailing); 1302 if (result_str.find_last_not_of(FILE_PATH_LITERAL("-_")) == 1303 base::FilePath::StringType::npos) { 1304 result_str = !default_name_str.empty() ? default_name_str : 1305 base::FilePath::StringType(kFinalFallbackName); 1306 overwrite_extension = false; 1307 } 1308 file_util::ReplaceIllegalCharactersInPath(&result_str, '-'); 1309 base::FilePath result(result_str); 1310 GenerateSafeFileName(mime_type, overwrite_extension, &result); 1311 1312 base::string16 result16; 1313 if (!FilePathToString16(result, &result16)) { 1314 result = base::FilePath(default_name_str); 1315 if (!FilePathToString16(result, &result16)) { 1316 result = base::FilePath(kFinalFallbackName); 1317 FilePathToString16(result, &result16); 1318 } 1319 } 1320 return result16; 1321 } 1322 1323 base::FilePath GenerateFileName(const GURL& url, 1324 const std::string& content_disposition, 1325 const std::string& referrer_charset, 1326 const std::string& suggested_name, 1327 const std::string& mime_type, 1328 const std::string& default_file_name) { 1329 base::string16 file_name = GetSuggestedFilename(url, 1330 content_disposition, 1331 referrer_charset, 1332 suggested_name, 1333 mime_type, 1334 default_file_name); 1335 1336 #if defined(OS_WIN) 1337 base::FilePath generated_name(file_name); 1338 #else 1339 base::FilePath generated_name( 1340 base::SysWideToNativeMB(UTF16ToWide(file_name))); 1341 #endif 1342 1343 #if defined(OS_CHROMEOS) 1344 // When doing file manager operations on ChromeOS, the file paths get 1345 // normalized in WebKit layer, so let's ensure 
downloaded files have 1346 // normalized names. Otherwise, we won't be able to handle files with NFD 1347 // utf8 encoded characters in name. 1348 file_util::NormalizeFileNameEncoding(&generated_name); 1349 #endif 1350 1351 DCHECK(!generated_name.empty()); 1352 1353 return generated_name; 1354 } 1355 1356 bool IsPortAllowedByDefault(int port) { 1357 int array_size = arraysize(kRestrictedPorts); 1358 for (int i = 0; i < array_size; i++) { 1359 if (kRestrictedPorts[i] == port) { 1360 return false; 1361 } 1362 } 1363 return true; 1364 } 1365 1366 bool IsPortAllowedByFtp(int port) { 1367 int array_size = arraysize(kAllowedFtpPorts); 1368 for (int i = 0; i < array_size; i++) { 1369 if (kAllowedFtpPorts[i] == port) { 1370 return true; 1371 } 1372 } 1373 // Port not explicitly allowed by FTP, so return the default restrictions. 1374 return IsPortAllowedByDefault(port); 1375 } 1376 1377 bool IsPortAllowedByOverride(int port) { 1378 if (g_explicitly_allowed_ports.Get().empty()) 1379 return false; 1380 1381 return g_explicitly_allowed_ports.Get().count(port) > 0; 1382 } 1383 1384 int SetNonBlocking(int fd) { 1385 #if defined(OS_WIN) 1386 unsigned long no_block = 1; 1387 return ioctlsocket(fd, FIONBIO, &no_block); 1388 #elif defined(OS_POSIX) 1389 int flags = fcntl(fd, F_GETFL, 0); 1390 if (-1 == flags) 1391 return flags; 1392 return fcntl(fd, F_SETFL, flags | O_NONBLOCK); 1393 #endif 1394 } 1395 1396 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin, 1397 std::string::const_iterator host_and_port_end, 1398 std::string* host, 1399 int* port) { 1400 if (host_and_port_begin >= host_and_port_end) 1401 return false; 1402 1403 // When using url_parse, we use char*. 
1404 const char* auth_begin = &(*host_and_port_begin); 1405 int auth_len = host_and_port_end - host_and_port_begin; 1406 1407 url_parse::Component auth_component(0, auth_len); 1408 url_parse::Component username_component; 1409 url_parse::Component password_component; 1410 url_parse::Component hostname_component; 1411 url_parse::Component port_component; 1412 1413 url_parse::ParseAuthority(auth_begin, auth_component, &username_component, 1414 &password_component, &hostname_component, &port_component); 1415 1416 // There shouldn't be a username/password. 1417 if (username_component.is_valid() || password_component.is_valid()) 1418 return false; 1419 1420 if (!hostname_component.is_nonempty()) 1421 return false; // Failed parsing. 1422 1423 int parsed_port_number = -1; 1424 if (port_component.is_nonempty()) { 1425 parsed_port_number = url_parse::ParsePort(auth_begin, port_component); 1426 1427 // If parsing failed, port_number will be either PORT_INVALID or 1428 // PORT_UNSPECIFIED, both of which are negative. 1429 if (parsed_port_number < 0) 1430 return false; // Failed parsing the port number. 1431 } 1432 1433 if (port_component.len == 0) 1434 return false; // Reject inputs like "foo:" 1435 1436 // Pass results back to caller. 1437 host->assign(auth_begin + hostname_component.begin, hostname_component.len); 1438 *port = parsed_port_number; 1439 1440 return true; // Success. 1441 } 1442 1443 bool ParseHostAndPort(const std::string& host_and_port, 1444 std::string* host, 1445 int* port) { 1446 return ParseHostAndPort( 1447 host_and_port.begin(), host_and_port.end(), host, port); 1448 } 1449 1450 std::string GetHostAndPort(const GURL& url) { 1451 // For IPv6 literals, GURL::host() already includes the brackets so it is 1452 // safe to just append a colon. 
1453 return base::StringPrintf("%s:%d", url.host().c_str(), 1454 url.EffectiveIntPort()); 1455 } 1456 1457 std::string GetHostAndOptionalPort(const GURL& url) { 1458 // For IPv6 literals, GURL::host() already includes the brackets 1459 // so it is safe to just append a colon. 1460 if (url.has_port()) 1461 return base::StringPrintf("%s:%s", url.host().c_str(), url.port().c_str()); 1462 return url.host(); 1463 } 1464 1465 bool IsHostnameNonUnique(const std::string& hostname) { 1466 // CanonicalizeHost requires surrounding brackets to parse an IPv6 address. 1467 const std::string host_or_ip = hostname.find(':') != std::string::npos ? 1468 "[" + hostname + "]" : hostname; 1469 url_canon::CanonHostInfo host_info; 1470 std::string canonical_name = CanonicalizeHost(host_or_ip, &host_info); 1471 1472 // If canonicalization fails, then the input is truly malformed. However, 1473 // to avoid mis-reporting bad inputs as "non-unique", treat them as unique. 1474 if (canonical_name.empty()) 1475 return false; 1476 1477 // If |hostname| is an IP address, check to see if it's in an IANA-reserved 1478 // range. 1479 if (host_info.IsIPAddress()) { 1480 IPAddressNumber host_addr; 1481 if (!ParseIPLiteralToNumber(hostname.substr(host_info.out_host.begin, 1482 host_info.out_host.len), 1483 &host_addr)) { 1484 return false; 1485 } 1486 switch (host_info.family) { 1487 case url_canon::CanonHostInfo::IPV4: 1488 case url_canon::CanonHostInfo::IPV6: 1489 return IsIPAddressReserved(host_addr); 1490 case url_canon::CanonHostInfo::NEUTRAL: 1491 case url_canon::CanonHostInfo::BROKEN: 1492 return false; 1493 } 1494 } 1495 1496 // Check for a registry controlled portion of |hostname|, ignoring private 1497 // registries, as they already chain to ICANN-administered registries, 1498 // and explicitly ignoring unknown registries. 
1499 // 1500 // Note: This means that as new gTLDs are introduced on the Internet, they 1501 // will be treated as non-unique until the registry controlled domain list 1502 // is updated. However, because gTLDs are expected to provide significant 1503 // advance notice to deprecate older versions of this code, this an 1504 // acceptable tradeoff. 1505 return 0 == registry_controlled_domains::GetRegistryLength( 1506 canonical_name, 1507 registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 1508 registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 1509 } 1510 1511 // Don't compare IPv4 and IPv6 addresses (they have different range 1512 // reservations). Keep separate reservation arrays for each IP type, and 1513 // consolidate adjacent reserved ranges within a reservation array when 1514 // possible. 1515 // Sources for info: 1516 // www.iana.org/assignments/ipv4-address-space/ipv4-address-space.xhtml 1517 // www.iana.org/assignments/ipv6-address-space/ipv6-address-space.xhtml 1518 // They're formatted here with the prefix as the last element. For example: 1519 // 10.0.0.0/8 becomes 10,0,0,0,8 and fec0::/10 becomes 0xfe,0xc0,0,0,0...,10. 
1520 bool IsIPAddressReserved(const IPAddressNumber& host_addr) { 1521 static const unsigned char kReservedIPv4[][5] = { 1522 { 0,0,0,0,8 }, { 10,0,0,0,8 }, { 100,64,0,0,10 }, { 127,0,0,0,8 }, 1523 { 169,254,0,0,16 }, { 172,16,0,0,12 }, { 192,0,2,0,24 }, 1524 { 192,88,99,0,24 }, { 192,168,0,0,16 }, { 198,18,0,0,15 }, 1525 { 198,51,100,0,24 }, { 203,0,113,0,24 }, { 224,0,0,0,3 } 1526 }; 1527 static const unsigned char kReservedIPv6[][17] = { 1528 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8 }, 1529 { 0x40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2 }, 1530 { 0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2 }, 1531 { 0xc0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3 }, 1532 { 0xe0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 }, 1533 { 0xf0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5 }, 1534 { 0xf8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6 }, 1535 { 0xfc,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7 }, 1536 { 0xfe,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9 }, 1537 { 0xfe,0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10 }, 1538 { 0xfe,0xc0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10 }, 1539 }; 1540 size_t array_size = 0; 1541 const unsigned char* array = NULL; 1542 switch (host_addr.size()) { 1543 case kIPv4AddressSize: 1544 array_size = arraysize(kReservedIPv4); 1545 array = kReservedIPv4[0]; 1546 break; 1547 case kIPv6AddressSize: 1548 array_size = arraysize(kReservedIPv6); 1549 array = kReservedIPv6[0]; 1550 break; 1551 } 1552 if (!array) 1553 return false; 1554 size_t width = host_addr.size() + 1; 1555 for (size_t i = 0; i < array_size; ++i, array += width) { 1556 if (IPNumberPrefixCheck(host_addr, array, array[width-1])) 1557 return true; 1558 } 1559 return false; 1560 } 1561 1562 // Extracts the address and port portions of a sockaddr. 
1563 bool GetIPAddressFromSockAddr(const struct sockaddr* sock_addr, 1564 socklen_t sock_addr_len, 1565 const uint8** address, 1566 size_t* address_len, 1567 uint16* port) { 1568 if (sock_addr->sa_family == AF_INET) { 1569 if (sock_addr_len < static_cast<socklen_t>(sizeof(struct sockaddr_in))) 1570 return false; 1571 const struct sockaddr_in* addr = 1572 reinterpret_cast<const struct sockaddr_in*>(sock_addr); 1573 *address = reinterpret_cast<const uint8*>(&addr->sin_addr); 1574 *address_len = kIPv4AddressSize; 1575 if (port) 1576 *port = base::NetToHost16(addr->sin_port); 1577 return true; 1578 } 1579 1580 if (sock_addr->sa_family == AF_INET6) { 1581 if (sock_addr_len < static_cast<socklen_t>(sizeof(struct sockaddr_in6))) 1582 return false; 1583 const struct sockaddr_in6* addr = 1584 reinterpret_cast<const struct sockaddr_in6*>(sock_addr); 1585 *address = reinterpret_cast<const unsigned char*>(&addr->sin6_addr); 1586 *address_len = kIPv6AddressSize; 1587 if (port) 1588 *port = base::NetToHost16(addr->sin6_port); 1589 return true; 1590 } 1591 1592 return false; // Unrecognized |sa_family|. 1593 } 1594 1595 std::string IPAddressToString(const uint8* address, 1596 size_t address_len) { 1597 std::string str; 1598 url_canon::StdStringCanonOutput output(&str); 1599 1600 if (address_len == kIPv4AddressSize) { 1601 url_canon::AppendIPv4Address(address, &output); 1602 } else if (address_len == kIPv6AddressSize) { 1603 url_canon::AppendIPv6Address(address, &output); 1604 } else { 1605 CHECK(false) << "Invalid IP address with length: " << address_len; 1606 } 1607 1608 output.Complete(); 1609 return str; 1610 } 1611 1612 std::string IPAddressToStringWithPort(const uint8* address, 1613 size_t address_len, 1614 uint16 port) { 1615 std::string address_str = IPAddressToString(address, address_len); 1616 1617 if (address_len == kIPv6AddressSize) { 1618 // Need to bracket IPv6 addresses since they contain colons. 
1619 return base::StringPrintf("[%s]:%d", address_str.c_str(), port); 1620 } 1621 return base::StringPrintf("%s:%d", address_str.c_str(), port); 1622 } 1623 1624 std::string NetAddressToString(const struct sockaddr* sa, 1625 socklen_t sock_addr_len) { 1626 const uint8* address; 1627 size_t address_len; 1628 if (!GetIPAddressFromSockAddr(sa, sock_addr_len, &address, 1629 &address_len, NULL)) { 1630 NOTREACHED(); 1631 return std::string(); 1632 } 1633 return IPAddressToString(address, address_len); 1634 } 1635 1636 std::string NetAddressToStringWithPort(const struct sockaddr* sa, 1637 socklen_t sock_addr_len) { 1638 const uint8* address; 1639 size_t address_len; 1640 uint16 port; 1641 if (!GetIPAddressFromSockAddr(sa, sock_addr_len, &address, 1642 &address_len, &port)) { 1643 NOTREACHED(); 1644 return std::string(); 1645 } 1646 return IPAddressToStringWithPort(address, address_len, port); 1647 } 1648 1649 std::string IPAddressToString(const IPAddressNumber& addr) { 1650 return IPAddressToString(&addr.front(), addr.size()); 1651 } 1652 1653 std::string IPAddressToStringWithPort(const IPAddressNumber& addr, 1654 uint16 port) { 1655 return IPAddressToStringWithPort(&addr.front(), addr.size(), port); 1656 } 1657 1658 std::string IPAddressToPackedString(const IPAddressNumber& addr) { 1659 return std::string(reinterpret_cast<const char *>(&addr.front()), 1660 addr.size()); 1661 } 1662 1663 std::string GetHostName() { 1664 #if defined(OS_WIN) 1665 EnsureWinsockInit(); 1666 #endif 1667 1668 // Host names are limited to 255 bytes. 
1669 char buffer[256]; 1670 int result = gethostname(buffer, sizeof(buffer)); 1671 if (result != 0) { 1672 DVLOG(1) << "gethostname() failed with " << result; 1673 buffer[0] = '\0'; 1674 } 1675 return std::string(buffer); 1676 } 1677 1678 void GetIdentityFromURL(const GURL& url, 1679 base::string16* username, 1680 base::string16* password) { 1681 UnescapeRule::Type flags = 1682 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS; 1683 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); 1684 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); 1685 } 1686 1687 std::string GetHostOrSpecFromURL(const GURL& url) { 1688 return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); 1689 } 1690 1691 void AppendFormattedHost(const GURL& url, 1692 const std::string& languages, 1693 base::string16* output) { 1694 Offsets offsets; 1695 AppendFormattedComponent(url.possibly_invalid_spec(), 1696 url.parsed_for_possibly_invalid_spec().host, offsets, 1697 HostComponentTransform(languages), output, NULL, NULL); 1698 } 1699 1700 base::string16 FormatUrlWithOffsets( 1701 const GURL& url, 1702 const std::string& languages, 1703 FormatUrlTypes format_types, 1704 UnescapeRule::Type unescape_rules, 1705 url_parse::Parsed* new_parsed, 1706 size_t* prefix_end, 1707 Offsets* offsets_for_adjustment) { 1708 url_parse::Parsed parsed_temp; 1709 if (!new_parsed) 1710 new_parsed = &parsed_temp; 1711 else 1712 *new_parsed = url_parse::Parsed(); 1713 Offsets original_offsets; 1714 if (offsets_for_adjustment) 1715 original_offsets = *offsets_for_adjustment; 1716 1717 // Special handling for view-source:. Don't use content::kViewSourceScheme 1718 // because this library shouldn't depend on chrome. 1719 const char* const kViewSource = "view-source"; 1720 // Reject "view-source:view-source:..." to avoid deep recursion. 
1721 const char* const kViewSourceTwice = "view-source:view-source:"; 1722 if (url.SchemeIs(kViewSource) && 1723 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { 1724 return FormatViewSourceUrl(url, original_offsets, languages, format_types, 1725 unescape_rules, new_parsed, prefix_end, 1726 offsets_for_adjustment); 1727 } 1728 1729 // We handle both valid and invalid URLs (this will give us the spec 1730 // regardless of validity). 1731 const std::string& spec = url.possibly_invalid_spec(); 1732 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); 1733 1734 // Scheme & separators. These are ASCII. 1735 base::string16 url_string; 1736 url_string.insert(url_string.end(), spec.begin(), 1737 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, 1738 true)); 1739 const char kHTTP[] = "http://"; 1740 const char kFTP[] = "ftp."; 1741 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This 1742 // means that if we trim "http://" off a URL whose host starts with "ftp." and 1743 // the user inputs this into any field subject to fixup (which is basically 1744 // all input fields), the meaning would be changed. (In fact, often the 1745 // formatted URL is directly pre-filled into an input field.) For this reason 1746 // we avoid stripping "http://" in this case. 1747 bool omit_http = (format_types & kFormatUrlOmitHTTP) && 1748 EqualsASCII(url_string, kHTTP) && 1749 !StartsWithASCII(url.host(), kFTP, true); 1750 new_parsed->scheme = parsed.scheme; 1751 1752 // Username & password. 1753 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { 1754 // Remove the username and password fields. We don't want to display those 1755 // to the user since they can be used for attacks, 1756 // e.g. "http://google.com:search@evil.ru/" 1757 new_parsed->username.reset(); 1758 new_parsed->password.reset(); 1759 // Update the offsets based on removed username and/or password. 
1760 if (offsets_for_adjustment && !offsets_for_adjustment->empty() && 1761 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { 1762 base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); 1763 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { 1764 // The seeming off-by-one and off-by-two in these first two lines are to 1765 // account for the ':' after the username and '@' after the password. 1766 offset_adjuster.Add(base::OffsetAdjuster::Adjustment( 1767 static_cast<size_t>(parsed.username.begin), 1768 static_cast<size_t>(parsed.username.len + parsed.password.len + 2), 1769 0)); 1770 } else { 1771 const url_parse::Component* nonempty_component = 1772 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; 1773 // The seeming off-by-one in below is to account for the '@' after the 1774 // username/password. 1775 offset_adjuster.Add(base::OffsetAdjuster::Adjustment( 1776 static_cast<size_t>(nonempty_component->begin), 1777 static_cast<size_t>(nonempty_component->len + 1), 0)); 1778 } 1779 } 1780 } else { 1781 AppendFormattedComponent(spec, parsed.username, original_offsets, 1782 NonHostComponentTransform(unescape_rules), &url_string, 1783 &new_parsed->username, offsets_for_adjustment); 1784 if (parsed.password.is_valid()) 1785 url_string.push_back(':'); 1786 AppendFormattedComponent(spec, parsed.password, original_offsets, 1787 NonHostComponentTransform(unescape_rules), &url_string, 1788 &new_parsed->password, offsets_for_adjustment); 1789 if (parsed.username.is_valid() || parsed.password.is_valid()) 1790 url_string.push_back('@'); 1791 } 1792 if (prefix_end) 1793 *prefix_end = static_cast<size_t>(url_string.length()); 1794 1795 // Host. 1796 AppendFormattedComponent(spec, parsed.host, original_offsets, 1797 HostComponentTransform(languages), &url_string, &new_parsed->host, 1798 offsets_for_adjustment); 1799 1800 // Port. 
1801 if (parsed.port.is_nonempty()) { 1802 url_string.push_back(':'); 1803 new_parsed->port.begin = url_string.length(); 1804 url_string.insert(url_string.end(), 1805 spec.begin() + parsed.port.begin, 1806 spec.begin() + parsed.port.end()); 1807 new_parsed->port.len = url_string.length() - new_parsed->port.begin; 1808 } else { 1809 new_parsed->port.reset(); 1810 } 1811 1812 // Path & query. Both get the same general unescape & convert treatment. 1813 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || 1814 !CanStripTrailingSlash(url)) { 1815 AppendFormattedComponent(spec, parsed.path, original_offsets, 1816 NonHostComponentTransform(unescape_rules), &url_string, 1817 &new_parsed->path, offsets_for_adjustment); 1818 } else { 1819 base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); 1820 offset_adjuster.Add(base::OffsetAdjuster::Adjustment( 1821 url_string.length(), parsed.path.len, 0)); 1822 } 1823 if (parsed.query.is_valid()) 1824 url_string.push_back('?'); 1825 AppendFormattedComponent(spec, parsed.query, original_offsets, 1826 NonHostComponentTransform(unescape_rules), &url_string, 1827 &new_parsed->query, offsets_for_adjustment); 1828 1829 // Ref. This is valid, unescaped UTF-8, so we can just convert. 1830 if (parsed.ref.is_valid()) 1831 url_string.push_back('#'); 1832 AppendFormattedComponent(spec, parsed.ref, original_offsets, 1833 NonHostComponentTransform(UnescapeRule::NONE), &url_string, 1834 &new_parsed->ref, offsets_for_adjustment); 1835 1836 // If we need to strip out http do it after the fact. This way we don't need 1837 // to worry about how offset_for_adjustment is interpreted. 
1838 if (omit_http && StartsWith(url_string, ASCIIToUTF16(kHTTP), true)) { 1839 const size_t kHTTPSize = arraysize(kHTTP) - 1; 1840 url_string = url_string.substr(kHTTPSize); 1841 if (offsets_for_adjustment && !offsets_for_adjustment->empty()) { 1842 base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); 1843 offset_adjuster.Add(base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); 1844 } 1845 if (prefix_end) 1846 *prefix_end -= kHTTPSize; 1847 1848 // Adjust new_parsed. 1849 DCHECK(new_parsed->scheme.is_valid()); 1850 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. 1851 new_parsed->scheme.reset(); 1852 AdjustAllComponentsButScheme(delta, new_parsed); 1853 } 1854 1855 LimitOffsets(url_string, offsets_for_adjustment); 1856 return url_string; 1857 } 1858 1859 base::string16 FormatUrl(const GURL& url, 1860 const std::string& languages, 1861 FormatUrlTypes format_types, 1862 UnescapeRule::Type unescape_rules, 1863 url_parse::Parsed* new_parsed, 1864 size_t* prefix_end, 1865 size_t* offset_for_adjustment) { 1866 Offsets offsets; 1867 if (offset_for_adjustment) 1868 offsets.push_back(*offset_for_adjustment); 1869 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, 1870 unescape_rules, new_parsed, prefix_end, &offsets); 1871 if (offset_for_adjustment) 1872 *offset_for_adjustment = offsets[0]; 1873 return result; 1874 } 1875 1876 bool CanStripTrailingSlash(const GURL& url) { 1877 // Omit the path only for standard, non-file URLs with nothing but "/" after 1878 // the hostname. 
1879 return url.IsStandard() && !url.SchemeIsFile() && 1880 !url.SchemeIsFileSystem() && !url.has_query() && !url.has_ref() 1881 && url.path() == "/"; 1882 } 1883 1884 GURL SimplifyUrlForRequest(const GURL& url) { 1885 DCHECK(url.is_valid()); 1886 GURL::Replacements replacements; 1887 replacements.ClearUsername(); 1888 replacements.ClearPassword(); 1889 replacements.ClearRef(); 1890 return url.ReplaceComponents(replacements); 1891 } 1892 1893 // Specifies a comma separated list of port numbers that should be accepted 1894 // despite bans. If the string is invalid no allowed ports are stored. 1895 void SetExplicitlyAllowedPorts(const std::string& allowed_ports) { 1896 if (allowed_ports.empty()) 1897 return; 1898 1899 std::multiset<int> ports; 1900 size_t last = 0; 1901 size_t size = allowed_ports.size(); 1902 // The comma delimiter. 1903 const std::string::value_type kComma = ','; 1904 1905 // Overflow is still possible for evil user inputs. 1906 for (size_t i = 0; i <= size; ++i) { 1907 // The string should be composed of only digits and commas. 
1908 if (i != size && !IsAsciiDigit(allowed_ports[i]) && 1909 (allowed_ports[i] != kComma)) 1910 return; 1911 if (i == size || allowed_ports[i] == kComma) { 1912 if (i > last) { 1913 int port; 1914 base::StringToInt(base::StringPiece(allowed_ports.begin() + last, 1915 allowed_ports.begin() + i), 1916 &port); 1917 ports.insert(port); 1918 } 1919 last = i + 1; 1920 } 1921 } 1922 g_explicitly_allowed_ports.Get() = ports; 1923 } 1924 1925 ScopedPortException::ScopedPortException(int port) : port_(port) { 1926 g_explicitly_allowed_ports.Get().insert(port); 1927 } 1928 1929 ScopedPortException::~ScopedPortException() { 1930 std::multiset<int>::iterator it = 1931 g_explicitly_allowed_ports.Get().find(port_); 1932 if (it != g_explicitly_allowed_ports.Get().end()) 1933 g_explicitly_allowed_ports.Get().erase(it); 1934 else 1935 NOTREACHED(); 1936 } 1937 1938 bool HaveOnlyLoopbackAddresses() { 1939 #if defined(OS_ANDROID) 1940 return android::HaveOnlyLoopbackAddresses(); 1941 #elif defined(OS_POSIX) 1942 struct ifaddrs* interface_addr = NULL; 1943 int rv = getifaddrs(&interface_addr); 1944 if (rv != 0) { 1945 DVLOG(1) << "getifaddrs() failed with errno = " << errno; 1946 return false; 1947 } 1948 1949 bool result = true; 1950 for (struct ifaddrs* interface = interface_addr; 1951 interface != NULL; 1952 interface = interface->ifa_next) { 1953 if (!(IFF_UP & interface->ifa_flags)) 1954 continue; 1955 if (IFF_LOOPBACK & interface->ifa_flags) 1956 continue; 1957 const struct sockaddr* addr = interface->ifa_addr; 1958 if (!addr) 1959 continue; 1960 if (addr->sa_family == AF_INET6) { 1961 // Safe cast since this is AF_INET6. 
1962 const struct sockaddr_in6* addr_in6 = 1963 reinterpret_cast<const struct sockaddr_in6*>(addr); 1964 const struct in6_addr* sin6_addr = &addr_in6->sin6_addr; 1965 if (IN6_IS_ADDR_LOOPBACK(sin6_addr) || IN6_IS_ADDR_LINKLOCAL(sin6_addr)) 1966 continue; 1967 } 1968 if (addr->sa_family != AF_INET6 && addr->sa_family != AF_INET) 1969 continue; 1970 1971 result = false; 1972 break; 1973 } 1974 freeifaddrs(interface_addr); 1975 return result; 1976 #elif defined(OS_WIN) 1977 // TODO(wtc): implement with the GetAdaptersAddresses function. 1978 NOTIMPLEMENTED(); 1979 return false; 1980 #else 1981 NOTIMPLEMENTED(); 1982 return false; 1983 #endif // defined(various platforms) 1984 } 1985 1986 AddressFamily GetAddressFamily(const IPAddressNumber& address) { 1987 switch (address.size()) { 1988 case kIPv4AddressSize: 1989 return ADDRESS_FAMILY_IPV4; 1990 case kIPv6AddressSize: 1991 return ADDRESS_FAMILY_IPV6; 1992 default: 1993 return ADDRESS_FAMILY_UNSPECIFIED; 1994 } 1995 } 1996 1997 int ConvertAddressFamily(AddressFamily address_family) { 1998 switch (address_family) { 1999 case ADDRESS_FAMILY_UNSPECIFIED: 2000 return AF_UNSPEC; 2001 case ADDRESS_FAMILY_IPV4: 2002 return AF_INET; 2003 case ADDRESS_FAMILY_IPV6: 2004 return AF_INET6; 2005 } 2006 NOTREACHED(); 2007 return AF_UNSPEC; 2008 } 2009 2010 bool ParseIPLiteralToNumber(const std::string& ip_literal, 2011 IPAddressNumber* ip_number) { 2012 // |ip_literal| could be either a IPv4 or an IPv6 literal. If it contains 2013 // a colon however, it must be an IPv6 address. 2014 if (ip_literal.find(':') != std::string::npos) { 2015 // GURL expects IPv6 hostnames to be surrounded with brackets. 2016 std::string host_brackets = "[" + ip_literal + "]"; 2017 url_parse::Component host_comp(0, host_brackets.size()); 2018 2019 // Try parsing the hostname as an IPv6 literal. 2020 ip_number->resize(16); // 128 bits. 
2021 return url_canon::IPv6AddressToNumber(host_brackets.data(), 2022 host_comp, 2023 &(*ip_number)[0]); 2024 } 2025 2026 // Otherwise the string is an IPv4 address. 2027 ip_number->resize(4); // 32 bits. 2028 url_parse::Component host_comp(0, ip_literal.size()); 2029 int num_components; 2030 url_canon::CanonHostInfo::Family family = url_canon::IPv4AddressToNumber( 2031 ip_literal.data(), host_comp, &(*ip_number)[0], &num_components); 2032 return family == url_canon::CanonHostInfo::IPV4; 2033 } 2034 2035 namespace { 2036 2037 const unsigned char kIPv4MappedPrefix[] = 2038 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF }; 2039 } 2040 2041 IPAddressNumber ConvertIPv4NumberToIPv6Number( 2042 const IPAddressNumber& ipv4_number) { 2043 DCHECK(ipv4_number.size() == 4); 2044 2045 // IPv4-mapped addresses are formed by: 2046 // <80 bits of zeros> + <16 bits of ones> + <32-bit IPv4 address>. 2047 IPAddressNumber ipv6_number; 2048 ipv6_number.reserve(16); 2049 ipv6_number.insert(ipv6_number.end(), 2050 kIPv4MappedPrefix, 2051 kIPv4MappedPrefix + arraysize(kIPv4MappedPrefix)); 2052 ipv6_number.insert(ipv6_number.end(), ipv4_number.begin(), ipv4_number.end()); 2053 return ipv6_number; 2054 } 2055 2056 bool IsIPv4Mapped(const IPAddressNumber& address) { 2057 if (address.size() != kIPv6AddressSize) 2058 return false; 2059 return std::equal(address.begin(), 2060 address.begin() + arraysize(kIPv4MappedPrefix), 2061 kIPv4MappedPrefix); 2062 } 2063 2064 IPAddressNumber ConvertIPv4MappedToIPv4(const IPAddressNumber& address) { 2065 DCHECK(IsIPv4Mapped(address)); 2066 return IPAddressNumber(address.begin() + arraysize(kIPv4MappedPrefix), 2067 address.end()); 2068 } 2069 2070 bool ParseCIDRBlock(const std::string& cidr_literal, 2071 IPAddressNumber* ip_number, 2072 size_t* prefix_length_in_bits) { 2073 // We expect CIDR notation to match one of these two templates: 2074 // <IPv4-literal> "/" <number of bits> 2075 // <IPv6-literal> "/" <number of bits> 2076 2077 std::vector<std::string> 
parts; 2078 base::SplitString(cidr_literal, '/', &parts); 2079 if (parts.size() != 2) 2080 return false; 2081 2082 // Parse the IP address. 2083 if (!ParseIPLiteralToNumber(parts[0], ip_number)) 2084 return false; 2085 2086 // Parse the prefix length. 2087 int number_of_bits = -1; 2088 if (!base::StringToInt(parts[1], &number_of_bits)) 2089 return false; 2090 2091 // Make sure the prefix length is in a valid range. 2092 if (number_of_bits < 0 || 2093 number_of_bits > static_cast<int>(ip_number->size() * 8)) 2094 return false; 2095 2096 *prefix_length_in_bits = static_cast<size_t>(number_of_bits); 2097 return true; 2098 } 2099 2100 bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number, 2101 const IPAddressNumber& ip_prefix, 2102 size_t prefix_length_in_bits) { 2103 // Both the input IP address and the prefix IP address should be 2104 // either IPv4 or IPv6. 2105 DCHECK(ip_number.size() == 4 || ip_number.size() == 16); 2106 DCHECK(ip_prefix.size() == 4 || ip_prefix.size() == 16); 2107 2108 DCHECK_LE(prefix_length_in_bits, ip_prefix.size() * 8); 2109 2110 // In case we have an IPv6 / IPv4 mismatch, convert the IPv4 addresses to 2111 // IPv6 addresses in order to do the comparison. 
2112 if (ip_number.size() != ip_prefix.size()) { 2113 if (ip_number.size() == 4) { 2114 return IPNumberMatchesPrefix(ConvertIPv4NumberToIPv6Number(ip_number), 2115 ip_prefix, prefix_length_in_bits); 2116 } 2117 return IPNumberMatchesPrefix(ip_number, 2118 ConvertIPv4NumberToIPv6Number(ip_prefix), 2119 96 + prefix_length_in_bits); 2120 } 2121 2122 return IPNumberPrefixCheck(ip_number, &ip_prefix[0], prefix_length_in_bits); 2123 } 2124 2125 const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address, 2126 socklen_t address_len) { 2127 if (address->sa_family == AF_INET) { 2128 DCHECK_LE(sizeof(sockaddr_in), static_cast<size_t>(address_len)); 2129 const struct sockaddr_in* sockaddr = 2130 reinterpret_cast<const struct sockaddr_in*>(address); 2131 return &sockaddr->sin_port; 2132 } else if (address->sa_family == AF_INET6) { 2133 DCHECK_LE(sizeof(sockaddr_in6), static_cast<size_t>(address_len)); 2134 const struct sockaddr_in6* sockaddr = 2135 reinterpret_cast<const struct sockaddr_in6*>(address); 2136 return &sockaddr->sin6_port; 2137 } else { 2138 NOTREACHED(); 2139 return NULL; 2140 } 2141 } 2142 2143 int GetPortFromSockaddr(const struct sockaddr* address, socklen_t address_len) { 2144 const uint16* port_field = GetPortFieldFromSockaddr(address, address_len); 2145 if (!port_field) 2146 return -1; 2147 return base::NetToHost16(*port_field); 2148 } 2149 2150 bool IsLocalhost(const std::string& host) { 2151 if (host == "localhost" || 2152 host == "localhost.localdomain" || 2153 host == "localhost6" || 2154 host == "localhost6.localdomain6") 2155 return true; 2156 2157 IPAddressNumber ip_number; 2158 if (ParseIPLiteralToNumber(host, &ip_number)) { 2159 size_t size = ip_number.size(); 2160 switch (size) { 2161 case kIPv4AddressSize: { 2162 IPAddressNumber localhost_prefix; 2163 localhost_prefix.push_back(127); 2164 for (int i = 0; i < 3; ++i) { 2165 localhost_prefix.push_back(0); 2166 } 2167 return IPNumberMatchesPrefix(ip_number, localhost_prefix, 8); 2168 } 2169 
2170 case kIPv6AddressSize: { 2171 struct in6_addr sin6_addr; 2172 memcpy(&sin6_addr, &ip_number[0], kIPv6AddressSize); 2173 return !!IN6_IS_ADDR_LOOPBACK(&sin6_addr); 2174 } 2175 2176 default: 2177 NOTREACHED(); 2178 } 2179 } 2180 2181 return false; 2182 } 2183 2184 NetworkInterface::NetworkInterface() : network_prefix(0) { 2185 } 2186 2187 NetworkInterface::NetworkInterface(const std::string& name, 2188 uint32 interface_index, 2189 const IPAddressNumber& address, 2190 size_t network_prefix) 2191 : name(name), 2192 interface_index(interface_index), 2193 address(address), 2194 network_prefix(network_prefix) { 2195 } 2196 2197 NetworkInterface::~NetworkInterface() { 2198 } 2199 2200 unsigned CommonPrefixLength(const IPAddressNumber& a1, 2201 const IPAddressNumber& a2) { 2202 DCHECK_EQ(a1.size(), a2.size()); 2203 for (size_t i = 0; i < a1.size(); ++i) { 2204 unsigned diff = a1[i] ^ a2[i]; 2205 if (!diff) 2206 continue; 2207 for (unsigned j = 0; j < CHAR_BIT; ++j) { 2208 if (diff & (1 << (CHAR_BIT - 1))) 2209 return i * CHAR_BIT + j; 2210 diff <<= 1; 2211 } 2212 NOTREACHED(); 2213 } 2214 return a1.size() * CHAR_BIT; 2215 } 2216 2217 unsigned MaskPrefixLength(const IPAddressNumber& mask) { 2218 IPAddressNumber all_ones(mask.size(), 0xFF); 2219 return CommonPrefixLength(mask, all_ones); 2220 } 2221 2222 } // namespace net 2223