1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/base/net_util.h" 6 7 #include <algorithm> 8 #include <iterator> 9 #include <map> 10 11 #include "build/build_config.h" 12 13 #if defined(OS_WIN) 14 #include <windows.h> 15 #include <iphlpapi.h> 16 #include <winsock2.h> 17 #pragma comment(lib, "iphlpapi.lib") 18 #elif defined(OS_POSIX) 19 #include <fcntl.h> 20 #if !defined(OS_ANDROID) 21 #include <ifaddrs.h> 22 #endif 23 #include <net/if.h> 24 #include <netdb.h> 25 #include <netinet/in.h> 26 #endif 27 28 #include "base/basictypes.h" 29 #include "base/file_util.h" 30 #include "base/files/file_path.h" 31 #include "base/i18n/file_util_icu.h" 32 #include "base/i18n/icu_string_conversions.h" 33 #include "base/i18n/time_formatting.h" 34 #include "base/json/string_escape.h" 35 #include "base/lazy_instance.h" 36 #include "base/logging.h" 37 #include "base/memory/singleton.h" 38 #include "base/message_loop/message_loop.h" 39 #include "base/metrics/histogram.h" 40 #include "base/path_service.h" 41 #include "base/stl_util.h" 42 #include "base/strings/string_number_conversions.h" 43 #include "base/strings/string_piece.h" 44 #include "base/strings/string_split.h" 45 #include "base/strings/string_tokenizer.h" 46 #include "base/strings/string_util.h" 47 #include "base/strings/stringprintf.h" 48 #include "base/strings/sys_string_conversions.h" 49 #include "base/strings/utf_offset_string_conversions.h" 50 #include "base/strings/utf_string_conversions.h" 51 #include "base/synchronization/lock.h" 52 #include "base/sys_byteorder.h" 53 #include "base/time/time.h" 54 #include "base/values.h" 55 #include "grit/net_resources.h" 56 #include "url/gurl.h" 57 #include "url/url_canon.h" 58 #include "url/url_canon_ip.h" 59 #include "url/url_parse.h" 60 #if defined(OS_ANDROID) 61 #include "net/android/network_library.h" 62 #endif 63 #include 
"net/base/dns_util.h" 64 #include "net/base/escape.h" 65 #include "net/base/mime_util.h" 66 #include "net/base/net_module.h" 67 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 68 #if defined(OS_WIN) 69 #include "net/base/winsock_init.h" 70 #endif 71 #include "net/http/http_content_disposition.h" 72 #include "third_party/icu/source/common/unicode/uidna.h" 73 #include "third_party/icu/source/common/unicode/uniset.h" 74 #include "third_party/icu/source/common/unicode/uscript.h" 75 #include "third_party/icu/source/common/unicode/uset.h" 76 #include "third_party/icu/source/i18n/unicode/datefmt.h" 77 #include "third_party/icu/source/i18n/unicode/regex.h" 78 #include "third_party/icu/source/i18n/unicode/ulocdata.h" 79 80 using base::Time; 81 82 namespace net { 83 84 namespace { 85 86 // what we prepend to get a file URL 87 static const base::FilePath::CharType kFileURLPrefix[] = 88 FILE_PATH_LITERAL("file:///"); 89 90 // The general list of blocked ports. Will be blocked unless a specific 91 // protocol overrides it. 
// (Ex: ftp can use ports 21 and 22, see kAllowedFtpPorts below.)
//
// Each entry is a TCP port number; the trailing comment names the service
// conventionally associated with that port.
static const int kRestrictedPorts[] = {
  1,       // tcpmux
  7,       // echo
  9,       // discard
  11,      // systat
  13,      // daytime
  15,      // netstat
  17,      // qotd
  19,      // chargen
  20,      // ftp data
  21,      // ftp access
  22,      // ssh
  23,      // telnet
  25,      // smtp
  37,      // time
  42,      // name
  43,      // nicname
  53,      // domain
  77,      // priv-rjs
  79,      // finger
  87,      // ttylink
  95,      // supdup
  101,     // hostname
  102,     // iso-tsap
  103,     // gppitnp
  104,     // acr-nema
  109,     // pop2
  110,     // pop3
  111,     // sunrpc
  113,     // auth
  115,     // sftp
  117,     // uucp-path
  119,     // nntp
  123,     // NTP
  135,     // loc-srv /epmap
  139,     // netbios
  143,     // imap2
  179,     // BGP
  389,     // ldap
  465,     // smtp+ssl
  512,     // print / exec
  513,     // login
  514,     // shell
  515,     // printer
  526,     // tempo
  530,     // courier
  531,     // chat
  532,     // netnews
  540,     // uucp
  556,     // remotefs
  563,     // nntp+ssl
  587,     // smtp (mail submission)
  601,     // syslog-conn
  636,     // ldap+ssl
  993,     // imap+ssl
  995,     // pop3+ssl
  2049,    // nfs
  3659,    // apple-sasl / PasswordServer
  4045,    // lockd
  6000,    // X11
  6665,    // Alternate IRC [Apple addition]
  6666,    // Alternate IRC [Apple addition]
  6667,    // Standard IRC [Apple addition]
  6668,    // Alternate IRC [Apple addition]
  6669,    // Alternate IRC [Apple addition]
  0xFFFF,  // Used to block all invalid port numbers (see
           // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port())
};

// FTP overrides the following restricted ports. The labels match the ones
// used for the same ports in kRestrictedPorts above.
static const int kAllowedFtpPorts[] = {
  21,   // ftp access
  22,   // ssh
};

// Does some simple normalization of scripts so we can allow certain scripts
// to exist together.
// TODO(brettw) bug 880223: we should allow some other languages to be
// combined such as Chinese and Latin.
We will probably need a more 171 // complicated system of language pairs to have more fine-grained control. 172 UScriptCode NormalizeScript(UScriptCode code) { 173 switch (code) { 174 case USCRIPT_KATAKANA: 175 case USCRIPT_HIRAGANA: 176 case USCRIPT_KATAKANA_OR_HIRAGANA: 177 case USCRIPT_HANGUL: // This one is arguable. 178 return USCRIPT_HAN; 179 default: 180 return code; 181 } 182 } 183 184 bool IsIDNComponentInSingleScript(const base::char16* str, int str_len) { 185 UScriptCode first_script = USCRIPT_INVALID_CODE; 186 bool is_first = true; 187 188 int i = 0; 189 while (i < str_len) { 190 unsigned code_point; 191 U16_NEXT(str, i, str_len, code_point); 192 193 UErrorCode err = U_ZERO_ERROR; 194 UScriptCode cur_script = uscript_getScript(code_point, &err); 195 if (err != U_ZERO_ERROR) 196 return false; // Report mixed on error. 197 cur_script = NormalizeScript(cur_script); 198 199 // TODO(brettw) We may have to check for USCRIPT_INHERENT as well. 200 if (is_first && cur_script != USCRIPT_COMMON) { 201 first_script = cur_script; 202 is_first = false; 203 } else { 204 if (cur_script != USCRIPT_COMMON && cur_script != first_script) 205 return false; 206 } 207 } 208 return true; 209 } 210 211 // Check if the script of a language can be 'safely' mixed with 212 // Latin letters in the ASCII range. 213 bool IsCompatibleWithASCIILetters(const std::string& lang) { 214 // For now, just list Chinese, Japanese and Korean (positive list). 215 // An alternative is negative-listing (languages using Greek and 216 // Cyrillic letters), but it can be more dangerous. 
217 return !lang.substr(0, 2).compare("zh") || 218 !lang.substr(0, 2).compare("ja") || 219 !lang.substr(0, 2).compare("ko"); 220 } 221 222 typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap; 223 224 class LangToExemplarSet { 225 public: 226 static LangToExemplarSet* GetInstance() { 227 return Singleton<LangToExemplarSet>::get(); 228 } 229 230 private: 231 LangToExemplarSetMap map; 232 LangToExemplarSet() { } 233 ~LangToExemplarSet() { 234 STLDeleteContainerPairSecondPointers(map.begin(), map.end()); 235 } 236 237 friend class Singleton<LangToExemplarSet>; 238 friend struct DefaultSingletonTraits<LangToExemplarSet>; 239 friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**); 240 friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*); 241 242 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet); 243 }; 244 245 bool GetExemplarSetForLang(const std::string& lang, 246 icu::UnicodeSet** lang_set) { 247 const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; 248 LangToExemplarSetMap::const_iterator pos = map.find(lang); 249 if (pos != map.end()) { 250 *lang_set = pos->second; 251 return true; 252 } 253 return false; 254 } 255 256 void SetExemplarSetForLang(const std::string& lang, 257 icu::UnicodeSet* lang_set) { 258 LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; 259 map.insert(std::make_pair(lang, lang_set)); 260 } 261 262 static base::LazyInstance<base::Lock>::Leaky 263 g_lang_set_lock = LAZY_INSTANCE_INITIALIZER; 264 265 // Returns true if all the characters in component_characters are used by 266 // the language |lang|. 267 bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters, 268 const std::string& lang) { 269 CR_DEFINE_STATIC_LOCAL( 270 const icu::UnicodeSet, kASCIILetters, ('a', 'z')); 271 icu::UnicodeSet* lang_set = NULL; 272 // We're called from both the UI thread and the history thread. 
273 { 274 base::AutoLock lock(g_lang_set_lock.Get()); 275 if (!GetExemplarSetForLang(lang, &lang_set)) { 276 UErrorCode status = U_ZERO_ERROR; 277 ULocaleData* uld = ulocdata_open(lang.c_str(), &status); 278 // TODO(jungshik) Turn this check on when the ICU data file is 279 // rebuilt with the minimal subset of locale data for languages 280 // to which Chrome is not localized but which we offer in the list 281 // of languages selectable for Accept-Languages. With the rebuilt ICU 282 // data, ulocdata_open never should fall back to the default locale. 283 // (issue 2078) 284 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING); 285 if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) { 286 lang_set = reinterpret_cast<icu::UnicodeSet *>( 287 ulocdata_getExemplarSet(uld, NULL, 0, 288 ULOCDATA_ES_STANDARD, &status)); 289 // If |lang| is compatible with ASCII Latin letters, add them. 290 if (IsCompatibleWithASCIILetters(lang)) 291 lang_set->addAll(kASCIILetters); 292 } else { 293 lang_set = new icu::UnicodeSet(1, 0); 294 } 295 lang_set->freeze(); 296 SetExemplarSetForLang(lang, lang_set); 297 ulocdata_close(uld); 298 } 299 } 300 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); 301 } 302 303 // Returns true if the given Unicode host component is safe to display to the 304 // user. 305 bool IsIDNComponentSafe(const base::char16* str, 306 int str_len, 307 const std::string& languages) { 308 // Most common cases (non-IDN) do not reach here so that we don't 309 // need a fast return path. 310 // TODO(jungshik) : Check if there's any character inappropriate 311 // (although allowed) for domain names. 312 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and 313 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt 314 // For now, we borrow the list from Mozilla and tweaked it slightly. 315 // (e.g. 
Characters like U+00A0, U+3000, U+3002 are omitted because 316 // they're gonna be canonicalized to U+0020 and full stop before 317 // reaching here.) 318 // The original list is available at 319 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and 320 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 321 322 UErrorCode status = U_ZERO_ERROR; 323 #ifdef U_WCHAR_IS_UTF16 324 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 325 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" 326 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" 327 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" 328 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" 329 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" 330 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" 331 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" 332 L"[\ufffa-\ufffd]]"), status); 333 DCHECK(U_SUCCESS(status)); 334 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 335 // Lone katakana no, so, or n 336 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" 337 // Repeating Japanese accent characters 338 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), 339 0, status); 340 #else 341 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 342 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" 343 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" 344 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" 345 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" 346 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" 347 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" 348 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" 349 "[\\ufffa-\\ufffd]]", -1, US_INV), status); 350 DCHECK(U_SUCCESS(status)); 351 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 352 // Lone katakana no, so, or n 353 "[^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]" 354 
// Repeating Japanese accent characters 355 "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), 356 0, status); 357 #endif 358 DCHECK(U_SUCCESS(status)); 359 icu::UnicodeSet component_characters; 360 icu::UnicodeString component_string(str, str_len); 361 component_characters.addAll(component_string); 362 if (dangerous_characters.containsSome(component_characters)) 363 return false; 364 365 DCHECK(U_SUCCESS(status)); 366 dangerous_patterns.reset(component_string); 367 if (dangerous_patterns.find()) 368 return false; 369 370 // If the language list is empty, the result is completely determined 371 // by whether a component is a single script or not. This will block 372 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are 373 // allowed with |languages| (while it blocks Chinese + Latin letters with 374 // an accent as should be the case), but we want to err on the safe side 375 // when |languages| is empty. 376 if (languages.empty()) 377 return IsIDNComponentInSingleScript(str, str_len); 378 379 // |common_characters| is made up of ASCII numbers, hyphen, plus and 380 // underscore that are used across scripts and allowed in domain names. 381 // (sync'd with characters allowed in url_canon_host with square 382 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. 383 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), 384 status); 385 DCHECK(U_SUCCESS(status)); 386 // Subtract common characters because they're always allowed so that 387 // we just have to check if a language-specific set contains 388 // the remainder. 389 component_characters.removeAll(common_characters); 390 391 base::StringTokenizer t(languages, ","); 392 while (t.GetNext()) { 393 if (IsComponentCoveredByLang(component_characters, t.token())) 394 return true; 395 } 396 return false; 397 } 398 399 // Converts one component of a host (between dots) to IDN if safe. 
The result 400 // will be APPENDED to the given output string and will be the same as the input 401 // if it is not IDN or the IDN is unsafe to display. Returns whether any 402 // conversion was performed. 403 bool IDNToUnicodeOneComponent(const base::char16* comp, 404 size_t comp_len, 405 const std::string& languages, 406 base::string16* out) { 407 DCHECK(out); 408 if (comp_len == 0) 409 return false; 410 411 // Only transform if the input can be an IDN component. 412 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; 413 if ((comp_len > arraysize(kIdnPrefix)) && 414 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { 415 // Repeatedly expand the output string until it's big enough. It looks like 416 // ICU will return the required size of the buffer, but that's not 417 // documented, so we'll just grow by 2x. This should be rare and is not on a 418 // critical path. 419 size_t original_length = out->length(); 420 for (int extra_space = 64; ; extra_space *= 2) { 421 UErrorCode status = U_ZERO_ERROR; 422 out->resize(out->length() + extra_space); 423 int output_chars = uidna_IDNToUnicode(comp, 424 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, 425 UIDNA_DEFAULT, NULL, &status); 426 if (status == U_ZERO_ERROR) { 427 // Converted successfully. 428 out->resize(original_length + output_chars); 429 if (IsIDNComponentSafe(out->data() + original_length, output_chars, 430 languages)) 431 return true; 432 } 433 434 if (status != U_BUFFER_OVERFLOW_ERROR) 435 break; 436 } 437 // Failed, revert back to original string. 438 out->resize(original_length); 439 } 440 441 // We get here with no IDN or on error, in which case we just append the 442 // literal input. 443 out->append(comp, comp_len); 444 return false; 445 } 446 447 // Clamps the offsets in |offsets_for_adjustment| to the length of |str|. 
448 void LimitOffsets(const base::string16& str, 449 std::vector<size_t>* offsets_for_adjustment) { 450 if (offsets_for_adjustment) { 451 std::for_each(offsets_for_adjustment->begin(), 452 offsets_for_adjustment->end(), 453 base::LimitOffset<base::string16>(str.length())); 454 } 455 } 456 457 // TODO(brettw) bug 734373: check the scripts for each host component and 458 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for 459 // scripts that the user has installed. For now, just put the entire 460 // path through IDN. Maybe this feature can be implemented in ICU itself? 461 // 462 // We may want to skip this step in the case of file URLs to allow unicode 463 // UNC hostnames regardless of encodings. 464 base::string16 IDNToUnicodeWithOffsets( 465 const std::string& host, 466 const std::string& languages, 467 std::vector<size_t>* offsets_for_adjustment) { 468 // Convert the ASCII input to a base::string16 for ICU. 469 base::string16 input16; 470 input16.reserve(host.length()); 471 input16.insert(input16.end(), host.begin(), host.end()); 472 473 // Do each component of the host separately, since we enforce script matching 474 // on a per-component basis. 475 base::string16 out16; 476 { 477 base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); 478 for (size_t component_start = 0, component_end; 479 component_start < input16.length(); 480 component_start = component_end + 1) { 481 // Find the end of the component. 482 component_end = input16.find('.', component_start); 483 if (component_end == base::string16::npos) 484 component_end = input16.length(); // For getting the last component. 485 size_t component_length = component_end - component_start; 486 size_t new_component_start = out16.length(); 487 bool converted_idn = false; 488 if (component_end > component_start) { 489 // Add the substring that we just found. 
490 converted_idn = IDNToUnicodeOneComponent( 491 input16.data() + component_start, component_length, languages, 492 &out16); 493 } 494 size_t new_component_length = out16.length() - new_component_start; 495 496 if (converted_idn && offsets_for_adjustment) { 497 offset_adjuster.Add(base::OffsetAdjuster::Adjustment(component_start, 498 component_length, new_component_length)); 499 } 500 501 // Need to add the dot we just found (if we found one). 502 if (component_end < input16.length()) 503 out16.push_back('.'); 504 } 505 } 506 507 LimitOffsets(out16, offsets_for_adjustment); 508 return out16; 509 } 510 511 // Transforms |original_offsets| by subtracting |component_begin| from all 512 // offsets. Any offset which was not at least this large to begin with is set 513 // to std::string::npos. 514 std::vector<size_t> OffsetsIntoComponent( 515 const std::vector<size_t>& original_offsets, 516 size_t component_begin) { 517 DCHECK_NE(std::string::npos, component_begin); 518 std::vector<size_t> offsets_into_component(original_offsets); 519 for (std::vector<size_t>::iterator i(offsets_into_component.begin()); 520 i != offsets_into_component.end(); ++i) { 521 if (*i != std::string::npos) 522 *i = (*i < component_begin) ? std::string::npos : (*i - component_begin); 523 } 524 return offsets_into_component; 525 } 526 527 // Called after we transform a component and append it to an output string. 528 // Maps |transformed_offsets|, which represent offsets into the transformed 529 // component itself, into appropriate offsets for the output string, by adding 530 // |output_component_begin| to each. Determines which offsets need mapping by 531 // checking to see which of the |original_offsets| were within the designated 532 // original component, using its provided endpoints. 
533 void AdjustForComponentTransform( 534 const std::vector<size_t>& original_offsets, 535 size_t original_component_begin, 536 size_t original_component_end, 537 const std::vector<size_t>& transformed_offsets, 538 size_t output_component_begin, 539 std::vector<size_t>* offsets_for_adjustment) { 540 if (!offsets_for_adjustment) 541 return; 542 543 DCHECK_NE(std::string::npos, original_component_begin); 544 DCHECK_NE(std::string::npos, original_component_end); 545 DCHECK_NE(base::string16::npos, output_component_begin); 546 size_t offsets_size = offsets_for_adjustment->size(); 547 DCHECK_EQ(offsets_size, original_offsets.size()); 548 DCHECK_EQ(offsets_size, transformed_offsets.size()); 549 for (size_t i = 0; i < offsets_size; ++i) { 550 size_t original_offset = original_offsets[i]; 551 if ((original_offset >= original_component_begin) && 552 (original_offset < original_component_end)) { 553 size_t transformed_offset = transformed_offsets[i]; 554 (*offsets_for_adjustment)[i] = 555 (transformed_offset == base::string16::npos) ? 556 base::string16::npos : (output_component_begin + transformed_offset); 557 } 558 } 559 } 560 561 // If |component| is valid, its begin is incremented by |delta|. 562 void AdjustComponent(int delta, url_parse::Component* component) { 563 if (!component->is_valid()) 564 return; 565 566 DCHECK(delta >= 0 || component->begin >= -delta); 567 component->begin += delta; 568 } 569 570 // Adjusts all the components of |parsed| by |delta|, except for the scheme. 571 void AdjustComponents(int delta, url_parse::Parsed* parsed) { 572 AdjustComponent(delta, &(parsed->username)); 573 AdjustComponent(delta, &(parsed->password)); 574 AdjustComponent(delta, &(parsed->host)); 575 AdjustComponent(delta, &(parsed->port)); 576 AdjustComponent(delta, &(parsed->path)); 577 AdjustComponent(delta, &(parsed->query)); 578 AdjustComponent(delta, &(parsed->ref)); 579 } 580 581 // Helper for FormatUrlWithOffsets(). 
582 base::string16 FormatViewSourceUrl( 583 const GURL& url, 584 const std::vector<size_t>& original_offsets, 585 const std::string& languages, 586 FormatUrlTypes format_types, 587 UnescapeRule::Type unescape_rules, 588 url_parse::Parsed* new_parsed, 589 size_t* prefix_end, 590 std::vector<size_t>* offsets_for_adjustment) { 591 DCHECK(new_parsed); 592 const char kViewSource[] = "view-source:"; 593 const size_t kViewSourceLength = arraysize(kViewSource) - 1; 594 std::vector<size_t> offsets_into_url( 595 OffsetsIntoComponent(original_offsets, kViewSourceLength)); 596 597 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength)); 598 base::string16 result(ASCIIToUTF16(kViewSource) + 599 FormatUrlWithOffsets(real_url, languages, format_types, unescape_rules, 600 new_parsed, prefix_end, &offsets_into_url)); 601 602 // Adjust position values. 603 if (new_parsed->scheme.is_nonempty()) { 604 // Assume "view-source:real-scheme" as a scheme. 605 new_parsed->scheme.len += kViewSourceLength; 606 } else { 607 new_parsed->scheme.begin = 0; 608 new_parsed->scheme.len = kViewSourceLength - 1; 609 } 610 AdjustComponents(kViewSourceLength, new_parsed); 611 if (prefix_end) 612 *prefix_end += kViewSourceLength; 613 AdjustForComponentTransform(original_offsets, kViewSourceLength, 614 url.possibly_invalid_spec().length(), offsets_into_url, kViewSourceLength, 615 offsets_for_adjustment); 616 LimitOffsets(result, offsets_for_adjustment); 617 return result; 618 } 619 620 class AppendComponentTransform { 621 public: 622 AppendComponentTransform() {} 623 virtual ~AppendComponentTransform() {} 624 625 virtual base::string16 Execute( 626 const std::string& component_text, 627 std::vector<size_t>* offsets_into_component) const = 0; 628 629 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an 630 // accessible copy constructor in order to call AppendFormattedComponent() 631 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). 
632 }; 633 634 class HostComponentTransform : public AppendComponentTransform { 635 public: 636 explicit HostComponentTransform(const std::string& languages) 637 : languages_(languages) { 638 } 639 640 private: 641 virtual base::string16 Execute( 642 const std::string& component_text, 643 std::vector<size_t>* offsets_into_component) const OVERRIDE { 644 return IDNToUnicodeWithOffsets(component_text, languages_, 645 offsets_into_component); 646 } 647 648 const std::string& languages_; 649 }; 650 651 class NonHostComponentTransform : public AppendComponentTransform { 652 public: 653 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules) 654 : unescape_rules_(unescape_rules) { 655 } 656 657 private: 658 virtual base::string16 Execute( 659 const std::string& component_text, 660 std::vector<size_t>* offsets_into_component) const OVERRIDE { 661 return (unescape_rules_ == UnescapeRule::NONE) ? 662 base::UTF8ToUTF16AndAdjustOffsets(component_text, 663 offsets_into_component) : 664 UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text, 665 unescape_rules_, offsets_into_component); 666 } 667 668 const UnescapeRule::Type unescape_rules_; 669 }; 670 671 void AppendFormattedComponent(const std::string& spec, 672 const url_parse::Component& original_component, 673 const std::vector<size_t>& original_offsets, 674 const AppendComponentTransform& transform, 675 base::string16* output, 676 url_parse::Component* output_component, 677 std::vector<size_t>* offsets_for_adjustment) { 678 DCHECK(output); 679 if (original_component.is_nonempty()) { 680 size_t original_component_begin = 681 static_cast<size_t>(original_component.begin); 682 size_t output_component_begin = output->length(); 683 if (output_component) 684 output_component->begin = static_cast<int>(output_component_begin); 685 686 std::vector<size_t> offsets_into_component = 687 OffsetsIntoComponent(original_offsets, original_component_begin); 688 output->append(transform.Execute(std::string(spec, 
original_component_begin, 689 static_cast<size_t>(original_component.len)), &offsets_into_component)); 690 691 if (output_component) { 692 output_component->len = 693 static_cast<int>(output->length() - output_component_begin); 694 } 695 AdjustForComponentTransform(original_offsets, original_component_begin, 696 static_cast<size_t>(original_component.end()), 697 offsets_into_component, output_component_begin, 698 offsets_for_adjustment); 699 } else if (output_component) { 700 output_component->reset(); 701 } 702 } 703 704 void SanitizeGeneratedFileName(base::FilePath::StringType* filename, 705 bool replace_trailing) { 706 const base::FilePath::CharType kReplace[] = FILE_PATH_LITERAL("-"); 707 if (filename->empty()) 708 return; 709 if (replace_trailing) { 710 // Handle CreateFile() stripping trailing dots and spaces on filenames 711 // http://support.microsoft.com/kb/115827 712 size_t length = filename->size(); 713 size_t pos = filename->find_last_not_of(FILE_PATH_LITERAL(" .")); 714 filename->resize((pos == std::string::npos) ? 0 : (pos + 1)); 715 TrimWhitespace(*filename, TRIM_TRAILING, filename); 716 if (filename->empty()) 717 return; 718 size_t trimmed = length - filename->size(); 719 if (trimmed) 720 filename->insert(filename->end(), trimmed, kReplace[0]); 721 } 722 TrimString(*filename, FILE_PATH_LITERAL("."), filename); 723 if (filename->empty()) 724 return; 725 // Replace any path information by changing path separators. 726 ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("/"), kReplace); 727 ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("\\"), kReplace); 728 } 729 730 // Returns the filename determined from the last component of the path portion 731 // of the URL. Returns an empty string if the URL doesn't have a path or is 732 // invalid. If the generated filename is not reliable, 733 // |should_overwrite_extension| will be set to true, in which case a better 734 // extension should be determined based on the content type. 
735 std::string GetFileNameFromURL(const GURL& url, 736 const std::string& referrer_charset, 737 bool* should_overwrite_extension) { 738 // about: and data: URLs don't have file names, but esp. data: URLs may 739 // contain parts that look like ones (i.e., contain a slash). Therefore we 740 // don't attempt to divine a file name out of them. 741 if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data")) 742 return std::string(); 743 744 const std::string unescaped_url_filename = UnescapeURLComponent( 745 url.ExtractFileName(), 746 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); 747 748 // The URL's path should be escaped UTF-8, but may not be. 749 std::string decoded_filename = unescaped_url_filename; 750 if (!IsStringUTF8(decoded_filename)) { 751 // TODO(jshin): this is probably not robust enough. To be sure, we need 752 // encoding detection. 753 base::string16 utf16_output; 754 if (!referrer_charset.empty() && 755 base::CodepageToUTF16(unescaped_url_filename, 756 referrer_charset.c_str(), 757 base::OnStringConversionError::FAIL, 758 &utf16_output)) { 759 decoded_filename = UTF16ToUTF8(utf16_output); 760 } else { 761 decoded_filename = WideToUTF8( 762 base::SysNativeMBToWide(unescaped_url_filename)); 763 } 764 } 765 // If the URL contains a (possibly empty) query, assume it is a generator, and 766 // allow the determined extension to be overwritten. 767 *should_overwrite_extension = !decoded_filename.empty() && url.has_query(); 768 769 return decoded_filename; 770 } 771 772 // Returns whether the specified extension is automatically integrated into the 773 // windows shell. 774 bool IsShellIntegratedExtension(const base::FilePath::StringType& extension) { 775 base::FilePath::StringType extension_lower = StringToLowerASCII(extension); 776 777 // http://msdn.microsoft.com/en-us/library/ms811694.aspx 778 // Right-clicking on shortcuts can be magical. 
779 if ((extension_lower == FILE_PATH_LITERAL("local")) || 780 (extension_lower == FILE_PATH_LITERAL("lnk"))) 781 return true; 782 783 // http://www.juniper.net/security/auto/vulnerabilities/vuln2612.html 784 // Files become magical if they end in a CLSID, so block such extensions. 785 if (!extension_lower.empty() && 786 (extension_lower[0] == FILE_PATH_LITERAL('{')) && 787 (extension_lower[extension_lower.length() - 1] == FILE_PATH_LITERAL('}'))) 788 return true; 789 return false; 790 } 791 792 // Returns whether the specified file name is a reserved name on windows. 793 // This includes names like "com2.zip" (which correspond to devices) and 794 // desktop.ini and thumbs.db which have special meaning to the windows shell. 795 bool IsReservedName(const base::FilePath::StringType& filename) { 796 // This list is taken from the MSDN article "Naming a file" 797 // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx 798 // I also added clock$ because GetSaveFileName seems to consider it as a 799 // reserved name too. 800 static const char* const known_devices[] = { 801 "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5", 802 "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4", 803 "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "clock$" 804 }; 805 #if defined(OS_WIN) 806 std::string filename_lower = StringToLowerASCII(WideToUTF8(filename)); 807 #elif defined(OS_POSIX) 808 std::string filename_lower = StringToLowerASCII(filename); 809 #endif 810 811 for (size_t i = 0; i < arraysize(known_devices); ++i) { 812 // Exact match. 813 if (filename_lower == known_devices[i]) 814 return true; 815 // Starts with "DEVICE.". 816 if (filename_lower.find(std::string(known_devices[i]) + ".") == 0) 817 return true; 818 } 819 820 static const char* const magic_names[] = { 821 // These file names are used by the "Customize folder" feature of the shell. 
822 "desktop.ini", 823 "thumbs.db", 824 }; 825 826 for (size_t i = 0; i < arraysize(magic_names); ++i) { 827 if (filename_lower == magic_names[i]) 828 return true; 829 } 830 831 return false; 832 } 833 834 // Examines the current extension in |file_name| and modifies it if necessary in 835 // order to ensure the filename is safe. If |file_name| doesn't contain an 836 // extension or if |ignore_extension| is true, then a new extension will be 837 // constructed based on the |mime_type|. 838 // 839 // We're addressing two things here: 840 // 841 // 1) Usability. If there is no reliable file extension, we want to guess a 842 // reasonable file extension based on the content type. 843 // 844 // 2) Shell integration. Some file extensions automatically integrate with the 845 // shell. We block these extensions to prevent a malicious web site from 846 // integrating with the user's shell. 847 void EnsureSafeExtension(const std::string& mime_type, 848 bool ignore_extension, 849 base::FilePath* file_name) { 850 // See if our file name already contains an extension. 851 base::FilePath::StringType extension = file_name->Extension(); 852 if (!extension.empty()) 853 extension.erase(extension.begin()); // Erase preceding '.'. 854 855 if ((ignore_extension || extension.empty()) && !mime_type.empty()) { 856 base::FilePath::StringType preferred_mime_extension; 857 std::vector<base::FilePath::StringType> all_mime_extensions; 858 // The GetPreferredExtensionForMimeType call will end up going to disk. Do 859 // this on another thread to avoid slowing the IO thread. 860 // http://crbug.com/61827 861 // TODO(asanka): Remove this ScopedAllowIO once all callers have switched 862 // over to IO safe threads. 
863 base::ThreadRestrictions::ScopedAllowIO allow_io; 864 net::GetPreferredExtensionForMimeType(mime_type, &preferred_mime_extension); 865 net::GetExtensionsForMimeType(mime_type, &all_mime_extensions); 866 // If the existing extension is in the list of valid extensions for the 867 // given type, use it. This avoids doing things like pointlessly renaming 868 // "foo.jpg" to "foo.jpeg". 869 if (std::find(all_mime_extensions.begin(), 870 all_mime_extensions.end(), 871 extension) != all_mime_extensions.end()) { 872 // leave |extension| alone 873 } else if (!preferred_mime_extension.empty()) { 874 extension = preferred_mime_extension; 875 } 876 } 877 878 #if defined(OS_WIN) 879 static const base::FilePath::CharType default_extension[] = 880 FILE_PATH_LITERAL("download"); 881 882 // Rename shell-integrated extensions. 883 // TODO(asanka): Consider stripping out the bad extension and replacing it 884 // with the preferred extension for the MIME type if one is available. 885 if (IsShellIntegratedExtension(extension)) 886 extension.assign(default_extension); 887 #endif 888 889 *file_name = file_name->ReplaceExtension(extension); 890 } 891 892 bool FilePathToString16(const base::FilePath& path, base::string16* converted) { 893 #if defined(OS_WIN) 894 return WideToUTF16(path.value().c_str(), path.value().size(), converted); 895 #elif defined(OS_POSIX) 896 std::string component8 = path.AsUTF8Unsafe(); 897 return !component8.empty() && 898 UTF8ToUTF16(component8.c_str(), component8.size(), converted); 899 #endif 900 } 901 902 } // namespace 903 904 const FormatUrlType kFormatUrlOmitNothing = 0; 905 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; 906 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; 907 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; 908 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | 909 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; 910 911 static 
base::LazyInstance<std::multiset<int> >::Leaky 912 g_explicitly_allowed_ports = LAZY_INSTANCE_INITIALIZER; 913 914 size_t GetCountOfExplicitlyAllowedPorts() { 915 return g_explicitly_allowed_ports.Get().size(); 916 } 917 918 GURL FilePathToFileURL(const base::FilePath& path) { 919 // Produce a URL like "file:///C:/foo" for a regular file, or 920 // "file://///server/path" for UNC. The URL canonicalizer will fix up the 921 // latter case to be the canonical UNC form: "file://server/path" 922 base::FilePath::StringType url_string(kFileURLPrefix); 923 url_string.append(path.value()); 924 925 // Now do replacement of some characters. Since we assume the input is a 926 // literal filename, anything the URL parser might consider special should 927 // be escaped here. 928 929 // must be the first substitution since others will introduce percents as the 930 // escape character 931 ReplaceSubstringsAfterOffset(&url_string, 0, 932 FILE_PATH_LITERAL("%"), FILE_PATH_LITERAL("%25")); 933 934 // semicolon is supposed to be some kind of separator according to RFC 2396 935 ReplaceSubstringsAfterOffset(&url_string, 0, 936 FILE_PATH_LITERAL(";"), FILE_PATH_LITERAL("%3B")); 937 938 ReplaceSubstringsAfterOffset(&url_string, 0, 939 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); 940 941 ReplaceSubstringsAfterOffset(&url_string, 0, 942 FILE_PATH_LITERAL("?"), FILE_PATH_LITERAL("%3F")); 943 944 #if defined(OS_POSIX) 945 ReplaceSubstringsAfterOffset(&url_string, 0, 946 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); 947 #endif 948 949 return GURL(url_string); 950 } 951 952 std::string GetSpecificHeader(const std::string& headers, 953 const std::string& name) { 954 // We want to grab the Value from the "Key: Value" pairs in the headers, 955 // which should look like this (no leading spaces, \n-separated) (we format 956 // them this way in url_request_inet.cc): 957 // HTTP/1.1 200 OK\n 958 // ETag: "6d0b8-947-24f35ec0"\n 959 // Content-Length: 2375\n 960 // Content-Type: text/html; 
charset=UTF-8\n 961 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n 962 if (headers.empty()) 963 return std::string(); 964 965 std::string match('\n' + name + ':'); 966 967 std::string::const_iterator begin = 968 std::search(headers.begin(), headers.end(), match.begin(), match.end(), 969 base::CaseInsensitiveCompareASCII<char>()); 970 971 if (begin == headers.end()) 972 return std::string(); 973 974 begin += match.length(); 975 976 std::string ret; 977 TrimWhitespace(std::string(begin, std::find(begin, headers.end(), '\n')), 978 TRIM_ALL, &ret); 979 return ret; 980 } 981 982 base::string16 IDNToUnicode(const std::string& host, 983 const std::string& languages) { 984 return IDNToUnicodeWithOffsets(host, languages, NULL); 985 } 986 987 std::string CanonicalizeHost(const std::string& host, 988 url_canon::CanonHostInfo* host_info) { 989 // Try to canonicalize the host. 990 const url_parse::Component raw_host_component( 991 0, static_cast<int>(host.length())); 992 std::string canon_host; 993 url_canon::StdStringCanonOutput canon_host_output(&canon_host); 994 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, 995 &canon_host_output, host_info); 996 997 if (host_info->out_host.is_nonempty() && 998 host_info->family != url_canon::CanonHostInfo::BROKEN) { 999 // Success! Assert that there's no extra garbage. 1000 canon_host_output.Complete(); 1001 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); 1002 } else { 1003 // Empty host, or canonicalization failed. We'll return empty. 1004 canon_host.clear(); 1005 } 1006 1007 return canon_host; 1008 } 1009 1010 std::string GetDirectoryListingHeader(const base::string16& title) { 1011 static const base::StringPiece header( 1012 NetModule::GetResource(IDR_DIR_HEADER_HTML)); 1013 // This can be null in unit tests. 
1014 DLOG_IF(WARNING, header.empty()) << 1015 "Missing resource: directory listing header"; 1016 1017 std::string result; 1018 if (!header.empty()) 1019 result.assign(header.data(), header.size()); 1020 1021 result.append("<script>start("); 1022 base::JsonDoubleQuote(title, true, &result); 1023 result.append(");</script>\n"); 1024 1025 return result; 1026 } 1027 1028 inline bool IsHostCharAlphanumeric(char c) { 1029 // We can just check lowercase because uppercase characters have already been 1030 // normalized. 1031 return ((c >= 'a') && (c <= 'z')) || ((c >= '0') && (c <= '9')); 1032 } 1033 1034 bool IsCanonicalizedHostCompliant(const std::string& host, 1035 const std::string& desired_tld) { 1036 if (host.empty()) 1037 return false; 1038 1039 bool in_component = false; 1040 bool most_recent_component_started_alphanumeric = false; 1041 bool last_char_was_underscore = false; 1042 1043 for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) { 1044 const char c = *i; 1045 if (!in_component) { 1046 most_recent_component_started_alphanumeric = IsHostCharAlphanumeric(c); 1047 if (!most_recent_component_started_alphanumeric && (c != '-')) 1048 return false; 1049 in_component = true; 1050 } else { 1051 if (c == '.') { 1052 if (last_char_was_underscore) 1053 return false; 1054 in_component = false; 1055 } else if (IsHostCharAlphanumeric(c) || (c == '-')) { 1056 last_char_was_underscore = false; 1057 } else if (c == '_') { 1058 last_char_was_underscore = true; 1059 } else { 1060 return false; 1061 } 1062 } 1063 } 1064 1065 return most_recent_component_started_alphanumeric || 1066 (!desired_tld.empty() && IsHostCharAlphanumeric(desired_tld[0])); 1067 } 1068 1069 std::string GetDirectoryListingEntry(const base::string16& name, 1070 const std::string& raw_bytes, 1071 bool is_dir, 1072 int64 size, 1073 Time modified) { 1074 std::string result; 1075 result.append("<script>addRow("); 1076 base::JsonDoubleQuote(name, true, &result); 1077 result.append(","); 1078 if 
(raw_bytes.empty()) { 1079 base::JsonDoubleQuote(EscapePath(UTF16ToUTF8(name)), 1080 true, &result); 1081 } else { 1082 base::JsonDoubleQuote(EscapePath(raw_bytes), true, &result); 1083 } 1084 if (is_dir) { 1085 result.append(",1,"); 1086 } else { 1087 result.append(",0,"); 1088 } 1089 1090 // Negative size means unknown or not applicable (e.g. directory). 1091 base::string16 size_string; 1092 if (size >= 0) 1093 size_string = FormatBytesUnlocalized(size); 1094 base::JsonDoubleQuote(size_string, true, &result); 1095 1096 result.append(","); 1097 1098 base::string16 modified_str; 1099 // |modified| can be NULL in FTP listings. 1100 if (!modified.is_null()) { 1101 modified_str = base::TimeFormatShortDateAndTime(modified); 1102 } 1103 base::JsonDoubleQuote(modified_str, true, &result); 1104 1105 result.append(");</script>\n"); 1106 1107 return result; 1108 } 1109 1110 base::string16 StripWWW(const base::string16& text) { 1111 const base::string16 www(ASCIIToUTF16("www.")); 1112 return StartsWith(text, www, true) ? text.substr(www.length()) : text; 1113 } 1114 1115 base::string16 StripWWWFromHost(const GURL& url) { 1116 DCHECK(url.is_valid()); 1117 return StripWWW(ASCIIToUTF16(url.host())); 1118 } 1119 1120 bool IsSafePortablePathComponent(const base::FilePath& component) { 1121 base::string16 component16; 1122 base::FilePath::StringType sanitized = component.value(); 1123 SanitizeGeneratedFileName(&sanitized, true); 1124 base::FilePath::StringType extension = component.Extension(); 1125 if (!extension.empty()) 1126 extension.erase(extension.begin()); // Erase preceding '.'. 
1127 return !component.empty() && 1128 (component == component.BaseName()) && 1129 (component == component.StripTrailingSeparators()) && 1130 FilePathToString16(component, &component16) && 1131 file_util::IsFilenameLegal(component16) && 1132 !IsShellIntegratedExtension(extension) && 1133 (sanitized == component.value()); 1134 } 1135 1136 bool IsSafePortableBasename(const base::FilePath& filename) { 1137 return IsSafePortablePathComponent(filename) && 1138 !IsReservedName(filename.value()); 1139 } 1140 1141 bool IsSafePortableRelativePath(const base::FilePath& path) { 1142 if (path.empty() || path.IsAbsolute() || path.EndsWithSeparator()) 1143 return false; 1144 std::vector<base::FilePath::StringType> components; 1145 path.GetComponents(&components); 1146 if (components.empty()) 1147 return false; 1148 for (size_t i = 0; i < components.size() - 1; ++i) { 1149 if (!IsSafePortablePathComponent(base::FilePath(components[i]))) 1150 return false; 1151 } 1152 return IsSafePortableBasename(path.BaseName()); 1153 } 1154 1155 void GenerateSafeFileName(const std::string& mime_type, 1156 bool ignore_extension, 1157 base::FilePath* file_path) { 1158 // Make sure we get the right file extension 1159 EnsureSafeExtension(mime_type, ignore_extension, file_path); 1160 1161 #if defined(OS_WIN) 1162 // Prepend "_" to the file name if it's a reserved name 1163 base::FilePath::StringType leaf_name = file_path->BaseName().value(); 1164 DCHECK(!leaf_name.empty()); 1165 if (IsReservedName(leaf_name)) { 1166 leaf_name = base::FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name; 1167 *file_path = file_path->DirName(); 1168 if (file_path->value() == base::FilePath::kCurrentDirectory) { 1169 *file_path = base::FilePath(leaf_name); 1170 } else { 1171 *file_path = file_path->Append(leaf_name); 1172 } 1173 } 1174 #endif 1175 } 1176 1177 base::string16 GetSuggestedFilename(const GURL& url, 1178 const std::string& content_disposition, 1179 const std::string& referrer_charset, 1180 const 
std::string& suggested_name, 1181 const std::string& mime_type, 1182 const std::string& default_name) { 1183 // TODO: this function to be updated to match the httpbis recommendations. 1184 // Talk to abarth for the latest news. 1185 1186 // We don't translate this fallback string, "download". If localization is 1187 // needed, the caller should provide localized fallback in |default_name|. 1188 static const base::FilePath::CharType kFinalFallbackName[] = 1189 FILE_PATH_LITERAL("download"); 1190 std::string filename; // In UTF-8 1191 bool overwrite_extension = false; 1192 1193 // Try to extract a filename from content-disposition first. 1194 if (!content_disposition.empty()) { 1195 HttpContentDisposition header(content_disposition, referrer_charset); 1196 filename = header.filename(); 1197 } 1198 1199 // Then try to use the suggested name. 1200 if (filename.empty() && !suggested_name.empty()) 1201 filename = suggested_name; 1202 1203 // Now try extracting the filename from the URL. GetFileNameFromURL() only 1204 // looks at the last component of the URL and doesn't return the hostname as a 1205 // failover. 1206 if (filename.empty()) 1207 filename = GetFileNameFromURL(url, referrer_charset, &overwrite_extension); 1208 1209 // Finally try the URL hostname, but only if there's no default specified in 1210 // |default_name|. Some schemes (e.g.: file:, about:, data:) do not have a 1211 // host name. 1212 if (filename.empty() && 1213 default_name.empty() && 1214 url.is_valid() && 1215 !url.host().empty()) { 1216 // TODO(jungshik) : Decode a 'punycoded' IDN hostname. 
(bug 1264451) 1217 filename = url.host(); 1218 } 1219 1220 bool replace_trailing = false; 1221 base::FilePath::StringType result_str, default_name_str; 1222 #if defined(OS_WIN) 1223 replace_trailing = true; 1224 result_str = UTF8ToUTF16(filename); 1225 default_name_str = UTF8ToUTF16(default_name); 1226 #else 1227 result_str = filename; 1228 default_name_str = default_name; 1229 #endif 1230 SanitizeGeneratedFileName(&result_str, replace_trailing); 1231 if (result_str.find_last_not_of(FILE_PATH_LITERAL("-_")) == 1232 base::FilePath::StringType::npos) { 1233 result_str = !default_name_str.empty() ? default_name_str : 1234 base::FilePath::StringType(kFinalFallbackName); 1235 overwrite_extension = false; 1236 } 1237 file_util::ReplaceIllegalCharactersInPath(&result_str, '-'); 1238 base::FilePath result(result_str); 1239 GenerateSafeFileName(mime_type, overwrite_extension, &result); 1240 1241 base::string16 result16; 1242 if (!FilePathToString16(result, &result16)) { 1243 result = base::FilePath(default_name_str); 1244 if (!FilePathToString16(result, &result16)) { 1245 result = base::FilePath(kFinalFallbackName); 1246 FilePathToString16(result, &result16); 1247 } 1248 } 1249 return result16; 1250 } 1251 1252 base::FilePath GenerateFileName(const GURL& url, 1253 const std::string& content_disposition, 1254 const std::string& referrer_charset, 1255 const std::string& suggested_name, 1256 const std::string& mime_type, 1257 const std::string& default_file_name) { 1258 base::string16 file_name = GetSuggestedFilename(url, 1259 content_disposition, 1260 referrer_charset, 1261 suggested_name, 1262 mime_type, 1263 default_file_name); 1264 1265 #if defined(OS_WIN) 1266 base::FilePath generated_name(file_name); 1267 #else 1268 base::FilePath generated_name( 1269 base::SysWideToNativeMB(UTF16ToWide(file_name))); 1270 #endif 1271 1272 #if defined(OS_CHROMEOS) 1273 // When doing file manager operations on ChromeOS, the file paths get 1274 // normalized in WebKit layer, so let's ensure 
downloaded files have 1275 // normalized names. Otherwise, we won't be able to handle files with NFD 1276 // utf8 encoded characters in name. 1277 file_util::NormalizeFileNameEncoding(&generated_name); 1278 #endif 1279 1280 DCHECK(!generated_name.empty()); 1281 1282 return generated_name; 1283 } 1284 1285 bool IsPortAllowedByDefault(int port) { 1286 int array_size = arraysize(kRestrictedPorts); 1287 for (int i = 0; i < array_size; i++) { 1288 if (kRestrictedPorts[i] == port) { 1289 return false; 1290 } 1291 } 1292 return true; 1293 } 1294 1295 bool IsPortAllowedByFtp(int port) { 1296 int array_size = arraysize(kAllowedFtpPorts); 1297 for (int i = 0; i < array_size; i++) { 1298 if (kAllowedFtpPorts[i] == port) { 1299 return true; 1300 } 1301 } 1302 // Port not explicitly allowed by FTP, so return the default restrictions. 1303 return IsPortAllowedByDefault(port); 1304 } 1305 1306 bool IsPortAllowedByOverride(int port) { 1307 if (g_explicitly_allowed_ports.Get().empty()) 1308 return false; 1309 1310 return g_explicitly_allowed_ports.Get().count(port) > 0; 1311 } 1312 1313 int SetNonBlocking(int fd) { 1314 #if defined(OS_WIN) 1315 unsigned long no_block = 1; 1316 return ioctlsocket(fd, FIONBIO, &no_block); 1317 #elif defined(OS_POSIX) 1318 int flags = fcntl(fd, F_GETFL, 0); 1319 if (-1 == flags) 1320 return flags; 1321 return fcntl(fd, F_SETFL, flags | O_NONBLOCK); 1322 #endif 1323 } 1324 1325 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin, 1326 std::string::const_iterator host_and_port_end, 1327 std::string* host, 1328 int* port) { 1329 if (host_and_port_begin >= host_and_port_end) 1330 return false; 1331 1332 // When using url_parse, we use char*. 
1333 const char* auth_begin = &(*host_and_port_begin); 1334 int auth_len = host_and_port_end - host_and_port_begin; 1335 1336 url_parse::Component auth_component(0, auth_len); 1337 url_parse::Component username_component; 1338 url_parse::Component password_component; 1339 url_parse::Component hostname_component; 1340 url_parse::Component port_component; 1341 1342 url_parse::ParseAuthority(auth_begin, auth_component, &username_component, 1343 &password_component, &hostname_component, &port_component); 1344 1345 // There shouldn't be a username/password. 1346 if (username_component.is_valid() || password_component.is_valid()) 1347 return false; 1348 1349 if (!hostname_component.is_nonempty()) 1350 return false; // Failed parsing. 1351 1352 int parsed_port_number = -1; 1353 if (port_component.is_nonempty()) { 1354 parsed_port_number = url_parse::ParsePort(auth_begin, port_component); 1355 1356 // If parsing failed, port_number will be either PORT_INVALID or 1357 // PORT_UNSPECIFIED, both of which are negative. 1358 if (parsed_port_number < 0) 1359 return false; // Failed parsing the port number. 1360 } 1361 1362 if (port_component.len == 0) 1363 return false; // Reject inputs like "foo:" 1364 1365 // Pass results back to caller. 1366 host->assign(auth_begin + hostname_component.begin, hostname_component.len); 1367 *port = parsed_port_number; 1368 1369 return true; // Success. 1370 } 1371 1372 bool ParseHostAndPort(const std::string& host_and_port, 1373 std::string* host, 1374 int* port) { 1375 return ParseHostAndPort( 1376 host_and_port.begin(), host_and_port.end(), host, port); 1377 } 1378 1379 std::string GetHostAndPort(const GURL& url) { 1380 // For IPv6 literals, GURL::host() already includes the brackets so it is 1381 // safe to just append a colon. 
1382 return base::StringPrintf("%s:%d", url.host().c_str(), 1383 url.EffectiveIntPort()); 1384 } 1385 1386 std::string GetHostAndOptionalPort(const GURL& url) { 1387 // For IPv6 literals, GURL::host() already includes the brackets 1388 // so it is safe to just append a colon. 1389 if (url.has_port()) 1390 return base::StringPrintf("%s:%s", url.host().c_str(), url.port().c_str()); 1391 return url.host(); 1392 } 1393 1394 // static 1395 bool IsHostnameNonUnique(const std::string& hostname) { 1396 // CanonicalizeHost requires surrounding brackets to parse an IPv6 address. 1397 const std::string host_or_ip = hostname.find(':') != std::string::npos ? 1398 "[" + hostname + "]" : hostname; 1399 url_canon::CanonHostInfo host_info; 1400 std::string canonical_name = CanonicalizeHost(host_or_ip, &host_info); 1401 1402 // If canonicalization fails, then the input is truly malformed. However, 1403 // to avoid mis-reporting bad inputs as "non-unique", treat them as unique. 1404 if (canonical_name.empty()) 1405 return false; 1406 1407 // If |hostname| is an IP address, presume it's unique. 1408 // TODO(rsleevi): In the future, this should also reject IP addresses in 1409 // IANA-reserved ranges. 1410 if (host_info.IsIPAddress()) 1411 return false; 1412 1413 // Check for a registry controlled portion of |hostname|, ignoring private 1414 // registries, as they already chain to ICANN-administered registries, 1415 // and explicitly ignoring unknown registries. 1416 // 1417 // Note: This means that as new gTLDs are introduced on the Internet, they 1418 // will be treated as non-unique until the registry controlled domain list 1419 // is updated. However, because gTLDs are expected to provide significant 1420 // advance notice to deprecate older versions of this code, this an 1421 // acceptable tradeoff. 
1422 return 0 == registry_controlled_domains::GetRegistryLength( 1423 canonical_name, 1424 registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 1425 registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 1426 } 1427 1428 // Extracts the address and port portions of a sockaddr. 1429 bool GetIPAddressFromSockAddr(const struct sockaddr* sock_addr, 1430 socklen_t sock_addr_len, 1431 const uint8** address, 1432 size_t* address_len, 1433 uint16* port) { 1434 if (sock_addr->sa_family == AF_INET) { 1435 if (sock_addr_len < static_cast<socklen_t>(sizeof(struct sockaddr_in))) 1436 return false; 1437 const struct sockaddr_in* addr = 1438 reinterpret_cast<const struct sockaddr_in*>(sock_addr); 1439 *address = reinterpret_cast<const uint8*>(&addr->sin_addr); 1440 *address_len = kIPv4AddressSize; 1441 if (port) 1442 *port = base::NetToHost16(addr->sin_port); 1443 return true; 1444 } 1445 1446 if (sock_addr->sa_family == AF_INET6) { 1447 if (sock_addr_len < static_cast<socklen_t>(sizeof(struct sockaddr_in6))) 1448 return false; 1449 const struct sockaddr_in6* addr = 1450 reinterpret_cast<const struct sockaddr_in6*>(sock_addr); 1451 *address = reinterpret_cast<const unsigned char*>(&addr->sin6_addr); 1452 *address_len = kIPv6AddressSize; 1453 if (port) 1454 *port = base::NetToHost16(addr->sin6_port); 1455 return true; 1456 } 1457 1458 return false; // Unrecognized |sa_family|. 
1459 } 1460 1461 std::string IPAddressToString(const uint8* address, 1462 size_t address_len) { 1463 std::string str; 1464 url_canon::StdStringCanonOutput output(&str); 1465 1466 if (address_len == kIPv4AddressSize) { 1467 url_canon::AppendIPv4Address(address, &output); 1468 } else if (address_len == kIPv6AddressSize) { 1469 url_canon::AppendIPv6Address(address, &output); 1470 } else { 1471 CHECK(false) << "Invalid IP address with length: " << address_len; 1472 } 1473 1474 output.Complete(); 1475 return str; 1476 } 1477 1478 std::string IPAddressToStringWithPort(const uint8* address, 1479 size_t address_len, 1480 uint16 port) { 1481 std::string address_str = IPAddressToString(address, address_len); 1482 1483 if (address_len == kIPv6AddressSize) { 1484 // Need to bracket IPv6 addresses since they contain colons. 1485 return base::StringPrintf("[%s]:%d", address_str.c_str(), port); 1486 } 1487 return base::StringPrintf("%s:%d", address_str.c_str(), port); 1488 } 1489 1490 std::string NetAddressToString(const struct sockaddr* sa, 1491 socklen_t sock_addr_len) { 1492 const uint8* address; 1493 size_t address_len; 1494 if (!GetIPAddressFromSockAddr(sa, sock_addr_len, &address, 1495 &address_len, NULL)) { 1496 NOTREACHED(); 1497 return std::string(); 1498 } 1499 return IPAddressToString(address, address_len); 1500 } 1501 1502 std::string NetAddressToStringWithPort(const struct sockaddr* sa, 1503 socklen_t sock_addr_len) { 1504 const uint8* address; 1505 size_t address_len; 1506 uint16 port; 1507 if (!GetIPAddressFromSockAddr(sa, sock_addr_len, &address, 1508 &address_len, &port)) { 1509 NOTREACHED(); 1510 return std::string(); 1511 } 1512 return IPAddressToStringWithPort(address, address_len, port); 1513 } 1514 1515 std::string IPAddressToString(const IPAddressNumber& addr) { 1516 return IPAddressToString(&addr.front(), addr.size()); 1517 } 1518 1519 std::string IPAddressToStringWithPort(const IPAddressNumber& addr, 1520 uint16 port) { 1521 return 
IPAddressToStringWithPort(&addr.front(), addr.size(), port); 1522 } 1523 1524 std::string GetHostName() { 1525 #if defined(OS_WIN) 1526 EnsureWinsockInit(); 1527 #endif 1528 1529 // Host names are limited to 255 bytes. 1530 char buffer[256]; 1531 int result = gethostname(buffer, sizeof(buffer)); 1532 if (result != 0) { 1533 DVLOG(1) << "gethostname() failed with " << result; 1534 buffer[0] = '\0'; 1535 } 1536 return std::string(buffer); 1537 } 1538 1539 void GetIdentityFromURL(const GURL& url, 1540 base::string16* username, 1541 base::string16* password) { 1542 UnescapeRule::Type flags = 1543 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS; 1544 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); 1545 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); 1546 } 1547 1548 std::string GetHostOrSpecFromURL(const GURL& url) { 1549 return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); 1550 } 1551 1552 void AppendFormattedHost(const GURL& url, 1553 const std::string& languages, 1554 base::string16* output) { 1555 std::vector<size_t> offsets; 1556 AppendFormattedComponent(url.possibly_invalid_spec(), 1557 url.parsed_for_possibly_invalid_spec().host, offsets, 1558 HostComponentTransform(languages), output, NULL, NULL); 1559 } 1560 1561 base::string16 FormatUrlWithOffsets( 1562 const GURL& url, 1563 const std::string& languages, 1564 FormatUrlTypes format_types, 1565 UnescapeRule::Type unescape_rules, 1566 url_parse::Parsed* new_parsed, 1567 size_t* prefix_end, 1568 std::vector<size_t>* offsets_for_adjustment) { 1569 url_parse::Parsed parsed_temp; 1570 if (!new_parsed) 1571 new_parsed = &parsed_temp; 1572 else 1573 *new_parsed = url_parse::Parsed(); 1574 std::vector<size_t> original_offsets; 1575 if (offsets_for_adjustment) 1576 original_offsets = *offsets_for_adjustment; 1577 1578 // Special handling for view-source:. Don't use content::kViewSourceScheme 1579 // because this library shouldn't depend on chrome. 
1580 const char* const kViewSource = "view-source"; 1581 // Reject "view-source:view-source:..." to avoid deep recursion. 1582 const char* const kViewSourceTwice = "view-source:view-source:"; 1583 if (url.SchemeIs(kViewSource) && 1584 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { 1585 return FormatViewSourceUrl(url, original_offsets, languages, format_types, 1586 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); 1587 } 1588 1589 // We handle both valid and invalid URLs (this will give us the spec 1590 // regardless of validity). 1591 const std::string& spec = url.possibly_invalid_spec(); 1592 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); 1593 1594 // Scheme & separators. These are ASCII. 1595 base::string16 url_string; 1596 url_string.insert(url_string.end(), spec.begin(), 1597 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, 1598 true)); 1599 const char kHTTP[] = "http://"; 1600 const char kFTP[] = "ftp."; 1601 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This 1602 // means that if we trim "http://" off a URL whose host starts with "ftp." and 1603 // the user inputs this into any field subject to fixup (which is basically 1604 // all input fields), the meaning would be changed. (In fact, often the 1605 // formatted URL is directly pre-filled into an input field.) For this reason 1606 // we avoid stripping "http://" in this case. 1607 bool omit_http = (format_types & kFormatUrlOmitHTTP) && 1608 EqualsASCII(url_string, kHTTP) && 1609 !StartsWithASCII(url.host(), kFTP, true); 1610 new_parsed->scheme = parsed.scheme; 1611 1612 // Username & password. 1613 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { 1614 // Remove the username and password fields. We don't want to display those 1615 // to the user since they can be used for attacks, 1616 // e.g. 
"http://google.com:search@evil.ru/" 1617 new_parsed->username.reset(); 1618 new_parsed->password.reset(); 1619 // Update the offsets based on removed username and/or password. 1620 if (offsets_for_adjustment && !offsets_for_adjustment->empty() && 1621 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { 1622 base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); 1623 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { 1624 // The seeming off-by-one and off-by-two in these first two lines are to 1625 // account for the ':' after the username and '@' after the password. 1626 offset_adjuster.Add(base::OffsetAdjuster::Adjustment( 1627 static_cast<size_t>(parsed.username.begin), 1628 static_cast<size_t>(parsed.username.len + parsed.password.len + 2), 1629 0)); 1630 } else { 1631 const url_parse::Component* nonempty_component = 1632 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; 1633 // The seeming off-by-one in below is to account for the '@' after the 1634 // username/password. 
1635 offset_adjuster.Add(base::OffsetAdjuster::Adjustment( 1636 static_cast<size_t>(nonempty_component->begin), 1637 static_cast<size_t>(nonempty_component->len + 1), 0)); 1638 } 1639 } 1640 } else { 1641 AppendFormattedComponent(spec, parsed.username, original_offsets, 1642 NonHostComponentTransform(unescape_rules), &url_string, 1643 &new_parsed->username, offsets_for_adjustment); 1644 if (parsed.password.is_valid()) { 1645 size_t colon = parsed.username.end(); 1646 DCHECK_EQ(static_cast<size_t>(parsed.password.begin - 1), colon); 1647 std::vector<size_t>::const_iterator colon_iter = 1648 std::find(original_offsets.begin(), original_offsets.end(), colon); 1649 if (colon_iter != original_offsets.end()) { 1650 (*offsets_for_adjustment)[colon_iter - original_offsets.begin()] = 1651 url_string.length(); 1652 } 1653 url_string.push_back(':'); 1654 } 1655 AppendFormattedComponent(spec, parsed.password, original_offsets, 1656 NonHostComponentTransform(unescape_rules), &url_string, 1657 &new_parsed->password, offsets_for_adjustment); 1658 if (parsed.username.is_valid() || parsed.password.is_valid()) { 1659 size_t at_sign = (parsed.password.is_valid() ? 1660 parsed.password : parsed.username).end(); 1661 DCHECK_EQ(static_cast<size_t>(parsed.host.begin - 1), at_sign); 1662 std::vector<size_t>::const_iterator at_sign_iter = 1663 std::find(original_offsets.begin(), original_offsets.end(), at_sign); 1664 if (at_sign_iter != original_offsets.end()) { 1665 (*offsets_for_adjustment)[at_sign_iter - original_offsets.begin()] = 1666 url_string.length(); 1667 } 1668 url_string.push_back('@'); 1669 } 1670 } 1671 if (prefix_end) 1672 *prefix_end = static_cast<size_t>(url_string.length()); 1673 1674 // Host. 1675 AppendFormattedComponent(spec, parsed.host, original_offsets, 1676 HostComponentTransform(languages), &url_string, &new_parsed->host, 1677 offsets_for_adjustment); 1678 1679 // Port. 
1680 if (parsed.port.is_nonempty()) { 1681 url_string.push_back(':'); 1682 new_parsed->port.begin = url_string.length(); 1683 url_string.insert(url_string.end(), 1684 spec.begin() + parsed.port.begin, 1685 spec.begin() + parsed.port.end()); 1686 new_parsed->port.len = url_string.length() - new_parsed->port.begin; 1687 } else { 1688 new_parsed->port.reset(); 1689 } 1690 1691 // Path & query. Both get the same general unescape & convert treatment. 1692 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || 1693 !CanStripTrailingSlash(url)) { 1694 AppendFormattedComponent(spec, parsed.path, original_offsets, 1695 NonHostComponentTransform(unescape_rules), &url_string, 1696 &new_parsed->path, offsets_for_adjustment); 1697 } 1698 if (parsed.query.is_valid()) 1699 url_string.push_back('?'); 1700 AppendFormattedComponent(spec, parsed.query, original_offsets, 1701 NonHostComponentTransform(unescape_rules), &url_string, 1702 &new_parsed->query, offsets_for_adjustment); 1703 1704 // Ref. This is valid, unescaped UTF-8, so we can just convert. 1705 if (parsed.ref.is_valid()) { 1706 url_string.push_back('#'); 1707 size_t original_ref_begin = static_cast<size_t>(parsed.ref.begin); 1708 size_t output_ref_begin = url_string.length(); 1709 new_parsed->ref.begin = static_cast<int>(output_ref_begin); 1710 1711 std::vector<size_t> offsets_into_ref( 1712 OffsetsIntoComponent(original_offsets, original_ref_begin)); 1713 if (parsed.ref.len > 0) { 1714 url_string.append(base::UTF8ToUTF16AndAdjustOffsets( 1715 spec.substr(original_ref_begin, static_cast<size_t>(parsed.ref.len)), 1716 &offsets_into_ref)); 1717 } 1718 1719 new_parsed->ref.len = 1720 static_cast<int>(url_string.length() - new_parsed->ref.begin); 1721 AdjustForComponentTransform(original_offsets, original_ref_begin, 1722 static_cast<size_t>(parsed.ref.end()), offsets_into_ref, 1723 output_ref_begin, offsets_for_adjustment); 1724 } 1725 1726 // If we need to strip out http do it after the fact. 
This way we don't need 1727 // to worry about how offset_for_adjustment is interpreted. 1728 if (omit_http && StartsWith(url_string, ASCIIToUTF16(kHTTP), true)) { 1729 const size_t kHTTPSize = arraysize(kHTTP) - 1; 1730 url_string = url_string.substr(kHTTPSize); 1731 if (offsets_for_adjustment && !offsets_for_adjustment->empty()) { 1732 base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); 1733 offset_adjuster.Add(base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); 1734 } 1735 if (prefix_end) 1736 *prefix_end -= kHTTPSize; 1737 1738 // Adjust new_parsed. 1739 DCHECK(new_parsed->scheme.is_valid()); 1740 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. 1741 new_parsed->scheme.reset(); 1742 AdjustComponents(delta, new_parsed); 1743 } 1744 1745 LimitOffsets(url_string, offsets_for_adjustment); 1746 return url_string; 1747 } 1748 1749 base::string16 FormatUrl(const GURL& url, 1750 const std::string& languages, 1751 FormatUrlTypes format_types, 1752 UnescapeRule::Type unescape_rules, 1753 url_parse::Parsed* new_parsed, 1754 size_t* prefix_end, 1755 size_t* offset_for_adjustment) { 1756 std::vector<size_t> offsets; 1757 if (offset_for_adjustment) 1758 offsets.push_back(*offset_for_adjustment); 1759 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, 1760 unescape_rules, new_parsed, prefix_end, &offsets); 1761 if (offset_for_adjustment) 1762 *offset_for_adjustment = offsets[0]; 1763 return result; 1764 } 1765 1766 bool CanStripTrailingSlash(const GURL& url) { 1767 // Omit the path only for standard, non-file URLs with nothing but "/" after 1768 // the hostname. 
1769 return url.IsStandard() && !url.SchemeIsFile() && 1770 !url.SchemeIsFileSystem() && !url.has_query() && !url.has_ref() 1771 && url.path() == "/"; 1772 } 1773 1774 GURL SimplifyUrlForRequest(const GURL& url) { 1775 DCHECK(url.is_valid()); 1776 GURL::Replacements replacements; 1777 replacements.ClearUsername(); 1778 replacements.ClearPassword(); 1779 replacements.ClearRef(); 1780 return url.ReplaceComponents(replacements); 1781 } 1782 1783 // Specifies a comma separated list of port numbers that should be accepted 1784 // despite bans. If the string is invalid no allowed ports are stored. 1785 void SetExplicitlyAllowedPorts(const std::string& allowed_ports) { 1786 if (allowed_ports.empty()) 1787 return; 1788 1789 std::multiset<int> ports; 1790 size_t last = 0; 1791 size_t size = allowed_ports.size(); 1792 // The comma delimiter. 1793 const std::string::value_type kComma = ','; 1794 1795 // Overflow is still possible for evil user inputs. 1796 for (size_t i = 0; i <= size; ++i) { 1797 // The string should be composed of only digits and commas. 
1798 if (i != size && !IsAsciiDigit(allowed_ports[i]) && 1799 (allowed_ports[i] != kComma)) 1800 return; 1801 if (i == size || allowed_ports[i] == kComma) { 1802 if (i > last) { 1803 int port; 1804 base::StringToInt(base::StringPiece(allowed_ports.begin() + last, 1805 allowed_ports.begin() + i), 1806 &port); 1807 ports.insert(port); 1808 } 1809 last = i + 1; 1810 } 1811 } 1812 g_explicitly_allowed_ports.Get() = ports; 1813 } 1814 1815 ScopedPortException::ScopedPortException(int port) : port_(port) { 1816 g_explicitly_allowed_ports.Get().insert(port); 1817 } 1818 1819 ScopedPortException::~ScopedPortException() { 1820 std::multiset<int>::iterator it = 1821 g_explicitly_allowed_ports.Get().find(port_); 1822 if (it != g_explicitly_allowed_ports.Get().end()) 1823 g_explicitly_allowed_ports.Get().erase(it); 1824 else 1825 NOTREACHED(); 1826 } 1827 1828 bool HaveOnlyLoopbackAddresses() { 1829 #if defined(OS_ANDROID) 1830 return android::HaveOnlyLoopbackAddresses(); 1831 #elif defined(OS_POSIX) 1832 struct ifaddrs* interface_addr = NULL; 1833 int rv = getifaddrs(&interface_addr); 1834 if (rv != 0) { 1835 DVLOG(1) << "getifaddrs() failed with errno = " << errno; 1836 return false; 1837 } 1838 1839 bool result = true; 1840 for (struct ifaddrs* interface = interface_addr; 1841 interface != NULL; 1842 interface = interface->ifa_next) { 1843 if (!(IFF_UP & interface->ifa_flags)) 1844 continue; 1845 if (IFF_LOOPBACK & interface->ifa_flags) 1846 continue; 1847 const struct sockaddr* addr = interface->ifa_addr; 1848 if (!addr) 1849 continue; 1850 if (addr->sa_family == AF_INET6) { 1851 // Safe cast since this is AF_INET6. 
1852 const struct sockaddr_in6* addr_in6 = 1853 reinterpret_cast<const struct sockaddr_in6*>(addr); 1854 const struct in6_addr* sin6_addr = &addr_in6->sin6_addr; 1855 if (IN6_IS_ADDR_LOOPBACK(sin6_addr) || IN6_IS_ADDR_LINKLOCAL(sin6_addr)) 1856 continue; 1857 } 1858 if (addr->sa_family != AF_INET6 && addr->sa_family != AF_INET) 1859 continue; 1860 1861 result = false; 1862 break; 1863 } 1864 freeifaddrs(interface_addr); 1865 return result; 1866 #elif defined(OS_WIN) 1867 // TODO(wtc): implement with the GetAdaptersAddresses function. 1868 NOTIMPLEMENTED(); 1869 return false; 1870 #else 1871 NOTIMPLEMENTED(); 1872 return false; 1873 #endif // defined(various platforms) 1874 } 1875 1876 AddressFamily GetAddressFamily(const IPAddressNumber& address) { 1877 switch (address.size()) { 1878 case kIPv4AddressSize: 1879 return ADDRESS_FAMILY_IPV4; 1880 case kIPv6AddressSize: 1881 return ADDRESS_FAMILY_IPV6; 1882 default: 1883 return ADDRESS_FAMILY_UNSPECIFIED; 1884 } 1885 } 1886 1887 bool ParseIPLiteralToNumber(const std::string& ip_literal, 1888 IPAddressNumber* ip_number) { 1889 // |ip_literal| could be either a IPv4 or an IPv6 literal. If it contains 1890 // a colon however, it must be an IPv6 address. 1891 if (ip_literal.find(':') != std::string::npos) { 1892 // GURL expects IPv6 hostnames to be surrounded with brackets. 1893 std::string host_brackets = "[" + ip_literal + "]"; 1894 url_parse::Component host_comp(0, host_brackets.size()); 1895 1896 // Try parsing the hostname as an IPv6 literal. 1897 ip_number->resize(16); // 128 bits. 1898 return url_canon::IPv6AddressToNumber(host_brackets.data(), 1899 host_comp, 1900 &(*ip_number)[0]); 1901 } 1902 1903 // Otherwise the string is an IPv4 address. 1904 ip_number->resize(4); // 32 bits. 
1905 url_parse::Component host_comp(0, ip_literal.size()); 1906 int num_components; 1907 url_canon::CanonHostInfo::Family family = url_canon::IPv4AddressToNumber( 1908 ip_literal.data(), host_comp, &(*ip_number)[0], &num_components); 1909 return family == url_canon::CanonHostInfo::IPV4; 1910 } 1911 1912 namespace { 1913 1914 const unsigned char kIPv4MappedPrefix[] = 1915 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF }; 1916 } 1917 1918 IPAddressNumber ConvertIPv4NumberToIPv6Number( 1919 const IPAddressNumber& ipv4_number) { 1920 DCHECK(ipv4_number.size() == 4); 1921 1922 // IPv4-mapped addresses are formed by: 1923 // <80 bits of zeros> + <16 bits of ones> + <32-bit IPv4 address>. 1924 IPAddressNumber ipv6_number; 1925 ipv6_number.reserve(16); 1926 ipv6_number.insert(ipv6_number.end(), 1927 kIPv4MappedPrefix, 1928 kIPv4MappedPrefix + arraysize(kIPv4MappedPrefix)); 1929 ipv6_number.insert(ipv6_number.end(), ipv4_number.begin(), ipv4_number.end()); 1930 return ipv6_number; 1931 } 1932 1933 bool IsIPv4Mapped(const IPAddressNumber& address) { 1934 if (address.size() != kIPv6AddressSize) 1935 return false; 1936 return std::equal(address.begin(), 1937 address.begin() + arraysize(kIPv4MappedPrefix), 1938 kIPv4MappedPrefix); 1939 } 1940 1941 IPAddressNumber ConvertIPv4MappedToIPv4(const IPAddressNumber& address) { 1942 DCHECK(IsIPv4Mapped(address)); 1943 return IPAddressNumber(address.begin() + arraysize(kIPv4MappedPrefix), 1944 address.end()); 1945 } 1946 1947 bool ParseCIDRBlock(const std::string& cidr_literal, 1948 IPAddressNumber* ip_number, 1949 size_t* prefix_length_in_bits) { 1950 // We expect CIDR notation to match one of these two templates: 1951 // <IPv4-literal> "/" <number of bits> 1952 // <IPv6-literal> "/" <number of bits> 1953 1954 std::vector<std::string> parts; 1955 base::SplitString(cidr_literal, '/', &parts); 1956 if (parts.size() != 2) 1957 return false; 1958 1959 // Parse the IP address. 
1960 if (!ParseIPLiteralToNumber(parts[0], ip_number)) 1961 return false; 1962 1963 // Parse the prefix length. 1964 int number_of_bits = -1; 1965 if (!base::StringToInt(parts[1], &number_of_bits)) 1966 return false; 1967 1968 // Make sure the prefix length is in a valid range. 1969 if (number_of_bits < 0 || 1970 number_of_bits > static_cast<int>(ip_number->size() * 8)) 1971 return false; 1972 1973 *prefix_length_in_bits = static_cast<size_t>(number_of_bits); 1974 return true; 1975 } 1976 1977 bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number, 1978 const IPAddressNumber& ip_prefix, 1979 size_t prefix_length_in_bits) { 1980 // Both the input IP address and the prefix IP address should be 1981 // either IPv4 or IPv6. 1982 DCHECK(ip_number.size() == 4 || ip_number.size() == 16); 1983 DCHECK(ip_prefix.size() == 4 || ip_prefix.size() == 16); 1984 1985 DCHECK_LE(prefix_length_in_bits, ip_prefix.size() * 8); 1986 1987 // In case we have an IPv6 / IPv4 mismatch, convert the IPv4 addresses to 1988 // IPv6 addresses in order to do the comparison. 1989 if (ip_number.size() != ip_prefix.size()) { 1990 if (ip_number.size() == 4) { 1991 return IPNumberMatchesPrefix(ConvertIPv4NumberToIPv6Number(ip_number), 1992 ip_prefix, prefix_length_in_bits); 1993 } 1994 return IPNumberMatchesPrefix(ip_number, 1995 ConvertIPv4NumberToIPv6Number(ip_prefix), 1996 96 + prefix_length_in_bits); 1997 } 1998 1999 // Otherwise we are comparing two IPv4 addresses, or two IPv6 addresses. 2000 // Compare all the bytes that fall entirely within the prefix. 2001 int num_entire_bytes_in_prefix = prefix_length_in_bits / 8; 2002 for (int i = 0; i < num_entire_bytes_in_prefix; ++i) { 2003 if (ip_number[i] != ip_prefix[i]) 2004 return false; 2005 } 2006 2007 // In case the prefix was not a multiple of 8, there will be 1 byte 2008 // which is only partially masked. 
2009 int remaining_bits = prefix_length_in_bits % 8; 2010 if (remaining_bits != 0) { 2011 unsigned char mask = 0xFF << (8 - remaining_bits); 2012 int i = num_entire_bytes_in_prefix; 2013 if ((ip_number[i] & mask) != (ip_prefix[i] & mask)) 2014 return false; 2015 } 2016 2017 return true; 2018 } 2019 2020 const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address, 2021 socklen_t address_len) { 2022 if (address->sa_family == AF_INET) { 2023 DCHECK_LE(sizeof(sockaddr_in), static_cast<size_t>(address_len)); 2024 const struct sockaddr_in* sockaddr = 2025 reinterpret_cast<const struct sockaddr_in*>(address); 2026 return &sockaddr->sin_port; 2027 } else if (address->sa_family == AF_INET6) { 2028 DCHECK_LE(sizeof(sockaddr_in6), static_cast<size_t>(address_len)); 2029 const struct sockaddr_in6* sockaddr = 2030 reinterpret_cast<const struct sockaddr_in6*>(address); 2031 return &sockaddr->sin6_port; 2032 } else { 2033 NOTREACHED(); 2034 return NULL; 2035 } 2036 } 2037 2038 int GetPortFromSockaddr(const struct sockaddr* address, socklen_t address_len) { 2039 const uint16* port_field = GetPortFieldFromSockaddr(address, address_len); 2040 if (!port_field) 2041 return -1; 2042 return base::NetToHost16(*port_field); 2043 } 2044 2045 bool IsLocalhost(const std::string& host) { 2046 if (host == "localhost" || 2047 host == "localhost.localdomain" || 2048 host == "localhost6" || 2049 host == "localhost6.localdomain6") 2050 return true; 2051 2052 IPAddressNumber ip_number; 2053 if (ParseIPLiteralToNumber(host, &ip_number)) { 2054 size_t size = ip_number.size(); 2055 switch (size) { 2056 case kIPv4AddressSize: { 2057 IPAddressNumber localhost_prefix; 2058 localhost_prefix.push_back(127); 2059 for (int i = 0; i < 3; ++i) { 2060 localhost_prefix.push_back(0); 2061 } 2062 return IPNumberMatchesPrefix(ip_number, localhost_prefix, 8); 2063 } 2064 2065 case kIPv6AddressSize: { 2066 struct in6_addr sin6_addr; 2067 memcpy(&sin6_addr, &ip_number[0], kIPv6AddressSize); 2068 return 
!!IN6_IS_ADDR_LOOPBACK(&sin6_addr); 2069 } 2070 2071 default: 2072 NOTREACHED(); 2073 } 2074 } 2075 2076 return false; 2077 } 2078 2079 NetworkInterface::NetworkInterface() { 2080 } 2081 2082 NetworkInterface::NetworkInterface(const std::string& name, 2083 const IPAddressNumber& address) 2084 : name(name), address(address) { 2085 } 2086 2087 NetworkInterface::~NetworkInterface() { 2088 } 2089 2090 } // namespace net 2091