1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_BASE_NET_UTIL_H_ 6 #define NET_BASE_NET_UTIL_H_ 7 #pragma once 8 9 #include "build/build_config.h" 10 11 #if defined(OS_WIN) 12 #include <windows.h> 13 #include <ws2tcpip.h> 14 #elif defined(OS_POSIX) 15 #include <sys/socket.h> 16 #endif 17 18 #include <list> 19 #include <string> 20 #include <set> 21 #include <vector> 22 23 #include "base/basictypes.h" 24 #include "base/string16.h" 25 #include "net/base/escape.h" 26 27 struct addrinfo; 28 class FilePath; 29 class GURL; 30 31 namespace base { 32 class Time; 33 } 34 35 namespace url_canon { 36 struct CanonHostInfo; 37 } 38 39 namespace url_parse { 40 struct Parsed; 41 } 42 43 namespace net { 44 45 // Used by FormatUrl to specify handling of certain parts of the url. 46 typedef uint32 FormatUrlType; 47 typedef uint32 FormatUrlTypes; 48 49 // Used by GetHeaderParamValue to determine how to handle quotes in the value. 50 class QuoteRule { 51 public: 52 enum Type { 53 KEEP_OUTER_QUOTES, 54 REMOVE_OUTER_QUOTES, 55 }; 56 57 private: 58 QuoteRule(); 59 }; 60 61 // Nothing is ommitted. 62 extern const FormatUrlType kFormatUrlOmitNothing; 63 64 // If set, any username and password are removed. 65 extern const FormatUrlType kFormatUrlOmitUsernamePassword; 66 67 // If the scheme is 'http://', it's removed. 68 extern const FormatUrlType kFormatUrlOmitHTTP; 69 70 // Omits the path if it is just a slash and there is no query or ref. This is 71 // meaningful for non-file "standard" URLs. 72 extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname; 73 74 // Convenience for omitting all unecessary types. 75 extern const FormatUrlType kFormatUrlOmitAll; 76 77 // Holds a list of ports that should be accepted despite bans. 
78 extern std::multiset<int> explicitly_allowed_ports; 79 80 // Given the full path to a file name, creates a file: URL. The returned URL 81 // may not be valid if the input is malformed. 82 GURL FilePathToFileURL(const FilePath& path); 83 84 // Converts a file: URL back to a filename that can be passed to the OS. The 85 // file URL must be well-formed (GURL::is_valid() must return true); we don't 86 // handle degenerate cases here. Returns true on success, false if it isn't a 87 // valid file URL. On failure, *file_path will be empty. 88 bool FileURLToFilePath(const GURL& url, FilePath* file_path); 89 90 // Splits an input of the form <host>[":"<port>] into its consitituent parts. 91 // Saves the result into |*host| and |*port|. If the input did not have 92 // the optional port, sets |*port| to -1. 93 // Returns true if the parsing was successful, false otherwise. 94 // The returned host is NOT canonicalized, and may be invalid. If <host> is 95 // an IPv6 literal address, the returned host includes the square brackets. 96 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin, 97 std::string::const_iterator host_and_port_end, 98 std::string* host, 99 int* port); 100 bool ParseHostAndPort(const std::string& host_and_port, 101 std::string* host, 102 int* port); 103 104 // Returns a host:port string for the given URL. 105 std::string GetHostAndPort(const GURL& url); 106 107 // Returns a host[:port] string for the given URL, where the port is omitted 108 // if it is the default for the URL's scheme. 109 std::string GetHostAndOptionalPort(const GURL& url); 110 111 // Returns the string representation of an address, like "192.168.0.1". 112 // Returns empty string on failure. 113 std::string NetAddressToString(const struct addrinfo* net_address); 114 std::string NetAddressToString(const struct sockaddr* net_address, 115 socklen_t address_len); 116 117 // Same as NetAddressToString, but additionally includes the port number. 
For 118 // example: "192.168.0.1:99" or "[::1]:80". 119 std::string NetAddressToStringWithPort(const struct addrinfo* net_address); 120 std::string NetAddressToStringWithPort(const struct sockaddr* net_address, 121 socklen_t address_len); 122 123 // Returns the hostname of the current system. Returns empty string on failure. 124 std::string GetHostName(); 125 126 // Extracts the unescaped username/password from |url|, saving the results 127 // into |*username| and |*password|. 128 void GetIdentityFromURL(const GURL& url, 129 string16* username, 130 string16* password); 131 132 // Returns either the host from |url|, or, if the host is empty, the full spec. 133 std::string GetHostOrSpecFromURL(const GURL& url); 134 135 // Return the value of the HTTP response header with name 'name'. 'headers' 136 // should be in the format that URLRequest::GetResponseHeaders() returns. 137 // Returns the empty string if the header is not found. 138 std::wstring GetSpecificHeader(const std::wstring& headers, 139 const std::wstring& name); 140 std::string GetSpecificHeader(const std::string& headers, 141 const std::string& name); 142 143 // Return the value of the HTTP response header field's parameter named 144 // 'param_name'. Returns the empty string if the parameter is not found or is 145 // improperly formatted. 146 std::wstring GetHeaderParamValue(const std::wstring& field, 147 const std::wstring& param_name, 148 QuoteRule::Type quote_rule); 149 std::string GetHeaderParamValue(const std::string& field, 150 const std::string& param_name, 151 QuoteRule::Type quote_rule); 152 153 // Return the filename extracted from Content-Disposition header. The following 154 // formats are tried in order listed below: 155 // 156 // 1. RFC 5987 157 // 2. RFC 2047 158 // 3. Raw-8bit-characters : 159 // a. UTF-8, b. referrer_charset, c. default os codepage. 160 // 4. %-escaped UTF-8. 161 // 162 // In step 3, if referrer_charset is empty(i.e. unknown), 3b is skipped. 
// In step 4, the fallback charsets tried in step 3 are not tried. We
// can consider doing that later.
//
// When a param value is ASCII, but is not in format #2 or format #4 above,
// it is returned as it is unless it's pretty close to two supported
// formats but not well-formed. In that case, an empty string is returned.
//
// In any case, a caller must check for the empty return value and resort to
// another means to get a filename (e.g. url).
//
// This function does not do any escaping and callers are responsible for
// escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit.
//
// TODO(jungshik): revisit this issue. At the moment, the only caller is
// net_util::GetSuggestedFilename and it calls ReplaceIllegalCharacters. The
// other caller is a unit test. Need to figure out how to expose this function
// only to net_util_unittest.
//
std::string GetFileNameFromCD(const std::string& header,
                              const std::string& referrer_charset);

// Converts the given host name to unicode characters. This can be called for
// any host name, if the input is not IDN or is invalid in some way, we'll just
// return the ASCII source so it is still usable.
//
// The input should be the canonicalized ASCII host name from GURL. This
// function does NOT accept UTF-8! Its length must also be given (this is
// designed to work on the substring of the host out of a URL spec).
//
// |languages| is a comma separated list of ISO 639 language codes. It
// is used to determine whether a hostname is 'comprehensible' to a user
// who understands languages listed. |host| will be converted to a
// human-readable form (Unicode) ONLY when each component of |host| is
// regarded as 'comprehensible'.
Scipt-mixing is not allowed except that 197 // Latin letters in the ASCII range can be mixed with a limited set of 198 // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). 199 // When |languages| is empty, even that mixing is not allowed. 200 // 201 // (|offset[s]_for_adjustment|) specifies one or more offsets into the original 202 // |url|'s spec(); each offset will be adjusted to point at the same logical 203 // place in the result strings during decoding. If this isn't possible because 204 // an offset points past the end of |host| or into the middle of a punycode 205 // sequence, the offending offset will be set to std::wstring::npos. 206 // |offset[s]_for_adjustment| may be NULL. 207 std::wstring IDNToUnicode(const char* host, 208 size_t host_len, 209 const std::wstring& languages, 210 size_t* offset_for_adjustment); 211 std::wstring IDNToUnicodeWithOffsets( 212 const char* host, 213 size_t host_len, 214 const std::wstring& languages, 215 std::vector<size_t>* offsets_for_adjustment); 216 217 // Canonicalizes |host| and returns it. Also fills |host_info| with 218 // IP address information. |host_info| must not be NULL. 219 std::string CanonicalizeHost(const std::string& host, 220 url_canon::CanonHostInfo* host_info); 221 std::string CanonicalizeHost(const std::wstring& host, 222 url_canon::CanonHostInfo* host_info); 223 224 // Returns true if |host| is not an IP address and is compliant with a set of 225 // rules based on RFC 1738 and tweaked to be compatible with the real world. 226 // The rules are: 227 // * One or more components separated by '.' 
228 // * Each component begins and ends with an alphanumeric character 229 // * Each component contains only alphanumeric characters and '-' or '_' 230 // * The last component does not begin with a digit 231 // * Optional trailing dot after last component (means "treat as FQDN") 232 // If |desired_tld| is non-NULL, the host will only be considered invalid if 233 // appending it as a trailing component still results in an invalid host. This 234 // helps us avoid marking as "invalid" user attempts to open "www.401k.com" by 235 // typing 4-0-1-k-<ctrl>+<enter>. 236 // 237 // NOTE: You should only pass in hosts that have been returned from 238 // CanonicalizeHost(), or you may not get accurate results. 239 bool IsCanonicalizedHostCompliant(const std::string& host, 240 const std::string& desired_tld); 241 242 // Call these functions to get the html snippet for a directory listing. 243 // The return values of both functions are in UTF-8. 244 std::string GetDirectoryListingHeader(const string16& title); 245 246 // Given the name of a file in a directory (ftp or local) and 247 // other information (is_dir, size, modification time), it returns 248 // the html snippet to add the entry for the file to the directory listing. 249 // Currently, it's a script tag containing a call to a Javascript function 250 // |addRow|. 251 // 252 // |name| is the file name to be displayed. |raw_bytes| will be used 253 // as the actual target of the link (so for example, ftp links should use 254 // server's encoding). If |raw_bytes| is an empty string, UTF-8 encoded |name| 255 // will be used. 256 // 257 // Both |name| and |raw_bytes| are escaped internally. 258 std::string GetDirectoryListingEntry(const string16& name, 259 const std::string& raw_bytes, 260 bool is_dir, int64 size, 261 base::Time modified); 262 263 // If text starts with "www." it is removed, otherwise text is returned 264 // unmodified. 
265 string16 StripWWW(const string16& text); 266 267 // Gets the filename from the raw Content-Disposition header (as read from the 268 // network). Otherwise uses the last path component name or hostname from 269 // |url|. If there is no filename or it can't be used, the given |default_name|, 270 // will be used unless it is empty. 271 272 // Note: it's possible for the suggested filename to be empty (e.g., 273 // file:///). referrer_charset is used as one of charsets 274 // to interpret a raw 8bit string in C-D header (after interpreting 275 // as UTF-8 fails). See the comment for GetFilenameFromCD for more details. 276 string16 GetSuggestedFilename(const GURL& url, 277 const std::string& content_disposition, 278 const std::string& referrer_charset, 279 const string16& default_name); 280 281 // Checks the given port against a list of ports which are restricted by 282 // default. Returns true if the port is allowed, false if it is restricted. 283 bool IsPortAllowedByDefault(int port); 284 285 // Checks the given port against a list of ports which are restricted by the 286 // FTP protocol. Returns true if the port is allowed, false if it is 287 // restricted. 288 bool IsPortAllowedByFtp(int port); 289 290 // Check if banned |port| has been overriden by an entry in 291 // |explicitly_allowed_ports_|. 292 bool IsPortAllowedByOverride(int port); 293 294 // Set socket to non-blocking mode 295 int SetNonBlocking(int fd); 296 297 // Appends the given part of the original URL to the output string formatted for 298 // the user. The given parsed structure will be updated. The host name formatter 299 // also takes the same accept languages component as ElideURL. |new_parsed| may 300 // be null. 301 // 302 // (|offset[s]_for_adjustment|) specifies one or more offsets into the original 303 // |url|'s spec(); each offset will be adjusted to point at the same logical 304 // place in the result strings after reformatting of the host. 
If this isn't 305 // possible because an offset points past the end of the host or into the middle 306 // of a multi-character sequence, the offending offset will be set to 307 // std::wstring::npos. |offset[s]_for_adjustment| may be NULL. 308 void AppendFormattedHost(const GURL& url, 309 const std::wstring& languages, 310 std::wstring* output, 311 url_parse::Parsed* new_parsed, 312 size_t* offset_for_adjustment); 313 void AppendFormattedHostWithOffsets( 314 const GURL& url, 315 const std::wstring& languages, 316 std::wstring* output, 317 url_parse::Parsed* new_parsed, 318 std::vector<size_t>* offsets_for_adjustment); 319 320 // Creates a string representation of |url|. The IDN host name may be in Unicode 321 // if |languages| accepts the Unicode representation. |format_type| is a bitmask 322 // of FormatUrlTypes, see it for details. |unescape_rules| defines how to clean 323 // the URL for human readability. You will generally want |UnescapeRule::SPACES| 324 // for display to the user if you can handle spaces, or |UnescapeRule::NORMAL| 325 // if not. If the path part and the query part seem to be encoded in %-encoded 326 // UTF-8, decodes %-encoding and UTF-8. 327 // 328 // The last three parameters may be NULL. 329 // |new_parsed| will be set to the parsing parameters of the resultant URL. 330 // |prefix_end| will be the length before the hostname of the resultant URL. 331 // 332 // (|offset[s]_for_adjustment|) specifies one or more offsets into the original 333 // |url|'s spec(); each offset will be modified to reflect changes this function 334 // makes to the output string. For example, if |url| is "http://a:b@c.com/", 335 // |omit_username_password| is true, and an offset is 12 (the offset of '.'), 336 // then on return the output string will be "http://c.com/" and the offset will 337 // be 8. If an offset cannot be successfully adjusted (e.g. 
because it points 338 // into the middle of a component that was entirely removed, past the end of the 339 // string, or into the middle of an encoding sequence), it will be set to 340 // string16::npos. 341 string16 FormatUrl(const GURL& url, 342 const std::string& languages, 343 FormatUrlTypes format_types, 344 UnescapeRule::Type unescape_rules, 345 url_parse::Parsed* new_parsed, 346 size_t* prefix_end, 347 size_t* offset_for_adjustment); 348 string16 FormatUrlWithOffsets(const GURL& url, 349 const std::string& languages, 350 FormatUrlTypes format_types, 351 UnescapeRule::Type unescape_rules, 352 url_parse::Parsed* new_parsed, 353 size_t* prefix_end, 354 std::vector<size_t>* offsets_for_adjustment); 355 356 // This is a convenience function for FormatUrl() with 357 // format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical 358 // set of flags for "URLs to display to the user". You should be cautious about 359 // using this for URLs which will be parsed or sent to other applications. 360 inline string16 FormatUrl(const GURL& url, const std::string& languages) { 361 return FormatUrl(url, languages, kFormatUrlOmitAll, UnescapeRule::SPACES, 362 NULL, NULL, NULL); 363 } 364 365 // Returns whether FormatUrl() would strip a trailing slash from |url|, given a 366 // format flag including kFormatUrlOmitTrailingSlashOnBareHostname. 367 bool CanStripTrailingSlash(const GURL& url); 368 369 // Strip the portions of |url| that aren't core to the network request. 370 // - user name / password 371 // - reference section 372 GURL SimplifyUrlForRequest(const GURL& url); 373 374 void SetExplicitlyAllowedPorts(const std::string& allowed_ports); 375 376 class ScopedPortException { 377 public: 378 ScopedPortException(int port); 379 ~ScopedPortException(); 380 381 private: 382 int port_; 383 384 DISALLOW_COPY_AND_ASSIGN(ScopedPortException); 385 }; 386 387 // Perform a simplistic test to see if IPv6 is supported by trying to create an 388 // IPv6 socket. 
// TODO(jar): Make test more in-depth as needed.
bool IPv6Supported();

// Returns true if it can determine that only loopback addresses are configured.
// i.e. if only 127.0.0.1 and ::1 are routable.
bool HaveOnlyLoopbackAddresses();

// IPAddressNumber is used to represent an IP address's numeric value as an
// array of bytes, from most significant to least significant. This is the
// network byte ordering.
//
// IPv4 addresses will have length 4, whereas IPv6 addresses will have
// length 16.
typedef std::vector<unsigned char> IPAddressNumber;

static const size_t kIPv4AddressSize = 4;
static const size_t kIPv6AddressSize = 16;

// Parses an IP address literal (either IPv4 or IPv6) to its numeric value.
// Returns true on success and fills |ip_number| with the numeric value.
bool ParseIPLiteralToNumber(const std::string& ip_literal,
                            IPAddressNumber* ip_number);

// Converts an IPv4 address to an IPv4-mapped IPv6 address.
// For example 192.168.0.1 would be converted to ::ffff:192.168.0.1.
IPAddressNumber ConvertIPv4NumberToIPv6Number(
    const IPAddressNumber& ipv4_number);

// Parses an IP block specifier from CIDR notation to an
// (IP address, prefix length) pair. Returns true on success and fills
// |*ip_number| with the numeric value of the IP address and sets
// |*prefix_length_in_bits| with the length of the prefix.
//
// CIDR notation literals can use either IPv4 or IPv6 literals. Some examples:
//
//    10.10.3.1/20
//    a:b:c::/46
//    ::1/128
bool ParseCIDRBlock(const std::string& cidr_literal,
                    IPAddressNumber* ip_number,
                    size_t* prefix_length_in_bits);

// Compares an IP address to see if it falls within the specified IP block.
// Returns true if it does, false otherwise.
432 // 433 // The IP block is given by (|ip_prefix|, |prefix_length_in_bits|) -- any 434 // IP address whose |prefix_length_in_bits| most significant bits match 435 // |ip_prefix| will be matched. 436 // 437 // In cases when an IPv4 address is being compared to an IPv6 address prefix 438 // and vice versa, the IPv4 addresses will be converted to IPv4-mapped 439 // (IPv6) addresses. 440 bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number, 441 const IPAddressNumber& ip_prefix, 442 size_t prefix_length_in_bits); 443 444 // Makes a copy of |info|. The dynamically-allocated parts are copied as well. 445 // If |recursive| is true, chained entries via ai_next are copied too. 446 // The copy returned by this function should be freed using 447 // FreeCopyOfAddrinfo(), and NOT freeaddrinfo(). 448 struct addrinfo* CreateCopyOfAddrinfo(const struct addrinfo* info, 449 bool recursive); 450 451 // Frees an addrinfo that was created by CreateCopyOfAddrinfo(). 452 void FreeCopyOfAddrinfo(struct addrinfo* info); 453 454 // Returns the port field of the sockaddr in |info|. 455 const uint16* GetPortFieldFromAddrinfo(const struct addrinfo* info); 456 uint16* GetPortFieldFromAddrinfo(struct addrinfo* info); 457 458 // Returns the value of |info's| port (in host byte ordering). 459 int GetPortFromAddrinfo(const struct addrinfo* info); 460 461 // Same except for struct sockaddr. 462 const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address, 463 socklen_t address_len); 464 int GetPortFromSockaddr(const struct sockaddr* address, 465 socklen_t address_len); 466 467 // Returns true if |host| is one of the names (e.g. "localhost") or IP 468 // addresses (IPv4 127.0.0.0/8 or IPv6 ::1) that indicate a loopback. 469 // 470 // Note that this function does not check for IP addresses other than 471 // the above, although other IP addresses may point to the local 472 // machine. 
473 bool IsLocalhost(const std::string& host); 474 475 // struct that is used by GetNetworkList() to represent a network 476 // interface. 477 struct NetworkInterface { 478 NetworkInterface(); 479 NetworkInterface(const std::string& name, const IPAddressNumber& address); 480 ~NetworkInterface(); 481 482 std::string name; 483 IPAddressNumber address; 484 }; 485 486 typedef std::list<NetworkInterface> NetworkInterfaceList; 487 488 // Returns list of network interfaces except loopback interface. If an 489 // interface has more than one address, a separate entry is added to 490 // the list for each address. 491 // Can be called only on a thread that allows IO. 492 bool GetNetworkList(NetworkInterfaceList* networks); 493 494 // Private adjustment function called by std::transform which sets the offset 495 // to npos if the offset occurs at or before |component_start|, otherwise don't 496 // alter the offset. Exposed here for unit testing. 497 struct ClampComponentOffset { 498 explicit ClampComponentOffset(size_t component_start); 499 size_t operator()(size_t offset); 500 501 const size_t component_start; 502 }; 503 504 } // namespace net 505 506 #endif // NET_BASE_NET_UTIL_H_ 507