1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_BASE_NET_UTIL_H_ 6 #define NET_BASE_NET_UTIL_H_ 7 8 #include "build/build_config.h" 9 10 #if defined(OS_WIN) 11 #include <windows.h> 12 #include <ws2tcpip.h> 13 #elif defined(OS_POSIX) 14 #include <sys/types.h> 15 #include <sys/socket.h> 16 #endif 17 18 #include <list> 19 #include <set> 20 #include <string> 21 #include <vector> 22 23 #include "base/basictypes.h" 24 #include "base/strings/string16.h" 25 #include "net/base/address_family.h" 26 #include "net/base/escape.h" 27 #include "net/base/net_export.h" 28 #include "net/base/net_log.h" 29 30 class GURL; 31 32 namespace base { 33 class FilePath; 34 class Time; 35 } 36 37 namespace url_canon { 38 struct CanonHostInfo; 39 } 40 41 namespace url_parse { 42 struct Parsed; 43 } 44 45 namespace net { 46 47 // Used by FormatUrl to specify handling of certain parts of the url. 48 typedef uint32 FormatUrlType; 49 typedef uint32 FormatUrlTypes; 50 51 // IPAddressNumber is used to represent an IP address's numeric value as an 52 // array of bytes, from most significant to least significant. This is the 53 // network byte ordering. 54 // 55 // IPv4 addresses will have length 4, whereas IPv6 address will have length 16. 56 typedef std::vector<unsigned char> IPAddressNumber; 57 typedef std::vector<IPAddressNumber> IPAddressList; 58 59 static const size_t kIPv4AddressSize = 4; 60 static const size_t kIPv6AddressSize = 16; 61 62 // Nothing is ommitted. 63 NET_EXPORT extern const FormatUrlType kFormatUrlOmitNothing; 64 65 // If set, any username and password are removed. 66 NET_EXPORT extern const FormatUrlType kFormatUrlOmitUsernamePassword; 67 68 // If the scheme is 'http://', it's removed. 69 NET_EXPORT extern const FormatUrlType kFormatUrlOmitHTTP; 70 71 // Omits the path if it is just a slash and there is no query or ref. This is 72 // meaningful for non-file "standard" URLs. 73 NET_EXPORT extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname; 74 75 // Convenience for omitting all unecessary types. 76 NET_EXPORT extern const FormatUrlType kFormatUrlOmitAll; 77 78 // Returns the number of explicitly allowed ports; for testing. 79 NET_EXPORT_PRIVATE extern size_t GetCountOfExplicitlyAllowedPorts(); 80 81 // Given the full path to a file name, creates a file: URL. The returned URL 82 // may not be valid if the input is malformed. 83 NET_EXPORT GURL FilePathToFileURL(const base::FilePath& path); 84 85 // Converts a file: URL back to a filename that can be passed to the OS. The 86 // file URL must be well-formed (GURL::is_valid() must return true); we don't 87 // handle degenerate cases here. Returns true on success, false if it isn't a 88 // valid file URL. On failure, *file_path will be empty. 89 NET_EXPORT bool FileURLToFilePath(const GURL& url, base::FilePath* file_path); 90 91 // Splits an input of the form <host>[":"<port>] into its consitituent parts. 92 // Saves the result into |*host| and |*port|. If the input did not have 93 // the optional port, sets |*port| to -1. 94 // Returns true if the parsing was successful, false otherwise. 95 // The returned host is NOT canonicalized, and may be invalid. If <host> is 96 // an IPv6 literal address, the returned host includes the square brackets. 97 NET_EXPORT bool ParseHostAndPort( 98 std::string::const_iterator host_and_port_begin, 99 std::string::const_iterator host_and_port_end, 100 std::string* host, 101 int* port); 102 NET_EXPORT bool ParseHostAndPort( 103 const std::string& host_and_port, 104 std::string* host, 105 int* port); 106 107 // Returns a host:port string for the given URL. 108 NET_EXPORT std::string GetHostAndPort(const GURL& url); 109 110 // Returns a host[:port] string for the given URL, where the port is omitted 111 // if it is the default for the URL's scheme. 112 NET_EXPORT_PRIVATE std::string GetHostAndOptionalPort(const GURL& url); 113 114 // Returns true if |hostname| contains a non-registerable or non-assignable 115 // domain name (eg: a gTLD that has not been assigned by IANA) 116 // 117 // TODO(rsleevi): http://crbug.com/119212 - Also match internal IP 118 // address ranges. 119 NET_EXPORT bool IsHostnameNonUnique(const std::string& hostname); 120 121 // Convenience struct for when you need a |struct sockaddr|. 122 struct SockaddrStorage { 123 SockaddrStorage() : addr_len(sizeof(addr_storage)), 124 addr(reinterpret_cast<struct sockaddr*>(&addr_storage)) {} 125 struct sockaddr_storage addr_storage; 126 socklen_t addr_len; 127 struct sockaddr* const addr; 128 }; 129 130 // Extracts the IP address and port portions of a sockaddr. |port| is optional, 131 // and will not be filled in if NULL. 132 bool GetIPAddressFromSockAddr(const struct sockaddr* sock_addr, 133 socklen_t sock_addr_len, 134 const unsigned char** address, 135 size_t* address_len, 136 uint16* port); 137 138 // Returns the string representation of an IP address. 139 // For example: "192.168.0.1" or "::1". 140 NET_EXPORT std::string IPAddressToString(const uint8* address, 141 size_t address_len); 142 143 // Returns the string representation of an IP address along with its port. 144 // For example: "192.168.0.1:99" or "[::1]:80". 145 NET_EXPORT std::string IPAddressToStringWithPort(const uint8* address, 146 size_t address_len, 147 uint16 port); 148 149 // Same as IPAddressToString() but for a sockaddr. This output will not include 150 // the IPv6 scope ID. 151 NET_EXPORT std::string NetAddressToString(const struct sockaddr* sa, 152 socklen_t sock_addr_len); 153 154 // Same as IPAddressToStringWithPort() but for a sockaddr. This output will not 155 // include the IPv6 scope ID. 156 NET_EXPORT std::string NetAddressToStringWithPort(const struct sockaddr* sa, 157 socklen_t sock_addr_len); 158 159 // Same as IPAddressToString() but for an IPAddressNumber. 160 NET_EXPORT std::string IPAddressToString(const IPAddressNumber& addr); 161 162 // Same as IPAddressToStringWithPort() but for an IPAddressNumber. 163 NET_EXPORT std::string IPAddressToStringWithPort( 164 const IPAddressNumber& addr, uint16 port); 165 166 // Returns the hostname of the current system. Returns empty string on failure. 167 NET_EXPORT std::string GetHostName(); 168 169 // Extracts the unescaped username/password from |url|, saving the results 170 // into |*username| and |*password|. 171 NET_EXPORT_PRIVATE void GetIdentityFromURL(const GURL& url, 172 base::string16* username, 173 base::string16* password); 174 175 // Returns either the host from |url|, or, if the host is empty, the full spec. 176 NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url); 177 178 // Return the value of the HTTP response header with name 'name'. 'headers' 179 // should be in the format that URLRequest::GetResponseHeaders() returns. 180 // Returns the empty string if the header is not found. 181 NET_EXPORT std::string GetSpecificHeader(const std::string& headers, 182 const std::string& name); 183 184 // Converts the given host name to unicode characters. This can be called for 185 // any host name, if the input is not IDN or is invalid in some way, we'll just 186 // return the ASCII source so it is still usable. 187 // 188 // The input should be the canonicalized ASCII host name from GURL. This 189 // function does NOT accept UTF-8! 190 // 191 // |languages| is a comma separated list of ISO 639 language codes. It 192 // is used to determine whether a hostname is 'comprehensible' to a user 193 // who understands languages listed. |host| will be converted to a 194 // human-readable form (Unicode) ONLY when each component of |host| is 195 // regarded as 'comprehensible'. Scipt-mixing is not allowed except that 196 // Latin letters in the ASCII range can be mixed with a limited set of 197 // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). 198 // When |languages| is empty, even that mixing is not allowed. 199 NET_EXPORT base::string16 IDNToUnicode(const std::string& host, 200 const std::string& languages); 201 202 // Canonicalizes |host| and returns it. Also fills |host_info| with 203 // IP address information. |host_info| must not be NULL. 204 NET_EXPORT std::string CanonicalizeHost(const std::string& host, 205 url_canon::CanonHostInfo* host_info); 206 207 // Returns true if |host| is not an IP address and is compliant with a set of 208 // rules based on RFC 1738 and tweaked to be compatible with the real world. 209 // The rules are: 210 // * One or more components separated by '.' 211 // * Each component begins with an alphanumeric character or '-' 212 // * Each component contains only alphanumeric characters and '-' or '_' 213 // * Each component ends with an alphanumeric character or '-' 214 // * The last component begins with an alphanumeric character 215 // * Optional trailing dot after last component (means "treat as FQDN") 216 // If |desired_tld| is non-NULL, the host will only be considered invalid if 217 // appending it as a trailing component still results in an invalid host. This 218 // helps us avoid marking as "invalid" user attempts to open, say, "www.-9.com" 219 // by typing -, 9, <ctrl>+<enter>. 220 // 221 // NOTE: You should only pass in hosts that have been returned from 222 // CanonicalizeHost(), or you may not get accurate results. 223 NET_EXPORT bool IsCanonicalizedHostCompliant(const std::string& host, 224 const std::string& desired_tld); 225 226 // Call these functions to get the html snippet for a directory listing. 227 // The return values of both functions are in UTF-8. 228 NET_EXPORT std::string GetDirectoryListingHeader(const base::string16& title); 229 230 // Given the name of a file in a directory (ftp or local) and 231 // other information (is_dir, size, modification time), it returns 232 // the html snippet to add the entry for the file to the directory listing. 233 // Currently, it's a script tag containing a call to a Javascript function 234 // |addRow|. 235 // 236 // |name| is the file name to be displayed. |raw_bytes| will be used 237 // as the actual target of the link (so for example, ftp links should use 238 // server's encoding). If |raw_bytes| is an empty string, UTF-8 encoded |name| 239 // will be used. 240 // 241 // Both |name| and |raw_bytes| are escaped internally. 242 NET_EXPORT std::string GetDirectoryListingEntry(const base::string16& name, 243 const std::string& raw_bytes, 244 bool is_dir, int64 size, 245 base::Time modified); 246 247 // If text starts with "www." it is removed, otherwise text is returned 248 // unmodified. 249 NET_EXPORT base::string16 StripWWW(const base::string16& text); 250 251 // Runs |url|'s host through StripWWW(). |url| must be valid. 252 NET_EXPORT base::string16 StripWWWFromHost(const GURL& url); 253 254 // Generates a filename using the first successful method from the following (in 255 // order): 256 // 257 // 1) The raw Content-Disposition header in |content_disposition| as read from 258 // the network. |referrer_charset| is used to decode non-ASCII strings. 259 // 2) |suggested_name| if specified. |suggested_name| is assumed to be in 260 // UTF-8. 261 // 3) The filename extracted from the |url|. |referrer_charset| will be used to 262 // interpret the URL if there are non-ascii characters. 263 // 4) |default_name|. If non-empty, |default_name| is assumed to be a filename 264 // and shouldn't contain a path. |default_name| is not subject to validation 265 // or sanitization, and therefore shouldn't be a user supplied string. 266 // 5) The hostname portion from the |url| 267 // 268 // Then, leading and trailing '.'s will be removed. On Windows, trailing spaces 269 // are also removed. The string "download" is the final fallback if no filename 270 // is found or the filename is empty. 271 // 272 // Any illegal characters in the filename will be replaced by '-'. If the 273 // filename doesn't contain an extension, and a |mime_type| is specified, the 274 // preferred extension for the |mime_type| will be appended to the filename. 275 // The resulting filename is then checked against a list of reserved names on 276 // Windows. If the name is reserved, an underscore will be prepended to the 277 // filename. 278 // 279 // Note: |mime_type| should only be specified if this function is called from a 280 // thread that allows IO. 281 NET_EXPORT base::string16 GetSuggestedFilename( 282 const GURL& url, 283 const std::string& content_disposition, 284 const std::string& referrer_charset, 285 const std::string& suggested_name, 286 const std::string& mime_type, 287 const std::string& default_name); 288 289 // Similar to GetSuggestedFilename(), but returns a FilePath. 290 NET_EXPORT base::FilePath GenerateFileName( 291 const GURL& url, 292 const std::string& content_disposition, 293 const std::string& referrer_charset, 294 const std::string& suggested_name, 295 const std::string& mime_type, 296 const std::string& default_name); 297 298 // Valid basenames: 299 // * are not empty 300 // * are not Windows reserved names (CON, NUL.zip, etc.) 301 // * are just basenames 302 // * do not have trailing separators 303 // * do not equal kCurrentDirectory 304 // * do not reference the parent directory 305 // * are valid path components, which: 306 // - * are not the empty string 307 // - * do not contain illegal characters 308 // - * do not end with Windows shell-integrated extensions (even on posix) 309 // - * do not begin with '.' (which would hide them in most file managers) 310 // - * do not end with ' ' or '.' 311 NET_EXPORT bool IsSafePortableBasename(const base::FilePath& path); 312 313 // Basenames of valid relative paths are IsSafePortableBasename(), and internal 314 // path components of valid relative paths are valid path components as 315 // described above IsSafePortableBasename(). Valid relative paths are not 316 // absolute paths. 317 NET_EXPORT bool IsSafePortableRelativePath(const base::FilePath& path); 318 319 // Ensures that the filename and extension is safe to use in the filesystem. 320 // 321 // Assumes that |file_path| already contains a valid path or file name. On 322 // Windows if the extension causes the file to have an unsafe interaction with 323 // the shell (see net_util::IsShellIntegratedExtension()), then it will be 324 // replaced by the string 'download'. If |file_path| doesn't contain an 325 // extension or |ignore_extension| is true then the preferred extension, if one 326 // exists, for |mime_type| will be used as the extension. 327 // 328 // On Windows, the filename will be checked against a set of reserved names, and 329 // if so, an underscore will be prepended to the name. 330 // 331 // |file_name| can either be just the file name or it can be a full path to a 332 // file. 333 // 334 // Note: |mime_type| should only be non-empty if this function is called from a 335 // thread that allows IO. 336 NET_EXPORT void GenerateSafeFileName(const std::string& mime_type, 337 bool ignore_extension, 338 base::FilePath* file_path); 339 340 // Checks |port| against a list of ports which are restricted by default. 341 // Returns true if |port| is allowed, false if it is restricted. 342 NET_EXPORT bool IsPortAllowedByDefault(int port); 343 344 // Checks |port| against a list of ports which are restricted by the FTP 345 // protocol. Returns true if |port| is allowed, false if it is restricted. 346 NET_EXPORT_PRIVATE bool IsPortAllowedByFtp(int port); 347 348 // Check if banned |port| has been overriden by an entry in 349 // |explicitly_allowed_ports_|. 350 NET_EXPORT_PRIVATE bool IsPortAllowedByOverride(int port); 351 352 // Set socket to non-blocking mode 353 NET_EXPORT int SetNonBlocking(int fd); 354 355 // Formats the host in |url| and appends it to |output|. The host formatter 356 // takes the same accept languages component as ElideURL(). 357 NET_EXPORT void AppendFormattedHost(const GURL& url, 358 const std::string& languages, 359 base::string16* output); 360 361 // Creates a string representation of |url|. The IDN host name may be in Unicode 362 // if |languages| accepts the Unicode representation. |format_type| is a bitmask 363 // of FormatUrlTypes, see it for details. |unescape_rules| defines how to clean 364 // the URL for human readability. You will generally want |UnescapeRule::SPACES| 365 // for display to the user if you can handle spaces, or |UnescapeRule::NORMAL| 366 // if not. If the path part and the query part seem to be encoded in %-encoded 367 // UTF-8, decodes %-encoding and UTF-8. 368 // 369 // The last three parameters may be NULL. 370 // |new_parsed| will be set to the parsing parameters of the resultant URL. 371 // |prefix_end| will be the length before the hostname of the resultant URL. 372 // 373 // (|offset[s]_for_adjustment|) specifies one or more offsets into the original 374 // |url|'s spec(); each offset will be modified to reflect changes this function 375 // makes to the output string. For example, if |url| is "http://a:b@c.com/", 376 // |omit_username_password| is true, and an offset is 12 (the offset of '.'), 377 // then on return the output string will be "http://c.com/" and the offset will 378 // be 8. If an offset cannot be successfully adjusted (e.g. because it points 379 // into the middle of a component that was entirely removed, past the end of the 380 // string, or into the middle of an encoding sequence), it will be set to 381 // base::string16::npos. 382 NET_EXPORT base::string16 FormatUrl(const GURL& url, 383 const std::string& languages, 384 FormatUrlTypes format_types, 385 UnescapeRule::Type unescape_rules, 386 url_parse::Parsed* new_parsed, 387 size_t* prefix_end, 388 size_t* offset_for_adjustment); 389 NET_EXPORT base::string16 FormatUrlWithOffsets( 390 const GURL& url, 391 const std::string& languages, 392 FormatUrlTypes format_types, 393 UnescapeRule::Type unescape_rules, 394 url_parse::Parsed* new_parsed, 395 size_t* prefix_end, 396 std::vector<size_t>* offsets_for_adjustment); 397 398 // This is a convenience function for FormatUrl() with 399 // format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical 400 // set of flags for "URLs to display to the user". You should be cautious about 401 // using this for URLs which will be parsed or sent to other applications. 402 inline base::string16 FormatUrl(const GURL& url, const std::string& languages) { 403 return FormatUrl(url, languages, kFormatUrlOmitAll, UnescapeRule::SPACES, 404 NULL, NULL, NULL); 405 } 406 407 // Returns whether FormatUrl() would strip a trailing slash from |url|, given a 408 // format flag including kFormatUrlOmitTrailingSlashOnBareHostname. 409 NET_EXPORT bool CanStripTrailingSlash(const GURL& url); 410 411 // Strip the portions of |url| that aren't core to the network request. 412 // - user name / password 413 // - reference section 414 NET_EXPORT_PRIVATE GURL SimplifyUrlForRequest(const GURL& url); 415 416 NET_EXPORT void SetExplicitlyAllowedPorts(const std::string& allowed_ports); 417 418 class NET_EXPORT ScopedPortException { 419 public: 420 ScopedPortException(int port); 421 ~ScopedPortException(); 422 423 private: 424 int port_; 425 426 DISALLOW_COPY_AND_ASSIGN(ScopedPortException); 427 }; 428 429 // Returns true if it can determine that only loopback addresses are configured. 430 // i.e. if only 127.0.0.1 and ::1 are routable. 431 // Also returns false if it cannot determine this. 432 bool HaveOnlyLoopbackAddresses(); 433 434 // Returns AddressFamily of the address. 435 NET_EXPORT_PRIVATE AddressFamily GetAddressFamily( 436 const IPAddressNumber& address); 437 438 // Parses an IP address literal (either IPv4 or IPv6) to its numeric value. 439 // Returns true on success and fills |ip_number| with the numeric value. 440 NET_EXPORT_PRIVATE bool ParseIPLiteralToNumber(const std::string& ip_literal, 441 IPAddressNumber* ip_number); 442 443 // Converts an IPv4 address to an IPv4-mapped IPv6 address. 444 // For example 192.168.0.1 would be converted to ::ffff:192.168.0.1. 445 NET_EXPORT_PRIVATE IPAddressNumber ConvertIPv4NumberToIPv6Number( 446 const IPAddressNumber& ipv4_number); 447 448 // Returns true iff |address| is an IPv4-mapped IPv6 address. 449 NET_EXPORT_PRIVATE bool IsIPv4Mapped(const IPAddressNumber& address); 450 451 // Converts an IPv4-mapped IPv6 address to IPv4 address. Should only be called 452 // on IPv4-mapped IPv6 addresses. 453 NET_EXPORT_PRIVATE IPAddressNumber ConvertIPv4MappedToIPv4( 454 const IPAddressNumber& address); 455 456 // Parses an IP block specifier from CIDR notation to an 457 // (IP address, prefix length) pair. Returns true on success and fills 458 // |*ip_number| with the numeric value of the IP address and sets 459 // |*prefix_length_in_bits| with the length of the prefix. 460 // 461 // CIDR notation literals can use either IPv4 or IPv6 literals. Some examples: 462 // 463 // 10.10.3.1/20 464 // a:b:c::/46 465 // ::1/128 466 NET_EXPORT bool ParseCIDRBlock(const std::string& cidr_literal, 467 IPAddressNumber* ip_number, 468 size_t* prefix_length_in_bits); 469 470 // Compares an IP address to see if it falls within the specified IP block. 471 // Returns true if it does, false otherwise. 472 // 473 // The IP block is given by (|ip_prefix|, |prefix_length_in_bits|) -- any 474 // IP address whose |prefix_length_in_bits| most significant bits match 475 // |ip_prefix| will be matched. 476 // 477 // In cases when an IPv4 address is being compared to an IPv6 address prefix 478 // and vice versa, the IPv4 addresses will be converted to IPv4-mapped 479 // (IPv6) addresses. 480 NET_EXPORT_PRIVATE bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number, 481 const IPAddressNumber& ip_prefix, 482 size_t prefix_length_in_bits); 483 484 // Retuns the port field of the |sockaddr|. 485 const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address, 486 socklen_t address_len); 487 // Returns the value of port in |sockaddr| (in host byte ordering). 488 NET_EXPORT_PRIVATE int GetPortFromSockaddr(const struct sockaddr* address, 489 socklen_t address_len); 490 491 // Returns true if |host| is one of the names (e.g. "localhost") or IP 492 // addresses (IPv4 127.0.0.0/8 or IPv6 ::1) that indicate a loopback. 493 // 494 // Note that this function does not check for IP addresses other than 495 // the above, although other IP addresses may point to the local 496 // machine. 497 NET_EXPORT_PRIVATE bool IsLocalhost(const std::string& host); 498 499 // struct that is used by GetNetworkList() to represent a network 500 // interface. 501 struct NET_EXPORT NetworkInterface { 502 NetworkInterface(); 503 NetworkInterface(const std::string& name, const IPAddressNumber& address); 504 ~NetworkInterface(); 505 506 std::string name; 507 IPAddressNumber address; 508 }; 509 510 typedef std::vector<NetworkInterface> NetworkInterfaceList; 511 512 // Returns list of network interfaces except loopback interface. If an 513 // interface has more than one address, a separate entry is added to 514 // the list for each address. 515 // Can be called only on a thread that allows IO. 516 NET_EXPORT bool GetNetworkList(NetworkInterfaceList* networks); 517 518 // General category of the IEEE 802.11 (wifi) physical layer operating mode. 519 enum WifiPHYLayerProtocol { 520 // No wifi support or no associated AP. 521 WIFI_PHY_LAYER_PROTOCOL_NONE, 522 // An obsolete modes introduced by the original 802.11, e.g. IR, FHSS, 523 WIFI_PHY_LAYER_PROTOCOL_ANCIENT, 524 // 802.11a, OFDM-based rates. 525 WIFI_PHY_LAYER_PROTOCOL_A, 526 // 802.11b, DSSS or HR DSSS. 527 WIFI_PHY_LAYER_PROTOCOL_B, 528 // 802.11g, same rates as 802.11a but compatible with 802.11b. 529 WIFI_PHY_LAYER_PROTOCOL_G, 530 // 802.11n, HT rates. 531 WIFI_PHY_LAYER_PROTOCOL_N, 532 // Unclassified mode or failure to identify. 533 WIFI_PHY_LAYER_PROTOCOL_UNKNOWN 534 }; 535 536 // Characterize the PHY mode of the currently associated access point. 537 // Currently only available on OS_WIN. 538 NET_EXPORT WifiPHYLayerProtocol GetWifiPHYLayerProtocol(); 539 540 } // namespace net 541 542 #endif // NET_BASE_NET_UTIL_H_ 543