Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef NET_BASE_NET_UTIL_H_
      6 #define NET_BASE_NET_UTIL_H_
      7 #pragma once
      8 
      9 #include "build/build_config.h"
     10 
     11 #if defined(OS_WIN)
     12 #include <windows.h>
     13 #include <ws2tcpip.h>
     14 #elif defined(OS_POSIX)
     15 #include <sys/socket.h>
     16 #endif
     17 
     18 #include <list>
     19 #include <string>
     20 #include <set>
     21 #include <vector>
     22 
     23 #include "base/basictypes.h"
     24 #include "base/string16.h"
     25 #include "net/base/escape.h"
     26 #include "net/base/net_export.h"
     27 
     28 struct addrinfo;
     29 class FilePath;
     30 class GURL;
     31 
     32 namespace base {
     33 class Time;
     34 }
     35 
     36 namespace url_canon {
     37 struct CanonHostInfo;
     38 }
     39 
     40 namespace url_parse {
     41 struct Parsed;
     42 }
     43 
     44 namespace net {
     45 
     46 // Used by FormatUrl to specify handling of certain parts of the url.
     47 typedef uint32 FormatUrlType;
     48 typedef uint32 FormatUrlTypes;
     49 
     50 // Used by GetHeaderParamValue to determine how to handle quotes in the value.
        // The class is only a scope for the |Type| enum: its sole constructor is
        // private, so outside code cannot create instances.
     51 class QuoteRule {
     52  public:
          // NOTE(review): judging by the names, KEEP_OUTER_QUOTES leaves the
          // quotes surrounding a value in place and REMOVE_OUTER_QUOTES strips
          // them -- confirm against the GetHeaderParamValue() definition.
     53   enum Type {
     54     KEEP_OUTER_QUOTES,
     55     REMOVE_OUTER_QUOTES,
     56   };
     57 
     58  private:
          // Declared private to prevent instantiation from outside the class.
     59   QuoteRule();
     60 };
     61 
     62 // Nothing is omitted.
     63 extern const FormatUrlType kFormatUrlOmitNothing;
     64 
     65 // If set, any username and password are removed.
     66 extern const FormatUrlType kFormatUrlOmitUsernamePassword;
     67 
     68 // If the scheme is 'http://', it's removed.
     69 extern const FormatUrlType kFormatUrlOmitHTTP;
     70 
     71 // Omits the path if it is just a slash and there is no query or ref.  This is
     72 // meaningful for non-file "standard" URLs.
     73 extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname;
     74 
     75 // Convenience for omitting all unnecessary types.
     76 extern const FormatUrlType kFormatUrlOmitAll;
     77 
     78 // Holds a list of ports that should be accepted despite bans.
     79 extern std::multiset<int> explicitly_allowed_ports;
     80 
     81 // Given the full path to a file name, creates a file: URL. The returned URL
     82 // may not be valid if the input is malformed.
     83 GURL FilePathToFileURL(const FilePath& path);
     84 
     85 // Converts a file: URL back to a filename that can be passed to the OS. The
     86 // file URL must be well-formed (GURL::is_valid() must return true); we don't
     87 // handle degenerate cases here. Returns true on success, false if it isn't a
     88 // valid file URL. On failure, *file_path will be empty.
     89 bool FileURLToFilePath(const GURL& url, FilePath* file_path);
     90 
     91 // Splits an input of the form <host>[":"<port>] into its constituent parts.
     92 // Saves the result into |*host| and |*port|. If the input did not have
     93 // the optional port, sets |*port| to -1.
     94 // Returns true if the parsing was successful, false otherwise.
     95 // The returned host is NOT canonicalized, and may be invalid. If <host> is
     96 // an IPv6 literal address, the returned host includes the square brackets.
     97 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin,
     98                       std::string::const_iterator host_and_port_end,
     99                       std::string* host,
    100                       int* port);
    101 bool ParseHostAndPort(const std::string& host_and_port,
    102                       std::string* host,
    103                       int* port);
    104 
    105 // Returns a host:port string for the given URL.
    106 std::string GetHostAndPort(const GURL& url);
    107 
    108 // Returns a host[:port] string for the given URL, where the port is omitted
    109 // if it is the default for the URL's scheme.
    110 std::string GetHostAndOptionalPort(const GURL& url);
    111 
    112 // Returns the string representation of an address, like "192.168.0.1".
    113 // Returns empty string on failure.
    114 std::string NetAddressToString(const struct addrinfo* net_address);
    115 std::string NetAddressToString(const struct sockaddr* net_address,
    116                                socklen_t address_len);
    117 
    118 // Same as NetAddressToString, but additionally includes the port number. For
    119 // example: "192.168.0.1:99" or "[::1]:80".
    120 std::string NetAddressToStringWithPort(const struct addrinfo* net_address);
    121 std::string NetAddressToStringWithPort(const struct sockaddr* net_address,
    122                                        socklen_t address_len);
    123 
    124 // Returns the hostname of the current system. Returns empty string on failure.
    125 std::string GetHostName();
    126 
    127 // Extracts the unescaped username/password from |url|, saving the results
    128 // into |*username| and |*password|.
    129 void GetIdentityFromURL(const GURL& url,
    130                         string16* username,
    131                         string16* password);
    132 
    133 // Returns either the host from |url|, or, if the host is empty, the full spec.
    134 std::string GetHostOrSpecFromURL(const GURL& url);
    135 
    136 // Return the value of the HTTP response header with name 'name'.  'headers'
    137 // should be in the format that URLRequest::GetResponseHeaders() returns.
    138 // Returns the empty string if the header is not found.
    139 std::wstring GetSpecificHeader(const std::wstring& headers,
    140                                const std::wstring& name);
    141 std::string GetSpecificHeader(const std::string& headers,
    142                               const std::string& name);
    143 
    144 // Return the value of the HTTP response header field's parameter named
    145 // 'param_name'.  Returns the empty string if the parameter is not found or is
    146 // improperly formatted.
    147 std::wstring GetHeaderParamValue(const std::wstring& field,
    148                                  const std::wstring& param_name,
    149                                  QuoteRule::Type quote_rule);
    150 std::string GetHeaderParamValue(const std::string& field,
    151                                 const std::string& param_name,
    152                                 QuoteRule::Type quote_rule);
    153 
    154 // Return the filename extracted from Content-Disposition header. The following
    155 // formats are tried in order listed below:
    156 //
    157 // 1. RFC 5987
    158 // 2. RFC 2047
    159 // 3. Raw-8bit-characters :
    160 //    a. UTF-8, b. referrer_charset, c. default os codepage.
    161 // 4. %-escaped UTF-8.
    162 //
    163 // In step 3, if referrer_charset is empty(i.e. unknown), 3b is skipped.
    164 // In step 4, the fallback charsets tried in step 3 are not tried. We
    165 // can consider doing that later.
    166 //
    167 // When a param value is ASCII, but is not in format #2 or format #4 above,
    168 // it is returned as it is unless it's pretty close to two supported
    169 // formats but not well-formed. In that case, an empty string is returned.
    170 //
    171 // In any case, a caller must check for the empty return value and resort to
    172 // another means to get a filename (e.g. url).
    173 //
    174 // This function does not do any escaping and callers are responsible for
    175 // escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit.
    176 //
    177 // TODO(jungshik): revisit this issue. At the moment, the only caller is
    178 // net_util::GetSuggestedFilename and it calls ReplaceIllegalCharacters.  The
    179 // other caller is a unit test. Need to figure out how to expose this function
    180 // only to net_util_unittest.
    181 //
    182 std::string GetFileNameFromCD(const std::string& header,
    183                               const std::string& referrer_charset);
    184 
    185 // Converts the given host name to unicode characters. This can be called for
    186 // any host name, if the input is not IDN or is invalid in some way, we'll just
    187 // return the ASCII source so it is still usable.
    188 //
    189 // The input should be the canonicalized ASCII host name from GURL. This
    190 // function does NOT accept UTF-8! Its length must also be given (this is
    191 // designed to work on the substring of the host out of a URL spec).
    192 //
    193 // |languages| is a comma separated list of ISO 639 language codes. It
    194 // is used to determine whether a hostname is 'comprehensible' to a user
    195 // who understands languages listed. |host| will be converted to a
    196 // human-readable form (Unicode) ONLY when each component of |host| is
    197 // regarded as 'comprehensible'. Script-mixing is not allowed except that
    198 // Latin letters in the ASCII range can be mixed with a limited set of
    199 // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
    200 // When |languages| is empty, even that mixing is not allowed.
    201 //
    202 // (|offset[s]_for_adjustment|) specifies one or more offsets into the original
    203 // |url|'s spec(); each offset will be adjusted to point at the same logical
    204 // place in the result strings during decoding.  If this isn't possible because
    205 // an offset points past the end of |host| or into the middle of a punycode
    206 // sequence, the offending offset will be set to std::wstring::npos.
    207 // |offset[s]_for_adjustment| may be NULL.
    208 NET_EXPORT std::wstring IDNToUnicode(const char* host,
    209                           size_t host_len,
    210                           const std::wstring& languages,
    211                           size_t* offset_for_adjustment);
    212 std::wstring IDNToUnicodeWithOffsets(
    213     const char* host,
    214     size_t host_len,
    215     const std::wstring& languages,
    216     std::vector<size_t>* offsets_for_adjustment);
    217 
    218 // Canonicalizes |host| and returns it.  Also fills |host_info| with
    219 // IP address information.  |host_info| must not be NULL.
    220 std::string CanonicalizeHost(const std::string& host,
    221                              url_canon::CanonHostInfo* host_info);
    222 std::string CanonicalizeHost(const std::wstring& host,
    223                              url_canon::CanonHostInfo* host_info);
    224 
    225 // Returns true if |host| is not an IP address and is compliant with a set of
    226 // rules based on RFC 1738 and tweaked to be compatible with the real world.
    227 // The rules are:
    228 //   * One or more components separated by '.'
    229 //   * Each component begins and ends with an alphanumeric character
    230 //   * Each component contains only alphanumeric characters and '-' or '_'
    231 //   * The last component does not begin with a digit
    232 //   * Optional trailing dot after last component (means "treat as FQDN")
    233 // If |desired_tld| is non-NULL, the host will only be considered invalid if
    234 // appending it as a trailing component still results in an invalid host.  This
    235 // helps us avoid marking as "invalid" user attempts to open "www.401k.com" by
    236 // typing 4-0-1-k-<ctrl>+<enter>.
    237 //
    238 // NOTE: You should only pass in hosts that have been returned from
    239 // CanonicalizeHost(), or you may not get accurate results.
    240 bool IsCanonicalizedHostCompliant(const std::string& host,
    241                                   const std::string& desired_tld);
    242 
    243 // Call these functions to get the html snippet for a directory listing.
    244 // The return values of both functions are in UTF-8.
    245 std::string GetDirectoryListingHeader(const string16& title);
    246 
    247 // Given the name of a file in a directory (ftp or local) and
    248 // other information (is_dir, size, modification time), it returns
    249 // the html snippet to add the entry for the file to the directory listing.
    250 // Currently, it's a script tag containing a call to a Javascript function
    251 // |addRow|.
    252 //
    253 // |name| is the file name to be displayed. |raw_bytes| will be used
    254 // as the actual target of the link (so for example, ftp links should use
    255 // server's encoding). If |raw_bytes| is an empty string, UTF-8 encoded |name|
    256 // will be used.
    257 //
    258 // Both |name| and |raw_bytes| are escaped internally.
    259 std::string GetDirectoryListingEntry(const string16& name,
    260                                      const std::string& raw_bytes,
    261                                      bool is_dir, int64 size,
    262                                      base::Time modified);
    263 
    264 // If text starts with "www." it is removed, otherwise text is returned
    265 // unmodified.
    266 string16 StripWWW(const string16& text);
    267 
    268 // Gets the filename from the raw Content-Disposition header (as read from the
    269 // network).  Otherwise uses the last path component name or hostname from
    270 // |url|. If there is no filename or it can't be used, the given |default_name|
    271 // will be used unless it is empty.
    272 //
    273 // Note: it's possible for the suggested filename to be empty (e.g.,
    274 // file:///). |referrer_charset| is used as one of the charsets
    275 // to interpret a raw 8bit string in C-D header (after interpreting
    276 // as UTF-8 fails). See the comment for GetFileNameFromCD for more details.
    277 string16 GetSuggestedFilename(const GURL& url,
    278                               const std::string& content_disposition,
    279                               const std::string& referrer_charset,
    280                               const string16& default_name);
    281 
    282 // Checks the given port against a list of ports which are restricted by
    283 // default.  Returns true if the port is allowed, false if it is restricted.
    284 bool IsPortAllowedByDefault(int port);
    285 
    286 // Checks the given port against a list of ports which are restricted by the
    287 // FTP protocol.  Returns true if the port is allowed, false if it is
    288 // restricted.
    289 bool IsPortAllowedByFtp(int port);
    290 
    291 // Check if banned |port| has been overridden by an entry in
    292 // |explicitly_allowed_ports|.
    293 bool IsPortAllowedByOverride(int port);
    294 
    295 // Sets the socket identified by |fd| to non-blocking mode.
        // NOTE(review): the meaning of the int result is not documented here
        // (presumably 0 on success) -- confirm against the definition.
    296 int SetNonBlocking(int fd);
    297 
    298 // Appends the given part of the original URL to the output string formatted for
    299 // the user. The given parsed structure will be updated. The host name formatter
    300 // also takes the same accept languages component as ElideURL. |new_parsed| may
    301 // be null.
    302 //
    303 // (|offset[s]_for_adjustment|) specifies one or more offsets into the original
    304 // |url|'s spec(); each offset will be adjusted to point at the same logical
    305 // place in the result strings after reformatting of the host.  If this isn't
    306 // possible because an offset points past the end of the host or into the middle
    307 // of a multi-character sequence, the offending offset will be set to
    308 // std::wstring::npos. |offset[s]_for_adjustment| may be NULL.
    309 void AppendFormattedHost(const GURL& url,
    310                          const std::wstring& languages,
    311                          std::wstring* output,
    312                          url_parse::Parsed* new_parsed,
    313                          size_t* offset_for_adjustment);
    314 void AppendFormattedHostWithOffsets(
    315     const GURL& url,
    316     const std::wstring& languages,
    317     std::wstring* output,
    318     url_parse::Parsed* new_parsed,
    319     std::vector<size_t>* offsets_for_adjustment);
    320 
    321 // Creates a string representation of |url|. The IDN host name may be in Unicode
    322 // if |languages| accepts the Unicode representation. |format_types| is a
    323 // bitmask of the kFormatUrl* FormatUrlType flags. |unescape_rules| defines how
    324 // to clean the URL for human readability. You will generally want
    325 // |UnescapeRule::SPACES| for display to the user if you can handle spaces, or
    326 // |UnescapeRule::NORMAL| if not. If the path part and the query part seem to
    327 // be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8.
    328 //
    329 // The last three parameters may be NULL.
    330 // |new_parsed| will be set to the parsing parameters of the resultant URL.
    331 // |prefix_end| will be the length before the hostname of the resultant URL.
    332 //
    333 // (|offset[s]_for_adjustment|) specifies one or more offsets into the original
    334 // |url|'s spec(); each offset will be modified to reflect changes this function
    335 // makes to the output string. For example, if |url| is "http://a:b@c.com/",
    336 // |format_types| includes kFormatUrlOmitUsernamePassword, and an offset is 12
    337 // (the offset of '.'), then on return the output string will be
    338 // "http://c.com/" and the offset will be 8.  If an offset cannot be
    339 // successfully adjusted (e.g. because it points into the middle of a component
    340 // that was entirely removed, past the end of the string, or into the middle of
    341 // an encoding sequence), it will be set to string16::npos.
    342 string16 FormatUrl(const GURL& url,
    343                    const std::string& languages,
    344                    FormatUrlTypes format_types,
    345                    UnescapeRule::Type unescape_rules,
    346                    url_parse::Parsed* new_parsed,
    347                    size_t* prefix_end,
    348                    size_t* offset_for_adjustment);
    349 string16 FormatUrlWithOffsets(const GURL& url,
    350                               const std::string& languages,
    351                               FormatUrlTypes format_types,
    352                               UnescapeRule::Type unescape_rules,
    353                               url_parse::Parsed* new_parsed,
    354                               size_t* prefix_end,
    355                               std::vector<size_t>* offsets_for_adjustment);
    356 
    357 // This is a convenience function for FormatUrl() with
    358 // format_types = kFormatUrlOmitAll and unescape_rules = UnescapeRule::SPACES.
    359 // This is the typical set of flags for "URLs to display to the user".  You
    360 // should be cautious about using this for URLs which will be parsed or sent to
        // other applications.  The optional outputs (|new_parsed|, |prefix_end| and
        // |offset_for_adjustment|) are all passed as NULL, so only the formatted
        // string is returned.
    361 inline string16 FormatUrl(const GURL& url, const std::string& languages) {
    362   return FormatUrl(url, languages, kFormatUrlOmitAll, UnescapeRule::SPACES,
    363                    NULL, NULL, NULL);
    364 }
    365 
    366 // Returns whether FormatUrl() would strip a trailing slash from |url|, given a
    367 // format flag including kFormatUrlOmitTrailingSlashOnBareHostname.
    368 bool CanStripTrailingSlash(const GURL& url);
    369 
    370 // Strip the portions of |url| that aren't core to the network request.
    371 //   - user name / password
    372 //   - reference section
    373 GURL SimplifyUrlForRequest(const GURL& url);
    374 
        // NOTE(review): undocumented in this header.  Presumably parses
        // |allowed_ports| (likely a comma-separated list of port numbers) and
        // stores the result in |explicitly_allowed_ports| -- confirm against the
        // definition.
    375 void SetExplicitlyAllowedPorts(const std::string& allowed_ports);
    376 
    377 class ScopedPortException {
    378  public:
    379   ScopedPortException(int port);
    380   ~ScopedPortException();
    381 
    382  private:
    383   int port_;
    384 
    385   DISALLOW_COPY_AND_ASSIGN(ScopedPortException);
    386 };
    387 
    388 // Perform a simplistic test to see if IPv6 is supported by trying to create an
    389 // IPv6 socket.
    390 // TODO(jar): Make test more in-depth as needed.
    391 bool IPv6Supported();
    392 
    393 // Returns true if it can determine that only loopback addresses are configured.
    394 // i.e. if only 127.0.0.1 and ::1 are routable.
    395 bool HaveOnlyLoopbackAddresses();
    396 
    397 // IPAddressNumber is used to represent an IP address's numeric value as an
    398 // array of bytes, from most significant to least significant. This is the
    399 // network byte ordering.
    400 //
    401 // IPv4 addresses will have length 4, whereas IPv6 address will have length 16.
    402 typedef std::vector<unsigned char> IPAddressNumber;
    403 
    404 static const size_t kIPv4AddressSize = 4;
    405 static const size_t kIPv6AddressSize = 16;
    406 
    407 // Parses an IP address literal (either IPv4 or IPv6) to its numeric value.
    408 // Returns true on success and fills |ip_number| with the numeric value.
    409 bool ParseIPLiteralToNumber(const std::string& ip_literal,
    410                             IPAddressNumber* ip_number);
    411 
    412 // Converts an IPv4 address to an IPv4-mapped IPv6 address.
    413 // For example 192.168.0.1 would be converted to ::ffff:192.168.0.1.
    414 IPAddressNumber ConvertIPv4NumberToIPv6Number(
    415     const IPAddressNumber& ipv4_number);
    416 
    417 // Parses an IP block specifier from CIDR notation to an
    418 // (IP address, prefix length) pair. Returns true on success and fills
    419 // |*ip_number| with the numeric value of the IP address and sets
    420 // |*prefix_length_in_bits| with the length of the prefix.
    421 //
    422 // CIDR notation literals can use either IPv4 or IPv6 literals. Some examples:
    423 //
    424 //    10.10.3.1/20
    425 //    a:b:c::/46
    426 //    ::1/128
    427 bool ParseCIDRBlock(const std::string& cidr_literal,
    428                     IPAddressNumber* ip_number,
    429                     size_t* prefix_length_in_bits);
    430 
    431 // Compares an IP address to see if it falls within the specified IP block.
    432 // Returns true if it does, false otherwise.
    433 //
    434 // The IP block is given by (|ip_prefix|, |prefix_length_in_bits|) -- any
    435 // IP address whose |prefix_length_in_bits| most significant bits match
    436 // |ip_prefix| will be matched.
    437 //
    438 // In cases when an IPv4 address is being compared to an IPv6 address prefix
    439 // and vice versa, the IPv4 addresses will be converted to IPv4-mapped
    440 // (IPv6) addresses.
    441 bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number,
    442                            const IPAddressNumber& ip_prefix,
    443                            size_t prefix_length_in_bits);
    444 
    445 // Makes a copy of |info|. The dynamically-allocated parts are copied as well.
    446 // If |recursive| is true, chained entries via ai_next are copied too.
    447 // The copy returned by this function should be freed using
    448 // FreeCopyOfAddrinfo(), and NOT freeaddrinfo().
    449 struct addrinfo* CreateCopyOfAddrinfo(const struct addrinfo* info,
    450                                       bool recursive);
    451 
    452 // Frees an addrinfo that was created by CreateCopyOfAddrinfo().
    453 void FreeCopyOfAddrinfo(struct addrinfo* info);
    454 
    455 // Returns the port field of the sockaddr in |info|.
    456 const uint16* GetPortFieldFromAddrinfo(const struct addrinfo* info);
    457 uint16* GetPortFieldFromAddrinfo(struct addrinfo* info);
    458 
    459 // Returns the value of |info's| port (in host byte ordering).
    460 int GetPortFromAddrinfo(const struct addrinfo* info);
    461 
    462 // Same except for struct sockaddr.
    463 const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address,
    464                                        socklen_t address_len);
    465 int GetPortFromSockaddr(const struct sockaddr* address,
    466                         socklen_t address_len);
    467 
    468 // Returns true if |host| is one of the names (e.g. "localhost") or IP
    469 // addresses (IPv4 127.0.0.0/8 or IPv6 ::1) that indicate a loopback.
    470 //
    471 // Note that this function does not check for IP addresses other than
    472 // the above, although other IP addresses may point to the local
    473 // machine.
    474 bool IsLocalhost(const std::string& host);
    475 
    476 // Struct that is used by GetNetworkList() to represent a network
    477 // interface.  Each instance carries exactly one address; GetNetworkList()
        // adds a separate entry for every address of an interface.
    478 struct NetworkInterface {
    479   NetworkInterface();
    480   NetworkInterface(const std::string& name, const IPAddressNumber& address);
    481   ~NetworkInterface();
    482 
          // Name of the interface (its exact form is determined by the
          // GetNetworkList() implementation, which is not visible here).
    483   std::string name;
          // One address of the interface, as raw bytes in network byte order
          // (length 4 for IPv4, 16 for IPv6 -- see IPAddressNumber).
    484   IPAddressNumber address;
    485 };
    486 
    487 typedef std::list<NetworkInterface> NetworkInterfaceList;
    488 
    489 // Fills |networks| with the list of network interfaces except the loopback
    490 // interface. If an interface has more than one address, a separate entry is
    491 // added to the list for each address.
    492 // Can be called only on a thread that allows IO.
        // NOTE(review): the bool result presumably reports success -- confirm
        // against the definition.
    493 bool GetNetworkList(NetworkInterfaceList* networks);
    494 
    495 // Private adjustment functor, intended for use with std::transform, which
    496 // sets an offset to npos if it occurs at or before |component_start| and
    497 // otherwise leaves the offset unaltered. Exposed here for unit testing.
        // NOTE(review): operator() is defined outside this header; confirm there
        // whether an offset exactly equal to |component_start| is clamped.
    498 struct ClampComponentOffset {
          // Stores the threshold that operator() compares offsets against.
    499   explicit ClampComponentOffset(size_t component_start);
          // Returns the adjusted value for |offset| as described above.
    500   size_t operator()(size_t offset);
    501 
          // Threshold for clamping.  Being const, it is fixed at construction and
          // makes instances non-assignable.
    502   const size_t component_start;
    503 };
    504 
    505 }  // namespace net
    506 
    507 #endif  // NET_BASE_NET_UTIL_H_
    508