Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef NET_BASE_NET_UTIL_H_
      6 #define NET_BASE_NET_UTIL_H_
      7 
      8 #include "build/build_config.h"
      9 
     10 #ifdef OS_WIN
     11 #include <windows.h>
     12 #endif
     13 
     14 #include <string>
     15 #include <set>
     16 
     17 #include "base/basictypes.h"
     18 #include "base/string16.h"
     19 #include "net/base/escape.h"
     20 
     21 struct addrinfo;
     22 class FilePath;
     23 class GURL;
     24 
     25 namespace base {
     26 class Time;
     27 }
     28 
     29 namespace url_canon {
     30 struct CanonHostInfo;
     31 }
     32 
     33 namespace url_parse {
     34 struct Parsed;
     35 }
     36 
     37 namespace net {
     38 
     39 // Holds a list of ports that should be accepted despite bans.
     40 extern std::set<int> explicitly_allowed_ports;
     41 
     42 // Given the full path to a file name, creates a file: URL. The returned URL
     43 // may not be valid if the input is malformed.
     44 GURL FilePathToFileURL(const FilePath& path);
     45 
     46 // Converts a file: URL back to a filename that can be passed to the OS. The
     47 // file URL must be well-formed (GURL::is_valid() must return true); we don't
     48 // handle degenerate cases here. Returns true on success, false if it isn't a
     49 // valid file URL. On failure, *file_path will be empty.
     50 bool FileURLToFilePath(const GURL& url, FilePath* file_path);
     51 
     52 // Splits an input of the form <host>[":"<port>] into its constituent parts.
     53 // Saves the result into |*host| and |*port|. If the input did not have
     54 // the optional port, sets |*port| to -1.
     55 // Returns true if the parsing was successful, false otherwise.
     56 // The returned host is NOT canonicalized, and may be invalid. If <host> is
     57 // an IPv6 literal address, the returned host includes the square brackets.
     58 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin,
     59                       std::string::const_iterator host_and_port_end,
     60                       std::string* host,
     61                       int* port);
     62 bool ParseHostAndPort(const std::string& host_and_port,
     63                       std::string* host,
     64                       int* port);
     65 
     66 // Returns a host:port string for the given URL.
     67 std::string GetHostAndPort(const GURL& url);
     68 
     69 // Returns a host[:port] string for the given URL, where the port is omitted
     70 // if it is the default for the URL's scheme.
     71 std::string GetHostAndOptionalPort(const GURL& url);
     72 
     73 // Returns the string representation of an address, like "192.168.0.1".
     74 // Returns empty string on failure.
     75 std::string NetAddressToString(const struct addrinfo* net_address);
     76 
     77 // Returns the hostname of the current system. Returns empty string on failure.
     78 std::string GetHostName();
     79 
     80 // Extracts the unescaped username/password from |url|, saving the results
     81 // into |*username| and |*password|.
     82 void GetIdentityFromURL(const GURL& url,
     83                         std::wstring* username,
     84                         std::wstring* password);
     85 
     86 // Return the value of the HTTP response header with name 'name'.  'headers'
     87 // should be in the format that URLRequest::GetResponseHeaders() returns.
     88 // Returns the empty string if the header is not found.
     89 std::wstring GetSpecificHeader(const std::wstring& headers,
     90                                const std::wstring& name);
     91 std::string GetSpecificHeader(const std::string& headers,
     92                               const std::string& name);
     93 
     94 // Return the value of the HTTP response header field's parameter named
     95 // 'param_name'.  Returns the empty string if the parameter is not found or is
     96 // improperly formatted.
     97 std::wstring GetHeaderParamValue(const std::wstring& field,
     98                                  const std::wstring& param_name);
     99 std::string GetHeaderParamValue(const std::string& field,
    100                                 const std::string& param_name);
    101 
    102 // Return the filename extracted from Content-Disposition header. The following
    103 // formats are tried in order listed below:
    104 //
    105 // 1. RFC 2047
    106 // 2. Raw-8bit-characters :
    107 //    a. UTF-8, b. referrer_charset, c. default os codepage.
    108 // 3. %-escaped UTF-8.
    109 //
    110 // In step 2, if referrer_charset is empty (i.e. unknown), 2b is skipped.
    111 // In step 3, the fallback charsets tried in step 2 are not tried. We
    112 // can consider doing that later.
    113 //
    114 // When a param value is ASCII, but is not in format #1 or format #3 above,
    115 // it is returned as it is unless it's pretty close to two supported
    116 // formats but not well-formed. In that case, an empty string is returned.
    117 //
    118 // In any case, a caller must check for the empty return value and resort to
    119 // another means to get a filename (e.g. url).
    120 //
    121 // This function does not do any escaping and callers are responsible for
    122 // escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit.
    123 //
    124 // TODO(jungshik): revisit this issue. At the moment, the only caller is
    125 // net::GetSuggestedFilename and it calls ReplaceIllegalCharacters.  The
    126 // other caller is a unit test. Need to figure out how to expose this function
    127 // only to net_util_unittest.
    128 //
    129 std::string GetFileNameFromCD(const std::string& header,
    130                               const std::string& referrer_charset);
    131 
    132 // Converts the given host name to unicode characters. This can be called for
    133 // any host name; if the input is not IDN or is invalid in some way, we'll just
    134 // return the ASCII source so it is still usable.
    135 //
    136 // The input should be the canonicalized ASCII host name from GURL. This
    137 // function does NOT accept UTF-8! Its length must also be given (this is
    138 // designed to work on the substring of the host out of a URL spec).
    139 //
    140 // |languages| is a comma separated list of ISO 639 language codes. It
    141 // is used to determine whether a hostname is 'comprehensible' to a user
    142 // who understands languages listed. |host| will be converted to a
    143 // human-readable form (Unicode) ONLY when each component of |host| is
    144 // regarded as 'comprehensible'. Script-mixing is not allowed except that
    145 // Latin letters in the ASCII range can be mixed with a limited set of
    146 // script-language pairs (currently Han, Kana and Hangul for zh, ja and ko).
    147 // When |languages| is empty, even that mixing is not allowed.
    148 //
    149 // |offset_for_adjustment| is an offset into |host|, which will be adjusted to
    150 // point at the same logical place in the output string. If this isn't possible
    151 // because it points past the end of |host| or into the middle of a punycode
    152 // sequence, it will be set to std::wstring::npos.  |offset_for_adjustment| may
    153 // be NULL.
    154 std::wstring IDNToUnicode(const char* host,
    155                           size_t host_len,
    156                           const std::wstring& languages,
    157                           size_t* offset_for_adjustment);
    158 
    159 // Canonicalizes |host| and returns it.  Also fills |host_info| with
    160 // IP address information.  |host_info| must not be NULL.
    161 std::string CanonicalizeHost(const std::string& host,
    162                              url_canon::CanonHostInfo* host_info);
    163 std::string CanonicalizeHost(const std::wstring& host,
    164                              url_canon::CanonHostInfo* host_info);
    165 
    166 // Returns true if |host| is not an IP address and is compliant with a set of
    167 // rules based on RFC 1738 and tweaked to be compatible with the real world.
    168 // The rules are:
    169 //   * One or more components separated by '.'
    170 //   * Each component begins and ends with an alphanumeric character
    171 //   * Each component contains only alphanumeric characters and '-' or '_'
    172 //   * The last component does not begin with a digit
    173 //   * Optional trailing dot after last component (means "treat as FQDN")
    174 //
    175 // NOTE: You should only pass in hosts that have been returned from
    176 // CanonicalizeHost(), or you may not get accurate results.
    177 bool IsCanonicalizedHostCompliant(const std::string& host);
    178 
    179 // Call these functions to get the html snippet for a directory listing.
    180 // The return values of both functions are in UTF-8.
    181 std::string GetDirectoryListingHeader(const string16& title);
    182 
    183 // Given the name of a file in a directory (ftp or local) and
    184 // other information (is_dir, size, modification time), it returns
    185 // the html snippet to add the entry for the file to the directory listing.
    186 // Currently, it's a script tag containing a call to a Javascript function
    187 // |addRow|.
    188 //
    189 // Its 1st parameter is derived from |name| and is the Javascript-string
    190 // escaped form of |name| (i.e. \uXXXX). The 2nd parameter is the url-escaped
    191 // |raw_bytes| if it's not empty. If empty, the 2nd parameter is the
    192 // url-escaped |name| in UTF-8.
    193 std::string GetDirectoryListingEntry(const string16& name,
    194                                      const std::string& raw_bytes,
    195                                      bool is_dir, int64 size,
    196                                      base::Time modified);
    197 
    198 // If text starts with "www." it is removed, otherwise text is returned
    199 // unmodified.
    200 std::wstring StripWWW(const std::wstring& text);
    201 
    202 // Gets the filename from the raw Content-Disposition header (as read from the
    203 // network).  Otherwise uses the last path component name or hostname from
    204 // |url|. If there is no filename or it can't be used, the given |default_name|
    205 // will be used unless it is empty.
    206 //
    207 // Note: it's possible for the suggested filename to be empty (e.g.,
    208 // file:///). referrer_charset is used as one of the charsets
    209 // to interpret a raw 8bit string in the C-D header (after interpreting
    210 // as UTF-8 fails). See the comment for GetFileNameFromCD for more details.
    211 FilePath GetSuggestedFilename(const GURL& url,
    212                               const std::string& content_disposition,
    213                               const std::string& referrer_charset,
    214                               const FilePath& default_name);
    215 
    216 // Checks the given port against a list of ports which are restricted by
    217 // default.  Returns true if the port is allowed, false if it is restricted.
    218 bool IsPortAllowedByDefault(int port);
    219 
    220 // Checks the given port against a list of ports which are restricted by the
    221 // FTP protocol.  Returns true if the port is allowed, false if it is
    222 // restricted.
    223 bool IsPortAllowedByFtp(int port);
    224 
    225 // Check if banned |port| has been overridden by an entry in
    226 // |explicitly_allowed_ports|.
    227 bool IsPortAllowedByOverride(int port);
    228 
    229 // Set socket to non-blocking mode
    230 int SetNonBlocking(int fd);
    231 
    232 // Appends the given part of the original URL to the output string formatted for
    233 // the user. The given parsed structure will be updated. The host name formatter
    234 // also takes the same accept languages component as ElideURL. |new_parsed| may
    235 // be null.
    236 void AppendFormattedHost(const GURL& url,
    237                          const std::wstring& languages,
    238                          std::wstring* output,
    239                          url_parse::Parsed* new_parsed,
    240                          size_t* offset_for_adjustment);
    241 
    242 // Creates a string representation of |url|. The IDN host name may be in Unicode
    243 // if |languages| accepts the Unicode representation. If
    244 // |omit_username_password| is true, any username and password are removed.
    245 // |unescape_rules| defines how to clean the URL for human readability.
    246 // You will generally want |UnescapeRule::SPACES| for display to the user if you
    247 // can handle spaces, or |UnescapeRule::NORMAL| if not. If the path part and the
    248 // query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and
    249 // UTF-8.
    250 //
    251 // The last three parameters may be NULL.
    252 // |new_parsed| will be set to the parsing parameters of the resultant URL.
    253 // |prefix_end| will be the length before the hostname of the resultant URL.
    254 // |offset_for_adjustment| is an offset into the original |url|'s spec(), which
    255 // will be modified to reflect changes this function makes to the output string;
    256 // for example, if |url| is "http://a:b@c.com/", |omit_username_password| is
    257 // true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return
    258 // the output string will be "http://c.com/" and |offset_for_adjustment| will be
    259 // 8.  If the offset cannot be successfully adjusted (e.g. because it points
    260 // into the middle of a component that was entirely removed, past the end of the
    261 // string, or into the middle of an encoding sequence), it will be set to
    262 // std::wstring::npos.
    263 std::wstring FormatUrl(const GURL& url,
    264                        const std::wstring& languages,
    265                        bool omit_username_password,
    266                        UnescapeRule::Type unescape_rules,
    267                        url_parse::Parsed* new_parsed,
    268                        size_t* prefix_end,
    269                        size_t* offset_for_adjustment);
    270 
    271 // Creates a string representation of |url| for display to the user.
    272 // This is a shorthand of the above function with omit_username_password=true,
    273 // unescape_rules=SPACES, and NULL for the last three (optional) parameters.
    274 inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) {
    275   return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL,
    276                    NULL);
    277 }
    278 
    279 // Strip the portions of |url| that aren't core to the network request.
    280 //   - user name / password
    281 //   - reference section
    282 GURL SimplifyUrlForRequest(const GURL& url);
    283 
    284 void SetExplicitlyAllowedPorts(const std::wstring& allowed_ports);
    285 
    286 }  // namespace net
    287 
    288 #endif  // NET_BASE_NET_UTIL_H_
    289