1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_BASE_NET_UTIL_H_ 6 #define NET_BASE_NET_UTIL_H_ 7 8 #include "build/build_config.h" 9 10 #ifdef OS_WIN 11 #include <windows.h> 12 #endif 13 14 #include <string> 15 #include <set> 16 17 #include "base/basictypes.h" 18 #include "base/string16.h" 19 #include "net/base/escape.h" 20 21 struct addrinfo; 22 class FilePath; 23 class GURL; 24 25 namespace base { 26 class Time; 27 } 28 29 namespace url_canon { 30 struct CanonHostInfo; 31 } 32 33 namespace url_parse { 34 struct Parsed; 35 } 36 37 namespace net { 38 39 // Holds a list of ports that should be accepted despite bans. 40 extern std::set<int> explicitly_allowed_ports; 41 42 // Given the full path to a file name, creates a file: URL. The returned URL 43 // may not be valid if the input is malformed. 44 GURL FilePathToFileURL(const FilePath& path); 45 46 // Converts a file: URL back to a filename that can be passed to the OS. The 47 // file URL must be well-formed (GURL::is_valid() must return true); we don't 48 // handle degenerate cases here. Returns true on success, false if it isn't a 49 // valid file URL. On failure, *file_path will be empty. 50 bool FileURLToFilePath(const GURL& url, FilePath* file_path); 51 52 // Splits an input of the form <host>[":"<port>] into its consitituent parts. 53 // Saves the result into |*host| and |*port|. If the input did not have 54 // the optional port, sets |*port| to -1. 55 // Returns true if the parsing was successful, false otherwise. 56 // The returned host is NOT canonicalized, and may be invalid. If <host> is 57 // an IPv6 literal address, the returned host includes the square brackets. 58 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin, 59 std::string::const_iterator host_and_port_end, 60 std::string* host, 61 int* port); 62 bool ParseHostAndPort(const std::string& host_and_port, 63 std::string* host, 64 int* port); 65 66 // Returns a host:port string for the given URL. 67 std::string GetHostAndPort(const GURL& url); 68 69 // Returns a host[:port] string for the given URL, where the port is omitted 70 // if it is the default for the URL's scheme. 71 std::string GetHostAndOptionalPort(const GURL& url); 72 73 // Returns the string representation of an address, like "192.168.0.1". 74 // Returns empty string on failure. 75 std::string NetAddressToString(const struct addrinfo* net_address); 76 77 // Returns the hostname of the current system. Returns empty string on failure. 78 std::string GetHostName(); 79 80 // Extracts the unescaped username/password from |url|, saving the results 81 // into |*username| and |*password|. 82 void GetIdentityFromURL(const GURL& url, 83 std::wstring* username, 84 std::wstring* password); 85 86 // Return the value of the HTTP response header with name 'name'. 'headers' 87 // should be in the format that URLRequest::GetResponseHeaders() returns. 88 // Returns the empty string if the header is not found. 89 std::wstring GetSpecificHeader(const std::wstring& headers, 90 const std::wstring& name); 91 std::string GetSpecificHeader(const std::string& headers, 92 const std::string& name); 93 94 // Return the value of the HTTP response header field's parameter named 95 // 'param_name'. Returns the empty string if the parameter is not found or is 96 // improperly formatted. 97 std::wstring GetHeaderParamValue(const std::wstring& field, 98 const std::wstring& param_name); 99 std::string GetHeaderParamValue(const std::string& field, 100 const std::string& param_name); 101 102 // Return the filename extracted from Content-Disposition header. The following 103 // formats are tried in order listed below: 104 // 105 // 1. RFC 2047 106 // 2. Raw-8bit-characters : 107 // a. UTF-8, b. referrer_charset, c. default os codepage. 108 // 3. %-escaped UTF-8. 109 // 110 // In step 2, if referrer_charset is empty(i.e. unknown), 2b is skipped. 111 // In step 3, the fallback charsets tried in step 2 are not tried. We 112 // can consider doing that later. 113 // 114 // When a param value is ASCII, but is not in format #1 or format #3 above, 115 // it is returned as it is unless it's pretty close to two supported 116 // formats but not well-formed. In that case, an empty string is returned. 117 // 118 // In any case, a caller must check for the empty return value and resort to 119 // another means to get a filename (e.g. url). 120 // 121 // This function does not do any escaping and callers are responsible for 122 // escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit. 123 // 124 // TODO(jungshik): revisit this issue. At the moment, the only caller 125 // net_util::GetSuggestedFilename and it calls ReplaceIllegalCharacters. The 126 // other caller is a unit test. Need to figure out expose this function only to 127 // net_util_unittest. 128 // 129 std::string GetFileNameFromCD(const std::string& header, 130 const std::string& referrer_charset); 131 132 // Converts the given host name to unicode characters. This can be called for 133 // any host name, if the input is not IDN or is invalid in some way, we'll just 134 // return the ASCII source so it is still usable. 135 // 136 // The input should be the canonicalized ASCII host name from GURL. This 137 // function does NOT accept UTF-8! Its length must also be given (this is 138 // designed to work on the substring of the host out of a URL spec). 139 // 140 // |languages| is a comma separated list of ISO 639 language codes. It 141 // is used to determine whether a hostname is 'comprehensible' to a user 142 // who understands languages listed. |host| will be converted to a 143 // human-readable form (Unicode) ONLY when each component of |host| is 144 // regarded as 'comprehensible'. Scipt-mixing is not allowed except that 145 // Latin letters in the ASCII range can be mixed with a limited set of 146 // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). 147 // When |languages| is empty, even that mixing is not allowed. 148 // 149 // |offset_for_adjustment| is an offset into |host|, which will be adjusted to 150 // point at the same logical place in the output string. If this isn't possible 151 // because it points past the end of |host| or into the middle of a punycode 152 // sequence, it will be set to std::wstring::npos. |offset_for_adjustment| may 153 // be NULL. 154 std::wstring IDNToUnicode(const char* host, 155 size_t host_len, 156 const std::wstring& languages, 157 size_t* offset_for_adjustment); 158 159 // Canonicalizes |host| and returns it. Also fills |host_info| with 160 // IP address information. |host_info| must not be NULL. 161 std::string CanonicalizeHost(const std::string& host, 162 url_canon::CanonHostInfo* host_info); 163 std::string CanonicalizeHost(const std::wstring& host, 164 url_canon::CanonHostInfo* host_info); 165 166 // Returns true if |host| is not an IP address and is compliant with a set of 167 // rules based on RFC 1738 and tweaked to be compatible with the real world. 168 // The rules are: 169 // * One or more components separated by '.' 170 // * Each component begins and ends with an alphanumeric character 171 // * Each component contains only alphanumeric characters and '-' or '_' 172 // * The last component does not begin with a digit 173 // * Optional trailing dot after last component (means "treat as FQDN") 174 // 175 // NOTE: You should only pass in hosts that have been returned from 176 // CanonicalizeHost(), or you may not get accurate results. 177 bool IsCanonicalizedHostCompliant(const std::string& host); 178 179 // Call these functions to get the html snippet for a directory listing. 180 // The return values of both functions are in UTF-8. 181 std::string GetDirectoryListingHeader(const string16& title); 182 183 // Given the name of a file in a directory (ftp or local) and 184 // other information (is_dir, size, modification time), it returns 185 // the html snippet to add the entry for the file to the directory listing. 186 // Currently, it's a script tag containing a call to a Javascript function 187 // |addRow|. 188 // 189 // Its 1st parameter is derived from |name| and is the Javascript-string 190 // escaped form of |name| (i.e \uXXXX). The 2nd parameter is the url-escaped 191 // |raw_bytes| if it's not empty. If empty, the 2nd parameter is the 192 // url-escaped |name| in UTF-8. 193 std::string GetDirectoryListingEntry(const string16& name, 194 const std::string& raw_bytes, 195 bool is_dir, int64 size, 196 base::Time modified); 197 198 // If text starts with "www." it is removed, otherwise text is returned 199 // unmodified. 200 std::wstring StripWWW(const std::wstring& text); 201 202 // Gets the filename from the raw Content-Disposition header (as read from the 203 // network). Otherwise uses the last path component name or hostname from 204 // |url|. If there is no filename or it can't be used, the given |default_name|, 205 // will be used unless it is empty. 206 207 // Note: it's possible for the suggested filename to be empty (e.g., 208 // file:///). referrer_charset is used as one of charsets 209 // to interpret a raw 8bit string in C-D header (after interpreting 210 // as UTF-8 fails). See the comment for GetFilenameFromCD for more details. 211 FilePath GetSuggestedFilename(const GURL& url, 212 const std::string& content_disposition, 213 const std::string& referrer_charset, 214 const FilePath& default_name); 215 216 // Checks the given port against a list of ports which are restricted by 217 // default. Returns true if the port is allowed, false if it is restricted. 218 bool IsPortAllowedByDefault(int port); 219 220 // Checks the given port against a list of ports which are restricted by the 221 // FTP protocol. Returns true if the port is allowed, false if it is 222 // restricted. 223 bool IsPortAllowedByFtp(int port); 224 225 // Check if banned |port| has been overriden by an entry in 226 // |explicitly_allowed_ports_|. 227 bool IsPortAllowedByOverride(int port); 228 229 // Set socket to non-blocking mode 230 int SetNonBlocking(int fd); 231 232 // Appends the given part of the original URL to the output string formatted for 233 // the user. The given parsed structure will be updated. The host name formatter 234 // also takes the same accept languages component as ElideURL. |new_parsed| may 235 // be null. 236 void AppendFormattedHost(const GURL& url, 237 const std::wstring& languages, 238 std::wstring* output, 239 url_parse::Parsed* new_parsed, 240 size_t* offset_for_adjustment); 241 242 // Creates a string representation of |url|. The IDN host name may be in Unicode 243 // if |languages| accepts the Unicode representation. If 244 // |omit_username_password| is true, any username and password are removed. 245 // |unescape_rules| defines how to clean the URL for human readability. 246 // You will generally want |UnescapeRule::SPACES| for display to the user if you 247 // can handle spaces, or |UnescapeRule::NORMAL| if not. If the path part and the 248 // query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and 249 // UTF-8. 250 // 251 // The last three parameters may be NULL. 252 // |new_parsed| will be set to the parsing parameters of the resultant URL. 253 // |prefix_end| will be the length before the hostname of the resultant URL. 254 // |offset_for_adjustment| is an offset into the original |url|'s spec(), which 255 // will be modified to reflect changes this function makes to the output string; 256 // for example, if |url| is "http://a:b@c.com/", |omit_username_password| is 257 // true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return 258 // the output string will be "http://c.com/" and |offset_for_adjustment| will be 259 // 8. If the offset cannot be successfully adjusted (e.g. because it points 260 // into the middle of a component that was entirely removed, past the end of the 261 // string, or into the middle of an encoding sequence), it will be set to 262 // std::wstring::npos. 263 std::wstring FormatUrl(const GURL& url, 264 const std::wstring& languages, 265 bool omit_username_password, 266 UnescapeRule::Type unescape_rules, 267 url_parse::Parsed* new_parsed, 268 size_t* prefix_end, 269 size_t* offset_for_adjustment); 270 271 // Creates a string representation of |url| for display to the user. 272 // This is a shorthand of the above function with omit_username_password=true, 273 // unescape=SPACES, new_parsed=NULL, and prefix_end=NULL. 274 inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) { 275 return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL, 276 NULL); 277 } 278 279 // Strip the portions of |url| that aren't core to the network request. 280 // - user name / password 281 // - reference section 282 GURL SimplifyUrlForRequest(const GURL& url); 283 284 void SetExplicitlyAllowedPorts(const std::wstring& allowed_ports); 285 286 } // namespace net 287 288 #endif // NET_BASE_NET_UTIL_H_ 289