1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/logging.h" 6 #include "url/url_file.h" 7 #include "url/url_parse.h" 8 #include "url/url_parse_internal.h" 9 10 // Interesting IE file:isms... 11 // 12 // INPUT OUTPUT 13 // ========================= ============================== 14 // file:/foo/bar file:///foo/bar 15 // The result here seems totally invalid!?!? This isn't UNC. 16 // 17 // file:/ 18 // file:// or any other number of slashes 19 // IE6 doesn't do anything at all if you click on this link. No error: 20 // nothing. IE6's history system seems to always color this link, so I'm 21 // guessing that it maps internally to the empty URL. 22 // 23 // C:\ file:///C:/ 24 // When on a file: URL source page, this link will work. When over HTTP, 25 // the file: URL will appear in the status bar but the link will not work 26 // (security restriction for all file URLs). 27 // 28 // file:foo/ file:foo/ (invalid?!?!?) 29 // file:/foo/ file:///foo/ (invalid?!?!?) 30 // file://foo/ file://foo/ (UNC to server "foo") 31 // file:///foo/ file:///foo/ (invalid, seems to be a file) 32 // file:////foo/ file://foo/ (UNC to server "foo") 33 // Any more than four slashes is also treated as UNC. 34 // 35 // file:C:/ file://C:/ 36 // file:/C:/ file://C:/ 37 // The number of slashes after "file:" don't matter if the thing following 38 // it looks like an absolute drive path. Also, slashes and backslashes are 39 // equally valid here. 40 41 namespace url { 42 43 namespace { 44 45 // A subcomponent of DoInitFileURL, the input of this function should be a UNC 46 // path name, with the index of the first character after the slashes following 47 // the scheme given in |after_slashes|. This will initialize the host, path, 48 // query, and ref, and leave the other output components untouched 49 // (DoInitFileURL handles these for us). 50 template<typename CHAR> 51 void DoParseUNC(const CHAR* spec, 52 int after_slashes, 53 int spec_len, 54 Parsed* parsed) { 55 int next_slash = FindNextSlash(spec, after_slashes, spec_len); 56 if (next_slash == spec_len) { 57 // No additional slash found, as in "file://foo", treat the text as the 58 // host with no path (this will end up being UNC to server "foo"). 59 int host_len = spec_len - after_slashes; 60 if (host_len) 61 parsed->host = Component(after_slashes, host_len); 62 else 63 parsed->host.reset(); 64 parsed->path.reset(); 65 return; 66 } 67 68 #ifdef WIN32 69 // See if we have something that looks like a path following the first 70 // component. As in "file://localhost/c:/", we get "c:/" out. We want to 71 // treat this as a having no host but the path given. Works on Windows only. 72 if (DoesBeginWindowsDriveSpec(spec, next_slash + 1, spec_len)) { 73 parsed->host.reset(); 74 ParsePathInternal(spec, MakeRange(next_slash, spec_len), 75 &parsed->path, &parsed->query, &parsed->ref); 76 return; 77 } 78 #endif 79 80 // Otherwise, everything up until that first slash we found is the host name, 81 // which will end up being the UNC host. For example "file://foo/bar.txt" 82 // will get a server name of "foo" and a path of "/bar". Later, on Windows, 83 // this should be treated as the filename "\\foo\bar.txt" in proper UNC 84 // notation. 85 int host_len = next_slash - after_slashes; 86 if (host_len) 87 parsed->host = MakeRange(after_slashes, next_slash); 88 else 89 parsed->host.reset(); 90 if (next_slash < spec_len) { 91 ParsePathInternal(spec, MakeRange(next_slash, spec_len), 92 &parsed->path, &parsed->query, &parsed->ref); 93 } else { 94 parsed->path.reset(); 95 } 96 } 97 98 // A subcomponent of DoParseFileURL, the input should be a local file, with the 99 // beginning of the path indicated by the index in |path_begin|. This will 100 // initialize the host, path, query, and ref, and leave the other output 101 // components untouched (DoInitFileURL handles these for us). 102 template<typename CHAR> 103 void DoParseLocalFile(const CHAR* spec, 104 int path_begin, 105 int spec_len, 106 Parsed* parsed) { 107 parsed->host.reset(); 108 ParsePathInternal(spec, MakeRange(path_begin, spec_len), 109 &parsed->path, &parsed->query, &parsed->ref); 110 } 111 112 // Backend for the external functions that operates on either char type. 113 // Handles cases where there is a scheme, but also when handed the first 114 // character following the "file:" at the beginning of the spec. If so, 115 // this is usually a slash, but needn't be; we allow paths like "file:c:\foo". 116 template<typename CHAR> 117 void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) { 118 DCHECK(spec_len >= 0); 119 120 // Get the parts we never use for file URLs out of the way. 121 parsed->username.reset(); 122 parsed->password.reset(); 123 parsed->port.reset(); 124 125 // Many of the code paths don't set these, so it's convenient to just clear 126 // them. We'll write them in those cases we need them. 127 parsed->query.reset(); 128 parsed->ref.reset(); 129 130 // Strip leading & trailing spaces and control characters. 131 int begin = 0; 132 TrimURL(spec, &begin, &spec_len); 133 134 // Find the scheme, if any. 135 int num_slashes = CountConsecutiveSlashes(spec, begin, spec_len); 136 int after_scheme; 137 int after_slashes; 138 #ifdef WIN32 139 // See how many slashes there are. We want to handle cases like UNC but also 140 // "/c:/foo". This is when there is no scheme, so we can allow pages to do 141 // links like "c:/foo/bar" or "//foo/bar". This is also called by the 142 // relative URL resolver when it determines there is an absolute URL, which 143 // may give us input like "/c:/foo". 144 after_slashes = begin + num_slashes; 145 if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) { 146 // Windows path, don't try to extract the scheme (for example, "c:\foo"). 147 parsed->scheme.reset(); 148 after_scheme = after_slashes; 149 } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) { 150 // Windows UNC path: don't try to extract the scheme, but keep the slashes. 151 parsed->scheme.reset(); 152 after_scheme = begin; 153 } else 154 #endif 155 { 156 // ExtractScheme doesn't understand the possibility of filenames with 157 // colons in them, in which case it returns the entire spec up to the 158 // colon as the scheme. So handle /foo.c:5 as a file but foo.c:5 as 159 // the foo.c: scheme. 160 if (!num_slashes && 161 ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { 162 // Offset the results since we gave ExtractScheme a substring. 163 parsed->scheme.begin += begin; 164 after_scheme = parsed->scheme.end() + 1; 165 } else { 166 // No scheme found, remember that. 167 parsed->scheme.reset(); 168 after_scheme = begin; 169 } 170 } 171 172 // Handle empty specs ones that contain only whitespace or control chars, 173 // or that are just the scheme (for example "file:"). 174 if (after_scheme == spec_len) { 175 parsed->host.reset(); 176 parsed->path.reset(); 177 return; 178 } 179 180 num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len); 181 after_slashes = after_scheme + num_slashes; 182 #ifdef WIN32 183 // Check whether the input is a drive again. We checked above for windows 184 // drive specs, but that's only at the very beginning to see if we have a 185 // scheme at all. This test will be duplicated in that case, but will 186 // additionally handle all cases with a real scheme such as "file:///C:/". 187 if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) && 188 num_slashes != 3) { 189 // Anything not beginning with a drive spec ("c:\") on Windows is treated 190 // as UNC, with the exception of three slashes which always means a file. 191 // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails. 192 DoParseUNC(spec, after_slashes, spec_len, parsed); 193 return; 194 } 195 #else 196 // file: URL with exactly 2 slashes is considered to have a host component. 197 if (num_slashes == 2) { 198 DoParseUNC(spec, after_slashes, spec_len, parsed); 199 return; 200 } 201 #endif // WIN32 202 203 // Easy and common case, the full path immediately follows the scheme 204 // (modulo slashes), as in "file://c:/foo". Just treat everything from 205 // there to the end as the path. Empty hosts have 0 length instead of -1. 206 // We include the last slash as part of the path if there is one. 207 DoParseLocalFile(spec, 208 num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme, 209 spec_len, parsed); 210 } 211 212 } // namespace 213 214 void ParseFileURL(const char* url, int url_len, Parsed* parsed) { 215 DoParseFileURL(url, url_len, parsed); 216 } 217 218 void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed) { 219 DoParseFileURL(url, url_len, parsed); 220 } 221 222 } // namespace url 223