1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Functions for canonicalizing "file:" URLs. 6 7 #include "url/url_canon.h" 8 #include "url/url_canon_internal.h" 9 #include "url/url_file.h" 10 #include "url/url_parse_internal.h" 11 12 namespace url { 13 14 namespace { 15 16 #ifdef WIN32 17 18 // Given a pointer into the spec, this copies and canonicalizes the drive 19 // letter and colon to the output, if one is found. If there is not a drive 20 // spec, it won't do anything. The index of the next character in the input 21 // spec is returned (after the colon when a drive spec is found, the begin 22 // offset if one is not). 23 template<typename CHAR> 24 int FileDoDriveSpec(const CHAR* spec, int begin, int end, 25 CanonOutput* output) { 26 // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo, 27 // (with backslashes instead of slashes as well). 28 int num_slashes = CountConsecutiveSlashes(spec, begin, end); 29 int after_slashes = begin + num_slashes; 30 31 if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end)) 32 return begin; // Haven't consumed any characters 33 34 // A drive spec is the start of a path, so we need to add a slash for the 35 // authority terminator (typically the third slash). 36 output->push_back('/'); 37 38 // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid 39 // and that it is followed by a colon/pipe. 40 41 // Normalize Windows drive letters to uppercase 42 if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z') 43 output->push_back(spec[after_slashes] - 'a' + 'A'); 44 else 45 output->push_back(static_cast<char>(spec[after_slashes])); 46 47 // Normalize the character following it to a colon rather than pipe. 48 output->push_back(':'); 49 return after_slashes + 2; 50 } 51 52 #endif // WIN32 53 54 template<typename CHAR, typename UCHAR> 55 bool DoFileCanonicalizePath(const CHAR* spec, 56 const Component& path, 57 CanonOutput* output, 58 Component* out_path) { 59 // Copies and normalizes the "c:" at the beginning, if present. 60 out_path->begin = output->length(); 61 int after_drive; 62 #ifdef WIN32 63 after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output); 64 #else 65 after_drive = path.begin; 66 #endif 67 68 // Copies the rest of the path, starting from the slash following the 69 // drive colon (if any, Windows only), or the first slash of the path. 70 bool success = true; 71 if (after_drive < path.end()) { 72 // Use the regular path canonicalizer to canonicalize the rest of the 73 // path. Give it a fake output component to write into. DoCanonicalizeFile 74 // will compute the full path component. 75 Component sub_path = MakeRange(after_drive, path.end()); 76 Component fake_output_path; 77 success = CanonicalizePath(spec, sub_path, output, &fake_output_path); 78 } else { 79 // No input path, canonicalize to a slash. 80 output->push_back('/'); 81 } 82 83 out_path->len = output->length() - out_path->begin; 84 return success; 85 } 86 87 template<typename CHAR, typename UCHAR> 88 bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source, 89 const Parsed& parsed, 90 CharsetConverter* query_converter, 91 CanonOutput* output, 92 Parsed* new_parsed) { 93 // Things we don't set in file: URLs. 94 new_parsed->username = Component(); 95 new_parsed->password = Component(); 96 new_parsed->port = Component(); 97 98 // Scheme (known, so we don't bother running it through the more 99 // complicated scheme canonicalizer). 100 new_parsed->scheme.begin = output->length(); 101 output->Append("file://", 7); 102 new_parsed->scheme.len = 4; 103 104 // Append the host. For many file URLs, this will be empty. For UNC, this 105 // will be present. 106 // TODO(brettw) This doesn't do any checking for host name validity. We 107 // should probably handle validity checking of UNC hosts differently than 108 // for regular IP hosts. 109 bool success = CanonicalizeHost(source.host, parsed.host, 110 output, &new_parsed->host); 111 success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path, 112 output, &new_parsed->path); 113 CanonicalizeQuery(source.query, parsed.query, query_converter, 114 output, &new_parsed->query); 115 116 // Ignore failure for refs since the URL can probably still be loaded. 117 CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref); 118 119 return success; 120 } 121 122 } // namespace 123 124 bool CanonicalizeFileURL(const char* spec, 125 int spec_len, 126 const Parsed& parsed, 127 CharsetConverter* query_converter, 128 CanonOutput* output, 129 Parsed* new_parsed) { 130 return DoCanonicalizeFileURL<char, unsigned char>( 131 URLComponentSource<char>(spec), parsed, query_converter, 132 output, new_parsed); 133 } 134 135 bool CanonicalizeFileURL(const base::char16* spec, 136 int spec_len, 137 const Parsed& parsed, 138 CharsetConverter* query_converter, 139 CanonOutput* output, 140 Parsed* new_parsed) { 141 return DoCanonicalizeFileURL<base::char16, base::char16>( 142 URLComponentSource<base::char16>(spec), parsed, query_converter, 143 output, new_parsed); 144 } 145 146 bool FileCanonicalizePath(const char* spec, 147 const Component& path, 148 CanonOutput* output, 149 Component* out_path) { 150 return DoFileCanonicalizePath<char, unsigned char>(spec, path, 151 output, out_path); 152 } 153 154 bool FileCanonicalizePath(const base::char16* spec, 155 const Component& path, 156 CanonOutput* output, 157 Component* out_path) { 158 return DoFileCanonicalizePath<base::char16, base::char16>(spec, path, 159 output, out_path); 160 } 161 162 bool ReplaceFileURL(const char* base, 163 const Parsed& base_parsed, 164 const Replacements<char>& replacements, 165 CharsetConverter* query_converter, 166 CanonOutput* output, 167 Parsed* new_parsed) { 168 URLComponentSource<char> source(base); 169 Parsed parsed(base_parsed); 170 SetupOverrideComponents(base, replacements, &source, &parsed); 171 return DoCanonicalizeFileURL<char, unsigned char>( 172 source, parsed, query_converter, output, new_parsed); 173 } 174 175 bool ReplaceFileURL(const char* base, 176 const Parsed& base_parsed, 177 const Replacements<base::char16>& replacements, 178 CharsetConverter* query_converter, 179 CanonOutput* output, 180 Parsed* new_parsed) { 181 RawCanonOutput<1024> utf8; 182 URLComponentSource<char> source(base); 183 Parsed parsed(base_parsed); 184 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 185 return DoCanonicalizeFileURL<char, unsigned char>( 186 source, parsed, query_converter, output, new_parsed); 187 } 188 189 } // namespace url 190