// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Functions for canonicalizing "file:" URLs.
31 32 #include "googleurl/src/url_canon.h" 33 #include "googleurl/src/url_canon_internal.h" 34 #include "googleurl/src/url_file.h" 35 #include "googleurl/src/url_parse_internal.h" 36 37 namespace url_canon { 38 39 namespace { 40 41 #ifdef WIN32 42 43 // Given a pointer into the spec, this copies and canonicalizes the drive 44 // letter and colon to the output, if one is found. If there is not a drive 45 // spec, it won't do anything. The index of the next character in the input 46 // spec is returned (after the colon when a drive spec is found, the begin 47 // offset if one is not). 48 template<typename CHAR> 49 int FileDoDriveSpec(const CHAR* spec, int begin, int end, 50 CanonOutput* output) { 51 // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo, 52 // (with backslashes instead of slashes as well). 53 int num_slashes = url_parse::CountConsecutiveSlashes(spec, begin, end); 54 int after_slashes = begin + num_slashes; 55 56 if (!url_parse::DoesBeginWindowsDriveSpec(spec, after_slashes, end)) 57 return begin; // Haven't consumed any characters 58 59 // A drive spec is the start of a path, so we need to add a slash for the 60 // authority terminator (typically the third slash). 61 output->push_back('/'); 62 63 // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid 64 // and that it is followed by a colon/pipe. 65 66 // Normalize Windows drive letters to uppercase 67 if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z') 68 output->push_back(spec[after_slashes] - 'a' + 'A'); 69 else 70 output->push_back(static_cast<char>(spec[after_slashes])); 71 72 // Normalize the character following it to a colon rather than pipe. 
73 output->push_back(':'); 74 return after_slashes + 2; 75 } 76 77 #endif // WIN32 78 79 template<typename CHAR, typename UCHAR> 80 bool DoFileCanonicalizePath(const CHAR* spec, 81 const url_parse::Component& path, 82 CanonOutput* output, 83 url_parse::Component* out_path) { 84 // Copies and normalizes the "c:" at the beginning, if present. 85 out_path->begin = output->length(); 86 int after_drive; 87 #ifdef WIN32 88 after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output); 89 #else 90 after_drive = path.begin; 91 #endif 92 93 // Copies the rest of the path, starting from the slash following the 94 // drive colon (if any, Windows only), or the first slash of the path. 95 bool success = true; 96 if (after_drive < path.end()) { 97 // Use the regular path canonicalizer to canonicalize the rest of the 98 // path. Give it a fake output component to write into. DoCanonicalizeFile 99 // will compute the full path component. 100 url_parse::Component sub_path = 101 url_parse::MakeRange(after_drive, path.end()); 102 url_parse::Component fake_output_path; 103 success = CanonicalizePath(spec, sub_path, output, &fake_output_path); 104 } else { 105 // No input path, canonicalize to a slash. 106 output->push_back('/'); 107 } 108 109 out_path->len = output->length() - out_path->begin; 110 return success; 111 } 112 113 template<typename CHAR, typename UCHAR> 114 bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source, 115 const url_parse::Parsed& parsed, 116 CharsetConverter* query_converter, 117 CanonOutput* output, 118 url_parse::Parsed* new_parsed) { 119 // Things we don't set in file: URLs. 120 new_parsed->username = url_parse::Component(); 121 new_parsed->password = url_parse::Component(); 122 new_parsed->port = url_parse::Component(); 123 124 // Scheme (known, so we don't bother running it through the more 125 // complicated scheme canonicalizer). 
126 new_parsed->scheme.begin = output->length(); 127 output->Append("file://", 7); 128 new_parsed->scheme.len = 4; 129 130 // Append the host. For many file URLs, this will be empty. For UNC, this 131 // will be present. 132 // TODO(brettw) This doesn't do any checking for host name validity. We 133 // should probably handle validity checking of UNC hosts differently than 134 // for regular IP hosts. 135 bool success = CanonicalizeHost(source.host, parsed.host, 136 output, &new_parsed->host); 137 success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path, 138 output, &new_parsed->path); 139 CanonicalizeQuery(source.query, parsed.query, query_converter, 140 output, &new_parsed->query); 141 142 // Ignore failure for refs since the URL can probably still be loaded. 143 CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref); 144 145 return success; 146 } 147 148 } // namespace 149 150 bool CanonicalizeFileURL(const char* spec, 151 int spec_len, 152 const url_parse::Parsed& parsed, 153 CharsetConverter* query_converter, 154 CanonOutput* output, 155 url_parse::Parsed* new_parsed) { 156 return DoCanonicalizeFileURL<char, unsigned char>( 157 URLComponentSource<char>(spec), parsed, query_converter, 158 output, new_parsed); 159 } 160 161 bool CanonicalizeFileURL(const char16* spec, 162 int spec_len, 163 const url_parse::Parsed& parsed, 164 CharsetConverter* query_converter, 165 CanonOutput* output, 166 url_parse::Parsed* new_parsed) { 167 return DoCanonicalizeFileURL<char16, char16>( 168 URLComponentSource<char16>(spec), parsed, query_converter, 169 output, new_parsed); 170 } 171 172 bool FileCanonicalizePath(const char* spec, 173 const url_parse::Component& path, 174 CanonOutput* output, 175 url_parse::Component* out_path) { 176 return DoFileCanonicalizePath<char, unsigned char>(spec, path, 177 output, out_path); 178 } 179 180 bool FileCanonicalizePath(const char16* spec, 181 const url_parse::Component& path, 182 CanonOutput* output, 183 
url_parse::Component* out_path) { 184 return DoFileCanonicalizePath<char16, char16>(spec, path, 185 output, out_path); 186 } 187 188 bool ReplaceFileURL(const char* base, 189 const url_parse::Parsed& base_parsed, 190 const Replacements<char>& replacements, 191 CharsetConverter* query_converter, 192 CanonOutput* output, 193 url_parse::Parsed* new_parsed) { 194 URLComponentSource<char> source(base); 195 url_parse::Parsed parsed(base_parsed); 196 SetupOverrideComponents(base, replacements, &source, &parsed); 197 return DoCanonicalizeFileURL<char, unsigned char>( 198 source, parsed, query_converter, output, new_parsed); 199 } 200 201 bool ReplaceFileURL(const char* base, 202 const url_parse::Parsed& base_parsed, 203 const Replacements<char16>& replacements, 204 CharsetConverter* query_converter, 205 CanonOutput* output, 206 url_parse::Parsed* new_parsed) { 207 RawCanonOutput<1024> utf8; 208 URLComponentSource<char> source(base); 209 url_parse::Parsed parsed(base_parsed); 210 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 211 return DoCanonicalizeFileURL<char, unsigned char>( 212 source, parsed, query_converter, output, new_parsed); 213 } 214 215 } // namespace url_canon 216