1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Functions for canonicalizing "path" URLs. Not to be confused with the path 6 // of a URL, these are URLs that have no authority section, only a path. For 7 // example, "javascript:" and "data:". 8 9 #include "url/url_canon.h" 10 #include "url/url_canon_internal.h" 11 12 namespace url_canon { 13 14 namespace { 15 16 template<typename CHAR, typename UCHAR> 17 bool DoCanonicalizePathComponent(const CHAR* source, 18 const url_parse::Component& component, 19 CHAR seperator, 20 CanonOutput* output, 21 url_parse::Component* new_parsed) { 22 bool success = true; 23 if (component.is_valid()) { 24 if (seperator) 25 output->push_back(seperator); 26 // Copy the path using path URL's more lax escaping rules (think for 27 // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all 28 // ASCII characters alone. This helps readability of JavaStript. 29 new_parsed->begin = output->length(); 30 int end = component.end(); 31 for (int i = component.begin; i < end; i++) { 32 UCHAR uch = static_cast<UCHAR>(source[i]); 33 if (uch < 0x20 || uch >= 0x80) 34 success &= AppendUTF8EscapedChar(source, &i, end, output); 35 else 36 output->push_back(static_cast<char>(uch)); 37 } 38 new_parsed->len = output->length() - new_parsed->begin; 39 } else { 40 // Empty part. 41 new_parsed->reset(); 42 } 43 return success; 44 } 45 46 template<typename CHAR, typename UCHAR> 47 bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source, 48 const url_parse::Parsed& parsed, 49 CanonOutput* output, 50 url_parse::Parsed* new_parsed) { 51 // Scheme: this will append the colon. 52 bool success = CanonicalizeScheme(source.scheme, parsed.scheme, 53 output, &new_parsed->scheme); 54 55 // We assume there's no authority for path URLs. Note that hosts should never 56 // have -1 length. 57 new_parsed->username.reset(); 58 new_parsed->password.reset(); 59 new_parsed->host.reset(); 60 new_parsed->port.reset(); 61 success &= DoCanonicalizePathComponent<CHAR, UCHAR>( 62 source.path, parsed.path, 0, output, &new_parsed->path); 63 success &= DoCanonicalizePathComponent<CHAR, UCHAR>( 64 source.query, parsed.query, '?', output, &new_parsed->query); 65 success &= DoCanonicalizePathComponent<CHAR, UCHAR>( 66 source.ref, parsed.ref, '#', output, &new_parsed->ref); 67 68 return success; 69 } 70 71 } // namespace 72 73 bool CanonicalizePathURL(const char* spec, 74 int spec_len, 75 const url_parse::Parsed& parsed, 76 CanonOutput* output, 77 url_parse::Parsed* new_parsed) { 78 return DoCanonicalizePathURL<char, unsigned char>( 79 URLComponentSource<char>(spec), parsed, output, new_parsed); 80 } 81 82 bool CanonicalizePathURL(const base::char16* spec, 83 int spec_len, 84 const url_parse::Parsed& parsed, 85 CanonOutput* output, 86 url_parse::Parsed* new_parsed) { 87 return DoCanonicalizePathURL<base::char16, base::char16>( 88 URLComponentSource<base::char16>(spec), parsed, output, new_parsed); 89 } 90 91 bool ReplacePathURL(const char* base, 92 const url_parse::Parsed& base_parsed, 93 const Replacements<char>& replacements, 94 CanonOutput* output, 95 url_parse::Parsed* new_parsed) { 96 URLComponentSource<char> source(base); 97 url_parse::Parsed parsed(base_parsed); 98 SetupOverrideComponents(base, replacements, &source, &parsed); 99 return DoCanonicalizePathURL<char, unsigned char>( 100 source, parsed, output, new_parsed); 101 } 102 103 bool ReplacePathURL(const char* base, 104 const url_parse::Parsed& base_parsed, 105 const Replacements<base::char16>& replacements, 106 CanonOutput* output, 107 url_parse::Parsed* new_parsed) { 108 RawCanonOutput<1024> utf8; 109 URLComponentSource<char> source(base); 110 url_parse::Parsed parsed(base_parsed); 111 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 112 return DoCanonicalizePathURL<char, unsigned char>( 113 source, parsed, output, new_parsed); 114 } 115 116 } // namespace url_canon 117