1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Functions for canonicalizing "path" URLs. Not to be confused with the path 6 // of a URL, these are URLs that have no authority section, only a path. For 7 // example, "javascript:" and "data:". 8 9 #include "url/url_canon.h" 10 #include "url/url_canon_internal.h" 11 12 namespace url_canon { 13 14 namespace { 15 16 // Canonicalize the given |component| from |source| into |output| and 17 // |new_component|. If |separator| is non-zero, it is pre-pended to |ouput| 18 // prior to the canonicalized component; i.e. for the '?' or '#' characters. 19 template<typename CHAR, typename UCHAR> 20 bool DoCanonicalizePathComponent(const CHAR* source, 21 const url_parse::Component& component, 22 CHAR seperator, 23 CanonOutput* output, 24 url_parse::Component* new_component) { 25 bool success = true; 26 if (component.is_valid()) { 27 if (seperator) 28 output->push_back(seperator); 29 // Copy the path using path URL's more lax escaping rules (think for 30 // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all 31 // ASCII characters alone. This helps readability of JavaStript. 32 new_component->begin = output->length(); 33 int end = component.end(); 34 for (int i = component.begin; i < end; i++) { 35 UCHAR uch = static_cast<UCHAR>(source[i]); 36 if (uch < 0x20 || uch >= 0x80) 37 success &= AppendUTF8EscapedChar(source, &i, end, output); 38 else 39 output->push_back(static_cast<char>(uch)); 40 } 41 new_component->len = output->length() - new_component->begin; 42 } else { 43 // Empty part. 44 new_component->reset(); 45 } 46 return success; 47 } 48 49 template<typename CHAR, typename UCHAR> 50 bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source, 51 const url_parse::Parsed& parsed, 52 CanonOutput* output, 53 url_parse::Parsed* new_parsed) { 54 // Scheme: this will append the colon. 55 bool success = CanonicalizeScheme(source.scheme, parsed.scheme, 56 output, &new_parsed->scheme); 57 58 // We assume there's no authority for path URLs. Note that hosts should never 59 // have -1 length. 60 new_parsed->username.reset(); 61 new_parsed->password.reset(); 62 new_parsed->host.reset(); 63 new_parsed->port.reset(); 64 // We allow path URLs to have the path, query and fragment components, but we 65 // will canonicalize each of the via the weaker path URL rules. 66 success &= DoCanonicalizePathComponent<CHAR, UCHAR>( 67 source.path, parsed.path, 0, output, &new_parsed->path); 68 success &= DoCanonicalizePathComponent<CHAR, UCHAR>( 69 source.query, parsed.query, '?', output, &new_parsed->query); 70 success &= DoCanonicalizePathComponent<CHAR, UCHAR>( 71 source.ref, parsed.ref, '#', output, &new_parsed->ref); 72 73 return success; 74 } 75 76 } // namespace 77 78 bool CanonicalizePathURL(const char* spec, 79 int spec_len, 80 const url_parse::Parsed& parsed, 81 CanonOutput* output, 82 url_parse::Parsed* new_parsed) { 83 return DoCanonicalizePathURL<char, unsigned char>( 84 URLComponentSource<char>(spec), parsed, output, new_parsed); 85 } 86 87 bool CanonicalizePathURL(const base::char16* spec, 88 int spec_len, 89 const url_parse::Parsed& parsed, 90 CanonOutput* output, 91 url_parse::Parsed* new_parsed) { 92 return DoCanonicalizePathURL<base::char16, base::char16>( 93 URLComponentSource<base::char16>(spec), parsed, output, new_parsed); 94 } 95 96 bool ReplacePathURL(const char* base, 97 const url_parse::Parsed& base_parsed, 98 const Replacements<char>& replacements, 99 CanonOutput* output, 100 url_parse::Parsed* new_parsed) { 101 URLComponentSource<char> source(base); 102 url_parse::Parsed parsed(base_parsed); 103 SetupOverrideComponents(base, replacements, &source, &parsed); 104 return DoCanonicalizePathURL<char, unsigned char>( 105 source, parsed, output, new_parsed); 106 } 107 108 bool ReplacePathURL(const char* base, 109 const url_parse::Parsed& base_parsed, 110 const Replacements<base::char16>& replacements, 111 CanonOutput* output, 112 url_parse::Parsed* new_parsed) { 113 RawCanonOutput<1024> utf8; 114 URLComponentSource<char> source(base); 115 url_parse::Parsed parsed(base_parsed); 116 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 117 return DoCanonicalizePathURL<char, unsigned char>( 118 source, parsed, output, new_parsed); 119 } 120 121 } // namespace url_canon 122