Home | History | Annotate | Download | only in url
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Functions for canonicalizing "path" URLs. Not to be confused with the path
      6 // of a URL, these are URLs that have no authority section, only a path. For
      7 // example, "javascript:" and "data:".
      8 
      9 #include "url/url_canon.h"
     10 #include "url/url_canon_internal.h"
     11 
     12 namespace url_canon {
     13 
     14 namespace {
     15 
     16 template<typename CHAR, typename UCHAR>
     17 bool DoCanonicalizePathComponent(const CHAR* source,
     18                                  const url_parse::Component& component,
     19                                  CHAR seperator,
     20                                  CanonOutput* output,
     21                                  url_parse::Component* new_parsed) {
     22   bool success = true;
     23   if (component.is_valid()) {
     24     if (seperator)
     25       output->push_back(seperator);
     26     // Copy the path using path URL's more lax escaping rules (think for
     27     // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all
     28     // ASCII characters alone. This helps readability of JavaStript.
     29     new_parsed->begin = output->length();
     30     int end = component.end();
     31     for (int i = component.begin; i < end; i++) {
     32       UCHAR uch = static_cast<UCHAR>(source[i]);
     33       if (uch < 0x20 || uch >= 0x80)
     34         success &= AppendUTF8EscapedChar(source, &i, end, output);
     35       else
     36         output->push_back(static_cast<char>(uch));
     37     }
     38     new_parsed->len = output->length() - new_parsed->begin;
     39   } else {
     40     // Empty part.
     41     new_parsed->reset();
     42   }
     43   return success;
     44 }
     45 
     46 template<typename CHAR, typename UCHAR>
     47 bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
     48                            const url_parse::Parsed& parsed,
     49                            CanonOutput* output,
     50                            url_parse::Parsed* new_parsed) {
     51   // Scheme: this will append the colon.
     52   bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
     53                                     output, &new_parsed->scheme);
     54 
     55   // We assume there's no authority for path URLs. Note that hosts should never
     56   // have -1 length.
     57   new_parsed->username.reset();
     58   new_parsed->password.reset();
     59   new_parsed->host.reset();
     60   new_parsed->port.reset();
     61   success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
     62       source.path, parsed.path, 0, output, &new_parsed->path);
     63   success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
     64       source.query, parsed.query, '?', output, &new_parsed->query);
     65   success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
     66       source.ref, parsed.ref, '#', output, &new_parsed->ref);
     67 
     68   return success;
     69 }
     70 
     71 }  // namespace
     72 
     73 bool CanonicalizePathURL(const char* spec,
     74                          int spec_len,
     75                          const url_parse::Parsed& parsed,
     76                          CanonOutput* output,
     77                          url_parse::Parsed* new_parsed) {
     78   return DoCanonicalizePathURL<char, unsigned char>(
     79       URLComponentSource<char>(spec), parsed, output, new_parsed);
     80 }
     81 
     82 bool CanonicalizePathURL(const base::char16* spec,
     83                          int spec_len,
     84                          const url_parse::Parsed& parsed,
     85                          CanonOutput* output,
     86                          url_parse::Parsed* new_parsed) {
     87   return DoCanonicalizePathURL<base::char16, base::char16>(
     88       URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
     89 }
     90 
     91 bool ReplacePathURL(const char* base,
     92                     const url_parse::Parsed& base_parsed,
     93                     const Replacements<char>& replacements,
     94                     CanonOutput* output,
     95                     url_parse::Parsed* new_parsed) {
     96   URLComponentSource<char> source(base);
     97   url_parse::Parsed parsed(base_parsed);
     98   SetupOverrideComponents(base, replacements, &source, &parsed);
     99   return DoCanonicalizePathURL<char, unsigned char>(
    100       source, parsed, output, new_parsed);
    101 }
    102 
    103 bool ReplacePathURL(const char* base,
    104                     const url_parse::Parsed& base_parsed,
    105                     const Replacements<base::char16>& replacements,
    106                     CanonOutput* output,
    107                     url_parse::Parsed* new_parsed) {
    108   RawCanonOutput<1024> utf8;
    109   URLComponentSource<char> source(base);
    110   url_parse::Parsed parsed(base_parsed);
    111   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
    112   return DoCanonicalizePathURL<char, unsigned char>(
    113       source, parsed, output, new_parsed);
    114 }
    115 
    116 }  // namespace url_canon
    117