Home | History | Annotate | Download | only in url
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Functions for canonicalizing "file:" URLs.
      6 
      7 #include "url/url_canon.h"
      8 #include "url/url_canon_internal.h"
      9 #include "url/url_file.h"
     10 #include "url/url_parse_internal.h"
     11 
     12 namespace url_canon {
     13 
     14 namespace {
     15 
     16 #ifdef WIN32
     17 
     18 // Given a pointer into the spec, this copies and canonicalizes the drive
     19 // letter and colon to the output, if one is found. If there is not a drive
     20 // spec, it won't do anything. The index of the next character in the input
     21 // spec is returned (after the colon when a drive spec is found, the begin
     22 // offset if one is not).
     23 template<typename CHAR>
     24 int FileDoDriveSpec(const CHAR* spec, int begin, int end,
     25                     CanonOutput* output) {
     26   // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
     27   // (with backslashes instead of slashes as well).
     28   int num_slashes = url_parse::CountConsecutiveSlashes(spec, begin, end);
     29   int after_slashes = begin + num_slashes;
     30 
     31   if (!url_parse::DoesBeginWindowsDriveSpec(spec, after_slashes, end))
     32     return begin;  // Haven't consumed any characters
     33 
     34   // A drive spec is the start of a path, so we need to add a slash for the
     35   // authority terminator (typically the third slash).
     36   output->push_back('/');
     37 
     38   // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
     39   // and that it is followed by a colon/pipe.
     40 
     41   // Normalize Windows drive letters to uppercase
     42   if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z')
     43     output->push_back(spec[after_slashes] - 'a' + 'A');
     44   else
     45     output->push_back(static_cast<char>(spec[after_slashes]));
     46 
     47   // Normalize the character following it to a colon rather than pipe.
     48   output->push_back(':');
     49   return after_slashes + 2;
     50 }
     51 
     52 #endif  // WIN32
     53 
     54 template<typename CHAR, typename UCHAR>
     55 bool DoFileCanonicalizePath(const CHAR* spec,
     56                             const url_parse::Component& path,
     57                             CanonOutput* output,
     58                             url_parse::Component* out_path) {
     59   // Copies and normalizes the "c:" at the beginning, if present.
     60   out_path->begin = output->length();
     61   int after_drive;
     62 #ifdef WIN32
     63   after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
     64 #else
     65   after_drive = path.begin;
     66 #endif
     67 
     68   // Copies the rest of the path, starting from the slash following the
     69   // drive colon (if any, Windows only), or the first slash of the path.
     70   bool success = true;
     71   if (after_drive < path.end()) {
     72     // Use the regular path canonicalizer to canonicalize the rest of the
     73     // path. Give it a fake output component to write into. DoCanonicalizeFile
     74     // will compute the full path component.
     75     url_parse::Component sub_path =
     76         url_parse::MakeRange(after_drive, path.end());
     77     url_parse::Component fake_output_path;
     78     success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
     79   } else {
     80     // No input path, canonicalize to a slash.
     81     output->push_back('/');
     82   }
     83 
     84   out_path->len = output->length() - out_path->begin;
     85   return success;
     86 }
     87 
     88 template<typename CHAR, typename UCHAR>
     89 bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
     90                            const url_parse::Parsed& parsed,
     91                            CharsetConverter* query_converter,
     92                            CanonOutput* output,
     93                            url_parse::Parsed* new_parsed) {
     94   // Things we don't set in file: URLs.
     95   new_parsed->username = url_parse::Component();
     96   new_parsed->password = url_parse::Component();
     97   new_parsed->port = url_parse::Component();
     98 
     99   // Scheme (known, so we don't bother running it through the more
    100   // complicated scheme canonicalizer).
    101   new_parsed->scheme.begin = output->length();
    102   output->Append("file://", 7);
    103   new_parsed->scheme.len = 4;
    104 
    105   // Append the host. For many file URLs, this will be empty. For UNC, this
    106   // will be present.
    107   // TODO(brettw) This doesn't do any checking for host name validity. We
    108   // should probably handle validity checking of UNC hosts differently than
    109   // for regular IP hosts.
    110   bool success = CanonicalizeHost(source.host, parsed.host,
    111                                   output, &new_parsed->host);
    112   success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
    113                                     output, &new_parsed->path);
    114   CanonicalizeQuery(source.query, parsed.query, query_converter,
    115                     output, &new_parsed->query);
    116 
    117   // Ignore failure for refs since the URL can probably still be loaded.
    118   CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
    119 
    120   return success;
    121 }
    122 
    123 } // namespace
    124 
    125 bool CanonicalizeFileURL(const char* spec,
    126                          int spec_len,
    127                          const url_parse::Parsed& parsed,
    128                          CharsetConverter* query_converter,
    129                          CanonOutput* output,
    130                          url_parse::Parsed* new_parsed) {
    131   return DoCanonicalizeFileURL<char, unsigned char>(
    132       URLComponentSource<char>(spec), parsed, query_converter,
    133       output, new_parsed);
    134 }
    135 
    136 bool CanonicalizeFileURL(const base::char16* spec,
    137                          int spec_len,
    138                          const url_parse::Parsed& parsed,
    139                          CharsetConverter* query_converter,
    140                          CanonOutput* output,
    141                          url_parse::Parsed* new_parsed) {
    142   return DoCanonicalizeFileURL<base::char16, base::char16>(
    143       URLComponentSource<base::char16>(spec), parsed, query_converter,
    144       output, new_parsed);
    145 }
    146 
    147 bool FileCanonicalizePath(const char* spec,
    148                           const url_parse::Component& path,
    149                           CanonOutput* output,
    150                           url_parse::Component* out_path) {
    151   return DoFileCanonicalizePath<char, unsigned char>(spec, path,
    152                                                      output, out_path);
    153 }
    154 
    155 bool FileCanonicalizePath(const base::char16* spec,
    156                           const url_parse::Component& path,
    157                           CanonOutput* output,
    158                           url_parse::Component* out_path) {
    159   return DoFileCanonicalizePath<base::char16, base::char16>(spec, path,
    160                                                             output, out_path);
    161 }
    162 
    163 bool ReplaceFileURL(const char* base,
    164                     const url_parse::Parsed& base_parsed,
    165                     const Replacements<char>& replacements,
    166                     CharsetConverter* query_converter,
    167                     CanonOutput* output,
    168                     url_parse::Parsed* new_parsed) {
    169   URLComponentSource<char> source(base);
    170   url_parse::Parsed parsed(base_parsed);
    171   SetupOverrideComponents(base, replacements, &source, &parsed);
    172   return DoCanonicalizeFileURL<char, unsigned char>(
    173       source, parsed, query_converter, output, new_parsed);
    174 }
    175 
    176 bool ReplaceFileURL(const char* base,
    177                     const url_parse::Parsed& base_parsed,
    178                     const Replacements<base::char16>& replacements,
    179                     CharsetConverter* query_converter,
    180                     CanonOutput* output,
    181                     url_parse::Parsed* new_parsed) {
    182   RawCanonOutput<1024> utf8;
    183   URLComponentSource<char> source(base);
    184   url_parse::Parsed parsed(base_parsed);
    185   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
    186   return DoCanonicalizeFileURL<char, unsigned char>(
    187       source, parsed, query_converter, output, new_parsed);
    188 }
    189 
    190 }  // namespace url_canon
    191