Home | History | Annotate | Download | only in url
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Functions for canonicalizing "file:" URLs.
      6 
      7 #include "url/url_canon.h"
      8 #include "url/url_canon_internal.h"
      9 #include "url/url_file.h"
     10 #include "url/url_parse_internal.h"
     11 
     12 namespace url {
     13 
     14 namespace {
     15 
     16 #ifdef WIN32
     17 
     18 // Given a pointer into the spec, this copies and canonicalizes the drive
     19 // letter and colon to the output, if one is found. If there is not a drive
     20 // spec, it won't do anything. The index of the next character in the input
     21 // spec is returned (after the colon when a drive spec is found, the begin
     22 // offset if one is not).
     23 template<typename CHAR>
     24 int FileDoDriveSpec(const CHAR* spec, int begin, int end,
     25                     CanonOutput* output) {
     26   // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
     27   // (with backslashes instead of slashes as well).
     28   int num_slashes = CountConsecutiveSlashes(spec, begin, end);
     29   int after_slashes = begin + num_slashes;
     30 
     31   if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end))
     32     return begin;  // Haven't consumed any characters
     33 
     34   // A drive spec is the start of a path, so we need to add a slash for the
     35   // authority terminator (typically the third slash).
     36   output->push_back('/');
     37 
     38   // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
     39   // and that it is followed by a colon/pipe.
     40 
     41   // Normalize Windows drive letters to uppercase
     42   if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z')
     43     output->push_back(spec[after_slashes] - 'a' + 'A');
     44   else
     45     output->push_back(static_cast<char>(spec[after_slashes]));
     46 
     47   // Normalize the character following it to a colon rather than pipe.
     48   output->push_back(':');
     49   return after_slashes + 2;
     50 }
     51 
     52 #endif  // WIN32
     53 
     54 template<typename CHAR, typename UCHAR>
     55 bool DoFileCanonicalizePath(const CHAR* spec,
     56                             const Component& path,
     57                             CanonOutput* output,
     58                             Component* out_path) {
     59   // Copies and normalizes the "c:" at the beginning, if present.
     60   out_path->begin = output->length();
     61   int after_drive;
     62 #ifdef WIN32
     63   after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
     64 #else
     65   after_drive = path.begin;
     66 #endif
     67 
     68   // Copies the rest of the path, starting from the slash following the
     69   // drive colon (if any, Windows only), or the first slash of the path.
     70   bool success = true;
     71   if (after_drive < path.end()) {
     72     // Use the regular path canonicalizer to canonicalize the rest of the
     73     // path. Give it a fake output component to write into. DoCanonicalizeFile
     74     // will compute the full path component.
     75     Component sub_path = MakeRange(after_drive, path.end());
     76     Component fake_output_path;
     77     success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
     78   } else {
     79     // No input path, canonicalize to a slash.
     80     output->push_back('/');
     81   }
     82 
     83   out_path->len = output->length() - out_path->begin;
     84   return success;
     85 }
     86 
     87 template<typename CHAR, typename UCHAR>
     88 bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
     89                            const Parsed& parsed,
     90                            CharsetConverter* query_converter,
     91                            CanonOutput* output,
     92                            Parsed* new_parsed) {
     93   // Things we don't set in file: URLs.
     94   new_parsed->username = Component();
     95   new_parsed->password = Component();
     96   new_parsed->port = Component();
     97 
     98   // Scheme (known, so we don't bother running it through the more
     99   // complicated scheme canonicalizer).
    100   new_parsed->scheme.begin = output->length();
    101   output->Append("file://", 7);
    102   new_parsed->scheme.len = 4;
    103 
    104   // Append the host. For many file URLs, this will be empty. For UNC, this
    105   // will be present.
    106   // TODO(brettw) This doesn't do any checking for host name validity. We
    107   // should probably handle validity checking of UNC hosts differently than
    108   // for regular IP hosts.
    109   bool success = CanonicalizeHost(source.host, parsed.host,
    110                                   output, &new_parsed->host);
    111   success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
    112                                     output, &new_parsed->path);
    113   CanonicalizeQuery(source.query, parsed.query, query_converter,
    114                     output, &new_parsed->query);
    115 
    116   // Ignore failure for refs since the URL can probably still be loaded.
    117   CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
    118 
    119   return success;
    120 }
    121 
    122 } // namespace
    123 
    124 bool CanonicalizeFileURL(const char* spec,
    125                          int spec_len,
    126                          const Parsed& parsed,
    127                          CharsetConverter* query_converter,
    128                          CanonOutput* output,
    129                          Parsed* new_parsed) {
    130   return DoCanonicalizeFileURL<char, unsigned char>(
    131       URLComponentSource<char>(spec), parsed, query_converter,
    132       output, new_parsed);
    133 }
    134 
    135 bool CanonicalizeFileURL(const base::char16* spec,
    136                          int spec_len,
    137                          const Parsed& parsed,
    138                          CharsetConverter* query_converter,
    139                          CanonOutput* output,
    140                          Parsed* new_parsed) {
    141   return DoCanonicalizeFileURL<base::char16, base::char16>(
    142       URLComponentSource<base::char16>(spec), parsed, query_converter,
    143       output, new_parsed);
    144 }
    145 
    146 bool FileCanonicalizePath(const char* spec,
    147                           const Component& path,
    148                           CanonOutput* output,
    149                           Component* out_path) {
    150   return DoFileCanonicalizePath<char, unsigned char>(spec, path,
    151                                                      output, out_path);
    152 }
    153 
    154 bool FileCanonicalizePath(const base::char16* spec,
    155                           const Component& path,
    156                           CanonOutput* output,
    157                           Component* out_path) {
    158   return DoFileCanonicalizePath<base::char16, base::char16>(spec, path,
    159                                                             output, out_path);
    160 }
    161 
    162 bool ReplaceFileURL(const char* base,
    163                     const Parsed& base_parsed,
    164                     const Replacements<char>& replacements,
    165                     CharsetConverter* query_converter,
    166                     CanonOutput* output,
    167                     Parsed* new_parsed) {
    168   URLComponentSource<char> source(base);
    169   Parsed parsed(base_parsed);
    170   SetupOverrideComponents(base, replacements, &source, &parsed);
    171   return DoCanonicalizeFileURL<char, unsigned char>(
    172       source, parsed, query_converter, output, new_parsed);
    173 }
    174 
    175 bool ReplaceFileURL(const char* base,
    176                     const Parsed& base_parsed,
    177                     const Replacements<base::char16>& replacements,
    178                     CharsetConverter* query_converter,
    179                     CanonOutput* output,
    180                     Parsed* new_parsed) {
    181   RawCanonOutput<1024> utf8;
    182   URLComponentSource<char> source(base);
    183   Parsed parsed(base_parsed);
    184   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
    185   return DoCanonicalizeFileURL<char, unsigned char>(
    186       source, parsed, query_converter, output, new_parsed);
    187 }
    188 
    189 }  // namespace url
    190