Home | History | Annotate | Download | only in url
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef URL_URL_CANON_INTERNAL_FILE_H_
      6 #define URL_URL_CANON_INTERNAL_FILE_H_
      7 
// As with url_canon_internal.h, this file is intended to be included in
// another C++ file where the template types are defined. This allows the
// programmer to use these functions with their own string types, without
// bloating the code by having inline templates instantiated at every call
// site.
//
// *** This file must be included after url_canon_internal.h, as we depend on
// some functions in it. ***
     16 
     17 
     18 #include "url/url_file.h"
     19 #include "url/url_parse_internal.h"
     20 
     21 namespace url {
     22 
     23 // Given a pointer into the spec, this copies and canonicalizes the drive
     24 // letter and colon to the output, if one is found. If there is not a drive
     25 // spec, it won't do anything. The index of the next character in the input
     26 // spec is returned (after the colon when a drive spec is found, the begin
     27 // offset if one is not).
     28 template<typename CHAR>
     29 static int FileDoDriveSpec(const CHAR* spec, int begin, int end,
     30                            CanonOutput* output) {
     31   // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
     32   // (with backslashes instead of slashes as well).
     33   int num_slashes = CountConsecutiveSlashes(spec, begin, end);
     34   int after_slashes = begin + num_slashes;
     35 
     36   if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end))
     37     return begin;  // Haven't consumed any characters
     38 
     39   // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
     40   // and that it is followed by a colon/pipe.
     41 
     42   // Normalize Windows drive letters to uppercase
     43   if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z')
     44     output->push_back(spec[after_slashes] - 'a' + 'A');
     45   else
     46     output->push_back(static_cast<char>(spec[after_slashes]));
     47 
     48   // Normalize the character following it to a colon rather than pipe.
     49   output->push_back(':');
     50   output->push_back('/');
     51   return after_slashes + 2;
     52 }
     53 
     54 // FileDoDriveSpec will have already added the first backslash, so we need to
     55 // write everything following the slashes using the path canonicalizer.
     56 template<typename CHAR, typename UCHAR>
     57 static void FileDoPath(const CHAR* spec, int begin, int end,
     58                        CanonOutput* output) {
     59   // Normalize the number of slashes after the drive letter. The path
     60   // canonicalizer expects the input to begin in a slash already so
     61   // doesn't check. We want to handle no-slashes
     62   int num_slashes = CountConsecutiveSlashes(spec, begin, end);
     63   int after_slashes = begin + num_slashes;
     64 
     65   // Now use the regular path canonicalizer to canonicalize the rest of the
     66   // path. We supply it with the path following the slashes. It won't prepend
     67   // a slash because it assumes any nonempty path already starts with one.
     68   // We explicitly filter out calls with no path here to prevent that case.
     69   ParsedComponent sub_path(after_slashes, end - after_slashes);
     70   if (sub_path.len > 0) {
     71     // Give it a fake output component to write into. DoCanonicalizeFile will
     72     // compute the full path component.
     73     ParsedComponent fake_output_path;
     74     URLCanonInternal<CHAR, UCHAR>::DoPath(
     75         spec, sub_path, output, &fake_output_path);
     76   }
     77 }
     78 
     79 template<typename CHAR, typename UCHAR>
     80 static bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
     81                                   const ParsedURL& parsed,
     82                                   CanonOutput* output,
     83                                   ParsedURL* new_parsed) {
     84   // Things we don't set in file: URLs.
     85   new_parsed->username = ParsedComponent(0, -1);
     86   new_parsed->password = ParsedComponent(0, -1);
     87   new_parsed->port = ParsedComponent(0, -1);
     88 
     89   // Scheme (known, so we don't bother running it through the more
     90   // complicated scheme canonicalizer).
     91   new_parsed->scheme.begin = output->length();
     92   output->push_back('f');
     93   output->push_back('i');
     94   output->push_back('l');
     95   output->push_back('e');
     96   new_parsed->scheme.len = output->length() - new_parsed->scheme.begin;
     97   output->push_back(':');
     98 
     99   // Write the separator for the host.
    100   output->push_back('/');
    101   output->push_back('/');
    102 
    103   // Append the host. For many file URLs, this will be empty. For UNC, this
    104   // will be present.
    105   // TODO(brettw) This doesn't do any checking for host name validity. We
    106   // should probably handle validity checking of UNC hosts differently than
    107   // for regular IP hosts.
    108   bool success = URLCanonInternal<CHAR, UCHAR>::DoHost(
    109       source.host, parsed.host, output, &new_parsed->host);
    110 
    111   // Write a separator for the start of the path. We'll ignore any slashes
    112   // already at the beginning of the path.
    113   new_parsed->path.begin = output->length();
    114   output->push_back('/');
    115 
    116   // Copies and normalizes the "c:" at the beginning, if present.
    117   int after_drive = FileDoDriveSpec(source.path, parsed.path.begin,
    118                                     parsed.path.end(), output);
    119 
    120   // Copies the rest of the path
    121   FileDoPath<CHAR, UCHAR>(source.path, after_drive, parsed.path.end(), output);
    122   new_parsed->path.len = output->length() - new_parsed->path.begin;
    123 
    124   // Things following the path we can use the standard canonicalizers for.
    125   success &= URLCanonInternal<CHAR, UCHAR>::DoQuery(
    126       source.query, parsed.query, output, &new_parsed->query);
    127   success &= URLCanonInternal<CHAR, UCHAR>::DoRef(
    128       source.ref, parsed.ref, output, &new_parsed->ref);
    129 
    130   return success;
    131 }
    132 
    133 }  // namespace url
    134 
    135 #endif  // URL_URL_CANON_INTERNAL_FILE_H_
    136