Home | History | Annotate | Download | only in url
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Functions for canonicalizing "mailto:" URLs.
      6 
      7 #include "url/url_canon.h"
      8 #include "url/url_canon_internal.h"
      9 #include "url/url_file.h"
     10 #include "url/url_parse_internal.h"
     11 
     12 namespace url {
     13 
     14 namespace {
     15 
     16 template <typename CHAR, typename UCHAR>
     17 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
     18                              const Parsed& parsed,
     19                              CanonOutput* output,
     20                              Parsed* new_parsed) {
     21   // mailto: only uses {scheme, path, query} -- clear the rest.
     22   new_parsed->username = Component();
     23   new_parsed->password = Component();
     24   new_parsed->host = Component();
     25   new_parsed->port = Component();
     26   new_parsed->ref = Component();
     27 
     28   // Scheme (known, so we don't bother running it through the more
     29   // complicated scheme canonicalizer).
     30   new_parsed->scheme.begin = output->length();
     31   output->Append("mailto:", 7);
     32   new_parsed->scheme.len = 6;
     33 
     34   bool success = true;
     35 
     36   // Path
     37   if (parsed.path.is_valid()) {
     38     new_parsed->path.begin = output->length();
     39 
     40     // Copy the path using path URL's more lax escaping rules.
     41     // We convert to UTF-8 and escape non-ASCII, but leave all
     42     // ASCII characters alone.
     43     int end = parsed.path.end();
     44     for (int i = parsed.path.begin; i < end; ++i) {
     45       UCHAR uch = static_cast<UCHAR>(source.path[i]);
     46       if (uch < 0x20 || uch >= 0x80)
     47         success &= AppendUTF8EscapedChar(source.path, &i, end, output);
     48       else
     49         output->push_back(static_cast<char>(uch));
     50     }
     51 
     52     new_parsed->path.len = output->length() - new_parsed->path.begin;
     53   } else {
     54     // No path at all
     55     new_parsed->path.reset();
     56   }
     57 
     58   // Query -- always use the default utf8 charset converter.
     59   CanonicalizeQuery(source.query, parsed.query, NULL,
     60                     output, &new_parsed->query);
     61 
     62   return success;
     63 }
     64 
     65 } // namespace
     66 
     67 bool CanonicalizeMailtoURL(const char* spec,
     68                            int spec_len,
     69                            const Parsed& parsed,
     70                            CanonOutput* output,
     71                            Parsed* new_parsed) {
     72   return DoCanonicalizeMailtoURL<char, unsigned char>(
     73       URLComponentSource<char>(spec), parsed, output, new_parsed);
     74 }
     75 
     76 bool CanonicalizeMailtoURL(const base::char16* spec,
     77                            int spec_len,
     78                            const Parsed& parsed,
     79                            CanonOutput* output,
     80                            Parsed* new_parsed) {
     81   return DoCanonicalizeMailtoURL<base::char16, base::char16>(
     82       URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
     83 }
     84 
     85 bool ReplaceMailtoURL(const char* base,
     86                       const Parsed& base_parsed,
     87                       const Replacements<char>& replacements,
     88                       CanonOutput* output,
     89                       Parsed* new_parsed) {
     90   URLComponentSource<char> source(base);
     91   Parsed parsed(base_parsed);
     92   SetupOverrideComponents(base, replacements, &source, &parsed);
     93   return DoCanonicalizeMailtoURL<char, unsigned char>(
     94       source, parsed, output, new_parsed);
     95 }
     96 
     97 bool ReplaceMailtoURL(const char* base,
     98                       const Parsed& base_parsed,
     99                       const Replacements<base::char16>& replacements,
    100                       CanonOutput* output,
    101                       Parsed* new_parsed) {
    102   RawCanonOutput<1024> utf8;
    103   URLComponentSource<char> source(base);
    104   Parsed parsed(base_parsed);
    105   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
    106   return DoCanonicalizeMailtoURL<char, unsigned char>(
    107       source, parsed, output, new_parsed);
    108 }
    109 
    110 }  // namespace url
    111