Home | History | Annotate | Download | only in src
      1 // Copyright 2008, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 // Functions for canonicalizing "mailto:" URLs.
     31 
     32 #include "googleurl/src/url_canon.h"
     33 #include "googleurl/src/url_canon_internal.h"
     34 #include "googleurl/src/url_file.h"
     35 #include "googleurl/src/url_parse_internal.h"
     36 
     37 namespace url_canon {
     38 
     39 namespace {
     40 
     41 
     42 template<typename CHAR, typename UCHAR>
     43 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
     44                              const url_parse::Parsed& parsed,
     45                              CanonOutput* output,
     46                              url_parse::Parsed* new_parsed) {
     47 
     48   // mailto: only uses {scheme, path, query} -- clear the rest.
     49   new_parsed->username = url_parse::Component();
     50   new_parsed->password = url_parse::Component();
     51   new_parsed->host = url_parse::Component();
     52   new_parsed->port = url_parse::Component();
     53   new_parsed->ref = url_parse::Component();
     54 
     55   // Scheme (known, so we don't bother running it through the more
     56   // complicated scheme canonicalizer).
     57   new_parsed->scheme.begin = output->length();
     58   output->Append("mailto:", 7);
     59   new_parsed->scheme.len = 6;
     60 
     61   bool success = true;
     62 
     63   // Path
     64   if (parsed.path.is_valid()) {
     65     new_parsed->path.begin = output->length();
     66 
     67     // Copy the path using path URL's more lax escaping rules.
     68     // We convert to UTF-8 and escape non-ASCII, but leave all
     69     // ASCII characters alone.
     70     int end = parsed.path.end();
     71     for (int i = parsed.path.begin; i < end; ++i) {
     72       UCHAR uch = static_cast<UCHAR>(source.path[i]);
     73       if (uch < 0x20 || uch >= 0x80)
     74         success &= AppendUTF8EscapedChar(source.path, &i, end, output);
     75       else
     76         output->push_back(static_cast<char>(uch));
     77     }
     78 
     79     new_parsed->path.len = output->length() - new_parsed->path.begin;
     80   } else {
     81     // No path at all
     82     new_parsed->path.reset();
     83   }
     84 
     85   // Query -- always use the default utf8 charset converter.
     86   CanonicalizeQuery(source.query, parsed.query, NULL,
     87                     output, &new_parsed->query);
     88 
     89   return success;
     90 }
     91 
     92 } // namespace
     93 
     94 bool CanonicalizeMailtoURL(const char* spec,
     95                           int spec_len,
     96                           const url_parse::Parsed& parsed,
     97                           CanonOutput* output,
     98                           url_parse::Parsed* new_parsed) {
     99   return DoCanonicalizeMailtoURL<char, unsigned char>(
    100       URLComponentSource<char>(spec), parsed, output, new_parsed);
    101 }
    102 
    103 bool CanonicalizeMailtoURL(const char16* spec,
    104                            int spec_len,
    105                            const url_parse::Parsed& parsed,
    106                            CanonOutput* output,
    107                            url_parse::Parsed* new_parsed) {
    108   return DoCanonicalizeMailtoURL<char16, char16>(
    109       URLComponentSource<char16>(spec), parsed, output, new_parsed);
    110 }
    111 
    112 bool ReplaceMailtoURL(const char* base,
    113                       const url_parse::Parsed& base_parsed,
    114                       const Replacements<char>& replacements,
    115                       CanonOutput* output,
    116                       url_parse::Parsed* new_parsed) {
    117   URLComponentSource<char> source(base);
    118   url_parse::Parsed parsed(base_parsed);
    119   SetupOverrideComponents(base, replacements, &source, &parsed);
    120   return DoCanonicalizeMailtoURL<char, unsigned char>(
    121       source, parsed, output, new_parsed);
    122 }
    123 
    124 bool ReplaceMailtoURL(const char* base,
    125                       const url_parse::Parsed& base_parsed,
    126                       const Replacements<char16>& replacements,
    127                       CanonOutput* output,
    128                       url_parse::Parsed* new_parsed) {
    129   RawCanonOutput<1024> utf8;
    130   URLComponentSource<char> source(base);
    131   url_parse::Parsed parsed(base_parsed);
    132   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
    133   return DoCanonicalizeMailtoURL<char, unsigned char>(
    134       source, parsed, output, new_parsed);
    135 }
    136 
    137 }  // namespace url_canon
    138