Home | History | Annotate | Download | only in src
      1 // Copyright 2013 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #ifndef V8_URI_H_
     29 #define V8_URI_H_
     30 
     31 #include "v8.h"
     32 
     33 #include "string-search.h"
     34 #include "v8utils.h"
     35 #include "v8conversions.h"
     36 
     37 namespace v8 {
     38 namespace internal {
     39 
     40 
// Returns the characters of |string| as a vector of Char.  Only the
// declaration lives here; the behaviour is supplied by the explicit
// specializations for uint8_t and uc16 that follow.
template <typename Char>
static INLINE(Vector<const Char> GetCharVector(Handle<String> string));
     43 
     44 
     45 template <>
     46 Vector<const uint8_t> GetCharVector(Handle<String> string) {
     47   String::FlatContent flat = string->GetFlatContent();
     48   ASSERT(flat.IsAscii());
     49   return flat.ToOneByteVector();
     50 }
     51 
     52 
     53 template <>
     54 Vector<const uc16> GetCharVector(Handle<String> string) {
     55   String::FlatContent flat = string->GetFlatContent();
     56   ASSERT(flat.IsTwoByte());
     57   return flat.ToUC16Vector();
     58 }
     59 
     60 
     61 class URIUnescape : public AllStatic {
     62  public:
     63   template<typename Char>
     64   static Handle<String> Unescape(Isolate* isolate, Handle<String> source);
     65 
     66  private:
     67   static const signed char kHexValue['g'];
     68 
     69   template<typename Char>
     70   static Handle<String> UnescapeSlow(
     71       Isolate* isolate, Handle<String> string, int start_index);
     72 
     73   static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
     74 
     75   template <typename Char>
     76   static INLINE(int UnescapeChar(Vector<const Char> vector,
     77                                  int i,
     78                                  int length,
     79                                  int* step));
     80 };
     81 
     82 
     83 const signed char URIUnescape::kHexValue[] = {
     84     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     85     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     86     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     87     -0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
     88     -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     89     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     90     -1, 10, 11, 12, 13, 14, 15 };
     91 
     92 
     93 template<typename Char>
     94 Handle<String> URIUnescape::Unescape(Isolate* isolate, Handle<String> source) {
     95   int index;
     96   { DisallowHeapAllocation no_allocation;
     97     StringSearch<uint8_t, Char> search(isolate, STATIC_ASCII_VECTOR("%"));
     98     index = search.Search(GetCharVector<Char>(source), 0);
     99     if (index < 0) return source;
    100   }
    101   return UnescapeSlow<Char>(isolate, source, index);
    102 }
    103 
    104 
    105 template <typename Char>
    106 Handle<String> URIUnescape::UnescapeSlow(
    107     Isolate* isolate, Handle<String> string, int start_index) {
    108   bool one_byte = true;
    109   int length = string->length();
    110 
    111   int unescaped_length = 0;
    112   { DisallowHeapAllocation no_allocation;
    113     Vector<const Char> vector = GetCharVector<Char>(string);
    114     for (int i = start_index; i < length; unescaped_length++) {
    115       int step;
    116       if (UnescapeChar(vector, i, length, &step) >
    117               String::kMaxOneByteCharCode) {
    118         one_byte = false;
    119       }
    120       i += step;
    121     }
    122   }
    123 
    124   ASSERT(start_index < length);
    125   Handle<String> first_part =
    126       isolate->factory()->NewProperSubString(string, 0, start_index);
    127 
    128   int dest_position = 0;
    129   Handle<String> second_part;
    130   if (one_byte) {
    131     Handle<SeqOneByteString> dest =
    132         isolate->factory()->NewRawOneByteString(unescaped_length);
    133     DisallowHeapAllocation no_allocation;
    134     Vector<const Char> vector = GetCharVector<Char>(string);
    135     for (int i = start_index; i < length; dest_position++) {
    136       int step;
    137       dest->SeqOneByteStringSet(dest_position,
    138                                 UnescapeChar(vector, i, length, &step));
    139       i += step;
    140     }
    141     second_part = dest;
    142   } else {
    143     Handle<SeqTwoByteString> dest =
    144         isolate->factory()->NewRawTwoByteString(unescaped_length);
    145     DisallowHeapAllocation no_allocation;
    146     Vector<const Char> vector = GetCharVector<Char>(string);
    147     for (int i = start_index; i < length; dest_position++) {
    148       int step;
    149       dest->SeqTwoByteStringSet(dest_position,
    150                                 UnescapeChar(vector, i, length, &step));
    151       i += step;
    152     }
    153     second_part = dest;
    154   }
    155   return isolate->factory()->NewConsString(first_part, second_part);
    156 }
    157 
    158 
    159 int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
    160   if (character1 > 'f') return -1;
    161   int hi = kHexValue[character1];
    162   if (hi == -1) return -1;
    163   if (character2 > 'f') return -1;
    164   int lo = kHexValue[character2];
    165   if (lo == -1) return -1;
    166   return (hi << 4) + lo;
    167 }
    168 
    169 
    170 template <typename Char>
    171 int URIUnescape::UnescapeChar(Vector<const Char> vector,
    172                               int i,
    173                               int length,
    174                               int* step) {
    175   uint16_t character = vector[i];
    176   int32_t hi = 0;
    177   int32_t lo = 0;
    178   if (character == '%' &&
    179       i <= length - 6 &&
    180       vector[i + 1] == 'u' &&
    181       (hi = TwoDigitHex(vector[i + 2],
    182                         vector[i + 3])) != -1 &&
    183       (lo = TwoDigitHex(vector[i + 4],
    184                         vector[i + 5])) != -1) {
    185     *step = 6;
    186     return (hi << 8) + lo;
    187   } else if (character == '%' &&
    188       i <= length - 3 &&
    189       (lo = TwoDigitHex(vector[i + 1],
    190                         vector[i + 2])) != -1) {
    191     *step = 3;
    192     return lo;
    193   } else {
    194     *step = 1;
    195     return character;
    196   }
    197 }
    198 
    199 
    200 class URIEscape : public AllStatic {
    201  public:
    202   template<typename Char>
    203   static Handle<String> Escape(Isolate* isolate, Handle<String> string);
    204 
    205  private:
    206   static const char kHexChars[17];
    207   static const char kNotEscaped[256];
    208 
    209   static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
    210 };
    211 
    212 
// Upper-case hexadecimal digits, indexed by nibble value (0-15).  The member
// is declared with 17 elements: the 16 digits plus the string literal's
// terminating NUL.
const char URIEscape::kHexChars[] = "0123456789ABCDEF";
    214 
    215 
    216 // kNotEscaped is generated by the following:
    217 //
    218 // #!/bin/perl
    219 // for (my $i = 0; $i < 256; $i++) {
    220 //   print "\n" if $i % 16 == 0;
    221 //   my $c = chr($i);
    222 //   my $escaped = 1;
    223 //   $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
    224 //   print $escaped ? "0, " : "1, ";
    225 // }
    226 
    227 const char URIEscape::kNotEscaped[] = {
    228     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    229     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    230     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
    231     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    232     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    233     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    234     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    235     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    236     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    237     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    238     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    239     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    240     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    241     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    242     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    243     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    244 
    245 
    246 template<typename Char>
    247 Handle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
    248   ASSERT(string->IsFlat());
    249   int escaped_length = 0;
    250   int length = string->length();
    251 
    252   { DisallowHeapAllocation no_allocation;
    253     Vector<const Char> vector = GetCharVector<Char>(string);
    254     for (int i = 0; i < length; i++) {
    255       uint16_t c = vector[i];
    256       if (c >= 256) {
    257         escaped_length += 6;
    258       } else if (IsNotEscaped(c)) {
    259         escaped_length++;
    260       } else {
    261         escaped_length += 3;
    262       }
    263 
    264       // We don't allow strings that are longer than a maximal length.
    265       ASSERT(String::kMaxLength < 0x7fffffff - 6);  // Cannot overflow.
    266       if (escaped_length > String::kMaxLength) {
    267         isolate->context()->mark_out_of_memory();
    268         return Handle<String>::null();
    269       }
    270     }
    271   }
    272 
    273   // No length change implies no change.  Return original string if no change.
    274   if (escaped_length == length) return string;
    275 
    276   Handle<SeqOneByteString> dest =
    277       isolate->factory()->NewRawOneByteString(escaped_length);
    278   int dest_position = 0;
    279 
    280   { DisallowHeapAllocation no_allocation;
    281     Vector<const Char> vector = GetCharVector<Char>(string);
    282     for (int i = 0; i < length; i++) {
    283       uint16_t c = vector[i];
    284       if (c >= 256) {
    285         dest->SeqOneByteStringSet(dest_position, '%');
    286         dest->SeqOneByteStringSet(dest_position+1, 'u');
    287         dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]);
    288         dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]);
    289         dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]);
    290         dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]);
    291         dest_position += 6;
    292       } else if (IsNotEscaped(c)) {
    293         dest->SeqOneByteStringSet(dest_position, c);
    294         dest_position++;
    295       } else {
    296         dest->SeqOneByteStringSet(dest_position, '%');
    297         dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]);
    298         dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]);
    299         dest_position += 3;
    300       }
    301     }
    302   }
    303 
    304   return dest;
    305 }
    306 
    307 } }  // namespace v8::internal
    308 
    309 #endif  // V8_URI_H_
    310