Home | History | Annotate | Download | only in runtime
      1 // Copyright 2014 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "src/runtime/runtime-utils.h"
      6 
      7 #include "src/arguments.h"
      8 #include "src/conversions.h"
      9 #include "src/isolate-inl.h"
     10 #include "src/objects-inl.h"
     11 #include "src/string-search.h"
     12 #include "src/utils.h"
     13 
     14 namespace v8 {
     15 namespace internal {
     16 
     17 class URIUnescape : public AllStatic {
     18  public:
     19   template <typename Char>
     20   MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate,
     21                                                       Handle<String> source);
     22 
     23  private:
     24   static const signed char kHexValue['g'];
     25 
     26   template <typename Char>
     27   MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow(Isolate* isolate,
     28                                                           Handle<String> string,
     29                                                           int start_index);
     30 
     31   static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
     32 
     33   template <typename Char>
     34   static INLINE(int UnescapeChar(Vector<const Char> vector, int i, int length,
     35                                  int* step));
     36 };
     37 
     38 
     39 const signed char URIUnescape::kHexValue[] = {
     40     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     41     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     42     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -0, 1,  2,  3,  4,  5,
     43     6,  7,  8,  9,  -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1,
     44     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     45     -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15};
     46 
     47 
     48 template <typename Char>
     49 MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate,
     50                                           Handle<String> source) {
     51   int index;
     52   {
     53     DisallowHeapAllocation no_allocation;
     54     StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
     55     index = search.Search(source->GetCharVector<Char>(), 0);
     56     if (index < 0) return source;
     57   }
     58   return UnescapeSlow<Char>(isolate, source, index);
     59 }
     60 
     61 
     62 template <typename Char>
     63 MaybeHandle<String> URIUnescape::UnescapeSlow(Isolate* isolate,
     64                                               Handle<String> string,
     65                                               int start_index) {
     66   bool one_byte = true;
     67   int length = string->length();
     68 
     69   int unescaped_length = 0;
     70   {
     71     DisallowHeapAllocation no_allocation;
     72     Vector<const Char> vector = string->GetCharVector<Char>();
     73     for (int i = start_index; i < length; unescaped_length++) {
     74       int step;
     75       if (UnescapeChar(vector, i, length, &step) >
     76           String::kMaxOneByteCharCode) {
     77         one_byte = false;
     78       }
     79       i += step;
     80     }
     81   }
     82 
     83   DCHECK(start_index < length);
     84   Handle<String> first_part =
     85       isolate->factory()->NewProperSubString(string, 0, start_index);
     86 
     87   int dest_position = 0;
     88   Handle<String> second_part;
     89   DCHECK(unescaped_length <= String::kMaxLength);
     90   if (one_byte) {
     91     Handle<SeqOneByteString> dest = isolate->factory()
     92                                         ->NewRawOneByteString(unescaped_length)
     93                                         .ToHandleChecked();
     94     DisallowHeapAllocation no_allocation;
     95     Vector<const Char> vector = string->GetCharVector<Char>();
     96     for (int i = start_index; i < length; dest_position++) {
     97       int step;
     98       dest->SeqOneByteStringSet(dest_position,
     99                                 UnescapeChar(vector, i, length, &step));
    100       i += step;
    101     }
    102     second_part = dest;
    103   } else {
    104     Handle<SeqTwoByteString> dest = isolate->factory()
    105                                         ->NewRawTwoByteString(unescaped_length)
    106                                         .ToHandleChecked();
    107     DisallowHeapAllocation no_allocation;
    108     Vector<const Char> vector = string->GetCharVector<Char>();
    109     for (int i = start_index; i < length; dest_position++) {
    110       int step;
    111       dest->SeqTwoByteStringSet(dest_position,
    112                                 UnescapeChar(vector, i, length, &step));
    113       i += step;
    114     }
    115     second_part = dest;
    116   }
    117   return isolate->factory()->NewConsString(first_part, second_part);
    118 }
    119 
    120 
    121 int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
    122   if (character1 > 'f') return -1;
    123   int hi = kHexValue[character1];
    124   if (hi == -1) return -1;
    125   if (character2 > 'f') return -1;
    126   int lo = kHexValue[character2];
    127   if (lo == -1) return -1;
    128   return (hi << 4) + lo;
    129 }
    130 
    131 
    132 template <typename Char>
    133 int URIUnescape::UnescapeChar(Vector<const Char> vector, int i, int length,
    134                               int* step) {
    135   uint16_t character = vector[i];
    136   int32_t hi = 0;
    137   int32_t lo = 0;
    138   if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' &&
    139       (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) != -1 &&
    140       (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) != -1) {
    141     *step = 6;
    142     return (hi << 8) + lo;
    143   } else if (character == '%' && i <= length - 3 &&
    144              (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) != -1) {
    145     *step = 3;
    146     return lo;
    147   } else {
    148     *step = 1;
    149     return character;
    150   }
    151 }
    152 
    153 
    154 class URIEscape : public AllStatic {
    155  public:
    156   template <typename Char>
    157   MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate,
    158                                                     Handle<String> string);
    159 
    160  private:
    161   static const char kHexChars[17];
    162   static const char kNotEscaped[256];
    163 
    164   static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
    165 };
    166 
    167 
    168 const char URIEscape::kHexChars[] = "0123456789ABCDEF";
    169 
    170 
    171 // kNotEscaped is generated by the following:
    172 //
    173 // #!/bin/perl
    174 // for (my $i = 0; $i < 256; $i++) {
    175 //   print "\n" if $i % 16 == 0;
    176 //   my $c = chr($i);
    177 //   my $escaped = 1;
    178 //   $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
    179 //   print $escaped ? "0, " : "1, ";
    180 // }
    181 
    182 const char URIEscape::kNotEscaped[] = {
    183     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    184     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
    185     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    186     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    187     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    188     1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    189     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    190     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    191     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    192     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    193     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    194 
    195 
    196 template <typename Char>
    197 MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
    198   DCHECK(string->IsFlat());
    199   int escaped_length = 0;
    200   int length = string->length();
    201 
    202   {
    203     DisallowHeapAllocation no_allocation;
    204     Vector<const Char> vector = string->GetCharVector<Char>();
    205     for (int i = 0; i < length; i++) {
    206       uint16_t c = vector[i];
    207       if (c >= 256) {
    208         escaped_length += 6;
    209       } else if (IsNotEscaped(c)) {
    210         escaped_length++;
    211       } else {
    212         escaped_length += 3;
    213       }
    214 
    215       // We don't allow strings that are longer than a maximal length.
    216       DCHECK(String::kMaxLength < 0x7fffffff - 6);     // Cannot overflow.
    217       if (escaped_length > String::kMaxLength) break;  // Provoke exception.
    218     }
    219   }
    220 
    221   // No length change implies no change.  Return original string if no change.
    222   if (escaped_length == length) return string;
    223 
    224   Handle<SeqOneByteString> dest;
    225   ASSIGN_RETURN_ON_EXCEPTION(
    226       isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
    227       String);
    228   int dest_position = 0;
    229 
    230   {
    231     DisallowHeapAllocation no_allocation;
    232     Vector<const Char> vector = string->GetCharVector<Char>();
    233     for (int i = 0; i < length; i++) {
    234       uint16_t c = vector[i];
    235       if (c >= 256) {
    236         dest->SeqOneByteStringSet(dest_position, '%');
    237         dest->SeqOneByteStringSet(dest_position + 1, 'u');
    238         dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c >> 12]);
    239         dest->SeqOneByteStringSet(dest_position + 3, kHexChars[(c >> 8) & 0xf]);
    240         dest->SeqOneByteStringSet(dest_position + 4, kHexChars[(c >> 4) & 0xf]);
    241         dest->SeqOneByteStringSet(dest_position + 5, kHexChars[c & 0xf]);
    242         dest_position += 6;
    243       } else if (IsNotEscaped(c)) {
    244         dest->SeqOneByteStringSet(dest_position, c);
    245         dest_position++;
    246       } else {
    247         dest->SeqOneByteStringSet(dest_position, '%');
    248         dest->SeqOneByteStringSet(dest_position + 1, kHexChars[c >> 4]);
    249         dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c & 0xf]);
    250         dest_position += 3;
    251       }
    252     }
    253   }
    254 
    255   return dest;
    256 }
    257 
    258 
    259 RUNTIME_FUNCTION(Runtime_URIEscape) {
    260   HandleScope scope(isolate);
    261   DCHECK_EQ(1, args.length());
    262   CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
    263   Handle<String> source;
    264   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, source,
    265                                      Object::ToString(isolate, input));
    266   source = String::Flatten(source);
    267   Handle<String> result;
    268   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
    269       isolate, result, source->IsOneByteRepresentationUnderneath()
    270                            ? URIEscape::Escape<uint8_t>(isolate, source)
    271                            : URIEscape::Escape<uc16>(isolate, source));
    272   return *result;
    273 }
    274 
    275 
    276 RUNTIME_FUNCTION(Runtime_URIUnescape) {
    277   HandleScope scope(isolate);
    278   DCHECK(args.length() == 1);
    279   CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
    280   Handle<String> source;
    281   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, source,
    282                                      Object::ToString(isolate, input));
    283   source = String::Flatten(source);
    284   Handle<String> result;
    285   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
    286       isolate, result, source->IsOneByteRepresentationUnderneath()
    287                            ? URIUnescape::Unescape<uint8_t>(isolate, source)
    288                            : URIUnescape::Unescape<uc16>(isolate, source));
    289   return *result;
    290 }
    291 
    292 }  // namespace internal
    293 }  // namespace v8
    294