1 // Copyright 2014 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "src/runtime/runtime-utils.h" 6 7 #include "src/arguments.h" 8 #include "src/conversions.h" 9 #include "src/isolate-inl.h" 10 #include "src/objects-inl.h" 11 #include "src/string-search.h" 12 #include "src/utils.h" 13 14 namespace v8 { 15 namespace internal { 16 17 class URIUnescape : public AllStatic { 18 public: 19 template <typename Char> 20 MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate, 21 Handle<String> source); 22 23 private: 24 static const signed char kHexValue['g']; 25 26 template <typename Char> 27 MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow(Isolate* isolate, 28 Handle<String> string, 29 int start_index); 30 31 static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2)); 32 33 template <typename Char> 34 static INLINE(int UnescapeChar(Vector<const Char> vector, int i, int length, 35 int* step)); 36 }; 37 38 39 const signed char URIUnescape::kHexValue[] = { 40 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 41 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 42 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -0, 1, 2, 3, 4, 5, 43 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, 44 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 45 -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15}; 46 47 48 template <typename Char> 49 MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate, 50 Handle<String> source) { 51 int index; 52 { 53 DisallowHeapAllocation no_allocation; 54 StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%")); 55 index = search.Search(source->GetCharVector<Char>(), 0); 56 if (index < 0) return source; 57 } 58 return UnescapeSlow<Char>(isolate, source, index); 59 } 60 61 62 template <typename Char> 63 MaybeHandle<String> URIUnescape::UnescapeSlow(Isolate* isolate, 64 Handle<String> string, 65 int start_index) { 66 bool one_byte = true; 67 int length = string->length(); 68 69 int unescaped_length = 0; 70 { 71 DisallowHeapAllocation no_allocation; 72 Vector<const Char> vector = string->GetCharVector<Char>(); 73 for (int i = start_index; i < length; unescaped_length++) { 74 int step; 75 if (UnescapeChar(vector, i, length, &step) > 76 String::kMaxOneByteCharCode) { 77 one_byte = false; 78 } 79 i += step; 80 } 81 } 82 83 DCHECK(start_index < length); 84 Handle<String> first_part = 85 isolate->factory()->NewProperSubString(string, 0, start_index); 86 87 int dest_position = 0; 88 Handle<String> second_part; 89 DCHECK(unescaped_length <= String::kMaxLength); 90 if (one_byte) { 91 Handle<SeqOneByteString> dest = isolate->factory() 92 ->NewRawOneByteString(unescaped_length) 93 .ToHandleChecked(); 94 DisallowHeapAllocation no_allocation; 95 Vector<const Char> vector = string->GetCharVector<Char>(); 96 for (int i = start_index; i < length; dest_position++) { 97 int step; 98 dest->SeqOneByteStringSet(dest_position, 99 UnescapeChar(vector, i, length, &step)); 100 i += step; 101 } 102 second_part = dest; 103 } else { 104 Handle<SeqTwoByteString> dest = isolate->factory() 105 ->NewRawTwoByteString(unescaped_length) 106 .ToHandleChecked(); 107 DisallowHeapAllocation no_allocation; 108 Vector<const Char> vector = string->GetCharVector<Char>(); 109 for (int i = start_index; i < length; dest_position++) { 110 int step; 111 dest->SeqTwoByteStringSet(dest_position, 112 UnescapeChar(vector, i, length, &step)); 113 i += step; 114 } 115 second_part = dest; 116 } 117 return isolate->factory()->NewConsString(first_part, second_part); 118 } 119 120 121 int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) { 122 if (character1 > 'f') return -1; 123 int hi = kHexValue[character1]; 124 if (hi == -1) return -1; 125 if (character2 > 'f') return -1; 126 int lo = kHexValue[character2]; 127 if (lo == -1) return -1; 128 return (hi << 4) + lo; 129 } 130 131 132 template <typename Char> 133 int URIUnescape::UnescapeChar(Vector<const Char> vector, int i, int length, 134 int* step) { 135 uint16_t character = vector[i]; 136 int32_t hi = 0; 137 int32_t lo = 0; 138 if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' && 139 (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) != -1 && 140 (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) != -1) { 141 *step = 6; 142 return (hi << 8) + lo; 143 } else if (character == '%' && i <= length - 3 && 144 (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) != -1) { 145 *step = 3; 146 return lo; 147 } else { 148 *step = 1; 149 return character; 150 } 151 } 152 153 154 class URIEscape : public AllStatic { 155 public: 156 template <typename Char> 157 MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate, 158 Handle<String> string); 159 160 private: 161 static const char kHexChars[17]; 162 static const char kNotEscaped[256]; 163 164 static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; } 165 }; 166 167 168 const char URIEscape::kHexChars[] = "0123456789ABCDEF"; 169 170 171 // kNotEscaped is generated by the following: 172 // 173 // #!/bin/perl 174 // for (my $i = 0; $i < 256; $i++) { 175 // print "\n" if $i % 16 == 0; 176 // my $c = chr($i); 177 // my $escaped = 1; 178 // $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#; 179 // print $escaped ? "0, " : "1, "; 180 // } 181 182 const char URIEscape::kNotEscaped[] = { 183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 185 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 186 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 187 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 188 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 189 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 190 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 191 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 193 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 194 195 196 template <typename Char> 197 MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) { 198 DCHECK(string->IsFlat()); 199 int escaped_length = 0; 200 int length = string->length(); 201 202 { 203 DisallowHeapAllocation no_allocation; 204 Vector<const Char> vector = string->GetCharVector<Char>(); 205 for (int i = 0; i < length; i++) { 206 uint16_t c = vector[i]; 207 if (c >= 256) { 208 escaped_length += 6; 209 } else if (IsNotEscaped(c)) { 210 escaped_length++; 211 } else { 212 escaped_length += 3; 213 } 214 215 // We don't allow strings that are longer than a maximal length. 216 DCHECK(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow. 217 if (escaped_length > String::kMaxLength) break; // Provoke exception. 218 } 219 } 220 221 // No length change implies no change. Return original string if no change. 222 if (escaped_length == length) return string; 223 224 Handle<SeqOneByteString> dest; 225 ASSIGN_RETURN_ON_EXCEPTION( 226 isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length), 227 String); 228 int dest_position = 0; 229 230 { 231 DisallowHeapAllocation no_allocation; 232 Vector<const Char> vector = string->GetCharVector<Char>(); 233 for (int i = 0; i < length; i++) { 234 uint16_t c = vector[i]; 235 if (c >= 256) { 236 dest->SeqOneByteStringSet(dest_position, '%'); 237 dest->SeqOneByteStringSet(dest_position + 1, 'u'); 238 dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c >> 12]); 239 dest->SeqOneByteStringSet(dest_position + 3, kHexChars[(c >> 8) & 0xf]); 240 dest->SeqOneByteStringSet(dest_position + 4, kHexChars[(c >> 4) & 0xf]); 241 dest->SeqOneByteStringSet(dest_position + 5, kHexChars[c & 0xf]); 242 dest_position += 6; 243 } else if (IsNotEscaped(c)) { 244 dest->SeqOneByteStringSet(dest_position, c); 245 dest_position++; 246 } else { 247 dest->SeqOneByteStringSet(dest_position, '%'); 248 dest->SeqOneByteStringSet(dest_position + 1, kHexChars[c >> 4]); 249 dest->SeqOneByteStringSet(dest_position + 2, kHexChars[c & 0xf]); 250 dest_position += 3; 251 } 252 } 253 } 254 255 return dest; 256 } 257 258 259 RUNTIME_FUNCTION(Runtime_URIEscape) { 260 HandleScope scope(isolate); 261 DCHECK_EQ(1, args.length()); 262 CONVERT_ARG_HANDLE_CHECKED(Object, input, 0); 263 Handle<String> source; 264 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, source, 265 Object::ToString(isolate, input)); 266 source = String::Flatten(source); 267 Handle<String> result; 268 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 269 isolate, result, source->IsOneByteRepresentationUnderneath() 270 ? URIEscape::Escape<uint8_t>(isolate, source) 271 : URIEscape::Escape<uc16>(isolate, source)); 272 return *result; 273 } 274 275 276 RUNTIME_FUNCTION(Runtime_URIUnescape) { 277 HandleScope scope(isolate); 278 DCHECK(args.length() == 1); 279 CONVERT_ARG_HANDLE_CHECKED(Object, input, 0); 280 Handle<String> source; 281 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, source, 282 Object::ToString(isolate, input)); 283 source = String::Flatten(source); 284 Handle<String> result; 285 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 286 isolate, result, source->IsOneByteRepresentationUnderneath() 287 ? URIUnescape::Unescape<uint8_t>(isolate, source) 288 : URIUnescape::Unescape<uc16>(isolate, source)); 289 return *result; 290 } 291 292 } // namespace internal 293 } // namespace v8 294