// Copyright 2013 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28 #ifndef V8_URI_H_ 29 #define V8_URI_H_ 30 31 #include "v8.h" 32 33 #include "string-search.h" 34 #include "v8utils.h" 35 #include "v8conversions.h" 36 37 namespace v8 { 38 namespace internal { 39 40 41 template <typename Char> 42 static INLINE(Vector<const Char> GetCharVector(Handle<String> string)); 43 44 45 template <> 46 Vector<const uint8_t> GetCharVector(Handle<String> string) { 47 String::FlatContent flat = string->GetFlatContent(); 48 ASSERT(flat.IsAscii()); 49 return flat.ToOneByteVector(); 50 } 51 52 53 template <> 54 Vector<const uc16> GetCharVector(Handle<String> string) { 55 String::FlatContent flat = string->GetFlatContent(); 56 ASSERT(flat.IsTwoByte()); 57 return flat.ToUC16Vector(); 58 } 59 60 61 class URIUnescape : public AllStatic { 62 public: 63 template<typename Char> 64 static Handle<String> Unescape(Isolate* isolate, Handle<String> source); 65 66 private: 67 static const signed char kHexValue['g']; 68 69 template<typename Char> 70 static Handle<String> UnescapeSlow( 71 Isolate* isolate, Handle<String> string, int start_index); 72 73 static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2)); 74 75 template <typename Char> 76 static INLINE(int UnescapeChar(Vector<const Char> vector, 77 int i, 78 int length, 79 int* step)); 80 }; 81 82 83 const signed char URIUnescape::kHexValue[] = { 84 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 85 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 86 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 87 -0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, 88 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, 89 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 90 -1, 10, 11, 12, 13, 14, 15 }; 91 92 93 template<typename Char> 94 Handle<String> URIUnescape::Unescape(Isolate* isolate, Handle<String> source) { 95 int index; 96 { DisallowHeapAllocation no_allocation; 97 StringSearch<uint8_t, Char> search(isolate, 
STATIC_ASCII_VECTOR("%")); 98 index = search.Search(GetCharVector<Char>(source), 0); 99 if (index < 0) return source; 100 } 101 return UnescapeSlow<Char>(isolate, source, index); 102 } 103 104 105 template <typename Char> 106 Handle<String> URIUnescape::UnescapeSlow( 107 Isolate* isolate, Handle<String> string, int start_index) { 108 bool one_byte = true; 109 int length = string->length(); 110 111 int unescaped_length = 0; 112 { DisallowHeapAllocation no_allocation; 113 Vector<const Char> vector = GetCharVector<Char>(string); 114 for (int i = start_index; i < length; unescaped_length++) { 115 int step; 116 if (UnescapeChar(vector, i, length, &step) > 117 String::kMaxOneByteCharCode) { 118 one_byte = false; 119 } 120 i += step; 121 } 122 } 123 124 ASSERT(start_index < length); 125 Handle<String> first_part = 126 isolate->factory()->NewProperSubString(string, 0, start_index); 127 128 int dest_position = 0; 129 Handle<String> second_part; 130 if (one_byte) { 131 Handle<SeqOneByteString> dest = 132 isolate->factory()->NewRawOneByteString(unescaped_length); 133 DisallowHeapAllocation no_allocation; 134 Vector<const Char> vector = GetCharVector<Char>(string); 135 for (int i = start_index; i < length; dest_position++) { 136 int step; 137 dest->SeqOneByteStringSet(dest_position, 138 UnescapeChar(vector, i, length, &step)); 139 i += step; 140 } 141 second_part = dest; 142 } else { 143 Handle<SeqTwoByteString> dest = 144 isolate->factory()->NewRawTwoByteString(unescaped_length); 145 DisallowHeapAllocation no_allocation; 146 Vector<const Char> vector = GetCharVector<Char>(string); 147 for (int i = start_index; i < length; dest_position++) { 148 int step; 149 dest->SeqTwoByteStringSet(dest_position, 150 UnescapeChar(vector, i, length, &step)); 151 i += step; 152 } 153 second_part = dest; 154 } 155 return isolate->factory()->NewConsString(first_part, second_part); 156 } 157 158 159 int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) { 160 if (character1 > 
'f') return -1; 161 int hi = kHexValue[character1]; 162 if (hi == -1) return -1; 163 if (character2 > 'f') return -1; 164 int lo = kHexValue[character2]; 165 if (lo == -1) return -1; 166 return (hi << 4) + lo; 167 } 168 169 170 template <typename Char> 171 int URIUnescape::UnescapeChar(Vector<const Char> vector, 172 int i, 173 int length, 174 int* step) { 175 uint16_t character = vector[i]; 176 int32_t hi = 0; 177 int32_t lo = 0; 178 if (character == '%' && 179 i <= length - 6 && 180 vector[i + 1] == 'u' && 181 (hi = TwoDigitHex(vector[i + 2], 182 vector[i + 3])) != -1 && 183 (lo = TwoDigitHex(vector[i + 4], 184 vector[i + 5])) != -1) { 185 *step = 6; 186 return (hi << 8) + lo; 187 } else if (character == '%' && 188 i <= length - 3 && 189 (lo = TwoDigitHex(vector[i + 1], 190 vector[i + 2])) != -1) { 191 *step = 3; 192 return lo; 193 } else { 194 *step = 1; 195 return character; 196 } 197 } 198 199 200 class URIEscape : public AllStatic { 201 public: 202 template<typename Char> 203 static Handle<String> Escape(Isolate* isolate, Handle<String> string); 204 205 private: 206 static const char kHexChars[17]; 207 static const char kNotEscaped[256]; 208 209 static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; } 210 }; 211 212 213 const char URIEscape::kHexChars[] = "0123456789ABCDEF"; 214 215 216 // kNotEscaped is generated by the following: 217 // 218 // #!/bin/perl 219 // for (my $i = 0; $i < 256; $i++) { 220 // print "\n" if $i % 16 == 0; 221 // my $c = chr($i); 222 // my $escaped = 1; 223 // $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#; 224 // print $escaped ? 
"0, " : "1, "; 225 // } 226 227 const char URIEscape::kNotEscaped[] = { 228 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 229 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 230 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 231 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 232 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 233 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 234 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 235 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 236 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 237 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 238 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 239 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 241 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 242 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 244 245 246 template<typename Char> 247 Handle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) { 248 ASSERT(string->IsFlat()); 249 int escaped_length = 0; 250 int length = string->length(); 251 252 { DisallowHeapAllocation no_allocation; 253 Vector<const Char> vector = GetCharVector<Char>(string); 254 for (int i = 0; i < length; i++) { 255 uint16_t c = vector[i]; 256 if (c >= 256) { 257 escaped_length += 6; 258 } else if (IsNotEscaped(c)) { 259 escaped_length++; 260 } else { 261 escaped_length += 3; 262 } 263 264 // We don't allow strings that are longer than a maximal length. 265 ASSERT(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow. 266 if (escaped_length > String::kMaxLength) { 267 isolate->context()->mark_out_of_memory(); 268 return Handle<String>::null(); 269 } 270 } 271 } 272 273 // No length change implies no change. Return original string if no change. 
274 if (escaped_length == length) return string; 275 276 Handle<SeqOneByteString> dest = 277 isolate->factory()->NewRawOneByteString(escaped_length); 278 int dest_position = 0; 279 280 { DisallowHeapAllocation no_allocation; 281 Vector<const Char> vector = GetCharVector<Char>(string); 282 for (int i = 0; i < length; i++) { 283 uint16_t c = vector[i]; 284 if (c >= 256) { 285 dest->SeqOneByteStringSet(dest_position, '%'); 286 dest->SeqOneByteStringSet(dest_position+1, 'u'); 287 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]); 288 dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]); 289 dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]); 290 dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]); 291 dest_position += 6; 292 } else if (IsNotEscaped(c)) { 293 dest->SeqOneByteStringSet(dest_position, c); 294 dest_position++; 295 } else { 296 dest->SeqOneByteStringSet(dest_position, '%'); 297 dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]); 298 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]); 299 dest_position += 3; 300 } 301 } 302 } 303 304 return dest; 305 } 306 307 } } // namespace v8::internal 308 309 #endif // V8_URI_H_ 310