// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file relies on the fact that the following declaration has been made
// in runtime.js:
// var $Array = global.Array;

// -------------------------------------------------------------------

// This file contains support for URI manipulations written in
// JavaScript.

// Lazily initialized.
38 var hexCharArray = 0; 39 var hexCharCodeArray = 0; 40 41 42 function URIAddEncodedOctetToBuffer(octet, result, index) { 43 result[index++] = 37; // Char code of '%'. 44 result[index++] = hexCharCodeArray[octet >> 4]; 45 result[index++] = hexCharCodeArray[octet & 0x0F]; 46 return index; 47 } 48 49 50 function URIEncodeOctets(octets, result, index) { 51 if (hexCharCodeArray === 0) { 52 hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 53 65, 66, 67, 68, 69, 70]; 54 } 55 index = URIAddEncodedOctetToBuffer(octets[0], result, index); 56 if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index); 57 if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index); 58 if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index); 59 return index; 60 } 61 62 63 function URIEncodeSingle(cc, result, index) { 64 var x = (cc >> 12) & 0xF; 65 var y = (cc >> 6) & 63; 66 var z = cc & 63; 67 var octets = new $Array(3); 68 if (cc <= 0x007F) { 69 octets[0] = cc; 70 } else if (cc <= 0x07FF) { 71 octets[0] = y + 192; 72 octets[1] = z + 128; 73 } else { 74 octets[0] = x + 224; 75 octets[1] = y + 128; 76 octets[2] = z + 128; 77 } 78 return URIEncodeOctets(octets, result, index); 79 } 80 81 82 function URIEncodePair(cc1 , cc2, result, index) { 83 var u = ((cc1 >> 6) & 0xF) + 1; 84 var w = (cc1 >> 2) & 0xF; 85 var x = cc1 & 3; 86 var y = (cc2 >> 6) & 0xF; 87 var z = cc2 & 63; 88 var octets = new $Array(4); 89 octets[0] = (u >> 2) + 240; 90 octets[1] = (((u & 3) << 4) | w) + 128; 91 octets[2] = ((x << 4) | y) + 128; 92 octets[3] = z + 128; 93 return URIEncodeOctets(octets, result, index); 94 } 95 96 97 function URIHexCharsToCharCode(highChar, lowChar) { 98 var highCode = HexValueOf(highChar); 99 var lowCode = HexValueOf(lowChar); 100 if (highCode == -1 || lowCode == -1) { 101 throw new $URIError("URI malformed"); 102 } 103 return (highCode << 4) | lowCode; 104 } 105 106 107 function URIDecodeOctets(octets, result, index) { 108 var 
value; 109 var o0 = octets[0]; 110 if (o0 < 0x80) { 111 value = o0; 112 } else if (o0 < 0xc2) { 113 throw new $URIError("URI malformed"); 114 } else { 115 var o1 = octets[1]; 116 if (o0 < 0xe0) { 117 var a = o0 & 0x1f; 118 if ((o1 < 0x80) || (o1 > 0xbf)) { 119 throw new $URIError("URI malformed"); 120 } 121 var b = o1 & 0x3f; 122 value = (a << 6) + b; 123 if (value < 0x80 || value > 0x7ff) { 124 throw new $URIError("URI malformed"); 125 } 126 } else { 127 var o2 = octets[2]; 128 if (o0 < 0xf0) { 129 var a = o0 & 0x0f; 130 if ((o1 < 0x80) || (o1 > 0xbf)) { 131 throw new $URIError("URI malformed"); 132 } 133 var b = o1 & 0x3f; 134 if ((o2 < 0x80) || (o2 > 0xbf)) { 135 throw new $URIError("URI malformed"); 136 } 137 var c = o2 & 0x3f; 138 value = (a << 12) + (b << 6) + c; 139 if ((value < 0x800) || (value > 0xffff)) { 140 throw new $URIError("URI malformed"); 141 } 142 } else { 143 var o3 = octets[3]; 144 if (o0 < 0xf8) { 145 var a = (o0 & 0x07); 146 if ((o1 < 0x80) || (o1 > 0xbf)) { 147 throw new $URIError("URI malformed"); 148 } 149 var b = (o1 & 0x3f); 150 if ((o2 < 0x80) || (o2 > 0xbf)) { 151 throw new $URIError("URI malformed"); 152 } 153 var c = (o2 & 0x3f); 154 if ((o3 < 0x80) || (o3 > 0xbf)) { 155 throw new $URIError("URI malformed"); 156 } 157 var d = (o3 & 0x3f); 158 value = (a << 18) + (b << 12) + (c << 6) + d; 159 if ((value < 0x10000) || (value > 0x10ffff)) { 160 throw new $URIError("URI malformed"); 161 } 162 } else { 163 throw new $URIError("URI malformed"); 164 } 165 } 166 } 167 } 168 if (0xD800 <= value && value <= 0xDFFF) { 169 throw new $URIError("URI malformed"); 170 } 171 if (value < 0x10000) { 172 %_TwoByteSeqStringSetChar(result, index++, value); 173 return index; 174 } else { 175 %_TwoByteSeqStringSetChar(result, index++, (value >> 10) + 0xd7c0); 176 %_TwoByteSeqStringSetChar(result, index++, (value & 0x3ff) + 0xdc00); 177 return index; 178 } 179 } 180 181 182 // ECMA-262, section 15.1.3 183 function Encode(uri, unescape) { 184 var uriLength = 
uri.length; 185 var array = new InternalArray(uriLength); 186 var index = 0; 187 for (var k = 0; k < uriLength; k++) { 188 var cc1 = uri.charCodeAt(k); 189 if (unescape(cc1)) { 190 array[index++] = cc1; 191 } else { 192 if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed"); 193 if (cc1 < 0xD800 || cc1 > 0xDBFF) { 194 index = URIEncodeSingle(cc1, array, index); 195 } else { 196 k++; 197 if (k == uriLength) throw new $URIError("URI malformed"); 198 var cc2 = uri.charCodeAt(k); 199 if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed"); 200 index = URIEncodePair(cc1, cc2, array, index); 201 } 202 } 203 } 204 205 var result = %NewString(array.length, NEW_ONE_BYTE_STRING); 206 for (var i = 0; i < array.length; i++) { 207 %_OneByteSeqStringSetChar(result, i, array[i]); 208 } 209 return result; 210 } 211 212 213 // ECMA-262, section 15.1.3 214 function Decode(uri, reserved) { 215 var uriLength = uri.length; 216 var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING); 217 var index = 0; 218 var k = 0; 219 220 // Optimistically assume ascii string. 221 for ( ; k < uriLength; k++) { 222 var code = uri.charCodeAt(k); 223 if (code == 37) { // '%' 224 if (k + 2 >= uriLength) throw new $URIError("URI malformed"); 225 var cc = URIHexCharsToCharCode(uri.charCodeAt(k+1), uri.charCodeAt(k+2)); 226 if (cc >> 7) break; // Assumption wrong, two byte string. 227 if (reserved(cc)) { 228 %_OneByteSeqStringSetChar(one_byte, index++, 37); // '%'. 229 %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+1)); 230 %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+2)); 231 } else { 232 %_OneByteSeqStringSetChar(one_byte, index++, cc); 233 } 234 k += 2; 235 } else { 236 if (code > 0x7f) break; // Assumption wrong, two byte string. 237 %_OneByteSeqStringSetChar(one_byte, index++, code); 238 } 239 } 240 241 one_byte = %TruncateString(one_byte, index); 242 if (k == uriLength) return one_byte; 243 244 // Write into two byte string. 
245 var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING); 246 index = 0; 247 248 for ( ; k < uriLength; k++) { 249 var code = uri.charCodeAt(k); 250 if (code == 37) { // '%' 251 if (k + 2 >= uriLength) throw new $URIError("URI malformed"); 252 var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k)); 253 if (cc >> 7) { 254 var n = 0; 255 while (((cc << ++n) & 0x80) != 0) { } 256 if (n == 1 || n > 4) throw new $URIError("URI malformed"); 257 var octets = new $Array(n); 258 octets[0] = cc; 259 if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed"); 260 for (var i = 1; i < n; i++) { 261 if (uri.charAt(++k) != '%') throw new $URIError("URI malformed"); 262 octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k), 263 uri.charCodeAt(++k)); 264 } 265 index = URIDecodeOctets(octets, two_byte, index); 266 } else if (reserved(cc)) { 267 %_TwoByteSeqStringSetChar(two_byte, index++, 37); // '%'. 268 %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k - 1)); 269 %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k)); 270 } else { 271 %_TwoByteSeqStringSetChar(two_byte, index++, cc); 272 } 273 } else { 274 %_TwoByteSeqStringSetChar(two_byte, index++, code); 275 } 276 } 277 278 two_byte = %TruncateString(two_byte, index); 279 return one_byte + two_byte; 280 } 281 282 283 // ECMA-262 - 15.1.3.1. 284 function URIDecode(uri) { 285 var reservedPredicate = function(cc) { 286 // #$ 287 if (35 <= cc && cc <= 36) return true; 288 // & 289 if (cc == 38) return true; 290 // +, 291 if (43 <= cc && cc <= 44) return true; 292 // / 293 if (cc == 47) return true; 294 // :; 295 if (58 <= cc && cc <= 59) return true; 296 // = 297 if (cc == 61) return true; 298 // ?@ 299 if (63 <= cc && cc <= 64) return true; 300 301 return false; 302 }; 303 var string = ToString(uri); 304 return Decode(string, reservedPredicate); 305 } 306 307 308 // ECMA-262 - 15.1.3.2. 
309 function URIDecodeComponent(component) { 310 var reservedPredicate = function(cc) { return false; }; 311 var string = ToString(component); 312 return Decode(string, reservedPredicate); 313 } 314 315 316 // Does the char code correspond to an alpha-numeric char. 317 function isAlphaNumeric(cc) { 318 // a - z 319 if (97 <= cc && cc <= 122) return true; 320 // A - Z 321 if (65 <= cc && cc <= 90) return true; 322 // 0 - 9 323 if (48 <= cc && cc <= 57) return true; 324 325 return false; 326 } 327 328 329 // ECMA-262 - 15.1.3.3. 330 function URIEncode(uri) { 331 var unescapePredicate = function(cc) { 332 if (isAlphaNumeric(cc)) return true; 333 // ! 334 if (cc == 33) return true; 335 // #$ 336 if (35 <= cc && cc <= 36) return true; 337 // &'()*+,-./ 338 if (38 <= cc && cc <= 47) return true; 339 // :; 340 if (58 <= cc && cc <= 59) return true; 341 // = 342 if (cc == 61) return true; 343 // ?@ 344 if (63 <= cc && cc <= 64) return true; 345 // _ 346 if (cc == 95) return true; 347 // ~ 348 if (cc == 126) return true; 349 350 return false; 351 }; 352 353 var string = ToString(uri); 354 return Encode(string, unescapePredicate); 355 } 356 357 358 // ECMA-262 - 15.1.3.4 359 function URIEncodeComponent(component) { 360 var unescapePredicate = function(cc) { 361 if (isAlphaNumeric(cc)) return true; 362 // ! 363 if (cc == 33) return true; 364 // '()* 365 if (39 <= cc && cc <= 42) return true; 366 // -. 
367 if (45 <= cc && cc <= 46) return true; 368 // _ 369 if (cc == 95) return true; 370 // ~ 371 if (cc == 126) return true; 372 373 return false; 374 }; 375 376 var string = ToString(component); 377 return Encode(string, unescapePredicate); 378 } 379 380 381 function HexValueOf(code) { 382 // 0-9 383 if (code >= 48 && code <= 57) return code - 48; 384 // A-F 385 if (code >= 65 && code <= 70) return code - 55; 386 // a-f 387 if (code >= 97 && code <= 102) return code - 87; 388 389 return -1; 390 } 391 392 393 // Convert a character code to 4-digit hex string representation 394 // 64 -> 0040, 62234 -> F31A. 395 function CharCodeToHex4Str(cc) { 396 var r = ""; 397 if (hexCharArray === 0) { 398 hexCharArray = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", 399 "A", "B", "C", "D", "E", "F"]; 400 } 401 for (var i = 0; i < 4; ++i) { 402 var c = hexCharArray[cc & 0x0F]; 403 r = c + r; 404 cc = cc >>> 4; 405 } 406 return r; 407 } 408 409 410 // Returns true if all digits in string s are valid hex numbers 411 function IsValidHex(s) { 412 for (var i = 0; i < s.length; ++i) { 413 var cc = s.charCodeAt(i); 414 if ((48 <= cc && cc <= 57) || 415 (65 <= cc && cc <= 70) || 416 (97 <= cc && cc <= 102)) { 417 // '0'..'9', 'A'..'F' and 'a' .. 'f'. 418 } else { 419 return false; 420 } 421 } 422 return true; 423 } 424 425 426 // ECMA-262 - B.2.1. 427 function URIEscape(str) { 428 var s = ToString(str); 429 return %URIEscape(s); 430 } 431 432 433 // ECMA-262 - B.2.2. 434 function URIUnescape(str) { 435 var s = ToString(str); 436 return %URIUnescape(s); 437 } 438 439 440 // ------------------------------------------------------------------- 441 442 function SetUpUri() { 443 %CheckIsBootstrapping(); 444 445 // Set up non-enumerable URI functions on the global object and set 446 // their names. 
447 InstallFunctions(global, DONT_ENUM, $Array( 448 "escape", URIEscape, 449 "unescape", URIUnescape, 450 "decodeURI", URIDecode, 451 "decodeURIComponent", URIDecodeComponent, 452 "encodeURI", URIEncode, 453 "encodeURIComponent", URIEncodeComponent 454 )); 455 } 456 457 SetUpUri(); 458