1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28 // This file contains support for URI manipulations written in 29 // JavaScript. 30 31 // Expect $String = global.String; 32 33 // Lazily initialized. 34 var hexCharArray = 0; 35 var hexCharCodeArray = 0; 36 37 38 function URIAddEncodedOctetToBuffer(octet, result, index) { 39 result[index++] = 37; // Char code of '%'. 40 result[index++] = hexCharCodeArray[octet >> 4]; 41 result[index++] = hexCharCodeArray[octet & 0x0F]; 42 return index; 43 } 44 45 46 function URIEncodeOctets(octets, result, index) { 47 if (hexCharCodeArray === 0) { 48 hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 49 65, 66, 67, 68, 69, 70]; 50 } 51 index = URIAddEncodedOctetToBuffer(octets[0], result, index); 52 if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index); 53 if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index); 54 if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index); 55 return index; 56 } 57 58 59 function URIEncodeSingle(cc, result, index) { 60 var x = (cc >> 12) & 0xF; 61 var y = (cc >> 6) & 63; 62 var z = cc & 63; 63 var octets = new $Array(3); 64 if (cc <= 0x007F) { 65 octets[0] = cc; 66 } else if (cc <= 0x07FF) { 67 octets[0] = y + 192; 68 octets[1] = z + 128; 69 } else { 70 octets[0] = x + 224; 71 octets[1] = y + 128; 72 octets[2] = z + 128; 73 } 74 return URIEncodeOctets(octets, result, index); 75 } 76 77 78 function URIEncodePair(cc1 , cc2, result, index) { 79 var u = ((cc1 >> 6) & 0xF) + 1; 80 var w = (cc1 >> 2) & 0xF; 81 var x = cc1 & 3; 82 var y = (cc2 >> 6) & 0xF; 83 var z = cc2 & 63; 84 var octets = new $Array(4); 85 octets[0] = (u >> 2) + 240; 86 octets[1] = (((u & 3) << 4) | w) + 128; 87 octets[2] = ((x << 4) | y) + 128; 88 octets[3] = z + 128; 89 return URIEncodeOctets(octets, result, index); 90 } 91 92 93 function URIHexCharsToCharCode(highChar, lowChar) { 94 var highCode = HexValueOf(highChar); 95 var lowCode = HexValueOf(lowChar); 96 if (highCode == -1 || lowCode == -1) { 97 throw new $URIError("URI malformed"); 98 } 99 return (highCode << 4) | lowCode; 100 } 101 102 103 function URIDecodeOctets(octets, result, index) { 104 var value; 105 var o0 = octets[0]; 106 if (o0 < 0x80) { 107 value = o0; 108 } else if (o0 < 0xc2) { 109 throw new $URIError("URI malformed"); 110 } else { 111 var o1 = octets[1]; 112 if (o0 < 0xe0) { 113 var a = o0 & 0x1f; 114 if ((o1 < 0x80) || (o1 > 0xbf)) { 115 throw new $URIError("URI malformed"); 116 } 117 var b = o1 & 0x3f; 118 value = (a << 6) + b; 119 if (value < 0x80 || value > 0x7ff) { 120 throw new $URIError("URI malformed"); 121 } 122 } else { 123 var o2 = octets[2]; 124 if (o0 < 0xf0) { 125 var a = o0 & 0x0f; 126 if ((o1 < 0x80) || (o1 > 0xbf)) { 127 throw new $URIError("URI malformed"); 128 } 129 var b = o1 & 0x3f; 130 if ((o2 < 0x80) || (o2 > 0xbf)) { 131 throw new $URIError("URI malformed"); 132 } 133 var c = o2 & 0x3f; 134 value = (a << 12) + (b << 6) + c; 135 if ((value < 0x800) || (value > 0xffff)) { 136 throw new $URIError("URI malformed"); 137 } 138 } else { 139 var o3 = octets[3]; 140 if (o0 < 0xf8) { 141 var a = (o0 & 0x07); 142 if ((o1 < 0x80) || (o1 > 0xbf)) { 143 throw new $URIError("URI malformed"); 144 } 145 var b = (o1 & 0x3f); 146 if ((o2 < 0x80) || (o2 > 0xbf)) { 147 throw new $URIError("URI malformed"); 148 } 149 var c = (o2 & 0x3f); 150 if ((o3 < 0x80) || (o3 > 0xbf)) { 151 throw new $URIError("URI malformed"); 152 } 153 var d = (o3 & 0x3f); 154 value = (a << 18) + (b << 12) + (c << 6) + d; 155 if ((value < 0x10000) || (value > 0x10ffff)) { 156 throw new $URIError("URI malformed"); 157 } 158 } else { 159 throw new $URIError("URI malformed"); 160 } 161 } 162 } 163 } 164 if (0xD800 <= value && value <= 0xDFFF) { 165 throw new $URIError("URI malformed"); 166 } 167 if (value < 0x10000) { 168 result[index++] = value; 169 return index; 170 } else { 171 result[index++] = (value >> 10) + 0xd7c0; 172 result[index++] = (value & 0x3ff) + 0xdc00; 173 return index; 174 } 175 } 176 177 178 // ECMA-262, section 15.1.3 179 function Encode(uri, unescape) { 180 var uriLength = uri.length; 181 // We are going to pass result to %StringFromCharCodeArray 182 // which does not expect any getters/setters installed 183 // on the incoming array. 184 var result = new InternalArray(uriLength); 185 var index = 0; 186 for (var k = 0; k < uriLength; k++) { 187 var cc1 = uri.charCodeAt(k); 188 if (unescape(cc1)) { 189 result[index++] = cc1; 190 } else { 191 if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed"); 192 if (cc1 < 0xD800 || cc1 > 0xDBFF) { 193 index = URIEncodeSingle(cc1, result, index); 194 } else { 195 k++; 196 if (k == uriLength) throw new $URIError("URI malformed"); 197 var cc2 = uri.charCodeAt(k); 198 if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed"); 199 index = URIEncodePair(cc1, cc2, result, index); 200 } 201 } 202 } 203 return %StringFromCharCodeArray(result); 204 } 205 206 207 // ECMA-262, section 15.1.3 208 function Decode(uri, reserved) { 209 var uriLength = uri.length; 210 // We are going to pass result to %StringFromCharCodeArray 211 // which does not expect any getters/setters installed 212 // on the incoming array. 213 var result = new InternalArray(uriLength); 214 var index = 0; 215 for (var k = 0; k < uriLength; k++) { 216 var ch = uri.charAt(k); 217 if (ch == '%') { 218 if (k + 2 >= uriLength) throw new $URIError("URI malformed"); 219 var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k)); 220 if (cc >> 7) { 221 var n = 0; 222 while (((cc << ++n) & 0x80) != 0) { } 223 if (n == 1 || n > 4) throw new $URIError("URI malformed"); 224 var octets = new $Array(n); 225 octets[0] = cc; 226 if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed"); 227 for (var i = 1; i < n; i++) { 228 if (uri.charAt(++k) != '%') throw new $URIError("URI malformed"); 229 octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k), 230 uri.charCodeAt(++k)); 231 } 232 index = URIDecodeOctets(octets, result, index); 233 } else { 234 if (reserved(cc)) { 235 result[index++] = 37; // Char code of '%'. 236 result[index++] = uri.charCodeAt(k - 1); 237 result[index++] = uri.charCodeAt(k); 238 } else { 239 result[index++] = cc; 240 } 241 } 242 } else { 243 result[index++] = ch.charCodeAt(0); 244 } 245 } 246 result.length = index; 247 return %StringFromCharCodeArray(result); 248 } 249 250 251 // ECMA-262 - 15.1.3.1. 252 function URIDecode(uri) { 253 var reservedPredicate = function(cc) { 254 // #$ 255 if (35 <= cc && cc <= 36) return true; 256 // & 257 if (cc == 38) return true; 258 // +, 259 if (43 <= cc && cc <= 44) return true; 260 // / 261 if (cc == 47) return true; 262 // :; 263 if (58 <= cc && cc <= 59) return true; 264 // = 265 if (cc == 61) return true; 266 // ?@ 267 if (63 <= cc && cc <= 64) return true; 268 269 return false; 270 }; 271 var string = ToString(uri); 272 return Decode(string, reservedPredicate); 273 } 274 275 276 // ECMA-262 - 15.1.3.2. 277 function URIDecodeComponent(component) { 278 var reservedPredicate = function(cc) { return false; }; 279 var string = ToString(component); 280 return Decode(string, reservedPredicate); 281 } 282 283 284 // Does the char code correspond to an alpha-numeric char. 285 function isAlphaNumeric(cc) { 286 // a - z 287 if (97 <= cc && cc <= 122) return true; 288 // A - Z 289 if (65 <= cc && cc <= 90) return true; 290 // 0 - 9 291 if (48 <= cc && cc <= 57) return true; 292 293 return false; 294 } 295 296 297 // ECMA-262 - 15.1.3.3. 298 function URIEncode(uri) { 299 var unescapePredicate = function(cc) { 300 if (isAlphaNumeric(cc)) return true; 301 // ! 302 if (cc == 33) return true; 303 // #$ 304 if (35 <= cc && cc <= 36) return true; 305 // &'()*+,-./ 306 if (38 <= cc && cc <= 47) return true; 307 // :; 308 if (58 <= cc && cc <= 59) return true; 309 // = 310 if (cc == 61) return true; 311 // ?@ 312 if (63 <= cc && cc <= 64) return true; 313 // _ 314 if (cc == 95) return true; 315 // ~ 316 if (cc == 126) return true; 317 318 return false; 319 }; 320 321 var string = ToString(uri); 322 return Encode(string, unescapePredicate); 323 } 324 325 326 // ECMA-262 - 15.1.3.4 327 function URIEncodeComponent(component) { 328 var unescapePredicate = function(cc) { 329 if (isAlphaNumeric(cc)) return true; 330 // ! 331 if (cc == 33) return true; 332 // '()* 333 if (39 <= cc && cc <= 42) return true; 334 // -. 335 if (45 <= cc && cc <= 46) return true; 336 // _ 337 if (cc == 95) return true; 338 // ~ 339 if (cc == 126) return true; 340 341 return false; 342 }; 343 344 var string = ToString(component); 345 return Encode(string, unescapePredicate); 346 } 347 348 349 function HexValueOf(code) { 350 // 0-9 351 if (code >= 48 && code <= 57) return code - 48; 352 // A-F 353 if (code >= 65 && code <= 70) return code - 55; 354 // a-f 355 if (code >= 97 && code <= 102) return code - 87; 356 357 return -1; 358 } 359 360 361 // Convert a character code to 4-digit hex string representation 362 // 64 -> 0040, 62234 -> F31A. 363 function CharCodeToHex4Str(cc) { 364 var r = ""; 365 if (hexCharArray === 0) { 366 hexCharArray = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", 367 "A", "B", "C", "D", "E", "F"]; 368 } 369 for (var i = 0; i < 4; ++i) { 370 var c = hexCharArray[cc & 0x0F]; 371 r = c + r; 372 cc = cc >>> 4; 373 } 374 return r; 375 } 376 377 378 // Returns true if all digits in string s are valid hex numbers 379 function IsValidHex(s) { 380 for (var i = 0; i < s.length; ++i) { 381 var cc = s.charCodeAt(i); 382 if ((48 <= cc && cc <= 57) || 383 (65 <= cc && cc <= 70) || 384 (97 <= cc && cc <= 102)) { 385 // '0'..'9', 'A'..'F' and 'a' .. 'f'. 386 } else { 387 return false; 388 } 389 } 390 return true; 391 } 392 393 394 // ECMA-262 - B.2.1. 395 function URIEscape(str) { 396 var s = ToString(str); 397 return %URIEscape(s); 398 } 399 400 401 // ECMA-262 - B.2.2. 402 function URIUnescape(str) { 403 var s = ToString(str); 404 return %URIUnescape(s); 405 } 406 407 408 // ------------------------------------------------------------------- 409 410 function SetUpUri() { 411 %CheckIsBootstrapping(); 412 // Set up non-enumerable URI functions on the global object and set 413 // their names. 414 InstallFunctions(global, DONT_ENUM, $Array( 415 "escape", URIEscape, 416 "unescape", URIUnescape, 417 "decodeURI", URIDecode, 418 "decodeURIComponent", URIDecodeComponent, 419 "encodeURI", URIEncode, 420 "encodeURIComponent", URIEncodeComponent 421 )); 422 } 423 424 SetUpUri(); 425