1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // This file contains support for URI manipulations written in 6 // JavaScript. 7 8 (function(global, utils) { 9 10 "use strict"; 11 12 %CheckIsBootstrapping(); 13 14 //- ------------------------------------------------------------------ 15 // Imports 16 17 var GlobalObject = global.Object; 18 var GlobalArray = global.Array; 19 var InternalArray = utils.InternalArray; 20 var MakeURIError; 21 22 utils.Import(function(from) { 23 MakeURIError = from.MakeURIError; 24 }); 25 26 27 // ------------------------------------------------------------------- 28 // Define internal helper functions. 29 30 function HexValueOf(code) { 31 // 0-9 32 if (code >= 48 && code <= 57) return code - 48; 33 // A-F 34 if (code >= 65 && code <= 70) return code - 55; 35 // a-f 36 if (code >= 97 && code <= 102) return code - 87; 37 38 return -1; 39 } 40 41 // Does the char code correspond to an alpha-numeric char. 42 function isAlphaNumeric(cc) { 43 // a - z 44 if (97 <= cc && cc <= 122) return true; 45 // A - Z 46 if (65 <= cc && cc <= 90) return true; 47 // 0 - 9 48 if (48 <= cc && cc <= 57) return true; 49 50 return false; 51 } 52 53 // Lazily initialized. 54 var hexCharCodeArray = 0; 55 56 function URIAddEncodedOctetToBuffer(octet, result, index) { 57 result[index++] = 37; // Char code of '%'. 
58 result[index++] = hexCharCodeArray[octet >> 4]; 59 result[index++] = hexCharCodeArray[octet & 0x0F]; 60 return index; 61 } 62 63 function URIEncodeOctets(octets, result, index) { 64 if (hexCharCodeArray === 0) { 65 hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 66 65, 66, 67, 68, 69, 70]; 67 } 68 index = URIAddEncodedOctetToBuffer(octets[0], result, index); 69 if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index); 70 if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index); 71 if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index); 72 return index; 73 } 74 75 function URIEncodeSingle(cc, result, index) { 76 var x = (cc >> 12) & 0xF; 77 var y = (cc >> 6) & 63; 78 var z = cc & 63; 79 var octets = new GlobalArray(3); 80 if (cc <= 0x007F) { 81 octets[0] = cc; 82 } else if (cc <= 0x07FF) { 83 octets[0] = y + 192; 84 octets[1] = z + 128; 85 } else { 86 octets[0] = x + 224; 87 octets[1] = y + 128; 88 octets[2] = z + 128; 89 } 90 return URIEncodeOctets(octets, result, index); 91 } 92 93 function URIEncodePair(cc1 , cc2, result, index) { 94 var u = ((cc1 >> 6) & 0xF) + 1; 95 var w = (cc1 >> 2) & 0xF; 96 var x = cc1 & 3; 97 var y = (cc2 >> 6) & 0xF; 98 var z = cc2 & 63; 99 var octets = new GlobalArray(4); 100 octets[0] = (u >> 2) + 240; 101 octets[1] = (((u & 3) << 4) | w) + 128; 102 octets[2] = ((x << 4) | y) + 128; 103 octets[3] = z + 128; 104 return URIEncodeOctets(octets, result, index); 105 } 106 107 function URIHexCharsToCharCode(highChar, lowChar) { 108 var highCode = HexValueOf(highChar); 109 var lowCode = HexValueOf(lowChar); 110 if (highCode == -1 || lowCode == -1) throw MakeURIError(); 111 return (highCode << 4) | lowCode; 112 } 113 114 // Callers must ensure that |result| is a sufficiently long sequential 115 // two-byte string! 
116 function URIDecodeOctets(octets, result, index) { 117 var value; 118 var o0 = octets[0]; 119 if (o0 < 0x80) { 120 value = o0; 121 } else if (o0 < 0xc2) { 122 throw MakeURIError(); 123 } else { 124 var o1 = octets[1]; 125 if (o0 < 0xe0) { 126 var a = o0 & 0x1f; 127 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError(); 128 var b = o1 & 0x3f; 129 value = (a << 6) + b; 130 if (value < 0x80 || value > 0x7ff) throw MakeURIError(); 131 } else { 132 var o2 = octets[2]; 133 if (o0 < 0xf0) { 134 var a = o0 & 0x0f; 135 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError(); 136 var b = o1 & 0x3f; 137 if ((o2 < 0x80) || (o2 > 0xbf)) throw MakeURIError(); 138 var c = o2 & 0x3f; 139 value = (a << 12) + (b << 6) + c; 140 if ((value < 0x800) || (value > 0xffff)) throw MakeURIError(); 141 } else { 142 var o3 = octets[3]; 143 if (o0 < 0xf8) { 144 var a = (o0 & 0x07); 145 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError(); 146 var b = (o1 & 0x3f); 147 if ((o2 < 0x80) || (o2 > 0xbf)) { 148 throw MakeURIError(); 149 } 150 var c = (o2 & 0x3f); 151 if ((o3 < 0x80) || (o3 > 0xbf)) throw MakeURIError(); 152 var d = (o3 & 0x3f); 153 value = (a << 18) + (b << 12) + (c << 6) + d; 154 if ((value < 0x10000) || (value > 0x10ffff)) throw MakeURIError(); 155 } else { 156 throw MakeURIError(); 157 } 158 } 159 } 160 } 161 if (0xD800 <= value && value <= 0xDFFF) throw MakeURIError(); 162 if (value < 0x10000) { 163 %_TwoByteSeqStringSetChar(index++, value, result); 164 } else { 165 %_TwoByteSeqStringSetChar(index++, (value >> 10) + 0xd7c0, result); 166 %_TwoByteSeqStringSetChar(index++, (value & 0x3ff) + 0xdc00, result); 167 } 168 return index; 169 } 170 171 // ECMA-262, section 15.1.3 172 function Encode(uri, unescape) { 173 uri = TO_STRING(uri); 174 var uriLength = uri.length; 175 var array = new InternalArray(uriLength); 176 var index = 0; 177 for (var k = 0; k < uriLength; k++) { 178 var cc1 = %_StringCharCodeAt(uri, k); 179 if (unescape(cc1)) { 180 array[index++] = cc1; 181 } else { 182 if 
(cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw MakeURIError(); 183 if (cc1 < 0xD800 || cc1 > 0xDBFF) { 184 index = URIEncodeSingle(cc1, array, index); 185 } else { 186 k++; 187 if (k == uriLength) throw MakeURIError(); 188 var cc2 = %_StringCharCodeAt(uri, k); 189 if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw MakeURIError(); 190 index = URIEncodePair(cc1, cc2, array, index); 191 } 192 } 193 } 194 195 var result = %NewString(array.length, NEW_ONE_BYTE_STRING); 196 for (var i = 0; i < array.length; i++) { 197 %_OneByteSeqStringSetChar(i, array[i], result); 198 } 199 return result; 200 } 201 202 // ECMA-262, section 15.1.3 203 function Decode(uri, reserved) { 204 uri = TO_STRING(uri); 205 var uriLength = uri.length; 206 var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING); 207 var index = 0; 208 var k = 0; 209 210 // Optimistically assume one-byte string. 211 for ( ; k < uriLength; k++) { 212 var code = %_StringCharCodeAt(uri, k); 213 if (code == 37) { // '%' 214 if (k + 2 >= uriLength) throw MakeURIError(); 215 var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, k+1), 216 %_StringCharCodeAt(uri, k+2)); 217 if (cc >> 7) break; // Assumption wrong, two-byte string. 218 if (reserved(cc)) { 219 %_OneByteSeqStringSetChar(index++, 37, one_byte); // '%'. 220 %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+1), 221 one_byte); 222 %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+2), 223 one_byte); 224 } else { 225 %_OneByteSeqStringSetChar(index++, cc, one_byte); 226 } 227 k += 2; 228 } else { 229 if (code > 0x7f) break; // Assumption wrong, two-byte string. 230 %_OneByteSeqStringSetChar(index++, code, one_byte); 231 } 232 } 233 234 one_byte = %TruncateString(one_byte, index); 235 if (k == uriLength) return one_byte; 236 237 // Write into two byte string. 
238 var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING); 239 index = 0; 240 241 for ( ; k < uriLength; k++) { 242 var code = %_StringCharCodeAt(uri, k); 243 if (code == 37) { // '%' 244 if (k + 2 >= uriLength) throw MakeURIError(); 245 var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k), 246 %_StringCharCodeAt(uri, ++k)); 247 if (cc >> 7) { 248 var n = 0; 249 while (((cc << ++n) & 0x80) != 0) { } 250 if (n == 1 || n > 4) throw MakeURIError(); 251 var octets = new GlobalArray(n); 252 octets[0] = cc; 253 if (k + 3 * (n - 1) >= uriLength) throw MakeURIError(); 254 for (var i = 1; i < n; i++) { 255 if (uri[++k] != '%') throw MakeURIError(); 256 octets[i] = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k), 257 %_StringCharCodeAt(uri, ++k)); 258 } 259 index = URIDecodeOctets(octets, two_byte, index); 260 } else if (reserved(cc)) { 261 %_TwoByteSeqStringSetChar(index++, 37, two_byte); // '%'. 262 %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k - 1), 263 two_byte); 264 %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k), 265 two_byte); 266 } else { 267 %_TwoByteSeqStringSetChar(index++, cc, two_byte); 268 } 269 } else { 270 %_TwoByteSeqStringSetChar(index++, code, two_byte); 271 } 272 } 273 274 two_byte = %TruncateString(two_byte, index); 275 return one_byte + two_byte; 276 } 277 278 // ------------------------------------------------------------------- 279 // Define exported functions. 280 281 // ECMA-262 - B.2.1. 282 function URIEscapeJS(s) { 283 return %URIEscape(s); 284 } 285 286 // ECMA-262 - B.2.2. 287 function URIUnescapeJS(s) { 288 return %URIUnescape(s); 289 } 290 291 // ECMA-262 - 15.1.3.1. 
// decodeURI: decodes |uri| but keeps the escapes of the characters
// # $ & + , / : ; = ? @ as they are.
function URIDecode(uri) {
  var keepEscaped = function(code) {
    return (35 <= code && code <= 36) ||  // #$
           code == 38 ||                  // &
           (43 <= code && code <= 44) ||  // +,
           code == 47 ||                  // /
           (58 <= code && code <= 59) ||  // :;
           code == 61 ||                  // =
           (63 <= code && code <= 64);    // ?@
  };
  return Decode(uri, keepEscaped);
}

// ECMA-262 - 15.1.3.2.
// decodeURIComponent: decodes every escape; no character is kept escaped.
function URIDecodeComponent(component) {
  var keepNothingEscaped = function(code) {
    return false;
  };
  return Decode(component, keepNothingEscaped);
}

// ECMA-262 - 15.1.3.3.
// encodeURI: leaves ASCII letters and digits and the characters
// ! # $ & ' ( ) * + , - . / : ; = ? @ _ ~ unescaped.
function URIEncode(uri) {
  var leaveAsIs = function(code) {
    return isAlphaNumeric(code) ||
           code == 33 ||                  // !
           (35 <= code && code <= 36) ||  // #$
           (38 <= code && code <= 47) ||  // &'()*+,-./
           (58 <= code && code <= 59) ||  // :;
           code == 61 ||                  // =
           (63 <= code && code <= 64) ||  // ?@
           code == 95 ||                  // _
           code == 126;                   // ~
  };
  return Encode(uri, leaveAsIs);
}

// ECMA-262 - 15.1.3.4
// encodeURIComponent: leaves ASCII letters and digits and the characters
// ! ' ( ) * - . _ ~ unescaped.
function URIEncodeComponent(component) {
  var leaveAsIs = function(code) {
    return isAlphaNumeric(code) ||
           code == 33 ||                  // !
           (39 <= code && code <= 42) ||  // '()*
           (45 <= code && code <= 46) ||  // -.
           code == 95 ||                  // _
           code == 126;                   // ~
  };
  return Encode(component, leaveAsIs);
}

// -------------------------------------------------------------------
// Install exported functions.

// Set up non-enumerable URI functions on the global object and set
// their names.
371 utils.InstallFunctions(global, DONT_ENUM, [ 372 "escape", URIEscapeJS, 373 "unescape", URIUnescapeJS, 374 "decodeURI", URIDecode, 375 "decodeURIComponent", URIDecodeComponent, 376 "encodeURI", URIEncode, 377 "encodeURIComponent", URIEncodeComponent 378 ]); 379 380 }) 381