Home | History | Annotate | Download | only in js
      1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // This file contains support for URI manipulations written in
      6 // JavaScript.
      7 
      8 (function(global, utils) {
      9 
     10 "use strict";
     11 
     12 %CheckIsBootstrapping();
     13 
     14 // -------------------------------------------------------------------
     15 // Imports
     16 
     17 var GlobalObject = global.Object;
     18 var GlobalArray = global.Array;
     19 var InternalArray = utils.InternalArray;
     20 var MakeURIError;
     21 
     22 utils.Import(function(from) {
     23   MakeURIError = from.MakeURIError;
     24 });
     25 
     26 
     27 // -------------------------------------------------------------------
     28 // Define internal helper functions.
     29 
     // Converts the char code of a hexadecimal digit into its numeric value
     // (0-15). Both upper-case and lower-case letters are accepted. Any other
     // char code yields -1.
     function HexValueOf(code) {
       var value = -1;
       if (48 <= code && code <= 57) {
         // '0' - '9'
         value = code - 48;
       } else if (65 <= code && code <= 70) {
         // 'A' - 'F'
         value = code - 55;
       } else if (97 <= code && code <= 102) {
         // 'a' - 'f'
         value = code - 87;
       }
       return value;
     }
     40 
     // Returns true when the char code |cc| is an ASCII letter (either case)
     // or an ASCII decimal digit, and false otherwise.
     function isAlphaNumeric(cc) {
       return (97 <= cc && cc <= 122) ||  // a - z
              (65 <= cc && cc <= 90) ||   // A - Z
              (48 <= cc && cc <= 57);     // 0 - 9
     }
     52 
     // Lookup table from a nibble (0-15) to the char code of its upper-case
     // hexadecimal digit. It holds 0 until URIEncodeOctets fills it in on
     // first use.
     var hexCharCodeArray = 0;

     // Writes the three char codes of "%XY" for |octet| into |result| starting
     // at |index| and returns the index just past them. Reads hexCharCodeArray,
     // which URIEncodeOctets initializes before calling this function.
     function URIAddEncodedOctetToBuffer(octet, result, index) {
       var highNibble = octet >> 4;
       var lowNibble = octet & 0x0F;
       result[index] = 37;  // Char code of '%'.
       result[index + 1] = hexCharCodeArray[highNibble];
       result[index + 2] = hexCharCodeArray[lowNibble];
       return index + 3;
     }

     // Appends the percent-encoding of up to four octets to |result| starting
     // at |index| and returns the next free index. The first octet is always
     // written; octets 1 to 3 are written only when their value is truthy.
     function URIEncodeOctets(octets, result, index) {
       if (hexCharCodeArray === 0) {
         hexCharCodeArray = [
           48, 49, 50, 51, 52, 53, 54, 55, 56, 57,  // '0' - '9'
           65, 66, 67, 68, 69, 70                   // 'A' - 'F'
         ];
       }
       index = URIAddEncodedOctetToBuffer(octets[0], result, index);
       for (var i = 1; i <= 3; i++) {
         if (octets[i]) {
           index = URIAddEncodedOctetToBuffer(octets[i], result, index);
         }
       }
       return index;
     }
     74 
     // Percent-encodes the UTF-8 form of the single code unit |cc| into
     // |result| starting at |index| and returns the next free index. One, two
     // or three octets are produced depending on the magnitude of |cc|.
     function URIEncodeSingle(cc, result, index) {
       var octets = new GlobalArray(3);
       if (cc <= 0x007F) {
         // One octet: 0xxxxxxx.
         octets[0] = cc;
       } else {
         var lowSix = cc & 63;
         var midSix = (cc >> 6) & 63;
         if (cc <= 0x07FF) {
           // Two octets: 110yyyyy 10zzzzzz.
           octets[0] = 192 | midSix;
           octets[1] = 128 | lowSix;
         } else {
           // Three octets: 1110xxxx 10yyyyyy 10zzzzzz.
           var topFour = (cc >> 12) & 0xF;
           octets[0] = 224 | topFour;
           octets[1] = 128 | midSix;
           octets[2] = 128 | lowSix;
         }
       }
       return URIEncodeOctets(octets, result, index);
     }
     92 
     // Percent-encodes the four-octet UTF-8 form of the pair of code units
     // |cc1|, |cc2| into |result| starting at |index| and returns the next free
     // index. The caller (Encode) only passes a high surrogate followed by a
     // low surrogate.
     function URIEncodePair(cc1, cc2, result, index) {
       // Bits 6-9 of the first unit, plus one.
       var plane = ((cc1 >> 6) & 0xF) + 1;
       var highMid = (cc1 >> 2) & 0xF;
       var highLow = cc1 & 3;
       var lowTop = (cc2 >> 6) & 0xF;
       var lowSix = cc2 & 63;

       var octets = new GlobalArray(4);
       octets[0] = 240 + (plane >> 2);
       octets[1] = 128 + (((plane & 3) << 4) | highMid);
       octets[2] = 128 + ((highLow << 4) | lowTop);
       octets[3] = 128 + lowSix;
       return URIEncodeOctets(octets, result, index);
     }
    106 
    // Combines the char codes of two hexadecimal digits into the octet they
    // denote (|highChar| supplies the upper four bits). Throws the error
    // created by MakeURIError() when either char code is not a hex digit.
    function URIHexCharsToCharCode(highChar, lowChar) {
      var highNibble = HexValueOf(highChar);
      var lowNibble = HexValueOf(lowChar);
      if (highNibble != -1 && lowNibble != -1) {
        return (highNibble << 4) | lowNibble;
      }
      throw MakeURIError();
    }
    113 
    114 // Callers must ensure that |result| is a sufficiently long sequential
    115 // two-byte string!
    116 function URIDecodeOctets(octets, result, index) {
    117   var value;
    118   var o0 = octets[0];
    119   if (o0 < 0x80) {
    120     value = o0;
    121   } else if (o0 < 0xc2) {
    122     throw MakeURIError();
    123   } else {
    124     var o1 = octets[1];
    125     if (o0 < 0xe0) {
    126       var a = o0 & 0x1f;
    127       if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
    128       var b = o1 & 0x3f;
    129       value = (a << 6) + b;
    130       if (value < 0x80 || value > 0x7ff) throw MakeURIError();
    131     } else {
    132       var o2 = octets[2];
    133       if (o0 < 0xf0) {
    134         var a = o0 & 0x0f;
    135         if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
    136         var b = o1 & 0x3f;
    137         if ((o2 < 0x80) || (o2 > 0xbf)) throw MakeURIError();
    138         var c = o2 & 0x3f;
    139         value = (a << 12) + (b << 6) + c;
    140         if ((value < 0x800) || (value > 0xffff)) throw MakeURIError();
    141       } else {
    142         var o3 = octets[3];
    143         if (o0 < 0xf8) {
    144           var a = (o0 & 0x07);
    145           if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError();
    146           var b = (o1 & 0x3f);
    147           if ((o2 < 0x80) || (o2 > 0xbf)) {
    148             throw MakeURIError();
    149           }
    150           var c = (o2 & 0x3f);
    151           if ((o3 < 0x80) || (o3 > 0xbf)) throw MakeURIError();
    152           var d = (o3 & 0x3f);
    153           value = (a << 18) + (b << 12) + (c << 6) + d;
    154           if ((value < 0x10000) || (value > 0x10ffff)) throw MakeURIError();
    155         } else {
    156           throw MakeURIError();
    157         }
    158       }
    159     }
    160   }
    161   if (0xD800 <= value && value <= 0xDFFF) throw MakeURIError();
    162   if (value < 0x10000) {
    163     %_TwoByteSeqStringSetChar(index++, value, result);
    164   } else {
    165     %_TwoByteSeqStringSetChar(index++, (value >> 10) + 0xd7c0, result);
    166     %_TwoByteSeqStringSetChar(index++, (value & 0x3ff) + 0xdc00, result);
    167   }
    168   return index;
    169 }
    170 
    171 // ECMA-262, section 15.1.3
    172 function Encode(uri, unescape) {
    173   uri = TO_STRING(uri);
    174   var uriLength = uri.length;
    175   var array = new InternalArray(uriLength);
    176   var index = 0;
    177   for (var k = 0; k < uriLength; k++) {
    178     var cc1 = %_StringCharCodeAt(uri, k);
    179     if (unescape(cc1)) {
    180       array[index++] = cc1;
    181     } else {
    182       if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw MakeURIError();
    183       if (cc1 < 0xD800 || cc1 > 0xDBFF) {
    184         index = URIEncodeSingle(cc1, array, index);
    185       } else {
    186         k++;
    187         if (k == uriLength) throw MakeURIError();
    188         var cc2 = %_StringCharCodeAt(uri, k);
    189         if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw MakeURIError();
    190         index = URIEncodePair(cc1, cc2, array, index);
    191       }
    192     }
    193   }
    194 
    195   var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
    196   for (var i = 0; i < array.length; i++) {
    197     %_OneByteSeqStringSetChar(i, array[i], result);
    198   }
    199   return result;
    200 }
    201 
    202 // ECMA-262, section 15.1.3
        // Decodes the percent-escapes in |uri| and returns the decoded string.
        // |reserved| is a predicate on a char code: when it returns a truthy
        // value for a decoded single-octet escape (value below 0x80), the three
        // characters of that escape are copied to the output unchanged instead
        // of being decoded. Throws the error created by MakeURIError() when an
        // escape is truncated, contains non-hex digits, or forms an invalid
        // multi-octet sequence.
        //
        // The work is done in two passes: the first writes into a one-byte
        // string for as long as every output character is below 0x80, the
        // second writes the rest of the input into a two-byte string. The
        // result is the concatenation of the two.
    203 function Decode(uri, reserved) {
    204   uri = TO_STRING(uri);
    205   var uriLength = uri.length;
    206   var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
    207   var index = 0;
    208   var k = 0;
    209 
    210   // Optimistically assume one-byte string.
    211   for ( ; k < uriLength; k++) {
    212     var code = %_StringCharCodeAt(uri, k);
    213     if (code == 37) {  // '%'
          // A '%' must be followed by two more characters.
    214       if (k + 2 >= uriLength) throw MakeURIError();
    215       var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, k+1),
    216                                      %_StringCharCodeAt(uri, k+2));
          // k still points at the '%' here, so the second pass re-reads this
          // escape from its beginning.
    217       if (cc >> 7) break;  // Assumption wrong, two-byte string.
    218       if (reserved(cc)) {
    219         %_OneByteSeqStringSetChar(index++, 37, one_byte);  // '%'.
    220         %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+1),
    221                                   one_byte);
    222         %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+2),
    223                                   one_byte);
    224       } else {
    225         %_OneByteSeqStringSetChar(index++, cc, one_byte);
    226       }
          // Skip the two hex digits; the loop increment skips the '%'.
    227       k += 2;
    228     } else {
    229       if (code > 0x7f) break;  // Assumption wrong, two-byte string.
    230       %_OneByteSeqStringSetChar(index++, code, one_byte);
    231     }
    232   }
    233 
    234   one_byte = %TruncateString(one_byte, index);
        // The whole input was consumed by the first pass.
    235   if (k == uriLength) return one_byte;
    236 
    237   // Write into two byte string.
        // Sized for the remaining input characters; truncated to the number of
        // characters actually written after the loop.
    238   var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
    239   index = 0;
    240 
    241   for ( ; k < uriLength; k++) {
    242     var code = %_StringCharCodeAt(uri, k);
    243     if (code == 37) {  // '%'
    244       if (k + 2 >= uriLength) throw MakeURIError();
          // The two ++k leave k on the second hex digit of this escape.
    245       var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k),
    246                                      %_StringCharCodeAt(uri, ++k));
    247       if (cc >> 7) {
            // Count the leading 1 bits of the first octet into n; n is used
            // below as the number of octets in the sequence.
    248         var n = 0;
    249         while (((cc << ++n) & 0x80) != 0) { }
            // n == 1 means the first octet has the form 10xxxxxx; more than
            // four octets is not accepted.
    250         if (n == 1 || n > 4) throw MakeURIError();
    251         var octets = new GlobalArray(n);
    252         octets[0] = cc;
            // Each of the remaining n - 1 octets needs three more characters.
    253         if (k + 3 * (n - 1) >= uriLength) throw MakeURIError();
    254         for (var i = 1; i < n; i++) {
    255           if (uri[++k] != '%') throw MakeURIError();
    256           octets[i] = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k),
    257                                             %_StringCharCodeAt(uri, ++k));
    258         }
    259         index = URIDecodeOctets(octets, two_byte, index);
    260       } else  if (reserved(cc)) {
            // Copy the escape verbatim; k - 1 and k are its two hex digits.
    261         %_TwoByteSeqStringSetChar(index++, 37, two_byte);  // '%'.
    262         %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k - 1),
    263                                   two_byte);
    264         %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k),
    265                                   two_byte);
    266       } else {
    267         %_TwoByteSeqStringSetChar(index++, cc, two_byte);
    268       }
    269     } else {
    270       %_TwoByteSeqStringSetChar(index++, code, two_byte);
    271     }
    272   }
    273 
    274   two_byte = %TruncateString(two_byte, index);
    275   return one_byte + two_byte;
    276 }
    277 
    278 // -------------------------------------------------------------------
    279 // Define exported functions.
    280 
    281 // ECMA-262 - B.2.1.
        // Installed below as the global "escape" function. Forwards |s| to the
        // %URIEscape runtime function and returns its result.
    282 function URIEscapeJS(s) {
    283   return %URIEscape(s);
    284 }
    285 
    286 // ECMA-262 - B.2.2.
        // Installed below as the global "unescape" function. Forwards |s| to the
        // %URIUnescape runtime function and returns its result.
    287 function URIUnescapeJS(s) {
    288   return %URIUnescape(s);
    289 }
    290 
    // ECMA-262 - 15.1.3.1.
    // Installed below as the global "decodeURI" function. Decodes |uri| with
    // Decode, keeping escapes of the characters # $ & + , / : ; = ? @ in their
    // escaped form.
    function URIDecode(uri) {
      var reservedPredicate = function(cc) {
        return (35 <= cc && cc <= 36) ||  // #$
               cc == 38 ||                // &
               (43 <= cc && cc <= 44) ||  // +,
               cc == 47 ||                // /
               (58 <= cc && cc <= 59) ||  // :;
               cc == 61 ||                // =
               (63 <= cc && cc <= 64);    // ?@
      };
      return Decode(uri, reservedPredicate);
    }
    313 
    // ECMA-262 - 15.1.3.2.
    // Installed below as the global "decodeURIComponent" function. Decodes
    // |component| with Decode using a predicate that treats no character as
    // reserved, so every valid escape is decoded.
    function URIDecodeComponent(component) {
      var reservedPredicate = function(cc) {
        return false;
      };
      var decoded = Decode(component, reservedPredicate);
      return decoded;
    }
    319 
    // ECMA-262 - 15.1.3.3.
    // Installed below as the global "encodeURI" function. Encodes |uri| with
    // Encode, leaving ASCII letters and digits and the characters
    // ! # $ & ' ( ) * + , - . / : ; = ? @ _ ~ unescaped.
    function URIEncode(uri) {
      var unescapePredicate = function(cc) {
        return isAlphaNumeric(cc) ||
               cc == 33 ||                // !
               (35 <= cc && cc <= 36) ||  // #$
               (38 <= cc && cc <= 47) ||  // &'()*+,-./
               (58 <= cc && cc <= 59) ||  // :;
               cc == 61 ||                // =
               (63 <= cc && cc <= 64) ||  // ?@
               cc == 95 ||                // _
               cc == 126;                 // ~
      };
      return Encode(uri, unescapePredicate);
    }
    345 
    // ECMA-262 - 15.1.3.4
    // Installed below as the global "encodeURIComponent" function. Encodes
    // |component| with Encode, leaving only ASCII letters and digits and the
    // characters ! ' ( ) * - . _ ~ unescaped.
    function URIEncodeComponent(component) {
      var unescapePredicate = function(cc) {
        return isAlphaNumeric(cc) ||
               cc == 33 ||                // !
               (39 <= cc && cc <= 42) ||  // '()*
               (45 <= cc && cc <= 46) ||  // -.
               cc == 95 ||                // _
               cc == 126;                 // ~
      };
      return Encode(component, unescapePredicate);
    }
    365 
    366 // -------------------------------------------------------------------
    367 // Install exported functions.
    368 
    369 // Set up non-enumerable URI functions on the global object and set
    370 // their names.
        // The list alternates between the property name to install on |global|
        // and the function defined above that implements it.
    371 utils.InstallFunctions(global, DONT_ENUM, [
    372   "escape", URIEscapeJS,
    373   "unescape", URIUnescapeJS,
    374   "decodeURI", URIDecode,
    375   "decodeURIComponent", URIDecodeComponent,
    376   "encodeURI", URIEncode,
    377   "encodeURIComponent", URIEncodeComponent
    378 ]);
    379 
    380 })
    381