Home | History | Annotate | Download | only in src
      1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 // This file relies on the fact that the following declaration has been made
     29 // in runtime.js:
     30 // var $Array = global.Array;
     31 
     32 // -------------------------------------------------------------------
     33 
     34 // This file contains support for URI manipulations written in
     35 // JavaScript.
     36 
// Lazily initialized.
// Hex-digit lookup tables indexed by nibble value (0-15).  Both start out as
// the sentinel 0 and are replaced by a 16-element array on first use:
//  - hexCharArray holds the digit strings "0".."9", "A".."F"; it is filled in
//    by CharCodeToHex4Str.
//  - hexCharCodeArray holds the char codes of those same digits; it is filled
//    in by URIEncodeOctets.
var hexCharArray = 0;
var hexCharCodeArray = 0;
     40 
     41 
     42 function URIAddEncodedOctetToBuffer(octet, result, index) {
     43   result[index++] = 37; // Char code of '%'.
     44   result[index++] = hexCharCodeArray[octet >> 4];
     45   result[index++] = hexCharCodeArray[octet & 0x0F];
     46   return index;
     47 }
     48 
     49 
     50 function URIEncodeOctets(octets, result, index) {
     51   if (hexCharCodeArray === 0) {
     52     hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
     53                         65, 66, 67, 68, 69, 70];
     54   }
     55   index = URIAddEncodedOctetToBuffer(octets[0], result, index);
     56   if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index);
     57   if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index);
     58   if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index);
     59   return index;
     60 }
     61 
     62 
     63 function URIEncodeSingle(cc, result, index) {
     64   var x = (cc >> 12) & 0xF;
     65   var y = (cc >> 6) & 63;
     66   var z = cc & 63;
     67   var octets = new $Array(3);
     68   if (cc <= 0x007F) {
     69     octets[0] = cc;
     70   } else if (cc <= 0x07FF) {
     71     octets[0] = y + 192;
     72     octets[1] = z + 128;
     73   } else {
     74     octets[0] = x + 224;
     75     octets[1] = y + 128;
     76     octets[2] = z + 128;
     77   }
     78   return URIEncodeOctets(octets, result, index);
     79 }
     80 
     81 
     82 function URIEncodePair(cc1 , cc2, result, index) {
     83   var u = ((cc1 >> 6) & 0xF) + 1;
     84   var w = (cc1 >> 2) & 0xF;
     85   var x = cc1 & 3;
     86   var y = (cc2 >> 6) & 0xF;
     87   var z = cc2 & 63;
     88   var octets = new $Array(4);
     89   octets[0] = (u >> 2) + 240;
     90   octets[1] = (((u & 3) << 4) | w) + 128;
     91   octets[2] = ((x << 4) | y) + 128;
     92   octets[3] = z + 128;
     93   return URIEncodeOctets(octets, result, index);
     94 }
     95 
     96 
     97 function URIHexCharsToCharCode(highChar, lowChar) {
     98   var highCode = HexValueOf(highChar);
     99   var lowCode = HexValueOf(lowChar);
    100   if (highCode == -1 || lowCode == -1) {
    101     throw new $URIError("URI malformed");
    102   }
    103   return (highCode << 4) | lowCode;
    104 }
    105 
    106 
    107 function URIDecodeOctets(octets, result, index) {
    108   var value;
    109   var o0 = octets[0];
    110   if (o0 < 0x80) {
    111     value = o0;
    112   } else if (o0 < 0xc2) {
    113     throw new $URIError("URI malformed");
    114   } else {
    115     var o1 = octets[1];
    116     if (o0 < 0xe0) {
    117       var a = o0 & 0x1f;
    118       if ((o1 < 0x80) || (o1 > 0xbf)) {
    119         throw new $URIError("URI malformed");
    120       }
    121       var b = o1 & 0x3f;
    122       value = (a << 6) + b;
    123       if (value < 0x80 || value > 0x7ff) {
    124         throw new $URIError("URI malformed");
    125       }
    126     } else {
    127       var o2 = octets[2];
    128       if (o0 < 0xf0) {
    129         var a = o0 & 0x0f;
    130         if ((o1 < 0x80) || (o1 > 0xbf)) {
    131           throw new $URIError("URI malformed");
    132         }
    133         var b = o1 & 0x3f;
    134         if ((o2 < 0x80) || (o2 > 0xbf)) {
    135           throw new $URIError("URI malformed");
    136         }
    137         var c = o2 & 0x3f;
    138         value = (a << 12) + (b << 6) + c;
    139         if ((value < 0x800) || (value > 0xffff)) {
    140           throw new $URIError("URI malformed");
    141         }
    142       } else {
    143         var o3 = octets[3];
    144         if (o0 < 0xf8) {
    145           var a = (o0 & 0x07);
    146           if ((o1 < 0x80) || (o1 > 0xbf)) {
    147             throw new $URIError("URI malformed");
    148           }
    149           var b = (o1 & 0x3f);
    150           if ((o2 < 0x80) || (o2 > 0xbf)) {
    151             throw new $URIError("URI malformed");
    152           }
    153           var c = (o2 & 0x3f);
    154           if ((o3 < 0x80) || (o3 > 0xbf)) {
    155             throw new $URIError("URI malformed");
    156           }
    157           var d = (o3 & 0x3f);
    158           value = (a << 18) + (b << 12) + (c << 6) + d;
    159           if ((value < 0x10000) || (value > 0x10ffff)) {
    160             throw new $URIError("URI malformed");
    161           }
    162         } else {
    163           throw new $URIError("URI malformed");
    164         }
    165       }
    166     }
    167   }
    168   if (0xD800 <= value && value <= 0xDFFF) {
    169     throw new $URIError("URI malformed");
    170   }
    171   if (value < 0x10000) {
    172     %_TwoByteSeqStringSetChar(result, index++, value);
    173     return index;
    174   } else {
    175     %_TwoByteSeqStringSetChar(result, index++, (value >> 10) + 0xd7c0);
    176     %_TwoByteSeqStringSetChar(result, index++, (value & 0x3ff) + 0xdc00);
    177     return index;
    178   }
    179 }
    180 
    181 
    182 // ECMA-262, section 15.1.3
    183 function Encode(uri, unescape) {
    184   var uriLength = uri.length;
    185   var array = new InternalArray(uriLength);
    186   var index = 0;
    187   for (var k = 0; k < uriLength; k++) {
    188     var cc1 = uri.charCodeAt(k);
    189     if (unescape(cc1)) {
    190       array[index++] = cc1;
    191     } else {
    192       if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed");
    193       if (cc1 < 0xD800 || cc1 > 0xDBFF) {
    194         index = URIEncodeSingle(cc1, array, index);
    195       } else {
    196         k++;
    197         if (k == uriLength) throw new $URIError("URI malformed");
    198         var cc2 = uri.charCodeAt(k);
    199         if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed");
    200         index = URIEncodePair(cc1, cc2, array, index);
    201       }
    202     }
    203   }
    204 
    205   var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
    206   for (var i = 0; i < array.length; i++) {
    207     %_OneByteSeqStringSetChar(result, i, array[i]);
    208   }
    209   return result;
    210 }
    211 
    212 
// ECMA-262, section 15.1.3
// Decodes the percent-escapes in the string |uri|.  Escapes whose decoded
// value satisfies the predicate |reserved| are left in the output as the
// original three characters; all other escapes are replaced by the character
// they encode.  Throws a URIError for truncated escapes, non-hex digits and
// invalid UTF-8 sequences.
//
// The work is done in two phases that share the input cursor |k|: a fast path
// that fills a one-byte string for as long as the output stays below 0x80,
// and a general path that fills a two-byte string with everything from the
// first position where that stops being true.  The result is the
// concatenation of the two (truncated) strings.
function Decode(uri, reserved) {
  var uriLength = uri.length;
  var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
  var index = 0;
  var k = 0;

  // Optimistically assume ascii string.
  for ( ; k < uriLength; k++) {
    var code = uri.charCodeAt(k);
    if (code == 37) {  // '%'
      // An escape needs two more characters after the '%'.
      if (k + 2 >= uriLength) throw new $URIError("URI malformed");
      var cc = URIHexCharsToCharCode(uri.charCodeAt(k+1), uri.charCodeAt(k+2));
      // The break leaves k on the '%', so the second phase re-reads this
      // escape from its start.
      if (cc >> 7) break;  // Assumption wrong, two byte string.
      if (reserved(cc)) {
        // Keep the escape exactly as written in the input.
        %_OneByteSeqStringSetChar(one_byte, index++, 37);  // '%'.
        %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+1));
        %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+2));
      } else {
        %_OneByteSeqStringSetChar(one_byte, index++, cc);
      }
      // Skip the two hex digits; the loop increment skips the '%'.
      k += 2;
    } else {
      if (code > 0x7f) break;  // Assumption wrong, two byte string.
      %_OneByteSeqStringSetChar(one_byte, index++, code);
    }
  }

  one_byte = %TruncateString(one_byte, index);
  // The whole input was consumed by the fast path.
  if (k == uriLength) return one_byte;

  // Write into two byte string.
  var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
  index = 0;

  for ( ; k < uriLength; k++) {
    var code = uri.charCodeAt(k);
    if (code == 37) {  // '%'
      if (k + 2 >= uriLength) throw new $URIError("URI malformed");
      // The two pre-increments run left to right; afterwards k is on the
      // second hex digit.
      var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k));
      if (cc >> 7) {
        // Count the leading one bits of the octet: n is the total number of
        // octets in the UTF-8 sequence that this octet starts.
        var n = 0;
        while (((cc << ++n) & 0x80) != 0) { }
        // n == 1 is a continuation octet in lead position; more than four
        // octets is never accepted.
        if (n == 1 || n > 4) throw new $URIError("URI malformed");
        var octets = new $Array(n);
        octets[0] = cc;
        // Each of the remaining n - 1 octets needs three more characters.
        if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed");
        for (var i = 1; i < n; i++) {
          if (uri.charAt(++k) != '%') throw new $URIError("URI malformed");
          octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k),
                                            uri.charCodeAt(++k));
        }
        index = URIDecodeOctets(octets, two_byte, index);
      } else  if (reserved(cc)) {
        // Keep the escape as written; k is on the second hex digit here.
        %_TwoByteSeqStringSetChar(two_byte, index++, 37);  // '%'.
        %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k - 1));
        %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k));
      } else {
        %_TwoByteSeqStringSetChar(two_byte, index++, cc);
      }
    } else {
      // Anything that is not an escape is copied through unchanged.
      %_TwoByteSeqStringSetChar(two_byte, index++, code);
    }
  }

  two_byte = %TruncateString(two_byte, index);
  return one_byte + two_byte;
}
    281 
    282 
    283 // ECMA-262 - 15.1.3.1.
    284 function URIDecode(uri) {
    285   var reservedPredicate = function(cc) {
    286     // #$
    287     if (35 <= cc && cc <= 36) return true;
    288     // &
    289     if (cc == 38) return true;
    290     // +,
    291     if (43 <= cc && cc <= 44) return true;
    292     // /
    293     if (cc == 47) return true;
    294     // :;
    295     if (58 <= cc && cc <= 59) return true;
    296     // =
    297     if (cc == 61) return true;
    298     // ?@
    299     if (63 <= cc && cc <= 64) return true;
    300 
    301     return false;
    302   };
    303   var string = ToString(uri);
    304   return Decode(string, reservedPredicate);
    305 }
    306 
    307 
    308 // ECMA-262 - 15.1.3.2.
    309 function URIDecodeComponent(component) {
    310   var reservedPredicate = function(cc) { return false; };
    311   var string = ToString(component);
    312   return Decode(string, reservedPredicate);
    313 }
    314 
    315 
    316 // Does the char code correspond to an alpha-numeric char.
    317 function isAlphaNumeric(cc) {
    318   // a - z
    319   if (97 <= cc && cc <= 122) return true;
    320   // A - Z
    321   if (65 <= cc && cc <= 90) return true;
    322   // 0 - 9
    323   if (48 <= cc && cc <= 57) return true;
    324 
    325   return false;
    326 }
    327 
    328 
    329 // ECMA-262 - 15.1.3.3.
    330 function URIEncode(uri) {
    331   var unescapePredicate = function(cc) {
    332     if (isAlphaNumeric(cc)) return true;
    333     // !
    334     if (cc == 33) return true;
    335     // #$
    336     if (35 <= cc && cc <= 36) return true;
    337     // &'()*+,-./
    338     if (38 <= cc && cc <= 47) return true;
    339     // :;
    340     if (58 <= cc && cc <= 59) return true;
    341     // =
    342     if (cc == 61) return true;
    343     // ?@
    344     if (63 <= cc && cc <= 64) return true;
    345     // _
    346     if (cc == 95) return true;
    347     // ~
    348     if (cc == 126) return true;
    349 
    350     return false;
    351   };
    352 
    353   var string = ToString(uri);
    354   return Encode(string, unescapePredicate);
    355 }
    356 
    357 
    358 // ECMA-262 - 15.1.3.4
    359 function URIEncodeComponent(component) {
    360   var unescapePredicate = function(cc) {
    361     if (isAlphaNumeric(cc)) return true;
    362     // !
    363     if (cc == 33) return true;
    364     // '()*
    365     if (39 <= cc && cc <= 42) return true;
    366     // -.
    367     if (45 <= cc && cc <= 46) return true;
    368     // _
    369     if (cc == 95) return true;
    370     // ~
    371     if (cc == 126) return true;
    372 
    373     return false;
    374   };
    375 
    376   var string = ToString(component);
    377   return Encode(string, unescapePredicate);
    378 }
    379 
    380 
    381 function HexValueOf(code) {
    382   // 0-9
    383   if (code >= 48 && code <= 57) return code - 48;
    384   // A-F
    385   if (code >= 65 && code <= 70) return code - 55;
    386   // a-f
    387   if (code >= 97 && code <= 102) return code - 87;
    388 
    389   return -1;
    390 }
    391 
    392 
    393 // Convert a character code to 4-digit hex string representation
    394 // 64 -> 0040, 62234 -> F31A.
    395 function CharCodeToHex4Str(cc) {
    396   var r = "";
    397   if (hexCharArray === 0) {
    398     hexCharArray = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    399                     "A", "B", "C", "D", "E", "F"];
    400   }
    401   for (var i = 0; i < 4; ++i) {
    402     var c = hexCharArray[cc & 0x0F];
    403     r = c + r;
    404     cc = cc >>> 4;
    405   }
    406   return r;
    407 }
    408 
    409 
    410 // Returns true if all digits in string s are valid hex numbers
    411 function IsValidHex(s) {
    412   for (var i = 0; i < s.length; ++i) {
    413     var cc = s.charCodeAt(i);
    414     if ((48 <= cc && cc <= 57) ||
    415         (65 <= cc && cc <= 70) ||
    416         (97 <= cc && cc <= 102)) {
    417       // '0'..'9', 'A'..'F' and 'a' .. 'f'.
    418     } else {
    419       return false;
    420     }
    421   }
    422   return true;
    423 }
    424 
    425 
    426 // ECMA-262 - B.2.1.
    427 function URIEscape(str) {
    428   var s = ToString(str);
    429   return %URIEscape(s);
    430 }
    431 
    432 
    433 // ECMA-262 - B.2.2.
    434 function URIUnescape(str) {
    435   var s = ToString(str);
    436   return %URIUnescape(s);
    437 }
    438 
    439 
    440 // -------------------------------------------------------------------
    441 
    442 function SetUpUri() {
    443   %CheckIsBootstrapping();
    444 
    445   // Set up non-enumerable URI functions on the global object and set
    446   // their names.
    447   InstallFunctions(global, DONT_ENUM, $Array(
    448     "escape", URIEscape,
    449     "unescape", URIUnescape,
    450     "decodeURI", URIDecode,
    451     "decodeURIComponent", URIDecodeComponent,
    452     "encodeURI", URIEncode,
    453     "encodeURIComponent", URIEncodeComponent
    454   ));
    455 }
    456 
    457 SetUpUri();
    458