// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
// This file contains support for URI manipulations written in
// JavaScript.

// Expect $String = global.String;
     32 
     33 // Lazily initialized.
     34 var hexCharArray = 0;
     35 var hexCharCodeArray = 0;
     36 
     37 
     38 function URIAddEncodedOctetToBuffer(octet, result, index) {
     39   result[index++] = 37; // Char code of '%'.
     40   result[index++] = hexCharCodeArray[octet >> 4];
     41   result[index++] = hexCharCodeArray[octet & 0x0F];
     42   return index;
     43 }
     44 
     45 
     46 function URIEncodeOctets(octets, result, index) {
     47   if (hexCharCodeArray === 0) {
     48     hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
     49                         65, 66, 67, 68, 69, 70];
     50   }
     51   index = URIAddEncodedOctetToBuffer(octets[0], result, index);
     52   if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index);
     53   if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index);
     54   if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index);
     55   return index;
     56 }
     57 
     58 
     59 function URIEncodeSingle(cc, result, index) {
     60   var x = (cc >> 12) & 0xF;
     61   var y = (cc >> 6) & 63;
     62   var z = cc & 63;
     63   var octets = new $Array(3);
     64   if (cc <= 0x007F) {
     65     octets[0] = cc;
     66   } else if (cc <= 0x07FF) {
     67     octets[0] = y + 192;
     68     octets[1] = z + 128;
     69   } else {
     70     octets[0] = x + 224;
     71     octets[1] = y + 128;
     72     octets[2] = z + 128;
     73   }
     74   return URIEncodeOctets(octets, result, index);
     75 }
     76 
     77 
     78 function URIEncodePair(cc1 , cc2, result, index) {
     79   var u = ((cc1 >> 6) & 0xF) + 1;
     80   var w = (cc1 >> 2) & 0xF;
     81   var x = cc1 & 3;
     82   var y = (cc2 >> 6) & 0xF;
     83   var z = cc2 & 63;
     84   var octets = new $Array(4);
     85   octets[0] = (u >> 2) + 240;
     86   octets[1] = (((u & 3) << 4) | w) + 128;
     87   octets[2] = ((x << 4) | y) + 128;
     88   octets[3] = z + 128;
     89   return URIEncodeOctets(octets, result, index);
     90 }
     91 
     92 
     93 function URIHexCharsToCharCode(highChar, lowChar) {
     94   var highCode = HexValueOf(highChar);
     95   var lowCode = HexValueOf(lowChar);
     96   if (highCode == -1 || lowCode == -1) {
     97     throw new $URIError("URI malformed");
     98   }
     99   return (highCode << 4) | lowCode;
    100 }
    101 
    102 
    103 function URIDecodeOctets(octets, result, index) {
    104   var value;
    105   var o0 = octets[0];
    106   if (o0 < 0x80) {
    107     value = o0;
    108   } else if (o0 < 0xc2) {
    109     throw new $URIError("URI malformed");
    110   } else {
    111     var o1 = octets[1];
    112     if (o0 < 0xe0) {
    113       var a = o0 & 0x1f;
    114       if ((o1 < 0x80) || (o1 > 0xbf)) {
    115         throw new $URIError("URI malformed");
    116       }
    117       var b = o1 & 0x3f;
    118       value = (a << 6) + b;
    119       if (value < 0x80 || value > 0x7ff) {
    120         throw new $URIError("URI malformed");
    121       }
    122     } else {
    123       var o2 = octets[2];
    124       if (o0 < 0xf0) {
    125         var a = o0 & 0x0f;
    126         if ((o1 < 0x80) || (o1 > 0xbf)) {
    127           throw new $URIError("URI malformed");
    128         }
    129         var b = o1 & 0x3f;
    130         if ((o2 < 0x80) || (o2 > 0xbf)) {
    131           throw new $URIError("URI malformed");
    132         }
    133         var c = o2 & 0x3f;
    134         value = (a << 12) + (b << 6) + c;
    135         if ((value < 0x800) || (value > 0xffff)) {
    136           throw new $URIError("URI malformed");
    137         }
    138       } else {
    139         var o3 = octets[3];
    140         if (o0 < 0xf8) {
    141           var a = (o0 & 0x07);
    142           if ((o1 < 0x80) || (o1 > 0xbf)) {
    143             throw new $URIError("URI malformed");
    144           }
    145           var b = (o1 & 0x3f);
    146           if ((o2 < 0x80) || (o2 > 0xbf)) {
    147             throw new $URIError("URI malformed");
    148           }
    149           var c = (o2 & 0x3f);
    150           if ((o3 < 0x80) || (o3 > 0xbf)) {
    151             throw new $URIError("URI malformed");
    152           }
    153           var d = (o3 & 0x3f);
    154           value = (a << 18) + (b << 12) + (c << 6) + d;
    155           if ((value < 0x10000) || (value > 0x10ffff)) {
    156             throw new $URIError("URI malformed");
    157           }
    158         } else {
    159           throw new $URIError("URI malformed");
    160         }
    161       }
    162     }
    163   }
    164   if (0xD800 <= value && value <= 0xDFFF) {
    165     throw new $URIError("URI malformed");
    166   }
    167   if (value < 0x10000) {
    168     result[index++] = value;
    169     return index;
    170   } else {
    171     result[index++] = (value >> 10) + 0xd7c0;
    172     result[index++] = (value & 0x3ff) + 0xdc00;
    173     return index;
    174   }
    175 }
    176 
    177 
    178 // ECMA-262, section 15.1.3
    179 function Encode(uri, unescape) {
    180   var uriLength = uri.length;
    181   // We are going to pass result to %StringFromCharCodeArray
    182   // which does not expect any getters/setters installed
    183   // on the incoming array.
    184   var result = new InternalArray(uriLength);
    185   var index = 0;
    186   for (var k = 0; k < uriLength; k++) {
    187     var cc1 = uri.charCodeAt(k);
    188     if (unescape(cc1)) {
    189       result[index++] = cc1;
    190     } else {
    191       if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed");
    192       if (cc1 < 0xD800 || cc1 > 0xDBFF) {
    193         index = URIEncodeSingle(cc1, result, index);
    194       } else {
    195         k++;
    196         if (k == uriLength) throw new $URIError("URI malformed");
    197         var cc2 = uri.charCodeAt(k);
    198         if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed");
    199         index = URIEncodePair(cc1, cc2, result, index);
    200       }
    201     }
    202   }
    203   return %StringFromCharCodeArray(result);
    204 }
    205 
    206 
    207 // ECMA-262, section 15.1.3
    208 function Decode(uri, reserved) {
    209   var uriLength = uri.length;
    210   // We are going to pass result to %StringFromCharCodeArray
    211   // which does not expect any getters/setters installed
    212   // on the incoming array.
    213   var result = new InternalArray(uriLength);
    214   var index = 0;
    215   for (var k = 0; k < uriLength; k++) {
    216     var ch = uri.charAt(k);
    217     if (ch == '%') {
    218       if (k + 2 >= uriLength) throw new $URIError("URI malformed");
    219       var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k));
    220       if (cc >> 7) {
    221         var n = 0;
    222         while (((cc << ++n) & 0x80) != 0) { }
    223         if (n == 1 || n > 4) throw new $URIError("URI malformed");
    224         var octets = new $Array(n);
    225         octets[0] = cc;
    226         if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed");
    227         for (var i = 1; i < n; i++) {
    228           if (uri.charAt(++k) != '%') throw new $URIError("URI malformed");
    229           octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k),
    230                                             uri.charCodeAt(++k));
    231         }
    232         index = URIDecodeOctets(octets, result, index);
    233       } else {
    234         if (reserved(cc)) {
    235           result[index++] = 37; // Char code of '%'.
    236           result[index++] = uri.charCodeAt(k - 1);
    237           result[index++] = uri.charCodeAt(k);
    238         } else {
    239           result[index++] = cc;
    240         }
    241       }
    242     } else {
    243       result[index++] = ch.charCodeAt(0);
    244     }
    245   }
    246   result.length = index;
    247   return %StringFromCharCodeArray(result);
    248 }
    249 
    250 
    251 // ECMA-262 - 15.1.3.1.
    252 function URIDecode(uri) {
    253   var reservedPredicate = function(cc) {
    254     // #$
    255     if (35 <= cc && cc <= 36) return true;
    256     // &
    257     if (cc == 38) return true;
    258     // +,
    259     if (43 <= cc && cc <= 44) return true;
    260     // /
    261     if (cc == 47) return true;
    262     // :;
    263     if (58 <= cc && cc <= 59) return true;
    264     // =
    265     if (cc == 61) return true;
    266     // ?@
    267     if (63 <= cc && cc <= 64) return true;
    268 
    269     return false;
    270   };
    271   var string = ToString(uri);
    272   return Decode(string, reservedPredicate);
    273 }
    274 
    275 
    276 // ECMA-262 - 15.1.3.2.
    277 function URIDecodeComponent(component) {
    278   var reservedPredicate = function(cc) { return false; };
    279   var string = ToString(component);
    280   return Decode(string, reservedPredicate);
    281 }
    282 
    283 
    284 // Does the char code correspond to an alpha-numeric char.
    285 function isAlphaNumeric(cc) {
    286   // a - z
    287   if (97 <= cc && cc <= 122) return true;
    288   // A - Z
    289   if (65 <= cc && cc <= 90) return true;
    290   // 0 - 9
    291   if (48 <= cc && cc <= 57) return true;
    292 
    293   return false;
    294 }
    295 
    296 
    297 // ECMA-262 - 15.1.3.3.
    298 function URIEncode(uri) {
    299   var unescapePredicate = function(cc) {
    300     if (isAlphaNumeric(cc)) return true;
    301     // !
    302     if (cc == 33) return true;
    303     // #$
    304     if (35 <= cc && cc <= 36) return true;
    305     // &'()*+,-./
    306     if (38 <= cc && cc <= 47) return true;
    307     // :;
    308     if (58 <= cc && cc <= 59) return true;
    309     // =
    310     if (cc == 61) return true;
    311     // ?@
    312     if (63 <= cc && cc <= 64) return true;
    313     // _
    314     if (cc == 95) return true;
    315     // ~
    316     if (cc == 126) return true;
    317 
    318     return false;
    319   };
    320 
    321   var string = ToString(uri);
    322   return Encode(string, unescapePredicate);
    323 }
    324 
    325 
    326 // ECMA-262 - 15.1.3.4
    327 function URIEncodeComponent(component) {
    328   var unescapePredicate = function(cc) {
    329     if (isAlphaNumeric(cc)) return true;
    330     // !
    331     if (cc == 33) return true;
    332     // '()*
    333     if (39 <= cc && cc <= 42) return true;
    334     // -.
    335     if (45 <= cc && cc <= 46) return true;
    336     // _
    337     if (cc == 95) return true;
    338     // ~
    339     if (cc == 126) return true;
    340 
    341     return false;
    342   };
    343 
    344   var string = ToString(component);
    345   return Encode(string, unescapePredicate);
    346 }
    347 
    348 
    349 function HexValueOf(code) {
    350   // 0-9
    351   if (code >= 48 && code <= 57) return code - 48;
    352   // A-F
    353   if (code >= 65 && code <= 70) return code - 55;
    354   // a-f
    355   if (code >= 97 && code <= 102) return code - 87;
    356 
    357   return -1;
    358 }
    359 
    360 
    361 // Convert a character code to 4-digit hex string representation
    362 // 64 -> 0040, 62234 -> F31A.
    363 function CharCodeToHex4Str(cc) {
    364   var r = "";
    365   if (hexCharArray === 0) {
    366     hexCharArray = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    367                     "A", "B", "C", "D", "E", "F"];
    368   }
    369   for (var i = 0; i < 4; ++i) {
    370     var c = hexCharArray[cc & 0x0F];
    371     r = c + r;
    372     cc = cc >>> 4;
    373   }
    374   return r;
    375 }
    376 
    377 
    378 // Returns true if all digits in string s are valid hex numbers
    379 function IsValidHex(s) {
    380   for (var i = 0; i < s.length; ++i) {
    381     var cc = s.charCodeAt(i);
    382     if ((48 <= cc && cc <= 57) ||
    383         (65 <= cc && cc <= 70) ||
    384         (97 <= cc && cc <= 102)) {
    385       // '0'..'9', 'A'..'F' and 'a' .. 'f'.
    386     } else {
    387       return false;
    388     }
    389   }
    390   return true;
    391 }
    392 
    393 
    394 // ECMA-262 - B.2.1.
    395 function URIEscape(str) {
    396   var s = ToString(str);
    397   return %URIEscape(s);
    398 }
    399 
    400 
    401 // ECMA-262 - B.2.2.
    402 function URIUnescape(str) {
    403   var s = ToString(str);
    404   return %URIUnescape(s);
    405 }
    406 
    407 
    408 // -------------------------------------------------------------------
    409 
    410 function SetUpUri() {
    411   %CheckIsBootstrapping();
    412   // Set up non-enumerable URI functions on the global object and set
    413   // their names.
    414   InstallFunctions(global, DONT_ENUM, $Array(
    415     "escape", URIEscape,
    416     "unescape", URIUnescape,
    417     "decodeURI", URIDecode,
    418     "decodeURIComponent", URIDecodeComponent,
    419     "encodeURI", URIEncode,
    420     "encodeURIComponent", URIEncodeComponent
    421   ));
    422 }
    423 
    424 SetUpUri();
    425