Home | History | Annotate | Download | only in src
      1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 "use strict";
      6 
      7 // This file relies on the fact that the following declaration has been made
      8 // in runtime.js:
      9 // var $Array = global.Array;
     10 
     11 // -------------------------------------------------------------------
     12 
     13 // This file contains support for URI manipulations written in
     14 // JavaScript.
     15 
     16 
     17 (function() {
     18 
     19   // -------------------------------------------------------------------
     20   // Define internal helper functions.
     21 
     22   function HexValueOf(code) {
     23     // 0-9
     24     if (code >= 48 && code <= 57) return code - 48;
     25     // A-F
     26     if (code >= 65 && code <= 70) return code - 55;
     27     // a-f
     28     if (code >= 97 && code <= 102) return code - 87;
     29 
     30     return -1;
     31   }
     32 
     33   // Does the char code correspond to an alpha-numeric char.
     34   function isAlphaNumeric(cc) {
     35     // a - z
     36     if (97 <= cc && cc <= 122) return true;
     37     // A - Z
     38     if (65 <= cc && cc <= 90) return true;
     39     // 0 - 9
     40     if (48 <= cc && cc <= 57) return true;
     41 
     42     return false;
     43   }
     44 
     45   //Lazily initialized.
     46   var hexCharCodeArray = 0;
     47 
     48   function URIAddEncodedOctetToBuffer(octet, result, index) {
     49     result[index++] = 37; // Char code of '%'.
     50     result[index++] = hexCharCodeArray[octet >> 4];
     51     result[index++] = hexCharCodeArray[octet & 0x0F];
     52     return index;
     53   }
     54 
     55   function URIEncodeOctets(octets, result, index) {
     56     if (hexCharCodeArray === 0) {
     57       hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
     58                           65, 66, 67, 68, 69, 70];
     59     }
     60     index = URIAddEncodedOctetToBuffer(octets[0], result, index);
     61     if (octets[1]) index = URIAddEncodedOctetToBuffer(octets[1], result, index);
     62     if (octets[2]) index = URIAddEncodedOctetToBuffer(octets[2], result, index);
     63     if (octets[3]) index = URIAddEncodedOctetToBuffer(octets[3], result, index);
     64     return index;
     65   }
     66 
     67   function URIEncodeSingle(cc, result, index) {
     68     var x = (cc >> 12) & 0xF;
     69     var y = (cc >> 6) & 63;
     70     var z = cc & 63;
     71     var octets = new $Array(3);
     72     if (cc <= 0x007F) {
     73       octets[0] = cc;
     74     } else if (cc <= 0x07FF) {
     75       octets[0] = y + 192;
     76       octets[1] = z + 128;
     77     } else {
     78       octets[0] = x + 224;
     79       octets[1] = y + 128;
     80       octets[2] = z + 128;
     81     }
     82     return URIEncodeOctets(octets, result, index);
     83   }
     84 
     85   function URIEncodePair(cc1 , cc2, result, index) {
     86     var u = ((cc1 >> 6) & 0xF) + 1;
     87     var w = (cc1 >> 2) & 0xF;
     88     var x = cc1 & 3;
     89     var y = (cc2 >> 6) & 0xF;
     90     var z = cc2 & 63;
     91     var octets = new $Array(4);
     92     octets[0] = (u >> 2) + 240;
     93     octets[1] = (((u & 3) << 4) | w) + 128;
     94     octets[2] = ((x << 4) | y) + 128;
     95     octets[3] = z + 128;
     96     return URIEncodeOctets(octets, result, index);
     97   }
     98 
     99   function URIHexCharsToCharCode(highChar, lowChar) {
    100     var highCode = HexValueOf(highChar);
    101     var lowCode = HexValueOf(lowChar);
    102     if (highCode == -1 || lowCode == -1) {
    103       throw new $URIError("URI malformed");
    104     }
    105     return (highCode << 4) | lowCode;
    106   }
    107 
    108   // Callers must ensure that |result| is a sufficiently long sequential
    109   // two-byte string!
    110   function URIDecodeOctets(octets, result, index) {
    111     var value;
    112     var o0 = octets[0];
    113     if (o0 < 0x80) {
    114       value = o0;
    115     } else if (o0 < 0xc2) {
    116       throw new $URIError("URI malformed");
    117     } else {
    118       var o1 = octets[1];
    119       if (o0 < 0xe0) {
    120         var a = o0 & 0x1f;
    121         if ((o1 < 0x80) || (o1 > 0xbf)) {
    122           throw new $URIError("URI malformed");
    123         }
    124         var b = o1 & 0x3f;
    125         value = (a << 6) + b;
    126         if (value < 0x80 || value > 0x7ff) {
    127           throw new $URIError("URI malformed");
    128         }
    129       } else {
    130         var o2 = octets[2];
    131         if (o0 < 0xf0) {
    132           var a = o0 & 0x0f;
    133           if ((o1 < 0x80) || (o1 > 0xbf)) {
    134             throw new $URIError("URI malformed");
    135           }
    136           var b = o1 & 0x3f;
    137           if ((o2 < 0x80) || (o2 > 0xbf)) {
    138             throw new $URIError("URI malformed");
    139           }
    140           var c = o2 & 0x3f;
    141           value = (a << 12) + (b << 6) + c;
    142           if ((value < 0x800) || (value > 0xffff)) {
    143             throw new $URIError("URI malformed");
    144           }
    145         } else {
    146           var o3 = octets[3];
    147           if (o0 < 0xf8) {
    148             var a = (o0 & 0x07);
    149             if ((o1 < 0x80) || (o1 > 0xbf)) {
    150               throw new $URIError("URI malformed");
    151             }
    152             var b = (o1 & 0x3f);
    153             if ((o2 < 0x80) || (o2 > 0xbf)) {
    154               throw new $URIError("URI malformed");
    155             }
    156             var c = (o2 & 0x3f);
    157             if ((o3 < 0x80) || (o3 > 0xbf)) {
    158               throw new $URIError("URI malformed");
    159             }
    160             var d = (o3 & 0x3f);
    161             value = (a << 18) + (b << 12) + (c << 6) + d;
    162             if ((value < 0x10000) || (value > 0x10ffff)) {
    163               throw new $URIError("URI malformed");
    164             }
    165           } else {
    166             throw new $URIError("URI malformed");
    167           }
    168         }
    169       }
    170     }
    171     if (0xD800 <= value && value <= 0xDFFF) {
    172       throw new $URIError("URI malformed");
    173     }
    174     if (value < 0x10000) {
    175       %_TwoByteSeqStringSetChar(result, index++, value);
    176     } else {
    177       %_TwoByteSeqStringSetChar(result, index++, (value >> 10) + 0xd7c0);
    178       %_TwoByteSeqStringSetChar(result, index++, (value & 0x3ff) + 0xdc00);
    179     }
    180     return index;
    181   }
    182 
    183   // ECMA-262, section 15.1.3
    184   function Encode(uri, unescape) {
    185     var uriLength = uri.length;
    186     var array = new InternalArray(uriLength);
    187     var index = 0;
    188     for (var k = 0; k < uriLength; k++) {
    189       var cc1 = uri.charCodeAt(k);
    190       if (unescape(cc1)) {
    191         array[index++] = cc1;
    192       } else {
    193         if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed");
    194         if (cc1 < 0xD800 || cc1 > 0xDBFF) {
    195           index = URIEncodeSingle(cc1, array, index);
    196         } else {
    197           k++;
    198           if (k == uriLength) throw new $URIError("URI malformed");
    199           var cc2 = uri.charCodeAt(k);
    200           if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed");
    201           index = URIEncodePair(cc1, cc2, array, index);
    202         }
    203       }
    204     }
    205 
    206     var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
    207     for (var i = 0; i < array.length; i++) {
    208       %_OneByteSeqStringSetChar(result, i, array[i]);
    209     }
    210     return result;
    211   }
    212 
    213   // ECMA-262, section 15.1.3
    214   function Decode(uri, reserved) {
    215     var uriLength = uri.length;
    216     var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
    217     var index = 0;
    218     var k = 0;
    219 
    220     // Optimistically assume ascii string.
    221     for ( ; k < uriLength; k++) {
    222       var code = uri.charCodeAt(k);
    223       if (code == 37) {  // '%'
    224         if (k + 2 >= uriLength) throw new $URIError("URI malformed");
    225         var cc = URIHexCharsToCharCode(uri.charCodeAt(k+1), uri.charCodeAt(k+2));
    226         if (cc >> 7) break;  // Assumption wrong, two byte string.
    227         if (reserved(cc)) {
    228           %_OneByteSeqStringSetChar(one_byte, index++, 37);  // '%'.
    229           %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+1));
    230           %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+2));
    231         } else {
    232           %_OneByteSeqStringSetChar(one_byte, index++, cc);
    233         }
    234         k += 2;
    235       } else {
    236         if (code > 0x7f) break;  // Assumption wrong, two byte string.
    237         %_OneByteSeqStringSetChar(one_byte, index++, code);
    238       }
    239     }
    240 
    241     one_byte = %TruncateString(one_byte, index);
    242     if (k == uriLength) return one_byte;
    243 
    244     // Write into two byte string.
    245     var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
    246     index = 0;
    247 
    248     for ( ; k < uriLength; k++) {
    249       var code = uri.charCodeAt(k);
    250       if (code == 37) {  // '%'
    251         if (k + 2 >= uriLength) throw new $URIError("URI malformed");
    252         var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k));
    253         if (cc >> 7) {
    254           var n = 0;
    255           while (((cc << ++n) & 0x80) != 0) { }
    256           if (n == 1 || n > 4) throw new $URIError("URI malformed");
    257           var octets = new $Array(n);
    258           octets[0] = cc;
    259           if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed");
    260           for (var i = 1; i < n; i++) {
    261             if (uri.charAt(++k) != '%') throw new $URIError("URI malformed");
    262             octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k),
    263                                               uri.charCodeAt(++k));
    264           }
    265           index = URIDecodeOctets(octets, two_byte, index);
    266         } else  if (reserved(cc)) {
    267           %_TwoByteSeqStringSetChar(two_byte, index++, 37);  // '%'.
    268           %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k - 1));
    269           %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k));
    270         } else {
    271           %_TwoByteSeqStringSetChar(two_byte, index++, cc);
    272         }
    273       } else {
    274         %_TwoByteSeqStringSetChar(two_byte, index++, code);
    275       }
    276     }
    277 
    278     two_byte = %TruncateString(two_byte, index);
    279     return one_byte + two_byte;
    280   }
    281 
    282   // -------------------------------------------------------------------
    283   // Define exported functions.
    284 
    285   // ECMA-262 - B.2.1.
    286   function URIEscapeJS(str) {
    287     var s = ToString(str);
    288     return %URIEscape(s);
    289   }
    290 
    291   // ECMA-262 - B.2.2.
    292   function URIUnescapeJS(str) {
    293     var s = ToString(str);
    294     return %URIUnescape(s);
    295   }
    296 
    297   // ECMA-262 - 15.1.3.1.
    298   function URIDecode(uri) {
    299     var reservedPredicate = function(cc) {
    300       // #$
    301       if (35 <= cc && cc <= 36) return true;
    302       // &
    303       if (cc == 38) return true;
    304       // +,
    305       if (43 <= cc && cc <= 44) return true;
    306       // /
    307       if (cc == 47) return true;
    308       // :;
    309       if (58 <= cc && cc <= 59) return true;
    310       // =
    311       if (cc == 61) return true;
    312       // ?@
    313       if (63 <= cc && cc <= 64) return true;
    314 
    315       return false;
    316     };
    317     var string = ToString(uri);
    318     return Decode(string, reservedPredicate);
    319   }
    320 
    321   // ECMA-262 - 15.1.3.2.
    322   function URIDecodeComponent(component) {
    323     var reservedPredicate = function(cc) { return false; };
    324     var string = ToString(component);
    325     return Decode(string, reservedPredicate);
    326   }
    327 
    328   // ECMA-262 - 15.1.3.3.
    329   function URIEncode(uri) {
    330     var unescapePredicate = function(cc) {
    331       if (isAlphaNumeric(cc)) return true;
    332       // !
    333       if (cc == 33) return true;
    334       // #$
    335       if (35 <= cc && cc <= 36) return true;
    336       // &'()*+,-./
    337       if (38 <= cc && cc <= 47) return true;
    338       // :;
    339       if (58 <= cc && cc <= 59) return true;
    340       // =
    341       if (cc == 61) return true;
    342       // ?@
    343       if (63 <= cc && cc <= 64) return true;
    344       // _
    345       if (cc == 95) return true;
    346       // ~
    347       if (cc == 126) return true;
    348 
    349       return false;
    350     };
    351     var string = ToString(uri);
    352     return Encode(string, unescapePredicate);
    353   }
    354 
    355   // ECMA-262 - 15.1.3.4
    356   function URIEncodeComponent(component) {
    357     var unescapePredicate = function(cc) {
    358       if (isAlphaNumeric(cc)) return true;
    359       // !
    360       if (cc == 33) return true;
    361       // '()*
    362       if (39 <= cc && cc <= 42) return true;
    363       // -.
    364       if (45 <= cc && cc <= 46) return true;
    365       // _
    366       if (cc == 95) return true;
    367       // ~
    368       if (cc == 126) return true;
    369 
    370       return false;
    371     };
    372     var string = ToString(component);
    373     return Encode(string, unescapePredicate);
    374   }
    375 
    376   // -------------------------------------------------------------------
    377   // Install exported functions.
    378 
    379   %CheckIsBootstrapping();
    380 
    381   // Set up non-enumerable URI functions on the global object and set
    382   // their names.
    383   InstallFunctions(global, DONT_ENUM, $Array(
    384       "escape", URIEscapeJS,
    385       "unescape", URIUnescapeJS,
    386       "decodeURI", URIDecode,
    387       "decodeURIComponent", URIDecodeComponent,
    388       "encodeURI", URIEncode,
    389       "encodeURIComponent", URIEncodeComponent
    390   ));
    391 
    392 })();
    393