Home | History | Annotate | Download | only in src
      1 // Copyright 2012 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 // This file relies on the fact that the following declarations have been
     29 // made in runtime.js:
     30 // var $Object = global.Object;
     31 // var $Array = global.Array;
     32 
        // Alias for the RegExp function of the global object; the rest of this file
        // refers to it as $RegExp.
     33 var $RegExp = global.RegExp;
     34 
     35 // -------------------------------------------------------------------
     36 
     37 // Initializes |object| as a regexp from |pattern| and |flags|.  Called by
     38 // RegExpConstructor and RegExpCompile.
        //
        // If |pattern| is itself a regexp, its source and its g/i/m flags are
        // reused, and supplying |flags| as well throws a TypeError.  Otherwise an
        // undefined |pattern| or |flags| becomes '' and any other value is
        // converted with ToString.  Each of 'g', 'i' and 'm' may occur at most once
        // in |flags|; a repeated or unknown flag character throws a SyntaxError.
     39 function DoConstructRegExp(object, pattern, flags) {
     40   // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
     41   if (IS_REGEXP(pattern)) {
     42     if (!IS_UNDEFINED(flags)) {
     43       throw MakeTypeError('regexp_flags', []);
     44     }
            // Rebuild the flag string from the existing regexp's own properties.
     45     flags = (pattern.global ? 'g' : '')
     46         + (pattern.ignoreCase ? 'i' : '')
     47         + (pattern.multiline ? 'm' : '');
     48     pattern = pattern.source;
     49   }
     50 
     51   pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
     52   flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
     53 
          // Note: this local shadows the file-level |global| object for the whole
          // function body.
     54   var global = false;
     55   var ignoreCase = false;
     56   var multiline = false;
          // Validate the flag string one character at a time, rejecting duplicates.
     57   for (var i = 0; i < flags.length; i++) {
     58     var c = %_CallFunction(flags, i, StringCharAt);
     59     switch (c) {
     60       case 'g':
     61         if (global) {
     62           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     63         }
     64         global = true;
     65         break;
     66       case 'i':
     67         if (ignoreCase) {
     68           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     69         }
     70         ignoreCase = true;
     71         break;
     72       case 'm':
     73         if (multiline) {
     74           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     75         }
     76         multiline = true;
     77         break;
     78       default:
     79         throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     80     }
     81   }
     82 
          // Receives the object, the pattern string and the three parsed booleans.
     83   %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline);
     84 
     85   // Call internal function to compile the pattern.
     86   %RegExpCompile(object, pattern, flags);
     87 }
     88 
     89 
     90 function RegExpConstructor(pattern, flags) {
          // Entry point for both 'new RegExp(...)' and plain 'RegExp(...)' calls;
          // SetUpRegExp passes this function to %SetCode for $RegExp.
     91   if (%_IsConstructCall()) {
            // Construct call: initialize the receiver in place.  There is no
            // explicit return value on this path.
     92     DoConstructRegExp(this, pattern, flags);
     93   } else {
     94     // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
            // A regexp passed without flags is handed back unchanged (no copy).
     95     if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
     96       return pattern;
     97     }
            // Everything else behaves like a construct call with the same arguments.
     98     return new $RegExp(pattern, flags);
     99   }
    100 }
    101 
    102 // Deprecated RegExp.prototype.compile method.  We behave like the constructor
    103 // were called again.  In SpiderMonkey, this method returns the regexp object.
    104 // In JSC, it returns undefined.  For compatibility with JSC, we match their
    105 // behavior.
    106 function RegExpCompile(pattern, flags) {
    107   // Both JSC and SpiderMonkey treat a missing pattern argument as the
    108   // empty subject string, and an actual undefined value passed as the
    109   // pattern as the string 'undefined'.  Note that JSC is inconsistent
    110   // here, treating undefined values differently in
    111   // RegExp.prototype.compile and in the constructor, where they are
    112   // the empty string.  For compatibility with JSC, we match their
    113   // behavior.
    114   if (this == $RegExp.prototype) {
    115     // We don't allow recompiling RegExp.prototype.
    116     throw MakeTypeError('incompatible_method_receiver',
    117                         ['RegExp.prototype.compile', this]);
    118   }
    119   if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
    120     DoConstructRegExp(this, 'undefined', flags);
    121   } else {
    122     DoConstructRegExp(this, pattern, flags);
    123   }
    124 }
    125 
    126 
    127 function DoRegExpExec(regexp, string, index) {
          // Thin wrapper around %_RegExpExec that records into the shared
          // lastMatchInfo array.  Returns whatever %_RegExpExec returns; a non-null
          // result also clears lastMatchInfoOverride, so the RegExp static getters
          // read from lastMatchInfo again.
    128   var result = %_RegExpExec(regexp, string, index, lastMatchInfo);
    129   if (result !== null) lastMatchInfoOverride = null;
    130   return result;
    131 }
    132 
    133 
    134 function BuildResultFromMatchInfo(lastMatchInfo, s) {
          // Builds the result object for a successful match on subject |s| from a
          // match-info array.  Note that the parameter shadows the file-level
          // lastMatchInfo variable.
          //
          // The stored count is in index slots, two per capture (start and end), so
          // it is halved to get the number of result entries.
    135   var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;
    136   var start = lastMatchInfo[CAPTURE0];
    137   var end = lastMatchInfo[CAPTURE1];
          // Receives the entry count, the match start and the subject; presumably
          // it creates the result array with its index/input set (TODO confirm
          // against the %_RegExpConstructResult implementation).
    138   var result = %_RegExpConstructResult(numResults, start, s);
          // Entry 0 is the whole match.
    139   result[0] = %_SubString(s, start, end);
          // Skip the first index pair (the whole match, handled above).
    140   var j = REGEXP_FIRST_CAPTURE + 2;
    141   for (var i = 1; i < numResults; i++) {
    142     start = lastMatchInfo[j++];
            // A start index of -1 is treated as "no capture": the entry is left
            // unassigned.
    143     if (start != -1) {
    144       end = lastMatchInfo[j];
    145       result[i] = %_SubString(s, start, end);
    146     }
            // Step over the end slot whether or not it was read.
    147     j++;
    148   }
    149   return result;
    150 }
    151 
    152 
    153 function RegExpExecNoTests(regexp, string, start) {
    154   // Must be called with RegExp, string and positive integer as arguments.
          // Unlike RegExpExec, this performs no receiver check, no string conversion
          // and no lastIndex handling before matching; |start| is used as given.
    155   var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
    156   if (matchInfo !== null) {
            // Success: drop any override and return the built result.  lastIndex is
            // not updated on this path.
    157     lastMatchInfoOverride = null;
    158     return BuildResultFromMatchInfo(matchInfo, string);
    159   }
          // Failure: reset lastIndex and report no match.
    160   regexp.lastIndex = 0;
    161   return null;
    162 }
    163 
    164 
    165 function RegExpExec(string) {
          // Implementation installed as RegExp.prototype.exec by SetUpRegExp.
          // Returns the result built by BuildResultFromMatchInfo on a match and
          // null otherwise; throws a TypeError if the receiver is not a regexp.
    166   if (!IS_REGEXP(this)) {
    167     throw MakeTypeError('incompatible_method_receiver',
    168                         ['RegExp.prototype.exec', this]);
    169   }
    170 
    171   string = TO_STRING_INLINE(string);
    172   var lastIndex = this.lastIndex;
    173 
    174   // Conversion is required by the ES5 specification (RegExp.prototype.exec
    175   // algorithm, step 5) even if the value is discarded for non-global RegExps.
    176   var i = TO_INTEGER(lastIndex);
    177 
    178   var global = this.global;
    179   if (global) {
            // A start position outside the subject can never match: reset lastIndex
            // and fail without running the regexp.
    180     if (i < 0 || i > string.length) {
    181       this.lastIndex = 0;
    182       return null;
    183     }
    184   } else {
            // Non-global regexps always search from the beginning.
    185     i = 0;
    186   }
    187 
          // The log call receives the unconverted lastIndex value.
    188   %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
    189   // matchIndices is either null or the lastMatchInfo array.
    190   var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
    191 
    192   if (matchIndices === null) {
            // lastIndex is reset on failure for global and non-global regexps alike.
    193     this.lastIndex = 0;
    194     return null;
    195   }
    196 
    197   // Successful match.
    198   lastMatchInfoOverride = null;
    199   if (global) {
            // Continue the next search at the end index of this match.
    200     this.lastIndex = lastMatchInfo[CAPTURE1];
    201   }
    202   return BuildResultFromMatchInfo(matchIndices, string);
    203 }
    204 
    205 
    206 // One-element cache for the simplified test regexp.
        // TrimRegExp compares regexp_key with its argument and, on a mismatch,
        // stores the argument in regexp_key and the trimmed regexp it builds in
        // regexp_val.  Both start out undefined.
    207 var regexp_key;
    208 var regexp_val;
    209 
    210 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
    211 // that test is defined in terms of RegExp.prototype.exec. However, it probably
    212 // means the original value of RegExp.prototype.exec, which is what everybody
    213 // else implements.
        //
        // Returns true if the receiver matches |string| and false otherwise; throws
        // a TypeError if the receiver is not a regexp.  No result array is built.
    214 function RegExpTest(string) {
    215   if (!IS_REGEXP(this)) {
    216     throw MakeTypeError('incompatible_method_receiver',
    217                         ['RegExp.prototype.test', this]);
    218   }
    219   string = TO_STRING_INLINE(string);
    220 
    221   var lastIndex = this.lastIndex;
    222 
    223   // Conversion is required by the ES5 specification (RegExp.prototype.exec
    224   // algorithm, step 5) even if the value is discarded for non-global RegExps.
    225   var i = TO_INTEGER(lastIndex);
    226 
    227   if (this.global) {
            // Global regexp: same lastIndex handling as RegExpExec.
    228     if (i < 0 || i > string.length) {
    229       this.lastIndex = 0;
    230       return false;
    231     }
    232     %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
    233     // matchIndices is either null or the lastMatchInfo array.
    234     var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
    235     if (matchIndices === null) {
    236       this.lastIndex = 0;
    237       return false;
    238     }
    239     lastMatchInfoOverride = null;
            // Continue the next search at the end index of this match.
    240     this.lastIndex = lastMatchInfo[CAPTURE1];
    241     return true;
    242   } else {
    243     // Non-global regexp.
    244     // Remove irrelevant preceding '.*' in a non-global test regexp.
    245     // The expression checks whether this.source starts with '.*' and
    246     // that the third char is not a '?'.
    247     var regexp = this;
    248     if (%_StringCharCodeAt(regexp.source, 0) == 46 &&  // '.'
    249         %_StringCharCodeAt(regexp.source, 1) == 42 &&  // '*'
    250         %_StringCharCodeAt(regexp.source, 2) != 63) {  // '?'
              // Match with the shortened regexp instead; the receiver itself is
              // left untouched.
    251       regexp = TrimRegExp(regexp);
    252     }
    253     %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [regexp, string, lastIndex]);
    254     // matchIndices is either null or the lastMatchInfo array.
            // NOTE(review): when the trimmed regexp is used, lastMatchInfo is
            // presumably filled in for the trimmed pattern rather than for the
            // receiver's full pattern -- confirm this is intended.
    255     var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo);
    256     if (matchIndices === null) {
              // lastIndex is reset on the receiver, not on the trimmed regexp.
    257       this.lastIndex = 0;
    258       return false;
    259     }
    260     lastMatchInfoOverride = null;
    261     return true;
    262   }
    263 }
    264 
    265 function TrimRegExp(regexp) {
          // Returns a regexp whose source is |regexp|'s source with its first two
          // characters removed (RegExpTest only calls this when they are '.*').
          // The ignoreCase and multiline flags are carried over; the global flag
          // never is.
          //
          // The result is kept in the one-element regexp_key/regexp_val cache and
          // reused as long as the same regexp is passed again.
          // NOTE(review): the cache is keyed on the regexp object alone, so it
          // relies on that object's source and flags not changing while cached.
    266   if (!%_ObjectEquals(regexp_key, regexp)) {
    267     regexp_key = regexp;
    268     regexp_val =
    269       new $RegExp(%_SubString(regexp.source, 2, regexp.source.length),
    270                   (regexp.ignoreCase ? regexp.multiline ? "im" : "i"
    271                                      : regexp.multiline ? "m" : ""));
    272   }
    273   return regexp_val;
    274 }
    275 
    276 
    277 function RegExpToString() {
          // Implementation installed as RegExp.prototype.toString by SetUpRegExp.
          // Returns '/' + source + '/' followed by the set flags, always in the
          // order g, i, m.  Throws a TypeError if the receiver is not a regexp.
    278   if (!IS_REGEXP(this)) {
    279     throw MakeTypeError('incompatible_method_receiver',
    280                         ['RegExp.prototype.toString', this]);
    281   }
    282   var result = '/' + this.source + '/';
    283   if (this.global) result += 'g';
    284   if (this.ignoreCase) result += 'i';
    285   if (this.multiline) result += 'm';
    286   return result;
    287 }
    288 
    289 
    290 // Getters for the static properties lastMatch, lastParen, leftContext, and
    291 // rightContext of the RegExp constructor.  The properties are computed based
    292 // on the captures array of the last successful match and the subject string
    293 // of the last successful match.
        //
        // Each getter first consults lastMatchInfoOverride and only falls back to
        // lastMatchInfo when no override is set.
    294 function RegExpGetLastMatch() {
          // Getter for RegExp.lastMatch and RegExp['$&'].
    295   if (lastMatchInfoOverride !== null) {
    296     return OVERRIDE_MATCH(lastMatchInfoOverride);
    297   }
          // No override: cut the whole match (first index pair) out of the subject
          // of the last match.
    298   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    299   return %_SubString(regExpSubject,
    300                      lastMatchInfo[CAPTURE0],
    301                      lastMatchInfo[CAPTURE1]);
    302 }
    303 
    304 
    305 function RegExpGetLastParen() {
          // Getter for RegExp.lastParen and RegExp['$+']: the last capture of the
          // last match, or '' if there is none.
    306   if (lastMatchInfoOverride) {
    307     var override = lastMatchInfoOverride;
            // Assuming the replacement-function argument layout (match,
            // captures..., position, subject): three or fewer entries means no
            // captures, otherwise the last capture sits third from the end.
    308     if (override.length <= 3) return '';
    309     return override[override.length - 3];
    310   }
          // |length| counts index slots (two per capture), not captures.
    311   var length = NUMBER_OF_CAPTURES(lastMatchInfo);
    312   if (length <= 2) return '';  // There were no captures.
    313   // We match the SpiderMonkey behavior: return the substring defined by the
    314   // last pair (after the first pair) of elements of the capture array even if
    315   // it is empty.
    316   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    317   var start = lastMatchInfo[CAPTURE(length - 2)];
    318   var end = lastMatchInfo[CAPTURE(length - 1)];
          // An index of -1 on either side yields the empty string.
    319   if (start != -1 && end != -1) {
    320     return %_SubString(regExpSubject, start, end);
    321   }
    322   return "";
    323 }
    324 
    325 
    326 function RegExpGetLeftContext() {
          // Getter for RegExp.leftContext and RegExp['$`']: the part of the subject
          // that precedes the last match.
    327   var start_index;
    328   var subject;
    329   if (!lastMatchInfoOverride) {
            // No override: use the match start and subject stored in lastMatchInfo.
    330     start_index = lastMatchInfo[CAPTURE0];
    331     subject = LAST_SUBJECT(lastMatchInfo);
    332   } else {
            // Override active: take position and subject from the override array.
    333     var override = lastMatchInfoOverride;
    334     start_index = OVERRIDE_POS(override);
    335     subject = OVERRIDE_SUBJECT(override);
    336   }
    337   return %_SubString(subject, 0, start_index);
    338 }
    339 
    340 
    341 function RegExpGetRightContext() {
          // Getter for RegExp.rightContext and RegExp["$'"]: the part of the
          // subject that follows the last match.
    342   var start_index;
    343   var subject;
    344   if (!lastMatchInfoOverride) {
            // No override: the match end index is stored directly in lastMatchInfo.
    345     start_index = lastMatchInfo[CAPTURE1];
    346     subject = LAST_SUBJECT(lastMatchInfo);
    347   } else {
    348     var override = lastMatchInfoOverride;
    349     subject = OVERRIDE_SUBJECT(override);
            // The override holds no end index, so derive it from the match position
            // plus the length of the matched string.
    350     var match = OVERRIDE_MATCH(override);
    351     start_index = OVERRIDE_POS(override) + match.length;
    352   }
    353   return %_SubString(subject, start_index, subject.length);
    354 }
    355 
    356 
    357 // The properties $1..$9 are the first nine capturing substrings of the last
    358 // successful match, or ''.  The function RegExpMakeCaptureGetter will be
    359 // called with indices from 1 to 9.
        //
        // Returns a new getter function closed over |n|.
    360 function RegExpMakeCaptureGetter(n) {
    361   return function() {
    362     if (lastMatchInfoOverride) {
              // The last two override entries are not captures, so capture |n|
              // exists only if n < length - 2.
    363       if (n < lastMatchInfoOverride.length - 2) {
    364         return OVERRIDE_CAPTURE(lastMatchInfoOverride, n);
    365       }
    366       return '';
    367     }
            // Each capture occupies two index slots (start, end) in lastMatchInfo.
    368     var index = n * 2;
    369     if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
    370     var matchStart = lastMatchInfo[CAPTURE(index)];
    371     var matchEnd = lastMatchInfo[CAPTURE(index + 1)];
            // An index of -1 on either side yields the empty string.
    372     if (matchStart == -1 || matchEnd == -1) return '';
    373     return %_SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
    374   };
    375 }
    376 
    377 
    378 // Property of the builtins object for recording the result of the last
    379 // regexp match.  The property lastMatchInfo includes the matchIndices
    380 // array of the last successful regexp match (an array of start/end index
    381 // pairs for the match and all the captured substrings).  The invariant is
    382 // that there are at least two capture indices.  The array also contains
    383 // the subject string for the last successful match.
        //
        // The initial value below describes an empty match at index 0 of the empty
        // subject string.
    384 var lastMatchInfo = new InternalPackedArray(
    385     2,                 // REGEXP_NUMBER_OF_CAPTURES
    386     "",                // Last subject.
    387     void 0,            // Last input - settable with RegExpSetInput.
    388     0,                 // REGEXP_FIRST_CAPTURE + 0
    389     0                  // REGEXP_FIRST_CAPTURE + 1
    390 );
    391 
    392 // Override last match info with an array of actual substrings.
    393 // Used internally by replace regexp with function.
    394 // The array has the format of an "apply" argument for a replacement
    395 // function.
        // While this is non-null the RegExp static getters in this file read from
        // it instead of lastMatchInfo; the exec/test paths in this file reset it to
        // null after every successful match.
    396 var lastMatchInfoOverride = null;
    397 
    398 // -------------------------------------------------------------------
    399 
    400 function SetUpRegExp() {
          // Wires up the RegExp function: its code, its prototype methods and its
          // static accessor properties.  Invoked once, right after this definition.
          // Presumably %CheckIsBootstrapping guards against being run outside of
          // bootstrapping (TODO confirm).
    401   %CheckIsBootstrapping();
    402   %FunctionSetInstanceClassName($RegExp, 'RegExp');
    403   %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
    404   %SetCode($RegExp, RegExpConstructor);
    405 
          // The array alternates property names and the functions to install.
    406   InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
    407     "exec", RegExpExec,
    408     "test", RegExpTest,
    409     "toString", RegExpToString,
    410     "compile", RegExpCompile
    411   ));
    412 
    413   // The length of compile is 1 in SpiderMonkey.
    414   %FunctionSetLength($RegExp.prototype.compile, 1);
    415 
    416   // The properties input, $input, and $_ are aliases for each other.  When this
    417   // value is set the value it is set to is coerced to a string.
    418   // Getter and setter for the input.
    419   var RegExpGetInput = function() {
    420     var regExpInput = LAST_INPUT(lastMatchInfo);
            // The slot starts out undefined; report that as the empty string.
    421     return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
    422   };
    423   var RegExpSetInput = function(string) {
    424     LAST_INPUT(lastMatchInfo) = ToString(string);
    425   };
    426 
          // 22 matches the accessors defined below: 3 input aliases, 2 multiline
          // aliases, 8 last-match properties and the 9 capture getters $1..$9.
    427   %OptimizeObjectForAddingMultipleProperties($RegExp, 22);
          // Throughout, the spelled-out names are defined with DONT_DELETE only,
          // while the '$'-punctuation aliases also get DONT_ENUM.
    428   %DefineOrRedefineAccessorProperty($RegExp, 'input', RegExpGetInput,
    429                                     RegExpSetInput, DONT_DELETE);
    430   %DefineOrRedefineAccessorProperty($RegExp, '$_', RegExpGetInput,
    431                                     RegExpSetInput, DONT_ENUM | DONT_DELETE);
    432   %DefineOrRedefineAccessorProperty($RegExp, '$input', RegExpGetInput,
    433                                     RegExpSetInput, DONT_ENUM | DONT_DELETE);
    434 
    435   // The properties multiline and $* are aliases for each other.  When this
    436   // value is set in SpiderMonkey, the value it is set to is coerced to a
    437   // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
    438   // the value of the expression 'RegExp.multiline = null' (for instance) is the
    439   // boolean false (i.e., the value after coercion), while in V8 it is the value
    440   // null (i.e., the value before coercion).
    441 
    442   // Getter and setter for multiline.
          // This closure variable is read and written only by the two accessors
          // below, as far as this file shows.
    443   var multiline = false;
    444   var RegExpGetMultiline = function() { return multiline; };
    445   var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; };
    446 
    447   %DefineOrRedefineAccessorProperty($RegExp, 'multiline', RegExpGetMultiline,
    448                                     RegExpSetMultiline, DONT_DELETE);
    449   %DefineOrRedefineAccessorProperty($RegExp, '$*', RegExpGetMultiline,
    450                                     RegExpSetMultiline,
    451                                     DONT_ENUM | DONT_DELETE);
    452 
    453 
          // Setter that discards the assigned value; used for every static property
          // below, so assigning to them has no effect.
    454   var NoOpSetter = function(ignored) {};
    455 
    456 
    457   // Static properties set by a successful match.
    458   %DefineOrRedefineAccessorProperty($RegExp, 'lastMatch', RegExpGetLastMatch,
    459                                     NoOpSetter, DONT_DELETE);
    460   %DefineOrRedefineAccessorProperty($RegExp, '$&', RegExpGetLastMatch,
    461                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    462   %DefineOrRedefineAccessorProperty($RegExp, 'lastParen', RegExpGetLastParen,
    463                                     NoOpSetter, DONT_DELETE);
    464   %DefineOrRedefineAccessorProperty($RegExp, '$+', RegExpGetLastParen,
    465                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    466   %DefineOrRedefineAccessorProperty($RegExp, 'leftContext',
    467                                     RegExpGetLeftContext, NoOpSetter,
    468                                     DONT_DELETE);
    469   %DefineOrRedefineAccessorProperty($RegExp, '$`', RegExpGetLeftContext,
    470                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    471   %DefineOrRedefineAccessorProperty($RegExp, 'rightContext',
    472                                     RegExpGetRightContext, NoOpSetter,
    473                                     DONT_DELETE);
    474   %DefineOrRedefineAccessorProperty($RegExp, "$'", RegExpGetRightContext,
    475                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    476 
          // $1 .. $9, each with its own getter closed over the capture number.
    477   for (var i = 1; i < 10; ++i) {
    478     %DefineOrRedefineAccessorProperty($RegExp, '$' + i,
    479                                       RegExpMakeCaptureGetter(i), NoOpSetter,
    480                                       DONT_DELETE);
    481   }
    482   %ToFastProperties($RegExp);
    483 }
    484 
        // Run the setup once, when this file is loaded.
    485 SetUpRegExp();
    486