Home | History | Annotate | Download | only in src
      1 // Copyright 2012 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 // Expect $Object = global.Object;
     29 // Expect $Array = global.Array;
     30 
// Alias for the RegExp constructor found on the global object.  The rest of
// this file refers to the constructor through $RegExp.
var $RegExp = global.RegExp;
     32 
     33 // A recursive descent parser for Patterns according to the grammar of
     34 // ECMA-262 15.10.1, with deviations noted below.
     35 function DoConstructRegExp(object, pattern, flags) {
     36   // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
     37   if (IS_REGEXP(pattern)) {
     38     if (!IS_UNDEFINED(flags)) {
     39       throw MakeTypeError('regexp_flags', []);
     40     }
     41     flags = (pattern.global ? 'g' : '')
     42         + (pattern.ignoreCase ? 'i' : '')
     43         + (pattern.multiline ? 'm' : '');
     44     pattern = pattern.source;
     45   }
     46 
     47   pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
     48   flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
     49 
     50   var global = false;
     51   var ignoreCase = false;
     52   var multiline = false;
     53   for (var i = 0; i < flags.length; i++) {
     54     var c = %_CallFunction(flags, i, StringCharAt);
     55     switch (c) {
     56       case 'g':
     57         if (global) {
     58           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     59         }
     60         global = true;
     61         break;
     62       case 'i':
     63         if (ignoreCase) {
     64           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     65         }
     66         ignoreCase = true;
     67         break;
     68       case 'm':
     69         if (multiline) {
     70           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     71         }
     72         multiline = true;
     73         break;
     74       default:
     75         throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     76     }
     77   }
     78 
     79   %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline);
     80 
     81   // Call internal function to compile the pattern.
     82   %RegExpCompile(object, pattern, flags);
     83 }
     84 
     85 
     86 function RegExpConstructor(pattern, flags) {
     87   if (%_IsConstructCall()) {
     88     DoConstructRegExp(this, pattern, flags);
     89   } else {
     90     // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
     91     if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
     92       return pattern;
     93     }
     94     return new $RegExp(pattern, flags);
     95   }
     96 }
     97 
     98 // Deprecated RegExp.prototype.compile method.  We behave like the constructor
     99 // were called again.  In SpiderMonkey, this method returns the regexp object.
    100 // In JSC, it returns undefined.  For compatibility with JSC, we match their
    101 // behavior.
    102 function RegExpCompile(pattern, flags) {
    103   // Both JSC and SpiderMonkey treat a missing pattern argument as the
    104   // empty subject string, and an actual undefined value passed as the
    105   // pattern as the string 'undefined'.  Note that JSC is inconsistent
    106   // here, treating undefined values differently in
    107   // RegExp.prototype.compile and in the constructor, where they are
    108   // the empty string.  For compatibility with JSC, we match their
    109   // behavior.
    110   if (this == $RegExp.prototype) {
    111     // We don't allow recompiling RegExp.prototype.
    112     throw MakeTypeError('incompatible_method_receiver',
    113                         ['RegExp.prototype.compile', this]);
    114   }
    115   if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
    116     DoConstructRegExp(this, 'undefined', flags);
    117   } else {
    118     DoConstructRegExp(this, pattern, flags);
    119   }
    120 }
    121 
    122 
    123 function DoRegExpExec(regexp, string, index) {
    124   var result = %_RegExpExec(regexp, string, index, lastMatchInfo);
    125   if (result !== null) lastMatchInfoOverride = null;
    126   return result;
    127 }
    128 
    129 
    130 function BuildResultFromMatchInfo(lastMatchInfo, s) {
    131   var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;
    132   var start = lastMatchInfo[CAPTURE0];
    133   var end = lastMatchInfo[CAPTURE1];
    134   var result = %_RegExpConstructResult(numResults, start, s);
    135   if (start + 1 == end) {
    136     result[0] = %_StringCharAt(s, start);
    137   } else {
    138     result[0] = %_SubString(s, start, end);
    139   }
    140   var j = REGEXP_FIRST_CAPTURE + 2;
    141   for (var i = 1; i < numResults; i++) {
    142     start = lastMatchInfo[j++];
    143     end = lastMatchInfo[j++];
    144     if (end != -1) {
    145       if (start + 1 == end) {
    146         result[i] = %_StringCharAt(s, start);
    147       } else {
    148         result[i] = %_SubString(s, start, end);
    149       }
    150     } else {
    151       // Make sure the element is present. Avoid reading the undefined
    152       // property from the global object since this may change.
    153       result[i] = void 0;
    154     }
    155   }
    156   return result;
    157 }
    158 
    159 
    160 function RegExpExecNoTests(regexp, string, start) {
    161   // Must be called with RegExp, string and positive integer as arguments.
    162   var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
    163   if (matchInfo !== null) {
    164     lastMatchInfoOverride = null;
    165     return BuildResultFromMatchInfo(matchInfo, string);
    166   }
    167   return null;
    168 }
    169 
    170 
    171 function RegExpExec(string) {
    172   if (!IS_REGEXP(this)) {
    173     throw MakeTypeError('incompatible_method_receiver',
    174                         ['RegExp.prototype.exec', this]);
    175   }
    176 
    177   string = TO_STRING_INLINE(string);
    178   var lastIndex = this.lastIndex;
    179 
    180   // Conversion is required by the ES5 specification (RegExp.prototype.exec
    181   // algorithm, step 5) even if the value is discarded for non-global RegExps.
    182   var i = TO_INTEGER(lastIndex);
    183 
    184   var global = this.global;
    185   if (global) {
    186     if (i < 0 || i > string.length) {
    187       this.lastIndex = 0;
    188       return null;
    189     }
    190   } else {
    191     i = 0;
    192   }
    193 
    194   %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
    195   // matchIndices is either null or the lastMatchInfo array.
    196   var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
    197 
    198   if (matchIndices === null) {
    199     if (global) this.lastIndex = 0;
    200     return null;
    201   }
    202 
    203   // Successful match.
    204   lastMatchInfoOverride = null;
    205   if (global) {
    206     this.lastIndex = lastMatchInfo[CAPTURE1];
    207   }
    208   return BuildResultFromMatchInfo(matchIndices, string);
    209 }
    210 
    211 
// One-element cache for the simplified test regexp.  regexp_key holds the
// regexp most recently handed to TrimRegExp and regexp_val the regexp that
// TrimRegExp built for it.
var regexp_key;
var regexp_val;
    215 
    216 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
    217 // that test is defined in terms of String.prototype.exec. However, it probably
    218 // means the original value of String.prototype.exec, which is what everybody
    219 // else implements.
    220 function RegExpTest(string) {
    221   if (!IS_REGEXP(this)) {
    222     throw MakeTypeError('incompatible_method_receiver',
    223                         ['RegExp.prototype.test', this]);
    224   }
    225   string = TO_STRING_INLINE(string);
    226 
    227   var lastIndex = this.lastIndex;
    228 
    229   // Conversion is required by the ES5 specification (RegExp.prototype.exec
    230   // algorithm, step 5) even if the value is discarded for non-global RegExps.
    231   var i = TO_INTEGER(lastIndex);
    232 
    233   if (this.global) {
    234     if (i < 0 || i > string.length) {
    235       this.lastIndex = 0;
    236       return false;
    237     }
    238     %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
    239     // matchIndices is either null or the lastMatchInfo array.
    240     var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
    241     if (matchIndices === null) {
    242       this.lastIndex = 0;
    243       return false;
    244     }
    245     lastMatchInfoOverride = null;
    246     this.lastIndex = lastMatchInfo[CAPTURE1];
    247     return true;
    248   } else {
    249     // Non-global regexp.
    250     // Remove irrelevant preceeding '.*' in a non-global test regexp.
    251     // The expression checks whether this.source starts with '.*' and
    252     // that the third char is not a '?'.
    253     var regexp = this;
    254     if (%_StringCharCodeAt(regexp.source, 0) == 46 &&  // '.'
    255         %_StringCharCodeAt(regexp.source, 1) == 42 &&  // '*'
    256         %_StringCharCodeAt(regexp.source, 2) != 63) {  // '?'
    257       regexp = TrimRegExp(regexp);
    258     }
    259     %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [regexp, string, lastIndex]);
    260     // matchIndices is either null or the lastMatchInfo array.
    261     var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo);
    262     if (matchIndices === null) return false;
    263     lastMatchInfoOverride = null;
    264     return true;
    265   }
    266 }
    267 
    268 function TrimRegExp(regexp) {
    269   if (!%_ObjectEquals(regexp_key, regexp)) {
    270     regexp_key = regexp;
    271     regexp_val =
    272       new $RegExp(SubString(regexp.source, 2, regexp.source.length),
    273                   (regexp.ignoreCase ? regexp.multiline ? "im" : "i"
    274                                      : regexp.multiline ? "m" : ""));
    275   }
    276   return regexp_val;
    277 }
    278 
    279 
    280 function RegExpToString() {
    281   // If this.source is an empty string, output /(?:)/.
    282   // http://bugzilla.mozilla.org/show_bug.cgi?id=225550
    283   // ecma_2/RegExp/properties-001.js.
    284   var src = this.source ? this.source : '(?:)';
    285   var result = '/' + src + '/';
    286   if (this.global) result += 'g';
    287   if (this.ignoreCase) result += 'i';
    288   if (this.multiline) result += 'm';
    289   return result;
    290 }
    291 
    292 
    293 // Getters for the static properties lastMatch, lastParen, leftContext, and
    294 // rightContext of the RegExp constructor.  The properties are computed based
    295 // on the captures array of the last successful match and the subject string
    296 // of the last successful match.
    297 function RegExpGetLastMatch() {
    298   if (lastMatchInfoOverride !== null) {
    299     return lastMatchInfoOverride[0];
    300   }
    301   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    302   return SubString(regExpSubject,
    303                    lastMatchInfo[CAPTURE0],
    304                    lastMatchInfo[CAPTURE1]);
    305 }
    306 
    307 
    308 function RegExpGetLastParen() {
    309   if (lastMatchInfoOverride) {
    310     var override = lastMatchInfoOverride;
    311     if (override.length <= 3) return '';
    312     return override[override.length - 3];
    313   }
    314   var length = NUMBER_OF_CAPTURES(lastMatchInfo);
    315   if (length <= 2) return '';  // There were no captures.
    316   // We match the SpiderMonkey behavior: return the substring defined by the
    317   // last pair (after the first pair) of elements of the capture array even if
    318   // it is empty.
    319   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    320   var start = lastMatchInfo[CAPTURE(length - 2)];
    321   var end = lastMatchInfo[CAPTURE(length - 1)];
    322   if (start != -1 && end != -1) {
    323     return SubString(regExpSubject, start, end);
    324   }
    325   return "";
    326 }
    327 
    328 
    329 function RegExpGetLeftContext() {
    330   var start_index;
    331   var subject;
    332   if (!lastMatchInfoOverride) {
    333     start_index = lastMatchInfo[CAPTURE0];
    334     subject = LAST_SUBJECT(lastMatchInfo);
    335   } else {
    336     var override = lastMatchInfoOverride;
    337     start_index = override[override.length - 2];
    338     subject = override[override.length - 1];
    339   }
    340   return SubString(subject, 0, start_index);
    341 }
    342 
    343 
    344 function RegExpGetRightContext() {
    345   var start_index;
    346   var subject;
    347   if (!lastMatchInfoOverride) {
    348     start_index = lastMatchInfo[CAPTURE1];
    349     subject = LAST_SUBJECT(lastMatchInfo);
    350   } else {
    351     var override = lastMatchInfoOverride;
    352     subject = override[override.length - 1];
    353     var pattern = override[override.length - 3];
    354     start_index = override[override.length - 2] + pattern.length;
    355   }
    356   return SubString(subject, start_index, subject.length);
    357 }
    358 
    359 
    360 // The properties $1..$9 are the first nine capturing substrings of the last
    361 // successful match, or ''.  The function RegExpMakeCaptureGetter will be
    362 // called with indices from 1 to 9.
    363 function RegExpMakeCaptureGetter(n) {
    364   return function() {
    365     if (lastMatchInfoOverride) {
    366       if (n < lastMatchInfoOverride.length - 2) return lastMatchInfoOverride[n];
    367       return '';
    368     }
    369     var index = n * 2;
    370     if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
    371     var matchStart = lastMatchInfo[CAPTURE(index)];
    372     var matchEnd = lastMatchInfo[CAPTURE(index + 1)];
    373     if (matchStart == -1 || matchEnd == -1) return '';
    374     return SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
    375   };
    376 }
    377 
    378 
// Property of the builtins object for recording the result of the last
// regexp match.  The property lastMatchInfo includes the matchIndices
// array of the last successful regexp match (an array of start/end index
// pairs for the match and all the captured substrings); the invariant is
// that there are at least two capture indices.  The array also contains
// the subject string for the last successful match.
var lastMatchInfo = new InternalArray(
    2,                 // REGEXP_NUMBER_OF_CAPTURES
    "",                // Last subject.
    void 0,            // Last input - settable with RegExpSetInput.
    0,                 // REGEXP_FIRST_CAPTURE + 0
    0                  // REGEXP_FIRST_CAPTURE + 1
);

// Override last match info with an array of actual substrings.
// Used internally by replace regexp with function.
// The array has the format of an "apply" argument for a replacement
// function.  As read by the static getters in this file, element 0 is the
// matched text, the elements after it are the captures, and the last two
// elements are the match index and the subject string.  It is null when no
// override is active; every successful match in this file resets it to null.
var lastMatchInfoOverride = null;
    398 
    399 // -------------------------------------------------------------------
    400 
    401 function SetUpRegExp() {
    402   %CheckIsBootstrapping();
    403   %FunctionSetInstanceClassName($RegExp, 'RegExp');
    404   %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
    405   %SetCode($RegExp, RegExpConstructor);
    406 
    407   InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
    408     "exec", RegExpExec,
    409     "test", RegExpTest,
    410     "toString", RegExpToString,
    411     "compile", RegExpCompile
    412   ));
    413 
    414   // The length of compile is 1 in SpiderMonkey.
    415   %FunctionSetLength($RegExp.prototype.compile, 1);
    416 
    417   // The properties input, $input, and $_ are aliases for each other.  When this
    418   // value is set the value it is set to is coerced to a string.
    419   // Getter and setter for the input.
    420   var RegExpGetInput = function() {
    421     var regExpInput = LAST_INPUT(lastMatchInfo);
    422     return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
    423   };
    424   var RegExpSetInput = function(string) {
    425     LAST_INPUT(lastMatchInfo) = ToString(string);
    426   };
    427 
    428   %DefineOrRedefineAccessorProperty($RegExp, 'input', RegExpGetInput,
    429                                     RegExpSetInput, DONT_DELETE);
    430   %DefineOrRedefineAccessorProperty($RegExp, '$_', RegExpGetInput,
    431                                     RegExpSetInput, DONT_ENUM | DONT_DELETE);
    432   %DefineOrRedefineAccessorProperty($RegExp, '$input', RegExpGetInput,
    433                                     RegExpSetInput, DONT_ENUM | DONT_DELETE);
    434 
    435   // The properties multiline and $* are aliases for each other.  When this
    436   // value is set in SpiderMonkey, the value it is set to is coerced to a
    437   // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
    438   // the value of the expression 'RegExp.multiline = null' (for instance) is the
    439   // boolean false (i.e., the value after coercion), while in V8 it is the value
    440   // null (i.e., the value before coercion).
    441 
    442   // Getter and setter for multiline.
    443   var multiline = false;
    444   var RegExpGetMultiline = function() { return multiline; };
    445   var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; };
    446 
    447   %DefineOrRedefineAccessorProperty($RegExp, 'multiline', RegExpGetMultiline,
    448                                     RegExpSetMultiline, DONT_DELETE);
    449   %DefineOrRedefineAccessorProperty($RegExp, '$*', RegExpGetMultiline,
    450                                     RegExpSetMultiline,
    451                                     DONT_ENUM | DONT_DELETE);
    452 
    453 
    454   var NoOpSetter = function(ignored) {};
    455 
    456 
    457   // Static properties set by a successful match.
    458   %DefineOrRedefineAccessorProperty($RegExp, 'lastMatch', RegExpGetLastMatch,
    459                                     NoOpSetter, DONT_DELETE);
    460   %DefineOrRedefineAccessorProperty($RegExp, '$&', RegExpGetLastMatch,
    461                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    462   %DefineOrRedefineAccessorProperty($RegExp, 'lastParen', RegExpGetLastParen,
    463                                     NoOpSetter, DONT_DELETE);
    464   %DefineOrRedefineAccessorProperty($RegExp, '$+', RegExpGetLastParen,
    465                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    466   %DefineOrRedefineAccessorProperty($RegExp, 'leftContext',
    467                                     RegExpGetLeftContext, NoOpSetter,
    468                                     DONT_DELETE);
    469   %DefineOrRedefineAccessorProperty($RegExp, '$`', RegExpGetLeftContext,
    470                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    471   %DefineOrRedefineAccessorProperty($RegExp, 'rightContext',
    472                                     RegExpGetRightContext, NoOpSetter,
    473                                     DONT_DELETE);
    474   %DefineOrRedefineAccessorProperty($RegExp, "$'", RegExpGetRightContext,
    475                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    476 
    477   for (var i = 1; i < 10; ++i) {
    478     %DefineOrRedefineAccessorProperty($RegExp, '$' + i,
    479                                       RegExpMakeCaptureGetter(i), NoOpSetter,
    480                                       DONT_DELETE);
    481   }
    482 }
    483 
    484 SetUpRegExp();
    485