Home | History | Annotate | Download | only in src
      1 // Copyright 2012 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // This file relies on the fact that the following declaration has been made
      6 // in runtime.js:
      7 // var $Object = global.Object;
      8 // var $Array = global.Array;
      9 
     10 var $RegExp = global.RegExp;
     11 
     12 // -------------------------------------------------------------------
     13 
     14 // A recursive descent parser for Patterns according to the grammar of
     15 // ECMA-262 15.10.1, with deviations noted below.
     16 function DoConstructRegExp(object, pattern, flags) {
     17   // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
     18   if (IS_REGEXP(pattern)) {
     19     if (!IS_UNDEFINED(flags)) {
     20       throw MakeTypeError('regexp_flags', []);
     21     }
     22     flags = (pattern.global ? 'g' : '')
     23         + (pattern.ignoreCase ? 'i' : '')
     24         + (pattern.multiline ? 'm' : '');
     25     pattern = pattern.source;
     26   }
     27 
     28   pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
     29   flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
     30 
     31   var global = false;
     32   var ignoreCase = false;
     33   var multiline = false;
     34   for (var i = 0; i < flags.length; i++) {
     35     var c = %_CallFunction(flags, i, StringCharAt);
     36     switch (c) {
     37       case 'g':
     38         if (global) {
     39           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     40         }
     41         global = true;
     42         break;
     43       case 'i':
     44         if (ignoreCase) {
     45           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     46         }
     47         ignoreCase = true;
     48         break;
     49       case 'm':
     50         if (multiline) {
     51           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     52         }
     53         multiline = true;
     54         break;
     55       default:
     56         throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     57     }
     58   }
     59 
     60   %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline);
     61 
     62   // Call internal function to compile the pattern.
     63   %RegExpCompile(object, pattern, flags);
     64 }
     65 
     66 
     67 function RegExpConstructor(pattern, flags) {
     68   if (%_IsConstructCall()) {
     69     DoConstructRegExp(this, pattern, flags);
     70   } else {
     71     // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
     72     if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
     73       return pattern;
     74     }
     75     return new $RegExp(pattern, flags);
     76   }
     77 }
     78 
     79 // Deprecated RegExp.prototype.compile method.  We behave like the constructor
     80 // were called again.  In SpiderMonkey, this method returns the regexp object.
     81 // In JSC, it returns undefined.  For compatibility with JSC, we match their
     82 // behavior.
     83 function RegExpCompileJS(pattern, flags) {
     84   // Both JSC and SpiderMonkey treat a missing pattern argument as the
     85   // empty subject string, and an actual undefined value passed as the
     86   // pattern as the string 'undefined'.  Note that JSC is inconsistent
     87   // here, treating undefined values differently in
     88   // RegExp.prototype.compile and in the constructor, where they are
     89   // the empty string.  For compatibility with JSC, we match their
     90   // behavior.
     91   if (this == $RegExp.prototype) {
     92     // We don't allow recompiling RegExp.prototype.
     93     throw MakeTypeError('incompatible_method_receiver',
     94                         ['RegExp.prototype.compile', this]);
     95   }
     96   if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
     97     DoConstructRegExp(this, 'undefined', flags);
     98   } else {
     99     DoConstructRegExp(this, pattern, flags);
    100   }
    101 }
    102 
    103 
    104 function DoRegExpExec(regexp, string, index) {
    105   var result = %_RegExpExec(regexp, string, index, lastMatchInfo);
    106   if (result !== null) lastMatchInfoOverride = null;
    107   return result;
    108 }
    109 
    110 
    111 // This is kind of performance sensitive, so we want to avoid unnecessary
    112 // type checks on inputs. But we also don't want to inline it several times
    113 // manually, so we use a macro :-)
    114 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING)
    115   var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1;
    116   var start = MATCHINFO[CAPTURE0];
    117   var end = MATCHINFO[CAPTURE1];
    118   // Calculate the substring of the first match before creating the result array
    119   // to avoid an unnecessary write barrier storing the first result.
    120   var first = %_SubString(STRING, start, end);
    121   var result = %_RegExpConstructResult(numResults, start, STRING);
    122   result[0] = first;
    123   if (numResults == 1) return result;
    124   var j = REGEXP_FIRST_CAPTURE + 2;
    125   for (var i = 1; i < numResults; i++) {
    126     start = MATCHINFO[j++];
    127     if (start != -1) {
    128       end = MATCHINFO[j];
    129       result[i] = %_SubString(STRING, start, end);
    130     }
    131     j++;
    132   }
    133   return result;
    134 endmacro
    135 
    136 
    137 function RegExpExecNoTests(regexp, string, start) {
    138   // Must be called with RegExp, string and positive integer as arguments.
    139   var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
    140   if (matchInfo !== null) {
    141     lastMatchInfoOverride = null;
    142     RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string);
    143   }
    144   regexp.lastIndex = 0;
    145   return null;
    146 }
    147 
    148 
    149 function RegExpExec(string) {
    150   if (!IS_REGEXP(this)) {
    151     throw MakeTypeError('incompatible_method_receiver',
    152                         ['RegExp.prototype.exec', this]);
    153   }
    154 
    155   string = TO_STRING_INLINE(string);
    156   var lastIndex = this.lastIndex;
    157 
    158   // Conversion is required by the ES5 specification (RegExp.prototype.exec
    159   // algorithm, step 5) even if the value is discarded for non-global RegExps.
    160   var i = TO_INTEGER(lastIndex);
    161 
    162   var global = this.global;
    163   if (global) {
    164     if (i < 0 || i > string.length) {
    165       this.lastIndex = 0;
    166       return null;
    167     }
    168   } else {
    169     i = 0;
    170   }
    171 
    172   // matchIndices is either null or the lastMatchInfo array.
    173   var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
    174 
    175   if (IS_NULL(matchIndices)) {
    176     this.lastIndex = 0;
    177     return null;
    178   }
    179 
    180   // Successful match.
    181   lastMatchInfoOverride = null;
    182   if (global) {
    183     this.lastIndex = lastMatchInfo[CAPTURE1];
    184   }
    185   RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
    186 }
    187 
    188 
    189 // One-element cache for the simplified test regexp.
    190 var regexp_key;
    191 var regexp_val;
    192 
    193 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
    194 // that test is defined in terms of String.prototype.exec. However, it probably
    195 // means the original value of String.prototype.exec, which is what everybody
    196 // else implements.
    197 function RegExpTest(string) {
    198   if (!IS_REGEXP(this)) {
    199     throw MakeTypeError('incompatible_method_receiver',
    200                         ['RegExp.prototype.test', this]);
    201   }
    202   string = TO_STRING_INLINE(string);
    203 
    204   var lastIndex = this.lastIndex;
    205 
    206   // Conversion is required by the ES5 specification (RegExp.prototype.exec
    207   // algorithm, step 5) even if the value is discarded for non-global RegExps.
    208   var i = TO_INTEGER(lastIndex);
    209 
    210   if (this.global) {
    211     if (i < 0 || i > string.length) {
    212       this.lastIndex = 0;
    213       return false;
    214     }
    215     // matchIndices is either null or the lastMatchInfo array.
    216     var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
    217     if (IS_NULL(matchIndices)) {
    218       this.lastIndex = 0;
    219       return false;
    220     }
    221     lastMatchInfoOverride = null;
    222     this.lastIndex = lastMatchInfo[CAPTURE1];
    223     return true;
    224   } else {
    225     // Non-global regexp.
    226     // Remove irrelevant preceeding '.*' in a non-global test regexp.
    227     // The expression checks whether this.source starts with '.*' and
    228     // that the third char is not a '?'.
    229     var regexp = this;
    230     if (%_StringCharCodeAt(regexp.source, 0) == 46 &&  // '.'
    231         %_StringCharCodeAt(regexp.source, 1) == 42 &&  // '*'
    232         %_StringCharCodeAt(regexp.source, 2) != 63) {  // '?'
    233       regexp = TrimRegExp(regexp);
    234     }
    235     // matchIndices is either null or the lastMatchInfo array.
    236     var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo);
    237     if (IS_NULL(matchIndices)) {
    238       this.lastIndex = 0;
    239       return false;
    240     }
    241     lastMatchInfoOverride = null;
    242     return true;
    243   }
    244 }
    245 
    246 function TrimRegExp(regexp) {
    247   if (!%_ObjectEquals(regexp_key, regexp)) {
    248     regexp_key = regexp;
    249     regexp_val =
    250       new $RegExp(%_SubString(regexp.source, 2, regexp.source.length),
    251                   (regexp.ignoreCase ? regexp.multiline ? "im" : "i"
    252                                      : regexp.multiline ? "m" : ""));
    253   }
    254   return regexp_val;
    255 }
    256 
    257 
    258 function RegExpToString() {
    259   if (!IS_REGEXP(this)) {
    260     throw MakeTypeError('incompatible_method_receiver',
    261                         ['RegExp.prototype.toString', this]);
    262   }
    263   var result = '/' + this.source + '/';
    264   if (this.global) result += 'g';
    265   if (this.ignoreCase) result += 'i';
    266   if (this.multiline) result += 'm';
    267   return result;
    268 }
    269 
    270 
    271 // Getters for the static properties lastMatch, lastParen, leftContext, and
    272 // rightContext of the RegExp constructor.  The properties are computed based
    273 // on the captures array of the last successful match and the subject string
    274 // of the last successful match.
    275 function RegExpGetLastMatch() {
    276   if (lastMatchInfoOverride !== null) {
    277     return OVERRIDE_MATCH(lastMatchInfoOverride);
    278   }
    279   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    280   return %_SubString(regExpSubject,
    281                      lastMatchInfo[CAPTURE0],
    282                      lastMatchInfo[CAPTURE1]);
    283 }
    284 
    285 
    286 function RegExpGetLastParen() {
    287   if (lastMatchInfoOverride) {
    288     var override = lastMatchInfoOverride;
    289     if (override.length <= 3) return '';
    290     return override[override.length - 3];
    291   }
    292   var length = NUMBER_OF_CAPTURES(lastMatchInfo);
    293   if (length <= 2) return '';  // There were no captures.
    294   // We match the SpiderMonkey behavior: return the substring defined by the
    295   // last pair (after the first pair) of elements of the capture array even if
    296   // it is empty.
    297   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    298   var start = lastMatchInfo[CAPTURE(length - 2)];
    299   var end = lastMatchInfo[CAPTURE(length - 1)];
    300   if (start != -1 && end != -1) {
    301     return %_SubString(regExpSubject, start, end);
    302   }
    303   return "";
    304 }
    305 
    306 
    307 function RegExpGetLeftContext() {
    308   var start_index;
    309   var subject;
    310   if (!lastMatchInfoOverride) {
    311     start_index = lastMatchInfo[CAPTURE0];
    312     subject = LAST_SUBJECT(lastMatchInfo);
    313   } else {
    314     var override = lastMatchInfoOverride;
    315     start_index = OVERRIDE_POS(override);
    316     subject = OVERRIDE_SUBJECT(override);
    317   }
    318   return %_SubString(subject, 0, start_index);
    319 }
    320 
    321 
    322 function RegExpGetRightContext() {
    323   var start_index;
    324   var subject;
    325   if (!lastMatchInfoOverride) {
    326     start_index = lastMatchInfo[CAPTURE1];
    327     subject = LAST_SUBJECT(lastMatchInfo);
    328   } else {
    329     var override = lastMatchInfoOverride;
    330     subject = OVERRIDE_SUBJECT(override);
    331     var match = OVERRIDE_MATCH(override);
    332     start_index = OVERRIDE_POS(override) + match.length;
    333   }
    334   return %_SubString(subject, start_index, subject.length);
    335 }
    336 
    337 
    338 // The properties $1..$9 are the first nine capturing substrings of the last
    339 // successful match, or ''.  The function RegExpMakeCaptureGetter will be
    340 // called with indices from 1 to 9.
    341 function RegExpMakeCaptureGetter(n) {
    342   return function() {
    343     if (lastMatchInfoOverride) {
    344       if (n < lastMatchInfoOverride.length - 2) {
    345         return OVERRIDE_CAPTURE(lastMatchInfoOverride, n);
    346       }
    347       return '';
    348     }
    349     var index = n * 2;
    350     if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
    351     var matchStart = lastMatchInfo[CAPTURE(index)];
    352     var matchEnd = lastMatchInfo[CAPTURE(index + 1)];
    353     if (matchStart == -1 || matchEnd == -1) return '';
    354     return %_SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
    355   };
    356 }
    357 
    358 
// Property of the builtins object for recording the result of the last
// regexp match.  The property lastMatchInfo includes the matchIndices
// array of the last successful regexp match (an array of start/end index
// pairs for the match and all the captured substrings); the invariant is
// that there are at least two capture indices.  The array also contains
// the subject string for the last successful match.
    365 var lastMatchInfo = new InternalPackedArray(
    366     2,                 // REGEXP_NUMBER_OF_CAPTURES
    367     "",                // Last subject.
    368     UNDEFINED,         // Last input - settable with RegExpSetInput.
    369     0,                 // REGEXP_FIRST_CAPTURE + 0
    370     0                  // REGEXP_FIRST_CAPTURE + 1
    371 );
    372 
    373 // Override last match info with an array of actual substrings.
    374 // Used internally by replace regexp with function.
    375 // The array has the format of an "apply" argument for a replacement
    376 // function.
    377 var lastMatchInfoOverride = null;
    378 
    379 // -------------------------------------------------------------------
    380 
    381 function SetUpRegExp() {
    382   %CheckIsBootstrapping();
    383   %FunctionSetInstanceClassName($RegExp, 'RegExp');
    384   %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
    385   %SetCode($RegExp, RegExpConstructor);
    386 
    387   InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
    388     "exec", RegExpExec,
    389     "test", RegExpTest,
    390     "toString", RegExpToString,
    391     "compile", RegExpCompileJS
    392   ));
    393 
    394   // The length of compile is 1 in SpiderMonkey.
    395   %FunctionSetLength($RegExp.prototype.compile, 1);
    396 
    397   // The properties input, $input, and $_ are aliases for each other.  When this
    398   // value is set the value it is set to is coerced to a string.
    399   // Getter and setter for the input.
    400   var RegExpGetInput = function() {
    401     var regExpInput = LAST_INPUT(lastMatchInfo);
    402     return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
    403   };
    404   var RegExpSetInput = function(string) {
    405     LAST_INPUT(lastMatchInfo) = ToString(string);
    406   };
    407 
    408   %OptimizeObjectForAddingMultipleProperties($RegExp, 22);
    409   %DefineOrRedefineAccessorProperty($RegExp, 'input', RegExpGetInput,
    410                                     RegExpSetInput, DONT_DELETE);
    411   %DefineOrRedefineAccessorProperty($RegExp, '$_', RegExpGetInput,
    412                                     RegExpSetInput, DONT_ENUM | DONT_DELETE);
    413   %DefineOrRedefineAccessorProperty($RegExp, '$input', RegExpGetInput,
    414                                     RegExpSetInput, DONT_ENUM | DONT_DELETE);
    415 
    416   // The properties multiline and $* are aliases for each other.  When this
    417   // value is set in SpiderMonkey, the value it is set to is coerced to a
    418   // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
    419   // the value of the expression 'RegExp.multiline = null' (for instance) is the
    420   // boolean false (i.e., the value after coercion), while in V8 it is the value
    421   // null (i.e., the value before coercion).
    422 
    423   // Getter and setter for multiline.
    424   var multiline = false;
    425   var RegExpGetMultiline = function() { return multiline; };
    426   var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; };
    427 
    428   %DefineOrRedefineAccessorProperty($RegExp, 'multiline', RegExpGetMultiline,
    429                                     RegExpSetMultiline, DONT_DELETE);
    430   %DefineOrRedefineAccessorProperty($RegExp, '$*', RegExpGetMultiline,
    431                                     RegExpSetMultiline,
    432                                     DONT_ENUM | DONT_DELETE);
    433 
    434 
    435   var NoOpSetter = function(ignored) {};
    436 
    437 
    438   // Static properties set by a successful match.
    439   %DefineOrRedefineAccessorProperty($RegExp, 'lastMatch', RegExpGetLastMatch,
    440                                     NoOpSetter, DONT_DELETE);
    441   %DefineOrRedefineAccessorProperty($RegExp, '$&', RegExpGetLastMatch,
    442                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    443   %DefineOrRedefineAccessorProperty($RegExp, 'lastParen', RegExpGetLastParen,
    444                                     NoOpSetter, DONT_DELETE);
    445   %DefineOrRedefineAccessorProperty($RegExp, '$+', RegExpGetLastParen,
    446                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    447   %DefineOrRedefineAccessorProperty($RegExp, 'leftContext',
    448                                     RegExpGetLeftContext, NoOpSetter,
    449                                     DONT_DELETE);
    450   %DefineOrRedefineAccessorProperty($RegExp, '$`', RegExpGetLeftContext,
    451                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    452   %DefineOrRedefineAccessorProperty($RegExp, 'rightContext',
    453                                     RegExpGetRightContext, NoOpSetter,
    454                                     DONT_DELETE);
    455   %DefineOrRedefineAccessorProperty($RegExp, "$'", RegExpGetRightContext,
    456                                     NoOpSetter, DONT_ENUM | DONT_DELETE);
    457 
    458   for (var i = 1; i < 10; ++i) {
    459     %DefineOrRedefineAccessorProperty($RegExp, '$' + i,
    460                                       RegExpMakeCaptureGetter(i), NoOpSetter,
    461                                       DONT_DELETE);
    462   }
    463   %ToFastProperties($RegExp);
    464 }
    465 
    466 SetUpRegExp();
    467