Home | History | Annotate | Download | only in src
      1 // Copyright 2012 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // This file relies on the fact that the following declaration has been made
      6 // in runtime.js:
      7 // var $Object = global.Object;
      8 // var $Array = global.Array;
      9 
// Local alias for the RegExp constructor found on the global object.  The
// functions in this file construct regexps through it and install their
// methods and static properties on it.
var $RegExp = global.RegExp;
     11 
     12 // -------------------------------------------------------------------
     13 
     14 // A recursive descent parser for Patterns according to the grammar of
     15 // ECMA-262 15.10.1, with deviations noted below.
     16 function DoConstructRegExp(object, pattern, flags) {
     17   // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
     18   if (IS_REGEXP(pattern)) {
     19     if (!IS_UNDEFINED(flags)) {
     20       throw MakeTypeError('regexp_flags', []);
     21     }
     22     flags = (pattern.global ? 'g' : '')
     23         + (pattern.ignoreCase ? 'i' : '')
     24         + (pattern.multiline ? 'm' : '');
     25     if (harmony_regexps)
     26         flags += (pattern.sticky ? 'y' : '');
     27     pattern = pattern.source;
     28   }
     29 
     30   pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
     31   flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
     32 
     33   var global = false;
     34   var ignoreCase = false;
     35   var multiline = false;
     36   var sticky = false;
     37   for (var i = 0; i < flags.length; i++) {
     38     var c = %_CallFunction(flags, i, StringCharAt);
     39     switch (c) {
     40       case 'g':
     41         if (global) {
     42           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     43         }
     44         global = true;
     45         break;
     46       case 'i':
     47         if (ignoreCase) {
     48           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     49         }
     50         ignoreCase = true;
     51         break;
     52       case 'm':
     53         if (multiline) {
     54           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     55         }
     56         multiline = true;
     57         break;
     58       case 'y':
     59         if (!harmony_regexps || sticky) {
     60           throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     61         }
     62         sticky = true;
     63         break;
     64       default:
     65         throw MakeSyntaxError("invalid_regexp_flags", [flags]);
     66     }
     67   }
     68 
     69   %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline, sticky);
     70 
     71   // Call internal function to compile the pattern.
     72   %RegExpCompile(object, pattern, flags);
     73 }
     74 
     75 
     76 function RegExpConstructor(pattern, flags) {
     77   if (%_IsConstructCall()) {
     78     DoConstructRegExp(this, pattern, flags);
     79   } else {
     80     // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
     81     if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
     82       return pattern;
     83     }
     84     return new $RegExp(pattern, flags);
     85   }
     86 }
     87 
     88 // Deprecated RegExp.prototype.compile method.  We behave like the constructor
     89 // were called again.  In SpiderMonkey, this method returns the regexp object.
     90 // In JSC, it returns undefined.  For compatibility with JSC, we match their
     91 // behavior.
     92 function RegExpCompileJS(pattern, flags) {
     93   // Both JSC and SpiderMonkey treat a missing pattern argument as the
     94   // empty subject string, and an actual undefined value passed as the
     95   // pattern as the string 'undefined'.  Note that JSC is inconsistent
     96   // here, treating undefined values differently in
     97   // RegExp.prototype.compile and in the constructor, where they are
     98   // the empty string.  For compatibility with JSC, we match their
     99   // behavior.
    100   if (this == $RegExp.prototype) {
    101     // We don't allow recompiling RegExp.prototype.
    102     throw MakeTypeError('incompatible_method_receiver',
    103                         ['RegExp.prototype.compile', this]);
    104   }
    105   if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
    106     DoConstructRegExp(this, 'undefined', flags);
    107   } else {
    108     DoConstructRegExp(this, pattern, flags);
    109   }
    110 }
    111 
    112 
    113 function DoRegExpExec(regexp, string, index) {
    114   var result = %_RegExpExec(regexp, string, index, lastMatchInfo);
    115   if (result !== null) lastMatchInfoOverride = null;
    116   return result;
    117 }
    118 
    119 
// Builds the result array for a successful match out of a match info array
// and the subject string, and returns it.
//
// This is kind of performance sensitive, so we want to avoid unnecessary
// type checks on inputs. But we also don't want to inline it several times
// manually, so we use a macro :-)
//
// Since this is a macro, its "return" statements end the function it is used
// in, and its "var" declarations (numResults, start, end, first, result, i, j)
// become locals of that function.
macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING)
  // Every result is described by a start/end pair, hence the halving.
  var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1;
  var start = MATCHINFO[CAPTURE0];
  var end = MATCHINFO[CAPTURE1];
  // Calculate the substring of the first match before creating the result array
  // to avoid an unnecessary write barrier storing the first result.
  var first = %_SubString(STRING, start, end);
  var result = %_RegExpConstructResult(numResults, start, STRING);
  result[0] = first;
  // Fast exit when there is only the whole match and no capture groups.
  if (numResults == 1) return result;
  // j walks the start/end pairs that follow the pair of the whole match.
  var j = REGEXP_FIRST_CAPTURE + 2;
  for (var i = 1; i < numResults; i++) {
    start = MATCHINFO[j++];
    // A start of -1 leaves the slot in the result array unassigned
    // (presumably the group did not take part in the match).
    if (start != -1) {
      end = MATCHINFO[j];
      result[i] = %_SubString(STRING, start, end);
    }
    j++;
  }
  return result;
endmacro
    144 
    145 
    146 function RegExpExecNoTests(regexp, string, start) {
    147   // Must be called with RegExp, string and positive integer as arguments.
    148   var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
    149   if (matchInfo !== null) {
    150     lastMatchInfoOverride = null;
    151     RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string);
    152   }
    153   regexp.lastIndex = 0;
    154   return null;
    155 }
    156 
    157 
    158 function RegExpExec(string) {
    159   if (!IS_REGEXP(this)) {
    160     throw MakeTypeError('incompatible_method_receiver',
    161                         ['RegExp.prototype.exec', this]);
    162   }
    163 
    164   string = TO_STRING_INLINE(string);
    165   var lastIndex = this.lastIndex;
    166 
    167   // Conversion is required by the ES5 specification (RegExp.prototype.exec
    168   // algorithm, step 5) even if the value is discarded for non-global RegExps.
    169   var i = TO_INTEGER(lastIndex);
    170 
    171   var updateLastIndex = this.global || (harmony_regexps && this.sticky);
    172   if (updateLastIndex) {
    173     if (i < 0 || i > string.length) {
    174       this.lastIndex = 0;
    175       return null;
    176     }
    177   } else {
    178     i = 0;
    179   }
    180 
    181   // matchIndices is either null or the lastMatchInfo array.
    182   var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
    183 
    184   if (IS_NULL(matchIndices)) {
    185     this.lastIndex = 0;
    186     return null;
    187   }
    188 
    189   // Successful match.
    190   lastMatchInfoOverride = null;
    191   if (updateLastIndex) {
    192     this.lastIndex = lastMatchInfo[CAPTURE1];
    193   }
    194   RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
    195 }
    196 
    197 
// One-element cache for the simplified test regexp.
// regexp_key is the regexp most recently handed to TrimRegExp and regexp_val
// is the trimmed regexp that was built for it.
var regexp_key;
var regexp_val;
    201 
    202 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
    203 // that test is defined in terms of String.prototype.exec. However, it probably
    204 // means the original value of String.prototype.exec, which is what everybody
    205 // else implements.
    206 function RegExpTest(string) {
    207   if (!IS_REGEXP(this)) {
    208     throw MakeTypeError('incompatible_method_receiver',
    209                         ['RegExp.prototype.test', this]);
    210   }
    211   string = TO_STRING_INLINE(string);
    212 
    213   var lastIndex = this.lastIndex;
    214 
    215   // Conversion is required by the ES5 specification (RegExp.prototype.exec
    216   // algorithm, step 5) even if the value is discarded for non-global RegExps.
    217   var i = TO_INTEGER(lastIndex);
    218 
    219   if (this.global || (harmony_regexps && this.sticky)) {
    220     if (i < 0 || i > string.length) {
    221       this.lastIndex = 0;
    222       return false;
    223     }
    224     // matchIndices is either null or the lastMatchInfo array.
    225     var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
    226     if (IS_NULL(matchIndices)) {
    227       this.lastIndex = 0;
    228       return false;
    229     }
    230     lastMatchInfoOverride = null;
    231     this.lastIndex = lastMatchInfo[CAPTURE1];
    232     return true;
    233   } else {
    234     // Non-global, non-sticky regexp.
    235     // Remove irrelevant preceeding '.*' in a test regexp.  The expression
    236     // checks whether this.source starts with '.*' and that the third char is
    237     // not a '?'.  But see https://code.google.com/p/v8/issues/detail?id=3560
    238     var regexp = this;
    239     if (regexp.source.length >= 3 &&
    240         %_StringCharCodeAt(regexp.source, 0) == 46 &&  // '.'
    241         %_StringCharCodeAt(regexp.source, 1) == 42 &&  // '*'
    242         %_StringCharCodeAt(regexp.source, 2) != 63) {  // '?'
    243       regexp = TrimRegExp(regexp);
    244     }
    245     // matchIndices is either null or the lastMatchInfo array.
    246     var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo);
    247     if (IS_NULL(matchIndices)) {
    248       this.lastIndex = 0;
    249       return false;
    250     }
    251     lastMatchInfoOverride = null;
    252     return true;
    253   }
    254 }
    255 
    256 function TrimRegExp(regexp) {
    257   if (!%_ObjectEquals(regexp_key, regexp)) {
    258     regexp_key = regexp;
    259     regexp_val =
    260       new $RegExp(%_SubString(regexp.source, 2, regexp.source.length),
    261                   (regexp.ignoreCase ? regexp.multiline ? "im" : "i"
    262                                      : regexp.multiline ? "m" : ""));
    263   }
    264   return regexp_val;
    265 }
    266 
    267 
    268 function RegExpToString() {
    269   if (!IS_REGEXP(this)) {
    270     throw MakeTypeError('incompatible_method_receiver',
    271                         ['RegExp.prototype.toString', this]);
    272   }
    273   var result = '/' + this.source + '/';
    274   if (this.global) result += 'g';
    275   if (this.ignoreCase) result += 'i';
    276   if (this.multiline) result += 'm';
    277   if (harmony_regexps && this.sticky) result += 'y';
    278   return result;
    279 }
    280 
    281 
    282 // Getters for the static properties lastMatch, lastParen, leftContext, and
    283 // rightContext of the RegExp constructor.  The properties are computed based
    284 // on the captures array of the last successful match and the subject string
    285 // of the last successful match.
    286 function RegExpGetLastMatch() {
    287   if (lastMatchInfoOverride !== null) {
    288     return OVERRIDE_MATCH(lastMatchInfoOverride);
    289   }
    290   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    291   return %_SubString(regExpSubject,
    292                      lastMatchInfo[CAPTURE0],
    293                      lastMatchInfo[CAPTURE1]);
    294 }
    295 
    296 
    297 function RegExpGetLastParen() {
    298   if (lastMatchInfoOverride) {
    299     var override = lastMatchInfoOverride;
    300     if (override.length <= 3) return '';
    301     return override[override.length - 3];
    302   }
    303   var length = NUMBER_OF_CAPTURES(lastMatchInfo);
    304   if (length <= 2) return '';  // There were no captures.
    305   // We match the SpiderMonkey behavior: return the substring defined by the
    306   // last pair (after the first pair) of elements of the capture array even if
    307   // it is empty.
    308   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    309   var start = lastMatchInfo[CAPTURE(length - 2)];
    310   var end = lastMatchInfo[CAPTURE(length - 1)];
    311   if (start != -1 && end != -1) {
    312     return %_SubString(regExpSubject, start, end);
    313   }
    314   return "";
    315 }
    316 
    317 
    318 function RegExpGetLeftContext() {
    319   var start_index;
    320   var subject;
    321   if (!lastMatchInfoOverride) {
    322     start_index = lastMatchInfo[CAPTURE0];
    323     subject = LAST_SUBJECT(lastMatchInfo);
    324   } else {
    325     var override = lastMatchInfoOverride;
    326     start_index = OVERRIDE_POS(override);
    327     subject = OVERRIDE_SUBJECT(override);
    328   }
    329   return %_SubString(subject, 0, start_index);
    330 }
    331 
    332 
    333 function RegExpGetRightContext() {
    334   var start_index;
    335   var subject;
    336   if (!lastMatchInfoOverride) {
    337     start_index = lastMatchInfo[CAPTURE1];
    338     subject = LAST_SUBJECT(lastMatchInfo);
    339   } else {
    340     var override = lastMatchInfoOverride;
    341     subject = OVERRIDE_SUBJECT(override);
    342     var match = OVERRIDE_MATCH(override);
    343     start_index = OVERRIDE_POS(override) + match.length;
    344   }
    345   return %_SubString(subject, start_index, subject.length);
    346 }
    347 
    348 
    349 // The properties $1..$9 are the first nine capturing substrings of the last
    350 // successful match, or ''.  The function RegExpMakeCaptureGetter will be
    351 // called with indices from 1 to 9.
    352 function RegExpMakeCaptureGetter(n) {
    353   return function() {
    354     if (lastMatchInfoOverride) {
    355       if (n < lastMatchInfoOverride.length - 2) {
    356         return OVERRIDE_CAPTURE(lastMatchInfoOverride, n);
    357       }
    358       return '';
    359     }
    360     var index = n * 2;
    361     if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
    362     var matchStart = lastMatchInfo[CAPTURE(index)];
    363     var matchEnd = lastMatchInfo[CAPTURE(index + 1)];
    364     if (matchStart == -1 || matchEnd == -1) return '';
    365     return %_SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
    366   };
    367 }
    368 
    369 
// Property of the builtins object for recording the result of the last
// regexp match.  The property lastMatchInfo includes the matchIndices
// array of the last successful regexp match (an array of start/end index
// pairs for the match and all the captured substrings), the invariant is
// that there are at least two capture indices.  The array also contains
// the subject string for the last successful match.
// It starts out describing an empty match at index 0 of the empty string.
var lastMatchInfo = new InternalPackedArray(
    2,                 // REGEXP_NUMBER_OF_CAPTURES
    "",                // Last subject.
    UNDEFINED,         // Last input - settable with RegExpSetInput.
    0,                 // REGEXP_FIRST_CAPTURE + 0
    0                  // REGEXP_FIRST_CAPTURE + 1
);

// Override last match info with an array of actual substrings.
// Used internally by replace regexp with function.
// The array has the format of an "apply" argument for a replacement
// function.
// It is null when no override is active; the exec and test functions in this
// file reset it to null after every successful match.
var lastMatchInfoOverride = null;
    389 
    390 // -------------------------------------------------------------------
    391 
    392 function SetUpRegExp() {
    393   %CheckIsBootstrapping();
    394   %FunctionSetInstanceClassName($RegExp, 'RegExp');
    395   %AddNamedProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
    396   %SetCode($RegExp, RegExpConstructor);
    397 
    398   InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
    399     "exec", RegExpExec,
    400     "test", RegExpTest,
    401     "toString", RegExpToString,
    402     "compile", RegExpCompileJS
    403   ));
    404 
    405   // The length of compile is 1 in SpiderMonkey.
    406   %FunctionSetLength($RegExp.prototype.compile, 1);
    407 
    408   // The properties `input` and `$_` are aliases for each other.  When this
    409   // value is set the value it is set to is coerced to a string.
    410   // Getter and setter for the input.
    411   var RegExpGetInput = function() {
    412     var regExpInput = LAST_INPUT(lastMatchInfo);
    413     return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
    414   };
    415   var RegExpSetInput = function(string) {
    416     LAST_INPUT(lastMatchInfo) = ToString(string);
    417   };
    418 
    419   %OptimizeObjectForAddingMultipleProperties($RegExp, 22);
    420   %DefineAccessorPropertyUnchecked($RegExp, 'input', RegExpGetInput,
    421                                    RegExpSetInput, DONT_DELETE);
    422   %DefineAccessorPropertyUnchecked($RegExp, '$_', RegExpGetInput,
    423                                    RegExpSetInput, DONT_ENUM | DONT_DELETE);
    424 
    425   // The properties multiline and $* are aliases for each other.  When this
    426   // value is set in SpiderMonkey, the value it is set to is coerced to a
    427   // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
    428   // the value of the expression 'RegExp.multiline = null' (for instance) is the
    429   // boolean false (i.e., the value after coercion), while in V8 it is the value
    430   // null (i.e., the value before coercion).
    431 
    432   // Getter and setter for multiline.
    433   var multiline = false;
    434   var RegExpGetMultiline = function() { return multiline; };
    435   var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; };
    436 
    437   %DefineAccessorPropertyUnchecked($RegExp, 'multiline', RegExpGetMultiline,
    438                                    RegExpSetMultiline, DONT_DELETE);
    439   %DefineAccessorPropertyUnchecked($RegExp, '$*', RegExpGetMultiline,
    440                                    RegExpSetMultiline,
    441                                    DONT_ENUM | DONT_DELETE);
    442 
    443 
    444   var NoOpSetter = function(ignored) {};
    445 
    446 
    447   // Static properties set by a successful match.
    448   %DefineAccessorPropertyUnchecked($RegExp, 'lastMatch', RegExpGetLastMatch,
    449                                    NoOpSetter, DONT_DELETE);
    450   %DefineAccessorPropertyUnchecked($RegExp, '$&', RegExpGetLastMatch,
    451                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
    452   %DefineAccessorPropertyUnchecked($RegExp, 'lastParen', RegExpGetLastParen,
    453                                    NoOpSetter, DONT_DELETE);
    454   %DefineAccessorPropertyUnchecked($RegExp, '$+', RegExpGetLastParen,
    455                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
    456   %DefineAccessorPropertyUnchecked($RegExp, 'leftContext',
    457                                    RegExpGetLeftContext, NoOpSetter,
    458                                    DONT_DELETE);
    459   %DefineAccessorPropertyUnchecked($RegExp, '$`', RegExpGetLeftContext,
    460                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
    461   %DefineAccessorPropertyUnchecked($RegExp, 'rightContext',
    462                                    RegExpGetRightContext, NoOpSetter,
    463                                    DONT_DELETE);
    464   %DefineAccessorPropertyUnchecked($RegExp, "$'", RegExpGetRightContext,
    465                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
    466 
    467   for (var i = 1; i < 10; ++i) {
    468     %DefineAccessorPropertyUnchecked($RegExp, '$' + i,
    469                                      RegExpMakeCaptureGetter(i), NoOpSetter,
    470                                      DONT_DELETE);
    471   }
    472   %ToFastProperties($RegExp);
    473 }
    474 
    475 SetUpRegExp();
    476