Home | History | Annotate | Download | only in src
      1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 // Expect $Object = global.Object;
     29 // Expect $Array = global.Array;
     30 
     31 const $RegExp = global.RegExp;
     32 
     33 // A recursive descent parser for Patterns according to the grammar of
     34 // ECMA-262 15.10.1, with deviations noted below.
     35 function DoConstructRegExp(object, pattern, flags) {
     36   // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
     37   if (IS_REGEXP(pattern)) {
     38     if (!IS_UNDEFINED(flags)) {
     39       throw MakeTypeError('regexp_flags', []);
     40     }
     41     flags = (pattern.global ? 'g' : '')
     42         + (pattern.ignoreCase ? 'i' : '')
     43         + (pattern.multiline ? 'm' : '');
     44     pattern = pattern.source;
     45   }
     46 
     47   pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
     48   flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
     49 
     50   var global = false;
     51   var ignoreCase = false;
     52   var multiline = false;
     53 
     54   for (var i = 0; i < flags.length; i++) {
     55     var c = %_CallFunction(flags, i, StringCharAt);
     56     switch (c) {
     57       case 'g':
     58         // Allow duplicate flags to be consistent with JSC and others.
     59         global = true;
     60         break;
     61       case 'i':
     62         ignoreCase = true;
     63         break;
     64       case 'm':
     65         multiline = true;
     66         break;
     67       default:
     68         // Ignore flags that have no meaning to be consistent with
     69         // JSC.
     70         break;
     71     }
     72   }
     73 
     74   %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline);
     75 
     76   // Call internal function to compile the pattern.
     77   %RegExpCompile(object, pattern, flags);
     78 }
     79 
     80 
     81 function RegExpConstructor(pattern, flags) {
     82   if (%_IsConstructCall()) {
     83     DoConstructRegExp(this, pattern, flags);
     84   } else {
     85     // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
     86     if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
     87       return pattern;
     88     }
     89     return new $RegExp(pattern, flags);
     90   }
     91 }
     92 
     93 
     94 // Deprecated RegExp.prototype.compile method.  We behave like the constructor
     95 // were called again.  In SpiderMonkey, this method returns the regexp object.
     96 // In JSC, it returns undefined.  For compatibility with JSC, we match their
     97 // behavior.
     98 function CompileRegExp(pattern, flags) {
     99   // Both JSC and SpiderMonkey treat a missing pattern argument as the
    100   // empty subject string, and an actual undefined value passed as the
    101   // pattern as the string 'undefined'.  Note that JSC is inconsistent
    102   // here, treating undefined values differently in
    103   // RegExp.prototype.compile and in the constructor, where they are
    104   // the empty string.  For compatibility with JSC, we match their
    105   // behavior.
    106   if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
    107     DoConstructRegExp(this, 'undefined', flags);
    108   } else {
    109     DoConstructRegExp(this, pattern, flags);
    110   }
    111 }
    112 
    113 
    114 function DoRegExpExec(regexp, string, index) {
    115   var result = %_RegExpExec(regexp, string, index, lastMatchInfo);
    116   if (result !== null) lastMatchInfoOverride = null;
    117   return result;
    118 }
    119 
    120 
    121 function BuildResultFromMatchInfo(lastMatchInfo, s) {
    122   var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;
    123   var start = lastMatchInfo[CAPTURE0];
    124   var end = lastMatchInfo[CAPTURE1];
    125   var result = %_RegExpConstructResult(numResults, start, s);
    126   if (start + 1 == end) {
    127     result[0] = %_StringCharAt(s, start);
    128   } else {
    129     result[0] = %_SubString(s, start, end);
    130   }
    131   var j = REGEXP_FIRST_CAPTURE + 2;
    132   for (var i = 1; i < numResults; i++) {
    133     start = lastMatchInfo[j++];
    134     end = lastMatchInfo[j++];
    135     if (end != -1) {
    136       if (start + 1 == end) {
    137         result[i] = %_StringCharAt(s, start);
    138       } else {
    139         result[i] = %_SubString(s, start, end);
    140       }
    141     } else {
    142       // Make sure the element is present. Avoid reading the undefined
    143       // property from the global object since this may change.
    144       result[i] = void 0;
    145     }
    146   }
    147   return result;
    148 }
    149 
    150 
    151 function RegExpExecNoTests(regexp, string, start) {
    152   // Must be called with RegExp, string and positive integer as arguments.
    153   var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
    154   if (matchInfo !== null) {
    155     lastMatchInfoOverride = null;
    156     return BuildResultFromMatchInfo(matchInfo, string);
    157   }
    158   return null;
    159 }
    160 
    161 
    162 function RegExpExec(string) {
    163   if (!IS_REGEXP(this)) {
    164     throw MakeTypeError('incompatible_method_receiver',
    165                         ['RegExp.prototype.exec', this]);
    166   }
    167 
    168   if (%_ArgumentsLength() === 0) {
    169     var regExpInput = LAST_INPUT(lastMatchInfo);
    170     if (IS_UNDEFINED(regExpInput)) {
    171       throw MakeError('no_input_to_regexp', [this]);
    172     }
    173     string = regExpInput;
    174   }
    175   string = TO_STRING_INLINE(string);
    176   var lastIndex = this.lastIndex;
    177 
    178   // Conversion is required by the ES5 specification (RegExp.prototype.exec
    179   // algorithm, step 5) even if the value is discarded for non-global RegExps.
    180   var i = TO_INTEGER(lastIndex);
    181 
    182   var global = this.global;
    183   if (global) {
    184     if (i < 0 || i > string.length) {
    185       this.lastIndex = 0;
    186       return null;
    187     }
    188   } else {
    189     i = 0;
    190   }
    191 
    192   %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
    193   // matchIndices is either null or the lastMatchInfo array.
    194   var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
    195 
    196   if (matchIndices === null) {
    197     if (global) this.lastIndex = 0;
    198     return null;
    199   }
    200 
    201   // Successful match.
    202   lastMatchInfoOverride = null;
    203   if (global) {
    204     this.lastIndex = lastMatchInfo[CAPTURE1];
    205   }
    206   return BuildResultFromMatchInfo(matchIndices, string);
    207 }
    208 
    209 
    210 // One-element cache for the simplified test regexp.
    211 var regexp_key;
    212 var regexp_val;
    213 
    214 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
    215 // that test is defined in terms of String.prototype.exec. However, it probably
    216 // means the original value of String.prototype.exec, which is what everybody
    217 // else implements.
    218 function RegExpTest(string) {
    219   if (!IS_REGEXP(this)) {
    220     throw MakeTypeError('incompatible_method_receiver',
    221                         ['RegExp.prototype.test', this]);
    222   }
    223   if (%_ArgumentsLength() == 0) {
    224     var regExpInput = LAST_INPUT(lastMatchInfo);
    225     if (IS_UNDEFINED(regExpInput)) {
    226       throw MakeError('no_input_to_regexp', [this]);
    227     }
    228     string = regExpInput;
    229   }
    230 
    231   string = TO_STRING_INLINE(string);
    232 
    233   var lastIndex = this.lastIndex;
    234 
    235   // Conversion is required by the ES5 specification (RegExp.prototype.exec
    236   // algorithm, step 5) even if the value is discarded for non-global RegExps.
    237   var i = TO_INTEGER(lastIndex);
    238   
    239   if (this.global) {
    240     if (i < 0 || i > string.length) {
    241       this.lastIndex = 0;
    242       return false;
    243     }
    244     %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
    245     // matchIndices is either null or the lastMatchInfo array.
    246     var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
    247     if (matchIndices === null) {
    248       this.lastIndex = 0;
    249       return false;
    250     }
    251     lastMatchInfoOverride = null;
    252     this.lastIndex = lastMatchInfo[CAPTURE1];
    253     return true;    
    254   } else {
    255     // Non-global regexp.
    256     // Remove irrelevant preceeding '.*' in a non-global test regexp. 
    257     // The expression checks whether this.source starts with '.*' and 
    258     // that the third char is not a '?'.
    259     if (%_StringCharCodeAt(this.source, 0) == 46 &&  // '.'
    260         %_StringCharCodeAt(this.source, 1) == 42 &&  // '*'
    261         %_StringCharCodeAt(this.source, 2) != 63) {  // '?'
    262       if (!%_ObjectEquals(regexp_key, this)) {
    263         regexp_key = this;
    264         regexp_val = new $RegExp(SubString(this.source, 2, this.source.length),
    265                                  (!this.ignoreCase 
    266                                   ? !this.multiline ? "" : "m"
    267                                   : !this.multiline ? "i" : "im"));
    268       }
    269       if (%_RegExpExec(regexp_val, string, 0, lastMatchInfo) === null) {
    270         return false;
    271       }
    272     }    
    273     %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
    274     // matchIndices is either null or the lastMatchInfo array.
    275     var matchIndices = %_RegExpExec(this, string, 0, lastMatchInfo);
    276     if (matchIndices === null) return false;
    277     lastMatchInfoOverride = null;
    278     return true;
    279   }
    280 }
    281 
    282 
    283 function RegExpToString() {
    284   // If this.source is an empty string, output /(?:)/.
    285   // http://bugzilla.mozilla.org/show_bug.cgi?id=225550
    286   // ecma_2/RegExp/properties-001.js.
    287   var src = this.source ? this.source : '(?:)';
    288   var result = '/' + src + '/';
    289   if (this.global) result += 'g';
    290   if (this.ignoreCase) result += 'i';
    291   if (this.multiline) result += 'm';
    292   return result;
    293 }
    294 
    295 
    296 // Getters for the static properties lastMatch, lastParen, leftContext, and
    297 // rightContext of the RegExp constructor.  The properties are computed based
    298 // on the captures array of the last successful match and the subject string
    299 // of the last successful match.
    300 function RegExpGetLastMatch() {
    301   if (lastMatchInfoOverride !== null) {
    302     return lastMatchInfoOverride[0];
    303   }
    304   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    305   return SubString(regExpSubject,
    306                    lastMatchInfo[CAPTURE0],
    307                    lastMatchInfo[CAPTURE1]);
    308 }
    309 
    310 
    311 function RegExpGetLastParen() {
    312   if (lastMatchInfoOverride) {
    313     var override = lastMatchInfoOverride;
    314     if (override.length <= 3) return '';
    315     return override[override.length - 3];
    316   }
    317   var length = NUMBER_OF_CAPTURES(lastMatchInfo);
    318   if (length <= 2) return '';  // There were no captures.
    319   // We match the SpiderMonkey behavior: return the substring defined by the
    320   // last pair (after the first pair) of elements of the capture array even if
    321   // it is empty.
    322   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    323   var start = lastMatchInfo[CAPTURE(length - 2)];
    324   var end = lastMatchInfo[CAPTURE(length - 1)];
    325   if (start != -1 && end != -1) {
    326     return SubString(regExpSubject, start, end);
    327   }
    328   return "";
    329 }
    330 
    331 
    332 function RegExpGetLeftContext() {
    333   var start_index;
    334   var subject;
    335   if (!lastMatchInfoOverride) {
    336     start_index = lastMatchInfo[CAPTURE0];
    337     subject = LAST_SUBJECT(lastMatchInfo);
    338   } else {
    339     var override = lastMatchInfoOverride;
    340     start_index = override[override.length - 2];
    341     subject = override[override.length - 1];
    342   }
    343   return SubString(subject, 0, start_index);
    344 }
    345 
    346 
    347 function RegExpGetRightContext() {
    348   var start_index;
    349   var subject;
    350   if (!lastMatchInfoOverride) {
    351     start_index = lastMatchInfo[CAPTURE1];
    352     subject = LAST_SUBJECT(lastMatchInfo);
    353   } else {
    354     var override = lastMatchInfoOverride;
    355     subject = override[override.length - 1];
    356     start_index = override[override.length - 2] + subject.length;
    357   }
    358   return SubString(subject, start_index, subject.length);
    359 }
    360 
    361 
    362 // The properties $1..$9 are the first nine capturing substrings of the last
    363 // successful match, or ''.  The function RegExpMakeCaptureGetter will be
    364 // called with indices from 1 to 9.
    365 function RegExpMakeCaptureGetter(n) {
    366   return function() {
    367     if (lastMatchInfoOverride) {
    368       if (n < lastMatchInfoOverride.length - 2) return lastMatchInfoOverride[n];
    369       return '';
    370     }
    371     var index = n * 2;
    372     if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
    373     var matchStart = lastMatchInfo[CAPTURE(index)];
    374     var matchEnd = lastMatchInfo[CAPTURE(index + 1)];
    375     if (matchStart == -1 || matchEnd == -1) return '';
    376     return SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
    377   };
    378 }
    379 
    380 
    381 // Property of the builtins object for recording the result of the last
    382 // regexp match.  The property lastMatchInfo includes the matchIndices
    383 // array of the last successful regexp match (an array of start/end index
    384 // pairs for the match and all the captured substrings), the invariant is
    385 // that there are at least two capture indeces.  The array also contains
    386 // the subject string for the last successful match.
    387 var lastMatchInfo = new InternalArray(
    388     2,                 // REGEXP_NUMBER_OF_CAPTURES
    389     "",                // Last subject.
    390     void 0,            // Last input - settable with RegExpSetInput.
    391     0,                 // REGEXP_FIRST_CAPTURE + 0
    392     0                  // REGEXP_FIRST_CAPTURE + 1
    393 );
    394 
    395 // Override last match info with an array of actual substrings.
    396 // Used internally by replace regexp with function.
    397 // The array has the format of an "apply" argument for a replacement
    398 // function.
    399 var lastMatchInfoOverride = null;
    400 
    401 // -------------------------------------------------------------------
    402 
    403 function SetupRegExp() {
    404   %FunctionSetInstanceClassName($RegExp, 'RegExp');
    405   %FunctionSetPrototype($RegExp, new $Object());
    406   %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
    407   %SetCode($RegExp, RegExpConstructor);
    408 
    409   InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
    410     "exec", RegExpExec,
    411     "test", RegExpTest,
    412     "toString", RegExpToString,
    413     "compile", CompileRegExp
    414   ));
    415 
    416   // The length of compile is 1 in SpiderMonkey.
    417   %FunctionSetLength($RegExp.prototype.compile, 1);
    418 
    419   // The properties input, $input, and $_ are aliases for each other.  When this
    420   // value is set the value it is set to is coerced to a string.
    421   // Getter and setter for the input.
    422   function RegExpGetInput() {
    423     var regExpInput = LAST_INPUT(lastMatchInfo);
    424     return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
    425   }
    426   function RegExpSetInput(string) {
    427     LAST_INPUT(lastMatchInfo) = ToString(string);
    428   };
    429 
    430   %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE);
    431   %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE);
    432   %DefineAccessor($RegExp, '$_', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE);
    433   %DefineAccessor($RegExp, '$_', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE);
    434   %DefineAccessor($RegExp, '$input', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE);
    435   %DefineAccessor($RegExp, '$input', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE);
    436 
    437   // The properties multiline and $* are aliases for each other.  When this
    438   // value is set in SpiderMonkey, the value it is set to is coerced to a
    439   // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
    440   // the value of the expression 'RegExp.multiline = null' (for instance) is the
    441   // boolean false (ie, the value after coercion), while in V8 it is the value
    442   // null (ie, the value before coercion).
    443 
    444   // Getter and setter for multiline.
    445   var multiline = false;
    446   function RegExpGetMultiline() { return multiline; };
    447   function RegExpSetMultiline(flag) { multiline = flag ? true : false; };
    448 
    449   %DefineAccessor($RegExp, 'multiline', GETTER, RegExpGetMultiline, DONT_DELETE);
    450   %DefineAccessor($RegExp, 'multiline', SETTER, RegExpSetMultiline, DONT_DELETE);
    451   %DefineAccessor($RegExp, '$*', GETTER, RegExpGetMultiline, DONT_ENUM | DONT_DELETE);
    452   %DefineAccessor($RegExp, '$*', SETTER, RegExpSetMultiline, DONT_ENUM | DONT_DELETE);
    453 
    454 
    455   function NoOpSetter(ignored) {}
    456 
    457 
    458   // Static properties set by a successful match.
    459   %DefineAccessor($RegExp, 'lastMatch', GETTER, RegExpGetLastMatch, DONT_DELETE);
    460   %DefineAccessor($RegExp, 'lastMatch', SETTER, NoOpSetter, DONT_DELETE);
    461   %DefineAccessor($RegExp, '$&', GETTER, RegExpGetLastMatch, DONT_ENUM | DONT_DELETE);
    462   %DefineAccessor($RegExp, '$&', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
    463   %DefineAccessor($RegExp, 'lastParen', GETTER, RegExpGetLastParen, DONT_DELETE);
    464   %DefineAccessor($RegExp, 'lastParen', SETTER, NoOpSetter, DONT_DELETE);
    465   %DefineAccessor($RegExp, '$+', GETTER, RegExpGetLastParen, DONT_ENUM | DONT_DELETE);
    466   %DefineAccessor($RegExp, '$+', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
    467   %DefineAccessor($RegExp, 'leftContext', GETTER, RegExpGetLeftContext, DONT_DELETE);
    468   %DefineAccessor($RegExp, 'leftContext', SETTER, NoOpSetter, DONT_DELETE);
    469   %DefineAccessor($RegExp, '$`', GETTER, RegExpGetLeftContext, DONT_ENUM | DONT_DELETE);
    470   %DefineAccessor($RegExp, '$`', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
    471   %DefineAccessor($RegExp, 'rightContext', GETTER, RegExpGetRightContext, DONT_DELETE);
    472   %DefineAccessor($RegExp, 'rightContext', SETTER, NoOpSetter, DONT_DELETE);
    473   %DefineAccessor($RegExp, "$'", GETTER, RegExpGetRightContext, DONT_ENUM | DONT_DELETE);
    474   %DefineAccessor($RegExp, "$'", SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
    475 
    476   for (var i = 1; i < 10; ++i) {
    477     %DefineAccessor($RegExp, '$' + i, GETTER, RegExpMakeCaptureGetter(i), DONT_DELETE);
    478     %DefineAccessor($RegExp, '$' + i, SETTER, NoOpSetter, DONT_DELETE);
    479   }
    480 }
    481 
    482 
    483 SetupRegExp();
    484