Home | History | Annotate | Download | only in src
      1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 // Expect $Object = global.Object;
     29 // Expect $Array = global.Array;
     30 
     31 const $RegExp = global.RegExp;
     32 
     33 // A recursive descent parser for Patterns according to the grammar of
     34 // ECMA-262 15.10.1, with deviations noted below.
     35 function DoConstructRegExp(object, pattern, flags, isConstructorCall) {
     36   // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
     37   if (IS_REGEXP(pattern)) {
     38     if (!IS_UNDEFINED(flags)) {
     39       throw MakeTypeError('regexp_flags', []);
     40     }
     41     flags = (pattern.global ? 'g' : '')
     42         + (pattern.ignoreCase ? 'i' : '')
     43         + (pattern.multiline ? 'm' : '');
     44     pattern = pattern.source;
     45   }
     46 
     47   pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
     48   flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
     49 
     50   var global = false;
     51   var ignoreCase = false;
     52   var multiline = false;
     53 
     54   for (var i = 0; i < flags.length; i++) {
     55     var c = StringCharAt.call(flags, i);
     56     switch (c) {
     57       case 'g':
     58         // Allow duplicate flags to be consistent with JSC and others.
     59         global = true;
     60         break;
     61       case 'i':
     62         ignoreCase = true;
     63         break;
     64       case 'm':
     65         multiline = true;
     66         break;
     67       default:
     68         // Ignore flags that have no meaning to be consistent with
     69         // JSC.
     70         break;
     71     }
     72   }
     73 
     74   if (isConstructorCall) {
     75     // ECMA-262, section 15.10.7.1.
     76     %SetProperty(object, 'source', pattern,
     77                  DONT_DELETE |  READ_ONLY | DONT_ENUM);
     78 
     79     // ECMA-262, section 15.10.7.2.
     80     %SetProperty(object, 'global', global, DONT_DELETE | READ_ONLY | DONT_ENUM);
     81 
     82     // ECMA-262, section 15.10.7.3.
     83     %SetProperty(object, 'ignoreCase', ignoreCase,
     84                  DONT_DELETE | READ_ONLY | DONT_ENUM);
     85 
     86     // ECMA-262, section 15.10.7.4.
     87     %SetProperty(object, 'multiline', multiline,
     88                  DONT_DELETE | READ_ONLY | DONT_ENUM);
     89 
     90     // ECMA-262, section 15.10.7.5.
     91     %SetProperty(object, 'lastIndex', 0, DONT_DELETE | DONT_ENUM);
     92   } else { // RegExp is being recompiled via RegExp.prototype.compile.
     93     %IgnoreAttributesAndSetProperty(object, 'source', pattern);
     94     %IgnoreAttributesAndSetProperty(object, 'global', global);
     95     %IgnoreAttributesAndSetProperty(object, 'ignoreCase', ignoreCase);
     96     %IgnoreAttributesAndSetProperty(object, 'multiline', multiline);
     97     %IgnoreAttributesAndSetProperty(object, 'lastIndex', 0);
     98   }
     99 
    100   // Call internal function to compile the pattern.
    101   %RegExpCompile(object, pattern, flags);
    102 }
    103 
    104 
    105 function RegExpConstructor(pattern, flags) {
    106   if (%_IsConstructCall()) {
    107     DoConstructRegExp(this, pattern, flags, true);
    108   } else {
    109     // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
    110     if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
    111       return pattern;
    112     }
    113     return new $RegExp(pattern, flags);
    114   }
    115 }
    116 
    117 
    118 // Deprecated RegExp.prototype.compile method.  We behave like the constructor
    119 // were called again.  In SpiderMonkey, this method returns the regexp object.
    120 // In JSC, it returns undefined.  For compatibility with JSC, we match their
    121 // behavior.
    122 function CompileRegExp(pattern, flags) {
    123   // Both JSC and SpiderMonkey treat a missing pattern argument as the
    124   // empty subject string, and an actual undefined value passed as the
    125   // pattern as the string 'undefined'.  Note that JSC is inconsistent
    126   // here, treating undefined values differently in
    127   // RegExp.prototype.compile and in the constructor, where they are
    128   // the empty string.  For compatibility with JSC, we match their
    129   // behavior.
    130   if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
    131     DoConstructRegExp(this, 'undefined', flags, false);
    132   } else {
    133     DoConstructRegExp(this, pattern, flags, false);
    134   }
    135 }
    136 
    137 
    138 function DoRegExpExec(regexp, string, index) {
    139   return %_RegExpExec(regexp, string, index, lastMatchInfo);
    140 }
    141 
    142 
    143 function RegExpExec(string) {
    144   if (!IS_REGEXP(this)) {
    145     throw MakeTypeError('method_called_on_incompatible',
    146                         ['RegExp.prototype.exec', this]);
    147   }
    148   if (%_ArgumentsLength() == 0) {
    149     var regExpInput = LAST_INPUT(lastMatchInfo);
    150     if (IS_UNDEFINED(regExpInput)) {
    151       throw MakeError('no_input_to_regexp', [this]);
    152     }
    153     string = regExpInput;
    154   }
    155   var s = ToString(string);
    156   var length = s.length;
    157   var lastIndex = this.lastIndex;
    158   var i = this.global ? TO_INTEGER(lastIndex) : 0;
    159 
    160   if (i < 0 || i > s.length) {
    161     this.lastIndex = 0;
    162     return null;
    163   }
    164 
    165   %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]);
    166   // matchIndices is either null or the lastMatchInfo array.
    167   var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo);
    168 
    169   if (matchIndices == null) {
    170     if (this.global) this.lastIndex = 0;
    171     return matchIndices; // no match
    172   }
    173 
    174   var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;
    175   var result = new $Array(numResults);
    176   for (var i = 0; i < numResults; i++) {
    177     var matchStart = lastMatchInfo[CAPTURE(i << 1)];
    178     var matchEnd = lastMatchInfo[CAPTURE((i << 1) + 1)];
    179     if (matchStart != -1 && matchEnd != -1) {
    180       result[i] = SubString(s, matchStart, matchEnd);
    181     } else {
    182       // Make sure the element is present. Avoid reading the undefined
    183       // property from the global object since this may change.
    184       result[i] = void 0;
    185     }
    186   }
    187 
    188   if (this.global)
    189     this.lastIndex = lastMatchInfo[CAPTURE1];
    190   result.index = lastMatchInfo[CAPTURE0];
    191   result.input = s;
    192   return result;
    193 }
    194 
    195 
    196 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
    197 // that test is defined in terms of String.prototype.exec. However, it probably
    198 // means the original value of String.prototype.exec, which is what everybody
    199 // else implements.
    200 function RegExpTest(string) {
    201   if (!IS_REGEXP(this)) {
    202     throw MakeTypeError('method_called_on_incompatible',
    203                         ['RegExp.prototype.test', this]);
    204   }
    205   if (%_ArgumentsLength() == 0) {
    206     var regExpInput = LAST_INPUT(lastMatchInfo);
    207     if (IS_UNDEFINED(regExpInput)) {
    208       throw MakeError('no_input_to_regexp', [this]);
    209     }
    210     string = regExpInput;
    211   }
    212   var s = ToString(string);
    213   var length = s.length;
    214   var lastIndex = this.lastIndex;
    215   var i = this.global ? TO_INTEGER(lastIndex) : 0;
    216 
    217   if (i < 0 || i > s.length) {
    218     this.lastIndex = 0;
    219     return false;
    220   }
    221 
    222   %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]);
    223   // matchIndices is either null or the lastMatchInfo array.
    224   var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo);
    225 
    226   if (matchIndices == null) {
    227     if (this.global) this.lastIndex = 0;
    228     return false;
    229   }
    230 
    231   if (this.global) this.lastIndex = lastMatchInfo[CAPTURE1];
    232   return true;
    233 }
    234 
    235 
    236 function RegExpToString() {
    237   // If this.source is an empty string, output /(?:)/.
    238   // http://bugzilla.mozilla.org/show_bug.cgi?id=225550
    239   // ecma_2/RegExp/properties-001.js.
    240   var src = this.source ? this.source : '(?:)';
    241   var result = '/' + src + '/';
    242   if (this.global)
    243     result += 'g';
    244   if (this.ignoreCase)
    245     result += 'i';
    246   if (this.multiline)
    247     result += 'm';
    248   return result;
    249 }
    250 
    251 
    252 // Getters for the static properties lastMatch, lastParen, leftContext, and
    253 // rightContext of the RegExp constructor.  The properties are computed based
    254 // on the captures array of the last successful match and the subject string
    255 // of the last successful match.
    256 function RegExpGetLastMatch() {
    257   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    258   return SubString(regExpSubject,
    259                    lastMatchInfo[CAPTURE0],
    260                    lastMatchInfo[CAPTURE1]);
    261 }
    262 
    263 
    264 function RegExpGetLastParen() {
    265   var length = NUMBER_OF_CAPTURES(lastMatchInfo);
    266   if (length <= 2) return '';  // There were no captures.
    267   // We match the SpiderMonkey behavior: return the substring defined by the
    268   // last pair (after the first pair) of elements of the capture array even if
    269   // it is empty.
    270   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
    271   var start = lastMatchInfo[CAPTURE(length - 2)];
    272   var end = lastMatchInfo[CAPTURE(length - 1)];
    273   if (start != -1 && end != -1) {
    274     return SubString(regExpSubject, start, end);
    275   }
    276   return "";
    277 }
    278 
    279 
    280 function RegExpGetLeftContext() {
    281   return SubString(LAST_SUBJECT(lastMatchInfo),
    282                    0,
    283                    lastMatchInfo[CAPTURE0]);
    284 }
    285 
    286 
    287 function RegExpGetRightContext() {
    288   var subject = LAST_SUBJECT(lastMatchInfo);
    289   return SubString(subject,
    290                    lastMatchInfo[CAPTURE1],
    291                    subject.length);
    292 }
    293 
    294 
    295 // The properties $1..$9 are the first nine capturing substrings of the last
    296 // successful match, or ''.  The function RegExpMakeCaptureGetter will be
    297 // called with indices from 1 to 9.
    298 function RegExpMakeCaptureGetter(n) {
    299   return function() {
    300     var index = n * 2;
    301     if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
    302     var matchStart = lastMatchInfo[CAPTURE(index)];
    303     var matchEnd = lastMatchInfo[CAPTURE(index + 1)];
    304     if (matchStart == -1 || matchEnd == -1) return '';
    305     return SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
    306   };
    307 }
    308 
    309 
    310 // Property of the builtins object for recording the result of the last
    311 // regexp match.  The property lastMatchInfo includes the matchIndices
    312 // array of the last successful regexp match (an array of start/end index
    313 // pairs for the match and all the captured substrings), the invariant is
    314 // that there are at least two capture indeces.  The array also contains
    315 // the subject string for the last successful match.
    316 var lastMatchInfo = [
    317     2,                 // REGEXP_NUMBER_OF_CAPTURES
    318     "",                // Last subject.
    319     void 0,            // Last input - settable with RegExpSetInput.
    320     0,                 // REGEXP_FIRST_CAPTURE + 0
    321     0,                 // REGEXP_FIRST_CAPTURE + 1
    322 ];
    323 
    324 // -------------------------------------------------------------------
    325 
    326 function SetupRegExp() {
    327   %FunctionSetInstanceClassName($RegExp, 'RegExp');
    328   %FunctionSetPrototype($RegExp, new $Object());
    329   %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
    330   %SetCode($RegExp, RegExpConstructor);
    331 
    332   InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
    333     "exec", RegExpExec,
    334     "test", RegExpTest,
    335     "toString", RegExpToString,
    336     "compile", CompileRegExp
    337   ));
    338 
    339   // The length of compile is 1 in SpiderMonkey.
    340   %FunctionSetLength($RegExp.prototype.compile, 1);
    341 
    342   // The properties input, $input, and $_ are aliases for each other.  When this
    343   // value is set the value it is set to is coerced to a string. 
    344   // Getter and setter for the input.
    345   function RegExpGetInput() {
    346     var regExpInput = LAST_INPUT(lastMatchInfo);
    347     return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
    348   }
    349   function RegExpSetInput(string) {
    350     LAST_INPUT(lastMatchInfo) = ToString(string);
    351   };
    352 
    353   %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE);
    354   %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE);
    355   %DefineAccessor($RegExp, '$_', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE);
    356   %DefineAccessor($RegExp, '$_', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE);
    357   %DefineAccessor($RegExp, '$input', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE);
    358   %DefineAccessor($RegExp, '$input', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE);
    359 
    360   // The properties multiline and $* are aliases for each other.  When this
    361   // value is set in SpiderMonkey, the value it is set to is coerced to a
    362   // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
    363   // the value of the expression 'RegExp.multiline = null' (for instance) is the
    364   // boolean false (ie, the value after coercion), while in V8 it is the value
    365   // null (ie, the value before coercion).
    366 
    367   // Getter and setter for multiline.
    368   var multiline = false;
    369   function RegExpGetMultiline() { return multiline; };
    370   function RegExpSetMultiline(flag) { multiline = flag ? true : false; };
    371 
    372   %DefineAccessor($RegExp, 'multiline', GETTER, RegExpGetMultiline, DONT_DELETE);
    373   %DefineAccessor($RegExp, 'multiline', SETTER, RegExpSetMultiline, DONT_DELETE);
    374   %DefineAccessor($RegExp, '$*', GETTER, RegExpGetMultiline, DONT_ENUM | DONT_DELETE);
    375   %DefineAccessor($RegExp, '$*', SETTER, RegExpSetMultiline, DONT_ENUM | DONT_DELETE);
    376 
    377 
    378   function NoOpSetter(ignored) {}
    379 
    380 
    381   // Static properties set by a successful match.
    382   %DefineAccessor($RegExp, 'lastMatch', GETTER, RegExpGetLastMatch, DONT_DELETE);
    383   %DefineAccessor($RegExp, 'lastMatch', SETTER, NoOpSetter, DONT_DELETE);
    384   %DefineAccessor($RegExp, '$&', GETTER, RegExpGetLastMatch, DONT_ENUM | DONT_DELETE);
    385   %DefineAccessor($RegExp, '$&', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
    386   %DefineAccessor($RegExp, 'lastParen', GETTER, RegExpGetLastParen, DONT_DELETE);
    387   %DefineAccessor($RegExp, 'lastParen', SETTER, NoOpSetter, DONT_DELETE);
    388   %DefineAccessor($RegExp, '$+', GETTER, RegExpGetLastParen, DONT_ENUM | DONT_DELETE);
    389   %DefineAccessor($RegExp, '$+', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
    390   %DefineAccessor($RegExp, 'leftContext', GETTER, RegExpGetLeftContext, DONT_DELETE);
    391   %DefineAccessor($RegExp, 'leftContext', SETTER, NoOpSetter, DONT_DELETE);
    392   %DefineAccessor($RegExp, '$`', GETTER, RegExpGetLeftContext, DONT_ENUM | DONT_DELETE);
    393   %DefineAccessor($RegExp, '$`', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
    394   %DefineAccessor($RegExp, 'rightContext', GETTER, RegExpGetRightContext, DONT_DELETE);
    395   %DefineAccessor($RegExp, 'rightContext', SETTER, NoOpSetter, DONT_DELETE);
    396   %DefineAccessor($RegExp, "$'", GETTER, RegExpGetRightContext, DONT_ENUM | DONT_DELETE);
    397   %DefineAccessor($RegExp, "$'", SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE);
    398 
    399   for (var i = 1; i < 10; ++i) {
    400     %DefineAccessor($RegExp, '$' + i, GETTER, RegExpMakeCaptureGetter(i), DONT_DELETE);
    401     %DefineAccessor($RegExp, '$' + i, SETTER, NoOpSetter, DONT_DELETE);
    402   }
    403 }
    404 
    405 
    406 SetupRegExp();
    407