1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28 // Expect $Object = global.Object; 29 // Expect $Array = global.Array; 30 31 const $RegExp = global.RegExp; 32 33 // A recursive descent parser for Patterns according to the grammar of 34 // ECMA-262 15.10.1, with deviations noted below. 35 function DoConstructRegExp(object, pattern, flags, isConstructorCall) { 36 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. 37 if (IS_REGEXP(pattern)) { 38 if (!IS_UNDEFINED(flags)) { 39 throw MakeTypeError('regexp_flags', []); 40 } 41 flags = (pattern.global ? 'g' : '') 42 + (pattern.ignoreCase ? 'i' : '') 43 + (pattern.multiline ? 'm' : ''); 44 pattern = pattern.source; 45 } 46 47 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); 48 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); 49 50 var global = false; 51 var ignoreCase = false; 52 var multiline = false; 53 54 for (var i = 0; i < flags.length; i++) { 55 var c = StringCharAt.call(flags, i); 56 switch (c) { 57 case 'g': 58 // Allow duplicate flags to be consistent with JSC and others. 59 global = true; 60 break; 61 case 'i': 62 ignoreCase = true; 63 break; 64 case 'm': 65 multiline = true; 66 break; 67 default: 68 // Ignore flags that have no meaning to be consistent with 69 // JSC. 70 break; 71 } 72 } 73 74 if (isConstructorCall) { 75 // ECMA-262, section 15.10.7.1. 76 %SetProperty(object, 'source', pattern, 77 DONT_DELETE | READ_ONLY | DONT_ENUM); 78 79 // ECMA-262, section 15.10.7.2. 80 %SetProperty(object, 'global', global, DONT_DELETE | READ_ONLY | DONT_ENUM); 81 82 // ECMA-262, section 15.10.7.3. 83 %SetProperty(object, 'ignoreCase', ignoreCase, 84 DONT_DELETE | READ_ONLY | DONT_ENUM); 85 86 // ECMA-262, section 15.10.7.4. 87 %SetProperty(object, 'multiline', multiline, 88 DONT_DELETE | READ_ONLY | DONT_ENUM); 89 90 // ECMA-262, section 15.10.7.5. 91 %SetProperty(object, 'lastIndex', 0, DONT_DELETE | DONT_ENUM); 92 } else { // RegExp is being recompiled via RegExp.prototype.compile. 93 %IgnoreAttributesAndSetProperty(object, 'source', pattern); 94 %IgnoreAttributesAndSetProperty(object, 'global', global); 95 %IgnoreAttributesAndSetProperty(object, 'ignoreCase', ignoreCase); 96 %IgnoreAttributesAndSetProperty(object, 'multiline', multiline); 97 %IgnoreAttributesAndSetProperty(object, 'lastIndex', 0); 98 } 99 100 // Call internal function to compile the pattern. 101 %RegExpCompile(object, pattern, flags); 102 } 103 104 105 function RegExpConstructor(pattern, flags) { 106 if (%_IsConstructCall()) { 107 DoConstructRegExp(this, pattern, flags, true); 108 } else { 109 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. 110 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { 111 return pattern; 112 } 113 return new $RegExp(pattern, flags); 114 } 115 } 116 117 118 // Deprecated RegExp.prototype.compile method. We behave like the constructor 119 // were called again. In SpiderMonkey, this method returns the regexp object. 120 // In JSC, it returns undefined. For compatibility with JSC, we match their 121 // behavior. 122 function CompileRegExp(pattern, flags) { 123 // Both JSC and SpiderMonkey treat a missing pattern argument as the 124 // empty subject string, and an actual undefined value passed as the 125 // pattern as the string 'undefined'. Note that JSC is inconsistent 126 // here, treating undefined values differently in 127 // RegExp.prototype.compile and in the constructor, where they are 128 // the empty string. For compatibility with JSC, we match their 129 // behavior. 130 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { 131 DoConstructRegExp(this, 'undefined', flags, false); 132 } else { 133 DoConstructRegExp(this, pattern, flags, false); 134 } 135 } 136 137 138 function DoRegExpExec(regexp, string, index) { 139 return %_RegExpExec(regexp, string, index, lastMatchInfo); 140 } 141 142 143 function RegExpExec(string) { 144 if (!IS_REGEXP(this)) { 145 throw MakeTypeError('method_called_on_incompatible', 146 ['RegExp.prototype.exec', this]); 147 } 148 if (%_ArgumentsLength() == 0) { 149 var regExpInput = LAST_INPUT(lastMatchInfo); 150 if (IS_UNDEFINED(regExpInput)) { 151 throw MakeError('no_input_to_regexp', [this]); 152 } 153 string = regExpInput; 154 } 155 var s = ToString(string); 156 var length = s.length; 157 var lastIndex = this.lastIndex; 158 var i = this.global ? TO_INTEGER(lastIndex) : 0; 159 160 if (i < 0 || i > s.length) { 161 this.lastIndex = 0; 162 return null; 163 } 164 165 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); 166 // matchIndices is either null or the lastMatchInfo array. 167 var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo); 168 169 if (matchIndices == null) { 170 if (this.global) this.lastIndex = 0; 171 return matchIndices; // no match 172 } 173 174 var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; 175 var result = new $Array(numResults); 176 for (var i = 0; i < numResults; i++) { 177 var matchStart = lastMatchInfo[CAPTURE(i << 1)]; 178 var matchEnd = lastMatchInfo[CAPTURE((i << 1) + 1)]; 179 if (matchStart != -1 && matchEnd != -1) { 180 result[i] = SubString(s, matchStart, matchEnd); 181 } else { 182 // Make sure the element is present. Avoid reading the undefined 183 // property from the global object since this may change. 184 result[i] = void 0; 185 } 186 } 187 188 if (this.global) 189 this.lastIndex = lastMatchInfo[CAPTURE1]; 190 result.index = lastMatchInfo[CAPTURE0]; 191 result.input = s; 192 return result; 193 } 194 195 196 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be 197 // that test is defined in terms of String.prototype.exec. However, it probably 198 // means the original value of String.prototype.exec, which is what everybody 199 // else implements. 200 function RegExpTest(string) { 201 if (!IS_REGEXP(this)) { 202 throw MakeTypeError('method_called_on_incompatible', 203 ['RegExp.prototype.test', this]); 204 } 205 if (%_ArgumentsLength() == 0) { 206 var regExpInput = LAST_INPUT(lastMatchInfo); 207 if (IS_UNDEFINED(regExpInput)) { 208 throw MakeError('no_input_to_regexp', [this]); 209 } 210 string = regExpInput; 211 } 212 var s = ToString(string); 213 var length = s.length; 214 var lastIndex = this.lastIndex; 215 var i = this.global ? TO_INTEGER(lastIndex) : 0; 216 217 if (i < 0 || i > s.length) { 218 this.lastIndex = 0; 219 return false; 220 } 221 222 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); 223 // matchIndices is either null or the lastMatchInfo array. 224 var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo); 225 226 if (matchIndices == null) { 227 if (this.global) this.lastIndex = 0; 228 return false; 229 } 230 231 if (this.global) this.lastIndex = lastMatchInfo[CAPTURE1]; 232 return true; 233 } 234 235 236 function RegExpToString() { 237 // If this.source is an empty string, output /(?:)/. 238 // http://bugzilla.mozilla.org/show_bug.cgi?id=225550 239 // ecma_2/RegExp/properties-001.js. 240 var src = this.source ? this.source : '(?:)'; 241 var result = '/' + src + '/'; 242 if (this.global) 243 result += 'g'; 244 if (this.ignoreCase) 245 result += 'i'; 246 if (this.multiline) 247 result += 'm'; 248 return result; 249 } 250 251 252 // Getters for the static properties lastMatch, lastParen, leftContext, and 253 // rightContext of the RegExp constructor. The properties are computed based 254 // on the captures array of the last successful match and the subject string 255 // of the last successful match. 256 function RegExpGetLastMatch() { 257 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 258 return SubString(regExpSubject, 259 lastMatchInfo[CAPTURE0], 260 lastMatchInfo[CAPTURE1]); 261 } 262 263 264 function RegExpGetLastParen() { 265 var length = NUMBER_OF_CAPTURES(lastMatchInfo); 266 if (length <= 2) return ''; // There were no captures. 267 // We match the SpiderMonkey behavior: return the substring defined by the 268 // last pair (after the first pair) of elements of the capture array even if 269 // it is empty. 270 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 271 var start = lastMatchInfo[CAPTURE(length - 2)]; 272 var end = lastMatchInfo[CAPTURE(length - 1)]; 273 if (start != -1 && end != -1) { 274 return SubString(regExpSubject, start, end); 275 } 276 return ""; 277 } 278 279 280 function RegExpGetLeftContext() { 281 return SubString(LAST_SUBJECT(lastMatchInfo), 282 0, 283 lastMatchInfo[CAPTURE0]); 284 } 285 286 287 function RegExpGetRightContext() { 288 var subject = LAST_SUBJECT(lastMatchInfo); 289 return SubString(subject, 290 lastMatchInfo[CAPTURE1], 291 subject.length); 292 } 293 294 295 // The properties $1..$9 are the first nine capturing substrings of the last 296 // successful match, or ''. The function RegExpMakeCaptureGetter will be 297 // called with indices from 1 to 9. 298 function RegExpMakeCaptureGetter(n) { 299 return function() { 300 var index = n * 2; 301 if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; 302 var matchStart = lastMatchInfo[CAPTURE(index)]; 303 var matchEnd = lastMatchInfo[CAPTURE(index + 1)]; 304 if (matchStart == -1 || matchEnd == -1) return ''; 305 return SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd); 306 }; 307 } 308 309 310 // Property of the builtins object for recording the result of the last 311 // regexp match. The property lastMatchInfo includes the matchIndices 312 // array of the last successful regexp match (an array of start/end index 313 // pairs for the match and all the captured substrings), the invariant is 314 // that there are at least two capture indeces. The array also contains 315 // the subject string for the last successful match. 316 var lastMatchInfo = [ 317 2, // REGEXP_NUMBER_OF_CAPTURES 318 "", // Last subject. 319 void 0, // Last input - settable with RegExpSetInput. 320 0, // REGEXP_FIRST_CAPTURE + 0 321 0, // REGEXP_FIRST_CAPTURE + 1 322 ]; 323 324 // ------------------------------------------------------------------- 325 326 function SetupRegExp() { 327 %FunctionSetInstanceClassName($RegExp, 'RegExp'); 328 %FunctionSetPrototype($RegExp, new $Object()); 329 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); 330 %SetCode($RegExp, RegExpConstructor); 331 332 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( 333 "exec", RegExpExec, 334 "test", RegExpTest, 335 "toString", RegExpToString, 336 "compile", CompileRegExp 337 )); 338 339 // The length of compile is 1 in SpiderMonkey. 340 %FunctionSetLength($RegExp.prototype.compile, 1); 341 342 // The properties input, $input, and $_ are aliases for each other. When this 343 // value is set the value it is set to is coerced to a string. 344 // Getter and setter for the input. 345 function RegExpGetInput() { 346 var regExpInput = LAST_INPUT(lastMatchInfo); 347 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; 348 } 349 function RegExpSetInput(string) { 350 LAST_INPUT(lastMatchInfo) = ToString(string); 351 }; 352 353 %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE); 354 %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE); 355 %DefineAccessor($RegExp, '$_', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE); 356 %DefineAccessor($RegExp, '$_', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE); 357 %DefineAccessor($RegExp, '$input', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE); 358 %DefineAccessor($RegExp, '$input', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE); 359 360 // The properties multiline and $* are aliases for each other. When this 361 // value is set in SpiderMonkey, the value it is set to is coerced to a 362 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey 363 // the value of the expression 'RegExp.multiline = null' (for instance) is the 364 // boolean false (ie, the value after coercion), while in V8 it is the value 365 // null (ie, the value before coercion). 366 367 // Getter and setter for multiline. 368 var multiline = false; 369 function RegExpGetMultiline() { return multiline; }; 370 function RegExpSetMultiline(flag) { multiline = flag ? true : false; }; 371 372 %DefineAccessor($RegExp, 'multiline', GETTER, RegExpGetMultiline, DONT_DELETE); 373 %DefineAccessor($RegExp, 'multiline', SETTER, RegExpSetMultiline, DONT_DELETE); 374 %DefineAccessor($RegExp, '$*', GETTER, RegExpGetMultiline, DONT_ENUM | DONT_DELETE); 375 %DefineAccessor($RegExp, '$*', SETTER, RegExpSetMultiline, DONT_ENUM | DONT_DELETE); 376 377 378 function NoOpSetter(ignored) {} 379 380 381 // Static properties set by a successful match. 382 %DefineAccessor($RegExp, 'lastMatch', GETTER, RegExpGetLastMatch, DONT_DELETE); 383 %DefineAccessor($RegExp, 'lastMatch', SETTER, NoOpSetter, DONT_DELETE); 384 %DefineAccessor($RegExp, '$&', GETTER, RegExpGetLastMatch, DONT_ENUM | DONT_DELETE); 385 %DefineAccessor($RegExp, '$&', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 386 %DefineAccessor($RegExp, 'lastParen', GETTER, RegExpGetLastParen, DONT_DELETE); 387 %DefineAccessor($RegExp, 'lastParen', SETTER, NoOpSetter, DONT_DELETE); 388 %DefineAccessor($RegExp, '$+', GETTER, RegExpGetLastParen, DONT_ENUM | DONT_DELETE); 389 %DefineAccessor($RegExp, '$+', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 390 %DefineAccessor($RegExp, 'leftContext', GETTER, RegExpGetLeftContext, DONT_DELETE); 391 %DefineAccessor($RegExp, 'leftContext', SETTER, NoOpSetter, DONT_DELETE); 392 %DefineAccessor($RegExp, '$`', GETTER, RegExpGetLeftContext, DONT_ENUM | DONT_DELETE); 393 %DefineAccessor($RegExp, '$`', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 394 %DefineAccessor($RegExp, 'rightContext', GETTER, RegExpGetRightContext, DONT_DELETE); 395 %DefineAccessor($RegExp, 'rightContext', SETTER, NoOpSetter, DONT_DELETE); 396 %DefineAccessor($RegExp, "$'", GETTER, RegExpGetRightContext, DONT_ENUM | DONT_DELETE); 397 %DefineAccessor($RegExp, "$'", SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); 398 399 for (var i = 1; i < 10; ++i) { 400 %DefineAccessor($RegExp, '$' + i, GETTER, RegExpMakeCaptureGetter(i), DONT_DELETE); 401 %DefineAccessor($RegExp, '$' + i, SETTER, NoOpSetter, DONT_DELETE); 402 } 403 } 404 405 406 SetupRegExp(); 407