1 // Copyright 2012 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28 // This file relies on the fact that the following declaration has been made 29 // in runtime.js: 30 // var $Object = global.Object; 31 // var $Array = global.Array; 32 33 var $RegExp = global.RegExp; 34 35 // ------------------------------------------------------------------- 36 37 // A recursive descent parser for Patterns according to the grammar of 38 // ECMA-262 15.10.1, with deviations noted below. 39 function DoConstructRegExp(object, pattern, flags) { 40 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. 41 if (IS_REGEXP(pattern)) { 42 if (!IS_UNDEFINED(flags)) { 43 throw MakeTypeError('regexp_flags', []); 44 } 45 flags = (pattern.global ? 'g' : '') 46 + (pattern.ignoreCase ? 'i' : '') 47 + (pattern.multiline ? 'm' : ''); 48 pattern = pattern.source; 49 } 50 51 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); 52 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); 53 54 var global = false; 55 var ignoreCase = false; 56 var multiline = false; 57 for (var i = 0; i < flags.length; i++) { 58 var c = %_CallFunction(flags, i, StringCharAt); 59 switch (c) { 60 case 'g': 61 if (global) { 62 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 63 } 64 global = true; 65 break; 66 case 'i': 67 if (ignoreCase) { 68 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 69 } 70 ignoreCase = true; 71 break; 72 case 'm': 73 if (multiline) { 74 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 75 } 76 multiline = true; 77 break; 78 default: 79 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 80 } 81 } 82 83 %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline); 84 85 // Call internal function to compile the pattern. 86 %RegExpCompile(object, pattern, flags); 87 } 88 89 90 function RegExpConstructor(pattern, flags) { 91 if (%_IsConstructCall()) { 92 DoConstructRegExp(this, pattern, flags); 93 } else { 94 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. 95 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { 96 return pattern; 97 } 98 return new $RegExp(pattern, flags); 99 } 100 } 101 102 // Deprecated RegExp.prototype.compile method. We behave like the constructor 103 // were called again. In SpiderMonkey, this method returns the regexp object. 104 // In JSC, it returns undefined. For compatibility with JSC, we match their 105 // behavior. 106 function RegExpCompile(pattern, flags) { 107 // Both JSC and SpiderMonkey treat a missing pattern argument as the 108 // empty subject string, and an actual undefined value passed as the 109 // pattern as the string 'undefined'. Note that JSC is inconsistent 110 // here, treating undefined values differently in 111 // RegExp.prototype.compile and in the constructor, where they are 112 // the empty string. For compatibility with JSC, we match their 113 // behavior. 114 if (this == $RegExp.prototype) { 115 // We don't allow recompiling RegExp.prototype. 116 throw MakeTypeError('incompatible_method_receiver', 117 ['RegExp.prototype.compile', this]); 118 } 119 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { 120 DoConstructRegExp(this, 'undefined', flags); 121 } else { 122 DoConstructRegExp(this, pattern, flags); 123 } 124 } 125 126 127 function DoRegExpExec(regexp, string, index) { 128 var result = %_RegExpExec(regexp, string, index, lastMatchInfo); 129 if (result !== null) lastMatchInfoOverride = null; 130 return result; 131 } 132 133 134 function BuildResultFromMatchInfo(lastMatchInfo, s) { 135 var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; 136 var start = lastMatchInfo[CAPTURE0]; 137 var end = lastMatchInfo[CAPTURE1]; 138 var result = %_RegExpConstructResult(numResults, start, s); 139 result[0] = %_SubString(s, start, end); 140 var j = REGEXP_FIRST_CAPTURE + 2; 141 for (var i = 1; i < numResults; i++) { 142 start = lastMatchInfo[j++]; 143 if (start != -1) { 144 end = lastMatchInfo[j]; 145 result[i] = %_SubString(s, start, end); 146 } 147 j++; 148 } 149 return result; 150 } 151 152 153 function RegExpExecNoTests(regexp, string, start) { 154 // Must be called with RegExp, string and positive integer as arguments. 155 var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo); 156 if (matchInfo !== null) { 157 lastMatchInfoOverride = null; 158 return BuildResultFromMatchInfo(matchInfo, string); 159 } 160 regexp.lastIndex = 0; 161 return null; 162 } 163 164 165 function RegExpExec(string) { 166 if (!IS_REGEXP(this)) { 167 throw MakeTypeError('incompatible_method_receiver', 168 ['RegExp.prototype.exec', this]); 169 } 170 171 string = TO_STRING_INLINE(string); 172 var lastIndex = this.lastIndex; 173 174 // Conversion is required by the ES5 specification (RegExp.prototype.exec 175 // algorithm, step 5) even if the value is discarded for non-global RegExps. 176 var i = TO_INTEGER(lastIndex); 177 178 var global = this.global; 179 if (global) { 180 if (i < 0 || i > string.length) { 181 this.lastIndex = 0; 182 return null; 183 } 184 } else { 185 i = 0; 186 } 187 188 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]); 189 // matchIndices is either null or the lastMatchInfo array. 190 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 191 192 if (IS_NULL(matchIndices)) { 193 this.lastIndex = 0; 194 return null; 195 } 196 197 // Successful match. 198 lastMatchInfoOverride = null; 199 if (global) { 200 this.lastIndex = lastMatchInfo[CAPTURE1]; 201 } 202 return BuildResultFromMatchInfo(matchIndices, string); 203 } 204 205 206 // One-element cache for the simplified test regexp. 207 var regexp_key; 208 var regexp_val; 209 210 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be 211 // that test is defined in terms of String.prototype.exec. However, it probably 212 // means the original value of String.prototype.exec, which is what everybody 213 // else implements. 214 function RegExpTest(string) { 215 if (!IS_REGEXP(this)) { 216 throw MakeTypeError('incompatible_method_receiver', 217 ['RegExp.prototype.test', this]); 218 } 219 string = TO_STRING_INLINE(string); 220 221 var lastIndex = this.lastIndex; 222 223 // Conversion is required by the ES5 specification (RegExp.prototype.exec 224 // algorithm, step 5) even if the value is discarded for non-global RegExps. 225 var i = TO_INTEGER(lastIndex); 226 227 if (this.global) { 228 if (i < 0 || i > string.length) { 229 this.lastIndex = 0; 230 return false; 231 } 232 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]); 233 // matchIndices is either null or the lastMatchInfo array. 234 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 235 if (IS_NULL(matchIndices)) { 236 this.lastIndex = 0; 237 return false; 238 } 239 lastMatchInfoOverride = null; 240 this.lastIndex = lastMatchInfo[CAPTURE1]; 241 return true; 242 } else { 243 // Non-global regexp. 244 // Remove irrelevant preceeding '.*' in a non-global test regexp. 245 // The expression checks whether this.source starts with '.*' and 246 // that the third char is not a '?'. 247 var regexp = this; 248 if (%_StringCharCodeAt(regexp.source, 0) == 46 && // '.' 249 %_StringCharCodeAt(regexp.source, 1) == 42 && // '*' 250 %_StringCharCodeAt(regexp.source, 2) != 63) { // '?' 251 regexp = TrimRegExp(regexp); 252 } 253 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [regexp, string, lastIndex]); 254 // matchIndices is either null or the lastMatchInfo array. 255 var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo); 256 if (IS_NULL(matchIndices)) { 257 this.lastIndex = 0; 258 return false; 259 } 260 lastMatchInfoOverride = null; 261 return true; 262 } 263 } 264 265 function TrimRegExp(regexp) { 266 if (!%_ObjectEquals(regexp_key, regexp)) { 267 regexp_key = regexp; 268 regexp_val = 269 new $RegExp(%_SubString(regexp.source, 2, regexp.source.length), 270 (regexp.ignoreCase ? regexp.multiline ? "im" : "i" 271 : regexp.multiline ? "m" : "")); 272 } 273 return regexp_val; 274 } 275 276 277 function RegExpToString() { 278 if (!IS_REGEXP(this)) { 279 throw MakeTypeError('incompatible_method_receiver', 280 ['RegExp.prototype.toString', this]); 281 } 282 var result = '/' + this.source + '/'; 283 if (this.global) result += 'g'; 284 if (this.ignoreCase) result += 'i'; 285 if (this.multiline) result += 'm'; 286 return result; 287 } 288 289 290 // Getters for the static properties lastMatch, lastParen, leftContext, and 291 // rightContext of the RegExp constructor. The properties are computed based 292 // on the captures array of the last successful match and the subject string 293 // of the last successful match. 294 function RegExpGetLastMatch() { 295 if (lastMatchInfoOverride !== null) { 296 return OVERRIDE_MATCH(lastMatchInfoOverride); 297 } 298 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 299 return %_SubString(regExpSubject, 300 lastMatchInfo[CAPTURE0], 301 lastMatchInfo[CAPTURE1]); 302 } 303 304 305 function RegExpGetLastParen() { 306 if (lastMatchInfoOverride) { 307 var override = lastMatchInfoOverride; 308 if (override.length <= 3) return ''; 309 return override[override.length - 3]; 310 } 311 var length = NUMBER_OF_CAPTURES(lastMatchInfo); 312 if (length <= 2) return ''; // There were no captures. 313 // We match the SpiderMonkey behavior: return the substring defined by the 314 // last pair (after the first pair) of elements of the capture array even if 315 // it is empty. 316 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 317 var start = lastMatchInfo[CAPTURE(length - 2)]; 318 var end = lastMatchInfo[CAPTURE(length - 1)]; 319 if (start != -1 && end != -1) { 320 return %_SubString(regExpSubject, start, end); 321 } 322 return ""; 323 } 324 325 326 function RegExpGetLeftContext() { 327 var start_index; 328 var subject; 329 if (!lastMatchInfoOverride) { 330 start_index = lastMatchInfo[CAPTURE0]; 331 subject = LAST_SUBJECT(lastMatchInfo); 332 } else { 333 var override = lastMatchInfoOverride; 334 start_index = OVERRIDE_POS(override); 335 subject = OVERRIDE_SUBJECT(override); 336 } 337 return %_SubString(subject, 0, start_index); 338 } 339 340 341 function RegExpGetRightContext() { 342 var start_index; 343 var subject; 344 if (!lastMatchInfoOverride) { 345 start_index = lastMatchInfo[CAPTURE1]; 346 subject = LAST_SUBJECT(lastMatchInfo); 347 } else { 348 var override = lastMatchInfoOverride; 349 subject = OVERRIDE_SUBJECT(override); 350 var match = OVERRIDE_MATCH(override); 351 start_index = OVERRIDE_POS(override) + match.length; 352 } 353 return %_SubString(subject, start_index, subject.length); 354 } 355 356 357 // The properties $1..$9 are the first nine capturing substrings of the last 358 // successful match, or ''. The function RegExpMakeCaptureGetter will be 359 // called with indices from 1 to 9. 360 function RegExpMakeCaptureGetter(n) { 361 return function() { 362 if (lastMatchInfoOverride) { 363 if (n < lastMatchInfoOverride.length - 2) { 364 return OVERRIDE_CAPTURE(lastMatchInfoOverride, n); 365 } 366 return ''; 367 } 368 var index = n * 2; 369 if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; 370 var matchStart = lastMatchInfo[CAPTURE(index)]; 371 var matchEnd = lastMatchInfo[CAPTURE(index + 1)]; 372 if (matchStart == -1 || matchEnd == -1) return ''; 373 return %_SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd); 374 }; 375 } 376 377 378 // Property of the builtins object for recording the result of the last 379 // regexp match. The property lastMatchInfo includes the matchIndices 380 // array of the last successful regexp match (an array of start/end index 381 // pairs for the match and all the captured substrings), the invariant is 382 // that there are at least two capture indeces. The array also contains 383 // the subject string for the last successful match. 384 var lastMatchInfo = new InternalPackedArray( 385 2, // REGEXP_NUMBER_OF_CAPTURES 386 "", // Last subject. 387 UNDEFINED, // Last input - settable with RegExpSetInput. 388 0, // REGEXP_FIRST_CAPTURE + 0 389 0 // REGEXP_FIRST_CAPTURE + 1 390 ); 391 392 // Override last match info with an array of actual substrings. 393 // Used internally by replace regexp with function. 394 // The array has the format of an "apply" argument for a replacement 395 // function. 396 var lastMatchInfoOverride = null; 397 398 // ------------------------------------------------------------------- 399 400 function SetUpRegExp() { 401 %CheckIsBootstrapping(); 402 %FunctionSetInstanceClassName($RegExp, 'RegExp'); 403 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); 404 %SetCode($RegExp, RegExpConstructor); 405 406 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( 407 "exec", RegExpExec, 408 "test", RegExpTest, 409 "toString", RegExpToString, 410 "compile", RegExpCompile 411 )); 412 413 // The length of compile is 1 in SpiderMonkey. 414 %FunctionSetLength($RegExp.prototype.compile, 1); 415 416 // The properties input, $input, and $_ are aliases for each other. When this 417 // value is set the value it is set to is coerced to a string. 418 // Getter and setter for the input. 419 var RegExpGetInput = function() { 420 var regExpInput = LAST_INPUT(lastMatchInfo); 421 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; 422 }; 423 var RegExpSetInput = function(string) { 424 LAST_INPUT(lastMatchInfo) = ToString(string); 425 }; 426 427 %OptimizeObjectForAddingMultipleProperties($RegExp, 22); 428 %DefineOrRedefineAccessorProperty($RegExp, 'input', RegExpGetInput, 429 RegExpSetInput, DONT_DELETE); 430 %DefineOrRedefineAccessorProperty($RegExp, '$_', RegExpGetInput, 431 RegExpSetInput, DONT_ENUM | DONT_DELETE); 432 %DefineOrRedefineAccessorProperty($RegExp, '$input', RegExpGetInput, 433 RegExpSetInput, DONT_ENUM | DONT_DELETE); 434 435 // The properties multiline and $* are aliases for each other. When this 436 // value is set in SpiderMonkey, the value it is set to is coerced to a 437 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey 438 // the value of the expression 'RegExp.multiline = null' (for instance) is the 439 // boolean false (i.e., the value after coercion), while in V8 it is the value 440 // null (i.e., the value before coercion). 441 442 // Getter and setter for multiline. 443 var multiline = false; 444 var RegExpGetMultiline = function() { return multiline; }; 445 var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; }; 446 447 %DefineOrRedefineAccessorProperty($RegExp, 'multiline', RegExpGetMultiline, 448 RegExpSetMultiline, DONT_DELETE); 449 %DefineOrRedefineAccessorProperty($RegExp, '$*', RegExpGetMultiline, 450 RegExpSetMultiline, 451 DONT_ENUM | DONT_DELETE); 452 453 454 var NoOpSetter = function(ignored) {}; 455 456 457 // Static properties set by a successful match. 458 %DefineOrRedefineAccessorProperty($RegExp, 'lastMatch', RegExpGetLastMatch, 459 NoOpSetter, DONT_DELETE); 460 %DefineOrRedefineAccessorProperty($RegExp, '$&', RegExpGetLastMatch, 461 NoOpSetter, DONT_ENUM | DONT_DELETE); 462 %DefineOrRedefineAccessorProperty($RegExp, 'lastParen', RegExpGetLastParen, 463 NoOpSetter, DONT_DELETE); 464 %DefineOrRedefineAccessorProperty($RegExp, '$+', RegExpGetLastParen, 465 NoOpSetter, DONT_ENUM | DONT_DELETE); 466 %DefineOrRedefineAccessorProperty($RegExp, 'leftContext', 467 RegExpGetLeftContext, NoOpSetter, 468 DONT_DELETE); 469 %DefineOrRedefineAccessorProperty($RegExp, '$`', RegExpGetLeftContext, 470 NoOpSetter, DONT_ENUM | DONT_DELETE); 471 %DefineOrRedefineAccessorProperty($RegExp, 'rightContext', 472 RegExpGetRightContext, NoOpSetter, 473 DONT_DELETE); 474 %DefineOrRedefineAccessorProperty($RegExp, "$'", RegExpGetRightContext, 475 NoOpSetter, DONT_ENUM | DONT_DELETE); 476 477 for (var i = 1; i < 10; ++i) { 478 %DefineOrRedefineAccessorProperty($RegExp, '$' + i, 479 RegExpMakeCaptureGetter(i), NoOpSetter, 480 DONT_DELETE); 481 } 482 %ToFastProperties($RegExp); 483 } 484 485 SetUpRegExp(); 486