1 // Copyright 2012 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // This file relies on the fact that the following declaration has been made 6 // in runtime.js: 7 // var $Object = global.Object; 8 // var $Array = global.Array; 9 10 var $RegExp = global.RegExp; 11 12 // ------------------------------------------------------------------- 13 14 // A recursive descent parser for Patterns according to the grammar of 15 // ECMA-262 15.10.1, with deviations noted below. 16 function DoConstructRegExp(object, pattern, flags) { 17 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. 18 if (IS_REGEXP(pattern)) { 19 if (!IS_UNDEFINED(flags)) { 20 throw MakeTypeError('regexp_flags', []); 21 } 22 flags = (pattern.global ? 'g' : '') 23 + (pattern.ignoreCase ? 'i' : '') 24 + (pattern.multiline ? 'm' : ''); 25 pattern = pattern.source; 26 } 27 28 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); 29 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); 30 31 var global = false; 32 var ignoreCase = false; 33 var multiline = false; 34 for (var i = 0; i < flags.length; i++) { 35 var c = %_CallFunction(flags, i, StringCharAt); 36 switch (c) { 37 case 'g': 38 if (global) { 39 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 40 } 41 global = true; 42 break; 43 case 'i': 44 if (ignoreCase) { 45 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 46 } 47 ignoreCase = true; 48 break; 49 case 'm': 50 if (multiline) { 51 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 52 } 53 multiline = true; 54 break; 55 default: 56 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 57 } 58 } 59 60 %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline); 61 62 // Call internal function to compile the pattern. 63 %RegExpCompile(object, pattern, flags); 64 } 65 66 67 function RegExpConstructor(pattern, flags) { 68 if (%_IsConstructCall()) { 69 DoConstructRegExp(this, pattern, flags); 70 } else { 71 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. 72 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { 73 return pattern; 74 } 75 return new $RegExp(pattern, flags); 76 } 77 } 78 79 // Deprecated RegExp.prototype.compile method. We behave like the constructor 80 // were called again. In SpiderMonkey, this method returns the regexp object. 81 // In JSC, it returns undefined. For compatibility with JSC, we match their 82 // behavior. 83 function RegExpCompileJS(pattern, flags) { 84 // Both JSC and SpiderMonkey treat a missing pattern argument as the 85 // empty subject string, and an actual undefined value passed as the 86 // pattern as the string 'undefined'. Note that JSC is inconsistent 87 // here, treating undefined values differently in 88 // RegExp.prototype.compile and in the constructor, where they are 89 // the empty string. For compatibility with JSC, we match their 90 // behavior. 91 if (this == $RegExp.prototype) { 92 // We don't allow recompiling RegExp.prototype. 93 throw MakeTypeError('incompatible_method_receiver', 94 ['RegExp.prototype.compile', this]); 95 } 96 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { 97 DoConstructRegExp(this, 'undefined', flags); 98 } else { 99 DoConstructRegExp(this, pattern, flags); 100 } 101 } 102 103 104 function DoRegExpExec(regexp, string, index) { 105 var result = %_RegExpExec(regexp, string, index, lastMatchInfo); 106 if (result !== null) lastMatchInfoOverride = null; 107 return result; 108 } 109 110 111 // This is kind of performance sensitive, so we want to avoid unnecessary 112 // type checks on inputs. But we also don't want to inline it several times 113 // manually, so we use a macro :-) 114 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) 115 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; 116 var start = MATCHINFO[CAPTURE0]; 117 var end = MATCHINFO[CAPTURE1]; 118 // Calculate the substring of the first match before creating the result array 119 // to avoid an unnecessary write barrier storing the first result. 120 var first = %_SubString(STRING, start, end); 121 var result = %_RegExpConstructResult(numResults, start, STRING); 122 result[0] = first; 123 if (numResults == 1) return result; 124 var j = REGEXP_FIRST_CAPTURE + 2; 125 for (var i = 1; i < numResults; i++) { 126 start = MATCHINFO[j++]; 127 if (start != -1) { 128 end = MATCHINFO[j]; 129 result[i] = %_SubString(STRING, start, end); 130 } 131 j++; 132 } 133 return result; 134 endmacro 135 136 137 function RegExpExecNoTests(regexp, string, start) { 138 // Must be called with RegExp, string and positive integer as arguments. 139 var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo); 140 if (matchInfo !== null) { 141 lastMatchInfoOverride = null; 142 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string); 143 } 144 regexp.lastIndex = 0; 145 return null; 146 } 147 148 149 function RegExpExec(string) { 150 if (!IS_REGEXP(this)) { 151 throw MakeTypeError('incompatible_method_receiver', 152 ['RegExp.prototype.exec', this]); 153 } 154 155 string = TO_STRING_INLINE(string); 156 var lastIndex = this.lastIndex; 157 158 // Conversion is required by the ES5 specification (RegExp.prototype.exec 159 // algorithm, step 5) even if the value is discarded for non-global RegExps. 160 var i = TO_INTEGER(lastIndex); 161 162 var global = this.global; 163 if (global) { 164 if (i < 0 || i > string.length) { 165 this.lastIndex = 0; 166 return null; 167 } 168 } else { 169 i = 0; 170 } 171 172 // matchIndices is either null or the lastMatchInfo array. 173 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 174 175 if (IS_NULL(matchIndices)) { 176 this.lastIndex = 0; 177 return null; 178 } 179 180 // Successful match. 181 lastMatchInfoOverride = null; 182 if (global) { 183 this.lastIndex = lastMatchInfo[CAPTURE1]; 184 } 185 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string); 186 } 187 188 189 // One-element cache for the simplified test regexp. 190 var regexp_key; 191 var regexp_val; 192 193 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be 194 // that test is defined in terms of String.prototype.exec. However, it probably 195 // means the original value of String.prototype.exec, which is what everybody 196 // else implements. 197 function RegExpTest(string) { 198 if (!IS_REGEXP(this)) { 199 throw MakeTypeError('incompatible_method_receiver', 200 ['RegExp.prototype.test', this]); 201 } 202 string = TO_STRING_INLINE(string); 203 204 var lastIndex = this.lastIndex; 205 206 // Conversion is required by the ES5 specification (RegExp.prototype.exec 207 // algorithm, step 5) even if the value is discarded for non-global RegExps. 208 var i = TO_INTEGER(lastIndex); 209 210 if (this.global) { 211 if (i < 0 || i > string.length) { 212 this.lastIndex = 0; 213 return false; 214 } 215 // matchIndices is either null or the lastMatchInfo array. 216 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 217 if (IS_NULL(matchIndices)) { 218 this.lastIndex = 0; 219 return false; 220 } 221 lastMatchInfoOverride = null; 222 this.lastIndex = lastMatchInfo[CAPTURE1]; 223 return true; 224 } else { 225 // Non-global regexp. 226 // Remove irrelevant preceeding '.*' in a non-global test regexp. 227 // The expression checks whether this.source starts with '.*' and 228 // that the third char is not a '?'. 229 var regexp = this; 230 if (%_StringCharCodeAt(regexp.source, 0) == 46 && // '.' 231 %_StringCharCodeAt(regexp.source, 1) == 42 && // '*' 232 %_StringCharCodeAt(regexp.source, 2) != 63) { // '?' 233 regexp = TrimRegExp(regexp); 234 } 235 // matchIndices is either null or the lastMatchInfo array. 236 var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo); 237 if (IS_NULL(matchIndices)) { 238 this.lastIndex = 0; 239 return false; 240 } 241 lastMatchInfoOverride = null; 242 return true; 243 } 244 } 245 246 function TrimRegExp(regexp) { 247 if (!%_ObjectEquals(regexp_key, regexp)) { 248 regexp_key = regexp; 249 regexp_val = 250 new $RegExp(%_SubString(regexp.source, 2, regexp.source.length), 251 (regexp.ignoreCase ? regexp.multiline ? "im" : "i" 252 : regexp.multiline ? "m" : "")); 253 } 254 return regexp_val; 255 } 256 257 258 function RegExpToString() { 259 if (!IS_REGEXP(this)) { 260 throw MakeTypeError('incompatible_method_receiver', 261 ['RegExp.prototype.toString', this]); 262 } 263 var result = '/' + this.source + '/'; 264 if (this.global) result += 'g'; 265 if (this.ignoreCase) result += 'i'; 266 if (this.multiline) result += 'm'; 267 return result; 268 } 269 270 271 // Getters for the static properties lastMatch, lastParen, leftContext, and 272 // rightContext of the RegExp constructor. The properties are computed based 273 // on the captures array of the last successful match and the subject string 274 // of the last successful match. 275 function RegExpGetLastMatch() { 276 if (lastMatchInfoOverride !== null) { 277 return OVERRIDE_MATCH(lastMatchInfoOverride); 278 } 279 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 280 return %_SubString(regExpSubject, 281 lastMatchInfo[CAPTURE0], 282 lastMatchInfo[CAPTURE1]); 283 } 284 285 286 function RegExpGetLastParen() { 287 if (lastMatchInfoOverride) { 288 var override = lastMatchInfoOverride; 289 if (override.length <= 3) return ''; 290 return override[override.length - 3]; 291 } 292 var length = NUMBER_OF_CAPTURES(lastMatchInfo); 293 if (length <= 2) return ''; // There were no captures. 294 // We match the SpiderMonkey behavior: return the substring defined by the 295 // last pair (after the first pair) of elements of the capture array even if 296 // it is empty. 297 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 298 var start = lastMatchInfo[CAPTURE(length - 2)]; 299 var end = lastMatchInfo[CAPTURE(length - 1)]; 300 if (start != -1 && end != -1) { 301 return %_SubString(regExpSubject, start, end); 302 } 303 return ""; 304 } 305 306 307 function RegExpGetLeftContext() { 308 var start_index; 309 var subject; 310 if (!lastMatchInfoOverride) { 311 start_index = lastMatchInfo[CAPTURE0]; 312 subject = LAST_SUBJECT(lastMatchInfo); 313 } else { 314 var override = lastMatchInfoOverride; 315 start_index = OVERRIDE_POS(override); 316 subject = OVERRIDE_SUBJECT(override); 317 } 318 return %_SubString(subject, 0, start_index); 319 } 320 321 322 function RegExpGetRightContext() { 323 var start_index; 324 var subject; 325 if (!lastMatchInfoOverride) { 326 start_index = lastMatchInfo[CAPTURE1]; 327 subject = LAST_SUBJECT(lastMatchInfo); 328 } else { 329 var override = lastMatchInfoOverride; 330 subject = OVERRIDE_SUBJECT(override); 331 var match = OVERRIDE_MATCH(override); 332 start_index = OVERRIDE_POS(override) + match.length; 333 } 334 return %_SubString(subject, start_index, subject.length); 335 } 336 337 338 // The properties $1..$9 are the first nine capturing substrings of the last 339 // successful match, or ''. The function RegExpMakeCaptureGetter will be 340 // called with indices from 1 to 9. 341 function RegExpMakeCaptureGetter(n) { 342 return function() { 343 if (lastMatchInfoOverride) { 344 if (n < lastMatchInfoOverride.length - 2) { 345 return OVERRIDE_CAPTURE(lastMatchInfoOverride, n); 346 } 347 return ''; 348 } 349 var index = n * 2; 350 if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; 351 var matchStart = lastMatchInfo[CAPTURE(index)]; 352 var matchEnd = lastMatchInfo[CAPTURE(index + 1)]; 353 if (matchStart == -1 || matchEnd == -1) return ''; 354 return %_SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd); 355 }; 356 } 357 358 359 // Property of the builtins object for recording the result of the last 360 // regexp match. The property lastMatchInfo includes the matchIndices 361 // array of the last successful regexp match (an array of start/end index 362 // pairs for the match and all the captured substrings), the invariant is 363 // that there are at least two capture indeces. The array also contains 364 // the subject string for the last successful match. 365 var lastMatchInfo = new InternalPackedArray( 366 2, // REGEXP_NUMBER_OF_CAPTURES 367 "", // Last subject. 368 UNDEFINED, // Last input - settable with RegExpSetInput. 369 0, // REGEXP_FIRST_CAPTURE + 0 370 0 // REGEXP_FIRST_CAPTURE + 1 371 ); 372 373 // Override last match info with an array of actual substrings. 374 // Used internally by replace regexp with function. 375 // The array has the format of an "apply" argument for a replacement 376 // function. 377 var lastMatchInfoOverride = null; 378 379 // ------------------------------------------------------------------- 380 381 function SetUpRegExp() { 382 %CheckIsBootstrapping(); 383 %FunctionSetInstanceClassName($RegExp, 'RegExp'); 384 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); 385 %SetCode($RegExp, RegExpConstructor); 386 387 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( 388 "exec", RegExpExec, 389 "test", RegExpTest, 390 "toString", RegExpToString, 391 "compile", RegExpCompileJS 392 )); 393 394 // The length of compile is 1 in SpiderMonkey. 395 %FunctionSetLength($RegExp.prototype.compile, 1); 396 397 // The properties input, $input, and $_ are aliases for each other. When this 398 // value is set the value it is set to is coerced to a string. 399 // Getter and setter for the input. 400 var RegExpGetInput = function() { 401 var regExpInput = LAST_INPUT(lastMatchInfo); 402 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; 403 }; 404 var RegExpSetInput = function(string) { 405 LAST_INPUT(lastMatchInfo) = ToString(string); 406 }; 407 408 %OptimizeObjectForAddingMultipleProperties($RegExp, 22); 409 %DefineOrRedefineAccessorProperty($RegExp, 'input', RegExpGetInput, 410 RegExpSetInput, DONT_DELETE); 411 %DefineOrRedefineAccessorProperty($RegExp, '$_', RegExpGetInput, 412 RegExpSetInput, DONT_ENUM | DONT_DELETE); 413 %DefineOrRedefineAccessorProperty($RegExp, '$input', RegExpGetInput, 414 RegExpSetInput, DONT_ENUM | DONT_DELETE); 415 416 // The properties multiline and $* are aliases for each other. When this 417 // value is set in SpiderMonkey, the value it is set to is coerced to a 418 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey 419 // the value of the expression 'RegExp.multiline = null' (for instance) is the 420 // boolean false (i.e., the value after coercion), while in V8 it is the value 421 // null (i.e., the value before coercion). 422 423 // Getter and setter for multiline. 424 var multiline = false; 425 var RegExpGetMultiline = function() { return multiline; }; 426 var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; }; 427 428 %DefineOrRedefineAccessorProperty($RegExp, 'multiline', RegExpGetMultiline, 429 RegExpSetMultiline, DONT_DELETE); 430 %DefineOrRedefineAccessorProperty($RegExp, '$*', RegExpGetMultiline, 431 RegExpSetMultiline, 432 DONT_ENUM | DONT_DELETE); 433 434 435 var NoOpSetter = function(ignored) {}; 436 437 438 // Static properties set by a successful match. 439 %DefineOrRedefineAccessorProperty($RegExp, 'lastMatch', RegExpGetLastMatch, 440 NoOpSetter, DONT_DELETE); 441 %DefineOrRedefineAccessorProperty($RegExp, '$&', RegExpGetLastMatch, 442 NoOpSetter, DONT_ENUM | DONT_DELETE); 443 %DefineOrRedefineAccessorProperty($RegExp, 'lastParen', RegExpGetLastParen, 444 NoOpSetter, DONT_DELETE); 445 %DefineOrRedefineAccessorProperty($RegExp, '$+', RegExpGetLastParen, 446 NoOpSetter, DONT_ENUM | DONT_DELETE); 447 %DefineOrRedefineAccessorProperty($RegExp, 'leftContext', 448 RegExpGetLeftContext, NoOpSetter, 449 DONT_DELETE); 450 %DefineOrRedefineAccessorProperty($RegExp, '$`', RegExpGetLeftContext, 451 NoOpSetter, DONT_ENUM | DONT_DELETE); 452 %DefineOrRedefineAccessorProperty($RegExp, 'rightContext', 453 RegExpGetRightContext, NoOpSetter, 454 DONT_DELETE); 455 %DefineOrRedefineAccessorProperty($RegExp, "$'", RegExpGetRightContext, 456 NoOpSetter, DONT_ENUM | DONT_DELETE); 457 458 for (var i = 1; i < 10; ++i) { 459 %DefineOrRedefineAccessorProperty($RegExp, '$' + i, 460 RegExpMakeCaptureGetter(i), NoOpSetter, 461 DONT_DELETE); 462 } 463 %ToFastProperties($RegExp); 464 } 465 466 SetUpRegExp(); 467