// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Splits |str| on |regex| and asserts that gluing the resulting pieces back
// together with ':' produces "foo:bar:baz".
function testEscape(str, regex) {
  var pieces = str.split(regex);
  var glued = pieces.join(":");
  assertEquals("foo:bar:baz", glued);
}

// Each escape below has to match the separator used in its subject string.
testEscape("foo\nbar\nbaz", /\n/);
testEscape("foo bar baz", /\s/);
testEscape("foo\tbar\tbaz", /\s/);
testEscape("foo-bar-baz", /\u002D/);

// Test containing null char in regexp.
// The pattern source is "[", NUL, "]": a character class whose only member
// is the NUL character, so matching it against itself finds just the NUL.
var s = '[' + String.fromCharCode(0) + ']';
var re = new RegExp(s);
assertEquals(1, s.match(re).length);
assertEquals(String.fromCharCode(0), s.match(re)[0]);

// Test strings containing all line separators
s = 'aA\nbB\rcC\r\ndD\u2028eE\u2029fF';
re = /^./gm;  // any non-newline character at the beginning of a line
var result = s.match(re);
assertEquals(6, result.length);
assertEquals('a', result[0]);
assertEquals('b', result[1]);
assertEquals('c', result[2]);
assertEquals('d', result[3]);
assertEquals('e', result[4]);
assertEquals('f', result[5]);

re = /.$/gm;  // any non-newline character at the end of a line
result = s.match(re);
assertEquals(6, result.length);
assertEquals('A', result[0]);
assertEquals('B', result[1]);
assertEquals('C', result[2]);
assertEquals('D', result[3]);
assertEquals('E', result[4]);
assertEquals('F', result[5]);

// With [^] the separators themselves can match, so the "\r\n" pair adds a
// seventh hit: the '\n' directly after '\r' counts as a line start.
re = /^[^]/gm;  // *any* character at the beginning of a line
result = s.match(re);
assertEquals(7, result.length);
assertEquals('a', result[0]);
assertEquals('b', result[1]);
assertEquals('c', result[2]);
assertEquals('\n', result[3]);
assertEquals('d', result[4]);
assertEquals('e', result[5]);
assertEquals('f', result[6]);

// Likewise the '\r' directly before '\n' counts as a line end.
re = /[^]$/gm;  // *any* character at the end of a line
result = s.match(re);
assertEquals(7, result.length);
assertEquals('A', result[0]);
assertEquals('B', result[1]);
assertEquals('C', result[2]);
assertEquals('\r', result[3]);
assertEquals('D', result[4]);
assertEquals('E', result[5]);
assertEquals('F', result[6]);

// Some tests from the Mozilla tests, where our behavior differs from
// SpiderMonkey.
// From ecma_3/RegExp/regress-334158.js
assertTrue(/\ca/.test( "\x01" ));
assertFalse(/\ca/.test( "\\ca" ));
// Passes in KJS, fails in IrregularExpressions.
// See http://code.google.com/p/v8/issues/detail?id=152
//assertTrue(/\c[a/]/.test( "\x1ba/]" ));


// Test \c in character class
re = /^[\cM]$/;
assertTrue(re.test("\r"));
assertFalse(re.test("M"));
assertFalse(re.test("c"));
assertFalse(re.test("\\"));
assertFalse(re.test("\x03"));  // I.e., read as \cc

re = /^[\c]]$/;
assertTrue(re.test("c]"));
assertFalse(re.test("\\]"));
assertFalse(re.test("\x1d"));  // ']' & 0x1f
assertFalse(re.test("\\]"));
assertFalse(re.test("\x03]"));  // I.e., read as \cc


// Test that we handle \s and \S correctly inside some bizarre
// character classes.
(function () {
  // Subjects probed against every class below, always in this order.
  var probes = ['-', ':', ' ', '\t', '\n', 'a', 'Z'];

  // Runs the file-level |re| over the probes in order; flags[k] tells
  // whether probe k is expected to match.
  function expectProbes(flags) {
    for (var k = 0; k < probes.length; k++) {
      if (flags[k]) {
        assertTrue(re.test(probes[k]));
      } else {
        assertFalse(re.test(probes[k]));
      }
    }
  }

  // Flag columns: '-'    ':'    ' '    '\t'   '\n'   'a'    'Z'
  re = /[\s-:]/;
  expectProbes([true,  true,  true,  true,  true,  false, false]);

  re = /[\S-:]/;
  expectProbes([true,  true,  false, false, false, true,  true]);

  re = /[^\s-:]/;
  expectProbes([false, false, false, false, false, true,  true]);

  re = /[^\S-:]/;
  expectProbes([false, false, true,  true,  true,  false, false]);

  re = /[\s]/;
  expectProbes([false, false, true,  true,  true,  false, false]);

  re = /[^\s]/;
  expectProbes([true,  true,  false, false, false, true,  true]);

  re = /[\S]/;
  expectProbes([true,  true,  false, false, false, true,  true]);

  re = /[^\S]/;
  expectProbes([false, false, true,  true,  true,  false, false]);

  re = /[\s\S]/;
  expectProbes([true,  true,  true,  true,  true,  true,  true]);

  re = /[^\s\S]/;
  expectProbes([false, false, false, false, false, false, false]);
})();

// Test beginning and end of line assertions with or without the
// multiline flag.
re = /^\d+/;
assertFalse(re.test("asdf\n123"));
re = /^\d+/m;
assertTrue(re.test("asdf\n123"));

re = /\d+$/;
assertFalse(re.test("123\nasdf"));
re = /\d+$/m;
assertTrue(re.test("123\nasdf"));

// Test that empty matches are handled correctly for multiline global
// regexps.
219 re = /^(.*)/mg; 220 assertEquals(3, "a\n\rb".match(re).length); 221 assertEquals("*a\n*b\r*c\n*\r*d\r*\n*e", "a\nb\rc\n\rd\r\ne".replace(re, "*$1")); 222 223 // Test that empty matches advance one character 224 re = new RegExp("", "g"); 225 assertEquals("xAx", "A".replace(re, "x")); 226 assertEquals(3, String.fromCharCode(161).replace(re, "x").length); 227 228 // Test that we match the KJS behavior with regard to undefined constructor 229 // arguments: 230 re = new RegExp(); 231 // KJS actually shows this as '//'. Here we match the Firefox behavior (ie, 232 // giving a syntactically legal regexp literal). 233 assertEquals('/(?:)/', re.toString()); 234 re = new RegExp(void 0); 235 assertEquals('/(?:)/', re.toString()); 236 re.compile(); 237 assertEquals('/(?:)/', re.toString()); 238 re.compile(void 0); 239 assertEquals('/undefined/', re.toString()); 240 241 242 // Check for lazy RegExp literal creation 243 function lazyLiteral(doit) { 244 if (doit) return "".replace(/foo(/gi, ""); 245 return true; 246 } 247 248 assertTrue(lazyLiteral(false)); 249 assertThrows("lazyLiteral(true)"); 250 251 // Check $01 and $10 252 re = new RegExp("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)"); 253 assertEquals("t", "123456789t".replace(re, "$10"), "$10"); 254 assertEquals("15", "123456789t".replace(re, "$15"), "$10"); 255 assertEquals("1", "123456789t".replace(re, "$01"), "$01"); 256 assertEquals("$001", "123456789t".replace(re, "$001"), "$001"); 257 re = new RegExp("foo(.)"); 258 assertEquals("bar$0", "foox".replace(re, "bar$0"), "$0"); 259 assertEquals("bar$00", "foox".replace(re, "bar$00"), "$00"); 260 assertEquals("bar$000", "foox".replace(re, "bar$000"), "$000"); 261 assertEquals("barx", "foox".replace(re, "bar$01"), "$01 2"); 262 assertEquals("barx5", "foox".replace(re, "bar$15"), "$15"); 263 264 assertFalse(/()foo$\1/.test("football"), "football1"); 265 assertFalse(/foo$(?=ball)/.test("football"), "football2"); 266 assertFalse(/foo$(?!bar)/.test("football"), "football3"); 267 
assertTrue(/()foo$\1/.test("foo"), "football4");
assertTrue(/foo$(?=(ball)?)/.test("foo"), "football5");
assertTrue(/()foo$(?!bar)/.test("foo"), "football6");
assertFalse(/(x?)foo$\1/.test("football"), "football7");
assertFalse(/foo$(?=ball)/.test("football"), "football8");
assertFalse(/foo$(?!bar)/.test("football"), "football9");
assertTrue(/(x?)foo$\1/.test("foo"), "football10");
assertTrue(/foo$(?=(ball)?)/.test("foo"), "football11");
assertTrue(/foo$(?!bar)/.test("foo"), "football12");

// Check that the back reference has two successors.  See
// BackReferenceNode::PropagateForward.
assertFalse(/f(o)\b\1/.test('foo'));
assertTrue(/f(o)\B\1/.test('foo'));

// Back-reference, ignore case:
// ASCII
assertEquals("xaAx,a", String(/x(a)\1x/i.exec("xaAx")), "backref-ASCII");
assertFalse(/x(...)\1/i.test("xaaaaa"), "backref-ASCII-short");
assertTrue(/x((?:))\1\1x/i.test("xx"), "backref-ASCII-empty");
assertTrue(/x(?:...|(...))\1x/i.test("xabcx"), "backref-ASCII-uncaptured");
assertTrue(/x(?:...|(...))\1x/i.test("xabcABCx"), "backref-ASCII-backtrack");
assertEquals("xaBcAbCABCx,aBc",
             String(/x(...)\1\1x/i.exec("xaBcAbCABCx")),
             "backref-ASCII-twice");

// Pair every ASCII code with the code that differs only in bit 0x20. For a
// letter that is the same letter in the other case, so an ignore-case
// back-reference must accept the pair; for every other code it must not.
for (var i = 0; i < 128; i++) {
  var testName = "backref-ASCII-char-" + i + "," + (i^0x20);
  var test = /^(.)\1$/i.test(String.fromCharCode(i, i ^ 0x20));
  var c = String.fromCharCode(i);
  if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
    assertTrue(test, testName);
  } else {
    assertFalse(test, testName);
  }
}

assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");

// Check that decimal escapes don't overflow.
// (Note: \214 is interpreted as octal).
assertEquals(["\x8c7483648"],
             /\2147483648/.exec("\x8c7483648"),
             "Overflow decimal escape");


// Check that numbers in quantifiers don't overflow and don't throw on
// too large numbers.
// Counts far beyond any representable integer, as exact and ranged bounds.
assertFalse(/a{111111111111111111111111111111111111111111111}/.test('b'), "overlarge1");
assertFalse(/a{999999999999999999999999999999999999999999999}/.test('b'), "overlarge2");
assertFalse(/a{1,111111111111111111111111111111111111111111111}/.test('b'), "overlarge3");
assertFalse(/a{1,999999999999999999999999999999999999999999999}/.test('b'), "overlarge4");

// Counts just past 2^31 - 1.
assertFalse(/a{2147483648}/.test('b'), "overlarge5");
assertFalse(/a{21474836471}/.test('b'), "overlarge6");
assertFalse(/a{1,2147483648}/.test('b'), "overlarge7");
assertFalse(/a{1,21474836471}/.test('b'), "overlarge8");
assertFalse(/a{2147483648,2147483648}/.test('b'), "overlarge9");
assertFalse(/a{21474836471,21474836471}/.test('b'), "overlarge10");

// Counts of exactly 2^31 - 1.
assertFalse(/a{2147483647}/.test('b'), "overlarge11");
assertFalse(/a{1,2147483647}/.test('b'), "overlarge12");
assertTrue(/a{1,2147483647}/.test('a'), "overlarge13");
assertFalse(/a{2147483647,2147483647}/.test('a'), "overlarge14");


// Check that we don't read past the end of the string.
// Class first, case-sensitive.
assertFalse(/f/.test('b'));
assertFalse(/[abc]f/.test('x'));
assertFalse(/[abc]f/.test('xa'));
assertFalse(/[abc]</.test('x'));
assertFalse(/[abc]</.test('xa'));
// Class first, ignoring case.
assertFalse(/f/i.test('b'));
assertFalse(/[abc]f/i.test('x'));
assertFalse(/[abc]f/i.test('xa'));
assertFalse(/[abc]</i.test('x'));
assertFalse(/[abc]</i.test('xa'));
// Class last, case-sensitive.
assertFalse(/f[abc]/.test('x'));
assertFalse(/f[abc]/.test('xa'));
assertFalse(/<[abc]/.test('x'));
assertFalse(/<[abc]/.test('xa'));
// Class last, ignoring case.
assertFalse(/f[abc]/i.test('x'));
assertFalse(/f[abc]/i.test('xa'));
assertFalse(/<[abc]/i.test('x'));
assertFalse(/<[abc]/i.test('xa'));

// Test that merging of quick test masks gets it right.
assertFalse(/x([0-7]%%x|[0-6]%%y)/.test('x7%%y'), 'qt');
assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy7%%%y'), 'qt2');
assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt3');
assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt4');
assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt5');
assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt6');
assertFalse(/xy([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt7');
assertFalse(/x([0-7]%%%x|[0-6]%%%y)/.test('x7%%%y'), 'qt8');


// Don't hang on this one.
/[^\xfe-\xff]*/.test("");


// Build a pattern of 100000 "a?" prefixes in front of a single "a".
var long = "a";
for (var i = 0; i < 100000; i++) {
  long = "a?" + long;
}
// Don't crash on this one, but maybe throw an exception.
try {
  RegExp(long).exec("a");
} catch (e) {
  // Any exception raised has to be the stack overflow report.
  assertTrue(String(e).indexOf("Stack overflow") >= 0, "overflow");
}


// Test that compile works on modified objects
var re = /re+/;
assertEquals("re+", re.source);
assertFalse(re.global);
assertFalse(re.ignoreCase);
assertFalse(re.multiline);
assertEquals(0, re.lastIndex);

re.compile("ro+", "gim");
assertEquals("ro+", re.source);
assertTrue(re.global);
assertTrue(re.ignoreCase);
assertTrue(re.multiline);
assertEquals(0, re.lastIndex);

// Dirty lastIndex and attach named and indexed expando properties.
re.lastIndex = 42;
re.someOtherProperty = 42;
re.someDeletableProperty = 42;
re[37] = 37;
re[42] = 42;

// Recompiling must reset lastIndex ...
re.compile("ra+", "i");
assertEquals("ra+", re.source);
assertFalse(re.global);
assertTrue(re.ignoreCase);
assertFalse(re.multiline);
assertEquals(0, re.lastIndex);

// ... while leaving the expando properties untouched.
assertEquals(42, re.someOtherProperty);
assertEquals(42, re.someDeletableProperty);
assertEquals(37, re[37]);
assertEquals(42, re[42]);

// Overwrite two properties, delete the other two, then recompile again.
re.lastIndex = -1;
re.someOtherProperty = 37;
re[42] = 37;
assertTrue(delete re[37]);
assertTrue(delete re.someDeletableProperty);
re.compile("ri+", "gm");

assertEquals("ri+", re.source);
assertTrue(re.global);
assertFalse(re.ignoreCase);
assertTrue(re.multiline);
assertEquals(0, re.lastIndex);
assertEquals(37, re.someOtherProperty);
assertEquals(37, re[42]);

// Test boundary-checks.
//
// Asserts that re.test(input) equals |test|; the failure label spells out
// the regexp and the input.
function assertRegExpTest(re, input, test) {
  var outcome = re.test(input);
  var label = "test:" + re + ":" + input;
  assertEquals(test, outcome, label);
}

// \b next to a lone word character versus a lone non-word character.
assertRegExpTest(/b\b/, "b", true);
assertRegExpTest(/b\b$/, "b", true);
assertRegExpTest(/\bb/, "b", true);
assertRegExpTest(/^\bb/, "b", true);
assertRegExpTest(/,\b/, ",", false);
assertRegExpTest(/,\b$/, ",", false);
assertRegExpTest(/\b,/, ",", false);
assertRegExpTest(/^\b,/, ",", false);

// The same subjects with \B: every expectation flips.
assertRegExpTest(/b\B/, "b", false);
assertRegExpTest(/b\B$/, "b", false);
assertRegExpTest(/\Bb/, "b", false);
assertRegExpTest(/^\Bb/, "b", false);
assertRegExpTest(/,\B/, ",", true);
assertRegExpTest(/,\B$/, ",", true);
assertRegExpTest(/\B,/, ",", true);
assertRegExpTest(/^\B,/, ",", true);

// Boundary in the middle of a two-character subject.
assertRegExpTest(/b\b/, "b,", true);
assertRegExpTest(/b\b/, "ba", false);
assertRegExpTest(/b\B/, "b,", false);
assertRegExpTest(/b\B/, "ba", true);

assertRegExpTest(/b\Bb/, "bb", true);
assertRegExpTest(/b\bb/, "bb", false);

// Boundary followed by a class.
assertRegExpTest(/b\b[,b]/, "bb", false);
assertRegExpTest(/b\B[,b]/, "bb", true);
assertRegExpTest(/b\b[,b]/, "b,", true);
assertRegExpTest(/b\B[,b]/, "b,", false);

// Boundary preceded by a class.
assertRegExpTest(/[,b]\bb/, "bb", false);
assertRegExpTest(/[,b]\Bb/, "bb", true);
assertRegExpTest(/[,b]\bb/, ",b", true);
assertRegExpTest(/[,b]\Bb/, ",b", false);

// Boundary between two classes.
assertRegExpTest(/[,b]\b[,b]/, "bb", false);
assertRegExpTest(/[,b]\B[,b]/, "bb", true);
assertRegExpTest(/[,b]\b[,b]/, ",b", true);
assertRegExpTest(/[,b]\B[,b]/, ",b", false);
assertRegExpTest(/[,b]\b[,b]/, "b,", true);
assertRegExpTest(/[,b]\B[,b]/, "b,", false);