Home | History | Annotate | Download | only in mjsunit
      1 // Copyright 2008 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 function testEscape(str, regex) {
     29   assertEquals("foo:bar:baz", str.split(regex).join(":"));
     30 }
     31 
     32 testEscape("foo\nbar\nbaz", /\n/);  // split on the newline escape
     33 testEscape("foo bar baz", /\s/);  // \s must match a space
     34 testEscape("foo\tbar\tbaz", /\s/);  // \s must match a tab
     35 testEscape("foo-bar-baz", /\u002D/);  // \u002D is '-'
     36 
     37 // Test containing null char in regexp.
     38 var s = '[' + String.fromCharCode(0) + ']';
     39 var re = new RegExp(s);
     40 assertEquals(s.match(re).length, 1);
     41 assertEquals(s.match(re)[0], String.fromCharCode(0));
     42 
     43 // Test strings containing all line separators
     44 s = 'aA\nbB\rcC\r\ndD\u2028eE\u2029fF';
     45 re = /^./gm; // any non-newline character at the beginning of a line
     46 var result = s.match(re);
     47 assertEquals(result.length, 6);
     48 assertEquals(result[0], 'a');
     49 assertEquals(result[1], 'b');
     50 assertEquals(result[2], 'c');
     51 assertEquals(result[3], 'd');
     52 assertEquals(result[4], 'e');
     53 assertEquals(result[5], 'f');
     54 
     55 re = /.$/gm; // any non-newline character at the end of a line
     56 result = s.match(re);
     57 assertEquals(result.length, 6);
     58 assertEquals(result[0], 'A');
     59 assertEquals(result[1], 'B');
     60 assertEquals(result[2], 'C');
     61 assertEquals(result[3], 'D');
     62 assertEquals(result[4], 'E');
     63 assertEquals(result[5], 'F');
     64 
     65 re = /^[^]/gm; // *any* character at the beginning of a line
     66 result = s.match(re);
     67 assertEquals(result.length, 7);
     68 assertEquals(result[0], 'a');
     69 assertEquals(result[1], 'b');
     70 assertEquals(result[2], 'c');
     71 assertEquals(result[3], '\n');
     72 assertEquals(result[4], 'd');
     73 assertEquals(result[5], 'e');
     74 assertEquals(result[6], 'f');
     75 
     76 re = /[^]$/gm; // *any* character at the end of a line
     77 result = s.match(re);
     78 assertEquals(result.length, 7);
     79 assertEquals(result[0], 'A');
     80 assertEquals(result[1], 'B');
     81 assertEquals(result[2], 'C');
     82 assertEquals(result[3], '\r');
     83 assertEquals(result[4], 'D');
     84 assertEquals(result[5], 'E');
     85 assertEquals(result[6], 'F');
     86 
     87 // Some tests from the Mozilla tests, where our behavior differs from
     88 // SpiderMonkey.
     89 // From ecma_3/RegExp/regress-334158.js
     90 assertTrue(/\ca/.test( "\x01" ));  // \ca is the control character 0x01
     91 assertFalse(/\ca/.test( "\\ca" ));  // ...and not the literal text \ca
     92 // Passes in KJS, fails in IrregularExpressions.
     93 // See http://code.google.com/p/v8/issues/detail?id=152
        // The assertion below is deliberately left disabled because of that issue.
     94 //assertTrue(/\c[a/]/.test( "\x1ba/]" ));
     95 
     96 
     97 // Test \c in character class
        // \cM inside a class is the single control character 'M' & 0x1f, i.e. "\r".
     98 re = /^[\cM]$/;
     99 assertTrue(re.test("\r"));
    100 assertFalse(re.test("M"));
    101 assertFalse(re.test("c"));
    102 assertFalse(re.test("\\"));
    103 assertFalse(re.test("\x03"));  // I.e., read as \cc
    104 
        // Here \c is followed by ']' instead of a letter. The assertions expect
        // the class to match a literal 'c' only (no backslash, no control
        // character), followed by the literal ']' after the class.
    105 re = /^[\c]]$/;
    106 assertTrue(re.test("c]"));
    107 assertFalse(re.test("\\]"));
    108 assertFalse(re.test("\x1d"));  // ']' & 0x1f
    109 assertFalse(re.test("\\]"));  // same check as two lines above
    110 assertFalse(re.test("\x03]"));  // I.e., read as \cc
    111 
    112 
    113 // Test that we handle \s and \S correctly inside some bizarre
    114 // character classes.
        // In the next four classes a '-' sits between a class escape and ':'.
        // The assertions treat it as a literal '-' rather than a range operator.
    115 re = /[\s-:]/;  // whitespace, '-' or ':'
    116 assertTrue(re.test('-'));
    117 assertTrue(re.test(':'));
    118 assertTrue(re.test(' '));
    119 assertTrue(re.test('\t'));
    120 assertTrue(re.test('\n'));
    121 assertFalse(re.test('a'));
    122 assertFalse(re.test('Z'));
    123 
    124 re = /[\S-:]/;  // non-whitespace, '-' or ':'
    125 assertTrue(re.test('-'));
    126 assertTrue(re.test(':'));
    127 assertFalse(re.test(' '));
    128 assertFalse(re.test('\t'));
    129 assertFalse(re.test('\n'));
    130 assertTrue(re.test('a'));
    131 assertTrue(re.test('Z'));
    132 
    133 re = /[^\s-:]/;  // anything except whitespace, '-' and ':'
    134 assertFalse(re.test('-'));
    135 assertFalse(re.test(':'));
    136 assertFalse(re.test(' '));
    137 assertFalse(re.test('\t'));
    138 assertFalse(re.test('\n'));
    139 assertTrue(re.test('a'));
    140 assertTrue(re.test('Z'));
    141 
    142 re = /[^\S-:]/;  // anything except non-whitespace, '-' and ':'
    143 assertFalse(re.test('-'));
    144 assertFalse(re.test(':'));
    145 assertTrue(re.test(' '));
    146 assertTrue(re.test('\t'));
    147 assertTrue(re.test('\n'));
    148 assertFalse(re.test('a'));
    149 assertFalse(re.test('Z'));
    150 
    151 re = /[\s]/;  // whitespace only
    152 assertFalse(re.test('-'));
    153 assertFalse(re.test(':'));
    154 assertTrue(re.test(' '));
    155 assertTrue(re.test('\t'));
    156 assertTrue(re.test('\n'));
    157 assertFalse(re.test('a'));
    158 assertFalse(re.test('Z'));
    159 
    160 re = /[^\s]/;  // everything but whitespace
    161 assertTrue(re.test('-'));
    162 assertTrue(re.test(':'));
    163 assertFalse(re.test(' '));
    164 assertFalse(re.test('\t'));
    165 assertFalse(re.test('\n'));
    166 assertTrue(re.test('a'));
    167 assertTrue(re.test('Z'));
    168 
    169 re = /[\S]/;  // same expectations as [^\s]
    170 assertTrue(re.test('-'));
    171 assertTrue(re.test(':'));
    172 assertFalse(re.test(' '));
    173 assertFalse(re.test('\t'));
    174 assertFalse(re.test('\n'));
    175 assertTrue(re.test('a'));
    176 assertTrue(re.test('Z'));
    177 
    178 re = /[^\S]/;  // same expectations as [\s]
    179 assertFalse(re.test('-'));
    180 assertFalse(re.test(':'));
    181 assertTrue(re.test(' '));
    182 assertTrue(re.test('\t'));
    183 assertTrue(re.test('\n'));
    184 assertFalse(re.test('a'));
    185 assertFalse(re.test('Z'));
    186 
    187 re = /[\s\S]/;  // union of both escapes: every character matches
    188 assertTrue(re.test('-'));
    189 assertTrue(re.test(':'));
    190 assertTrue(re.test(' '));
    191 assertTrue(re.test('\t'));
    192 assertTrue(re.test('\n'));
    193 assertTrue(re.test('a'));
    194 assertTrue(re.test('Z'));
    195 
    196 re = /[^\s\S]/;  // complement of everything: no character matches
    197 assertFalse(re.test('-'));
    198 assertFalse(re.test(':'));
    199 assertFalse(re.test(' '));
    200 assertFalse(re.test('\t'));
    201 assertFalse(re.test('\n'));
    202 assertFalse(re.test('a'));
    203 assertFalse(re.test('Z'));
    204 
    205 // Test beginning and end of line assertions with or without the
    206 // multiline flag.
        // The digits sit on the second line, so '^' reaches them only with /m.
    207 re = /^\d+/;
    208 assertFalse(re.test("asdf\n123"));
    209 re = /^\d+/m;
    210 assertTrue(re.test("asdf\n123"));
    211 
        // The digits sit on the first line, so '$' follows them only with /m.
    212 re = /\d+$/;
    213 assertFalse(re.test("123\nasdf"));
    214 re = /\d+$/m;
    215 assertTrue(re.test("123\nasdf"));
    216 
    217 // Test that empty matches are handled correctly for multiline global
    218 // regexps.
    219 re = /^(.*)/mg;
        // "a\n\rb" has three lines: "a", the empty line between \n and \r, and "b".
    220 assertEquals(3, "a\n\rb".match(re).length);
        // Every line start, including those of empty lines, gets a '*' prefix.
    221 assertEquals("*a\n*b\r*c\n*\r*d\r*\n*e", "a\nb\rc\n\rd\r\ne".replace(re, "*$1"));
    222 
    223 // Test that empty matches advance one character
    224 re = new RegExp("", "g");
        // The empty pattern matches once before and once after the single
        // character, so exactly two 'x' are inserted.
    225 assertEquals("xAx", "A".replace(re, "x"));
        // Same check with a non-ASCII character (char code 161).
    226 assertEquals(3, String.fromCharCode(161).replace(re, "x").length);
    227 
    228 // Test that we match the KJS behavior with regard to undefined constructor
    229 // arguments:
    230 re = new RegExp();
    231 // KJS actually shows this as '//'.  Here we match the Firefox behavior (ie,
    232 // giving a syntactically legal regexp literal).
    233 assertEquals('/(?:)/', re.toString());
    234 re = new RegExp(void 0);
    235 assertEquals('/(?:)/', re.toString());
        // compile() with no argument is expected to keep the empty pattern...
    236 re.compile();
    237 assertEquals('/(?:)/', re.toString());
        // ...while an explicit undefined argument is expected to be stringified.
    238 re.compile(void 0);
    239 assertEquals('/undefined/', re.toString());
    240 
    241 
    242 // Check for lazy RegExp literal creation
        // The literal /foo(/gi below is malformed (unclosed group). It sits in a
        // branch that only runs when `doit` is truthy; otherwise the function
        // returns true without touching the literal. Do not "fix" the literal or
        // hoist it out of the branch: the malformed literal is the test.
    243 function lazyLiteral(doit) {
    244   if (doit) return "".replace(/foo(/gi, "");
    245   return true;
    246 }
    247 
    248 assertTrue(lazyLiteral(false));  // malformed literal not reached: no error
    249 assertThrows("lazyLiteral(true)");  // reaching the literal must throw; presumably the harness evaluates this string
    250 
    251 // Check $01 and $10
    252 re = new RegExp("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)");
    253 assertEquals("t", "123456789t".replace(re, "$10"), "$10");
    254 assertEquals("15", "123456789t".replace(re, "$15"), "$10");
    255 assertEquals("1", "123456789t".replace(re, "$01"), "$01");
    256 assertEquals("$001", "123456789t".replace(re, "$001"), "$001");
    257 re = new RegExp("foo(.)");
    258 assertEquals("bar$0", "foox".replace(re, "bar$0"), "$0");
    259 assertEquals("bar$00", "foox".replace(re, "bar$00"), "$00");
    260 assertEquals("bar$000", "foox".replace(re, "bar$000"), "$000");
    261 assertEquals("barx", "foox".replace(re, "bar$01"), "$01 2");
    262 assertEquals("barx5", "foox".replace(re, "bar$15"), "$15");
    263 
    264 assertFalse(/()foo$\1/.test("football"), "football1");  // '$' must still fail mid-string when followed by an empty back reference
    265 assertFalse(/foo$(?=ball)/.test("football"), "football2");
    266 assertFalse(/foo$(?!bar)/.test("football"), "football3");
    267 assertTrue(/()foo$\1/.test("foo"), "football4");  // ...and still succeed at the real end of input
    268 assertTrue(/foo$(?=(ball)?)/.test("foo"), "football5");
    269 assertTrue(/()foo$(?!bar)/.test("foo"), "football6");
        // Same idea with a capture that is optional rather than always empty.
        // Note: football8, football9 and football11 repeat the patterns of
        // football2, football3 and football5 unchanged.
    270 assertFalse(/(x?)foo$\1/.test("football"), "football7");
    271 assertFalse(/foo$(?=ball)/.test("football"), "football8");
    272 assertFalse(/foo$(?!bar)/.test("football"), "football9");
    273 assertTrue(/(x?)foo$\1/.test("foo"), "football10");
    274 assertTrue(/foo$(?=(ball)?)/.test("foo"), "football11");
    275 assertTrue(/foo$(?!bar)/.test("foo"), "football12");
    276 
    277 // Check that the back reference has two successors.  See
    278 // BackReferenceNode::PropagateForward.
        // In 'foo' the position between the two o's lies inside a word, so \b
        // must fail there and \B must succeed before the back reference is tried.
    279 assertFalse(/f(o)\b\1/.test('foo'));
    280 assertTrue(/f(o)\B\1/.test('foo'));
    281 
    282 // Back-reference, ignore case:
    283 // ASCII
        // The third argument of each assertion is the label reported on failure.
    284 assertEquals("xaAx,a", String(/x(a)\1x/i.exec("xaAx")), "backref-ASCII");
    285 assertFalse(/x(...)\1/i.test("xaaaaa"), "backref-ASCII-short");  // only two characters remain for a three-character back reference
    286 assertTrue(/x((?:))\1\1x/i.test("xx"), "backref-ASCII-empty");  // the capture is the empty string
    287 assertTrue(/x(?:...|(...))\1x/i.test("xabcx"), "backref-ASCII-uncaptured");  // group 1 did not participate in the match
    288 assertTrue(/x(?:...|(...))\1x/i.test("xabcABCx"), "backref-ASCII-backtrack");  // must backtrack into the capturing alternative
    289 assertEquals("xaBcAbCABCx,aBc",
    290              String(/x(...)\1\1x/i.exec("xaBcAbCABCx")),
    291              "backref-ASCII-twice");
    292 
    293 for (var i = 0; i < 128; i++) {
    294   var testName = "backref-ASCII-char-" + i + "," + (i^0x20);
    295   var test = /^(.)\1$/i.test(String.fromCharCode(i, i ^ 0x20))
    296   var c = String.fromCharCode(i);
    297   if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
    298     assertTrue(test, testName);
    299   } else {
    300     assertFalse(test, testName);
    301   }
    302 }
    303 
    304 assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");  // '$' right after "fo" is not at the end of 'foo'
    305 
    306 // Check decimal escapes doesn't overflow.
    307 // (Note: \214 is interpreted as octal).
    308 assertEquals(/\2147483648/.exec("\x8c7483648"),
    309              ["\x8c7483648"],
    310              "Overflow decimal escape");
    311 
    312 
    313 // Check that numbers in quantifiers don't overflow and that overly
    314 // large numbers don't throw.
        // Covered: counts far beyond any integer type (overlarge1-4), counts at
        // and above 2147483648 = 2^31 (overlarge5-10), and 2147483647 = 2^31 - 1
        // (overlarge11-14).
    315 assertFalse(/a{111111111111111111111111111111111111111111111}/.test('b'),
    316             "overlarge1");
    317 assertFalse(/a{999999999999999999999999999999999999999999999}/.test('b'),
    318             "overlarge2");
    319 assertFalse(/a{1,111111111111111111111111111111111111111111111}/.test('b'),
    320             "overlarge3");
    321 assertFalse(/a{1,999999999999999999999999999999999999999999999}/.test('b'),
    322             "overlarge4");
    323 assertFalse(/a{2147483648}/.test('b'),
    324             "overlarge5");
    325 assertFalse(/a{21474836471}/.test('b'),
    326             "overlarge6");
    327 assertFalse(/a{1,2147483648}/.test('b'),
    328             "overlarge7");
    329 assertFalse(/a{1,21474836471}/.test('b'),
    330             "overlarge8");
    331 assertFalse(/a{2147483648,2147483648}/.test('b'),
    332             "overlarge9");
    333 assertFalse(/a{21474836471,21474836471}/.test('b'),
    334             "overlarge10");
    335 assertFalse(/a{2147483647}/.test('b'),
    336             "overlarge11");
    337 assertFalse(/a{1,2147483647}/.test('b'),
    338             "overlarge12");
        // The only positive case: a huge upper bound must still allow one 'a'.
    339 assertTrue(/a{1,2147483647}/.test('a'),
    340             "overlarge13");
        // A huge lower bound cannot be satisfied by a single 'a'.
    341 assertFalse(/a{2147483647,2147483647}/.test('a'),
    342             "overlarge14");
    343 
    344 
    345 // Check that we don't read past the end of the string.
        // Every pattern needs more characters than the one- or two-character
        // input can supply, so each test must fail. The cases cover a class
        // before or after a letter ('f') or a non-letter ('<'), each with and
        // without the ignore-case flag.
    346 assertFalse(/f/.test('b'));
    347 assertFalse(/[abc]f/.test('x'));
    348 assertFalse(/[abc]f/.test('xa'));
    349 assertFalse(/[abc]</.test('x'));
    350 assertFalse(/[abc]</.test('xa'));
    351 assertFalse(/f/i.test('b'));
    352 assertFalse(/[abc]f/i.test('x'));
    353 assertFalse(/[abc]f/i.test('xa'));
    354 assertFalse(/[abc]</i.test('x'));
    355 assertFalse(/[abc]</i.test('xa'));
    356 assertFalse(/f[abc]/.test('x'));
    357 assertFalse(/f[abc]/.test('xa'));
    358 assertFalse(/<[abc]/.test('x'));
    359 assertFalse(/<[abc]/.test('xa'));
    360 assertFalse(/f[abc]/i.test('x'));
    361 assertFalse(/f[abc]/i.test('xa'));
    362 assertFalse(/<[abc]/i.test('x'));
    363 assertFalse(/<[abc]/i.test('xa'));
    364 
    365 // Test that merging of quick test masks gets it right.
        // In each pattern the two alternatives differ in their digit range
        // ([0-7] vs [0-6]) and in their last character (x vs y). The inputs use
        // the digit 7 with the ending y (or no digit at all), which neither
        // alternative accepts, so every test must fail.
        // Note: qt3/qt5 and qt4/qt6 are identical pattern/input pairs.
    366 assertFalse(/x([0-7]%%x|[0-6]%%y)/.test('x7%%y'), 'qt');
    367 assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy7%%%y'), 'qt2');
    368 assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt3');
    369 assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt4');
    370 assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt5');
    371 assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt6');
    372 assertFalse(/xy([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt7');
    373 assertFalse(/x([0-7]%%%x|[0-6]%%%y)/.test('x7%%%y'), 'qt8');
    374 
    375 
    376 // Don't hang on this one.
        // The result is intentionally not asserted; the call only has to return.
    377 /[^\xfe-\xff]*/.test("");
    378 
    379 
    380 var long = "a";
        // Build the pattern source "a?a?...a?a": 100000 optional a's followed by
        // one mandatory 'a'.
    381 for (var i = 0; i < 100000; i++) {
    382   long = "a?" + long;
    383 }
    384 // Don't crash on this one, but maybe throw an exception.
        // Completing without an exception is accepted too; only if something is
        // thrown must its string form mention "Stack overflow".
    385 try {
    386   RegExp(long).exec("a");
    387 } catch (e) {
    388   assertTrue(String(e).indexOf("Stack overflow") >= 0, "overflow");
    389 }
    390 
    391 
    392 // Test that compile works on modified objects
        // Each compile() call must replace source and flags and reset lastIndex
        // to 0, while leaving user-added named and indexed properties untouched.
    393 var re = /re+/;
    394 assertEquals("re+", re.source);
    395 assertFalse(re.global);
    396 assertFalse(re.ignoreCase);
    397 assertFalse(re.multiline);
    398 assertEquals(0, re.lastIndex);
    399 
    400 re.compile("ro+", "gim");
    401 assertEquals("ro+", re.source);
    402 assertTrue(re.global);
    403 assertTrue(re.ignoreCase);
    404 assertTrue(re.multiline);
    405 assertEquals(0, re.lastIndex);
    406 
        // Modify the object: built-in lastIndex, two named and two indexed
        // properties.
    407 re.lastIndex = 42;
    408 re.someOtherProperty = 42;
    409 re.someDeletableProperty = 42;
    410 re[37] = 37;  
    411 re[42] = 42;  
    412 
    413 re.compile("ra+", "i");
    414 assertEquals("ra+", re.source);
    415 assertFalse(re.global);
    416 assertTrue(re.ignoreCase);
    417 assertFalse(re.multiline);
    418 assertEquals(0, re.lastIndex);  // reset from 42 by compile()
    419 
        // The user-added properties must have survived the recompile.
    420 assertEquals(42, re.someOtherProperty);
    421 assertEquals(42, re.someDeletableProperty);
    422 assertEquals(37, re[37]);
    423 assertEquals(42, re[42]);
    424 
        // Second round: overwrite some properties, delete others, then recompile.
    425 re.lastIndex = -1;
    426 re.someOtherProperty = 37;
    427 re[42] = 37;
    428 assertTrue(delete re[37]);
    429 assertTrue(delete re.someDeletableProperty);
    430 re.compile("ri+", "gm");
    431 
    432 assertEquals("ri+", re.source);
    433 assertTrue(re.global);
    434 assertFalse(re.ignoreCase);
    435 assertTrue(re.multiline);
    436 assertEquals(0, re.lastIndex);  // reset from -1 by compile()
    437 assertEquals(37, re.someOtherProperty);
    438 assertEquals(37, re[42]);
    439 
    440 // Test boundary-checks.
    441 function assertRegExpTest(re, input, test) { 
    442   assertEquals(test, re.test(input), "test:" + re + ":" + input);
    443 }
    444 
    445 assertRegExpTest(/b\b/, "b", true);  // lone word character: both of its edges are word boundaries
    446 assertRegExpTest(/b\b$/, "b", true);
    447 assertRegExpTest(/\bb/, "b", true);
    448 assertRegExpTest(/^\bb/, "b", true);
        // A lone non-word character has no word boundary on either side.
    449 assertRegExpTest(/,\b/, ",", false);
    450 assertRegExpTest(/,\b$/, ",", false);
    451 assertRegExpTest(/\b,/, ",", false);
    452 assertRegExpTest(/^\b,/, ",", false);
    453 
        // \B is the exact complement of the eight \b cases above.
    454 assertRegExpTest(/b\B/, "b", false);
    455 assertRegExpTest(/b\B$/, "b", false);
    456 assertRegExpTest(/\Bb/, "b", false);
    457 assertRegExpTest(/^\Bb/, "b", false);
    458 assertRegExpTest(/,\B/, ",", true);
    459 assertRegExpTest(/,\B$/, ",", true);
    460 assertRegExpTest(/\B,/, ",", true);
    461 assertRegExpTest(/^\B,/, ",", true);
    462 
        // The boundary after 'b' depends on the next input character, which the
        // pattern itself does not consume.
    463 assertRegExpTest(/b\b/, "b,", true);
    464 assertRegExpTest(/b\b/, "ba", false);
    465 assertRegExpTest(/b\B/, "b,", false);
    466 assertRegExpTest(/b\B/, "ba", true);
    467 
        // Assertion between two literal word characters.
    468 assertRegExpTest(/b\Bb/, "bb", true);
    469 assertRegExpTest(/b\bb/, "bb", false);
    470 
        // Assertion between a literal and a class that can match either a word
        // character ('b') or a non-word character (',').
    471 assertRegExpTest(/b\b[,b]/, "bb", false);
    472 assertRegExpTest(/b\B[,b]/, "bb", true);
    473 assertRegExpTest(/b\b[,b]/, "b,", true);
    474 assertRegExpTest(/b\B[,b]/, "b,", false);
    475 
        // Same with the class before the assertion.
    476 assertRegExpTest(/[,b]\bb/, "bb", false);
    477 assertRegExpTest(/[,b]\Bb/, "bb", true);
    478 assertRegExpTest(/[,b]\bb/, ",b", true);
    479 assertRegExpTest(/[,b]\Bb/, ",b", false);
    480 
        // Same with a class on both sides of the assertion.
    481 assertRegExpTest(/[,b]\b[,b]/, "bb", false);
    482 assertRegExpTest(/[,b]\B[,b]/, "bb", true);
    483 assertRegExpTest(/[,b]\b[,b]/, ",b", true);
    484 assertRegExpTest(/[,b]\B[,b]/, ",b", false);
    485 assertRegExpTest(/[,b]\b[,b]/, "b,", true);
    486 assertRegExpTest(/[,b]\B[,b]/, "b,", false);
    487