// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// ES6 extends the \uxxxx escape in regular expressions and, with the u flag,
// also allows the braced form \u{xxxxx}.
      6 
      7 function testRegexpHelper(r) {
      8   assertTrue(r.test("foo"));
      9   assertTrue(r.test("boo"));
     10   assertFalse(r.test("moo"));
     11 }
     12 
     13 
     14 (function TestUnicodeEscapes() {
     15   testRegexpHelper(/(\u0066|\u0062)oo/);
     16   testRegexpHelper(/(\u0066|\u0062)oo/u);
     17   testRegexpHelper(/(\u{0066}|\u{0062})oo/u);
     18   testRegexpHelper(/(\u{66}|\u{000062})oo/u);
     19 
     20   // Note that we need \\ inside a string, otherwise it's interpreted as a
     21   // unicode escape inside a string.
     22   testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo"));
     23   testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo", "u"));
     24   testRegexpHelper(new RegExp("(\\u{0066}|\\u{0062})oo", "u"));
     25   testRegexpHelper(new RegExp("(\\u{66}|\\u{000062})oo", "u"));
     26 
     27   // Though, unicode escapes via strings should work too.
     28   testRegexpHelper(new RegExp("(\u0066|\u0062)oo"));
     29   testRegexpHelper(new RegExp("(\u0066|\u0062)oo", "u"));
     30   testRegexpHelper(new RegExp("(\u{0066}|\u{0062})oo", "u"));
     31   testRegexpHelper(new RegExp("(\u{66}|\u{000062})oo", "u"));
     32 })();
     33 
     34 
     35 (function TestUnicodeEscapesInCharacterClasses() {
     36   testRegexpHelper(/[\u0062-\u0066]oo/);
     37   testRegexpHelper(/[\u0062-\u0066]oo/u);
     38   testRegexpHelper(/[\u{0062}-\u{0066}]oo/u);
     39   testRegexpHelper(/[\u{62}-\u{00000066}]oo/u);
     40 
     41   // Note that we need \\ inside a string, otherwise it's interpreted as a
     42   // unicode escape inside a string.
     43   testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo"));
     44   testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo", "u"));
     45   testRegexpHelper(new RegExp("[\\u{0062}-\\u{0066}]oo", "u"));
     46   testRegexpHelper(new RegExp("[\\u{62}-\\u{00000066}]oo", "u"));
     47 
     48   // Though, unicode escapes via strings should work too.
     49   testRegexpHelper(new RegExp("[\u0062-\u0066]oo"));
     50   testRegexpHelper(new RegExp("[\u0062-\u0066]oo", "u"));
     51   testRegexpHelper(new RegExp("[\u{0062}-\u{0066}]oo", "u"));
     52   testRegexpHelper(new RegExp("[\u{62}-\u{00000066}]oo", "u"));
     53 })();
     54 
     55 
     56 (function TestBraceEscapesWithoutUnicodeFlag() {
     57   // \u followed by illegal escape will be parsed as u. {x} will be the
     58   // character count.
     59   function helper1(r) {
     60     assertFalse(r.test("fbar"));
     61     assertFalse(r.test("fubar"));
     62     assertTrue(r.test("fuubar"));
     63     assertFalse(r.test("fuuubar"));
     64   }
     65   helper1(/f\u{2}bar/);
     66   helper1(new RegExp("f\\u{2}bar"));
     67 
     68   function helper2(r) {
     69     assertFalse(r.test("fbar"));
     70     assertTrue(r.test("fubar"));
     71     assertTrue(r.test("fuubar"));
     72     assertFalse(r.test("fuuubar"));
     73   }
     74 
     75   helper2(/f\u{1,2}bar/);
     76   helper2(new RegExp("f\\u{1,2}bar"));
     77 
     78   function helper3(r) {
     79     assertTrue(r.test("u"));
     80     assertTrue(r.test("{"));
     81     assertTrue(r.test("2"));
     82     assertTrue(r.test("}"));
     83     assertFalse(r.test("q"));
     84     assertFalse(r.test("("));
     85     assertFalse(r.test(")"));
     86   }
     87   helper3(/[\u{2}]/);
     88   helper3(new RegExp("[\\u{2}]"));
     89 })();
     90 
     91 
     92 (function TestInvalidEscapes() {
     93   // Without the u flag, invalid unicode escapes and other invalid escapes are
     94   // treated as identity escapes.
     95   function helper1(r) {
     96     assertTrue(r.test("firstuxz89second"));
     97   }
     98   helper1(/first\u\x\z\8\9second/);
     99   helper1(new RegExp("first\\u\\x\\z\\8\\9second"));
    100 
    101   function helper2(r) {
    102     assertTrue(r.test("u"));
    103     assertTrue(r.test("x"));
    104     assertTrue(r.test("z"));
    105     assertTrue(r.test("8"));
    106     assertTrue(r.test("9"));
    107     assertFalse(r.test("q"));
    108     assertFalse(r.test("7"));
    109   }
    110   helper2(/[\u\x\z\8\9]/);
    111   helper2(new RegExp("[\\u\\x\\z\\8\\9]"));
    112 
    113   // However, with the u flag, these are treated as invalid escapes.
    114   assertThrows("/\\u/u", SyntaxError);
    115   assertThrows("/\\u12/u", SyntaxError);
    116   assertThrows("/\\ufoo/u", SyntaxError);
    117   assertThrows("/\\x/u", SyntaxError);
    118   assertThrows("/\\xfoo/u", SyntaxError);
    119   assertThrows("/\\z/u", SyntaxError);
    120   assertThrows("/\\8/u", SyntaxError);
    121   assertThrows("/\\9/u", SyntaxError);
    122 
    123   assertThrows("new RegExp('\\\\u', 'u')", SyntaxError);
    124   assertThrows("new RegExp('\\\\u12', 'u')", SyntaxError);
    125   assertThrows("new RegExp('\\\\ufoo', 'u')", SyntaxError);
    126   assertThrows("new RegExp('\\\\x', 'u')", SyntaxError);
    127   assertThrows("new RegExp('\\\\xfoo', 'u')", SyntaxError);
    128   assertThrows("new RegExp('\\\\z', 'u')", SyntaxError);
    129   assertThrows("new RegExp('\\\\8', 'u')", SyntaxError);
    130   assertThrows("new RegExp('\\\\9', 'u')", SyntaxError);
    131 })();
    132 
    133 
    134 (function TestTooBigHexEscape() {
    135   // The hex number inside \u{} has a maximum value.
    136   /\u{10ffff}/u
    137   new RegExp("\\u{10ffff}", "u")
    138   assertThrows("/\\u{110000}/u", SyntaxError);
    139   assertThrows("new RegExp('\\\\u{110000}', 'u')", SyntaxError);
    140 
    141   // Without the u flag, they're of course fine ({x} is the count).
    142   /\u{110000}/
    143   new RegExp("\\u{110000}")
    144 })();
    145 
    146 
    147 (function TestSyntaxEscapes() {
    148   // Syntax escapes work the same with or without the u flag.
    149   function helper(r) {
    150     assertTrue(r.test("foo[bar"));
    151     assertFalse(r.test("foo]bar"));
    152   }
    153   helper(/foo\[bar/);
    154   helper(new RegExp("foo\\[bar"));
    155   helper(/foo\[bar/u);
    156   helper(new RegExp("foo\\[bar", "u"));
    157 })();
    158 
    159 
    160 (function TestUnicodeSurrogates() {
    161   // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
    162   function helper(r) {
    163     assertTrue(r.test("foo\u{10e6d}bar"));
    164   }
    165   helper(/foo\ud803\ude6dbar/u);
    166   helper(new RegExp("foo\\ud803\\ude6dbar", "u"));
    167 })();
    168 
    169 
    170 (function AllFlags() {
    171   // Test that we can pass all possible regexp flags and they work properly.
    172   function helper1(r) {
    173     assertTrue(r.global);
    174     assertTrue(r.ignoreCase);
    175     assertTrue(r.multiline);
    176     assertTrue(r.sticky);
    177     assertTrue(r.unicode);
    178   }
    179 
    180   helper1(/foo/gimyu);
    181   helper1(new RegExp("foo", "gimyu"));
    182 
    183   function helper2(r) {
    184     assertFalse(r.global);
    185     assertFalse(r.ignoreCase);
    186     assertFalse(r.multiline);
    187     assertFalse(r.sticky);
    188     assertFalse(r.unicode);
    189   }
    190 
    191   helper2(/foo/);
    192   helper2(new RegExp("foo"));
    193 })();
    194 
    195 
    196 (function DuplicatedFlags() {
    197   // Test that duplicating the u flag is not allowed.
    198   assertThrows("/foo/ugu");
    199   assertThrows("new RegExp('foo', 'ugu')");
    200 })();
    201 
    202 
    203 (function ToString() {
    204   // Test that the u flag is included in the string representation of regexps.
    205   function helper(r) {
    206     assertEquals(r.toString(), "/foo/u");
    207   }
    208   helper(/foo/u);
    209   helper(new RegExp("foo", "u"));
    210 })();
    211 
    212 // Non-BMP patterns.
    213 // Single character atom.
    214 assertTrue(new RegExp("\u{12345}", "u").test("\u{12345}"));
    215 assertTrue(/\u{12345}/u.test("\u{12345}"));
    216 assertTrue(new RegExp("\u{12345}", "u").test("\ud808\udf45"));
    217 assertTrue(/\u{12345}/u.test("\ud808\udf45"));
    218 assertFalse(new RegExp("\u{12345}", "u").test("\udf45"));
    219 assertFalse(/\u{12345}/u.test("\udf45"));
    220 
    221 // Multi-character atom.
    222 assertTrue(new RegExp("\u{12345}\u{23456}", "u").test("a\u{12345}\u{23456}b"));
    223 assertTrue(/\u{12345}\u{23456}/u.test("b\u{12345}\u{23456}c"));
    224 assertFalse(new RegExp("\u{12345}\u{23456}", "u").test("a\udf45\u{23456}b"));
    225 assertFalse(/\u{12345}\u{23456}/u.test("b\udf45\u{23456}c"));
    226 
    227 // Disjunction.
    228 assertTrue(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    229     "a\u{12345}\u{23456}b"));
    230 assertTrue(/\u{12345}(?:\u{23456})/u.test("b\u{12345}\u{23456}c"));
    231 assertFalse(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    232     "a\udf45\u{23456}b"));
    233 assertFalse(/\u{12345}(?:\u{23456})/u.test("b\udf45\u{23456}c"));
    234 
    235 // Alternative.
    236 assertTrue(new RegExp("\u{12345}|\u{23456}", "u").test("a\u{12345}b"));
    237 assertTrue(/\u{12345}|\u{23456}/u.test("b\u{23456}c"));
    238 assertFalse(new RegExp("\u{12345}|\u{23456}", "u").test("a\udf45\ud84db"));
    239 assertFalse(/\u{12345}|\u{23456}/u.test("b\udf45\ud808c"));
    240 
    241 // Capture.
    242 assertTrue(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    243     "\u{12345}b\u{12345}"));
    244 assertTrue(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{12345}"));
    245 assertFalse(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    246     "\u{12345}b\u{23456}"));
    247 assertFalse(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{23456}"));
    248 
    249 // Quantifier.
    250 assertTrue(new RegExp("\u{12345}{3}", "u").test("\u{12345}\u{12345}\u{12345}"));
    251 assertTrue(/\u{12345}{3}/u.test("\u{12345}\u{12345}\u{12345}"));
    252 assertTrue(new RegExp("\u{12345}{3}").test("\u{12345}\udf45\udf45"));
    253 assertFalse(/\ud808\udf45{3}/u.test("\u{12345}\udf45\udf45"));
    254 assertTrue(/\ud808\udf45{3}/u.test("\u{12345}\u{12345}\u{12345}"));
    255 assertFalse(new RegExp("\u{12345}{3}", "u").test("\u{12345}\udf45\udf45"));
    256 assertFalse(/\u{12345}{3}/u.test("\u{12345}\udf45\udf45"));
    257 
    258 // Literal surrogates.
    259 assertEquals(["\u{10000}\u{10000}"],
    260              new RegExp("\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
    261 assertEquals(["\u{10000}\u{10000}"],
    262              new RegExp("\\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));
    263 
    264 assertEquals(["\u{10003}\u{50001}"],
    265              new RegExp("[\\ud800\\udc03-\\ud900\\udc01\]+", "u").exec(
    266                  "\u{10003}\u{50001}"));
    267 assertEquals(["\u{10003}\u{50001}"],
    268              new RegExp("[\ud800\udc03-\u{50001}\]+", "u").exec(
    269                  "\u{10003}\u{50001}"));
    270 
    271 // Unicode escape sequences to represent a non-BMP character cannot have
    272 // mixed notation, and must follow the rules for RegExpUnicodeEscapeSequence.
    273 assertThrows(() => new RegExp("[\\ud800\udc03-\ud900\\udc01\]+", "u"));
    274 assertThrows(() => new RegExp("[\\ud800\udc03-\ud900\\udc01\]+", "u"));
    275 assertNull(new RegExp("\\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
    276 assertNull(new RegExp("\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));
    277 
    278 assertNull(new RegExp("[\\ud800\udc00]", "u").exec("\u{10000}"));
    279 assertNull(new RegExp("[\\{ud800}\udc00]", "u").exec("\u{10000}"));
    280 assertNull(new RegExp("[\ud800\\udc00]", "u").exec("\u{10000}"));
    281 assertNull(new RegExp("[\ud800\\{udc00}]", "u").exec("\u{10000}"));
    282 
    283 assertNull(/\u{d800}\u{dc00}+/u.exec("\ud800\udc00\udc00"));
    284 assertNull(/\ud800\u{dc00}+/u.exec("\ud800\udc00\udc00"));
    285 assertNull(/\u{d800}\udc00+/u.exec("\ud800\udc00\udc00"));
    286