// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.

// Asserts that |r| matches "foo" and "boo" but not "moo". Every pattern passed
// in below spells "f or b, then oo" using a different escape notation.
function testRegexpHelper(r) {
  assertTrue(r.test("foo"));
  assertTrue(r.test("boo"));
  assertFalse(r.test("moo"));
}


(function TestUnicodeEscapes() {
  testRegexpHelper(/(\u0066|\u0062)oo/);
  testRegexpHelper(/(\u0066|\u0062)oo/u);
  testRegexpHelper(/(\u{0066}|\u{0062})oo/u);
  testRegexpHelper(/(\u{66}|\u{000062})oo/u);

  // Note that we need \\ inside a string, otherwise it's interpreted as a
  // unicode escape inside a string.
  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo"));
  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo", "u"));
  testRegexpHelper(new RegExp("(\\u{0066}|\\u{0062})oo", "u"));
  testRegexpHelper(new RegExp("(\\u{66}|\\u{000062})oo", "u"));

  // Though, unicode escapes via strings should work too.
  testRegexpHelper(new RegExp("(\u0066|\u0062)oo"));
  testRegexpHelper(new RegExp("(\u0066|\u0062)oo", "u"));
  testRegexpHelper(new RegExp("(\u{0066}|\u{0062})oo", "u"));
  testRegexpHelper(new RegExp("(\u{66}|\u{000062})oo", "u"));
})();


(function TestUnicodeEscapesInCharacterClasses() {
  testRegexpHelper(/[\u0062-\u0066]oo/);
  testRegexpHelper(/[\u0062-\u0066]oo/u);
  testRegexpHelper(/[\u{0062}-\u{0066}]oo/u);
  testRegexpHelper(/[\u{62}-\u{00000066}]oo/u);

  // Note that we need \\ inside a string, otherwise it's interpreted as a
  // unicode escape inside a string.
  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo"));
  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo", "u"));
  testRegexpHelper(new RegExp("[\\u{0062}-\\u{0066}]oo", "u"));
  testRegexpHelper(new RegExp("[\\u{62}-\\u{00000066}]oo", "u"));

  // Though, unicode escapes via strings should work too.
  testRegexpHelper(new RegExp("[\u0062-\u0066]oo"));
  testRegexpHelper(new RegExp("[\u0062-\u0066]oo", "u"));
  testRegexpHelper(new RegExp("[\u{0062}-\u{0066}]oo", "u"));
  testRegexpHelper(new RegExp("[\u{62}-\u{00000066}]oo", "u"));
})();


(function TestBraceEscapesWithoutUnicodeFlag() {
  // \u followed by illegal escape will be parsed as u. {x} will be the
  // character count.
  function helper1(r) {
    assertFalse(r.test("fbar"));
    assertFalse(r.test("fubar"));
    assertTrue(r.test("fuubar"));
    assertFalse(r.test("fuuubar"));
  }
  helper1(/f\u{2}bar/);
  helper1(new RegExp("f\\u{2}bar"));

  function helper2(r) {
    assertFalse(r.test("fbar"));
    assertTrue(r.test("fubar"));
    assertTrue(r.test("fuubar"));
    assertFalse(r.test("fuuubar"));
  }

  helper2(/f\u{1,2}bar/);
  helper2(new RegExp("f\\u{1,2}bar"));

  // Inside a character class the braces are not a quantifier: each of
  // 'u', '{', '2' and '}' is an ordinary class member.
  function helper3(r) {
    assertTrue(r.test("u"));
    assertTrue(r.test("{"));
    assertTrue(r.test("2"));
    assertTrue(r.test("}"));
    assertFalse(r.test("q"));
    assertFalse(r.test("("));
    assertFalse(r.test(")"));
  }
  helper3(/[\u{2}]/);
  helper3(new RegExp("[\\u{2}]"));
})();


(function TestInvalidEscapes() {
  // Without the u flag, invalid unicode escapes and other invalid escapes are
  // treated as identity escapes.
  function helper1(r) {
    assertTrue(r.test("firstuxz89second"));
  }
  helper1(/first\u\x\z\8\9second/);
  helper1(new RegExp("first\\u\\x\\z\\8\\9second"));

  function helper2(r) {
    assertTrue(r.test("u"));
    assertTrue(r.test("x"));
    assertTrue(r.test("z"));
    assertTrue(r.test("8"));
    assertTrue(r.test("9"));
    assertFalse(r.test("q"));
    assertFalse(r.test("7"));
  }
  helper2(/[\u\x\z\8\9]/);
  helper2(new RegExp("[\\u\\x\\z\\8\\9]"));

  // However, with the u flag, these are treated as invalid escapes.
  // The source is passed as a string so that the early SyntaxError is raised
  // when assertThrows evaluates it rather than when this file is parsed.
  assertThrows("/\\u/u", SyntaxError);
  assertThrows("/\\u12/u", SyntaxError);
  assertThrows("/\\ufoo/u", SyntaxError);
  assertThrows("/\\x/u", SyntaxError);
  assertThrows("/\\xfoo/u", SyntaxError);
  assertThrows("/\\z/u", SyntaxError);
  assertThrows("/\\8/u", SyntaxError);
  assertThrows("/\\9/u", SyntaxError);

  assertThrows("new RegExp('\\\\u', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\u12', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\ufoo', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\x', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\xfoo', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\z', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\8', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\9', 'u')", SyntaxError);
})();


(function TestTooBigHexEscape() {
  // The hex number inside \u{} has a maximum value.
  // These bare expressions only need to parse/construct without throwing.
  // They are terminated explicitly: a statement that starts with '/' must
  // not depend on automatic semicolon insertion.
  /\u{10ffff}/u;
  new RegExp("\\u{10ffff}", "u");
  assertThrows("/\\u{110000}/u", SyntaxError);
  assertThrows("new RegExp('\\\\u{110000}', 'u')", SyntaxError);

  // Without the u flag, they're of course fine ({x} is the count).
  /\u{110000}/;
  new RegExp("\\u{110000}");
})();


(function TestSyntaxEscapes() {
  // Syntax escapes work the same with or without the u flag.
  function helper(r) {
    assertTrue(r.test("foo[bar"));
    assertFalse(r.test("foo]bar"));
  }
  helper(/foo\[bar/);
  helper(new RegExp("foo\\[bar"));
  helper(/foo\[bar/u);
  helper(new RegExp("foo\\[bar", "u"));
})();


(function TestUnicodeSurrogates() {
  // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
  function helper(r) {
    assertTrue(r.test("foo\u{10e6d}bar"));
  }
  helper(/foo\ud803\ude6dbar/u);
  helper(new RegExp("foo\\ud803\\ude6dbar", "u"));
})();


(function AllFlags() {
  // Test that we can pass all possible regexp flags and they work properly.
  function helper1(r) {
    assertTrue(r.global);
    assertTrue(r.ignoreCase);
    assertTrue(r.multiline);
    assertTrue(r.sticky);
    assertTrue(r.unicode);
  }

  helper1(/foo/gimyu);
  helper1(new RegExp("foo", "gimyu"));

  function helper2(r) {
    assertFalse(r.global);
    assertFalse(r.ignoreCase);
    assertFalse(r.multiline);
    assertFalse(r.sticky);
    assertFalse(r.unicode);
  }

  helper2(/foo/);
  helper2(new RegExp("foo"));
})();


(function DuplicatedFlags() {
  // Test that duplicating the u flag is not allowed.
  // The error type is pinned so that an unrelated exception cannot make
  // these checks pass by accident.
  assertThrows("/foo/ugu", SyntaxError);
  assertThrows("new RegExp('foo', 'ugu')", SyntaxError);
})();


(function ToString() {
  // Test that the u flag is included in the string representation of regexps.
  function helper(r) {
    // Expected value first, so a failure message labels the two correctly.
    assertEquals("/foo/u", r.toString());
  }
  helper(/foo/u);
  helper(new RegExp("foo", "u"));
})();

// Non-BMP patterns.
// Single character atom.
assertTrue(new RegExp("\u{12345}", "u").test("\u{12345}"));
assertTrue(/\u{12345}/u.test("\u{12345}"));
assertTrue(new RegExp("\u{12345}", "u").test("\ud808\udf45"));
assertTrue(/\u{12345}/u.test("\ud808\udf45"));
assertFalse(new RegExp("\u{12345}", "u").test("\udf45"));
assertFalse(/\u{12345}/u.test("\udf45"));

// Multi-character atom.
assertTrue(new RegExp("\u{12345}\u{23456}", "u").test("a\u{12345}\u{23456}b"));
assertTrue(/\u{12345}\u{23456}/u.test("b\u{12345}\u{23456}c"));
assertFalse(new RegExp("\u{12345}\u{23456}", "u").test("a\udf45\u{23456}b"));
assertFalse(/\u{12345}\u{23456}/u.test("b\udf45\u{23456}c"));

// Disjunction.
// The non-BMP atom followed by a non-capturing group must match as a unit; a
// lone trail surrogate in the subject must not satisfy \u{12345}.
assertTrue(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    "a\u{12345}\u{23456}b"));
assertTrue(/\u{12345}(?:\u{23456})/u.test("b\u{12345}\u{23456}c"));
assertFalse(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    "a\udf45\u{23456}b"));
assertFalse(/\u{12345}(?:\u{23456})/u.test("b\udf45\u{23456}c"));

// Alternative.
assertTrue(new RegExp("\u{12345}|\u{23456}", "u").test("a\u{12345}b"));
assertTrue(/\u{12345}|\u{23456}/u.test("b\u{23456}c"));
assertFalse(new RegExp("\u{12345}|\u{23456}", "u").test("a\udf45\ud84db"));
assertFalse(/\u{12345}|\u{23456}/u.test("b\udf45\ud808c"));

// Capture.
assertTrue(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    "\u{12345}b\u{12345}"));
assertTrue(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{12345}"));
assertFalse(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    "\u{12345}b\u{23456}"));
assertFalse(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{23456}"));

// Quantifier.
assertTrue(new RegExp("\u{12345}{3}", "u").test("\u{12345}\u{12345}\u{12345}"));
assertTrue(/\u{12345}{3}/u.test("\u{12345}\u{12345}\u{12345}"));
// Without the u flag the quantifier binds to the trail surrogate code unit
// only, so one lead surrogate followed by three trail surrogates matches.
assertTrue(new RegExp("\u{12345}{3}").test("\u{12345}\udf45\udf45"));
// With the u flag an escaped surrogate pair is a single atom, so {3} needs
// three whole code points.
assertFalse(/\ud808\udf45{3}/u.test("\u{12345}\udf45\udf45"));
assertTrue(/\ud808\udf45{3}/u.test("\u{12345}\u{12345}\u{12345}"));
assertFalse(new RegExp("\u{12345}{3}", "u").test("\u{12345}\udf45\udf45"));
assertFalse(/\u{12345}{3}/u.test("\u{12345}\udf45\udf45"));

// Literal surrogates.
// A literal or fully escaped surrogate pair is one atom under the u flag, so
// the trailing + repeats the whole code point.
assertEquals(["\u{10000}\u{10000}"],
             new RegExp("\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
assertEquals(["\u{10000}\u{10000}"],
             new RegExp("\\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));

// Same for class ranges: both notations denote [U+10003-U+50001].
assertEquals(["\u{10003}\u{50001}"],
             new RegExp("[\\ud800\\udc03-\\ud900\\udc01]+", "u").exec(
                 "\u{10003}\u{50001}"));
assertEquals(["\u{10003}\u{50001}"],
             new RegExp("[\ud800\udc03-\u{50001}]+", "u").exec(
                 "\u{10003}\u{50001}"));

// Unicode escape sequences to represent a non-BMP character cannot have
// mixed notation, and must follow the rules for RegExpUnicodeEscapeSequence.
// NOTE(review): with the halves unpaired, the class range presumably becomes
// U+DC03-U+D900, which is out of order and therefore a SyntaxError.
assertThrows(() => new RegExp("[\\ud800\udc03-\ud900\\udc01]+", "u"),
             SyntaxError);
// Mirror of the case above: literal lead + escaped trail on the left,
// escaped lead + literal trail on the right.
assertThrows(() => new RegExp("[\ud800\\udc03-\\ud900\udc01]+", "u"),
             SyntaxError);
assertNull(new RegExp("\\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
assertNull(new RegExp("\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));

assertNull(new RegExp("[\\ud800\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\\u{d800}\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\u{dc00}]", "u").exec("\u{10000}"));

// \u{...} never combines with a neighbouring surrogate escape.
assertNull(/\u{d800}\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\ud800\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\u{d800}\udc00+/u.exec("\ud800\udc00\udc00"));