1 # This set of tests is for UTF-16 and UTF-32 support, including Unicode 2 # properties. It is relevant only to the 16-bit and 32-bit libraries. The 3 # output is different for each library, so there are separate output files. 4 5 /xxx/IB,utf,no_utf_check 6 7 /abc/utf 8 ] 9 10 # Check maximum character size 11 12 /\x{ffff}/IB,utf 13 14 /\x{10000}/IB,utf 15 16 /\x{100}/IB,utf 17 18 /\x{1000}/IB,utf 19 20 /\x{10000}/IB,utf 21 22 /\x{100000}/IB,utf 23 24 /\x{10ffff}/IB,utf 25 26 /[\x{ff}]/IB,utf 27 28 /[\x{100}]/IB,utf 29 30 /\x80/IB,utf 31 32 /\xff/IB,utf 33 34 /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf 35 \x{D55c}\x{ad6d}\x{C5B4} 36 37 /\x{65e5}\x{672c}\x{8a9e}/IB,utf 38 \x{65e5}\x{672c}\x{8a9e} 39 40 /\x{80}/IB,utf 41 42 /\x{084}/IB,utf 43 44 /\x{104}/IB,utf 45 46 /\x{861}/IB,utf 47 48 /\x{212ab}/IB,utf 49 50 /[^ab\xC0-\xF0]/IB,utf 51 \x{f1} 52 \x{bf} 53 \x{100} 54 \x{1000} 55 \= Expect no match 56 \x{c0} 57 \x{f0} 58 59 /{3,4}/IB,utf 60 \x{100}\x{100}\x{100}\x{100\x{100} 61 62 /(\x{100}+|x)/IB,utf 63 64 /(\x{100}*a|x)/IB,utf 65 66 /(\x{100}{0,2}a|x)/IB,utf 67 68 /(\x{100}{1,2}a|x)/IB,utf 69 70 /\x{100}/IB,utf 71 72 /a\x{100}\x{101}*/IB,utf 73 74 /a\x{100}\x{101}+/IB,utf 75 76 /[^\x{c4}]/IB 77 78 /[\x{100}]/IB,utf 79 \x{100} 80 Z\x{100} 81 \x{100}Z 82 83 /[\xff]/IB,utf 84 >\x{ff}< 85 86 /[^\xff]/IB,utf 87 88 /\x{100}abc(xyz(?1))/IB,utf 89 90 /\777/I,utf 91 \x{1ff} 92 \777 93 94 /\x{100}+\x{200}/IB,utf 95 96 /\x{100}+X/IB,utf 97 98 /^[\Q\E-\Q\E/B,utf 99 100 /X/utf 101 XX\x{d800}\=no_utf_check 102 XX\x{da00}\=no_utf_check 103 XX\x{dc00}\=no_utf_check 104 XX\x{de00}\=no_utf_check 105 XX\x{dfff}\=no_utf_check 106 \= Expect UTF error 107 XX\x{d800} 108 XX\x{da00} 109 XX\x{dc00} 110 XX\x{de00} 111 XX\x{dfff} 112 XX\x{110000} 113 XX\x{d800}\x{1234} 114 \= Expect no match 115 XX\x{d800}\=offset=3 116 117 /(?<=.)X/utf 118 XX\x{d800}\=offset=3 119 120 /(*UTF16)\x{11234}/ 121 abcd\x{11234}pqr 122 123 /(*UTF)\x{11234}/I 124 abcd\x{11234}pqr 125 126 /(*UTF-32)\x{11234}/ 127 abcd\x{11234}pqr 128 129 /(*UTF-32)\x{112}/ 130 abcd\x{11234}pqr 131 132 /(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I 133 134 /(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I 135 136 /\h/I,utf 137 ABC\x{09} 138 ABC\x{20} 139 ABC\x{a0} 140 ABC\x{1680} 141 ABC\x{180e} 142 ABC\x{2000} 143 ABC\x{202f} 144 ABC\x{205f} 145 ABC\x{3000} 146 147 /\v/I,utf 148 ABC\x{0a} 149 ABC\x{0b} 150 ABC\x{0c} 151 ABC\x{0d} 152 ABC\x{85} 153 ABC\x{2028} 154 155 /\h*A/I,utf 156 CDBABC 157 \x{2000}ABC 158 159 /\R*A/I,bsr=unicode,utf 160 CDBABC 161 \x{2028}A 162 163 /\v+A/I,utf 164 165 /\s?xxx\s/I,utf 166 167 /\sxxx\s/I,utf,tables=2 168 AB\x{85}xxx\x{a0}XYZ 169 AB\x{a0}xxx\x{85}XYZ 170 171 /\S \S/I,utf,tables=2 172 \x{a2} \x{84} 173 A Z 174 175 /a+/utf 176 a\x{123}aa\=offset=1 177 a\x{123}aa\=offset=2 178 a\x{123}aa\=offset=3 179 \= Expect no match 180 a\x{123}aa\=offset=4 181 \= Expect bad offset error 182 a\x{123}aa\=offset=5 183 a\x{123}aa\=offset=6 184 185 /\x{1234}+/Ii,utf 186 187 /\x{1234}+?/Ii,utf 188 189 /\x{1234}++/Ii,utf 190 191 /\x{1234}{2}/Ii,utf 192 193 /[^\x{c4}]/IB,utf 194 195 /X+\x{200}/IB,utf 196 197 /\R/I,utf 198 199 # Check bad offset 200 201 /a/utf 202 \= Expect bad UTF-16 offset, or no match in 32-bit 203 \x{10000}\=offset=1 204 \x{10000}ab\=offset=1 205 \= Expect 16-bit match, 32-bit no match 206 \x{10000}ab\=offset=2 207 \= Expect no match 208 \x{10000}ab\=offset=3 209 \= Expect no match in 16-bit, bad offset in 32-bit 210 \x{10000}ab\=offset=4 211 \= Expect bad offset 212 \x{10000}ab\=offset=5 213 214 //utf 215 216 /\w+\x{C4}/B,utf 217 a\x{C4}\x{C4} 218 219 /\w+\x{C4}/B,utf,tables=2 220 a\x{C4}\x{C4} 221 222 /\W+\x{C4}/B,utf 223 !\x{C4} 224 225 /\W+\x{C4}/B,utf,tables=2 226 !\x{C4} 227 228 /\W+\x{A1}/B,utf 229 !\x{A1} 230 231 /\W+\x{A1}/B,utf,tables=2 232 !\x{A1} 233 234 /X\s+\x{A0}/B,utf 235 X\x20\x{A0}\x{A0} 236 237 /X\s+\x{A0}/B,utf,tables=2 238 X\x20\x{A0}\x{A0} 239 240 /\S+\x{A0}/B,utf 241 X\x{A0}\x{A0} 242 243 /\S+\x{A0}/B,utf,tables=2 244 X\x{A0}\x{A0} 245 246 /\x{a0}+\s!/B,utf 247 \x{a0}\x20! 248 249 /\x{a0}+\s!/B,utf,tables=2 250 \x{a0}\x20! 251 252 /(*UTF)abc/never_utf 253 254 /abc/utf,never_utf 255 256 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf 257 258 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf 259 260 /AB\x{1fb0}/IB,utf 261 262 /AB\x{1fb0}/IBi,utf 263 264 /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf 265 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 266 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 267 268 /[]/Bi,utf 269 270 /[^]/Bi,utf 271 272 /[[:blank:]]/B,ucp 273 274 /\x{212a}+/Ii,utf 275 KKkk\x{212a} 276 277 /s+/Ii,utf 278 SSss\x{17f} 279 280 # Non-UTF characters should give errors in both 16-bit and 32-bit modes. 281 282 /\x{110000}/utf 283 284 /\o{4200000}/utf 285 286 /\x{100}*A/IB,utf 287 A 288 289 /\x{100}*\d(?R)/IB,utf 290 291 /[Z\x{100}]/IB,utf 292 Z\x{100} 293 \x{100} 294 \x{100}Z 295 296 /[z-\x{100}]/IB,utf 297 298 /[z\Qa-d]\E]/IB,utf 299 \x{100} 300 301 302 /[ab\x{100}]abc(xyz(?1))/IB,utf 303 304 /\x{100}*\s/IB,utf 305 306 /\x{100}*\d/IB,utf 307 308 /\x{100}*\w/IB,utf 309 310 /\x{100}*\D/IB,utf 311 312 /\x{100}*\S/IB,utf 313 314 /\x{100}*\W/IB,utf 315 316 /[\x{105}-\x{109}]/IBi,utf 317 \x{104} 318 \x{105} 319 \x{109} 320 \= Expect no match 321 \x{100} 322 \x{10a} 323 324 /[z-\x{100}]/IBi,utf 325 Z 326 z 327 \x{39c} 328 \x{178} 329 | 330 \x{80} 331 \x{ff} 332 \x{100} 333 \x{101} 334 \= Expect no match 335 \x{102} 336 Y 337 y 338 339 /[z-\x{100}]/IBi,utf 340 341 /\x{3a3}B/IBi,utf 342 343 /./utf 344 \x{110000} 345 346 # End of testinput12 347