1 //--------------------------------------------------------------------------------- 2 // 3 // Generated Header File. Do not edit by hand. 4 // This file contains the state table for the ICU Regular Expression Pattern Parser 5 // It is generated by the Perl script "regexcst.pl" from 6 // the rule parser state definitions file "regexcst.txt". 7 // 8 // Copyright (C) 2002-2015 International Business Machines Corporation 9 // and others. All rights reserved. 10 // 11 //--------------------------------------------------------------------------------- 12 #ifndef RBBIRPT_H 13 #define RBBIRPT_H 14 15 U_NAMESPACE_BEGIN 16 // 17 // Character classes for regex pattern scanning. 18 // 19 static const uint8_t kRuleSet_ascii_letter = 128; 20 static const uint8_t kRuleSet_digit_char = 129; 21 static const uint8_t kRuleSet_rule_char = 130; 22 23 24 enum Regex_PatternParseAction { 25 doSetBackslash_V, 26 doSetBackslash_h, 27 doBeginNamedBackRef, 28 doSetMatchMode, 29 doEnterQuoteMode, 30 doOpenCaptureParen, 31 doContinueNamedCapture, 32 doSetBackslash_d, 33 doBeginMatchMode, 34 doBackslashX, 35 doSetPosixProp, 36 doIntervalError, 37 doSetLiteralEscaped, 38 doSetBackslash_s, 39 doNOP, 40 doBackslashv, 41 doOpenLookBehind, 42 doPatStart, 43 doPossessiveInterval, 44 doOpenAtomicParen, 45 doOpenLookAheadNeg, 46 doBackslashd, 47 doBackslashZ, 48 doIntervalUpperDigit, 49 doBadNamedCapture, 50 doSetDifference2, 51 doSetAddAmp, 52 doSetNamedChar, 53 doNamedChar, 54 doSetBackslash_H, 55 doBackslashb, 56 doBackslashz, 57 doSetBeginDifference1, 58 doOpenLookAhead, 59 doMatchModeParen, 60 doBackslashV, 61 doIntevalLowerDigit, 62 doCaret, 63 doSetEnd, 64 doSetNegate, 65 doBackslashS, 66 doOrOperator, 67 doBackslashB, 68 doBackslashw, 69 doBackslashR, 70 doRuleError, 71 doDotAny, 72 doMatchMode, 73 doSetBackslash_W, 74 doNGPlus, 75 doSetBackslash_D, 76 doPossessiveOpt, 77 doSetNamedRange, 78 doConditionalExpr, 79 doBackslashs, 80 doPossessiveStar, 81 doPlus, 82 doBadOpenParenType, 83 doCloseParen, 84 doNGInterval, 85 doSetProp, 86 doBackRef, 87 doSetBeginUnion, 88 doEscapeError, 89 doOpt, 90 doSetBeginIntersection1, 91 doPossessivePlus, 92 doBackslashD, 93 doOpenLookBehindNeg, 94 doSetBegin, 95 doSetIntersection2, 96 doCompleteNamedBackRef, 97 doSetRange, 98 doDollar, 99 doBackslashH, 100 doExit, 101 doNGOpt, 102 doOpenNonCaptureParen, 103 doBackslashA, 104 doSetBackslash_v, 105 doBackslashh, 106 doBadModeFlag, 107 doSetNoCloseError, 108 doIntervalSame, 109 doSetAddDash, 110 doBackslashW, 111 doPerlInline, 112 doSetOpError, 113 doSetLiteral, 114 doPatFinish, 115 doBeginNamedCapture, 116 doEscapedLiteralChar, 117 doLiteralChar, 118 doSuppressComments, 119 doMismatchedParenErr, 120 doNGStar, 121 doSetFinish, 122 doInterval, 123 doBackslashG, 124 doStar, 125 doSetBackslash_w, 126 doSetBackslash_S, 127 doProperty, 128 doContinueNamedBackRef, 129 doIntervalInit, 130 rbbiLastAction}; 131 132 //------------------------------------------------------------------------------- 133 // 134 // RegexTableEl represents the structure of a row in the transition table 135 // for the pattern parser state machine. 136 //------------------------------------------------------------------------------- 137 struct RegexTableEl { 138 Regex_PatternParseAction fAction; 139 uint8_t fCharClass; // 0-127: an individual ASCII character 140 // 128-255: character class index 141 uint8_t fNextState; // 0-250: normal next-state numbers 142 // 255: pop next-state from stack. 143 uint8_t fPushState; 144 UBool fNextChar; 145 }; 146 147 static const struct RegexTableEl gRuleParseStateTable[] = { 148 {doNOP, 0, 0, 0, TRUE} 149 , {doPatStart, 255, 2,0, FALSE} // 1 start 150 , {doLiteralChar, 254, 14,0, TRUE} // 2 term 151 , {doLiteralChar, 130, 14,0, TRUE} // 3 152 , {doSetBegin, 91 /* [ */, 123, 205, TRUE} // 4 153 , {doNOP, 40 /* ( */, 27,0, TRUE} // 5 154 , {doDotAny, 46 /* . */, 14,0, TRUE} // 6 155 , {doCaret, 94 /* ^ */, 14,0, TRUE} // 7 156 , {doDollar, 36 /* $ */, 14,0, TRUE} // 8 157 , {doNOP, 92 /* \ */, 89,0, TRUE} // 9 158 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 10 159 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11 160 , {doPatFinish, 253, 2,0, FALSE} // 12 161 , {doRuleError, 255, 206,0, FALSE} // 13 162 , {doNOP, 42 /* * */, 68,0, TRUE} // 14 expr-quant 163 , {doNOP, 43 /* + */, 71,0, TRUE} // 15 164 , {doNOP, 63 /* ? */, 74,0, TRUE} // 16 165 , {doIntervalInit, 123 /* { */, 77,0, TRUE} // 17 166 , {doNOP, 40 /* ( */, 23,0, TRUE} // 18 167 , {doNOP, 255, 20,0, FALSE} // 19 168 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 20 expr-cont 169 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 21 170 , {doNOP, 255, 2,0, FALSE} // 22 171 , {doSuppressComments, 63 /* ? */, 25,0, TRUE} // 23 open-paren-quant 172 , {doNOP, 255, 27,0, FALSE} // 24 173 , {doNOP, 35 /* # */, 50, 14, TRUE} // 25 open-paren-quant2 174 , {doNOP, 255, 29,0, FALSE} // 26 175 , {doSuppressComments, 63 /* ? */, 29,0, TRUE} // 27 open-paren 176 , {doOpenCaptureParen, 255, 2, 14, FALSE} // 28 177 , {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE} // 29 open-paren-extended 178 , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE} // 30 179 , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE} // 31 180 , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32 181 , {doNOP, 60 /* < */, 46,0, TRUE} // 33 182 , {doNOP, 35 /* # */, 50, 2, TRUE} // 34 183 , {doBeginMatchMode, 105 /* i */, 53,0, FALSE} // 35 184 , {doBeginMatchMode, 100 /* d */, 53,0, FALSE} // 36 185 , {doBeginMatchMode, 109 /* m */, 53,0, FALSE} // 37 186 , {doBeginMatchMode, 115 /* s */, 53,0, FALSE} // 38 187 , {doBeginMatchMode, 117 /* u */, 53,0, FALSE} // 39 188 , {doBeginMatchMode, 119 /* w */, 53,0, FALSE} // 40 189 , {doBeginMatchMode, 120 /* x */, 53,0, FALSE} // 41 190 , {doBeginMatchMode, 45 /* - */, 53,0, FALSE} // 42 191 , {doConditionalExpr, 40 /* ( */, 206,0, TRUE} // 43 192 , {doPerlInline, 123 /* { */, 206,0, TRUE} // 44 193 , {doBadOpenParenType, 255, 206,0, FALSE} // 45 194 , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 46 open-paren-lookbehind 195 , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 47 196 , {doBeginNamedCapture, 128, 64,0, FALSE} // 48 197 , {doBadOpenParenType, 255, 206,0, FALSE} // 49 198 , {doNOP, 41 /* ) */, 255,0, TRUE} // 50 paren-comment 199 , {doMismatchedParenErr, 253, 206,0, FALSE} // 51 200 , {doNOP, 255, 50,0, TRUE} // 52 201 , {doMatchMode, 105 /* i */, 53,0, TRUE} // 53 paren-flag 202 , {doMatchMode, 100 /* d */, 53,0, TRUE} // 54 203 , {doMatchMode, 109 /* m */, 53,0, TRUE} // 55 204 , {doMatchMode, 115 /* s */, 53,0, TRUE} // 56 205 , {doMatchMode, 117 /* u */, 53,0, TRUE} // 57 206 , {doMatchMode, 119 /* w */, 53,0, TRUE} // 58 207 , {doMatchMode, 120 /* x */, 53,0, TRUE} // 59 208 , {doMatchMode, 45 /* - */, 53,0, TRUE} // 60 209 , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 61 210 , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 62 211 , {doBadModeFlag, 255, 206,0, FALSE} // 63 212 , {doContinueNamedCapture, 128, 64,0, TRUE} // 64 named-capture 213 , {doContinueNamedCapture, 129, 64,0, TRUE} // 65 214 , {doOpenCaptureParen, 62 /* > */, 2, 14, TRUE} // 66 215 , {doBadNamedCapture, 255, 206,0, FALSE} // 67 216 , {doNGStar, 63 /* ? */, 20,0, TRUE} // 68 quant-star 217 , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 69 218 , {doStar, 255, 20,0, FALSE} // 70 219 , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 71 quant-plus 220 , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 72 221 , {doPlus, 255, 20,0, FALSE} // 73 222 , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 74 quant-opt 223 , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 75 224 , {doOpt, 255, 20,0, FALSE} // 76 225 , {doNOP, 129, 79,0, FALSE} // 77 interval-open 226 , {doIntervalError, 255, 206,0, FALSE} // 78 227 , {doIntevalLowerDigit, 129, 79,0, TRUE} // 79 interval-lower 228 , {doNOP, 44 /* , */, 83,0, TRUE} // 80 229 , {doIntervalSame, 125 /* } */, 86,0, TRUE} // 81 230 , {doIntervalError, 255, 206,0, FALSE} // 82 231 , {doIntervalUpperDigit, 129, 83,0, TRUE} // 83 interval-upper 232 , {doNOP, 125 /* } */, 86,0, TRUE} // 84 233 , {doIntervalError, 255, 206,0, FALSE} // 85 234 , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 86 interval-type 235 , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 87 236 , {doInterval, 255, 20,0, FALSE} // 88 237 , {doBackslashA, 65 /* A */, 2,0, TRUE} // 89 backslash 238 , {doBackslashB, 66 /* B */, 2,0, TRUE} // 90 239 , {doBackslashb, 98 /* b */, 2,0, TRUE} // 91 240 , {doBackslashd, 100 /* d */, 14,0, TRUE} // 92 241 , {doBackslashD, 68 /* D */, 14,0, TRUE} // 93 242 , {doBackslashG, 71 /* G */, 2,0, TRUE} // 94 243 , {doBackslashh, 104 /* h */, 14,0, TRUE} // 95 244 , {doBackslashH, 72 /* H */, 14,0, TRUE} // 96 245 , {doNOP, 107 /* k */, 115,0, TRUE} // 97 246 , {doNamedChar, 78 /* N */, 14,0, FALSE} // 98 247 , {doProperty, 112 /* p */, 14,0, FALSE} // 99 248 , {doProperty, 80 /* P */, 14,0, FALSE} // 100 249 , {doBackslashR, 82 /* R */, 14,0, TRUE} // 101 250 , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 102 251 , {doBackslashS, 83 /* S */, 14,0, TRUE} // 103 252 , {doBackslashs, 115 /* s */, 14,0, TRUE} // 104 253 , {doBackslashv, 118 /* v */, 14,0, TRUE} // 105 254 , {doBackslashV, 86 /* V */, 14,0, TRUE} // 106 255 , {doBackslashW, 87 /* W */, 14,0, TRUE} // 107 256 , {doBackslashw, 119 /* w */, 14,0, TRUE} // 108 257 , {doBackslashX, 88 /* X */, 14,0, TRUE} // 109 258 , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 110 259 , {doBackslashz, 122 /* z */, 2,0, TRUE} // 111 260 , {doBackRef, 129, 14,0, TRUE} // 112 261 , {doEscapeError, 253, 206,0, FALSE} // 113 262 , {doEscapedLiteralChar, 255, 14,0, TRUE} // 114 263 , {doBeginNamedBackRef, 60 /* < */, 117,0, TRUE} // 115 named-backref 264 , {doBadNamedCapture, 255, 206,0, FALSE} // 116 265 , {doContinueNamedBackRef, 128, 119,0, TRUE} // 117 named-backref-2 266 , {doBadNamedCapture, 255, 206,0, FALSE} // 118 267 , {doContinueNamedBackRef, 128, 119,0, TRUE} // 119 named-backref-3 268 , {doContinueNamedBackRef, 129, 119,0, TRUE} // 120 269 , {doCompleteNamedBackRef, 62 /* > */, 14,0, TRUE} // 121 270 , {doBadNamedCapture, 255, 206,0, FALSE} // 122 271 , {doSetNegate, 94 /* ^ */, 126,0, TRUE} // 123 set-open 272 , {doSetPosixProp, 58 /* : */, 128,0, FALSE} // 124 273 , {doNOP, 255, 126,0, FALSE} // 125 274 , {doSetLiteral, 93 /* ] */, 141,0, TRUE} // 126 set-open2 275 , {doNOP, 255, 131,0, FALSE} // 127 276 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 128 set-posix 277 , {doNOP, 58 /* : */, 131,0, FALSE} // 129 278 , {doRuleError, 255, 206,0, FALSE} // 130 279 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 131 set-start 280 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 132 281 , {doNOP, 92 /* \ */, 191,0, TRUE} // 133 282 , {doNOP, 45 /* - */, 137,0, TRUE} // 134 283 , {doNOP, 38 /* & */, 139,0, TRUE} // 135 284 , {doSetLiteral, 255, 141,0, TRUE} // 136 285 , {doRuleError, 45 /* - */, 206,0, FALSE} // 137 set-start-dash 286 , {doSetAddDash, 255, 141,0, FALSE} // 138 287 , {doRuleError, 38 /* & */, 206,0, FALSE} // 139 set-start-amp 288 , {doSetAddAmp, 255, 141,0, FALSE} // 140 289 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 141 set-after-lit 290 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 142 291 , {doNOP, 45 /* - */, 178,0, TRUE} // 143 292 , {doNOP, 38 /* & */, 169,0, TRUE} // 144 293 , {doNOP, 92 /* \ */, 191,0, TRUE} // 145 294 , {doSetNoCloseError, 253, 206,0, FALSE} // 146 295 , {doSetLiteral, 255, 141,0, TRUE} // 147 296 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 148 set-after-set 297 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 149 298 , {doNOP, 45 /* - */, 171,0, TRUE} // 150 299 , {doNOP, 38 /* & */, 166,0, TRUE} // 151 300 , {doNOP, 92 /* \ */, 191,0, TRUE} // 152 301 , {doSetNoCloseError, 253, 206,0, FALSE} // 153 302 , {doSetLiteral, 255, 141,0, TRUE} // 154 303 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 155 set-after-range 304 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 156 305 , {doNOP, 45 /* - */, 174,0, TRUE} // 157 306 , {doNOP, 38 /* & */, 176,0, TRUE} // 158 307 , {doNOP, 92 /* \ */, 191,0, TRUE} // 159 308 , {doSetNoCloseError, 253, 206,0, FALSE} // 160 309 , {doSetLiteral, 255, 141,0, TRUE} // 161 310 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 162 set-after-op 311 , {doSetOpError, 93 /* ] */, 206,0, FALSE} // 163 312 , {doNOP, 92 /* \ */, 191,0, TRUE} // 164 313 , {doSetLiteral, 255, 141,0, TRUE} // 165 314 , {doSetBeginIntersection1, 91 /* [ */, 123, 148, TRUE} // 166 set-set-amp 315 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 167 316 , {doSetAddAmp, 255, 141,0, FALSE} // 168 317 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 169 set-lit-amp 318 , {doSetAddAmp, 255, 141,0, FALSE} // 170 319 , {doSetBeginDifference1, 91 /* [ */, 123, 148, TRUE} // 171 set-set-dash 320 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 172 321 , {doSetAddDash, 255, 141,0, FALSE} // 173 322 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 174 set-range-dash 323 , {doSetAddDash, 255, 141,0, FALSE} // 175 324 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 176 set-range-amp 325 , {doSetAddAmp, 255, 141,0, FALSE} // 177 326 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 178 set-lit-dash 327 , {doSetAddDash, 91 /* [ */, 141,0, FALSE} // 179 328 , {doSetAddDash, 93 /* ] */, 141,0, FALSE} // 180 329 , {doNOP, 92 /* \ */, 183,0, TRUE} // 181 330 , {doSetRange, 255, 155,0, TRUE} // 182 331 , {doSetOpError, 115 /* s */, 206,0, FALSE} // 183 set-lit-dash-escape 332 , {doSetOpError, 83 /* S */, 206,0, FALSE} // 184 333 , {doSetOpError, 119 /* w */, 206,0, FALSE} // 185 334 , {doSetOpError, 87 /* W */, 206,0, FALSE} // 186 335 , {doSetOpError, 100 /* d */, 206,0, FALSE} // 187 336 , {doSetOpError, 68 /* D */, 206,0, FALSE} // 188 337 , {doSetNamedRange, 78 /* N */, 155,0, FALSE} // 189 338 , {doSetRange, 255, 155,0, TRUE} // 190 339 , {doSetProp, 112 /* p */, 148,0, FALSE} // 191 set-escape 340 , {doSetProp, 80 /* P */, 148,0, FALSE} // 192 341 , {doSetNamedChar, 78 /* N */, 141,0, FALSE} // 193 342 , {doSetBackslash_s, 115 /* s */, 155,0, TRUE} // 194 343 , {doSetBackslash_S, 83 /* S */, 155,0, TRUE} // 195 344 , {doSetBackslash_w, 119 /* w */, 155,0, TRUE} // 196 345 , {doSetBackslash_W, 87 /* W */, 155,0, TRUE} // 197 346 , {doSetBackslash_d, 100 /* d */, 155,0, TRUE} // 198 347 , {doSetBackslash_D, 68 /* D */, 155,0, TRUE} // 199 348 , {doSetBackslash_h, 104 /* h */, 155,0, TRUE} // 200 349 , {doSetBackslash_H, 72 /* H */, 155,0, TRUE} // 201 350 , {doSetBackslash_v, 118 /* v */, 155,0, TRUE} // 202 351 , {doSetBackslash_V, 86 /* V */, 155,0, TRUE} // 203 352 , {doSetLiteralEscaped, 255, 141,0, TRUE} // 204 353 , {doSetFinish, 255, 14,0, FALSE} // 205 set-finish 354 , {doExit, 255, 206,0, TRUE} // 206 errorDeath 355 }; 356 static const char * const RegexStateNames[] = { 0, 357 "start", 358 "term", 359 0, 360 0, 361 0, 362 0, 363 0, 364 0, 365 0, 366 0, 367 0, 368 0, 369 0, 370 "expr-quant", 371 0, 372 0, 373 0, 374 0, 375 0, 376 "expr-cont", 377 0, 378 0, 379 "open-paren-quant", 380 0, 381 "open-paren-quant2", 382 0, 383 "open-paren", 384 0, 385 "open-paren-extended", 386 0, 387 0, 388 0, 389 0, 390 0, 391 0, 392 0, 393 0, 394 0, 395 0, 396 0, 397 0, 398 0, 399 0, 400 0, 401 0, 402 "open-paren-lookbehind", 403 0, 404 0, 405 0, 406 "paren-comment", 407 0, 408 0, 409 "paren-flag", 410 0, 411 0, 412 0, 413 0, 414 0, 415 0, 416 0, 417 0, 418 0, 419 0, 420 "named-capture", 421 0, 422 0, 423 0, 424 "quant-star", 425 0, 426 0, 427 "quant-plus", 428 0, 429 0, 430 "quant-opt", 431 0, 432 0, 433 "interval-open", 434 0, 435 "interval-lower", 436 0, 437 0, 438 0, 439 "interval-upper", 440 0, 441 0, 442 "interval-type", 443 0, 444 0, 445 "backslash", 446 0, 447 0, 448 0, 449 0, 450 0, 451 0, 452 0, 453 0, 454 0, 455 0, 456 0, 457 0, 458 0, 459 0, 460 0, 461 0, 462 0, 463 0, 464 0, 465 0, 466 0, 467 0, 468 0, 469 0, 470 0, 471 "named-backref", 472 0, 473 "named-backref-2", 474 0, 475 "named-backref-3", 476 0, 477 0, 478 0, 479 "set-open", 480 0, 481 0, 482 "set-open2", 483 0, 484 "set-posix", 485 0, 486 0, 487 "set-start", 488 0, 489 0, 490 0, 491 0, 492 0, 493 "set-start-dash", 494 0, 495 "set-start-amp", 496 0, 497 "set-after-lit", 498 0, 499 0, 500 0, 501 0, 502 0, 503 0, 504 "set-after-set", 505 0, 506 0, 507 0, 508 0, 509 0, 510 0, 511 "set-after-range", 512 0, 513 0, 514 0, 515 0, 516 0, 517 0, 518 "set-after-op", 519 0, 520 0, 521 0, 522 "set-set-amp", 523 0, 524 0, 525 "set-lit-amp", 526 0, 527 "set-set-dash", 528 0, 529 0, 530 "set-range-dash", 531 0, 532 "set-range-amp", 533 0, 534 "set-lit-dash", 535 0, 536 0, 537 0, 538 0, 539 "set-lit-dash-escape", 540 0, 541 0, 542 0, 543 0, 544 0, 545 0, 546 0, 547 "set-escape", 548 0, 549 0, 550 0, 551 0, 552 0, 553 0, 554 0, 555 0, 556 0, 557 0, 558 0, 559 0, 560 0, 561 "set-finish", 562 "errorDeath", 563 0}; 564 565 U_NAMESPACE_END 566 #endif 567