1 //--------------------------------------------------------------------------------- 2 // 3 // Generated Header File. Do not edit by hand. 4 // This file contains the state table for the ICU Regular Expression Pattern Parser 5 // It is generated by the Perl script "regexcst.pl" from 6 // the rule parser state definitions file "regexcst.txt". 7 // 8 // Copyright (C) 2002-2007 International Business Machines Corporation 9 // and others. All rights reserved. 10 // 11 //--------------------------------------------------------------------------------- 12 #ifndef RBBIRPT_H 13 #define RBBIRPT_H 14 15 U_NAMESPACE_BEGIN 16 // 17 // Character classes for regex pattern scanning. 18 // 19 static const uint8_t kRuleSet_digit_char = 128; 20 static const uint8_t kRuleSet_rule_char = 129; 21 22 23 enum Regex_PatternParseAction { 24 doLiteralChar, 25 doSetEnd, 26 doBackslashA, 27 doSetBeginUnion, 28 doNOP, 29 doSetBackslash_w, 30 doSetRange, 31 doBackslashG, 32 doPerlInline, 33 doSetAddDash, 34 doIntevalLowerDigit, 35 doProperty, 36 doBackslashX, 37 doOpenAtomicParen, 38 doSetLiteralEscaped, 39 doPatFinish, 40 doSetBackslash_D, 41 doSetDifference2, 42 doNamedChar, 43 doNGPlus, 44 doOpenLookBehindNeg, 45 doIntervalError, 46 doIntervalSame, 47 doBackRef, 48 doPlus, 49 doOpenCaptureParen, 50 doMismatchedParenErr, 51 doBeginMatchMode, 52 doEscapeError, 53 doOpenNonCaptureParen, 54 doDollar, 55 doSetProp, 56 doIntervalUpperDigit, 57 doSetBegin, 58 doBackslashs, 59 doOpenLookBehind, 60 doSetMatchMode, 61 doOrOperator, 62 doCaret, 63 doMatchModeParen, 64 doStar, 65 doOpt, 66 doMatchMode, 67 doSuppressComments, 68 doPossessiveInterval, 69 doOpenLookAheadNeg, 70 doBackslashW, 71 doCloseParen, 72 doSetOpError, 73 doIntervalInit, 74 doSetFinish, 75 doSetIntersection2, 76 doNGStar, 77 doEnterQuoteMode, 78 doSetAddAmp, 79 doBackslashB, 80 doBackslashw, 81 doPossessiveOpt, 82 doSetNegate, 83 doRuleError, 84 doBackslashb, 85 doConditionalExpr, 86 doPossessivePlus, 87 doBadOpenParenType, 88 doNGInterval, 89 doSetLiteral, 90 doSetNamedChar, 91 doBackslashd, 92 doSetBeginDifference1, 93 doBackslashD, 94 doExit, 95 doSetBackslash_S, 96 doInterval, 97 doSetNoCloseError, 98 doNGOpt, 99 doSetPosixProp, 100 doBackslashS, 101 doBackslashZ, 102 doSetBeginIntersection1, 103 doSetBackslash_W, 104 doSetBackslash_d, 105 doOpenLookAhead, 106 doBadModeFlag, 107 doPatStart, 108 doSetNamedRange, 109 doPossessiveStar, 110 doEscapedLiteralChar, 111 doSetBackslash_s, 112 doBackslashz, 113 doDotAny, 114 rbbiLastAction}; 115 116 //------------------------------------------------------------------------------- 117 // 118 // RegexTableEl represents the structure of a row in the transition table 119 // for the pattern parser state machine. 120 //------------------------------------------------------------------------------- 121 struct RegexTableEl { 122 Regex_PatternParseAction fAction; 123 uint8_t fCharClass; // 0-127: an individual ASCII character 124 // 128-255: character class index 125 uint8_t fNextState; // 0-250: normal next-state numbers 126 // 255: pop next-state from stack. 127 uint8_t fPushState; 128 UBool fNextChar; 129 }; 130 131 static const struct RegexTableEl gRuleParseStateTable[] = { 132 {doNOP, 0, 0, 0, TRUE} 133 , {doPatStart, 255, 2,0, FALSE} // 1 start 134 , {doLiteralChar, 254, 14,0, TRUE} // 2 term 135 , {doLiteralChar, 129, 14,0, TRUE} // 3 136 , {doSetBegin, 91 /* [ */, 104, 182, TRUE} // 4 137 , {doNOP, 40 /* ( */, 27,0, TRUE} // 5 138 , {doDotAny, 46 /* . */, 14,0, TRUE} // 6 139 , {doCaret, 94 /* ^ */, 14,0, TRUE} // 7 140 , {doDollar, 36 /* $ */, 14,0, TRUE} // 8 141 , {doNOP, 92 /* \ */, 84,0, TRUE} // 9 142 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 10 143 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11 144 , {doPatFinish, 253, 2,0, FALSE} // 12 145 , {doRuleError, 255, 183,0, FALSE} // 13 146 , {doNOP, 42 /* * */, 63,0, TRUE} // 14 expr-quant 147 , {doNOP, 43 /* + */, 66,0, TRUE} // 15 148 , {doNOP, 63 /* ? */, 69,0, TRUE} // 16 149 , {doIntervalInit, 123 /* { */, 72,0, TRUE} // 17 150 , {doNOP, 40 /* ( */, 23,0, TRUE} // 18 151 , {doNOP, 255, 20,0, FALSE} // 19 152 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 20 expr-cont 153 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 21 154 , {doNOP, 255, 2,0, FALSE} // 22 155 , {doSuppressComments, 63 /* ? */, 25,0, TRUE} // 23 open-paren-quant 156 , {doNOP, 255, 27,0, FALSE} // 24 157 , {doNOP, 35 /* # */, 49, 14, TRUE} // 25 open-paren-quant2 158 , {doNOP, 255, 29,0, FALSE} // 26 159 , {doSuppressComments, 63 /* ? */, 29,0, TRUE} // 27 open-paren 160 , {doOpenCaptureParen, 255, 2, 14, FALSE} // 28 161 , {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE} // 29 open-paren-extended 162 , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE} // 30 163 , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE} // 31 164 , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32 165 , {doNOP, 60 /* < */, 46,0, TRUE} // 33 166 , {doNOP, 35 /* # */, 49, 2, TRUE} // 34 167 , {doBeginMatchMode, 105 /* i */, 52,0, FALSE} // 35 168 , {doBeginMatchMode, 100 /* d */, 52,0, FALSE} // 36 169 , {doBeginMatchMode, 109 /* m */, 52,0, FALSE} // 37 170 , {doBeginMatchMode, 115 /* s */, 52,0, FALSE} // 38 171 , {doBeginMatchMode, 117 /* u */, 52,0, FALSE} // 39 172 , {doBeginMatchMode, 119 /* w */, 52,0, FALSE} // 40 173 , {doBeginMatchMode, 120 /* x */, 52,0, FALSE} // 41 174 , {doBeginMatchMode, 45 /* - */, 52,0, FALSE} // 42 175 , {doConditionalExpr, 40 /* ( */, 183,0, TRUE} // 43 176 , {doPerlInline, 123 /* { */, 183,0, TRUE} // 44 177 , {doBadOpenParenType, 255, 183,0, FALSE} // 45 178 , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 46 open-paren-lookbehind 179 , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 47 180 , {doBadOpenParenType, 255, 183,0, FALSE} // 48 181 , {doNOP, 41 /* ) */, 255,0, TRUE} // 49 paren-comment 182 , {doMismatchedParenErr, 253, 183,0, FALSE} // 50 183 , {doNOP, 255, 49,0, TRUE} // 51 184 , {doMatchMode, 105 /* i */, 52,0, TRUE} // 52 paren-flag 185 , {doMatchMode, 100 /* d */, 52,0, TRUE} // 53 186 , {doMatchMode, 109 /* m */, 52,0, TRUE} // 54 187 , {doMatchMode, 115 /* s */, 52,0, TRUE} // 55 188 , {doMatchMode, 117 /* u */, 52,0, TRUE} // 56 189 , {doMatchMode, 119 /* w */, 52,0, TRUE} // 57 190 , {doMatchMode, 120 /* x */, 52,0, TRUE} // 58 191 , {doMatchMode, 45 /* - */, 52,0, TRUE} // 59 192 , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 60 193 , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 61 194 , {doBadModeFlag, 255, 183,0, FALSE} // 62 195 , {doNGStar, 63 /* ? */, 20,0, TRUE} // 63 quant-star 196 , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 64 197 , {doStar, 255, 20,0, FALSE} // 65 198 , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 66 quant-plus 199 , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 67 200 , {doPlus, 255, 20,0, FALSE} // 68 201 , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 69 quant-opt 202 , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 70 203 , {doOpt, 255, 20,0, FALSE} // 71 204 , {doNOP, 128, 74,0, FALSE} // 72 interval-open 205 , {doIntervalError, 255, 183,0, FALSE} // 73 206 , {doIntevalLowerDigit, 128, 74,0, TRUE} // 74 interval-lower 207 , {doNOP, 44 /* , */, 78,0, TRUE} // 75 208 , {doIntervalSame, 125 /* } */, 81,0, TRUE} // 76 209 , {doIntervalError, 255, 183,0, FALSE} // 77 210 , {doIntervalUpperDigit, 128, 78,0, TRUE} // 78 interval-upper 211 , {doNOP, 125 /* } */, 81,0, TRUE} // 79 212 , {doIntervalError, 255, 183,0, FALSE} // 80 213 , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 81 interval-type 214 , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 82 215 , {doInterval, 255, 20,0, FALSE} // 83 216 , {doBackslashA, 65 /* A */, 2,0, TRUE} // 84 backslash 217 , {doBackslashB, 66 /* B */, 2,0, TRUE} // 85 218 , {doBackslashb, 98 /* b */, 2,0, TRUE} // 86 219 , {doBackslashd, 100 /* d */, 14,0, TRUE} // 87 220 , {doBackslashD, 68 /* D */, 14,0, TRUE} // 88 221 , {doBackslashG, 71 /* G */, 2,0, TRUE} // 89 222 , {doNamedChar, 78 /* N */, 14,0, FALSE} // 90 223 , {doProperty, 112 /* p */, 14,0, FALSE} // 91 224 , {doProperty, 80 /* P */, 14,0, FALSE} // 92 225 , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 93 226 , {doBackslashS, 83 /* S */, 14,0, TRUE} // 94 227 , {doBackslashs, 115 /* s */, 14,0, TRUE} // 95 228 , {doBackslashW, 87 /* W */, 14,0, TRUE} // 96 229 , {doBackslashw, 119 /* w */, 14,0, TRUE} // 97 230 , {doBackslashX, 88 /* X */, 14,0, TRUE} // 98 231 , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 99 232 , {doBackslashz, 122 /* z */, 2,0, TRUE} // 100 233 , {doBackRef, 128, 14,0, TRUE} // 101 234 , {doEscapeError, 253, 183,0, FALSE} // 102 235 , {doEscapedLiteralChar, 255, 14,0, TRUE} // 103 236 , {doSetNegate, 94 /* ^ */, 107,0, TRUE} // 104 set-open 237 , {doSetPosixProp, 58 /* : */, 109,0, FALSE} // 105 238 , {doNOP, 255, 107,0, FALSE} // 106 239 , {doSetLiteral, 93 /* ] */, 122,0, TRUE} // 107 set-open2 240 , {doNOP, 255, 112,0, FALSE} // 108 241 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 109 set-posix 242 , {doNOP, 58 /* : */, 112,0, FALSE} // 110 243 , {doRuleError, 255, 183,0, FALSE} // 111 244 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 112 set-start 245 , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 113 246 , {doNOP, 92 /* \ */, 172,0, TRUE} // 114 247 , {doNOP, 45 /* - */, 118,0, TRUE} // 115 248 , {doNOP, 38 /* & */, 120,0, TRUE} // 116 249 , {doSetLiteral, 255, 122,0, TRUE} // 117 250 , {doRuleError, 45 /* - */, 183,0, FALSE} // 118 set-start-dash 251 , {doSetAddDash, 255, 122,0, FALSE} // 119 252 , {doRuleError, 38 /* & */, 183,0, FALSE} // 120 set-start-amp 253 , {doSetAddAmp, 255, 122,0, FALSE} // 121 254 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 122 set-after-lit 255 , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 123 256 , {doNOP, 45 /* - */, 159,0, TRUE} // 124 257 , {doNOP, 38 /* & */, 150,0, TRUE} // 125 258 , {doNOP, 92 /* \ */, 172,0, TRUE} // 126 259 , {doSetNoCloseError, 253, 183,0, FALSE} // 127 260 , {doSetLiteral, 255, 122,0, TRUE} // 128 261 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 129 set-after-set 262 , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 130 263 , {doNOP, 45 /* - */, 152,0, TRUE} // 131 264 , {doNOP, 38 /* & */, 147,0, TRUE} // 132 265 , {doNOP, 92 /* \ */, 172,0, TRUE} // 133 266 , {doSetNoCloseError, 253, 183,0, FALSE} // 134 267 , {doSetLiteral, 255, 122,0, TRUE} // 135 268 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 136 set-after-range 269 , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 137 270 , {doNOP, 45 /* - */, 155,0, TRUE} // 138 271 , {doNOP, 38 /* & */, 157,0, TRUE} // 139 272 , {doNOP, 92 /* \ */, 172,0, TRUE} // 140 273 , {doSetNoCloseError, 253, 183,0, FALSE} // 141 274 , {doSetLiteral, 255, 122,0, TRUE} // 142 275 , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 143 set-after-op 276 , {doSetOpError, 93 /* ] */, 183,0, FALSE} // 144 277 , {doNOP, 92 /* \ */, 172,0, TRUE} // 145 278 , {doSetLiteral, 255, 122,0, TRUE} // 146 279 , {doSetBeginIntersection1, 91 /* [ */, 104, 129, TRUE} // 147 set-set-amp 280 , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 148 281 , {doSetAddAmp, 255, 122,0, FALSE} // 149 282 , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 150 set-lit-amp 283 , {doSetAddAmp, 255, 122,0, FALSE} // 151 284 , {doSetBeginDifference1, 91 /* [ */, 104, 129, TRUE} // 152 set-set-dash 285 , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 153 286 , {doSetAddDash, 255, 122,0, FALSE} // 154 287 , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 155 set-range-dash 288 , {doSetAddDash, 255, 122,0, FALSE} // 156 289 , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 157 set-range-amp 290 , {doSetAddAmp, 255, 122,0, FALSE} // 158 291 , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 159 set-lit-dash 292 , {doSetAddDash, 91 /* [ */, 122,0, FALSE} // 160 293 , {doSetAddDash, 93 /* ] */, 122,0, FALSE} // 161 294 , {doNOP, 92 /* \ */, 164,0, TRUE} // 162 295 , {doSetRange, 255, 136,0, TRUE} // 163 296 , {doSetOpError, 115 /* s */, 183,0, FALSE} // 164 set-lit-dash-escape 297 , {doSetOpError, 83 /* S */, 183,0, FALSE} // 165 298 , {doSetOpError, 119 /* w */, 183,0, FALSE} // 166 299 , {doSetOpError, 87 /* W */, 183,0, FALSE} // 167 300 , {doSetOpError, 100 /* d */, 183,0, FALSE} // 168 301 , {doSetOpError, 68 /* D */, 183,0, FALSE} // 169 302 , {doSetNamedRange, 78 /* N */, 136,0, FALSE} // 170 303 , {doSetRange, 255, 136,0, TRUE} // 171 304 , {doSetProp, 112 /* p */, 129,0, FALSE} // 172 set-escape 305 , {doSetProp, 80 /* P */, 129,0, FALSE} // 173 306 , {doSetNamedChar, 78 /* N */, 122,0, FALSE} // 174 307 , {doSetBackslash_s, 115 /* s */, 136,0, TRUE} // 175 308 , {doSetBackslash_S, 83 /* S */, 136,0, TRUE} // 176 309 , {doSetBackslash_w, 119 /* w */, 136,0, TRUE} // 177 310 , {doSetBackslash_W, 87 /* W */, 136,0, TRUE} // 178 311 , {doSetBackslash_d, 100 /* d */, 136,0, TRUE} // 179 312 , {doSetBackslash_D, 68 /* D */, 136,0, TRUE} // 180 313 , {doSetLiteralEscaped, 255, 122,0, TRUE} // 181 314 , {doSetFinish, 255, 14,0, FALSE} // 182 set-finish 315 , {doExit, 255, 183,0, TRUE} // 183 errorDeath 316 }; 317 static const char * const RegexStateNames[] = { 0, 318 "start", 319 "term", 320 0, 321 0, 322 0, 323 0, 324 0, 325 0, 326 0, 327 0, 328 0, 329 0, 330 0, 331 "expr-quant", 332 0, 333 0, 334 0, 335 0, 336 0, 337 "expr-cont", 338 0, 339 0, 340 "open-paren-quant", 341 0, 342 "open-paren-quant2", 343 0, 344 "open-paren", 345 0, 346 "open-paren-extended", 347 0, 348 0, 349 0, 350 0, 351 0, 352 0, 353 0, 354 0, 355 0, 356 0, 357 0, 358 0, 359 0, 360 0, 361 0, 362 0, 363 "open-paren-lookbehind", 364 0, 365 0, 366 "paren-comment", 367 0, 368 0, 369 "paren-flag", 370 0, 371 0, 372 0, 373 0, 374 0, 375 0, 376 0, 377 0, 378 0, 379 0, 380 "quant-star", 381 0, 382 0, 383 "quant-plus", 384 0, 385 0, 386 "quant-opt", 387 0, 388 0, 389 "interval-open", 390 0, 391 "interval-lower", 392 0, 393 0, 394 0, 395 "interval-upper", 396 0, 397 0, 398 "interval-type", 399 0, 400 0, 401 "backslash", 402 0, 403 0, 404 0, 405 0, 406 0, 407 0, 408 0, 409 0, 410 0, 411 0, 412 0, 413 0, 414 0, 415 0, 416 0, 417 0, 418 0, 419 0, 420 0, 421 "set-open", 422 0, 423 0, 424 "set-open2", 425 0, 426 "set-posix", 427 0, 428 0, 429 "set-start", 430 0, 431 0, 432 0, 433 0, 434 0, 435 "set-start-dash", 436 0, 437 "set-start-amp", 438 0, 439 "set-after-lit", 440 0, 441 0, 442 0, 443 0, 444 0, 445 0, 446 "set-after-set", 447 0, 448 0, 449 0, 450 0, 451 0, 452 0, 453 "set-after-range", 454 0, 455 0, 456 0, 457 0, 458 0, 459 0, 460 "set-after-op", 461 0, 462 0, 463 0, 464 "set-set-amp", 465 0, 466 0, 467 "set-lit-amp", 468 0, 469 "set-set-dash", 470 0, 471 0, 472 "set-range-dash", 473 0, 474 "set-range-amp", 475 0, 476 "set-lit-dash", 477 0, 478 0, 479 0, 480 0, 481 "set-lit-dash-escape", 482 0, 483 0, 484 0, 485 0, 486 0, 487 0, 488 0, 489 "set-escape", 490 0, 491 0, 492 0, 493 0, 494 0, 495 0, 496 0, 497 0, 498 0, 499 "set-finish", 500 "errorDeath", 501 0}; 502 503 U_NAMESPACE_END 504 #endif 505