//---------------------------------------------------------------------------------
//
// Generated Header File.  Do not edit by hand.
//    This file contains the state table for the ICU Regular Expression Pattern Parser.
//    It is generated by the Perl script "regexcst.pl" from
//    the rule parser state definitions file "regexcst.txt".
//
//   Copyright (C) 2002-2007 International Business Machines Corporation
//   and others. All rights reserved.
//
//---------------------------------------------------------------------------------
     12 #ifndef RBBIRPT_H
     13 #define RBBIRPT_H
     14 
     15 U_NAMESPACE_BEGIN
     16 //
     17 // Character classes for regex pattern scanning.
     18 //
     19     static const uint8_t kRuleSet_digit_char = 128;
     20     static const uint8_t kRuleSet_rule_char = 129;
     21 
     22 
     23 enum Regex_PatternParseAction {
     24     doLiteralChar,
     25     doSetEnd,
     26     doBackslashA,
     27     doSetBeginUnion,
     28     doNOP,
     29     doSetBackslash_w,
     30     doSetRange,
     31     doBackslashG,
     32     doPerlInline,
     33     doSetAddDash,
     34     doIntevalLowerDigit,
     35     doProperty,
     36     doBackslashX,
     37     doOpenAtomicParen,
     38     doSetLiteralEscaped,
     39     doPatFinish,
     40     doSetBackslash_D,
     41     doSetDifference2,
     42     doNamedChar,
     43     doNGPlus,
     44     doOpenLookBehindNeg,
     45     doIntervalError,
     46     doIntervalSame,
     47     doBackRef,
     48     doPlus,
     49     doOpenCaptureParen,
     50     doMismatchedParenErr,
     51     doBeginMatchMode,
     52     doEscapeError,
     53     doOpenNonCaptureParen,
     54     doDollar,
     55     doSetProp,
     56     doIntervalUpperDigit,
     57     doSetBegin,
     58     doBackslashs,
     59     doOpenLookBehind,
     60     doSetMatchMode,
     61     doOrOperator,
     62     doCaret,
     63     doMatchModeParen,
     64     doStar,
     65     doOpt,
     66     doMatchMode,
     67     doSuppressComments,
     68     doPossessiveInterval,
     69     doOpenLookAheadNeg,
     70     doBackslashW,
     71     doCloseParen,
     72     doSetOpError,
     73     doIntervalInit,
     74     doSetFinish,
     75     doSetIntersection2,
     76     doNGStar,
     77     doEnterQuoteMode,
     78     doSetAddAmp,
     79     doBackslashB,
     80     doBackslashw,
     81     doPossessiveOpt,
     82     doSetNegate,
     83     doRuleError,
     84     doBackslashb,
     85     doConditionalExpr,
     86     doPossessivePlus,
     87     doBadOpenParenType,
     88     doNGInterval,
     89     doSetLiteral,
     90     doSetNamedChar,
     91     doBackslashd,
     92     doSetBeginDifference1,
     93     doBackslashD,
     94     doExit,
     95     doSetBackslash_S,
     96     doInterval,
     97     doSetNoCloseError,
     98     doNGOpt,
     99     doSetPosixProp,
    100     doBackslashS,
    101     doBackslashZ,
    102     doSetBeginIntersection1,
    103     doSetBackslash_W,
    104     doSetBackslash_d,
    105     doOpenLookAhead,
    106     doBadModeFlag,
    107     doPatStart,
    108     doSetNamedRange,
    109     doPossessiveStar,
    110     doEscapedLiteralChar,
    111     doSetBackslash_s,
    112     doBackslashz,
    113     doDotAny,
    114     rbbiLastAction};
    115 
    116 //-------------------------------------------------------------------------------
    117 //
    118 //  RegexTableEl       represents the structure of a row in the transition table
    119 //                     for the pattern parser state machine.
    120 //-------------------------------------------------------------------------------
    121 struct RegexTableEl {
    122     Regex_PatternParseAction      fAction;
    123     uint8_t                       fCharClass;       // 0-127:    an individual ASCII character
    124                                                     // 128-255:  character class index
    125     uint8_t                       fNextState;       // 0-250:    normal next-state numbers
    126                                                     // 255:      pop next-state from stack.
    127     uint8_t                       fPushState;
    128     UBool                         fNextChar;
    129 };
    130 
    131 static const struct RegexTableEl gRuleParseStateTable[] = {
    132     {doNOP, 0, 0, 0, TRUE}
    133     , {doPatStart, 255, 2,0,  FALSE}     //  1      start
    134     , {doLiteralChar, 254, 14,0,  TRUE}     //  2      term
    135     , {doLiteralChar, 129, 14,0,  TRUE}     //  3
    136     , {doSetBegin, 91 /* [ */, 104, 182, TRUE}     //  4
    137     , {doNOP, 40 /* ( */, 27,0,  TRUE}     //  5
    138     , {doDotAny, 46 /* . */, 14,0,  TRUE}     //  6
    139     , {doCaret, 94 /* ^ */, 14,0,  TRUE}     //  7
    140     , {doDollar, 36 /* $ */, 14,0,  TRUE}     //  8
    141     , {doNOP, 92 /* \ */, 84,0,  TRUE}     //  9
    142     , {doOrOperator, 124 /* | */, 2,0,  TRUE}     //  10
    143     , {doCloseParen, 41 /* ) */, 255,0,  TRUE}     //  11
    144     , {doPatFinish, 253, 2,0,  FALSE}     //  12
    145     , {doRuleError, 255, 183,0,  FALSE}     //  13
    146     , {doNOP, 42 /* * */, 63,0,  TRUE}     //  14      expr-quant
    147     , {doNOP, 43 /* + */, 66,0,  TRUE}     //  15
    148     , {doNOP, 63 /* ? */, 69,0,  TRUE}     //  16
    149     , {doIntervalInit, 123 /* { */, 72,0,  TRUE}     //  17
    150     , {doNOP, 40 /* ( */, 23,0,  TRUE}     //  18
    151     , {doNOP, 255, 20,0,  FALSE}     //  19
    152     , {doOrOperator, 124 /* | */, 2,0,  TRUE}     //  20      expr-cont
    153     , {doCloseParen, 41 /* ) */, 255,0,  TRUE}     //  21
    154     , {doNOP, 255, 2,0,  FALSE}     //  22
    155     , {doSuppressComments, 63 /* ? */, 25,0,  TRUE}     //  23      open-paren-quant
    156     , {doNOP, 255, 27,0,  FALSE}     //  24
    157     , {doNOP, 35 /* # */, 49, 14, TRUE}     //  25      open-paren-quant2
    158     , {doNOP, 255, 29,0,  FALSE}     //  26
    159     , {doSuppressComments, 63 /* ? */, 29,0,  TRUE}     //  27      open-paren
    160     , {doOpenCaptureParen, 255, 2, 14, FALSE}     //  28
    161     , {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE}     //  29      open-paren-extended
    162     , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE}     //  30
    163     , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE}     //  31
    164     , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE}     //  32
    165     , {doNOP, 60 /* < */, 46,0,  TRUE}     //  33
    166     , {doNOP, 35 /* # */, 49, 2, TRUE}     //  34
    167     , {doBeginMatchMode, 105 /* i */, 52,0,  FALSE}     //  35
    168     , {doBeginMatchMode, 100 /* d */, 52,0,  FALSE}     //  36
    169     , {doBeginMatchMode, 109 /* m */, 52,0,  FALSE}     //  37
    170     , {doBeginMatchMode, 115 /* s */, 52,0,  FALSE}     //  38
    171     , {doBeginMatchMode, 117 /* u */, 52,0,  FALSE}     //  39
    172     , {doBeginMatchMode, 119 /* w */, 52,0,  FALSE}     //  40
    173     , {doBeginMatchMode, 120 /* x */, 52,0,  FALSE}     //  41
    174     , {doBeginMatchMode, 45 /* - */, 52,0,  FALSE}     //  42
    175     , {doConditionalExpr, 40 /* ( */, 183,0,  TRUE}     //  43
    176     , {doPerlInline, 123 /* { */, 183,0,  TRUE}     //  44
    177     , {doBadOpenParenType, 255, 183,0,  FALSE}     //  45
    178     , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE}     //  46      open-paren-lookbehind
    179     , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE}     //  47
    180     , {doBadOpenParenType, 255, 183,0,  FALSE}     //  48
    181     , {doNOP, 41 /* ) */, 255,0,  TRUE}     //  49      paren-comment
    182     , {doMismatchedParenErr, 253, 183,0,  FALSE}     //  50
    183     , {doNOP, 255, 49,0,  TRUE}     //  51
    184     , {doMatchMode, 105 /* i */, 52,0,  TRUE}     //  52      paren-flag
    185     , {doMatchMode, 100 /* d */, 52,0,  TRUE}     //  53
    186     , {doMatchMode, 109 /* m */, 52,0,  TRUE}     //  54
    187     , {doMatchMode, 115 /* s */, 52,0,  TRUE}     //  55
    188     , {doMatchMode, 117 /* u */, 52,0,  TRUE}     //  56
    189     , {doMatchMode, 119 /* w */, 52,0,  TRUE}     //  57
    190     , {doMatchMode, 120 /* x */, 52,0,  TRUE}     //  58
    191     , {doMatchMode, 45 /* - */, 52,0,  TRUE}     //  59
    192     , {doSetMatchMode, 41 /* ) */, 2,0,  TRUE}     //  60
    193     , {doMatchModeParen, 58 /* : */, 2, 14, TRUE}     //  61
    194     , {doBadModeFlag, 255, 183,0,  FALSE}     //  62
    195     , {doNGStar, 63 /* ? */, 20,0,  TRUE}     //  63      quant-star
    196     , {doPossessiveStar, 43 /* + */, 20,0,  TRUE}     //  64
    197     , {doStar, 255, 20,0,  FALSE}     //  65
    198     , {doNGPlus, 63 /* ? */, 20,0,  TRUE}     //  66      quant-plus
    199     , {doPossessivePlus, 43 /* + */, 20,0,  TRUE}     //  67
    200     , {doPlus, 255, 20,0,  FALSE}     //  68
    201     , {doNGOpt, 63 /* ? */, 20,0,  TRUE}     //  69      quant-opt
    202     , {doPossessiveOpt, 43 /* + */, 20,0,  TRUE}     //  70
    203     , {doOpt, 255, 20,0,  FALSE}     //  71
    204     , {doNOP, 128, 74,0,  FALSE}     //  72      interval-open
    205     , {doIntervalError, 255, 183,0,  FALSE}     //  73
    206     , {doIntevalLowerDigit, 128, 74,0,  TRUE}     //  74      interval-lower
    207     , {doNOP, 44 /* , */, 78,0,  TRUE}     //  75
    208     , {doIntervalSame, 125 /* } */, 81,0,  TRUE}     //  76
    209     , {doIntervalError, 255, 183,0,  FALSE}     //  77
    210     , {doIntervalUpperDigit, 128, 78,0,  TRUE}     //  78      interval-upper
    211     , {doNOP, 125 /* } */, 81,0,  TRUE}     //  79
    212     , {doIntervalError, 255, 183,0,  FALSE}     //  80
    213     , {doNGInterval, 63 /* ? */, 20,0,  TRUE}     //  81      interval-type
    214     , {doPossessiveInterval, 43 /* + */, 20,0,  TRUE}     //  82
    215     , {doInterval, 255, 20,0,  FALSE}     //  83
    216     , {doBackslashA, 65 /* A */, 2,0,  TRUE}     //  84      backslash
    217     , {doBackslashB, 66 /* B */, 2,0,  TRUE}     //  85
    218     , {doBackslashb, 98 /* b */, 2,0,  TRUE}     //  86
    219     , {doBackslashd, 100 /* d */, 14,0,  TRUE}     //  87
    220     , {doBackslashD, 68 /* D */, 14,0,  TRUE}     //  88
    221     , {doBackslashG, 71 /* G */, 2,0,  TRUE}     //  89
    222     , {doNamedChar, 78 /* N */, 14,0,  FALSE}     //  90
    223     , {doProperty, 112 /* p */, 14,0,  FALSE}     //  91
    224     , {doProperty, 80 /* P */, 14,0,  FALSE}     //  92
    225     , {doEnterQuoteMode, 81 /* Q */, 2,0,  TRUE}     //  93
    226     , {doBackslashS, 83 /* S */, 14,0,  TRUE}     //  94
    227     , {doBackslashs, 115 /* s */, 14,0,  TRUE}     //  95
    228     , {doBackslashW, 87 /* W */, 14,0,  TRUE}     //  96
    229     , {doBackslashw, 119 /* w */, 14,0,  TRUE}     //  97
    230     , {doBackslashX, 88 /* X */, 14,0,  TRUE}     //  98
    231     , {doBackslashZ, 90 /* Z */, 2,0,  TRUE}     //  99
    232     , {doBackslashz, 122 /* z */, 2,0,  TRUE}     //  100
    233     , {doBackRef, 128, 14,0,  TRUE}     //  101
    234     , {doEscapeError, 253, 183,0,  FALSE}     //  102
    235     , {doEscapedLiteralChar, 255, 14,0,  TRUE}     //  103
    236     , {doSetNegate, 94 /* ^ */, 107,0,  TRUE}     //  104      set-open
    237     , {doSetPosixProp, 58 /* : */, 109,0,  FALSE}     //  105
    238     , {doNOP, 255, 107,0,  FALSE}     //  106
    239     , {doSetLiteral, 93 /* ] */, 122,0,  TRUE}     //  107      set-open2
    240     , {doNOP, 255, 112,0,  FALSE}     //  108
    241     , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  109      set-posix
    242     , {doNOP, 58 /* : */, 112,0,  FALSE}     //  110
    243     , {doRuleError, 255, 183,0,  FALSE}     //  111
    244     , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  112      set-start
    245     , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE}     //  113
    246     , {doNOP, 92 /* \ */, 172,0,  TRUE}     //  114
    247     , {doNOP, 45 /* - */, 118,0,  TRUE}     //  115
    248     , {doNOP, 38 /* & */, 120,0,  TRUE}     //  116
    249     , {doSetLiteral, 255, 122,0,  TRUE}     //  117
    250     , {doRuleError, 45 /* - */, 183,0,  FALSE}     //  118      set-start-dash
    251     , {doSetAddDash, 255, 122,0,  FALSE}     //  119
    252     , {doRuleError, 38 /* & */, 183,0,  FALSE}     //  120      set-start-amp
    253     , {doSetAddAmp, 255, 122,0,  FALSE}     //  121
    254     , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  122      set-after-lit
    255     , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE}     //  123
    256     , {doNOP, 45 /* - */, 159,0,  TRUE}     //  124
    257     , {doNOP, 38 /* & */, 150,0,  TRUE}     //  125
    258     , {doNOP, 92 /* \ */, 172,0,  TRUE}     //  126
    259     , {doSetNoCloseError, 253, 183,0,  FALSE}     //  127
    260     , {doSetLiteral, 255, 122,0,  TRUE}     //  128
    261     , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  129      set-after-set
    262     , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE}     //  130
    263     , {doNOP, 45 /* - */, 152,0,  TRUE}     //  131
    264     , {doNOP, 38 /* & */, 147,0,  TRUE}     //  132
    265     , {doNOP, 92 /* \ */, 172,0,  TRUE}     //  133
    266     , {doSetNoCloseError, 253, 183,0,  FALSE}     //  134
    267     , {doSetLiteral, 255, 122,0,  TRUE}     //  135
    268     , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  136      set-after-range
    269     , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE}     //  137
    270     , {doNOP, 45 /* - */, 155,0,  TRUE}     //  138
    271     , {doNOP, 38 /* & */, 157,0,  TRUE}     //  139
    272     , {doNOP, 92 /* \ */, 172,0,  TRUE}     //  140
    273     , {doSetNoCloseError, 253, 183,0,  FALSE}     //  141
    274     , {doSetLiteral, 255, 122,0,  TRUE}     //  142
    275     , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE}     //  143      set-after-op
    276     , {doSetOpError, 93 /* ] */, 183,0,  FALSE}     //  144
    277     , {doNOP, 92 /* \ */, 172,0,  TRUE}     //  145
    278     , {doSetLiteral, 255, 122,0,  TRUE}     //  146
    279     , {doSetBeginIntersection1, 91 /* [ */, 104, 129, TRUE}     //  147      set-set-amp
    280     , {doSetIntersection2, 38 /* & */, 143,0,  TRUE}     //  148
    281     , {doSetAddAmp, 255, 122,0,  FALSE}     //  149
    282     , {doSetIntersection2, 38 /* & */, 143,0,  TRUE}     //  150      set-lit-amp
    283     , {doSetAddAmp, 255, 122,0,  FALSE}     //  151
    284     , {doSetBeginDifference1, 91 /* [ */, 104, 129, TRUE}     //  152      set-set-dash
    285     , {doSetDifference2, 45 /* - */, 143,0,  TRUE}     //  153
    286     , {doSetAddDash, 255, 122,0,  FALSE}     //  154
    287     , {doSetDifference2, 45 /* - */, 143,0,  TRUE}     //  155      set-range-dash
    288     , {doSetAddDash, 255, 122,0,  FALSE}     //  156
    289     , {doSetIntersection2, 38 /* & */, 143,0,  TRUE}     //  157      set-range-amp
    290     , {doSetAddAmp, 255, 122,0,  FALSE}     //  158
    291     , {doSetDifference2, 45 /* - */, 143,0,  TRUE}     //  159      set-lit-dash
    292     , {doSetAddDash, 91 /* [ */, 122,0,  FALSE}     //  160
    293     , {doSetAddDash, 93 /* ] */, 122,0,  FALSE}     //  161
    294     , {doNOP, 92 /* \ */, 164,0,  TRUE}     //  162
    295     , {doSetRange, 255, 136,0,  TRUE}     //  163
    296     , {doSetOpError, 115 /* s */, 183,0,  FALSE}     //  164      set-lit-dash-escape
    297     , {doSetOpError, 83 /* S */, 183,0,  FALSE}     //  165
    298     , {doSetOpError, 119 /* w */, 183,0,  FALSE}     //  166
    299     , {doSetOpError, 87 /* W */, 183,0,  FALSE}     //  167
    300     , {doSetOpError, 100 /* d */, 183,0,  FALSE}     //  168
    301     , {doSetOpError, 68 /* D */, 183,0,  FALSE}     //  169
    302     , {doSetNamedRange, 78 /* N */, 136,0,  FALSE}     //  170
    303     , {doSetRange, 255, 136,0,  TRUE}     //  171
    304     , {doSetProp, 112 /* p */, 129,0,  FALSE}     //  172      set-escape
    305     , {doSetProp, 80 /* P */, 129,0,  FALSE}     //  173
    306     , {doSetNamedChar, 78 /* N */, 122,0,  FALSE}     //  174
    307     , {doSetBackslash_s, 115 /* s */, 136,0,  TRUE}     //  175
    308     , {doSetBackslash_S, 83 /* S */, 136,0,  TRUE}     //  176
    309     , {doSetBackslash_w, 119 /* w */, 136,0,  TRUE}     //  177
    310     , {doSetBackslash_W, 87 /* W */, 136,0,  TRUE}     //  178
    311     , {doSetBackslash_d, 100 /* d */, 136,0,  TRUE}     //  179
    312     , {doSetBackslash_D, 68 /* D */, 136,0,  TRUE}     //  180
    313     , {doSetLiteralEscaped, 255, 122,0,  TRUE}     //  181
    314     , {doSetFinish, 255, 14,0,  FALSE}     //  182      set-finish
    315     , {doExit, 255, 183,0,  TRUE}     //  183      errorDeath
    316  };
    317 static const char * const RegexStateNames[] = {    0,
    318      "start",
    319      "term",
    320     0,
    321     0,
    322     0,
    323     0,
    324     0,
    325     0,
    326     0,
    327     0,
    328     0,
    329     0,
    330     0,
    331      "expr-quant",
    332     0,
    333     0,
    334     0,
    335     0,
    336     0,
    337      "expr-cont",
    338     0,
    339     0,
    340      "open-paren-quant",
    341     0,
    342      "open-paren-quant2",
    343     0,
    344      "open-paren",
    345     0,
    346      "open-paren-extended",
    347     0,
    348     0,
    349     0,
    350     0,
    351     0,
    352     0,
    353     0,
    354     0,
    355     0,
    356     0,
    357     0,
    358     0,
    359     0,
    360     0,
    361     0,
    362     0,
    363      "open-paren-lookbehind",
    364     0,
    365     0,
    366      "paren-comment",
    367     0,
    368     0,
    369      "paren-flag",
    370     0,
    371     0,
    372     0,
    373     0,
    374     0,
    375     0,
    376     0,
    377     0,
    378     0,
    379     0,
    380      "quant-star",
    381     0,
    382     0,
    383      "quant-plus",
    384     0,
    385     0,
    386      "quant-opt",
    387     0,
    388     0,
    389      "interval-open",
    390     0,
    391      "interval-lower",
    392     0,
    393     0,
    394     0,
    395      "interval-upper",
    396     0,
    397     0,
    398      "interval-type",
    399     0,
    400     0,
    401      "backslash",
    402     0,
    403     0,
    404     0,
    405     0,
    406     0,
    407     0,
    408     0,
    409     0,
    410     0,
    411     0,
    412     0,
    413     0,
    414     0,
    415     0,
    416     0,
    417     0,
    418     0,
    419     0,
    420     0,
    421      "set-open",
    422     0,
    423     0,
    424      "set-open2",
    425     0,
    426      "set-posix",
    427     0,
    428     0,
    429      "set-start",
    430     0,
    431     0,
    432     0,
    433     0,
    434     0,
    435      "set-start-dash",
    436     0,
    437      "set-start-amp",
    438     0,
    439      "set-after-lit",
    440     0,
    441     0,
    442     0,
    443     0,
    444     0,
    445     0,
    446      "set-after-set",
    447     0,
    448     0,
    449     0,
    450     0,
    451     0,
    452     0,
    453      "set-after-range",
    454     0,
    455     0,
    456     0,
    457     0,
    458     0,
    459     0,
    460      "set-after-op",
    461     0,
    462     0,
    463     0,
    464      "set-set-amp",
    465     0,
    466     0,
    467      "set-lit-amp",
    468     0,
    469      "set-set-dash",
    470     0,
    471     0,
    472      "set-range-dash",
    473     0,
    474      "set-range-amp",
    475     0,
    476      "set-lit-dash",
    477     0,
    478     0,
    479     0,
    480     0,
    481      "set-lit-dash-escape",
    482     0,
    483     0,
    484     0,
    485     0,
    486     0,
    487     0,
    488     0,
    489      "set-escape",
    490     0,
    491     0,
    492     0,
    493     0,
    494     0,
    495     0,
    496     0,
    497     0,
    498     0,
    499      "set-finish",
    500      "errorDeath",
    501     0};
    502 
    503 U_NAMESPACE_END
    504 #endif
    505