Home | History | Annotate | Download | only in draft
      1 #
      2 #  start state, scan position is at the beginning of the pattern.
      3 #
      4 start:
      5     '['                  n set-open       ^set-finish                                # advance past '[', push set-finish as the state to return to, then parse the set
      6     '\'                  n set-escape     ^set-finish                                # advance past '\', push set-finish, then handle the escape
      7     default                errorDeath                               doRuleError      # anything other than '[' or '\' at the start is rejected
      8     
      9 #
     10 # [set expression] parsing,
     11 #    All states involved in parsing set expressions have names beginning with "set-"
     12 #
     13 
     14 set-open:
     15    '^'                   n  set-open2                               doSetNegate      # '^' directly after the opening '[' is consumed here
     16    ':'                      set-posix                               doSetPosixProp   # not advanced by the table; set-posix inspects where the action left the scan
     17    default                  set-open2                                                # not advanced: set-open2 re-examines the same character
     18 
     19 set-open2:
     20    ']'                   n  set-after-lit                           doSetLiteral     # a ']' in leading position is handled as a literal, not as the end of the set
     21    default                  set-start                                                # not advanced: set-start re-examines the same character
     22 
     23 #  set-posix:
     24 #                  scanned a '[:'  If it really is a [:property:], doSetPosixProp will have
     25 #                  moved the scan to the closing ']'.  If it wasn't a property
     26 #                  expression, the scan will still be at the opening ':', which should
     27 #                  be interpreted as a normal set expression.
     28 set-posix:
     29     ']'                  n   pop                                    doSetEnd     # scan is at the closing ']': it was a [:property:]; return to the pushed state
     30     ':'                      set-start                                           # scan is still at the opening ':': not a property, parse as ordinary set content
     31     default                  errorDeath                             doRuleError  # should not be possible.
     32 
     33 #
     34 #   set-start   after the [ and special case leading characters (^ and/or ]) but before
     35 #               everything else.   A '-' is literal at this point.
     36 #
     37 set-start:
     38     ']'                  n  pop                                     doSetEnd          # end of this set; return to the pushed state
     39     '['                  n  set-open      ^set-after-set            doSetBeginUnion   # nested set; parsing resumes in set-after-set once it closes
     40     '\'                  n  set-escape
     41     '-'                  n  set-start-dash                                            # leading '-': literal unless followed by another '-'
     42     '&'                  n  set-start-amp                                             # leading '&': literal unless followed by another '&'
     43     default              n  set-after-lit                           doSetLiteral
     44 
     45 #    set-start-dash    Turn "[--" into a syntax error.
     46 #                           "[-x" is good, - and x are literals.
     47 #
     48 set-start-dash:
     49     '-'                     errorDeath                              doRuleError       # "[--" is a syntax error
     50     default                 set-after-lit                           doSetAddDash      # "[-x": not advanced, so x is re-examined by set-after-lit
     51 
     52 #    set-start-amp     Turn "[&&" into a syntax error.
     53 #                           "[&x" is good, & and x are literals.
     54 #
     55 set-start-amp:
     56     '&'                     errorDeath                              doRuleError       # "[&&" is a syntax error
     57     default                 set-after-lit                           doSetAddAmp       # "[&x": not advanced, so x is re-examined by set-after-lit
     58 
     59 #
     60 #   set-after-lit    The last thing scanned was a literal character within a set.
     61 #                    Can be followed by anything.  Single '-' or '&' are
     62 #                    literals in this context, not operators.
     63 set-after-lit:
     64     ']'                  n  pop                                     doSetEnd            # end of this set; return to the pushed state
     65     '['                  n  set-open      ^set-after-set            doSetBeginUnion     # nested set; parsing resumes in set-after-set once it closes
     66     '-'                  n  set-lit-dash                                                # range, "--" operator or literal '-': decided in set-lit-dash
     67     '&'                  n  set-lit-amp                                                 # "&&" operator or literal '&': decided in set-lit-amp
     68     '\'                  n  set-escape
     69     eof                     errorDeath                              doSetNoCloseError   # input ended while still inside the set
     70     default              n  set-after-lit                           doSetLiteral
     71 
     72 set-after-set:
     73     ']'                  n  pop                                     doSetEnd            # end of this set; return to the pushed state
     74     '['                  n  set-open      ^set-after-set            doSetBeginUnion     # another nested set directly after the previous one
     75     '-'                  n  set-set-dash                                                # difference operator, "--" operator or literal '-': decided in set-set-dash
     76     '&'                  n  set-set-amp                                                 # intersection operator, "&&" operator or literal '&': decided in set-set-amp
     77     '\'                  n  set-escape
     78     eof                     errorDeath                              doSetNoCloseError   # input ended while still inside the set
     79     default              n  set-after-lit                           doSetLiteral
     80 
     81 set-after-range:
     82     ']'                  n  pop                                     doSetEnd            # end of this set; return to the pushed state
     83     '['                  n  set-open      ^set-after-set            doSetBeginUnion     # nested set; parsing resumes in set-after-set once it closes
     84     '-'                  n  set-range-dash                                              # after a range or \w-like item a single '-' is literal; "--" is still an operator
     85     '&'                  n  set-range-amp                                               # "&&" operator or literal '&': decided in set-range-amp
     86     '\'                  n  set-escape
     87     eof                     errorDeath                              doSetNoCloseError   # input ended while still inside the set
     88     default              n  set-after-lit                           doSetLiteral
     89     
     90 
     91 # set-after-op
     92 #     After a --  or &&
     93 #     It is an error to close a set at this point.
     94 #
     95 set-after-op:
     96     '['                  n  set-open         ^set-after-set         doSetBeginUnion     # operand is a nested set
     97     ']'                     errorDeath                              doSetOpError        # closing the set directly after "--" or "&&" is an error
     98     '\'                  n  set-escape                                                  # operand starts with an escape
     99     default              n  set-after-lit                           doSetLiteral        # operand starts with a literal character
    100 
    101 #
    102 #   set-set-amp
    103 #      Have scanned [[set]&
    104 #      Could be a '&' intersection operator, if a set follows.
    105 #      Could be the start of a '&&' operator.
    106 #      Otherwise is a literal.
    107 set-set-amp:
    108     '['                  n  set-open      ^set-after-set           doSetBeginIntersection1   # "[set]&[": the single '&' is an intersection operator
    109     '&'                  n  set-after-op                           doSetIntersection2        # "&&" operator
    110     default                 set-after-lit                          doSetAddAmp               # '&' was a literal; not advanced, current char is re-examined by set-after-lit
    111 
    112 
    113 # set-lit-amp   Have scanned "[literals&"
    114 #               Could be a start of "&&" operator or a literal
    115 #               In [abc&[def]],   the '&' is a literal
    116 #
    117 set-lit-amp:
    118     '&'                  n  set-after-op                            doSetIntersection2   # "&&" operator
    119     default                 set-after-lit                           doSetAddAmp          # '&' was a literal (also for a following '['); current char is re-examined by set-after-lit
    120 
    121 
    122 #
    123 #  set-set-dash
    124 #      Have scanned [set]-
    125 #      Could be a '-' difference operator, if a [set] follows.
    126 #      Could be the start of a '--' operator.
    127 #      Otherwise is a literal.
    128 set-set-dash:
    129     '['                  n  set-open      ^set-after-set           doSetBeginDifference1   # "[set]-[": the single '-' is a difference operator
    130     '-'                  n  set-after-op                           doSetDifference2        # "--" operator
    131     default                 set-after-lit                          doSetAddDash            # '-' was a literal; not advanced, current char is re-examined by set-after-lit
    132 
    133 
    134 #
    135 #  set-range-dash
    136 #      scanned  a-b-  or \w-
    137 #         any set or range like item where the trailing single '-' should
    138 #         be literal, not a set difference operation.
    139 #         A trailing "--" is still a difference operator.
    140 set-range-dash:
    141     '-'                  n  set-after-op                           doSetDifference2   # trailing "--" is still a difference operator
    142     default                 set-after-lit                          doSetAddDash       # single trailing '-' is a literal; current char is re-examined by set-after-lit
    143 
    144 
    145 set-range-amp:
    146     '&'                  n  set-after-op                           doSetIntersection2   # "&&" operator after a range or \w-like item
    147     default                 set-after-lit                          doSetAddAmp          # single '&' is a literal; current char is re-examined by set-after-lit
    148 
    149 
    150 #  set-lit-dash
    151 #     Have scanned "[literals-" Could be a range or a -- operator or a literal
    152 #     In [abc-[def]], the '-' is a literal (confirmed with a Java test)
    153 #        [abc-\p{xx}] the '-' is an error
    154 #        [abc-]       the '-' is a literal
    155 #        [ab-xy]      the '-' is a range
    156 #
    157 set-lit-dash:
    158     '-'                  n  set-after-op                            doSetDifference2   # "--" operator
    159     '['                     set-after-lit                           doSetAddDash       # "[abc-[def]]": '-' is a literal; '[' is re-examined by set-after-lit
    160     ']'                     set-after-lit                           doSetAddDash       # "[abc-]": '-' is a literal; ']' is re-examined by set-after-lit
    161     '\'                  n  set-lit-dash-escape                                        # the range end, if any, is written as an escape
    162     default              n  set-after-range                         doSetRange         # "[ab-xy]": '-' forms a range
    163 
    164 # set-lit-dash-escape
    165 #
    166 #    scanned "[literal-\"
    167 #    Could be a range, if the \ introduces an escaped literal char or a named char.
    168 #    Otherwise it is an error.
    169 #
    170 set-lit-dash-escape:
    171    's'                      errorDeath                             doSetOpError      # \s, \S, \w, \W, \d, \D after "literal-" are errors
    172    'S'                      errorDeath                             doSetOpError
    173    'w'                      errorDeath                             doSetOpError
    174    'W'                      errorDeath                             doSetOpError
    175    'd'                      errorDeath                             doSetOpError
    176    'D'                      errorDeath                             doSetOpError
    177    'N'                      set-name-start    ^set-after-range          doStartNamedChar   # named char as range end; see TODO below
    178    'x'                      set-hex-start    ^set-after-range          doStartHex   # hex escape as range end; see TODO below
    179    default               n  set-after-range                        doSetRange        # any other escaped char is taken as the range end
    180 # TODO fix 'N', 'x'  -- NOTE(review): unlike the matching rows in set-escape, these two rows have no 'n' flag, so set-name-start / set-hex-start presumably see the 'N' / 'x' again instead of '{' and fall to errorDeath; confirm when fixing.
    181    
    182 #
    183 #  set-escape
    184 #       Common back-slash escape processing within set expressions
    185 #
    186 set-escape:
    187    'p'                   n  set-prop-start    ^set-after-set          doStartSetProp     # \p: property expression; resumes in set-after-set
    188    'P'                   n  set-prop-start    ^set-after-set          doStartSetProp     # \P: same path as \p
    189    'N'                   n  set-name-start    ^set-after-lit          doStartNamedChar   # \N{name}; resumes in set-after-lit
    190    'x'                   n  set-hex-start ^set-after-lit         doStartHex              # \x{hex}; resumes in set-after-lit
    191    's'                   n  set-after-range                         doSetBackslash_s    # \s \S \w \W \d \D continue in set-after-range, where a following single '-' is literal
    192    'S'                   n  set-after-range                         doSetBackslash_S
    193    'w'                   n  set-after-range                         doSetBackslash_w
    194    'W'                   n  set-after-range                         doSetBackslash_W
    195    'd'                   n  set-after-range                         doSetBackslash_d
    196    'D'                   n  set-after-range                         doSetBackslash_D
    197    default               n  set-after-lit                           doSetLiteralEscaped  # any other escaped character is handled as a literal
    198 # TODO add \r, \n, etc
    199 
    200 set-prop-start:
    201     '{'                  n  set-prop-cont                                             # the '{' must follow \p or \P immediately
    202     default                 errorDeath                                               # NOTE(review): no action on this errorDeath row, unlike other error rows; confirm an error is still reported
    203 
    204 set-prop-cont:
    205     '}'                  n  pop                                     doPropName          # "{name}": property name with no relation; return to the pushed state
    206     '='                  n  set-value                               doPropRelation      # "{name=": a value follows
    207     '≠'                  n  set-value                               doPropRelation      # "{name≠": restored U+2260; this row had an empty '' literal. NOTE(review): confirm against the upstream file
    208     default              n  set-prop-cont                                               # any other character is part of the name. NOTE(review): no eof row; confirm an unterminated "\p{" cannot loop here
    209 
    210 set-value:
    211     '}'                  n  pop                                     doPropValue         # end of the property value; return to the pushed state
    212     default              n  set-value                                                   # every other character is skipped over as part of the value. NOTE(review): no eof row; confirm unterminated input cannot loop here
    213 
    214 set-name-start:
    215     '{'                  n  set-name-cont                                             # the '{' must follow \N immediately
    216     default                 errorDeath                                               # NOTE(review): no action on this errorDeath row; confirm an error is still reported
    217 
    218 set-name-cont:
    219     '}'                  n  pop                                     doName              # end of the name; return to the pushed state
    220     [\ \-0-9A-Za-z]      n  set-name-cont                                               # name characters: space, '-', ASCII digits and ASCII letters
    221     default              n  errorDeath                                                  # any other character inside the braces is an error (no action attached)
    222 
    223 set-hex-start:
    224     '{'                  n  set-hex-cont                                              # the '{' must follow \x immediately
    225     default                 errorDeath                                               # NOTE(review): no action on this errorDeath row; confirm an error is still reported
    226 
    227 set-hex-cont:
    228     '}'                  n  pop                                     doHex               # end of the hex digits; return to the pushed state
    229     [0-9A-Fa-f]          n  set-hex-cont                                                # hex digits, either case
    230     default              n  errorDeath                                                  # any other character inside the braces is an error (no action attached)
    231     
    232 #
    233 # set-finish
    234 #     Have just encountered the final ']' that completes a [set], and
    235 #     arrived here via a pop.  From here, we exit the set parsing world, and go
    236 #     back to generic regular expression parsing.
    237 #
    238 set-finish:
    239     default                 exit                              doSetFinish             # reached via pop after the final ']'; not advanced; leaves set parsing
    240