#
#  start
#       Initial state.  The scan position is at the very beginning of the pattern.
#
start:
    '['             n  set-open              ^set-finish
    '\'             n  set-escape            ^set-finish
    default            errorDeath                                  doRuleError

#
#  [set expression] parsing.
#       Every state that takes part in parsing a set expression has a name
#       that begins with "set-".
#

#  set-open
#       Entered on an opening '['.  Checks for a leading '^' or for a possible "[:".
#
set-open:
    '^'             n  set-open2                                   doSetNegate
    ':'                set-posix                                   doSetPosixProp
    default            set-open2

#  set-open2
#       Entered after "[" or "[^".  A ']' in this position takes the doSetLiteral
#       action instead of closing the set.
#
set-open2:
    ']'             n  set-after-lit                               doSetLiteral
    default            set-start

#  set-posix
#       A "[:" has been scanned.  If it really is a [:property:], doSetPosixProp
#       will have moved the scan to the closing ']'.  If it was not a property
#       expression, the scan is still at the opening ':', which is then
#       interpreted as part of a normal set expression.
#
set-posix:
    ']'             n  pop                                         doSetEnd
    ':'                set-start
    default            errorDeath                                  doRuleError   # should not be possible.

#
#  set-start
#       Positioned after the '[' and any special-cased leading characters
#       ('^' and/or ']'), but before everything else.
#       A '-' is literal at this point.
#
set-start:
    ']'             n  pop                                         doSetEnd
    '['             n  set-open              ^set-after-set        doSetBeginUnion
    '\'             n  set-escape
    '-'             n  set-start-dash
    '&'             n  set-start-amp
    default         n  set-after-lit                               doSetLiteral

#  set-start-dash
#       Turns "[--" into a syntax error.
#       "[-x" is good: '-' and 'x' are both literals.
#
set-start-dash:
    '-'                errorDeath                                  doRuleError
    default            set-after-lit                               doSetAddDash

#  set-start-amp
#       Turns "[&&" into a syntax error.
#       "[&x" is good: '&' and 'x' are both literals.
#
set-start-amp:
    '&'                errorDeath                                  doRuleError
    default            set-after-lit                               doSetAddAmp

#
#  set-after-lit
#       The last thing scanned was a literal character within a set.
#       Anything may follow.  A single '-' or '&' is a literal in this
#       context, not an operator.
set-after-lit:
    ']'             n  pop                                         doSetEnd
    '['             n  set-open              ^set-after-set        doSetBeginUnion
    '-'             n  set-lit-dash
    '&'             n  set-lit-amp
    '\'             n  set-escape
    eof                errorDeath                                  doSetNoCloseError
    default         n  set-after-lit                               doSetLiteral

#  set-after-set
#       Same rows as set-after-lit, except that '-' and '&' lead to
#       set-set-dash and set-set-amp.
#
set-after-set:
    ']'             n  pop                                         doSetEnd
    '['             n  set-open              ^set-after-set        doSetBeginUnion
    '-'             n  set-set-dash
    '&'             n  set-set-amp
    '\'             n  set-escape
    eof                errorDeath                                  doSetNoCloseError
    default         n  set-after-lit                               doSetLiteral

#  set-after-range
#       Same rows as set-after-lit, except that '-' and '&' lead to
#       set-range-dash and set-range-amp.
#
set-after-range:
    ']'             n  pop                                         doSetEnd
    '['             n  set-open              ^set-after-set        doSetBeginUnion
    '-'             n  set-range-dash
    '&'             n  set-range-amp
    '\'             n  set-escape
    eof                errorDeath                                  doSetNoCloseError
    default         n  set-after-lit                               doSetLiteral


#  set-after-op
#       Reached after a "--" or "&&".
#       Closing the set at this point is an error.
#
set-after-op:
    '['             n  set-open              ^set-after-set        doSetBeginUnion
    ']'                errorDeath                                  doSetOpError
    '\'             n  set-escape
    default         n  set-after-lit                               doSetLiteral

#
#  set-set-amp
#       Have scanned "[[set]&".
#       Could be a '&' intersection operator, if a set follows.
#       Could be the start of a "&&" operator.
#       Otherwise the '&' is a literal.
#
set-set-amp:
    '['             n  set-open              ^set-after-set        doSetBeginIntersection1
    '&'             n  set-after-op                                doSetIntersection2
    default            set-after-lit                               doSetAddAmp


#  set-lit-amp
#       Have scanned "[literals&".
#       Could be the start of a "&&" operator, or a literal.
#       In [abc&[def]], the '&' is a literal.
#
set-lit-amp:
    '&'             n  set-after-op                                doSetIntersection2
    default            set-after-lit                               doSetAddAmp


#
#  set-set-dash
#       Have scanned "[set]-".
#       Could be a '-' difference operator, if a [set] follows.
#       Could be the start of a "--" operator.
#       Otherwise the '-' is a literal.
set-set-dash:
    '['             n  set-open              ^set-after-set        doSetBeginDifference1
    '-'             n  set-after-op                                doSetDifference2
    default            set-after-lit                               doSetAddDash


#
#  set-range-dash
#       Have scanned "a-b-" or "\w-": any set-like or range-like item after
#       which a trailing single '-' should be a literal, not a set
#       difference operation.
#       A trailing "--" is still a difference operator.
#
set-range-dash:
    '-'             n  set-after-op                                doSetDifference2
    default            set-after-lit                               doSetAddDash


#  set-range-amp
#       The '&' counterpart of set-range-dash: a second '&' takes the
#       doSetIntersection2 action, anything else takes doSetAddAmp.
#
set-range-amp:
    '&'             n  set-after-op                                doSetIntersection2
    default            set-after-lit                               doSetAddAmp


#  set-lit-dash
#       Have scanned "[literals-".  Could be a range, a "--" operator, or a literal.
#           [abc-[def]]     the '-' is a literal (confirmed with a Java test)
#           [abc-\p{xx}     the '-' is an error
#           [abc-]          the '-' is a literal
#           [ab-xy]         the '-' is a range
#
set-lit-dash:
    '-'             n  set-after-op                                doSetDifference2
    '['                set-after-lit                               doSetAddDash
    ']'                set-after-lit                               doSetAddDash
    '\'             n  set-lit-dash-escape
    default         n  set-after-range                             doSetRange

#  set-lit-dash-escape
#
#       Have scanned "[literal-\".
#       Could be a range, if the '\' introduces an escaped literal char or a
#       named char.
#       Otherwise it is an error.
#
set-lit-dash-escape:
    's'                errorDeath                                  doSetOpError
    'S'                errorDeath                                  doSetOpError
    'w'                errorDeath                                  doSetOpError
    'W'                errorDeath                                  doSetOpError
    'd'                errorDeath                                  doSetOpError
    'D'                errorDeath                                  doSetOpError
    'N'                set-name-start        ^set-after-range      doStartNamedChar
    'x'                set-hex-start         ^set-after-range      doStartHex
    default         n  set-after-range                             doSetRange
#  TODO fix 'N', 'x'
#       NOTE(review): unlike the 'N' and 'x' rows in set-escape, these two rows
#       carry no 'n' flag.  Confirm whether that is intended before relying on
#       ranges that end in \N{...} or \x{...}.

#
#  set-escape
#       Common back-slash escape processing within set expressions.
#
set-escape:
    'p'             n  set-prop-start        ^set-after-set        doStartSetProp
    'P'             n  set-prop-start        ^set-after-set        doStartSetProp
    'N'             n  set-name-start        ^set-after-lit        doStartNamedChar
    'x'             n  set-hex-start         ^set-after-lit        doStartHex
    's'             n  set-after-range                             doSetBackslash_s
    'S'             n  set-after-range                             doSetBackslash_S
    'w'             n  set-after-range                             doSetBackslash_w
    'W'             n  set-after-range                             doSetBackslash_W
    'd'             n  set-after-range                             doSetBackslash_d
    'D'             n  set-after-range                             doSetBackslash_D
    default         n  set-after-lit                               doSetLiteralEscaped
#  TODO add \r, \n, etc

#  set-prop-start
#       Entered from set-escape on 'p' or 'P'.  A '{' must follow.
#
set-prop-start:
    '{'             n  set-prop-cont
    default            errorDeath

#  set-prop-cont
#       Inside the braces of a property expression, before any relation sign.
#       '}' ends a name-only property; a relation sign switches to set-value.
#
#       NOTE(review): the match character of the second doPropRelation row was
#       empty ('') in the damaged source text.  It is restored here as the
#       not-equals sign, the negated counterpart of '='.  Confirm against
#       the original file.
#
set-prop-cont:
    '}'             n  pop                                         doPropName
    '='             n  set-value                                   doPropRelation
    '≠'             n  set-value                                   doPropRelation
    default         n  set-prop-cont

#  set-value
#       After the relation sign of a property expression; runs up to the closing '}'.
#
set-value:
    '}'             n  pop                                         doPropValue
    default         n  set-value

#  set-name-start
#       Entered on an 'N' escape.  A '{' must follow.
#
set-name-start:
    '{'             n  set-name-cont
    default            errorDeath

#  set-name-cont
#       Inside the braces of a named character.  Only the characters in the
#       bracketed class below are accepted before the closing '}'.
#
set-name-cont:
    '}'             n  pop                                         doName
    [\ \-0-9A-Za-z] n  set-name-cont
    default         n  errorDeath

#  set-hex-start
#       Entered on an 'x' escape.  A '{' must follow.
#
set-hex-start:
    '{'             n  set-hex-cont
    default            errorDeath

#  set-hex-cont
#       Inside the braces of a hex escape.  Only hex digits are accepted
#       before the closing '}'.
#
set-hex-cont:
    '}'             n  pop                                         doHex
    [0-9A-Fa-f]     n  set-hex-cont
    default         n  errorDeath

#
#  set-finish
#       Have just encountered the final ']' that completes a [set], and
#       arrived here via a pop.  From here we leave the set parsing world and
#       go back to generic regular expression parsing.
#
set-finish:
    default            exit                                        doSetFinish