#
# Secret Labs' Regular Expression Engine
#
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#

"""Internal support module for sre"""

# update when constants are added or removed

MAGIC = 20031017

try:
    from _sre import MAXREPEAT
except ImportError:
    # The _sre module does not export MAXREPEAT: install the fallback
    # value both here and on the _sre module itself.
    import _sre
    MAXREPEAT = _sre.MAXREPEAT = 65535

# SRE standard exception (access as sre.error)
# should this really be here?

class error(Exception):
    """SRE standard exception (access as sre.error)."""

# operators

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ANY_ALL = "any_all"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BIGCHARSET = "bigcharset"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
GROUPREF_EXISTS = "groupref_exists"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"
MIN_REPEAT_ONE = "min_repeat_one"

# positions
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"

# categories
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"

# NOTE: the position of a name in each of the three lists below becomes
# its numeric code (see makedict), so the ordering is significant.
# MAX_REPEAT and MIN_REPEAT are defined above but are not listed in
# OPCODES.

OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET, BIGCHARSET,
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN,
    MIN_REPEAT_ONE

]

ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
    AT_UNI_NON_BOUNDARY
]

CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]

def makedict(list):
    """Map each item of *list* to its zero-based position.

    If an item occurs more than once, the index of its last occurrence
    is kept.  The parameter name is left as ``list`` so the call
    interface is unchanged.
    """
    return {item: index for index, item in enumerate(list)}

# From here on the three names refer to name -> numeric code mappings
# rather than to the ordered lists defined above.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)

# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}

AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}

CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}

# flags
SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2 # case insensitive
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
SRE_FLAG_UNICODE = 32 # use unicode locale
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
SRE_FLAG_DEBUG = 128 # debugging

# flags for INFO primitive
SRE_INFO_PREFIX = 1 # has prefix
SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4 # pattern starts with character from given set

if __name__ == "__main__":
    def dump(f, d, prefix):
        """Write one ``#define <prefix>_<NAME> <code>`` line per entry
        of *d* to the file object *f*, ordered by numeric code."""
        # sorted() rather than list.sort(): d.items() is only a list on
        # Python 2, sorted() gives the same ordering on Python 2 and 3.
        for k, v in sorted(d.items(), key=lambda a: a[1]):
            f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))

    # The with-block closes the header file even if a write fails.
    with open("sre_constants.h", "w") as f:
        f.write("""\
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * NOTE: This file is generated by sre_constants.py.  If you need
 * to change anything in here, edit sre_constants.py and run it.
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

""")

        f.write("#define SRE_MAGIC %d\n" % MAGIC)

        dump(f, OPCODES, "SRE_OP")
        dump(f, ATCODES, "SRE")
        dump(f, CHCODES, "SRE")

        # SRE_FLAG_DEBUG is intentionally left as in the original: it is
        # not written to the generated header.
        f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
        f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
        f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
        f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
        f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
        f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
        f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)

        f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
        f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
        f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)

    # Parenthesised single argument prints identically on Python 2 and 3.
    print("done")