#
# Secret Labs' Regular Expression Engine
#
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#

"""Internal support module for sre"""

# Bump this value whenever constants are added or removed.

MAGIC = 20140917

from _sre import MAXREPEAT, MAXGROUPS

# SRE standard exception (reachable as sre.error).
# Open question carried over from the original author: does it belong here?

class error(Exception):
    """Exception carrying an optional pattern and position.

    Attributes set by the constructor:
        msg     -- the message exactly as passed in
        pattern -- the pattern argument (str, bytes-like, or None)
        pos     -- the position argument (or None)
        lineno  -- 1-based line containing pos, None without a location
        colno   -- 1-based column of pos, None without a location
    """

    def __init__(self, msg, pattern=None, pos=None):
        self.msg = msg
        self.pattern = pattern
        self.pos = pos
        if pattern is None or pos is None:
            # No usable location: report the bare message.
            self.lineno = self.colno = None
            text = msg
        else:
            text = '%s at position %d' % (msg, pos)
            # The newline marker has to match the pattern's own type.
            nl = '\n' if isinstance(pattern, str) else b'\n'
            self.lineno = pattern.count(nl, 0, pos) + 1
            # rfind() yields -1 on the first line, so colno is 1-based.
            self.colno = pos - pattern.rfind(nl, 0, pos)
            if nl in pattern:
                # Line/column is only appended for multi-line patterns.
                text = '%s (line %d, column %d)' % (text, self.lineno, self.colno)
        super().__init__(text)


class _NamedIntConstant(int):
    """An int that remembers a name and shows it for str() and repr()."""

    def __new__(cls, value, name):
        const = super().__new__(cls, value)
        const.name = name
        return const

    def __str__(self):
        return self.name

    __repr__ = __str__

# Re-wrap the imported limit so that it prints by name as well.
MAXREPEAT = _NamedIntConstant(MAXREPEAT, 'MAXREPEAT')

def _makecodes(names):
    """Build consecutively numbered named constants from *names*.

    *names* is a whitespace-separated string of identifiers.  Each
    identifier is numbered by its position (starting at 0), published as
    a module-level global under its own name, and the list of constants
    is returned in that same order.
    """
    codes = [_NamedIntConstant(value, label)
             for value, label in enumerate(names.strip().split())]
    namespace = globals()
    for code in codes:
        namespace[code.name] = code
    return codes

# operators
# failure=0 success=1 (just because it looks better that way :-)
# NOTE: the numeric value of every code is its position in the list below.
OPCODES = _makecodes("""
    FAILURE SUCCESS

    ANY ANY_ALL
    ASSERT ASSERT_NOT
    AT
    BRANCH
    CALL
    CATEGORY
    CHARSET BIGCHARSET
    GROUPREF GROUPREF_EXISTS GROUPREF_IGNORE
    IN IN_IGNORE
    INFO
    JUMP
    LITERAL LITERAL_IGNORE
    MARK
    MAX_UNTIL
    MIN_UNTIL
    NOT_LITERAL NOT_LITERAL_IGNORE
    NEGATE
    RANGE
    REPEAT
    REPEAT_ONE
    SUBPATTERN
    MIN_REPEAT_ONE
    RANGE_IGNORE

    MIN_REPEAT MAX_REPEAT
""")
# MIN_REPEAT and MAX_REPEAT stay defined as globals but are dropped from
# the OPCODES list itself.
del OPCODES[-2:]

# positions
ATCODES = _makecodes("""
    AT_BEGINNING AT_BEGINNING_LINE AT_BEGINNING_STRING
    AT_BOUNDARY AT_NON_BOUNDARY
    AT_END AT_END_LINE AT_END_STRING
    AT_LOC_BOUNDARY AT_LOC_NON_BOUNDARY
    AT_UNI_BOUNDARY AT_UNI_NON_BOUNDARY
""")

# categories
CHCODES = _makecodes("""
    CATEGORY_DIGIT CATEGORY_NOT_DIGIT
    CATEGORY_SPACE CATEGORY_NOT_SPACE
    CATEGORY_WORD CATEGORY_NOT_WORD
    CATEGORY_LINEBREAK CATEGORY_NOT_LINEBREAK
    CATEGORY_LOC_WORD CATEGORY_LOC_NOT_WORD
    CATEGORY_UNI_DIGIT CATEGORY_UNI_NOT_DIGIT
    CATEGORY_UNI_SPACE CATEGORY_UNI_NOT_SPACE
    CATEGORY_UNI_WORD CATEGORY_UNI_NOT_WORD
    CATEGORY_UNI_LINEBREAK CATEGORY_UNI_NOT_LINEBREAK
""")


# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE,
    RANGE: RANGE_IGNORE,
}

# position codes substituted in multiline mode
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

# boundary codes substituted in locale mode
AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

# boundary codes substituted in unicode mode
AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}

# category codes substituted in locale mode (only the WORD pair changes)
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

# category codes substituted in unicode mode
CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}

# flags
SRE_FLAG_TEMPLATE = 1    # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2  # case insensitive
SRE_FLAG_LOCALE = 4      # honour system locale
SRE_FLAG_MULTILINE = 8   # treat target as multiline string
SRE_FLAG_DOTALL = 16     # treat target as a single string
SRE_FLAG_UNICODE = 32    # use unicode "locale"
SRE_FLAG_VERBOSE = 64    # ignore whitespace and comments
SRE_FLAG_DEBUG = 128     # debugging
SRE_FLAG_ASCII = 256     # use ascii "locale"

# flags for INFO primitive
SRE_INFO_PREFIX = 1      # has prefix
SRE_INFO_LITERAL = 2     # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4     # pattern starts with character from given set

if __name__ == "__main__":
    # Running the module as a script regenerates sre_constants.h in the
    # current working directory.

    def dump(f, d, prefix):
        """Write one #define per code in *d*, in ascending numeric order."""
        f.writelines("#define %s_%s %d\n" % (prefix, code, code)
                     for code in sorted(d))

    with open("sre_constants.h", "w") as f:
        f.write("""\
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * NOTE: This file is generated by sre_constants.py. If you need
 * to change anything in here, edit sre_constants.py and run it.
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

""")

        f.write("#define SRE_MAGIC %d\n" % MAGIC)

        for table, prefix in ((OPCODES, "SRE_OP"),
                              (ATCODES, "SRE"),
                              (CHCODES, "SRE")):
            dump(f, table, prefix)

        f.writelines([
            "#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE,
            "#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE,
            "#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE,
            "#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE,
            "#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL,
            "#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE,
            "#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE,
            "#define SRE_FLAG_DEBUG %d\n" % SRE_FLAG_DEBUG,
            "#define SRE_FLAG_ASCII %d\n" % SRE_FLAG_ASCII,
        ])

        f.writelines([
            "#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX,
            "#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL,
            "#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET,
        ])

    print("done")