Home | History | Annotate | Download | only in python2.7
      1 #
      2 # Secret Labs' Regular Expression Engine
      3 #
      4 # various symbols used by the regular expression engine.
      5 # run this script to update the _sre include files!
      6 #
      7 # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
      8 #
      9 # See the sre.py file for information on usage and redistribution.
     10 #
     11 
     12 """Internal support module for sre"""
     13 
# Version stamp for the constant tables in this module; the header
# generator at the bottom of this file writes it out as SRE_MAGIC.
# Update when constants are added or removed.

MAGIC = 20031017
     17 
     18 from _sre import MAXREPEAT
     19 
     20 # SRE standard exception (access as sre.error)
     21 # should this really be here?
     22 
     23 class error(Exception):
     24     pass
     25 
# operators
#
# Every symbol below is a plain string constant.  The symbols that are
# listed in OPCODES later in this file are given integer codes there.

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ANY_ALL = "any_all"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BIGCHARSET = "bigcharset"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
GROUPREF_EXISTS = "groupref_exists"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"
MIN_REPEAT_ONE = "min_repeat_one"

# positions (arguments for the AT operator -- presumably; the consumer is
# not in this file.  They are collected in ATCODES later in this file.)
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"

# categories (collected in CHCODES later in this file); each category
# comes in a plain form plus LOC_ and/or UNI_ variants where defined
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
     97 
# Ordered symbol tables.  The position of a symbol in its list becomes its
# numeric code once the list is passed through makedict() below, so
# reordering, inserting or removing entries changes the generated codes.
#
# NOTE: MAX_REPEAT and MIN_REPEAT are defined above but do not appear in
# OPCODES, so they receive no numeric code here.
OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET, BIGCHARSET,
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN,
    MIN_REPEAT_ONE

]

# position symbols, in code order
ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
    AT_UNI_NON_BOUNDARY
]

# category symbols, in code order
CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]
    144 
    145 def makedict(list):
    146     d = {}
    147     i = 0
    148     for item in list:
    149         d[item] = i
    150         i = i + 1
    151     return d
    152 
# Rebind each ordered list to a {symbol: numeric code} dict.  From this
# point on OPCODES, ATCODES and CHCODES are dicts, not lists.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)

# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}

# position replacements: plain symbol -> *_LINE variant
# (presumably applied when SRE_FLAG_MULTILINE is set -- the code that
# consults this table is not in this file)
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

# position replacements: plain boundary symbol -> LOC_ variant
AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

# position replacements: plain boundary symbol -> UNI_ variant
AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}

# category replacements: only the WORD categories have LOC_ variants;
# every other category maps to itself
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

# category replacements: every plain category maps to its UNI_ variant
CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}
    201 
# flags (each value is a distinct single bit)
# NOTE: the header generator at the bottom of this file writes every flag
# below except SRE_FLAG_DEBUG.
SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2 # case insensitive
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
SRE_FLAG_UNICODE = 32 # use unicode locale
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
SRE_FLAG_DEBUG = 128 # debugging

# flags for INFO primitive (each value is a distinct single bit)
SRE_INFO_PREFIX = 1 # has prefix
SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4 # pattern starts with character from given set
    216 
    217 if __name__ == "__main__":
    218     def dump(f, d, prefix):
    219         items = d.items()
    220         items.sort(key=lambda a: a[1])
    221         for k, v in items:
    222             f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
    223     f = open("sre_constants.h", "w")
    224     f.write("""\
    225 /*
    226  * Secret Labs' Regular Expression Engine
    227  *
    228  * regular expression matching engine
    229  *
    230  * NOTE: This file is generated by sre_constants.py.  If you need
    231  * to change anything in here, edit sre_constants.py and run it.
    232  *
    233  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
    234  *
    235  * See the _sre.c file for information on usage and redistribution.
    236  */
    237 
    238 """)
    239 
    240     f.write("#define SRE_MAGIC %d\n" % MAGIC)
    241 
    242     dump(f, OPCODES, "SRE_OP")
    243     dump(f, ATCODES, "SRE")
    244     dump(f, CHCODES, "SRE")
    245 
    246     f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
    247     f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
    248     f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
    249     f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
    250     f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
    251     f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
    252     f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
    253 
    254     f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
    255     f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
    256     f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)
    257 
    258     f.close()
    259     print "done"
    260