Home | History | Annotate | Download | only in Lib
      1 #
      2 # Secret Labs' Regular Expression Engine
      3 #
      4 # various symbols used by the regular expression engine.
      5 # run this script to update the _sre include files!
      6 #
      7 # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
      8 #
      9 # See the sre.py file for information on usage and redistribution.
     10 #
     11 
     12 """Internal support module for sre"""
     13 
# update when constants are added or removed; the __main__ script at the
# bottom of this file writes this value into sre_constants.h as SRE_MAGIC

MAGIC = 20031017

# Prefer the MAXREPEAT value exported by the _sre module.  When _sre can be
# imported but does not define MAXREPEAT, "from _sre import MAXREPEAT"
# raises ImportError; in that case fall back to 65535 and store the value
# both in this module and on the _sre module itself.  If _sre cannot be
# imported at all, the second import re-raises ImportError.
try:
    from _sre import MAXREPEAT
except ImportError:
    import _sre
    MAXREPEAT = _sre.MAXREPEAT = 65535
     23 
     24 # SRE standard exception (access as sre.error)
     25 # should this really be here?
     26 
     27 class error(Exception):
     28     pass
     29 
# operators
#
# Each symbol is a string equal to its own name in lower case.  The symbols
# listed in OPCODES further down are given numeric codes by their position
# in that list.  MAX_REPEAT and MIN_REPEAT are defined here but do not
# appear in OPCODES, so this module assigns them no numeric code
# (presumably they are translated to other opcodes elsewhere -- TODO
# confirm against the compiler).

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ANY_ALL = "any_all"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BIGCHARSET = "bigcharset"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
GROUPREF_EXISTS = "groupref_exists"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"
MIN_REPEAT_ONE = "min_repeat_one"
     67 
# positions
#
# Symbols for the position codes collected in ATCODES below.  The _LINE,
# _LOC_ and _UNI_ variants are the targets of the AT_MULTILINE, AT_LOCALE
# and AT_UNICODE replacement tables defined later in this file.
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
     81 
# categories
#
# Symbols for the character category codes collected in CHCODES below.
# Every category has a NOT_ counterpart.  The _LOC_ and _UNI_ variants are
# the targets of the CH_LOCALE and CH_UNICODE replacement tables defined
# later in this file.
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
    101 
# Ordered list of operator symbols.  The position of each symbol in this
# list becomes its numeric opcode once the list is passed through
# makedict() below, and those numbers are what the __main__ script writes
# into sre_constants.h -- so inserting, removing or reordering entries
# changes the generated codes.
OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET, BIGCHARSET,
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN,
    MIN_REPEAT_ONE

]
    131 
# Ordered list of position symbols; list position becomes the numeric
# code once the list is passed through makedict() below.
ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
    AT_UNI_NON_BOUNDARY
]
    138 
# Ordered list of character category symbols; list position becomes the
# numeric code once the list is passed through makedict() below.
CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]
    148 
    149 def makedict(list):
    150     d = {}
    151     i = 0
    152     for item in list:
    153         d[item] = i
    154         i = i + 1
    155     return d
    156 
# Rebind each name from the ordered list to a dict that maps every symbol
# to its index in that list.  From here on OPCODES, ATCODES and CHCODES
# are dicts, not lists.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)
    160 
# replacement operations for "ignore case" mode: maps each operator that
# has an _IGNORE variant to that variant
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}
    168 
# Replacement tables for position codes.  Each maps a generic position to
# a more specific variant; positions that are not keys have no replacement
# in that table.  (Presumably consulted when the matching SRE_FLAG_* is
# set -- TODO confirm against the code that uses these tables.)

# beginning/end of string -> beginning/end of line
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

# word boundary tests -> their _LOC_ variants
AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

# word boundary tests -> their _UNI_ variants
AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}
    183 
# Replacement table for category codes in the _LOC_ flavour.  Only the two
# word categories are replaced; the digit, space and linebreak categories
# map to themselves.
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}
    194 
# Replacement table for category codes in the _UNI_ flavour.  Every one of
# the eight basic categories is replaced by its CATEGORY_UNI_* counterpart.
CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}
    205 
# flags
#
# Each flag is a distinct power of two, so flags can be combined with
# bitwise OR.  Note that the __main__ script at the bottom of this file
# writes every flag except SRE_FLAG_DEBUG into sre_constants.h.
SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2 # case insensitive
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
SRE_FLAG_UNICODE = 32 # use unicode locale
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
SRE_FLAG_DEBUG = 128 # debugging
    215 
# flags for INFO primitive (distinct powers of two, so they can be
# combined with bitwise OR)
SRE_INFO_PREFIX = 1 # has prefix
SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4 # pattern starts with character from given set
    220 
    221 if __name__ == "__main__":
    222     def dump(f, d, prefix):
    223         items = d.items()
    224         items.sort(key=lambda a: a[1])
    225         for k, v in items:
    226             f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
    227     f = open("sre_constants.h", "w")
    228     f.write("""\
    229 /*
    230  * Secret Labs' Regular Expression Engine
    231  *
    232  * regular expression matching engine
    233  *
    234  * NOTE: This file is generated by sre_constants.py.  If you need
    235  * to change anything in here, edit sre_constants.py and run it.
    236  *
    237  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
    238  *
    239  * See the _sre.c file for information on usage and redistribution.
    240  */
    241 
    242 """)
    243 
    244     f.write("#define SRE_MAGIC %d\n" % MAGIC)
    245 
    246     dump(f, OPCODES, "SRE_OP")
    247     dump(f, ATCODES, "SRE")
    248     dump(f, CHCODES, "SRE")
    249 
    250     f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
    251     f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
    252     f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
    253     f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
    254     f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
    255     f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
    256     f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
    257 
    258     f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
    259     f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
    260     f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)
    261 
    262     f.close()
    263     print "done"
    264