Home | History | Annotate | Download | only in Modules
      1 /*
      2  * Secret Labs' Regular Expression Engine
      3  *
      4  * regular expression matching engine
      5  *
      6  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
      7  *
      8  * See the _sre.c file for information on usage and redistribution.
      9  */
     10 
     11 #ifndef SRE_INCLUDED
     12 #define SRE_INCLUDED
     13 
     14 #include "sre_constants.h"
     15 
     16 /* size of a code word (must be unsigned short or larger, and
     17    large enough to hold a UCS4 character) */
     18 #ifdef Py_USING_UNICODE
     19 # define SRE_CODE Py_UCS4
     20 # if SIZEOF_SIZE_T > 4
     21 #  define SRE_MAXREPEAT (~(SRE_CODE)0)
     22 # else
     23 #  define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
     24 # endif
     25 #else
     26 # define SRE_CODE unsigned int
     27 # if SIZEOF_SIZE_T > SIZEOF_INT
     28 #  define SRE_MAXREPEAT (~(SRE_CODE)0)
     29 # else
     30 #  define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
     31 # endif
     32 #endif
     33 
     34 typedef struct {
     35     PyObject_VAR_HEAD
     36     Py_ssize_t groups; /* must be first! */
     37     PyObject* groupindex;
     38     PyObject* indexgroup;
     39     /* compatibility */
     40     PyObject* pattern; /* pattern source (or None) */
     41     int flags; /* flags used when compiling pattern source */
     42     PyObject *weakreflist; /* List of weak references */
     43     /* pattern code */
     44     Py_ssize_t codesize;
     45     SRE_CODE code[1];
     46 } PatternObject;
     47 
     48 #define PatternObject_GetCode(o) (((PatternObject*)(o))->code)
     49 
     50 typedef struct {
     51     PyObject_VAR_HEAD
     52     PyObject* string; /* link to the target string (must be first) */
     53     PyObject* regs; /* cached list of matching spans */
     54     PatternObject* pattern; /* link to the regex (pattern) object */
     55     Py_ssize_t pos, endpos; /* current target slice */
     56     Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
     57     Py_ssize_t groups; /* number of groups (start/end marks) */
     58     Py_ssize_t mark[1];
     59 } MatchObject;
     60 
     61 typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);
     62 
     63 /* FIXME: <fl> shouldn't be a constant, really... */
     64 #define SRE_MARK_SIZE 200
     65 
     66 typedef struct SRE_REPEAT_T {
     67     Py_ssize_t count;
     68     SRE_CODE* pattern; /* points to REPEAT operator arguments */
     69     void* last_ptr; /* helper to check for infinite loops */
     70     struct SRE_REPEAT_T *prev; /* points to previous repeat context */
     71 } SRE_REPEAT;
     72 
     73 typedef struct {
     74     /* string pointers */
     75     void* ptr; /* current position (also end of current slice) */
     76     void* beginning; /* start of original string */
     77     void* start; /* start of current slice */
     78     void* end; /* end of original string */
     79     /* attributes for the match object */
     80     PyObject* string;
     81     Py_ssize_t pos, endpos;
     82     /* character size */
     83     int charsize;
     84     /* registers */
     85     Py_ssize_t lastindex;
     86     Py_ssize_t lastmark;
     87     void* mark[SRE_MARK_SIZE];
     88     /* dynamically allocated stuff */
     89     char* data_stack;
     90     size_t data_stack_size;
     91     size_t data_stack_base;
     92     /* current repeat context */
     93     SRE_REPEAT *repeat;
     94     /* hooks */
     95     SRE_TOLOWER_HOOK lower;
     96 } SRE_STATE;
     97 
     98 typedef struct {
     99     PyObject_HEAD
    100     PyObject* pattern;
    101     SRE_STATE state;
    102 } ScannerObject;
    103 
    104 #endif
    105