Home | History | Annotate | Download | only in Modules
      1 /*
      2  * Secret Labs' Regular Expression Engine
      3  *
      4  * regular expression matching engine
      5  *
      6  * partial history:
      7  * 1999-10-24 fl  created (based on existing template matcher code)
      8  * 2000-03-06 fl  first alpha, sort of
      9  * 2000-08-01 fl  fixes for 1.6b1
     10  * 2000-08-07 fl  use PyOS_CheckStack() if available
     11  * 2000-09-20 fl  added expand method
     12  * 2001-03-20 fl  lots of fixes for 2.1b2
     13  * 2001-04-15 fl  export copyright as Python attribute, not global
     14  * 2001-04-28 fl  added __copy__ methods (work in progress)
     15  * 2001-05-14 fl  fixes for 1.5.2 compatibility
     16  * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
     17  * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
     18  * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
     19  * 2001-10-21 fl  added sub/subn primitive
     20  * 2001-10-24 fl  added finditer primitive (for 2.2 only)
     21  * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
     22  * 2002-11-09 fl  fixed empty sub/subn return type
     23  * 2003-04-18 mvl fully support 4-byte codes
     24  * 2003-10-17 gn  implemented non recursive scheme
     25  *
     26  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
     27  *
     28  * This version of the SRE library can be redistributed under CNRI's
     29  * Python 1.6 license.  For any other use, please contact Secret Labs
     30  * AB (info (at) pythonware.com).
     31  *
     32  * Portions of this engine have been developed in cooperation with
     33  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
     34  * other compatibility work.
     35  */
     36 
     37 #ifndef SRE_RECURSIVE
     38 
     39 static char copyright[] =
     40     " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
     41 
     42 #define PY_SSIZE_T_CLEAN
     43 
     44 #include "Python.h"
     45 #include "structmember.h" /* offsetof */
     46 
     47 #include "sre.h"
     48 
     49 #include <ctype.h>
     50 
     51 /* name of this module, minus the leading underscore */
     52 #if !defined(SRE_MODULE)
     53 #define SRE_MODULE "sre"
     54 #endif
     55 
     56 #define SRE_PY_MODULE "re"
     57 
     58 /* defining this one enables tracing */
     59 #undef VERBOSE
     60 
     61 #if PY_VERSION_HEX >= 0x01060000
     62 #if PY_VERSION_HEX  < 0x02020000 || defined(Py_USING_UNICODE)
     63 /* defining this enables unicode support (default under 1.6a1 and later) */
     64 #define HAVE_UNICODE
     65 #endif
     66 #endif
     67 
     68 /* -------------------------------------------------------------------- */
     69 /* optional features */
     70 
     71 /* enables fast searching */
     72 #define USE_FAST_SEARCH
     73 
     74 /* enables aggressive inlining (always on for Visual C) */
     75 #undef USE_INLINE
     76 
     77 /* enables copy/deepcopy handling (work in progress) */
     78 #undef USE_BUILTIN_COPY
     79 
     80 #if PY_VERSION_HEX < 0x01060000
     81 #define PyObject_DEL(op) PyMem_DEL((op))
     82 #endif
     83 
     84 /* -------------------------------------------------------------------- */
     85 
     86 #if defined(_MSC_VER)
     87 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
     88 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
     89 /* fastest possible local call under MSVC */
     90 #define LOCAL(type) static __inline type __fastcall
     91 #elif defined(USE_INLINE)
     92 #define LOCAL(type) static inline type
     93 #else
     94 #define LOCAL(type) static type
     95 #endif
     96 
     97 /* error codes */
     98 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
     99 #define SRE_ERROR_STATE -2 /* illegal state */
    100 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
    101 #define SRE_ERROR_MEMORY -9 /* out of memory */
    102 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
    103 
    104 #if defined(VERBOSE)
    105 #define TRACE(v) printf v
    106 #else
    107 #define TRACE(v)
    108 #endif
    109 
    110 /* -------------------------------------------------------------------- */
    111 /* search engine state */
    112 
    113 /* default character predicates (run sre_chars.py to regenerate tables) */
    114 
    115 #define SRE_DIGIT_MASK 1
    116 #define SRE_SPACE_MASK 2
    117 #define SRE_LINEBREAK_MASK 4
    118 #define SRE_ALNUM_MASK 8
    119 #define SRE_WORD_MASK 16
    120 
    121 /* FIXME: this assumes ASCII.  create tables in init_sre() instead */
    122 
    123 static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
    124 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
    125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
    126 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    127 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
    128 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    129 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
    130 
    131 static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
    132 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
    133 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
    134 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
    135 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
    136 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
    137 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
    138 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
    139 120, 121, 122, 123, 124, 125, 126, 127 };
    140 
    141 #define SRE_IS_DIGIT(ch)\
    142     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
    143 #define SRE_IS_SPACE(ch)\
    144     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
    145 #define SRE_IS_LINEBREAK(ch)\
    146     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
    147 #define SRE_IS_ALNUM(ch)\
    148     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
    149 #define SRE_IS_WORD(ch)\
    150     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
    151 
    152 static unsigned int sre_lower(unsigned int ch)
    153 {
    154     return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
    155 }
    156 
    157 /* locale-specific character predicates */
    158 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
    159  * warnings when c's type supports only numbers < N+1 */
    160 #define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
    161 #define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
    162 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
    163 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
    164 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
    165 
    166 static unsigned int sre_lower_locale(unsigned int ch)
    167 {
    168     return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
    169 }
    170 
    171 /* unicode-specific character predicates */
    172 
    173 #if defined(HAVE_UNICODE)
    174 
    175 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
    176 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
    177 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
    178 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
    179 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
    180 
    181 static unsigned int sre_lower_unicode(unsigned int ch)
    182 {
    183     return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
    184 }
    185 
    186 #endif
    187 
    188 LOCAL(int)
    189 sre_category(SRE_CODE category, unsigned int ch)
    190 {
    191     switch (category) {
    192 
    193     case SRE_CATEGORY_DIGIT:
    194         return SRE_IS_DIGIT(ch);
    195     case SRE_CATEGORY_NOT_DIGIT:
    196         return !SRE_IS_DIGIT(ch);
    197     case SRE_CATEGORY_SPACE:
    198         return SRE_IS_SPACE(ch);
    199     case SRE_CATEGORY_NOT_SPACE:
    200         return !SRE_IS_SPACE(ch);
    201     case SRE_CATEGORY_WORD:
    202         return SRE_IS_WORD(ch);
    203     case SRE_CATEGORY_NOT_WORD:
    204         return !SRE_IS_WORD(ch);
    205     case SRE_CATEGORY_LINEBREAK:
    206         return SRE_IS_LINEBREAK(ch);
    207     case SRE_CATEGORY_NOT_LINEBREAK:
    208         return !SRE_IS_LINEBREAK(ch);
    209 
    210     case SRE_CATEGORY_LOC_WORD:
    211         return SRE_LOC_IS_WORD(ch);
    212     case SRE_CATEGORY_LOC_NOT_WORD:
    213         return !SRE_LOC_IS_WORD(ch);
    214 
    215 #if defined(HAVE_UNICODE)
    216     case SRE_CATEGORY_UNI_DIGIT:
    217         return SRE_UNI_IS_DIGIT(ch);
    218     case SRE_CATEGORY_UNI_NOT_DIGIT:
    219         return !SRE_UNI_IS_DIGIT(ch);
    220     case SRE_CATEGORY_UNI_SPACE:
    221         return SRE_UNI_IS_SPACE(ch);
    222     case SRE_CATEGORY_UNI_NOT_SPACE:
    223         return !SRE_UNI_IS_SPACE(ch);
    224     case SRE_CATEGORY_UNI_WORD:
    225         return SRE_UNI_IS_WORD(ch);
    226     case SRE_CATEGORY_UNI_NOT_WORD:
    227         return !SRE_UNI_IS_WORD(ch);
    228     case SRE_CATEGORY_UNI_LINEBREAK:
    229         return SRE_UNI_IS_LINEBREAK(ch);
    230     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
    231         return !SRE_UNI_IS_LINEBREAK(ch);
    232 #else
    233     case SRE_CATEGORY_UNI_DIGIT:
    234         return SRE_IS_DIGIT(ch);
    235     case SRE_CATEGORY_UNI_NOT_DIGIT:
    236         return !SRE_IS_DIGIT(ch);
    237     case SRE_CATEGORY_UNI_SPACE:
    238         return SRE_IS_SPACE(ch);
    239     case SRE_CATEGORY_UNI_NOT_SPACE:
    240         return !SRE_IS_SPACE(ch);
    241     case SRE_CATEGORY_UNI_WORD:
    242         return SRE_LOC_IS_WORD(ch);
    243     case SRE_CATEGORY_UNI_NOT_WORD:
    244         return !SRE_LOC_IS_WORD(ch);
    245     case SRE_CATEGORY_UNI_LINEBREAK:
    246         return SRE_IS_LINEBREAK(ch);
    247     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
    248         return !SRE_IS_LINEBREAK(ch);
    249 #endif
    250     }
    251     return 0;
    252 }
    253 
    254 /* helpers */
    255 
    256 static void
    257 data_stack_dealloc(SRE_STATE* state)
    258 {
    259     if (state->data_stack) {
    260         PyMem_FREE(state->data_stack);
    261         state->data_stack = NULL;
    262     }
    263     state->data_stack_size = state->data_stack_base = 0;
    264 }
    265 
    266 static int
    267 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
    268 {
    269     Py_ssize_t minsize, cursize;
    270     minsize = state->data_stack_base+size;
    271     cursize = state->data_stack_size;
    272     if (cursize < minsize) {
    273         void* stack;
    274         cursize = minsize+minsize/4+1024;
    275         TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
    276         stack = PyMem_REALLOC(state->data_stack, cursize);
    277         if (!stack) {
    278             data_stack_dealloc(state);
    279             return SRE_ERROR_MEMORY;
    280         }
    281         state->data_stack = (char *)stack;
    282         state->data_stack_size = cursize;
    283     }
    284     return 0;
    285 }
    286 
    287 /* generate 8-bit version */
    288 
    289 #define SRE_CHAR unsigned char
    290 #define SRE_AT sre_at
    291 #define SRE_COUNT sre_count
    292 #define SRE_CHARSET sre_charset
    293 #define SRE_INFO sre_info
    294 #define SRE_MATCH sre_match
    295 #define SRE_MATCH_CONTEXT sre_match_context
    296 #define SRE_SEARCH sre_search
    297 #define SRE_LITERAL_TEMPLATE sre_literal_template
    298 
    299 #if defined(HAVE_UNICODE)
    300 
    301 #define SRE_RECURSIVE
    302 #include "_sre.c"
    303 #undef SRE_RECURSIVE
    304 
    305 #undef SRE_LITERAL_TEMPLATE
    306 #undef SRE_SEARCH
    307 #undef SRE_MATCH
    308 #undef SRE_MATCH_CONTEXT
    309 #undef SRE_INFO
    310 #undef SRE_CHARSET
    311 #undef SRE_COUNT
    312 #undef SRE_AT
    313 #undef SRE_CHAR
    314 
    315 /* generate 16-bit unicode version */
    316 
    317 #define SRE_CHAR Py_UNICODE
    318 #define SRE_AT sre_uat
    319 #define SRE_COUNT sre_ucount
    320 #define SRE_CHARSET sre_ucharset
    321 #define SRE_INFO sre_uinfo
    322 #define SRE_MATCH sre_umatch
    323 #define SRE_MATCH_CONTEXT sre_umatch_context
    324 #define SRE_SEARCH sre_usearch
    325 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
    326 #endif
    327 
    328 #endif /* SRE_RECURSIVE */
    329 
    330 /* -------------------------------------------------------------------- */
    331 /* String matching engine */
    332 
    333 /* the following section is compiled twice, with different character
    334    settings */
    335 
    336 LOCAL(int)
    337 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
    338 {
    339     /* check if pointer is at given position */
    340 
    341     Py_ssize_t thisp, thatp;
    342 
    343     switch (at) {
    344 
    345     case SRE_AT_BEGINNING:
    346     case SRE_AT_BEGINNING_STRING:
    347         return ((void*) ptr == state->beginning);
    348 
    349     case SRE_AT_BEGINNING_LINE:
    350         return ((void*) ptr == state->beginning ||
    351                 SRE_IS_LINEBREAK((int) ptr[-1]));
    352 
    353     case SRE_AT_END:
    354         return (((SRE_CHAR *)state->end - ptr == 1 &&
    355                  SRE_IS_LINEBREAK((int) ptr[0])) ||
    356                 ((void*) ptr == state->end));
    357 
    358     case SRE_AT_END_LINE:
    359         return ((void*) ptr == state->end ||
    360                 SRE_IS_LINEBREAK((int) ptr[0]));
    361 
    362     case SRE_AT_END_STRING:
    363         return ((void*) ptr == state->end);
    364 
    365     case SRE_AT_BOUNDARY:
    366         if (state->beginning == state->end)
    367             return 0;
    368         thatp = ((void*) ptr > state->beginning) ?
    369             SRE_IS_WORD((int) ptr[-1]) : 0;
    370         thisp = ((void*) ptr < state->end) ?
    371             SRE_IS_WORD((int) ptr[0]) : 0;
    372         return thisp != thatp;
    373 
    374     case SRE_AT_NON_BOUNDARY:
    375         if (state->beginning == state->end)
    376             return 0;
    377         thatp = ((void*) ptr > state->beginning) ?
    378             SRE_IS_WORD((int) ptr[-1]) : 0;
    379         thisp = ((void*) ptr < state->end) ?
    380             SRE_IS_WORD((int) ptr[0]) : 0;
    381         return thisp == thatp;
    382 
    383     case SRE_AT_LOC_BOUNDARY:
    384         if (state->beginning == state->end)
    385             return 0;
    386         thatp = ((void*) ptr > state->beginning) ?
    387             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
    388         thisp = ((void*) ptr < state->end) ?
    389             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
    390         return thisp != thatp;
    391 
    392     case SRE_AT_LOC_NON_BOUNDARY:
    393         if (state->beginning == state->end)
    394             return 0;
    395         thatp = ((void*) ptr > state->beginning) ?
    396             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
    397         thisp = ((void*) ptr < state->end) ?
    398             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
    399         return thisp == thatp;
    400 
    401 #if defined(HAVE_UNICODE)
    402     case SRE_AT_UNI_BOUNDARY:
    403         if (state->beginning == state->end)
    404             return 0;
    405         thatp = ((void*) ptr > state->beginning) ?
    406             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
    407         thisp = ((void*) ptr < state->end) ?
    408             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
    409         return thisp != thatp;
    410 
    411     case SRE_AT_UNI_NON_BOUNDARY:
    412         if (state->beginning == state->end)
    413             return 0;
    414         thatp = ((void*) ptr > state->beginning) ?
    415             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
    416         thisp = ((void*) ptr < state->end) ?
    417             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
    418         return thisp == thatp;
    419 #endif
    420 
    421     }
    422 
    423     return 0;
    424 }
    425 
    426 LOCAL(int)
    427 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
    428 {
    429     /* check if character is a member of the given set */
    430 
    431     int ok = 1;
    432 
    433     for (;;) {
    434         switch (*set++) {
    435 
    436         case SRE_OP_FAILURE:
    437             return !ok;
    438 
    439         case SRE_OP_LITERAL:
    440             /* <LITERAL> <code> */
    441             if (ch == set[0])
    442                 return ok;
    443             set++;
    444             break;
    445 
    446         case SRE_OP_CATEGORY:
    447             /* <CATEGORY> <code> */
    448             if (sre_category(set[0], (int) ch))
    449                 return ok;
    450             set += 1;
    451             break;
    452 
    453         case SRE_OP_CHARSET:
    454             if (sizeof(SRE_CODE) == 2) {
    455                 /* <CHARSET> <bitmap> (16 bits per code word) */
    456                 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
    457                     return ok;
    458                 set += 16;
    459             }
    460             else {
    461                 /* <CHARSET> <bitmap> (32 bits per code word) */
    462                 if (ch < 256 && (set[ch >> 5] & (1u << (ch & 31))))
    463                     return ok;
    464                 set += 8;
    465             }
    466             break;
    467 
    468         case SRE_OP_RANGE:
    469             /* <RANGE> <lower> <upper> */
    470             if (set[0] <= ch && ch <= set[1])
    471                 return ok;
    472             set += 2;
    473             break;
    474 
    475         case SRE_OP_NEGATE:
    476             ok = !ok;
    477             break;
    478 
    479         case SRE_OP_BIGCHARSET:
    480             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
    481         {
    482             Py_ssize_t count, block;
    483             count = *(set++);
    484 
    485             if (sizeof(SRE_CODE) == 2) {
    486                 block = ((unsigned char*)set)[ch >> 8];
    487                 set += 128;
    488                 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
    489                     return ok;
    490                 set += count*16;
    491             }
    492             else {
    493                 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
    494                  * warnings when c's type supports only numbers < N+1 */
    495                 if (!(ch & ~65535))
    496                     block = ((unsigned char*)set)[ch >> 8];
    497                 else
    498                     block = -1;
    499                 set += 64;
    500                 if (block >=0 &&
    501                     (set[block*8 + ((ch & 255)>>5)] & (1u << (ch & 31))))
    502                     return ok;
    503                 set += count*8;
    504             }
    505             break;
    506         }
    507 
    508         default:
    509             /* internal error -- there's not much we can do about it
    510                here, so let's just pretend it didn't match... */
    511             return 0;
    512         }
    513     }
    514 }
    515 
    516 LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
    517 
    518 LOCAL(Py_ssize_t)
    519 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
    520 {
    521     SRE_CODE chr;
    522     SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
    523     SRE_CHAR* end = (SRE_CHAR *)state->end;
    524     Py_ssize_t i;
    525 
    526     /* adjust end */
    527     if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
    528         end = ptr + maxcount;
    529 
    530     switch (pattern[0]) {
    531 
    532     case SRE_OP_IN:
    533         /* repeated set */
    534         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
    535         while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
    536             ptr++;
    537         break;
    538 
    539     case SRE_OP_ANY:
    540         /* repeated dot wildcard. */
    541         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
    542         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
    543             ptr++;
    544         break;
    545 
    546     case SRE_OP_ANY_ALL:
    547         /* repeated dot wildcard.  skip to the end of the target
    548            string, and backtrack from there */
    549         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
    550         ptr = end;
    551         break;
    552 
    553     case SRE_OP_LITERAL:
    554         /* repeated literal */
    555         chr = pattern[1];
    556         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
    557         while (ptr < end && (SRE_CODE) *ptr == chr)
    558             ptr++;
    559         break;
    560 
    561     case SRE_OP_LITERAL_IGNORE:
    562         /* repeated literal */
    563         chr = pattern[1];
    564         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
    565         while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
    566             ptr++;
    567         break;
    568 
    569     case SRE_OP_NOT_LITERAL:
    570         /* repeated non-literal */
    571         chr = pattern[1];
    572         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
    573         while (ptr < end && (SRE_CODE) *ptr != chr)
    574             ptr++;
    575         break;
    576 
    577     case SRE_OP_NOT_LITERAL_IGNORE:
    578         /* repeated non-literal */
    579         chr = pattern[1];
    580         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
    581         while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
    582             ptr++;
    583         break;
    584 
    585     default:
    586         /* repeated single character pattern */
    587         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
    588         while ((SRE_CHAR*) state->ptr < end) {
    589             i = SRE_MATCH(state, pattern);
    590             if (i < 0)
    591                 return i;
    592             if (!i)
    593                 break;
    594         }
    595         TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
    596                (SRE_CHAR*) state->ptr - ptr));
    597         return (SRE_CHAR*) state->ptr - ptr;
    598     }
    599 
    600     TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
    601            ptr - (SRE_CHAR*) state->ptr));
    602     return ptr - (SRE_CHAR*) state->ptr;
    603 }
    604 
    605 #if 0 /* not used in this release */
    606 LOCAL(int)
    607 SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
    608 {
    609     /* check if an SRE_OP_INFO block matches at the current position.
    610        returns the number of SRE_CODE objects to skip if successful, 0
    611        if no match */
    612 
    613     SRE_CHAR* end = state->end;
    614     SRE_CHAR* ptr = state->ptr;
    615     Py_ssize_t i;
    616 
    617     /* check minimal length */
    618     if (pattern[3] && (end - ptr) < pattern[3])
    619         return 0;
    620 
    621     /* check known prefix */
    622     if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
    623         /* <length> <skip> <prefix data> <overlap data> */
    624         for (i = 0; i < pattern[5]; i++)
    625             if ((SRE_CODE) ptr[i] != pattern[7 + i])
    626                 return 0;
    627         return pattern[0] + 2 * pattern[6];
    628     }
    629     return pattern[0];
    630 }
    631 #endif
    632 
    633 /* The macros below should be used to protect recursive SRE_MATCH()
    634  * calls that *failed* and do *not* return immediately (IOW, those
    635  * that will backtrack). Explaining:
    636  *
    637  * - Recursive SRE_MATCH() returned true: that's usually a success
    638  *   (besides atypical cases like ASSERT_NOT), therefore there's no
    639  *   reason to restore lastmark;
    640  *
    641  * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
    642  *   is returning to the caller: If the current SRE_MATCH() is the
    643  *   top function of the recursion, returning false will be a matching
    644  *   failure, and it doesn't matter where lastmark is pointing to.
    645  *   If it's *not* the top function, it will be a recursive SRE_MATCH()
    646  *   failure by itself, and the calling SRE_MATCH() will have to deal
    647  *   with the failure by the same rules explained here (it will restore
    648  *   lastmark by itself if necessary);
    649  *
    650  * - Recursive SRE_MATCH() returned false, and will continue the
    651  *   outside 'for' loop: must be protected when breaking, since the next
    652  *   OP could potentially depend on lastmark;
    653  *
    654  * - Recursive SRE_MATCH() returned false, and will be called again
    655  *   inside a local for/while loop: must be protected between each
    656  *   loop iteration, since the recursive SRE_MATCH() could do anything,
    657  *   and could potentially depend on lastmark.
    658  *
    659  * For more information, check the discussion at SF patch #712900.
    660  */
    661 #define LASTMARK_SAVE()     \
    662     do { \
    663         ctx->lastmark = state->lastmark; \
    664         ctx->lastindex = state->lastindex; \
    665     } while (0)
    666 #define LASTMARK_RESTORE()  \
    667     do { \
    668         state->lastmark = ctx->lastmark; \
    669         state->lastindex = ctx->lastindex; \
    670     } while (0)
    671 
    672 #define RETURN_ERROR(i) do { return i; } while(0)
    673 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
    674 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
    675 
    676 #define RETURN_ON_ERROR(i) \
    677     do { if (i < 0) RETURN_ERROR(i); } while (0)
    678 #define RETURN_ON_SUCCESS(i) \
    679     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
    680 #define RETURN_ON_FAILURE(i) \
    681     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
    682 
    683 #define SFY(x) #x
    684 
    685 #define DATA_STACK_ALLOC(state, type, ptr) \
    686 do { \
    687     alloc_pos = state->data_stack_base; \
    688     TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
    689            "(%" PY_FORMAT_SIZE_T "d)\n", \
    690            SFY(type), alloc_pos, sizeof(type))); \
    691     if (sizeof(type) > state->data_stack_size - alloc_pos) { \
    692         int j = data_stack_grow(state, sizeof(type)); \
    693         if (j < 0) return j; \
    694         if (ctx_pos != -1) \
    695             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    696     } \
    697     ptr = (type*)(state->data_stack+alloc_pos); \
    698     state->data_stack_base += sizeof(type); \
    699 } while (0)
    700 
    701 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
    702 do { \
    703     TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", SFY(type), pos)); \
    704     ptr = (type*)(state->data_stack+pos); \
    705 } while (0)
    706 
    707 #define DATA_STACK_PUSH(state, data, size) \
    708 do { \
    709     TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
    710            "(%" PY_FORMAT_SIZE_T "d)\n", \
    711            data, state->data_stack_base, size)); \
    712     if (size > state->data_stack_size - state->data_stack_base) { \
    713         int j = data_stack_grow(state, size); \
    714         if (j < 0) return j; \
    715         if (ctx_pos != -1) \
    716             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    717     } \
    718     memcpy(state->data_stack+state->data_stack_base, data, size); \
    719     state->data_stack_base += size; \
    720 } while (0)
    721 
    722 #define DATA_STACK_POP(state, data, size, discard) \
    723 do { \
    724     TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
    725            "(%" PY_FORMAT_SIZE_T "d)\n", \
    726            data, state->data_stack_base-size, size)); \
    727     memcpy(data, state->data_stack+state->data_stack_base-size, size); \
    728     if (discard) \
    729         state->data_stack_base -= size; \
    730 } while (0)
    731 
    732 #define DATA_STACK_POP_DISCARD(state, size) \
    733 do { \
    734     TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
    735            "(%" PY_FORMAT_SIZE_T "d)\n", \
    736            state->data_stack_base-size, size)); \
    737     state->data_stack_base -= size; \
    738 } while(0)
    739 
    740 #define DATA_PUSH(x) \
    741     DATA_STACK_PUSH(state, (x), sizeof(*(x)))
    742 #define DATA_POP(x) \
    743     DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
    744 #define DATA_POP_DISCARD(x) \
    745     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
    746 #define DATA_ALLOC(t,p) \
    747     DATA_STACK_ALLOC(state, t, p)
    748 #define DATA_LOOKUP_AT(t,p,pos) \
    749     DATA_STACK_LOOKUP_AT(state,t,p,pos)
    750 
    751 #define MARK_PUSH(lastmark) \
    752     do if (lastmark > 0) { \
    753         i = lastmark; /* ctx->lastmark may change if reallocated */ \
    754         DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
    755     } while (0)
    756 #define MARK_POP(lastmark) \
    757     do if (lastmark > 0) { \
    758         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
    759     } while (0)
    760 #define MARK_POP_KEEP(lastmark) \
    761     do if (lastmark > 0) { \
    762         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
    763     } while (0)
    764 #define MARK_POP_DISCARD(lastmark) \
    765     do if (lastmark > 0) { \
    766         DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
    767     } while (0)
    768 
    769 #define JUMP_NONE            0
    770 #define JUMP_MAX_UNTIL_1     1
    771 #define JUMP_MAX_UNTIL_2     2
    772 #define JUMP_MAX_UNTIL_3     3
    773 #define JUMP_MIN_UNTIL_1     4
    774 #define JUMP_MIN_UNTIL_2     5
    775 #define JUMP_MIN_UNTIL_3     6
    776 #define JUMP_REPEAT          7
    777 #define JUMP_REPEAT_ONE_1    8
    778 #define JUMP_REPEAT_ONE_2    9
    779 #define JUMP_MIN_REPEAT_ONE  10
    780 #define JUMP_BRANCH          11
    781 #define JUMP_ASSERT          12
    782 #define JUMP_ASSERT_NOT      13
    783 
    784 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
    785     DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
    786     nextctx->last_ctx_pos = ctx_pos; \
    787     nextctx->jump = jumpvalue; \
    788     nextctx->pattern = nextpattern; \
    789     ctx_pos = alloc_pos; \
    790     ctx = nextctx; \
    791     goto entrance; \
    792     jumplabel: \
    793     while (0) /* gcc doesn't like labels at end of scopes */ \
    794 
    795 typedef struct {
    796     Py_ssize_t last_ctx_pos;
    797     Py_ssize_t jump;
    798     SRE_CHAR* ptr;
    799     SRE_CODE* pattern;
    800     Py_ssize_t count;
    801     Py_ssize_t lastmark;
    802     Py_ssize_t lastindex;
    803     union {
    804         SRE_CODE chr;
    805         SRE_REPEAT* rep;
    806     } u;
    807 } SRE_MATCH_CONTEXT;
    808 
    809 /* check if string matches the given pattern.  returns <0 for
    810    error, 0 for failure, and 1 for success */
    811 LOCAL(Py_ssize_t)
    812 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
    813 {
    814     SRE_CHAR* end = (SRE_CHAR *)state->end;
    815     Py_ssize_t alloc_pos, ctx_pos = -1;
    816     Py_ssize_t i, ret = 0;
    817     Py_ssize_t jump;
    818     unsigned int sigcount=0;
    819 
    820     SRE_MATCH_CONTEXT* ctx;
    821     SRE_MATCH_CONTEXT* nextctx;
    822 
    823     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
    824 
    825     DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
    826     ctx->last_ctx_pos = -1;
    827     ctx->jump = JUMP_NONE;
    828     ctx->pattern = pattern;
    829     ctx_pos = alloc_pos;
    830 
    831 entrance:
    832 
    833     ctx->ptr = (SRE_CHAR *)state->ptr;
    834 
    835     if (ctx->pattern[0] == SRE_OP_INFO) {
    836         /* optimization info block */
    837         /* <INFO> <1=skip> <2=flags> <3=min> ... */
    838         if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
    839             TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
    840                    "need %" PY_FORMAT_SIZE_T "d)\n",
    841                    (end - ctx->ptr), (Py_ssize_t) ctx->pattern[3]));
    842             RETURN_FAILURE;
    843         }
    844         ctx->pattern += ctx->pattern[1] + 1;
    845     }
    846 
    847     for (;;) {
    848         ++sigcount;
    849         if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
    850             RETURN_ERROR(SRE_ERROR_INTERRUPTED);
    851 
    852         switch (*ctx->pattern++) {
    853 
    854         case SRE_OP_MARK:
    855             /* set mark */
    856             /* <MARK> <gid> */
    857             TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
    858                    ctx->ptr, ctx->pattern[0]));
    859             i = ctx->pattern[0];
    860             if (i & 1)
    861                 state->lastindex = i/2 + 1;
    862             if (i > state->lastmark) {
    863                 /* state->lastmark is the highest valid index in the
    864                    state->mark array.  If it is increased by more than 1,
    865                    the intervening marks must be set to NULL to signal
    866                    that these marks have not been encountered. */
    867                 Py_ssize_t j = state->lastmark + 1;
    868                 while (j < i)
    869                     state->mark[j++] = NULL;
    870                 state->lastmark = i;
    871             }
    872             state->mark[i] = ctx->ptr;
    873             ctx->pattern++;
    874             break;
    875 
    876         case SRE_OP_LITERAL:
    877             /* match literal string */
    878             /* <LITERAL> <code> */
    879             TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
    880                    ctx->ptr, *ctx->pattern));
    881             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
    882                 RETURN_FAILURE;
    883             ctx->pattern++;
    884             ctx->ptr++;
    885             break;
    886 
    887         case SRE_OP_NOT_LITERAL:
    888             /* match anything that is not literal character */
    889             /* <NOT_LITERAL> <code> */
    890             TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
    891                    ctx->ptr, *ctx->pattern));
    892             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
    893                 RETURN_FAILURE;
    894             ctx->pattern++;
    895             ctx->ptr++;
    896             break;
    897 
    898         case SRE_OP_SUCCESS:
    899             /* end of pattern */
    900             TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
    901             state->ptr = ctx->ptr;
    902             RETURN_SUCCESS;
    903 
    904         case SRE_OP_AT:
    905             /* match at given position */
    906             /* <AT> <code> */
    907             TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
    908             if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
    909                 RETURN_FAILURE;
    910             ctx->pattern++;
    911             break;
    912 
    913         case SRE_OP_CATEGORY:
    914             /* match at given category */
    915             /* <CATEGORY> <code> */
    916             TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
    917                    ctx->ptr, *ctx->pattern));
    918             if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
    919                 RETURN_FAILURE;
    920             ctx->pattern++;
    921             ctx->ptr++;
    922             break;
    923 
    924         case SRE_OP_ANY:
    925             /* match anything (except a newline) */
    926             /* <ANY> */
    927             TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
    928             if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
    929                 RETURN_FAILURE;
    930             ctx->ptr++;
    931             break;
    932 
    933         case SRE_OP_ANY_ALL:
    934             /* match anything */
    935             /* <ANY_ALL> */
    936             TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
    937             if (ctx->ptr >= end)
    938                 RETURN_FAILURE;
    939             ctx->ptr++;
    940             break;
    941 
    942         case SRE_OP_IN:
    943             /* match set member (or non_member) */
    944             /* <IN> <skip> <set> */
    945             TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
    946             if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
    947                 RETURN_FAILURE;
    948             ctx->pattern += ctx->pattern[0];
    949             ctx->ptr++;
    950             break;
    951 
    952         case SRE_OP_LITERAL_IGNORE:
    953             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
    954                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
    955             if (ctx->ptr >= end ||
    956                 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
    957                 RETURN_FAILURE;
    958             ctx->pattern++;
    959             ctx->ptr++;
    960             break;
    961 
    962         case SRE_OP_NOT_LITERAL_IGNORE:
    963             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
    964                    ctx->pattern, ctx->ptr, *ctx->pattern));
    965             if (ctx->ptr >= end ||
    966                 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
    967                 RETURN_FAILURE;
    968             ctx->pattern++;
    969             ctx->ptr++;
    970             break;
    971 
    972         case SRE_OP_IN_IGNORE:
    973             TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
    974             if (ctx->ptr >= end
    975                 || !SRE_CHARSET(ctx->pattern+1,
    976                                 (SRE_CODE)state->lower(*ctx->ptr)))
    977                 RETURN_FAILURE;
    978             ctx->pattern += ctx->pattern[0];
    979             ctx->ptr++;
    980             break;
    981 
    982         case SRE_OP_JUMP:
    983         case SRE_OP_INFO:
    984             /* jump forward */
    985             /* <JUMP> <offset> */
    986             TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
    987                    ctx->ptr, ctx->pattern[0]));
    988             ctx->pattern += ctx->pattern[0];
    989             break;
    990 
    991         case SRE_OP_BRANCH:
    992             /* alternation */
    993             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
    994             TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
    995             LASTMARK_SAVE();
    996             ctx->u.rep = state->repeat;
    997             if (ctx->u.rep)
    998                 MARK_PUSH(ctx->lastmark);
    999             for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
   1000                 if (ctx->pattern[1] == SRE_OP_LITERAL &&
   1001                     (ctx->ptr >= end ||
   1002                      (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
   1003                     continue;
   1004                 if (ctx->pattern[1] == SRE_OP_IN &&
   1005                     (ctx->ptr >= end ||
   1006                      !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
   1007                     continue;
   1008                 state->ptr = ctx->ptr;
   1009                 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
   1010                 if (ret) {
   1011                     if (ctx->u.rep)
   1012                         MARK_POP_DISCARD(ctx->lastmark);
   1013                     RETURN_ON_ERROR(ret);
   1014                     RETURN_SUCCESS;
   1015                 }
   1016                 if (ctx->u.rep)
   1017                     MARK_POP_KEEP(ctx->lastmark);
   1018                 LASTMARK_RESTORE();
   1019             }
   1020             if (ctx->u.rep)
   1021                 MARK_POP_DISCARD(ctx->lastmark);
   1022             RETURN_FAILURE;
   1023 
   1024         case SRE_OP_REPEAT_ONE:
   1025             /* match repeated sequence (maximizing regexp) */
   1026 
   1027             /* this operator only works if the repeated item is
   1028                exactly one character wide, and we're not already
   1029                collecting backtracking points.  for other cases,
   1030                use the MAX_REPEAT operator */
   1031 
   1032             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
   1033 
   1034             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
   1035                    ctx->pattern[1], ctx->pattern[2]));
   1036 
   1037             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
   1038                 RETURN_FAILURE; /* cannot match */
   1039 
   1040             state->ptr = ctx->ptr;
   1041 
   1042             ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
   1043             RETURN_ON_ERROR(ret);
   1044             DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1045             ctx->count = ret;
   1046             ctx->ptr += ctx->count;
   1047 
   1048             /* when we arrive here, count contains the number of
   1049                matches, and ctx->ptr points to the tail of the target
   1050                string.  check if the rest of the pattern matches,
   1051                and backtrack if not. */
   1052 
   1053             if (ctx->count < (Py_ssize_t) ctx->pattern[1])
   1054                 RETURN_FAILURE;
   1055 
   1056             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
   1057                 /* tail is empty.  we're finished */
   1058                 state->ptr = ctx->ptr;
   1059                 RETURN_SUCCESS;
   1060             }
   1061 
   1062             LASTMARK_SAVE();
   1063 
   1064             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
   1065                 /* tail starts with a literal. skip positions where
   1066                    the rest of the pattern cannot possibly match */
   1067                 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
   1068                 for (;;) {
   1069                     while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
   1070                            (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
   1071                         ctx->ptr--;
   1072                         ctx->count--;
   1073                     }
   1074                     if (ctx->count < (Py_ssize_t) ctx->pattern[1])
   1075                         break;
   1076                     state->ptr = ctx->ptr;
   1077                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
   1078                             ctx->pattern+ctx->pattern[0]);
   1079                     if (ret) {
   1080                         RETURN_ON_ERROR(ret);
   1081                         RETURN_SUCCESS;
   1082                     }
   1083 
   1084                     LASTMARK_RESTORE();
   1085 
   1086                     ctx->ptr--;
   1087                     ctx->count--;
   1088                 }
   1089 
   1090             } else {
   1091                 /* general case */
   1092                 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
   1093                     state->ptr = ctx->ptr;
   1094                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
   1095                             ctx->pattern+ctx->pattern[0]);
   1096                     if (ret) {
   1097                         RETURN_ON_ERROR(ret);
   1098                         RETURN_SUCCESS;
   1099                     }
   1100                     ctx->ptr--;
   1101                     ctx->count--;
   1102                     LASTMARK_RESTORE();
   1103                 }
   1104             }
   1105             RETURN_FAILURE;
   1106 
   1107         case SRE_OP_MIN_REPEAT_ONE:
   1108             /* match repeated sequence (minimizing regexp) */
   1109 
   1110             /* this operator only works if the repeated item is
   1111                exactly one character wide, and we're not already
   1112                collecting backtracking points.  for other cases,
   1113                use the MIN_REPEAT operator */
   1114 
   1115             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
   1116 
   1117             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
   1118                    ctx->pattern[1], ctx->pattern[2]));
   1119 
   1120             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
   1121                 RETURN_FAILURE; /* cannot match */
   1122 
   1123             state->ptr = ctx->ptr;
   1124 
   1125             if (ctx->pattern[1] == 0)
   1126                 ctx->count = 0;
   1127             else {
   1128                 /* count using pattern min as the maximum */
   1129                 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
   1130                 RETURN_ON_ERROR(ret);
   1131                 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1132                 if (ret < (Py_ssize_t) ctx->pattern[1])
   1133                     /* didn't match minimum number of times */
   1134                     RETURN_FAILURE;
   1135                 /* advance past minimum matches of repeat */
   1136                 ctx->count = ret;
   1137                 ctx->ptr += ctx->count;
   1138             }
   1139 
   1140             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
   1141                 /* tail is empty.  we're finished */
   1142                 state->ptr = ctx->ptr;
   1143                 RETURN_SUCCESS;
   1144 
   1145             } else {
   1146                 /* general case */
   1147                 LASTMARK_SAVE();
   1148                 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
   1149                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
   1150                     state->ptr = ctx->ptr;
   1151                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
   1152                             ctx->pattern+ctx->pattern[0]);
   1153                     if (ret) {
   1154                         RETURN_ON_ERROR(ret);
   1155                         RETURN_SUCCESS;
   1156                     }
   1157                     state->ptr = ctx->ptr;
   1158                     ret = SRE_COUNT(state, ctx->pattern+3, 1);
   1159                     RETURN_ON_ERROR(ret);
   1160                     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1161                     if (ret == 0)
   1162                         break;
   1163                     assert(ret == 1);
   1164                     ctx->ptr++;
   1165                     ctx->count++;
   1166                     LASTMARK_RESTORE();
   1167                 }
   1168             }
   1169             RETURN_FAILURE;
   1170 
   1171         case SRE_OP_REPEAT:
   1172             /* create repeat context.  all the hard work is done
   1173                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
   1174             /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
   1175             TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
   1176                    ctx->pattern[1], ctx->pattern[2]));
   1177 
   1178             /* install new repeat context */
   1179             ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
   1180             if (!ctx->u.rep) {
   1181                 PyErr_NoMemory();
   1182                 RETURN_FAILURE;
   1183             }
   1184             ctx->u.rep->count = -1;
   1185             ctx->u.rep->pattern = ctx->pattern;
   1186             ctx->u.rep->prev = state->repeat;
   1187             ctx->u.rep->last_ptr = NULL;
   1188             state->repeat = ctx->u.rep;
   1189 
   1190             state->ptr = ctx->ptr;
   1191             DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
   1192             state->repeat = ctx->u.rep->prev;
   1193             PyObject_FREE(ctx->u.rep);
   1194 
   1195             if (ret) {
   1196                 RETURN_ON_ERROR(ret);
   1197                 RETURN_SUCCESS;
   1198             }
   1199             RETURN_FAILURE;
   1200 
   1201         case SRE_OP_MAX_UNTIL:
   1202             /* maximizing repeat */
   1203             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
   1204 
   1205             /* FIXME: we probably need to deal with zero-width
   1206                matches in here... */
   1207 
   1208             ctx->u.rep = state->repeat;
   1209             if (!ctx->u.rep)
   1210                 RETURN_ERROR(SRE_ERROR_STATE);
   1211 
   1212             state->ptr = ctx->ptr;
   1213 
   1214             ctx->count = ctx->u.rep->count+1;
   1215 
   1216             TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
   1217                    ctx->ptr, ctx->count));
   1218 
   1219             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
   1220                 /* not enough matches */
   1221                 ctx->u.rep->count = ctx->count;
   1222                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
   1223                         ctx->u.rep->pattern+3);
   1224                 if (ret) {
   1225                     RETURN_ON_ERROR(ret);
   1226                     RETURN_SUCCESS;
   1227                 }
   1228                 ctx->u.rep->count = ctx->count-1;
   1229                 state->ptr = ctx->ptr;
   1230                 RETURN_FAILURE;
   1231             }
   1232 
   1233             if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
   1234                 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
   1235                 state->ptr != ctx->u.rep->last_ptr) {
   1236                 /* we may have enough matches, but if we can
   1237                    match another item, do so */
   1238                 ctx->u.rep->count = ctx->count;
   1239                 LASTMARK_SAVE();
   1240                 MARK_PUSH(ctx->lastmark);
   1241                 /* zero-width match protection */
   1242                 DATA_PUSH(&ctx->u.rep->last_ptr);
   1243                 ctx->u.rep->last_ptr = state->ptr;
   1244                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
   1245                         ctx->u.rep->pattern+3);
   1246                 DATA_POP(&ctx->u.rep->last_ptr);
   1247                 if (ret) {
   1248                     MARK_POP_DISCARD(ctx->lastmark);
   1249                     RETURN_ON_ERROR(ret);
   1250                     RETURN_SUCCESS;
   1251                 }
   1252                 MARK_POP(ctx->lastmark);
   1253                 LASTMARK_RESTORE();
   1254                 ctx->u.rep->count = ctx->count-1;
   1255                 state->ptr = ctx->ptr;
   1256             }
   1257 
   1258             /* cannot match more repeated items here.  make sure the
   1259                tail matches */
   1260             state->repeat = ctx->u.rep->prev;
   1261             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
   1262             RETURN_ON_SUCCESS(ret);
   1263             state->repeat = ctx->u.rep;
   1264             state->ptr = ctx->ptr;
   1265             RETURN_FAILURE;
   1266 
   1267         case SRE_OP_MIN_UNTIL:
   1268             /* minimizing repeat */
   1269             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
   1270 
   1271             ctx->u.rep = state->repeat;
   1272             if (!ctx->u.rep)
   1273                 RETURN_ERROR(SRE_ERROR_STATE);
   1274 
   1275             state->ptr = ctx->ptr;
   1276 
   1277             ctx->count = ctx->u.rep->count+1;
   1278 
   1279             TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
   1280                    ctx->ptr, ctx->count, ctx->u.rep->pattern));
   1281 
   1282             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
   1283                 /* not enough matches */
   1284                 ctx->u.rep->count = ctx->count;
   1285                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
   1286                         ctx->u.rep->pattern+3);
   1287                 if (ret) {
   1288                     RETURN_ON_ERROR(ret);
   1289                     RETURN_SUCCESS;
   1290                 }
   1291                 ctx->u.rep->count = ctx->count-1;
   1292                 state->ptr = ctx->ptr;
   1293                 RETURN_FAILURE;
   1294             }
   1295 
   1296             LASTMARK_SAVE();
   1297 
   1298             /* see if the tail matches */
   1299             state->repeat = ctx->u.rep->prev;
   1300             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
   1301             if (ret) {
   1302                 RETURN_ON_ERROR(ret);
   1303                 RETURN_SUCCESS;
   1304             }
   1305 
   1306             state->repeat = ctx->u.rep;
   1307             state->ptr = ctx->ptr;
   1308 
   1309             LASTMARK_RESTORE();
   1310 
   1311             if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
   1312                 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
   1313                 state->ptr == ctx->u.rep->last_ptr)
   1314                 RETURN_FAILURE;
   1315 
   1316             ctx->u.rep->count = ctx->count;
   1317             /* zero-width match protection */
   1318             DATA_PUSH(&ctx->u.rep->last_ptr);
   1319             ctx->u.rep->last_ptr = state->ptr;
   1320             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
   1321                     ctx->u.rep->pattern+3);
   1322             DATA_POP(&ctx->u.rep->last_ptr);
   1323             if (ret) {
   1324                 RETURN_ON_ERROR(ret);
   1325                 RETURN_SUCCESS;
   1326             }
   1327             ctx->u.rep->count = ctx->count-1;
   1328             state->ptr = ctx->ptr;
   1329             RETURN_FAILURE;
   1330 
   1331         case SRE_OP_GROUPREF:
   1332             /* match backreference */
   1333             TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
   1334                    ctx->ptr, ctx->pattern[0]));
   1335             i = ctx->pattern[0];
   1336             {
   1337                 Py_ssize_t groupref = i+i;
   1338                 if (groupref >= state->lastmark) {
   1339                     RETURN_FAILURE;
   1340                 } else {
   1341                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1342                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1343                     if (!p || !e || e < p)
   1344                         RETURN_FAILURE;
   1345                     while (p < e) {
   1346                         if (ctx->ptr >= end || *ctx->ptr != *p)
   1347                             RETURN_FAILURE;
   1348                         p++; ctx->ptr++;
   1349                     }
   1350                 }
   1351             }
   1352             ctx->pattern++;
   1353             break;
   1354 
   1355         case SRE_OP_GROUPREF_IGNORE:
   1356             /* match backreference */
   1357             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
   1358                    ctx->ptr, ctx->pattern[0]));
   1359             i = ctx->pattern[0];
   1360             {
   1361                 Py_ssize_t groupref = i+i;
   1362                 if (groupref >= state->lastmark) {
   1363                     RETURN_FAILURE;
   1364                 } else {
   1365                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1366                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1367                     if (!p || !e || e < p)
   1368                         RETURN_FAILURE;
   1369                     while (p < e) {
   1370                         if (ctx->ptr >= end ||
   1371                             state->lower(*ctx->ptr) != state->lower(*p))
   1372                             RETURN_FAILURE;
   1373                         p++; ctx->ptr++;
   1374                     }
   1375                 }
   1376             }
   1377             ctx->pattern++;
   1378             break;
   1379 
   1380         case SRE_OP_GROUPREF_EXISTS:
   1381             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
   1382                    ctx->ptr, ctx->pattern[0]));
   1383             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
   1384             i = ctx->pattern[0];
   1385             {
   1386                 Py_ssize_t groupref = i+i;
   1387                 if (groupref >= state->lastmark) {
   1388                     ctx->pattern += ctx->pattern[1];
   1389                     break;
   1390                 } else {
   1391                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1392                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1393                     if (!p || !e || e < p) {
   1394                         ctx->pattern += ctx->pattern[1];
   1395                         break;
   1396                     }
   1397                 }
   1398             }
   1399             ctx->pattern += 2;
   1400             break;
   1401 
   1402         case SRE_OP_ASSERT:
   1403             /* assert subpattern */
   1404             /* <ASSERT> <skip> <back> <pattern> */
   1405             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
   1406                    ctx->ptr, ctx->pattern[1]));
   1407             if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
   1408                 RETURN_FAILURE;
   1409             state->ptr = ctx->ptr - ctx->pattern[1];
   1410             DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
   1411             RETURN_ON_FAILURE(ret);
   1412             ctx->pattern += ctx->pattern[0];
   1413             break;
   1414 
   1415         case SRE_OP_ASSERT_NOT:
   1416             /* assert not subpattern */
   1417             /* <ASSERT_NOT> <skip> <back> <pattern> */
   1418             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
   1419                    ctx->ptr, ctx->pattern[1]));
   1420             if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
   1421                 state->ptr = ctx->ptr - ctx->pattern[1];
   1422                 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
   1423                 if (ret) {
   1424                     RETURN_ON_ERROR(ret);
   1425                     RETURN_FAILURE;
   1426                 }
   1427             }
   1428             ctx->pattern += ctx->pattern[0];
   1429             break;
   1430 
   1431         case SRE_OP_FAILURE:
   1432             /* immediate failure */
   1433             TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
   1434             RETURN_FAILURE;
   1435 
   1436         default:
   1437             TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
   1438                    ctx->pattern[-1]));
   1439             RETURN_ERROR(SRE_ERROR_ILLEGAL);
   1440         }
   1441     }
   1442 
   1443 exit:
   1444     ctx_pos = ctx->last_ctx_pos;
   1445     jump = ctx->jump;
   1446     DATA_POP_DISCARD(ctx);
   1447     if (ctx_pos == -1)
   1448         return ret;
   1449     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1450 
   1451     switch (jump) {
   1452         case JUMP_MAX_UNTIL_2:
   1453             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
   1454             goto jump_max_until_2;
   1455         case JUMP_MAX_UNTIL_3:
   1456             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
   1457             goto jump_max_until_3;
   1458         case JUMP_MIN_UNTIL_2:
   1459             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
   1460             goto jump_min_until_2;
   1461         case JUMP_MIN_UNTIL_3:
   1462             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
   1463             goto jump_min_until_3;
   1464         case JUMP_BRANCH:
   1465             TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
   1466             goto jump_branch;
   1467         case JUMP_MAX_UNTIL_1:
   1468             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
   1469             goto jump_max_until_1;
   1470         case JUMP_MIN_UNTIL_1:
   1471             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
   1472             goto jump_min_until_1;
   1473         case JUMP_REPEAT:
   1474             TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
   1475             goto jump_repeat;
   1476         case JUMP_REPEAT_ONE_1:
   1477             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
   1478             goto jump_repeat_one_1;
   1479         case JUMP_REPEAT_ONE_2:
   1480             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
   1481             goto jump_repeat_one_2;
   1482         case JUMP_MIN_REPEAT_ONE:
   1483             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
   1484             goto jump_min_repeat_one;
   1485         case JUMP_ASSERT:
   1486             TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
   1487             goto jump_assert;
   1488         case JUMP_ASSERT_NOT:
   1489             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
   1490             goto jump_assert_not;
   1491         case JUMP_NONE:
   1492             TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
   1493                    ctx->ptr, ret));
   1494             break;
   1495     }
   1496 
   1497     return ret; /* should never get here */
   1498 }
   1499 
   1500 LOCAL(Py_ssize_t)
   1501 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
   1502 {
   1503     SRE_CHAR* ptr = (SRE_CHAR *)state->start;
   1504     SRE_CHAR* end = (SRE_CHAR *)state->end;
   1505     Py_ssize_t status = 0;
   1506     Py_ssize_t prefix_len = 0;
   1507     Py_ssize_t prefix_skip = 0;
   1508     SRE_CODE* prefix = NULL;
   1509     SRE_CODE* charset = NULL;
   1510     SRE_CODE* overlap = NULL;
   1511     int flags = 0;
   1512 
   1513     if (ptr > end)
   1514         return 0;
   1515 
   1516     if (pattern[0] == SRE_OP_INFO) {
   1517         /* optimization info block */
   1518         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
   1519 
   1520         flags = pattern[2];
   1521 
   1522         if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
   1523             TRACE(("reject (got %u chars, need %u)\n",
   1524                    (unsigned int)(end - ptr), pattern[3]));
   1525             return 0;
   1526         }
   1527         if (pattern[3] > 1) {
   1528             /* adjust end point (but make sure we leave at least one
   1529                character in there, so literal search will work) */
   1530             end -= pattern[3]-1;
   1531             if (end <= ptr)
   1532                 end = ptr+1;
   1533         }
   1534 
   1535         if (flags & SRE_INFO_PREFIX) {
   1536             /* pattern starts with a known prefix */
   1537             /* <length> <skip> <prefix data> <overlap data> */
   1538             prefix_len = pattern[5];
   1539             prefix_skip = pattern[6];
   1540             prefix = pattern + 7;
   1541             overlap = prefix + prefix_len - 1;
   1542         } else if (flags & SRE_INFO_CHARSET)
   1543             /* pattern starts with a character from a known set */
   1544             /* <charset> */
   1545             charset = pattern + 5;
   1546 
   1547         pattern += 1 + pattern[1];
   1548     }
   1549 
   1550     TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
   1551            prefix, prefix_len, prefix_skip));
   1552     TRACE(("charset = %p\n", charset));
   1553 
   1554 #if defined(USE_FAST_SEARCH)
   1555     if (prefix_len > 1) {
   1556         /* pattern starts with a known prefix.  use the overlap
   1557            table to skip forward as fast as we possibly can */
   1558         Py_ssize_t i = 0;
   1559         end = (SRE_CHAR *)state->end;
   1560         while (ptr < end) {
   1561             for (;;) {
   1562                 if ((SRE_CODE) ptr[0] != prefix[i]) {
   1563                     if (!i)
   1564                         break;
   1565                     else
   1566                         i = overlap[i];
   1567                 } else {
   1568                     if (++i == prefix_len) {
   1569                         /* found a potential match */
   1570                         TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
   1571                         state->start = ptr + 1 - prefix_len;
   1572                         state->ptr = ptr + 1 - prefix_len + prefix_skip;
   1573                         if (flags & SRE_INFO_LITERAL)
   1574                             return 1; /* we got all of it */
   1575                         status = SRE_MATCH(state, pattern + 2*prefix_skip);
   1576                         if (status != 0)
   1577                             return status;
   1578                         /* close but no cigar -- try again */
   1579                         i = overlap[i];
   1580                     }
   1581                     break;
   1582                 }
   1583             }
   1584             ptr++;
   1585         }
   1586         return 0;
   1587     }
   1588 #endif
   1589 
   1590     if (pattern[0] == SRE_OP_LITERAL) {
   1591         /* pattern starts with a literal character.  this is used
   1592            for short prefixes, and if fast search is disabled */
   1593         SRE_CODE chr = pattern[1];
   1594         end = (SRE_CHAR *)state->end;
   1595         for (;;) {
   1596             while (ptr < end && (SRE_CODE) ptr[0] != chr)
   1597                 ptr++;
   1598             if (ptr >= end)
   1599                 return 0;
   1600             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
   1601             state->start = ptr;
   1602             state->ptr = ++ptr;
   1603             if (flags & SRE_INFO_LITERAL)
   1604                 return 1; /* we got all of it */
   1605             status = SRE_MATCH(state, pattern + 2);
   1606             if (status != 0)
   1607                 break;
   1608         }
   1609     } else if (charset) {
   1610         /* pattern starts with a character from a known set */
   1611         end = (SRE_CHAR *)state->end;
   1612         for (;;) {
   1613             while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
   1614                 ptr++;
   1615             if (ptr >= end)
   1616                 return 0;
   1617             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
   1618             state->start = ptr;
   1619             state->ptr = ptr;
   1620             status = SRE_MATCH(state, pattern);
   1621             if (status != 0)
   1622                 break;
   1623             ptr++;
   1624         }
   1625     } else {
   1626         /* general case */
   1627         assert(ptr <= end);
   1628         while (1) {
   1629             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
   1630             state->start = state->ptr = ptr;
   1631             status = SRE_MATCH(state, pattern);
   1632             if (status != 0 || ptr >= end)
   1633                 break;
   1634             ptr++;
   1635         }
   1636     }
   1637 
   1638     return status;
   1639 }
   1640 
   1641 LOCAL(int)
   1642 SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
   1643 {
   1644     /* check if given string is a literal template (i.e. no escapes) */
   1645     while (len-- > 0)
   1646         if (*ptr++ == '\\')
   1647             return 0;
   1648     return 1;
   1649 }
   1650 
   1651 #if !defined(SRE_RECURSIVE)
   1652 
   1653 /* -------------------------------------------------------------------- */
   1654 /* factories and destructors */
   1655 
   1656 /* see sre.h for object declarations */
   1657 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
   1658 static PyObject*pattern_scanner(PatternObject*, PyObject*);
   1659 
   1660 static PyObject *
   1661 sre_codesize(PyObject* self, PyObject *unused)
   1662 {
   1663     return PyInt_FromSize_t(sizeof(SRE_CODE));
   1664 }
   1665 
   1666 static PyObject *
   1667 sre_getlower(PyObject* self, PyObject* args)
   1668 {
   1669     int character, flags;
   1670     if (!PyArg_ParseTuple(args, "ii", &character, &flags))
   1671         return NULL;
   1672     if (flags & SRE_FLAG_LOCALE)
   1673         return Py_BuildValue("i", sre_lower_locale(character));
   1674     if (flags & SRE_FLAG_UNICODE)
   1675 #if defined(HAVE_UNICODE)
   1676         return Py_BuildValue("i", sre_lower_unicode(character));
   1677 #else
   1678         return Py_BuildValue("i", sre_lower_locale(character));
   1679 #endif
   1680     return Py_BuildValue("i", sre_lower(character));
   1681 }
   1682 
   1683 LOCAL(void)
   1684 state_reset(SRE_STATE* state)
   1685 {
   1686     /* FIXME: dynamic! */
   1687     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
   1688 
   1689     state->lastmark = -1;
   1690     state->lastindex = -1;
   1691 
   1692     state->repeat = NULL;
   1693 
   1694     data_stack_dealloc(state);
   1695 }
   1696 
   1697 static void*
   1698 getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
   1699 {
   1700     /* given a python object, return a data pointer, a length (in
   1701        characters), and a character size.  return NULL if the object
   1702        is not a string (or not compatible) */
   1703 
   1704     PyBufferProcs *buffer;
   1705     Py_ssize_t size, bytes;
   1706     int charsize;
   1707     void* ptr;
   1708 
   1709 #if defined(HAVE_UNICODE)
   1710     if (PyUnicode_Check(string)) {
   1711         /* unicode strings doesn't always support the buffer interface */
   1712         ptr = (void*) PyUnicode_AS_DATA(string);
   1713         /* bytes = PyUnicode_GET_DATA_SIZE(string); */
   1714         size = PyUnicode_GET_SIZE(string);
   1715         charsize = sizeof(Py_UNICODE);
   1716 
   1717     } else {
   1718 #endif
   1719 
   1720     /* get pointer to string buffer */
   1721     buffer = Py_TYPE(string)->tp_as_buffer;
   1722     if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
   1723         buffer->bf_getsegcount(string, NULL) != 1) {
   1724         PyErr_SetString(PyExc_TypeError, "expected string or buffer");
   1725         return NULL;
   1726     }
   1727 
   1728     /* determine buffer size */
   1729     bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
   1730     if (bytes < 0) {
   1731         PyErr_SetString(PyExc_TypeError, "buffer has negative size");
   1732         return NULL;
   1733     }
   1734 
   1735     /* determine character size */
   1736 #if PY_VERSION_HEX >= 0x01060000
   1737     size = PyObject_Size(string);
   1738 #else
   1739     size = PyObject_Length(string);
   1740 #endif
   1741 
   1742     if (PyString_Check(string) || bytes == size)
   1743         charsize = 1;
   1744 #if defined(HAVE_UNICODE)
   1745     else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
   1746         charsize = sizeof(Py_UNICODE);
   1747 #endif
   1748     else {
   1749         PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
   1750         return NULL;
   1751     }
   1752 
   1753 #if defined(HAVE_UNICODE)
   1754     }
   1755 #endif
   1756 
   1757     *p_length = size;
   1758     *p_charsize = charsize;
   1759 
   1760     return ptr;
   1761 }
   1762 
   1763 LOCAL(PyObject*)
   1764 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
   1765            Py_ssize_t start, Py_ssize_t end)
   1766 {
   1767     /* prepare state object */
   1768 
   1769     Py_ssize_t length;
   1770     int charsize;
   1771     void* ptr;
   1772 
   1773     memset(state, 0, sizeof(SRE_STATE));
   1774 
   1775     state->lastmark = -1;
   1776     state->lastindex = -1;
   1777 
   1778     ptr = getstring(string, &length, &charsize);
   1779     if (!ptr)
   1780         return NULL;
   1781 
   1782     /* adjust boundaries */
   1783     if (start < 0)
   1784         start = 0;
   1785     else if (start > length)
   1786         start = length;
   1787 
   1788     if (end < 0)
   1789         end = 0;
   1790     else if (end > length)
   1791         end = length;
   1792 
   1793     state->charsize = charsize;
   1794 
   1795     state->beginning = ptr;
   1796 
   1797     state->start = (void*) ((char*) ptr + start * state->charsize);
   1798     state->end = (void*) ((char*) ptr + end * state->charsize);
   1799 
   1800     Py_INCREF(string);
   1801     state->string = string;
   1802     state->pos = start;
   1803     state->endpos = end;
   1804 
   1805     if (pattern->flags & SRE_FLAG_LOCALE)
   1806         state->lower = sre_lower_locale;
   1807     else if (pattern->flags & SRE_FLAG_UNICODE)
   1808 #if defined(HAVE_UNICODE)
   1809         state->lower = sre_lower_unicode;
   1810 #else
   1811         state->lower = sre_lower_locale;
   1812 #endif
   1813     else
   1814         state->lower = sre_lower;
   1815 
   1816     return string;
   1817 }
   1818 
   1819 LOCAL(void)
   1820 state_fini(SRE_STATE* state)
   1821 {
   1822     Py_XDECREF(state->string);
   1823     data_stack_dealloc(state);
   1824 }
   1825 
   1826 /* calculate offset from start of string */
   1827 #define STATE_OFFSET(state, member)\
   1828     (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
   1829 
   1830 LOCAL(PyObject*)
   1831 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
   1832 {
   1833     Py_ssize_t i, j;
   1834 
   1835     index = (index - 1) * 2;
   1836 
   1837     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
   1838         if (empty)
   1839             /* want empty string */
   1840             i = j = 0;
   1841         else {
   1842             Py_INCREF(Py_None);
   1843             return Py_None;
   1844         }
   1845     } else {
   1846         i = STATE_OFFSET(state, state->mark[index]);
   1847         j = STATE_OFFSET(state, state->mark[index+1]);
   1848     }
   1849 
   1850     return PySequence_GetSlice(string, i, j);
   1851 }
   1852 
   1853 static void
   1854 pattern_error(int status)
   1855 {
   1856     switch (status) {
   1857     case SRE_ERROR_RECURSION_LIMIT:
   1858         PyErr_SetString(
   1859             PyExc_RuntimeError,
   1860             "maximum recursion limit exceeded"
   1861             );
   1862         break;
   1863     case SRE_ERROR_MEMORY:
   1864         PyErr_NoMemory();
   1865         break;
   1866     case SRE_ERROR_INTERRUPTED:
   1867     /* An exception has already been raised, so let it fly */
   1868         break;
   1869     default:
   1870         /* other error codes indicate compiler/engine bugs */
   1871         PyErr_SetString(
   1872             PyExc_RuntimeError,
   1873             "internal error in regular expression engine"
   1874             );
   1875     }
   1876 }
   1877 
   1878 static void
   1879 pattern_dealloc(PatternObject* self)
   1880 {
   1881     if (self->weakreflist != NULL)
   1882         PyObject_ClearWeakRefs((PyObject *) self);
   1883     Py_XDECREF(self->pattern);
   1884     Py_XDECREF(self->groupindex);
   1885     Py_XDECREF(self->indexgroup);
   1886     PyObject_DEL(self);
   1887 }
   1888 
   1889 static int
   1890 check_args_size(const char *name, PyObject* args, PyObject* kw, int n)
   1891 {
   1892     Py_ssize_t m = PyTuple_GET_SIZE(args) + (kw ? PyDict_Size(kw) : 0);
   1893     if (m <= n)
   1894         return 1;
   1895     PyErr_Format(PyExc_TypeError,
   1896                  "%s() takes at most %d positional arguments (%zd given)",
   1897                  name, n, m);
   1898     return 0;
   1899 }
   1900 
   1901 static PyObject*
   1902 fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
   1903 {
   1904     if (string2 != NULL) {
   1905         char buf[100];
   1906         if (string != NULL) {
   1907             PyErr_Format(PyExc_TypeError,
   1908                          "Argument given by name ('%s') and position (1)",
   1909                          oldname);
   1910             return NULL;
   1911         }
   1912         sprintf(buf, "The '%s' keyword parameter name is deprecated.  "
   1913                      "Use 'string' instead.", oldname);
   1914         if (PyErr_Warn(PyExc_DeprecationWarning, buf) < 0)
   1915             return NULL;
   1916         return string2;
   1917     }
   1918     if (string == NULL) {
   1919         PyErr_SetString(PyExc_TypeError,
   1920                         "Required argument 'string' (pos 1) not found");
   1921         return NULL;
   1922     }
   1923     return string;
   1924 }
   1925 
   1926 static PyObject*
   1927 pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
   1928 {
   1929     SRE_STATE state;
   1930     int status;
   1931 
   1932     PyObject *string = NULL, *string2 = NULL;
   1933     Py_ssize_t start = 0;
   1934     Py_ssize_t end = PY_SSIZE_T_MAX;
   1935     static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
   1936     if (!check_args_size("match", args, kw, 3))
   1937         return NULL;
   1938 
   1939     if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:match", kwlist,
   1940                                      &string, &start, &end, &string2))
   1941         return NULL;
   1942 
   1943     string = fix_string_param(string, string2, "pattern");
   1944     if (!string)
   1945         return NULL;
   1946 
   1947     string = state_init(&state, self, string, start, end);
   1948     if (!string)
   1949         return NULL;
   1950 
   1951     state.ptr = state.start;
   1952 
   1953     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
   1954 
   1955     if (state.charsize == 1) {
   1956         status = sre_match(&state, PatternObject_GetCode(self));
   1957     } else {
   1958 #if defined(HAVE_UNICODE)
   1959         status = sre_umatch(&state, PatternObject_GetCode(self));
   1960 #endif
   1961     }
   1962 
   1963     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
   1964     if (PyErr_Occurred())
   1965         return NULL;
   1966 
   1967     state_fini(&state);
   1968 
   1969     return pattern_new_match(self, &state, status);
   1970 }
   1971 
   1972 static PyObject*
   1973 pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
   1974 {
   1975     SRE_STATE state;
   1976     int status;
   1977 
   1978     PyObject *string = NULL, *string2 = NULL;
   1979     Py_ssize_t start = 0;
   1980     Py_ssize_t end = PY_SSIZE_T_MAX;
   1981     static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
   1982     if (!check_args_size("search", args, kw, 3))
   1983         return NULL;
   1984 
   1985     if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:search", kwlist,
   1986                                      &string, &start, &end, &string2))
   1987         return NULL;
   1988 
   1989     string = fix_string_param(string, string2, "pattern");
   1990     if (!string)
   1991         return NULL;
   1992 
   1993     string = state_init(&state, self, string, start, end);
   1994     if (!string)
   1995         return NULL;
   1996 
   1997     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
   1998 
   1999     if (state.charsize == 1) {
   2000         status = sre_search(&state, PatternObject_GetCode(self));
   2001     } else {
   2002 #if defined(HAVE_UNICODE)
   2003         status = sre_usearch(&state, PatternObject_GetCode(self));
   2004 #endif
   2005     }
   2006 
   2007     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
   2008 
   2009     state_fini(&state);
   2010 
   2011     if (PyErr_Occurred())
   2012         return NULL;
   2013 
   2014     return pattern_new_match(self, &state, status);
   2015 }
   2016 
   2017 static PyObject*
   2018 call(char* module, char* function, PyObject* args)
   2019 {
   2020     PyObject* name;
   2021     PyObject* mod;
   2022     PyObject* func;
   2023     PyObject* result;
   2024 
   2025     if (!args)
   2026         return NULL;
   2027     name = PyString_FromString(module);
   2028     if (!name)
   2029         return NULL;
   2030     mod = PyImport_Import(name);
   2031     Py_DECREF(name);
   2032     if (!mod)
   2033         return NULL;
   2034     func = PyObject_GetAttrString(mod, function);
   2035     Py_DECREF(mod);
   2036     if (!func)
   2037         return NULL;
   2038     result = PyObject_CallObject(func, args);
   2039     Py_DECREF(func);
   2040     Py_DECREF(args);
   2041     return result;
   2042 }
   2043 
   2044 #ifdef USE_BUILTIN_COPY
   2045 static int
   2046 deepcopy(PyObject** object, PyObject* memo)
   2047 {
   2048     PyObject* copy;
   2049 
   2050     copy = call(
   2051         "copy", "deepcopy",
   2052         PyTuple_Pack(2, *object, memo)
   2053         );
   2054     if (!copy)
   2055         return 0;
   2056 
   2057     Py_SETREF(*object, copy);
   2058 
   2059     return 1; /* success */
   2060 }
   2061 #endif
   2062 
   2063 static PyObject*
   2064 join_list(PyObject* list, PyObject* string)
   2065 {
   2066     /* join list elements */
   2067 
   2068     PyObject* joiner;
   2069 #if PY_VERSION_HEX >= 0x01060000
   2070     PyObject* function;
   2071     PyObject* args;
   2072 #endif
   2073     PyObject* result;
   2074 
   2075     joiner = PySequence_GetSlice(string, 0, 0);
   2076     if (!joiner)
   2077         return NULL;
   2078 
   2079     if (PyList_GET_SIZE(list) == 0) {
   2080         Py_DECREF(list);
   2081         return joiner;
   2082     }
   2083 
   2084 #if PY_VERSION_HEX >= 0x01060000
   2085     function = PyObject_GetAttrString(joiner, "join");
   2086     if (!function) {
   2087         Py_DECREF(joiner);
   2088         return NULL;
   2089     }
   2090     args = PyTuple_New(1);
   2091     if (!args) {
   2092         Py_DECREF(function);
   2093         Py_DECREF(joiner);
   2094         return NULL;
   2095     }
   2096     PyTuple_SET_ITEM(args, 0, list);
   2097     result = PyObject_CallObject(function, args);
   2098     Py_DECREF(args); /* also removes list */
   2099     Py_DECREF(function);
   2100 #else
   2101     result = call(
   2102         "string", "join",
   2103         PyTuple_Pack(2, list, joiner)
   2104         );
   2105 #endif
   2106     Py_DECREF(joiner);
   2107 
   2108     return result;
   2109 }
   2110 
   2111 static PyObject*
   2112 pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
   2113 {
   2114     SRE_STATE state;
   2115     PyObject* list;
   2116     int status;
   2117     Py_ssize_t i, b, e;
   2118 
   2119     PyObject *string = NULL, *string2 = NULL;
   2120     Py_ssize_t start = 0;
   2121     Py_ssize_t end = PY_SSIZE_T_MAX;
   2122     static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
   2123     if (!check_args_size("findall", args, kw, 3))
   2124         return NULL;
   2125 
   2126     if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:findall", kwlist,
   2127                                      &string, &start, &end, &string2))
   2128         return NULL;
   2129 
   2130     string = fix_string_param(string, string2, "source");
   2131     if (!string)
   2132         return NULL;
   2133 
   2134     string = state_init(&state, self, string, start, end);
   2135     if (!string)
   2136         return NULL;
   2137 
   2138     list = PyList_New(0);
   2139     if (!list) {
   2140         state_fini(&state);
   2141         return NULL;
   2142     }
   2143 
   2144     while (state.start <= state.end) {
   2145 
   2146         PyObject* item;
   2147 
   2148         state_reset(&state);
   2149 
   2150         state.ptr = state.start;
   2151 
   2152         if (state.charsize == 1) {
   2153             status = sre_search(&state, PatternObject_GetCode(self));
   2154         } else {
   2155 #if defined(HAVE_UNICODE)
   2156             status = sre_usearch(&state, PatternObject_GetCode(self));
   2157 #endif
   2158         }
   2159 
   2160 	if (PyErr_Occurred())
   2161 	    goto error;
   2162 
   2163         if (status <= 0) {
   2164             if (status == 0)
   2165                 break;
   2166             pattern_error(status);
   2167             goto error;
   2168         }
   2169 
   2170         /* don't bother to build a match object */
   2171         switch (self->groups) {
   2172         case 0:
   2173             b = STATE_OFFSET(&state, state.start);
   2174             e = STATE_OFFSET(&state, state.ptr);
   2175             item = PySequence_GetSlice(string, b, e);
   2176             if (!item)
   2177                 goto error;
   2178             break;
   2179         case 1:
   2180             item = state_getslice(&state, 1, string, 1);
   2181             if (!item)
   2182                 goto error;
   2183             break;
   2184         default:
   2185             item = PyTuple_New(self->groups);
   2186             if (!item)
   2187                 goto error;
   2188             for (i = 0; i < self->groups; i++) {
   2189                 PyObject* o = state_getslice(&state, i+1, string, 1);
   2190                 if (!o) {
   2191                     Py_DECREF(item);
   2192                     goto error;
   2193                 }
   2194                 PyTuple_SET_ITEM(item, i, o);
   2195             }
   2196             break;
   2197         }
   2198 
   2199         status = PyList_Append(list, item);
   2200         Py_DECREF(item);
   2201         if (status < 0)
   2202             goto error;
   2203 
   2204         if (state.ptr == state.start)
   2205             state.start = (void*) ((char*) state.ptr + state.charsize);
   2206         else
   2207             state.start = state.ptr;
   2208 
   2209     }
   2210 
   2211     state_fini(&state);
   2212     return list;
   2213 
   2214 error:
   2215     Py_DECREF(list);
   2216     state_fini(&state);
   2217     return NULL;
   2218 
   2219 }
   2220 
   2221 #if PY_VERSION_HEX >= 0x02020000
   2222 static PyObject*
   2223 pattern_finditer(PatternObject* pattern, PyObject* args)
   2224 {
   2225     PyObject* scanner;
   2226     PyObject* search;
   2227     PyObject* iterator;
   2228 
   2229     scanner = pattern_scanner(pattern, args);
   2230     if (!scanner)
   2231         return NULL;
   2232 
   2233     search = PyObject_GetAttrString(scanner, "search");
   2234     Py_DECREF(scanner);
   2235     if (!search)
   2236         return NULL;
   2237 
   2238     iterator = PyCallIter_New(search, Py_None);
   2239     Py_DECREF(search);
   2240 
   2241     return iterator;
   2242 }
   2243 #endif
   2244 
   2245 static PyObject*
   2246 pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
   2247 {
   2248     SRE_STATE state;
   2249     PyObject* list;
   2250     PyObject* item;
   2251     int status;
   2252     Py_ssize_t n;
   2253     Py_ssize_t i;
   2254     void* last;
   2255 
   2256     PyObject *string = NULL, *string2 = NULL;
   2257     Py_ssize_t maxsplit = 0;
   2258     static char* kwlist[] = { "string", "maxsplit", "source", NULL };
   2259     if (!check_args_size("split", args, kw, 2))
   2260         return NULL;
   2261 
   2262     if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnO:split", kwlist,
   2263                                      &string, &maxsplit, &string2))
   2264         return NULL;
   2265 
   2266     string = fix_string_param(string, string2, "source");
   2267     if (!string)
   2268         return NULL;
   2269 
   2270     string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
   2271     if (!string)
   2272         return NULL;
   2273 
   2274     list = PyList_New(0);
   2275     if (!list) {
   2276         state_fini(&state);
   2277         return NULL;
   2278     }
   2279 
   2280     n = 0;
   2281     last = state.start;
   2282 
   2283     while (!maxsplit || n < maxsplit) {
   2284 
   2285         state_reset(&state);
   2286 
   2287         state.ptr = state.start;
   2288 
   2289         if (state.charsize == 1) {
   2290             status = sre_search(&state, PatternObject_GetCode(self));
   2291         } else {
   2292 #if defined(HAVE_UNICODE)
   2293             status = sre_usearch(&state, PatternObject_GetCode(self));
   2294 #endif
   2295         }
   2296 
   2297 	if (PyErr_Occurred())
   2298 	    goto error;
   2299 
   2300         if (status <= 0) {
   2301             if (status == 0)
   2302                 break;
   2303             pattern_error(status);
   2304             goto error;
   2305         }
   2306 
   2307         if (state.start == state.ptr) {
   2308             if (last == state.end || state.ptr == state.end)
   2309                 break;
   2310             /* skip one character */
   2311             state.start = (void*) ((char*) state.ptr + state.charsize);
   2312             continue;
   2313         }
   2314 
   2315         /* get segment before this match */
   2316         item = PySequence_GetSlice(
   2317             string, STATE_OFFSET(&state, last),
   2318             STATE_OFFSET(&state, state.start)
   2319             );
   2320         if (!item)
   2321             goto error;
   2322         status = PyList_Append(list, item);
   2323         Py_DECREF(item);
   2324         if (status < 0)
   2325             goto error;
   2326 
   2327         /* add groups (if any) */
   2328         for (i = 0; i < self->groups; i++) {
   2329             item = state_getslice(&state, i+1, string, 0);
   2330             if (!item)
   2331                 goto error;
   2332             status = PyList_Append(list, item);
   2333             Py_DECREF(item);
   2334             if (status < 0)
   2335                 goto error;
   2336         }
   2337 
   2338         n = n + 1;
   2339 
   2340         last = state.start = state.ptr;
   2341 
   2342     }
   2343 
   2344     /* get segment following last match (even if empty) */
   2345     item = PySequence_GetSlice(
   2346         string, STATE_OFFSET(&state, last), state.endpos
   2347         );
   2348     if (!item)
   2349         goto error;
   2350     status = PyList_Append(list, item);
   2351     Py_DECREF(item);
   2352     if (status < 0)
   2353         goto error;
   2354 
   2355     state_fini(&state);
   2356     return list;
   2357 
   2358 error:
   2359     Py_DECREF(list);
   2360     state_fini(&state);
   2361     return NULL;
   2362 
   2363 }
   2364 
   2365 static PyObject*
   2366 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
   2367              Py_ssize_t count, Py_ssize_t subn)
   2368 {
   2369     SRE_STATE state;
   2370     PyObject* list;
   2371     PyObject* item;
   2372     PyObject* filter;
   2373     PyObject* args;
   2374     PyObject* match;
   2375     void* ptr;
   2376     int status;
   2377     Py_ssize_t n;
   2378     Py_ssize_t i, b, e;
   2379     int bint;
   2380     int filter_is_callable;
   2381 
   2382     if (PyCallable_Check(ptemplate)) {
   2383         /* sub/subn takes either a function or a template */
   2384         filter = ptemplate;
   2385         Py_INCREF(filter);
   2386         filter_is_callable = 1;
   2387     } else {
   2388         /* if not callable, check if it's a literal string */
   2389         int literal;
   2390         ptr = getstring(ptemplate, &n, &bint);
   2391         b = bint;
   2392         if (ptr) {
   2393             if (b == 1) {
   2394 		    literal = sre_literal_template((unsigned char *)ptr, n);
   2395             } else {
   2396 #if defined(HAVE_UNICODE)
   2397 		    literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
   2398 #endif
   2399             }
   2400         } else {
   2401             PyErr_Clear();
   2402             literal = 0;
   2403         }
   2404         if (literal) {
   2405             filter = ptemplate;
   2406             Py_INCREF(filter);
   2407             filter_is_callable = 0;
   2408         } else {
   2409             /* not a literal; hand it over to the template compiler */
   2410             filter = call(
   2411                 SRE_PY_MODULE, "_subx",
   2412                 PyTuple_Pack(2, self, ptemplate)
   2413                 );
   2414             if (!filter)
   2415                 return NULL;
   2416             filter_is_callable = PyCallable_Check(filter);
   2417         }
   2418     }
   2419 
   2420     string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
   2421     if (!string) {
   2422         Py_DECREF(filter);
   2423         return NULL;
   2424     }
   2425 
   2426     list = PyList_New(0);
   2427     if (!list) {
   2428         Py_DECREF(filter);
   2429         state_fini(&state);
   2430         return NULL;
   2431     }
   2432 
   2433     n = i = 0;
   2434 
   2435     while (!count || n < count) {
   2436 
   2437         state_reset(&state);
   2438 
   2439         state.ptr = state.start;
   2440 
   2441         if (state.charsize == 1) {
   2442             status = sre_search(&state, PatternObject_GetCode(self));
   2443         } else {
   2444 #if defined(HAVE_UNICODE)
   2445             status = sre_usearch(&state, PatternObject_GetCode(self));
   2446 #endif
   2447         }
   2448 
   2449 	if (PyErr_Occurred())
   2450 	    goto error;
   2451 
   2452         if (status <= 0) {
   2453             if (status == 0)
   2454                 break;
   2455             pattern_error(status);
   2456             goto error;
   2457         }
   2458 
   2459         b = STATE_OFFSET(&state, state.start);
   2460         e = STATE_OFFSET(&state, state.ptr);
   2461 
   2462         if (i < b) {
   2463             /* get segment before this match */
   2464             item = PySequence_GetSlice(string, i, b);
   2465             if (!item)
   2466                 goto error;
   2467             status = PyList_Append(list, item);
   2468             Py_DECREF(item);
   2469             if (status < 0)
   2470                 goto error;
   2471 
   2472         } else if (i == b && i == e && n > 0)
   2473             /* ignore empty match on latest position */
   2474             goto next;
   2475 
   2476         if (filter_is_callable) {
   2477             /* pass match object through filter */
   2478             match = pattern_new_match(self, &state, 1);
   2479             if (!match)
   2480                 goto error;
   2481             args = PyTuple_Pack(1, match);
   2482             if (!args) {
   2483                 Py_DECREF(match);
   2484                 goto error;
   2485             }
   2486             item = PyObject_CallObject(filter, args);
   2487             Py_DECREF(args);
   2488             Py_DECREF(match);
   2489             if (!item)
   2490                 goto error;
   2491         } else {
   2492             /* filter is literal string */
   2493             item = filter;
   2494             Py_INCREF(item);
   2495         }
   2496 
   2497         /* add to list */
   2498         if (item != Py_None) {
   2499             status = PyList_Append(list, item);
   2500             Py_DECREF(item);
   2501             if (status < 0)
   2502                 goto error;
   2503         }
   2504 
   2505         i = e;
   2506         n = n + 1;
   2507 
   2508 next:
   2509         /* move on */
   2510         if (state.ptr == state.end)
   2511             break;
   2512         if (state.ptr == state.start)
   2513             state.start = (void*) ((char*) state.ptr + state.charsize);
   2514         else
   2515             state.start = state.ptr;
   2516 
   2517     }
   2518 
   2519     /* get segment following last match */
   2520     if (i < state.endpos) {
   2521         item = PySequence_GetSlice(string, i, state.endpos);
   2522         if (!item)
   2523             goto error;
   2524         status = PyList_Append(list, item);
   2525         Py_DECREF(item);
   2526         if (status < 0)
   2527             goto error;
   2528     }
   2529 
   2530     state_fini(&state);
   2531 
   2532     Py_DECREF(filter);
   2533 
   2534     /* convert list to single string (also removes list) */
   2535     item = join_list(list, string);
   2536 
   2537     if (!item)
   2538         return NULL;
   2539 
   2540     if (subn)
   2541         return Py_BuildValue("Nn", item, n);
   2542 
   2543     return item;
   2544 
   2545 error:
   2546     Py_DECREF(list);
   2547     state_fini(&state);
   2548     Py_DECREF(filter);
   2549     return NULL;
   2550 
   2551 }
   2552 
   2553 static PyObject*
   2554 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
   2555 {
   2556     PyObject* ptemplate;
   2557     PyObject* string;
   2558     Py_ssize_t count = 0;
   2559     static char* kwlist[] = { "repl", "string", "count", NULL };
   2560     if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
   2561                                      &ptemplate, &string, &count))
   2562         return NULL;
   2563 
   2564     return pattern_subx(self, ptemplate, string, count, 0);
   2565 }
   2566 
   2567 static PyObject*
   2568 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
   2569 {
   2570     PyObject* ptemplate;
   2571     PyObject* string;
   2572     Py_ssize_t count = 0;
   2573     static char* kwlist[] = { "repl", "string", "count", NULL };
   2574     if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
   2575                                      &ptemplate, &string, &count))
   2576         return NULL;
   2577 
   2578     return pattern_subx(self, ptemplate, string, count, 1);
   2579 }
   2580 
   2581 static PyObject*
   2582 pattern_copy(PatternObject* self, PyObject *unused)
   2583 {
   2584 #ifdef USE_BUILTIN_COPY
   2585     PatternObject* copy;
   2586     int offset;
   2587 
   2588     copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
   2589     if (!copy)
   2590         return NULL;
   2591 
   2592     offset = offsetof(PatternObject, groups);
   2593 
   2594     Py_XINCREF(self->groupindex);
   2595     Py_XINCREF(self->indexgroup);
   2596     Py_XINCREF(self->pattern);
   2597 
   2598     memcpy((char*) copy + offset, (char*) self + offset,
   2599            sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
   2600     copy->weakreflist = NULL;
   2601 
   2602     return (PyObject*) copy;
   2603 #else
   2604     PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
   2605     return NULL;
   2606 #endif
   2607 }
   2608 
   2609 static PyObject*
   2610 pattern_deepcopy(PatternObject* self, PyObject* memo)
   2611 {
   2612 #ifdef USE_BUILTIN_COPY
   2613     PatternObject* copy;
   2614 
   2615     copy = (PatternObject*) pattern_copy(self);
   2616     if (!copy)
   2617         return NULL;
   2618 
   2619     if (!deepcopy(&copy->groupindex, memo) ||
   2620         !deepcopy(&copy->indexgroup, memo) ||
   2621         !deepcopy(&copy->pattern, memo)) {
   2622         Py_DECREF(copy);
   2623         return NULL;
   2624     }
   2625 
   2626 #else
   2627     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
   2628     return NULL;
   2629 #endif
   2630 }
   2631 
   2632 PyDoc_STRVAR(pattern_match_doc,
   2633 "match(string[, pos[, endpos]]) --> match object or None.\n\
   2634     Matches zero or more characters at the beginning of the string");
   2635 
   2636 PyDoc_STRVAR(pattern_search_doc,
   2637 "search(string[, pos[, endpos]]) --> match object or None.\n\
   2638     Scan through string looking for a match, and return a corresponding\n\
   2639     match object instance. Return None if no position in the string matches.");
   2640 
   2641 PyDoc_STRVAR(pattern_split_doc,
   2642 "split(string[, maxsplit = 0])  --> list.\n\
   2643     Split string by the occurrences of pattern.");
   2644 
   2645 PyDoc_STRVAR(pattern_findall_doc,
   2646 "findall(string[, pos[, endpos]]) --> list.\n\
   2647    Return a list of all non-overlapping matches of pattern in string.");
   2648 
   2649 PyDoc_STRVAR(pattern_finditer_doc,
   2650 "finditer(string[, pos[, endpos]]) --> iterator.\n\
   2651     Return an iterator over all non-overlapping matches for the \n\
   2652     RE pattern in string. For each match, the iterator returns a\n\
   2653     match object.");
   2654 
   2655 PyDoc_STRVAR(pattern_sub_doc,
   2656 "sub(repl, string[, count = 0]) --> newstring\n\
   2657     Return the string obtained by replacing the leftmost non-overlapping\n\
   2658     occurrences of pattern in string by the replacement repl.");
   2659 
   2660 PyDoc_STRVAR(pattern_subn_doc,
   2661 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
   2662     Return the tuple (new_string, number_of_subs_made) found by replacing\n\
   2663     the leftmost non-overlapping occurrences of pattern with the\n\
   2664     replacement repl.");
   2665 
   2666 PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
   2667 
   2668 static PyMethodDef pattern_methods[] = {
   2669     {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
   2670 	pattern_match_doc},
   2671     {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
   2672 	pattern_search_doc},
   2673     {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
   2674 	pattern_sub_doc},
   2675     {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
   2676 	pattern_subn_doc},
   2677     {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
   2678 	pattern_split_doc},
   2679     {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
   2680 	pattern_findall_doc},
   2681 #if PY_VERSION_HEX >= 0x02020000
   2682     {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
   2683 	pattern_finditer_doc},
   2684 #endif
   2685     {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
   2686     {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
   2687     {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
   2688     {NULL, NULL}
   2689 };
   2690 
   2691 #define PAT_OFF(x) offsetof(PatternObject, x)
   2692 static PyMemberDef pattern_members[] = {
   2693     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
   2694     {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
   2695     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
   2696     {"groupindex", T_OBJECT,    PAT_OFF(groupindex),    READONLY},
   2697     {NULL}  /* Sentinel */
   2698 };
   2699 
   2700 statichere PyTypeObject Pattern_Type = {
   2701     PyObject_HEAD_INIT(NULL)
   2702     0, "_" SRE_MODULE ".SRE_Pattern",
   2703     sizeof(PatternObject), sizeof(SRE_CODE),
   2704     (destructor)pattern_dealloc, /*tp_dealloc*/
   2705     0,                                  /* tp_print */
   2706     0,                                  /* tp_getattrn */
   2707     0,					/* tp_setattr */
   2708     0,					/* tp_compare */
   2709     0,					/* tp_repr */
   2710     0,					/* tp_as_number */
   2711     0,					/* tp_as_sequence */
   2712     0,					/* tp_as_mapping */
   2713     0,					/* tp_hash */
   2714     0,					/* tp_call */
   2715     0,					/* tp_str */
   2716     0,					/* tp_getattro */
   2717     0,					/* tp_setattro */
   2718     0,					/* tp_as_buffer */
   2719     Py_TPFLAGS_DEFAULT,		        /* tp_flags */
   2720     pattern_doc,			/* tp_doc */
   2721     0,					/* tp_traverse */
   2722     0,					/* tp_clear */
   2723     0,					/* tp_richcompare */
   2724     offsetof(PatternObject, weakreflist),	/* tp_weaklistoffset */
   2725     0,					/* tp_iter */
   2726     0,					/* tp_iternext */
   2727     pattern_methods,			/* tp_methods */
   2728     pattern_members,			/* tp_members */
   2729 };
   2730 
   2731 static int _validate(PatternObject *self); /* Forward */
   2732 
   2733 static PyObject *
   2734 _compile(PyObject* self_, PyObject* args)
   2735 {
   2736     /* "compile" pattern descriptor to pattern object */
   2737 
   2738     PatternObject* self;
   2739     Py_ssize_t i, n;
   2740 
   2741     PyObject* pattern;
   2742     int flags = 0;
   2743     PyObject* code;
   2744     Py_ssize_t groups = 0;
   2745     PyObject* groupindex = NULL;
   2746     PyObject* indexgroup = NULL;
   2747     if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
   2748                           &PyList_Type, &code, &groups,
   2749                           &groupindex, &indexgroup))
   2750         return NULL;
   2751 
   2752     n = PyList_GET_SIZE(code);
   2753     /* coverity[ampersand_in_size] */
   2754     self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
   2755     if (!self)
   2756         return NULL;
   2757     self->weakreflist = NULL;
   2758     self->pattern = NULL;
   2759     self->groupindex = NULL;
   2760     self->indexgroup = NULL;
   2761 
   2762     self->codesize = n;
   2763 
   2764     for (i = 0; i < n; i++) {
   2765         PyObject *o = PyList_GET_ITEM(code, i);
   2766         unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
   2767                                               : PyLong_AsUnsignedLong(o);
   2768         if (value == (unsigned long)-1 && PyErr_Occurred()) {
   2769             if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
   2770                 PyErr_SetString(PyExc_OverflowError,
   2771                                 "regular expression code size limit exceeded");
   2772             }
   2773             break;
   2774         }
   2775         self->code[i] = (SRE_CODE) value;
   2776         if ((unsigned long) self->code[i] != value) {
   2777             PyErr_SetString(PyExc_OverflowError,
   2778                             "regular expression code size limit exceeded");
   2779             break;
   2780         }
   2781     }
   2782 
   2783     if (PyErr_Occurred()) {
   2784         Py_DECREF(self);
   2785         return NULL;
   2786     }
   2787 
   2788     Py_INCREF(pattern);
   2789     self->pattern = pattern;
   2790 
   2791     self->flags = flags;
   2792 
   2793     self->groups = groups;
   2794 
   2795     Py_XINCREF(groupindex);
   2796     self->groupindex = groupindex;
   2797 
   2798     Py_XINCREF(indexgroup);
   2799     self->indexgroup = indexgroup;
   2800 
   2801     self->weakreflist = NULL;
   2802 
   2803     if (!_validate(self)) {
   2804         Py_DECREF(self);
   2805         return NULL;
   2806     }
   2807 
   2808     return (PyObject*) self;
   2809 }
   2810 
   2811 /* -------------------------------------------------------------------- */
   2812 /* Code validation */
   2813 
   2814 /* To learn more about this code, have a look at the _compile() function in
   2815    Lib/sre_compile.py.  The validation functions below checks the code array
   2816    for conformance with the code patterns generated there.
   2817 
   2818    The nice thing about the generated code is that it is position-independent:
   2819    all jumps are relative jumps forward.  Also, jumps don't cross each other:
   2820    the target of a later jump is always earlier than the target of an earlier
   2821    jump.  IOW, this is okay:
   2822 
   2823    J---------J-------T--------T
   2824     \         \_____/        /
   2825      \______________________/
   2826 
   2827    but this is not:
   2828 
   2829    J---------J-------T--------T
   2830     \_________\_____/        /
   2831                \____________/
   2832 
   2833    It also helps that SRE_CODE is always an unsigned type.
   2834 */
   2835 
   2836 /* Defining this one enables tracing of the validator */
   2837 #undef VVERBOSE
   2838 
   2839 /* Trace macro for the validator */
   2840 #if defined(VVERBOSE)
   2841 #define VTRACE(v) printf v
   2842 #else
   2843 #define VTRACE(v) do {} while(0)  /* do nothing */
   2844 #endif
   2845 
   2846 /* Report failure */
   2847 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
   2848 
   2849 /* Extract opcode, argument, or skip count from code array */
   2850 #define GET_OP                                          \
   2851     do {                                                \
   2852         VTRACE(("%p: ", code));                         \
   2853         if (code >= end) FAIL;                          \
   2854         op = *code++;                                   \
   2855         VTRACE(("%lu (op)\n", (unsigned long)op));      \
   2856     } while (0)
   2857 #define GET_ARG                                         \
   2858     do {                                                \
   2859         VTRACE(("%p= ", code));                         \
   2860         if (code >= end) FAIL;                          \
   2861         arg = *code++;                                  \
   2862         VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
   2863     } while (0)
   2864 #define GET_SKIP_ADJ(adj)                               \
   2865     do {                                                \
   2866         VTRACE(("%p= ", code));                         \
   2867         if (code >= end) FAIL;                          \
   2868         skip = *code;                                   \
   2869         VTRACE(("%lu (skip to %p)\n",                   \
   2870                (unsigned long)skip, code+skip));        \
   2871         if (skip-adj > end-code)                        \
   2872             FAIL;                                       \
   2873         code++;                                         \
   2874     } while (0)
   2875 #define GET_SKIP GET_SKIP_ADJ(0)
   2876 
   2877 static int
   2878 _validate_charset(SRE_CODE *code, SRE_CODE *end)
   2879 {
   2880     /* Some variables are manipulated by the macros above */
   2881     SRE_CODE op;
   2882     SRE_CODE arg;
   2883     SRE_CODE offset;
   2884     int i;
   2885 
   2886     while (code < end) {
   2887         GET_OP;
   2888         switch (op) {
   2889 
   2890         case SRE_OP_NEGATE:
   2891             break;
   2892 
   2893         case SRE_OP_LITERAL:
   2894             GET_ARG;
   2895             break;
   2896 
   2897         case SRE_OP_RANGE:
   2898             GET_ARG;
   2899             GET_ARG;
   2900             break;
   2901 
   2902         case SRE_OP_CHARSET:
   2903             offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
   2904             if (offset > end-code)
   2905                 FAIL;
   2906             code += offset;
   2907             break;
   2908 
   2909         case SRE_OP_BIGCHARSET:
   2910             GET_ARG; /* Number of blocks */
   2911             offset = 256/sizeof(SRE_CODE); /* 256-byte table */
   2912             if (offset > end-code)
   2913                 FAIL;
   2914             /* Make sure that each byte points to a valid block */
   2915             for (i = 0; i < 256; i++) {
   2916                 if (((unsigned char *)code)[i] >= arg)
   2917                     FAIL;
   2918             }
   2919             code += offset;
   2920             offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
   2921             if (offset > end-code)
   2922                 FAIL;
   2923             code += offset;
   2924             break;
   2925 
   2926         case SRE_OP_CATEGORY:
   2927             GET_ARG;
   2928             switch (arg) {
   2929             case SRE_CATEGORY_DIGIT:
   2930             case SRE_CATEGORY_NOT_DIGIT:
   2931             case SRE_CATEGORY_SPACE:
   2932             case SRE_CATEGORY_NOT_SPACE:
   2933             case SRE_CATEGORY_WORD:
   2934             case SRE_CATEGORY_NOT_WORD:
   2935             case SRE_CATEGORY_LINEBREAK:
   2936             case SRE_CATEGORY_NOT_LINEBREAK:
   2937             case SRE_CATEGORY_LOC_WORD:
   2938             case SRE_CATEGORY_LOC_NOT_WORD:
   2939             case SRE_CATEGORY_UNI_DIGIT:
   2940             case SRE_CATEGORY_UNI_NOT_DIGIT:
   2941             case SRE_CATEGORY_UNI_SPACE:
   2942             case SRE_CATEGORY_UNI_NOT_SPACE:
   2943             case SRE_CATEGORY_UNI_WORD:
   2944             case SRE_CATEGORY_UNI_NOT_WORD:
   2945             case SRE_CATEGORY_UNI_LINEBREAK:
   2946             case SRE_CATEGORY_UNI_NOT_LINEBREAK:
   2947                 break;
   2948             default:
   2949                 FAIL;
   2950             }
   2951             break;
   2952 
   2953         default:
   2954             FAIL;
   2955 
   2956         }
   2957     }
   2958 
   2959     return 1;
   2960 }
   2961 
   2962 static int
   2963 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
   2964 {
   2965     /* Some variables are manipulated by the macros above */
   2966     SRE_CODE op;
   2967     SRE_CODE arg;
   2968     SRE_CODE skip;
   2969 
   2970     VTRACE(("code=%p, end=%p\n", code, end));
   2971 
   2972     if (code > end)
   2973         FAIL;
   2974 
   2975     while (code < end) {
   2976         GET_OP;
   2977         switch (op) {
   2978 
   2979         case SRE_OP_MARK:
   2980             /* We don't check whether marks are properly nested; the
   2981                sre_match() code is robust even if they don't, and the worst
   2982                you can get is nonsensical match results. */
   2983             GET_ARG;
   2984             if (arg > 2*groups+1) {
   2985                 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
   2986                 FAIL;
   2987             }
   2988             break;
   2989 
   2990         case SRE_OP_LITERAL:
   2991         case SRE_OP_NOT_LITERAL:
   2992         case SRE_OP_LITERAL_IGNORE:
   2993         case SRE_OP_NOT_LITERAL_IGNORE:
   2994             GET_ARG;
   2995             /* The arg is just a character, nothing to check */
   2996             break;
   2997 
   2998         case SRE_OP_SUCCESS:
   2999         case SRE_OP_FAILURE:
   3000             /* Nothing to check; these normally end the matching process */
   3001             break;
   3002 
   3003         case SRE_OP_AT:
   3004             GET_ARG;
   3005             switch (arg) {
   3006             case SRE_AT_BEGINNING:
   3007             case SRE_AT_BEGINNING_STRING:
   3008             case SRE_AT_BEGINNING_LINE:
   3009             case SRE_AT_END:
   3010             case SRE_AT_END_LINE:
   3011             case SRE_AT_END_STRING:
   3012             case SRE_AT_BOUNDARY:
   3013             case SRE_AT_NON_BOUNDARY:
   3014             case SRE_AT_LOC_BOUNDARY:
   3015             case SRE_AT_LOC_NON_BOUNDARY:
   3016             case SRE_AT_UNI_BOUNDARY:
   3017             case SRE_AT_UNI_NON_BOUNDARY:
   3018                 break;
   3019             default:
   3020                 FAIL;
   3021             }
   3022             break;
   3023 
   3024         case SRE_OP_ANY:
   3025         case SRE_OP_ANY_ALL:
   3026             /* These have no operands */
   3027             break;
   3028 
   3029         case SRE_OP_IN:
   3030         case SRE_OP_IN_IGNORE:
   3031             GET_SKIP;
   3032             /* Stop 1 before the end; we check the FAILURE below */
   3033             if (!_validate_charset(code, code+skip-2))
   3034                 FAIL;
   3035             if (code[skip-2] != SRE_OP_FAILURE)
   3036                 FAIL;
   3037             code += skip-1;
   3038             break;
   3039 
   3040         case SRE_OP_INFO:
   3041             {
   3042                 /* A minimal info field is
   3043                    <INFO> <1=skip> <2=flags> <3=min> <4=max>;
   3044                    If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
   3045                    more follows. */
   3046                 SRE_CODE flags, i;
   3047                 SRE_CODE *newcode;
   3048                 GET_SKIP;
   3049                 newcode = code+skip-1;
   3050                 GET_ARG; flags = arg;
   3051                 GET_ARG; /* min */
   3052                 GET_ARG; /* max */
   3053                 /* Check that only valid flags are present */
   3054                 if ((flags & ~(SRE_INFO_PREFIX |
   3055                                SRE_INFO_LITERAL |
   3056                                SRE_INFO_CHARSET)) != 0)
   3057                     FAIL;
   3058                 /* PREFIX and CHARSET are mutually exclusive */
   3059                 if ((flags & SRE_INFO_PREFIX) &&
   3060                     (flags & SRE_INFO_CHARSET))
   3061                     FAIL;
   3062                 /* LITERAL implies PREFIX */
   3063                 if ((flags & SRE_INFO_LITERAL) &&
   3064                     !(flags & SRE_INFO_PREFIX))
   3065                     FAIL;
   3066                 /* Validate the prefix */
   3067                 if (flags & SRE_INFO_PREFIX) {
   3068                     SRE_CODE prefix_len;
   3069                     GET_ARG; prefix_len = arg;
   3070                     GET_ARG; /* prefix skip */
   3071                     /* Here comes the prefix string */
   3072                     if (prefix_len > newcode-code)
   3073                         FAIL;
   3074                     code += prefix_len;
   3075                     /* And here comes the overlap table */
   3076                     if (prefix_len > newcode-code)
   3077                         FAIL;
   3078                     /* Each overlap value should be < prefix_len */
   3079                     for (i = 0; i < prefix_len; i++) {
   3080                         if (code[i] >= prefix_len)
   3081                             FAIL;
   3082                     }
   3083                     code += prefix_len;
   3084                 }
   3085                 /* Validate the charset */
   3086                 if (flags & SRE_INFO_CHARSET) {
   3087                     if (!_validate_charset(code, newcode-1))
   3088                         FAIL;
   3089                     if (newcode[-1] != SRE_OP_FAILURE)
   3090                         FAIL;
   3091                     code = newcode;
   3092                 }
   3093                 else if (code != newcode) {
   3094                   VTRACE(("code=%p, newcode=%p\n", code, newcode));
   3095                     FAIL;
   3096                 }
   3097             }
   3098             break;
   3099 
   3100         case SRE_OP_BRANCH:
   3101             {
   3102                 SRE_CODE *target = NULL;
   3103                 for (;;) {
   3104                     GET_SKIP;
   3105                     if (skip == 0)
   3106                         break;
   3107                     /* Stop 2 before the end; we check the JUMP below */
   3108                     if (!_validate_inner(code, code+skip-3, groups))
   3109                         FAIL;
   3110                     code += skip-3;
   3111                     /* Check that it ends with a JUMP, and that each JUMP
   3112                        has the same target */
   3113                     GET_OP;
   3114                     if (op != SRE_OP_JUMP)
   3115                         FAIL;
   3116                     GET_SKIP;
   3117                     if (target == NULL)
   3118                         target = code+skip-1;
   3119                     else if (code+skip-1 != target)
   3120                         FAIL;
   3121                 }
   3122             }
   3123             break;
   3124 
   3125         case SRE_OP_REPEAT_ONE:
   3126         case SRE_OP_MIN_REPEAT_ONE:
   3127             {
   3128                 SRE_CODE min, max;
   3129                 GET_SKIP;
   3130                 GET_ARG; min = arg;
   3131                 GET_ARG; max = arg;
   3132                 if (min > max)
   3133                     FAIL;
   3134                 if (max > SRE_MAXREPEAT)
   3135                     FAIL;
   3136                 if (!_validate_inner(code, code+skip-4, groups))
   3137                     FAIL;
   3138                 code += skip-4;
   3139                 GET_OP;
   3140                 if (op != SRE_OP_SUCCESS)
   3141                     FAIL;
   3142             }
   3143             break;
   3144 
   3145         case SRE_OP_REPEAT:
   3146             {
   3147                 SRE_CODE min, max;
   3148                 GET_SKIP;
   3149                 GET_ARG; min = arg;
   3150                 GET_ARG; max = arg;
   3151                 if (min > max)
   3152                     FAIL;
   3153                 if (max > SRE_MAXREPEAT)
   3154                     FAIL;
   3155                 if (!_validate_inner(code, code+skip-3, groups))
   3156                     FAIL;
   3157                 code += skip-3;
   3158                 GET_OP;
   3159                 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
   3160                     FAIL;
   3161             }
   3162             break;
   3163 
   3164         case SRE_OP_GROUPREF:
   3165         case SRE_OP_GROUPREF_IGNORE:
   3166             GET_ARG;
   3167             if (arg >= groups)
   3168                 FAIL;
   3169             break;
   3170 
   3171         case SRE_OP_GROUPREF_EXISTS:
   3172             /* The regex syntax for this is: '(?(group)then|else)', where
   3173                'group' is either an integer group number or a group name,
   3174                'then' and 'else' are sub-regexes, and 'else' is optional. */
   3175             GET_ARG;
   3176             if (arg >= groups)
   3177                 FAIL;
   3178             GET_SKIP_ADJ(1);
   3179             code--; /* The skip is relative to the first arg! */
   3180             /* There are two possibilities here: if there is both a 'then'
   3181                part and an 'else' part, the generated code looks like:
   3182 
   3183                GROUPREF_EXISTS
   3184                <group>
   3185                <skipyes>
   3186                ...then part...
   3187                JUMP
   3188                <skipno>
   3189                (<skipyes> jumps here)
   3190                ...else part...
   3191                (<skipno> jumps here)
   3192 
   3193                If there is only a 'then' part, it looks like:
   3194 
   3195                GROUPREF_EXISTS
   3196                <group>
   3197                <skip>
   3198                ...then part...
   3199                (<skip> jumps here)
   3200 
   3201                There is no direct way to decide which it is, and we don't want
   3202                to allow arbitrary jumps anywhere in the code; so we just look
   3203                for a JUMP opcode preceding our skip target.
   3204             */
   3205             if (skip >= 3 && skip-3 < end-code &&
   3206                 code[skip-3] == SRE_OP_JUMP)
   3207             {
   3208                 VTRACE(("both then and else parts present\n"));
   3209                 if (!_validate_inner(code+1, code+skip-3, groups))
   3210                     FAIL;
   3211                 code += skip-2; /* Position after JUMP, at <skipno> */
   3212                 GET_SKIP;
   3213                 if (!_validate_inner(code, code+skip-1, groups))
   3214                     FAIL;
   3215                 code += skip-1;
   3216             }
   3217             else {
   3218                 VTRACE(("only a then part present\n"));
   3219                 if (!_validate_inner(code+1, code+skip-1, groups))
   3220                     FAIL;
   3221                 code += skip-1;
   3222             }
   3223             break;
   3224 
   3225         case SRE_OP_ASSERT:
   3226         case SRE_OP_ASSERT_NOT:
   3227             GET_SKIP;
   3228             GET_ARG; /* 0 for lookahead, width for lookbehind */
   3229             code--; /* Back up over arg to simplify math below */
   3230             if (arg & 0x80000000)
   3231                 FAIL; /* Width too large */
   3232             /* Stop 1 before the end; we check the SUCCESS below */
   3233             if (!_validate_inner(code+1, code+skip-2, groups))
   3234                 FAIL;
   3235             code += skip-2;
   3236             GET_OP;
   3237             if (op != SRE_OP_SUCCESS)
   3238                 FAIL;
   3239             break;
   3240 
   3241         default:
   3242             FAIL;
   3243 
   3244         }
   3245     }
   3246 
   3247     VTRACE(("okay\n"));
   3248     return 1;
   3249 }
   3250 
   3251 static int
   3252 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
   3253 {
   3254     if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
   3255         FAIL;
   3256     if (groups == 0)  /* fix for simplejson */
   3257         groups = 100; /* 100 groups should always be safe */
   3258     return _validate_inner(code, end-1, groups);
   3259 }
   3260 
   3261 static int
   3262 _validate(PatternObject *self)
   3263 {
   3264     if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
   3265     {
   3266         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
   3267         return 0;
   3268     }
   3269     else
   3270         VTRACE(("Success!\n"));
   3271     return 1;
   3272 }
   3273 
   3274 /* -------------------------------------------------------------------- */
   3275 /* match methods */
   3276 
   3277 static void
   3278 match_dealloc(MatchObject* self)
   3279 {
   3280     Py_XDECREF(self->regs);
   3281     Py_XDECREF(self->string);
   3282     Py_DECREF(self->pattern);
   3283     PyObject_DEL(self);
   3284 }
   3285 
   3286 static PyObject*
   3287 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
   3288 {
   3289     if (index < 0 || index >= self->groups) {
   3290         /* raise IndexError if we were given a bad group number */
   3291         PyErr_SetString(
   3292             PyExc_IndexError,
   3293             "no such group"
   3294             );
   3295         return NULL;
   3296     }
   3297 
   3298     index *= 2;
   3299 
   3300     if (self->string == Py_None || self->mark[index] < 0) {
   3301         /* return default value if the string or group is undefined */
   3302         Py_INCREF(def);
   3303         return def;
   3304     }
   3305 
   3306     return PySequence_GetSlice(
   3307         self->string, self->mark[index], self->mark[index+1]
   3308         );
   3309 }
   3310 
   3311 static Py_ssize_t
   3312 match_getindex(MatchObject* self, PyObject* index)
   3313 {
   3314     Py_ssize_t i;
   3315 
   3316     if (PyInt_Check(index) || PyLong_Check(index))
   3317         return PyInt_AsSsize_t(index);
   3318 
   3319     i = -1;
   3320 
   3321     if (self->pattern->groupindex) {
   3322         index = PyObject_GetItem(self->pattern->groupindex, index);
   3323         if (index) {
   3324             if (PyInt_Check(index) || PyLong_Check(index))
   3325                 i = PyInt_AsSsize_t(index);
   3326             Py_DECREF(index);
   3327         } else
   3328             PyErr_Clear();
   3329     }
   3330 
   3331     return i;
   3332 }
   3333 
   3334 static PyObject*
   3335 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
   3336 {
   3337     return match_getslice_by_index(self, match_getindex(self, index), def);
   3338 }
   3339 
   3340 static PyObject*
   3341 match_expand(MatchObject* self, PyObject* ptemplate)
   3342 {
   3343     /* delegate to Python code */
   3344     return call(
   3345         SRE_PY_MODULE, "_expand",
   3346         PyTuple_Pack(3, self->pattern, self, ptemplate)
   3347         );
   3348 }
   3349 
   3350 static PyObject*
   3351 match_group(MatchObject* self, PyObject* args)
   3352 {
   3353     PyObject* result;
   3354     Py_ssize_t i, size;
   3355 
   3356     size = PyTuple_GET_SIZE(args);
   3357 
   3358     switch (size) {
   3359     case 0:
   3360         result = match_getslice(self, Py_False, Py_None);
   3361         break;
   3362     case 1:
   3363         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
   3364         break;
   3365     default:
   3366         /* fetch multiple items */
   3367         result = PyTuple_New(size);
   3368         if (!result)
   3369             return NULL;
   3370         for (i = 0; i < size; i++) {
   3371             PyObject* item = match_getslice(
   3372                 self, PyTuple_GET_ITEM(args, i), Py_None
   3373                 );
   3374             if (!item) {
   3375                 Py_DECREF(result);
   3376                 return NULL;
   3377             }
   3378             PyTuple_SET_ITEM(result, i, item);
   3379         }
   3380         break;
   3381     }
   3382     return result;
   3383 }
   3384 
   3385 static PyObject*
   3386 match_groups(MatchObject* self, PyObject* args, PyObject* kw)
   3387 {
   3388     PyObject* result;
   3389     Py_ssize_t index;
   3390 
   3391     PyObject* def = Py_None;
   3392     static char* kwlist[] = { "default", NULL };
   3393     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
   3394         return NULL;
   3395 
   3396     result = PyTuple_New(self->groups-1);
   3397     if (!result)
   3398         return NULL;
   3399 
   3400     for (index = 1; index < self->groups; index++) {
   3401         PyObject* item;
   3402         item = match_getslice_by_index(self, index, def);
   3403         if (!item) {
   3404             Py_DECREF(result);
   3405             return NULL;
   3406         }
   3407         PyTuple_SET_ITEM(result, index-1, item);
   3408     }
   3409 
   3410     return result;
   3411 }
   3412 
   3413 static PyObject*
   3414 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
   3415 {
   3416     PyObject* result;
   3417     PyObject* keys;
   3418     Py_ssize_t index;
   3419 
   3420     PyObject* def = Py_None;
   3421     static char* kwlist[] = { "default", NULL };
   3422     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
   3423         return NULL;
   3424 
   3425     result = PyDict_New();
   3426     if (!result || !self->pattern->groupindex)
   3427         return result;
   3428 
   3429     keys = PyMapping_Keys(self->pattern->groupindex);
   3430     if (!keys)
   3431         goto failed;
   3432 
   3433     for (index = 0; index < PyList_GET_SIZE(keys); index++) {
   3434         int status;
   3435         PyObject* key;
   3436         PyObject* value;
   3437         key = PyList_GET_ITEM(keys, index);
   3438         if (!key)
   3439             goto failed;
   3440         value = match_getslice(self, key, def);
   3441         if (!value)
   3442             goto failed;
   3443         status = PyDict_SetItem(result, key, value);
   3444         Py_DECREF(value);
   3445         if (status < 0)
   3446             goto failed;
   3447     }
   3448 
   3449     Py_DECREF(keys);
   3450 
   3451     return result;
   3452 
   3453 failed:
   3454     Py_XDECREF(keys);
   3455     Py_DECREF(result);
   3456     return NULL;
   3457 }
   3458 
   3459 static PyObject*
   3460 match_start(MatchObject* self, PyObject* args)
   3461 {
   3462     Py_ssize_t index;
   3463 
   3464     PyObject* index_ = Py_False; /* zero */
   3465     if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
   3466         return NULL;
   3467 
   3468     index = match_getindex(self, index_);
   3469 
   3470     if (index < 0 || index >= self->groups) {
   3471         PyErr_SetString(
   3472             PyExc_IndexError,
   3473             "no such group"
   3474             );
   3475         return NULL;
   3476     }
   3477 
   3478     /* mark is -1 if group is undefined */
   3479     return PyInt_FromSsize_t(self->mark[index*2]);
   3480 }
   3481 
   3482 static PyObject*
   3483 match_end(MatchObject* self, PyObject* args)
   3484 {
   3485     Py_ssize_t index;
   3486 
   3487     PyObject* index_ = Py_False; /* zero */
   3488     if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
   3489         return NULL;
   3490 
   3491     index = match_getindex(self, index_);
   3492 
   3493     if (index < 0 || index >= self->groups) {
   3494         PyErr_SetString(
   3495             PyExc_IndexError,
   3496             "no such group"
   3497             );
   3498         return NULL;
   3499     }
   3500 
   3501     /* mark is -1 if group is undefined */
   3502     return PyInt_FromSsize_t(self->mark[index*2+1]);
   3503 }
   3504 
   3505 LOCAL(PyObject*)
   3506 _pair(Py_ssize_t i1, Py_ssize_t i2)
   3507 {
   3508     PyObject* pair;
   3509     PyObject* item;
   3510 
   3511     pair = PyTuple_New(2);
   3512     if (!pair)
   3513         return NULL;
   3514 
   3515     item = PyInt_FromSsize_t(i1);
   3516     if (!item)
   3517         goto error;
   3518     PyTuple_SET_ITEM(pair, 0, item);
   3519 
   3520     item = PyInt_FromSsize_t(i2);
   3521     if (!item)
   3522         goto error;
   3523     PyTuple_SET_ITEM(pair, 1, item);
   3524 
   3525     return pair;
   3526 
   3527   error:
   3528     Py_DECREF(pair);
   3529     return NULL;
   3530 }
   3531 
   3532 static PyObject*
   3533 match_span(MatchObject* self, PyObject* args)
   3534 {
   3535     Py_ssize_t index;
   3536 
   3537     PyObject* index_ = Py_False; /* zero */
   3538     if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
   3539         return NULL;
   3540 
   3541     index = match_getindex(self, index_);
   3542 
   3543     if (index < 0 || index >= self->groups) {
   3544         PyErr_SetString(
   3545             PyExc_IndexError,
   3546             "no such group"
   3547             );
   3548         return NULL;
   3549     }
   3550 
   3551     /* marks are -1 if group is undefined */
   3552     return _pair(self->mark[index*2], self->mark[index*2+1]);
   3553 }
   3554 
   3555 static PyObject*
   3556 match_regs(MatchObject* self)
   3557 {
   3558     PyObject* regs;
   3559     PyObject* item;
   3560     Py_ssize_t index;
   3561 
   3562     regs = PyTuple_New(self->groups);
   3563     if (!regs)
   3564         return NULL;
   3565 
   3566     for (index = 0; index < self->groups; index++) {
   3567         item = _pair(self->mark[index*2], self->mark[index*2+1]);
   3568         if (!item) {
   3569             Py_DECREF(regs);
   3570             return NULL;
   3571         }
   3572         PyTuple_SET_ITEM(regs, index, item);
   3573     }
   3574 
   3575     Py_INCREF(regs);
   3576     self->regs = regs;
   3577 
   3578     return regs;
   3579 }
   3580 
   3581 static PyObject*
   3582 match_copy(MatchObject* self, PyObject *unused)
   3583 {
   3584 #ifdef USE_BUILTIN_COPY
   3585     MatchObject* copy;
   3586     Py_ssize_t slots, offset;
   3587 
   3588     slots = 2 * (self->pattern->groups+1);
   3589 
   3590     copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
   3591     if (!copy)
   3592         return NULL;
   3593 
   3594     /* this value a constant, but any compiler should be able to
   3595        figure that out all by itself */
   3596     offset = offsetof(MatchObject, string);
   3597 
   3598     Py_XINCREF(self->pattern);
   3599     Py_XINCREF(self->string);
   3600     Py_XINCREF(self->regs);
   3601 
   3602     memcpy((char*) copy + offset, (char*) self + offset,
   3603            sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
   3604 
   3605     return (PyObject*) copy;
   3606 #else
   3607     PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
   3608     return NULL;
   3609 #endif
   3610 }
   3611 
   3612 static PyObject*
   3613 match_deepcopy(MatchObject* self, PyObject* memo)
   3614 {
   3615 #ifdef USE_BUILTIN_COPY
   3616     MatchObject* copy;
   3617 
   3618     copy = (MatchObject*) match_copy(self);
   3619     if (!copy)
   3620         return NULL;
   3621 
   3622     if (!deepcopy((PyObject**) &copy->pattern, memo) ||
   3623         !deepcopy(&copy->string, memo) ||
   3624         !deepcopy(&copy->regs, memo)) {
   3625         Py_DECREF(copy);
   3626         return NULL;
   3627     }
   3628 
   3629 #else
   3630     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
   3631     return NULL;
   3632 #endif
   3633 }
   3634 
   3635 PyDoc_STRVAR(match_doc,
   3636 "The result of re.match() and re.search().\n\
   3637 Match objects always have a boolean value of True.");
   3638 
   3639 PyDoc_STRVAR(match_group_doc,
   3640 "group([group1, ...]) -> str or tuple.\n\
   3641     Return subgroup(s) of the match by indices or names.\n\
   3642     For 0 returns the entire match.");
   3643 
   3644 PyDoc_STRVAR(match_start_doc,
   3645 "start([group=0]) -> int.\n\
   3646     Return index of the start of the substring matched by group.");
   3647 
   3648 PyDoc_STRVAR(match_end_doc,
   3649 "end([group=0]) -> int.\n\
   3650     Return index of the end of the substring matched by group.");
   3651 
   3652 PyDoc_STRVAR(match_span_doc,
   3653 "span([group]) -> tuple.\n\
   3654     For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
   3655 
   3656 PyDoc_STRVAR(match_groups_doc,
   3657 "groups([default=None]) -> tuple.\n\
   3658     Return a tuple containing all the subgroups of the match, from 1.\n\
   3659     The default argument is used for groups\n\
   3660     that did not participate in the match");
   3661 
   3662 PyDoc_STRVAR(match_groupdict_doc,
   3663 "groupdict([default=None]) -> dict.\n\
   3664     Return a dictionary containing all the named subgroups of the match,\n\
   3665     keyed by the subgroup name. The default argument is used for groups\n\
   3666     that did not participate in the match");
   3667 
   3668 PyDoc_STRVAR(match_expand_doc,
   3669 "expand(template) -> str.\n\
   3670     Return the string obtained by doing backslash substitution\n\
   3671     on the string template, as done by the sub() method.");
   3672 
   3673 static PyMethodDef match_methods[] = {
   3674     {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
   3675     {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
   3676     {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
   3677     {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
   3678     {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
   3679         match_groups_doc},
   3680     {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
   3681         match_groupdict_doc},
   3682     {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
   3683     {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
   3684     {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
   3685     {NULL, NULL}
   3686 };
   3687 
   3688 static PyObject *
   3689 match_lastindex_get(MatchObject *self)
   3690 {
   3691     if (self->lastindex >= 0)
   3692         return PyInt_FromSsize_t(self->lastindex);
   3693     Py_INCREF(Py_None);
   3694     return Py_None;
   3695 }
   3696 
   3697 static PyObject *
   3698 match_lastgroup_get(MatchObject *self)
   3699 {
   3700     if (self->pattern->indexgroup && self->lastindex >= 0) {
   3701         PyObject* result = PySequence_GetItem(
   3702             self->pattern->indexgroup, self->lastindex
   3703             );
   3704         if (result)
   3705             return result;
   3706         PyErr_Clear();
   3707     }
   3708     Py_INCREF(Py_None);
   3709     return Py_None;
   3710 }
   3711 
   3712 static PyObject *
   3713 match_regs_get(MatchObject *self)
   3714 {
   3715     if (self->regs) {
   3716         Py_INCREF(self->regs);
   3717         return self->regs;
   3718     } else
   3719         return match_regs(self);
   3720 }
   3721 
   3722 static PyGetSetDef match_getset[] = {
   3723     {"lastindex", (getter)match_lastindex_get, (setter)NULL},
   3724     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
   3725     {"regs",      (getter)match_regs_get,      (setter)NULL},
   3726     {NULL}
   3727 };
   3728 
   3729 #define MATCH_OFF(x) offsetof(MatchObject, x)
   3730 static PyMemberDef match_members[] = {
   3731     {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY},
   3732     {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY},
   3733     {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY},
   3734     {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY},
   3735     {NULL}
   3736 };
   3737 
   3738 
   3739 /* FIXME: implement setattr("string", None) as a special case (to
   3740    detach the associated string, if any */
   3741 
   3742 static PyTypeObject Match_Type = {
   3743     PyVarObject_HEAD_INIT(NULL, 0)
   3744     "_" SRE_MODULE ".SRE_Match",
   3745     sizeof(MatchObject), sizeof(Py_ssize_t),
   3746     (destructor)match_dealloc,  /* tp_dealloc */
   3747     0,                          /* tp_print */
   3748     0,                          /* tp_getattr */
   3749     0,                          /* tp_setattr */
   3750     0,                          /* tp_compare */
   3751     0,                          /* tp_repr */
   3752     0,                          /* tp_as_number */
   3753     0,                          /* tp_as_sequence */
   3754     0,                          /* tp_as_mapping */
   3755     0,                          /* tp_hash */
   3756     0,                          /* tp_call */
   3757     0,                          /* tp_str */
   3758     0,                          /* tp_getattro */
   3759     0,                          /* tp_setattro */
   3760     0,                          /* tp_as_buffer */
   3761     Py_TPFLAGS_DEFAULT,
   3762     match_doc,                  /* tp_doc */
   3763     0,                          /* tp_traverse */
   3764     0,                          /* tp_clear */
   3765     0,                          /* tp_richcompare */
   3766     0,                          /* tp_weaklistoffset */
   3767     0,                          /* tp_iter */
   3768     0,                          /* tp_iternext */
   3769     match_methods,		/* tp_methods */
   3770     match_members,		/* tp_members */
   3771     match_getset,  	        /* tp_getset */
   3772 };
   3773 
   3774 static PyObject*
   3775 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
   3776 {
   3777     /* create match object (from state object) */
   3778 
   3779     MatchObject* match;
   3780     Py_ssize_t i, j;
   3781     char* base;
   3782     int n;
   3783 
   3784     if (status > 0) {
   3785 
   3786         /* create match object (with room for extra group marks) */
   3787         /* coverity[ampersand_in_size] */
   3788         match = PyObject_NEW_VAR(MatchObject, &Match_Type,
   3789                                  2*(pattern->groups+1));
   3790         if (!match)
   3791             return NULL;
   3792 
   3793         Py_INCREF(pattern);
   3794         match->pattern = pattern;
   3795 
   3796         Py_INCREF(state->string);
   3797         match->string = state->string;
   3798 
   3799         match->regs = NULL;
   3800         match->groups = pattern->groups+1;
   3801 
   3802         /* fill in group slices */
   3803 
   3804         base = (char*) state->beginning;
   3805         n = state->charsize;
   3806 
   3807         match->mark[0] = ((char*) state->start - base) / n;
   3808         match->mark[1] = ((char*) state->ptr - base) / n;
   3809 
   3810         for (i = j = 0; i < pattern->groups; i++, j+=2)
   3811             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
   3812                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
   3813                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
   3814             } else
   3815                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
   3816 
   3817         match->pos = state->pos;
   3818         match->endpos = state->endpos;
   3819 
   3820         match->lastindex = state->lastindex;
   3821 
   3822         return (PyObject*) match;
   3823 
   3824     } else if (status == 0) {
   3825 
   3826         /* no match */
   3827         Py_INCREF(Py_None);
   3828         return Py_None;
   3829 
   3830     }
   3831 
   3832     /* internal error */
   3833     pattern_error(status);
   3834     return NULL;
   3835 }
   3836 
   3837 
   3838 /* -------------------------------------------------------------------- */
   3839 /* scanner methods (experimental) */
   3840 
   3841 static void
   3842 scanner_dealloc(ScannerObject* self)
   3843 {
   3844     state_fini(&self->state);
   3845     Py_XDECREF(self->pattern);
   3846     PyObject_DEL(self);
   3847 }
   3848 
   3849 static PyObject*
   3850 scanner_match(ScannerObject* self, PyObject *unused)
   3851 {
   3852     SRE_STATE* state = &self->state;
   3853     PyObject* match;
   3854     int status;
   3855 
   3856     if (state->start == NULL)
   3857         Py_RETURN_NONE;
   3858 
   3859     state_reset(state);
   3860 
   3861     state->ptr = state->start;
   3862 
   3863     if (state->charsize == 1) {
   3864         status = sre_match(state, PatternObject_GetCode(self->pattern));
   3865     } else {
   3866 #if defined(HAVE_UNICODE)
   3867         status = sre_umatch(state, PatternObject_GetCode(self->pattern));
   3868 #endif
   3869     }
   3870     if (PyErr_Occurred())
   3871         return NULL;
   3872 
   3873     match = pattern_new_match((PatternObject*) self->pattern,
   3874                                state, status);
   3875 
   3876     if (status == 0)
   3877         state->start = NULL;
   3878     else if (state->ptr != state->start)
   3879         state->start = state->ptr;
   3880     else if (state->ptr != state->end)
   3881         state->start = (void*) ((char*) state->ptr + state->charsize);
   3882     else
   3883         state->start = NULL;
   3884 
   3885     return match;
   3886 }
   3887 
   3888 
   3889 static PyObject*
   3890 scanner_search(ScannerObject* self, PyObject *unused)
   3891 {
   3892     SRE_STATE* state = &self->state;
   3893     PyObject* match;
   3894     int status;
   3895 
   3896     if (state->start == NULL)
   3897         Py_RETURN_NONE;
   3898 
   3899     state_reset(state);
   3900 
   3901     state->ptr = state->start;
   3902 
   3903     if (state->charsize == 1) {
   3904         status = sre_search(state, PatternObject_GetCode(self->pattern));
   3905     } else {
   3906 #if defined(HAVE_UNICODE)
   3907         status = sre_usearch(state, PatternObject_GetCode(self->pattern));
   3908 #endif
   3909     }
   3910     if (PyErr_Occurred())
   3911         return NULL;
   3912 
   3913     match = pattern_new_match((PatternObject*) self->pattern,
   3914                                state, status);
   3915 
   3916     if (status == 0)
   3917         state->start = NULL;
   3918     else if (state->ptr != state->start)
   3919         state->start = state->ptr;
   3920     else if (state->ptr != state->end)
   3921         state->start = (void*) ((char*) state->ptr + state->charsize);
   3922     else
   3923         state->start = NULL;
   3924 
   3925     return match;
   3926 }
   3927 
   3928 static PyMethodDef scanner_methods[] = {
   3929     {"match", (PyCFunction) scanner_match, METH_NOARGS},
   3930     {"search", (PyCFunction) scanner_search, METH_NOARGS},
   3931     {NULL, NULL}
   3932 };
   3933 
   3934 #define SCAN_OFF(x) offsetof(ScannerObject, x)
   3935 static PyMemberDef scanner_members[] = {
   3936     {"pattern",	T_OBJECT,	SCAN_OFF(pattern),	READONLY},
   3937     {NULL}  /* Sentinel */
   3938 };
   3939 
   3940 statichere PyTypeObject Scanner_Type = {
   3941     PyObject_HEAD_INIT(NULL)
   3942     0, "_" SRE_MODULE ".SRE_Scanner",
   3943     sizeof(ScannerObject), 0,
   3944     (destructor)scanner_dealloc, /*tp_dealloc*/
   3945     0,				/* tp_print */
   3946     0,				/* tp_getattr */
   3947     0,				/* tp_setattr */
   3948     0,				/* tp_reserved */
   3949     0,				/* tp_repr */
   3950     0,				/* tp_as_number */
   3951     0,				/* tp_as_sequence */
   3952     0,				/* tp_as_mapping */
   3953     0,				/* tp_hash */
   3954     0,				/* tp_call */
   3955     0,				/* tp_str */
   3956     0,				/* tp_getattro */
   3957     0,				/* tp_setattro */
   3958     0,				/* tp_as_buffer */
   3959     Py_TPFLAGS_DEFAULT,		/* tp_flags */
   3960     0,				/* tp_doc */
   3961     0,				/* tp_traverse */
   3962     0,				/* tp_clear */
   3963     0,				/* tp_richcompare */
   3964     0,				/* tp_weaklistoffset */
   3965     0,				/* tp_iter */
   3966     0,				/* tp_iternext */
   3967     scanner_methods,		/* tp_methods */
   3968     scanner_members,		/* tp_members */
   3969     0,				/* tp_getset */
   3970 };
   3971 
   3972 static PyObject*
   3973 pattern_scanner(PatternObject* pattern, PyObject* args)
   3974 {
   3975     /* create search state object */
   3976 
   3977     ScannerObject* self;
   3978 
   3979     PyObject* string;
   3980     Py_ssize_t start = 0;
   3981     Py_ssize_t end = PY_SSIZE_T_MAX;
   3982     if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
   3983         return NULL;
   3984 
   3985     /* create scanner object */
   3986     self = PyObject_NEW(ScannerObject, &Scanner_Type);
   3987     if (!self)
   3988         return NULL;
   3989     self->pattern = NULL;
   3990 
   3991     string = state_init(&self->state, pattern, string, start, end);
   3992     if (!string) {
   3993         Py_DECREF(self);
   3994         return NULL;
   3995     }
   3996 
   3997     Py_INCREF(pattern);
   3998     self->pattern = (PyObject*) pattern;
   3999 
   4000     return (PyObject*) self;
   4001 }
   4002 
   4003 static PyMethodDef _functions[] = {
   4004     {"compile", _compile, METH_VARARGS},
   4005     {"getcodesize", sre_codesize, METH_NOARGS},
   4006     {"getlower", sre_getlower, METH_VARARGS},
   4007     {NULL, NULL}
   4008 };
   4009 
   4010 #if PY_VERSION_HEX < 0x02030000
   4011 DL_EXPORT(void) init_sre(void)
   4012 #else
   4013 PyMODINIT_FUNC init_sre(void)
   4014 #endif
   4015 {
   4016     PyObject* m;
   4017     PyObject* d;
   4018     PyObject* x;
   4019 
   4020     /* Patch object types */
   4021     if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
   4022         PyType_Ready(&Scanner_Type))
   4023         return;
   4024 
   4025     m = Py_InitModule("_" SRE_MODULE, _functions);
   4026     if (m == NULL)
   4027     	return;
   4028     d = PyModule_GetDict(m);
   4029 
   4030     x = PyInt_FromLong(SRE_MAGIC);
   4031     if (x) {
   4032         PyDict_SetItemString(d, "MAGIC", x);
   4033         Py_DECREF(x);
   4034     }
   4035 
   4036     x = PyInt_FromLong(sizeof(SRE_CODE));
   4037     if (x) {
   4038         PyDict_SetItemString(d, "CODESIZE", x);
   4039         Py_DECREF(x);
   4040     }
   4041 
   4042     x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
   4043     if (x) {
   4044         PyDict_SetItemString(d, "MAXREPEAT", x);
   4045         Py_DECREF(x);
   4046     }
   4047 
   4048     x = PyString_FromString(copyright);
   4049     if (x) {
   4050         PyDict_SetItemString(d, "copyright", x);
   4051         Py_DECREF(x);
   4052     }
   4053 }
   4054 
   4055 #endif /* !defined(SRE_RECURSIVE) */
   4056 
   4057 /* vim:ts=4:sw=4:et
   4058 */
   4059