Home | History | Annotate | Download | only in Modules
      1 /*
      2  * Secret Labs' Regular Expression Engine
      3  *
      4  * regular expression matching engine
      5 
      6     Copyright (c) 2011, Intel Corporation. All rights reserved.<BR>
      7     This program and the accompanying materials are licensed and made available under
      8     the terms and conditions of the BSD License that accompanies this distribution.
      9     The full text of the license may be found at
     10     http://opensource.org/licenses/bsd-license.
     11 
     12     THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
     13     WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
     14  *
     15  * partial history:
     16  * 1999-10-24 fl  created (based on existing template matcher code)
     17  * 2000-03-06 fl  first alpha, sort of
     18  * 2000-08-01 fl  fixes for 1.6b1
     19  * 2000-08-07 fl  use PyOS_CheckStack() if available
     20  * 2000-09-20 fl  added expand method
     21  * 2001-03-20 fl  lots of fixes for 2.1b2
     22  * 2001-04-15 fl  export copyright as Python attribute, not global
     23  * 2001-04-28 fl  added __copy__ methods (work in progress)
     24  * 2001-05-14 fl  fixes for 1.5.2 compatibility
     25  * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
     26  * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
     27  * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
     28  * 2001-10-21 fl  added sub/subn primitive
     29  * 2001-10-24 fl  added finditer primitive (for 2.2 only)
     30  * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
     31  * 2002-11-09 fl  fixed empty sub/subn return type
     32  * 2003-04-18 mvl fully support 4-byte codes
     33  * 2003-10-17 gn  implemented non recursive scheme
     34  *
     35  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
     36  *
     37  * This version of the SRE library can be redistributed under CNRI's
     38  * Python 1.6 license.  For any other use, please contact Secret Labs
     39  * AB (info (at) pythonware.com).
     40  *
     41  * Portions of this engine have been developed in cooperation with
     42  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
     43  * other compatibility work.
     44  */
     45 
     46 /* Get rid of these macros to prevent collisions between EFI and Python in this file. */
     47 #undef  RETURN_ERROR
     48 #undef  RETURN_SUCCESS
     49 
     50 #ifndef SRE_RECURSIVE
     51 
     52 static char copyright[] =
     53     " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
     54 
     55 #define PY_SSIZE_T_CLEAN
     56 
     57 #include "Python.h"
     58 #include "structmember.h" /* offsetof */
     59 
     60 #include "sre.h"
     61 
     62 #include <ctype.h>
     63 
     64 /* name of this module, minus the leading underscore */
     65 #if !defined(SRE_MODULE)
     66 #define SRE_MODULE "sre"
     67 #endif
     68 
     69 #define SRE_PY_MODULE "re"
     70 
     71 /* defining this one enables tracing */
     72 #undef VERBOSE
     73 
     74 #if PY_VERSION_HEX >= 0x01060000
     75 #if PY_VERSION_HEX  < 0x02020000 || defined(Py_USING_UNICODE)
     76 /* defining this enables unicode support (default under 1.6a1 and later) */
     77 #define HAVE_UNICODE
     78 #endif
     79 #endif
     80 
     81 /* -------------------------------------------------------------------- */
     82 /* optional features */
     83 
     84 /* enables fast searching */
     85 #define USE_FAST_SEARCH
     86 
     87 /* enables aggressive inlining (always on for Visual C) */
     88 #undef USE_INLINE
     89 
     90 /* enables copy/deepcopy handling (work in progress) */
     91 #undef USE_BUILTIN_COPY
     92 
     93 #if PY_VERSION_HEX < 0x01060000
     94 #define PyObject_DEL(op) PyMem_DEL((op))
     95 #endif
     96 
     97 /* -------------------------------------------------------------------- */
     98 
     99 #if defined(_MSC_VER)
    100 #pragma optimize("gt", on) /* doesn't seem to make much difference... */
    101 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
    102 /* fastest possible local call under MSVC */
    103 #define LOCAL(type) static __inline type __fastcall
    104 #elif defined(USE_INLINE)
    105 #define LOCAL(type) static inline type
    106 #else
    107 #define LOCAL(type) static type
    108 #endif
    109 
    110 /* error codes */
    111 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
    112 #define SRE_ERROR_STATE -2 /* illegal state */
    113 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
    114 #define SRE_ERROR_MEMORY -9 /* out of memory */
    115 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
    116 
    117 #if defined(VERBOSE)
    118 #define TRACE(v) printf v
    119 #else
    120 #define TRACE(v)
    121 #endif
    122 
    123 /* -------------------------------------------------------------------- */
    124 /* search engine state */
    125 
    126 /* default character predicates (run sre_chars.py to regenerate tables) */
    127 
    128 #define SRE_DIGIT_MASK 1
    129 #define SRE_SPACE_MASK 2
    130 #define SRE_LINEBREAK_MASK 4
    131 #define SRE_ALNUM_MASK 8
    132 #define SRE_WORD_MASK 16
    133 
    134 /* FIXME: this assumes ASCII.  create tables in init_sre() instead */
    135 
    136 static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
    137 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
    138 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
    139 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    140 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
    141 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    142 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
    143 
    144 static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
    145 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
    146 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
    147 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
    148 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
    149 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
    150 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
    151 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
    152 120, 121, 122, 123, 124, 125, 126, 127 };
    153 
    154 #define SRE_IS_DIGIT(ch)\
    155     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
    156 #define SRE_IS_SPACE(ch)\
    157     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
    158 #define SRE_IS_LINEBREAK(ch)\
    159     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
    160 #define SRE_IS_ALNUM(ch)\
    161     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
    162 #define SRE_IS_WORD(ch)\
    163     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
    164 
    165 static unsigned int sre_lower(unsigned int ch)
    166 {
    167     return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
    168 }
    169 
    170 /* locale-specific character predicates */
    171 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
    172  * warnings when c's type supports only numbers < N+1 */
    173 #define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
    174 #define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
    175 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
    176 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
    177 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
    178 
    179 static unsigned int sre_lower_locale(unsigned int ch)
    180 {
    181     return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
    182 }
    183 
    184 /* unicode-specific character predicates */
    185 
    186 #if defined(HAVE_UNICODE)
    187 
    188 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
    189 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
    190 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
    191 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
    192 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
    193 
    194 static unsigned int sre_lower_unicode(unsigned int ch)
    195 {
    196     return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
    197 }
    198 
    199 #endif
    200 
    201 LOCAL(int)
    202 sre_category(SRE_CODE category, unsigned int ch)
    203 {
    204     switch (category) {
    205 
    206     case SRE_CATEGORY_DIGIT:
    207         return SRE_IS_DIGIT(ch);
    208     case SRE_CATEGORY_NOT_DIGIT:
    209         return !SRE_IS_DIGIT(ch);
    210     case SRE_CATEGORY_SPACE:
    211         return SRE_IS_SPACE(ch);
    212     case SRE_CATEGORY_NOT_SPACE:
    213         return !SRE_IS_SPACE(ch);
    214     case SRE_CATEGORY_WORD:
    215         return SRE_IS_WORD(ch);
    216     case SRE_CATEGORY_NOT_WORD:
    217         return !SRE_IS_WORD(ch);
    218     case SRE_CATEGORY_LINEBREAK:
    219         return SRE_IS_LINEBREAK(ch);
    220     case SRE_CATEGORY_NOT_LINEBREAK:
    221         return !SRE_IS_LINEBREAK(ch);
    222 
    223     case SRE_CATEGORY_LOC_WORD:
    224         return SRE_LOC_IS_WORD(ch);
    225     case SRE_CATEGORY_LOC_NOT_WORD:
    226         return !SRE_LOC_IS_WORD(ch);
    227 
    228 #if defined(HAVE_UNICODE)
    229     case SRE_CATEGORY_UNI_DIGIT:
    230         return SRE_UNI_IS_DIGIT(ch);
    231     case SRE_CATEGORY_UNI_NOT_DIGIT:
    232         return !SRE_UNI_IS_DIGIT(ch);
    233     case SRE_CATEGORY_UNI_SPACE:
    234         return SRE_UNI_IS_SPACE(ch);
    235     case SRE_CATEGORY_UNI_NOT_SPACE:
    236         return !SRE_UNI_IS_SPACE(ch);
    237     case SRE_CATEGORY_UNI_WORD:
    238         return SRE_UNI_IS_WORD(ch);
    239     case SRE_CATEGORY_UNI_NOT_WORD:
    240         return !SRE_UNI_IS_WORD(ch);
    241     case SRE_CATEGORY_UNI_LINEBREAK:
    242         return SRE_UNI_IS_LINEBREAK(ch);
    243     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
    244         return !SRE_UNI_IS_LINEBREAK(ch);
    245 #else
    246     case SRE_CATEGORY_UNI_DIGIT:
    247         return SRE_IS_DIGIT(ch);
    248     case SRE_CATEGORY_UNI_NOT_DIGIT:
    249         return !SRE_IS_DIGIT(ch);
    250     case SRE_CATEGORY_UNI_SPACE:
    251         return SRE_IS_SPACE(ch);
    252     case SRE_CATEGORY_UNI_NOT_SPACE:
    253         return !SRE_IS_SPACE(ch);
    254     case SRE_CATEGORY_UNI_WORD:
    255         return SRE_LOC_IS_WORD(ch);
    256     case SRE_CATEGORY_UNI_NOT_WORD:
    257         return !SRE_LOC_IS_WORD(ch);
    258     case SRE_CATEGORY_UNI_LINEBREAK:
    259         return SRE_IS_LINEBREAK(ch);
    260     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
    261         return !SRE_IS_LINEBREAK(ch);
    262 #endif
    263     }
    264     return 0;
    265 }
    266 
    267 /* helpers */
    268 
    269 static void
    270 data_stack_dealloc(SRE_STATE* state)
    271 {
    272     if (state->data_stack) {
    273         PyMem_FREE(state->data_stack);
    274         state->data_stack = NULL;
    275     }
    276     state->data_stack_size = state->data_stack_base = 0;
    277 }
    278 
    279 static int
    280 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
    281 {
    282     Py_ssize_t minsize, cursize;
    283     minsize = state->data_stack_base+size;
    284     cursize = state->data_stack_size;
    285     if (cursize < minsize) {
    286         void* stack;
    287         cursize = minsize+minsize/4+1024;
    288         TRACE(("allocate/grow stack %d\n", cursize));
    289         stack = PyMem_REALLOC(state->data_stack, cursize);
    290         if (!stack) {
    291             data_stack_dealloc(state);
    292             return SRE_ERROR_MEMORY;
    293         }
    294         state->data_stack = (char *)stack;
    295         state->data_stack_size = cursize;
    296     }
    297     return 0;
    298 }
    299 
    300 /* generate 8-bit version */
    301 
    302 #define SRE_CHAR unsigned char
    303 #define SRE_AT sre_at
    304 #define SRE_COUNT sre_count
    305 #define SRE_CHARSET sre_charset
    306 #define SRE_INFO sre_info
    307 #define SRE_MATCH sre_match
    308 #define SRE_MATCH_CONTEXT sre_match_context
    309 #define SRE_SEARCH sre_search
    310 #define SRE_LITERAL_TEMPLATE sre_literal_template
    311 
    312 #if defined(HAVE_UNICODE)
    313 
    314 #define SRE_RECURSIVE
    315 #include "_sre.c"
    316 #undef SRE_RECURSIVE
    317 
    318 #undef SRE_LITERAL_TEMPLATE
    319 #undef SRE_SEARCH
    320 #undef SRE_MATCH
    321 #undef SRE_MATCH_CONTEXT
    322 #undef SRE_INFO
    323 #undef SRE_CHARSET
    324 #undef SRE_COUNT
    325 #undef SRE_AT
    326 #undef SRE_CHAR
    327 
    328 /* generate 16-bit unicode version */
    329 
    330 #define SRE_CHAR Py_UNICODE
    331 #define SRE_AT sre_uat
    332 #define SRE_COUNT sre_ucount
    333 #define SRE_CHARSET sre_ucharset
    334 #define SRE_INFO sre_uinfo
    335 #define SRE_MATCH sre_umatch
    336 #define SRE_MATCH_CONTEXT sre_umatch_context
    337 #define SRE_SEARCH sre_usearch
    338 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
    339 #endif
    340 
    341 #endif /* SRE_RECURSIVE */
    342 
    343 /* -------------------------------------------------------------------- */
    344 /* String matching engine */
    345 
    346 /* the following section is compiled twice, with different character
    347    settings */
    348 
    349 LOCAL(int)
    350 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
    351 {
    352     /* check if pointer is at given position */
    353 
    354     Py_ssize_t thisp, thatp;
    355 
    356     switch (at) {
    357 
    358     case SRE_AT_BEGINNING:
    359     case SRE_AT_BEGINNING_STRING:
    360         return ((void*) ptr == state->beginning);
    361 
    362     case SRE_AT_BEGINNING_LINE:
    363         return ((void*) ptr == state->beginning ||
    364                 SRE_IS_LINEBREAK((int) ptr[-1]));
    365 
    366     case SRE_AT_END:
    367         return (((void*) (ptr+1) == state->end &&
    368                  SRE_IS_LINEBREAK((int) ptr[0])) ||
    369                 ((void*) ptr == state->end));
    370 
    371     case SRE_AT_END_LINE:
    372         return ((void*) ptr == state->end ||
    373                 SRE_IS_LINEBREAK((int) ptr[0]));
    374 
    375     case SRE_AT_END_STRING:
    376         return ((void*) ptr == state->end);
    377 
    378     case SRE_AT_BOUNDARY:
    379         if (state->beginning == state->end)
    380             return 0;
    381         thatp = ((void*) ptr > state->beginning) ?
    382             SRE_IS_WORD((int) ptr[-1]) : 0;
    383         thisp = ((void*) ptr < state->end) ?
    384             SRE_IS_WORD((int) ptr[0]) : 0;
    385         return thisp != thatp;
    386 
    387     case SRE_AT_NON_BOUNDARY:
    388         if (state->beginning == state->end)
    389             return 0;
    390         thatp = ((void*) ptr > state->beginning) ?
    391             SRE_IS_WORD((int) ptr[-1]) : 0;
    392         thisp = ((void*) ptr < state->end) ?
    393             SRE_IS_WORD((int) ptr[0]) : 0;
    394         return thisp == thatp;
    395 
    396     case SRE_AT_LOC_BOUNDARY:
    397         if (state->beginning == state->end)
    398             return 0;
    399         thatp = ((void*) ptr > state->beginning) ?
    400             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
    401         thisp = ((void*) ptr < state->end) ?
    402             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
    403         return thisp != thatp;
    404 
    405     case SRE_AT_LOC_NON_BOUNDARY:
    406         if (state->beginning == state->end)
    407             return 0;
    408         thatp = ((void*) ptr > state->beginning) ?
    409             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
    410         thisp = ((void*) ptr < state->end) ?
    411             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
    412         return thisp == thatp;
    413 
    414 #if defined(HAVE_UNICODE)
    415     case SRE_AT_UNI_BOUNDARY:
    416         if (state->beginning == state->end)
    417             return 0;
    418         thatp = ((void*) ptr > state->beginning) ?
    419             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
    420         thisp = ((void*) ptr < state->end) ?
    421             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
    422         return thisp != thatp;
    423 
    424     case SRE_AT_UNI_NON_BOUNDARY:
    425         if (state->beginning == state->end)
    426             return 0;
    427         thatp = ((void*) ptr > state->beginning) ?
    428             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
    429         thisp = ((void*) ptr < state->end) ?
    430             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
    431         return thisp == thatp;
    432 #endif
    433 
    434     }
    435 
    436     return 0;
    437 }
    438 
    439 LOCAL(int)
    440 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
    441 {
    442     /* check if character is a member of the given set */
    443 
    444     int ok = 1;
    445 
    446     for (;;) {
    447         switch (*set++) {
    448 
    449         case SRE_OP_FAILURE:
    450             return !ok;
    451 
    452         case SRE_OP_LITERAL:
    453             /* <LITERAL> <code> */
    454             if (ch == set[0])
    455                 return ok;
    456             set++;
    457             break;
    458 
    459         case SRE_OP_CATEGORY:
    460             /* <CATEGORY> <code> */
    461             if (sre_category(set[0], (int) ch))
    462                 return ok;
    463             set += 1;
    464             break;
    465 
    466         case SRE_OP_CHARSET:
    467             if (sizeof(SRE_CODE) == 2) {
    468                 /* <CHARSET> <bitmap> (16 bits per code word) */
    469                 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
    470                     return ok;
    471                 set += 16;
    472             }
    473             else {
    474                 /* <CHARSET> <bitmap> (32 bits per code word) */
    475                 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
    476                     return ok;
    477                 set += 8;
    478             }
    479             break;
    480 
    481         case SRE_OP_RANGE:
    482             /* <RANGE> <lower> <upper> */
    483             if (set[0] <= ch && ch <= set[1])
    484                 return ok;
    485             set += 2;
    486             break;
    487 
    488         case SRE_OP_NEGATE:
    489             ok = !ok;
    490             break;
    491 
    492         case SRE_OP_BIGCHARSET:
    493             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
    494         {
    495             Py_ssize_t count, block;
    496             count = *(set++);
    497 
    498             if (sizeof(SRE_CODE) == 2) {
    499                 block = ((unsigned char*)set)[ch >> 8];
    500                 set += 128;
    501                 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
    502                     return ok;
    503                 set += count*16;
    504             }
    505             else {
    506                 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
    507                  * warnings when c's type supports only numbers < N+1 */
    508                 if (!(ch & ~65535))
    509                     block = ((unsigned char*)set)[ch >> 8];
    510                 else
    511                     block = -1;
    512                 set += 64;
    513                 if (block >=0 &&
    514                     (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
    515                     return ok;
    516                 set += count*8;
    517             }
    518             break;
    519         }
    520 
    521         default:
    522             /* internal error -- there's not much we can do about it
    523                here, so let's just pretend it didn't match... */
    524             return 0;
    525         }
    526     }
    527 }
    528 
    529 LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
    530 
    531 LOCAL(Py_ssize_t)
    532 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
    533 {
    534     SRE_CODE chr;
    535     SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
    536     SRE_CHAR* end = (SRE_CHAR *)state->end;
    537     Py_ssize_t i;
    538 
    539     /* adjust end */
    540     if (maxcount < end - ptr && maxcount != 65535)
    541         end = ptr + maxcount;
    542 
    543     switch (pattern[0]) {
    544 
    545     case SRE_OP_IN:
    546         /* repeated set */
    547         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
    548         while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
    549             ptr++;
    550         break;
    551 
    552     case SRE_OP_ANY:
    553         /* repeated dot wildcard. */
    554         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
    555         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
    556             ptr++;
    557         break;
    558 
    559     case SRE_OP_ANY_ALL:
    560         /* repeated dot wildcard.  skip to the end of the target
    561            string, and backtrack from there */
    562         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
    563         ptr = end;
    564         break;
    565 
    566     case SRE_OP_LITERAL:
    567         /* repeated literal */
    568         chr = pattern[1];
    569         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
    570         while (ptr < end && (SRE_CODE) *ptr == chr)
    571             ptr++;
    572         break;
    573 
    574     case SRE_OP_LITERAL_IGNORE:
    575         /* repeated literal */
    576         chr = pattern[1];
    577         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
    578         while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
    579             ptr++;
    580         break;
    581 
    582     case SRE_OP_NOT_LITERAL:
    583         /* repeated non-literal */
    584         chr = pattern[1];
    585         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
    586         while (ptr < end && (SRE_CODE) *ptr != chr)
    587             ptr++;
    588         break;
    589 
    590     case SRE_OP_NOT_LITERAL_IGNORE:
    591         /* repeated non-literal */
    592         chr = pattern[1];
    593         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
    594         while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
    595             ptr++;
    596         break;
    597 
    598     default:
    599         /* repeated single character pattern */
    600         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
    601         while ((SRE_CHAR*) state->ptr < end) {
    602             i = SRE_MATCH(state, pattern);
    603             if (i < 0)
    604                 return i;
    605             if (!i)
    606                 break;
    607         }
    608         TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
    609                (SRE_CHAR*) state->ptr - ptr));
    610         return (SRE_CHAR*) state->ptr - ptr;
    611     }
    612 
    613     TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
    614     return ptr - (SRE_CHAR*) state->ptr;
    615 }
    616 
    617 #if 0 /* not used in this release */
    618 LOCAL(int)
    619 SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
    620 {
    621     /* check if an SRE_OP_INFO block matches at the current position.
    622        returns the number of SRE_CODE objects to skip if successful, 0
    623        if no match */
    624 
    625     SRE_CHAR* end = state->end;
    626     SRE_CHAR* ptr = state->ptr;
    627     Py_ssize_t i;
    628 
    629     /* check minimal length */
    630     if (pattern[3] && (end - ptr) < pattern[3])
    631         return 0;
    632 
    633     /* check known prefix */
    634     if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
    635         /* <length> <skip> <prefix data> <overlap data> */
    636         for (i = 0; i < pattern[5]; i++)
    637             if ((SRE_CODE) ptr[i] != pattern[7 + i])
    638                 return 0;
    639         return pattern[0] + 2 * pattern[6];
    640     }
    641     return pattern[0];
    642 }
    643 #endif
    644 
    645 /* The macros below should be used to protect recursive SRE_MATCH()
    646  * calls that *failed* and do *not* return immediately (IOW, those
    647  * that will backtrack). Explaining:
    648  *
    649  * - Recursive SRE_MATCH() returned true: that's usually a success
    650  *   (besides atypical cases like ASSERT_NOT), therefore there's no
    651  *   reason to restore lastmark;
    652  *
    653  * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
    654  *   is returning to the caller: If the current SRE_MATCH() is the
    655  *   top function of the recursion, returning false will be a matching
    656  *   failure, and it doesn't matter where lastmark is pointing to.
    657  *   If it's *not* the top function, it will be a recursive SRE_MATCH()
    658  *   failure by itself, and the calling SRE_MATCH() will have to deal
    659  *   with the failure by the same rules explained here (it will restore
    660  *   lastmark by itself if necessary);
    661  *
    662  * - Recursive SRE_MATCH() returned false, and will continue the
    663  *   outside 'for' loop: must be protected when breaking, since the next
    664  *   OP could potentially depend on lastmark;
    665  *
    666  * - Recursive SRE_MATCH() returned false, and will be called again
    667  *   inside a local for/while loop: must be protected between each
    668  *   loop iteration, since the recursive SRE_MATCH() could do anything,
    669  *   and could potentially depend on lastmark.
    670  *
    671  * For more information, check the discussion at SF patch #712900.
    672  */
    673 #define LASTMARK_SAVE()     \
    674     do { \
    675         ctx->lastmark = state->lastmark; \
    676         ctx->lastindex = state->lastindex; \
    677     } while (0)
    678 #define LASTMARK_RESTORE()  \
    679     do { \
    680         state->lastmark = ctx->lastmark; \
    681         state->lastindex = ctx->lastindex; \
    682     } while (0)
    683 
    684 #define RETURN_ERROR(i) do { return i; } while(0)
    685 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
    686 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
    687 
    688 #define RETURN_ON_ERROR(i) \
    689     do { if (i < 0) RETURN_ERROR(i); } while (0)
    690 #define RETURN_ON_SUCCESS(i) \
    691     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
    692 #define RETURN_ON_FAILURE(i) \
    693     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
    694 
    695 #define SFY(x) #x
    696 
    697 #define DATA_STACK_ALLOC(state, type, ptr) \
    698 do { \
    699     alloc_pos = state->data_stack_base; \
    700     TRACE(("allocating %s in %d (%d)\n", \
    701            SFY(type), alloc_pos, sizeof(type))); \
    702     if (state->data_stack_size < alloc_pos+sizeof(type)) { \
    703         int j = data_stack_grow(state, sizeof(type)); \
    704         if (j < 0) return j; \
    705         if (ctx_pos != -1) \
    706             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    707     } \
    708     ptr = (type*)(state->data_stack+alloc_pos); \
    709     state->data_stack_base += sizeof(type); \
    710 } while (0)
    711 
    712 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
    713 do { \
    714     TRACE(("looking up %s at %d\n", SFY(type), pos)); \
    715     ptr = (type*)(state->data_stack+pos); \
    716 } while (0)
    717 
    718 #define DATA_STACK_PUSH(state, data, size) \
    719 do { \
    720     TRACE(("copy data in %p to %d (%d)\n", \
    721            data, state->data_stack_base, size)); \
    722     if (state->data_stack_size < state->data_stack_base+size) { \
    723         int j = data_stack_grow(state, size); \
    724         if (j < 0) return j; \
    725         if (ctx_pos != -1) \
    726             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    727     } \
    728     memcpy(state->data_stack+state->data_stack_base, data, size); \
    729     state->data_stack_base += size; \
    730 } while (0)
    731 
    732 #define DATA_STACK_POP(state, data, size, discard) \
    733 do { \
    734     TRACE(("copy data to %p from %d (%d)\n", \
    735            data, state->data_stack_base-size, size)); \
    736     memcpy(data, state->data_stack+state->data_stack_base-size, size); \
    737     if (discard) \
    738         state->data_stack_base -= size; \
    739 } while (0)
    740 
    741 #define DATA_STACK_POP_DISCARD(state, size) \
    742 do { \
    743     TRACE(("discard data from %d (%d)\n", \
    744            state->data_stack_base-size, size)); \
    745     state->data_stack_base -= size; \
    746 } while(0)
    747 
    748 #define DATA_PUSH(x) \
    749     DATA_STACK_PUSH(state, (x), sizeof(*(x)))
    750 #define DATA_POP(x) \
    751     DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
    752 #define DATA_POP_DISCARD(x) \
    753     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
    754 #define DATA_ALLOC(t,p) \
    755     DATA_STACK_ALLOC(state, t, p)
    756 #define DATA_LOOKUP_AT(t,p,pos) \
    757     DATA_STACK_LOOKUP_AT(state,t,p,pos)
    758 
    759 #define MARK_PUSH(lastmark) \
    760     do if (lastmark > 0) { \
    761         i = lastmark; /* ctx->lastmark may change if reallocated */ \
    762         DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
    763     } while (0)
    764 #define MARK_POP(lastmark) \
    765     do if (lastmark > 0) { \
    766         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
    767     } while (0)
    768 #define MARK_POP_KEEP(lastmark) \
    769     do if (lastmark > 0) { \
    770         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
    771     } while (0)
    772 #define MARK_POP_DISCARD(lastmark) \
    773     do if (lastmark > 0) { \
    774         DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
    775     } while (0)
    776 
    777 #define JUMP_NONE            0
    778 #define JUMP_MAX_UNTIL_1     1
    779 #define JUMP_MAX_UNTIL_2     2
    780 #define JUMP_MAX_UNTIL_3     3
    781 #define JUMP_MIN_UNTIL_1     4
    782 #define JUMP_MIN_UNTIL_2     5
    783 #define JUMP_MIN_UNTIL_3     6
    784 #define JUMP_REPEAT          7
    785 #define JUMP_REPEAT_ONE_1    8
    786 #define JUMP_REPEAT_ONE_2    9
    787 #define JUMP_MIN_REPEAT_ONE  10
    788 #define JUMP_BRANCH          11
    789 #define JUMP_ASSERT          12
    790 #define JUMP_ASSERT_NOT      13
    791 
    792 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
    793     DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
    794     nextctx->last_ctx_pos = ctx_pos; \
    795     nextctx->jump = jumpvalue; \
    796     nextctx->pattern = nextpattern; \
    797     ctx_pos = alloc_pos; \
    798     ctx = nextctx; \
    799     goto entrance; \
    800     jumplabel: \
    801     while (0) /* gcc doesn't like labels at end of scopes */ \
    802 
    803 typedef struct {
    804     Py_ssize_t last_ctx_pos;
    805     Py_ssize_t jump;
    806     SRE_CHAR* ptr;
    807     SRE_CODE* pattern;
    808     Py_ssize_t count;
    809     Py_ssize_t lastmark;
    810     Py_ssize_t lastindex;
    811     union {
    812         SRE_CODE chr;
    813         SRE_REPEAT* rep;
    814     } u;
    815 } SRE_MATCH_CONTEXT;
    816 
    817 /* check if string matches the given pattern.  returns <0 for
    818    error, 0 for failure, and 1 for success */
    819 LOCAL(Py_ssize_t)
    820 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
    821 {
    822     SRE_CHAR* end = (SRE_CHAR *)state->end;
    823     Py_ssize_t alloc_pos, ctx_pos = -1;
    824     Py_ssize_t i, ret = 0;
    825     Py_ssize_t jump;
    826     unsigned int sigcount=0;
    827 
    828     SRE_MATCH_CONTEXT* ctx;
    829     SRE_MATCH_CONTEXT* nextctx;
    830 
    831     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
    832 
    833     DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
    834     ctx->last_ctx_pos = -1;
    835     ctx->jump = JUMP_NONE;
    836     ctx->pattern = pattern;
    837     ctx_pos = alloc_pos;
    838 
    839 entrance:
    840 
    841     ctx->ptr = (SRE_CHAR *)state->ptr;
    842 
    843     if (ctx->pattern[0] == SRE_OP_INFO) {
    844         /* optimization info block */
    845         /* <INFO> <1=skip> <2=flags> <3=min> ... */
    846         if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
    847             TRACE(("reject (got %d chars, need %d)\n",
    848                    (end - ctx->ptr), ctx->pattern[3]));
    849             RETURN_FAILURE;
    850         }
    851         ctx->pattern += ctx->pattern[1] + 1;
    852     }
    853 
    854     for (;;) {
    855         ++sigcount;
    856         if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
    857             RETURN_ERROR(SRE_ERROR_INTERRUPTED);
    858 
    859         switch (*ctx->pattern++) {
    860 
    861         case SRE_OP_MARK:
    862             /* set mark */
    863             /* <MARK> <gid> */
    864             TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
    865                    ctx->ptr, ctx->pattern[0]));
    866             i = ctx->pattern[0];
    867             if (i & 1)
    868                 state->lastindex = i/2 + 1;
    869             if (i > state->lastmark) {
    870                 /* state->lastmark is the highest valid index in the
    871                    state->mark array.  If it is increased by more than 1,
    872                    the intervening marks must be set to NULL to signal
    873                    that these marks have not been encountered. */
    874                 Py_ssize_t j = state->lastmark + 1;
    875                 while (j < i)
    876                     state->mark[j++] = NULL;
    877                 state->lastmark = i;
    878             }
    879             state->mark[i] = ctx->ptr;
    880             ctx->pattern++;
    881             break;
    882 
    883         case SRE_OP_LITERAL:
    884             /* match literal string */
    885             /* <LITERAL> <code> */
    886             TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
    887                    ctx->ptr, *ctx->pattern));
    888             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
    889                 RETURN_FAILURE;
    890             ctx->pattern++;
    891             ctx->ptr++;
    892             break;
    893 
    894         case SRE_OP_NOT_LITERAL:
    895             /* match anything that is not literal character */
    896             /* <NOT_LITERAL> <code> */
    897             TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
    898                    ctx->ptr, *ctx->pattern));
    899             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
    900                 RETURN_FAILURE;
    901             ctx->pattern++;
    902             ctx->ptr++;
    903             break;
    904 
    905         case SRE_OP_SUCCESS:
    906             /* end of pattern */
    907             TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
    908             state->ptr = ctx->ptr;
    909             RETURN_SUCCESS;
    910 
    911         case SRE_OP_AT:
    912             /* match at given position */
    913             /* <AT> <code> */
    914             TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
    915             if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
    916                 RETURN_FAILURE;
    917             ctx->pattern++;
    918             break;
    919 
    920         case SRE_OP_CATEGORY:
    921             /* match at given category */
    922             /* <CATEGORY> <code> */
    923             TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
    924                    ctx->ptr, *ctx->pattern));
    925             if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
    926                 RETURN_FAILURE;
    927             ctx->pattern++;
    928             ctx->ptr++;
    929             break;
    930 
    931         case SRE_OP_ANY:
    932             /* match anything (except a newline) */
    933             /* <ANY> */
    934             TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
    935             if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
    936                 RETURN_FAILURE;
    937             ctx->ptr++;
    938             break;
    939 
    940         case SRE_OP_ANY_ALL:
    941             /* match anything */
    942             /* <ANY_ALL> */
    943             TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
    944             if (ctx->ptr >= end)
    945                 RETURN_FAILURE;
    946             ctx->ptr++;
    947             break;
    948 
    949         case SRE_OP_IN:
    950             /* match set member (or non_member) */
    951             /* <IN> <skip> <set> */
    952             TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
    953             if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
    954                 RETURN_FAILURE;
    955             ctx->pattern += ctx->pattern[0];
    956             ctx->ptr++;
    957             break;
    958 
    959         case SRE_OP_LITERAL_IGNORE:
    960             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
    961                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
    962             if (ctx->ptr >= end ||
    963                 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
    964                 RETURN_FAILURE;
    965             ctx->pattern++;
    966             ctx->ptr++;
    967             break;
    968 
    969         case SRE_OP_NOT_LITERAL_IGNORE:
    970             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
    971                    ctx->pattern, ctx->ptr, *ctx->pattern));
    972             if (ctx->ptr >= end ||
    973                 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
    974                 RETURN_FAILURE;
    975             ctx->pattern++;
    976             ctx->ptr++;
    977             break;
    978 
    979         case SRE_OP_IN_IGNORE:
    980             TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
    981             if (ctx->ptr >= end
    982                 || !SRE_CHARSET(ctx->pattern+1,
    983                                 (SRE_CODE)state->lower(*ctx->ptr)))
    984                 RETURN_FAILURE;
    985             ctx->pattern += ctx->pattern[0];
    986             ctx->ptr++;
    987             break;
    988 
    989         case SRE_OP_JUMP:
    990         case SRE_OP_INFO:
    991             /* jump forward */
    992             /* <JUMP> <offset> */
    993             TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
    994                    ctx->ptr, ctx->pattern[0]));
    995             ctx->pattern += ctx->pattern[0];
    996             break;
    997 
    998         case SRE_OP_BRANCH:
    999             /* alternation */
   1000             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
   1001             TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
   1002             LASTMARK_SAVE();
   1003             ctx->u.rep = state->repeat;
   1004             if (ctx->u.rep)
   1005                 MARK_PUSH(ctx->lastmark);
   1006             for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
   1007                 if (ctx->pattern[1] == SRE_OP_LITERAL &&
   1008                     (ctx->ptr >= end ||
   1009                      (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
   1010                     continue;
   1011                 if (ctx->pattern[1] == SRE_OP_IN &&
   1012                     (ctx->ptr >= end ||
   1013                      !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
   1014                     continue;
   1015                 state->ptr = ctx->ptr;
   1016                 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
   1017                 if (ret) {
   1018                     if (ctx->u.rep)
   1019                         MARK_POP_DISCARD(ctx->lastmark);
   1020                     RETURN_ON_ERROR(ret);
   1021                     RETURN_SUCCESS;
   1022                 }
   1023                 if (ctx->u.rep)
   1024                     MARK_POP_KEEP(ctx->lastmark);
   1025                 LASTMARK_RESTORE();
   1026             }
   1027             if (ctx->u.rep)
   1028                 MARK_POP_DISCARD(ctx->lastmark);
   1029             RETURN_FAILURE;
   1030 
   1031         case SRE_OP_REPEAT_ONE:
   1032             /* match repeated sequence (maximizing regexp) */
   1033 
   1034             /* this operator only works if the repeated item is
   1035                exactly one character wide, and we're not already
   1036                collecting backtracking points.  for other cases,
   1037                use the MAX_REPEAT operator */
   1038 
   1039             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
   1040 
   1041             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
   1042                    ctx->pattern[1], ctx->pattern[2]));
   1043 
   1044             if (ctx->ptr + ctx->pattern[1] > end)
   1045                 RETURN_FAILURE; /* cannot match */
   1046 
   1047             state->ptr = ctx->ptr;
   1048 
   1049             ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
   1050             RETURN_ON_ERROR(ret);
   1051             DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1052             ctx->count = ret;
   1053             ctx->ptr += ctx->count;
   1054 
   1055             /* when we arrive here, count contains the number of
   1056                matches, and ctx->ptr points to the tail of the target
   1057                string.  check if the rest of the pattern matches,
   1058                and backtrack if not. */
   1059 
   1060             if (ctx->count < (Py_ssize_t) ctx->pattern[1])
   1061                 RETURN_FAILURE;
   1062 
   1063             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
   1064                 /* tail is empty.  we're finished */
   1065                 state->ptr = ctx->ptr;
   1066                 RETURN_SUCCESS;
   1067             }
   1068 
   1069             LASTMARK_SAVE();
   1070 
   1071             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
   1072                 /* tail starts with a literal. skip positions where
   1073                    the rest of the pattern cannot possibly match */
   1074                 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
   1075                 for (;;) {
   1076                     while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
   1077                            (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
   1078                         ctx->ptr--;
   1079                         ctx->count--;
   1080                     }
   1081                     if (ctx->count < (Py_ssize_t) ctx->pattern[1])
   1082                         break;
   1083                     state->ptr = ctx->ptr;
   1084                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
   1085                             ctx->pattern+ctx->pattern[0]);
   1086                     if (ret) {
   1087                         RETURN_ON_ERROR(ret);
   1088                         RETURN_SUCCESS;
   1089                     }
   1090 
   1091                     LASTMARK_RESTORE();
   1092 
   1093                     ctx->ptr--;
   1094                     ctx->count--;
   1095                 }
   1096 
   1097             } else {
   1098                 /* general case */
   1099                 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
   1100                     state->ptr = ctx->ptr;
   1101                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
   1102                             ctx->pattern+ctx->pattern[0]);
   1103                     if (ret) {
   1104                         RETURN_ON_ERROR(ret);
   1105                         RETURN_SUCCESS;
   1106                     }
   1107                     ctx->ptr--;
   1108                     ctx->count--;
   1109                     LASTMARK_RESTORE();
   1110                 }
   1111             }
   1112             RETURN_FAILURE;
   1113 
   1114         case SRE_OP_MIN_REPEAT_ONE:
   1115             /* match repeated sequence (minimizing regexp) */
   1116 
   1117             /* this operator only works if the repeated item is
   1118                exactly one character wide, and we're not already
   1119                collecting backtracking points.  for other cases,
   1120                use the MIN_REPEAT operator */
   1121 
   1122             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
   1123 
   1124             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
   1125                    ctx->pattern[1], ctx->pattern[2]));
   1126 
   1127             if (ctx->ptr + ctx->pattern[1] > end)
   1128                 RETURN_FAILURE; /* cannot match */
   1129 
   1130             state->ptr = ctx->ptr;
   1131 
   1132             if (ctx->pattern[1] == 0)
   1133                 ctx->count = 0;
   1134             else {
   1135                 /* count using pattern min as the maximum */
   1136                 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
   1137                 RETURN_ON_ERROR(ret);
   1138                 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1139                 if (ret < (Py_ssize_t) ctx->pattern[1])
   1140                     /* didn't match minimum number of times */
   1141                     RETURN_FAILURE;
   1142                 /* advance past minimum matches of repeat */
   1143                 ctx->count = ret;
   1144                 ctx->ptr += ctx->count;
   1145             }
   1146 
   1147             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
   1148                 /* tail is empty.  we're finished */
   1149                 state->ptr = ctx->ptr;
   1150                 RETURN_SUCCESS;
   1151 
   1152             } else {
   1153                 /* general case */
   1154                 LASTMARK_SAVE();
   1155                 while ((Py_ssize_t)ctx->pattern[2] == 65535
   1156                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
   1157                     state->ptr = ctx->ptr;
   1158                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
   1159                             ctx->pattern+ctx->pattern[0]);
   1160                     if (ret) {
   1161                         RETURN_ON_ERROR(ret);
   1162                         RETURN_SUCCESS;
   1163                     }
   1164                     state->ptr = ctx->ptr;
   1165                     ret = SRE_COUNT(state, ctx->pattern+3, 1);
   1166                     RETURN_ON_ERROR(ret);
   1167                     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1168                     if (ret == 0)
   1169                         break;
   1170                     assert(ret == 1);
   1171                     ctx->ptr++;
   1172                     ctx->count++;
   1173                     LASTMARK_RESTORE();
   1174                 }
   1175             }
   1176             RETURN_FAILURE;
   1177 
   1178         case SRE_OP_REPEAT:
   1179             /* create repeat context.  all the hard work is done
   1180                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
   1181             /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
   1182             TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
   1183                    ctx->pattern[1], ctx->pattern[2]));
   1184 
   1185             /* install new repeat context */
   1186             ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
   1187             if (!ctx->u.rep) {
   1188                 PyErr_NoMemory();
   1189                 RETURN_FAILURE;
   1190             }
   1191             ctx->u.rep->count = -1;
   1192             ctx->u.rep->pattern = ctx->pattern;
   1193             ctx->u.rep->prev = state->repeat;
   1194             ctx->u.rep->last_ptr = NULL;
   1195             state->repeat = ctx->u.rep;
   1196 
   1197             state->ptr = ctx->ptr;
   1198             DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
   1199             state->repeat = ctx->u.rep->prev;
   1200             PyObject_FREE(ctx->u.rep);
   1201 
   1202             if (ret) {
   1203                 RETURN_ON_ERROR(ret);
   1204                 RETURN_SUCCESS;
   1205             }
   1206             RETURN_FAILURE;
   1207 
   1208         case SRE_OP_MAX_UNTIL:
   1209             /* maximizing repeat */
   1210             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
   1211 
   1212             /* FIXME: we probably need to deal with zero-width
   1213                matches in here... */
   1214 
   1215             ctx->u.rep = state->repeat;
   1216             if (!ctx->u.rep)
   1217                 RETURN_ERROR(SRE_ERROR_STATE);
   1218 
   1219             state->ptr = ctx->ptr;
   1220 
   1221             ctx->count = ctx->u.rep->count+1;
   1222 
   1223             TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
   1224                    ctx->ptr, ctx->count));
   1225 
   1226             if (ctx->count < ctx->u.rep->pattern[1]) {
   1227                 /* not enough matches */
   1228                 ctx->u.rep->count = ctx->count;
   1229                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
   1230                         ctx->u.rep->pattern+3);
   1231                 if (ret) {
   1232                     RETURN_ON_ERROR(ret);
   1233                     RETURN_SUCCESS;
   1234                 }
   1235                 ctx->u.rep->count = ctx->count-1;
   1236                 state->ptr = ctx->ptr;
   1237                 RETURN_FAILURE;
   1238             }
   1239 
   1240             if ((ctx->count < ctx->u.rep->pattern[2] ||
   1241                 ctx->u.rep->pattern[2] == 65535) &&
   1242                 state->ptr != ctx->u.rep->last_ptr) {
   1243                 /* we may have enough matches, but if we can
   1244                    match another item, do so */
   1245                 ctx->u.rep->count = ctx->count;
   1246                 LASTMARK_SAVE();
   1247                 MARK_PUSH(ctx->lastmark);
   1248                 /* zero-width match protection */
   1249                 DATA_PUSH(&ctx->u.rep->last_ptr);
   1250                 ctx->u.rep->last_ptr = state->ptr;
   1251                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
   1252                         ctx->u.rep->pattern+3);
   1253                 DATA_POP(&ctx->u.rep->last_ptr);
   1254                 if (ret) {
   1255                     MARK_POP_DISCARD(ctx->lastmark);
   1256                     RETURN_ON_ERROR(ret);
   1257                     RETURN_SUCCESS;
   1258                 }
   1259                 MARK_POP(ctx->lastmark);
   1260                 LASTMARK_RESTORE();
   1261                 ctx->u.rep->count = ctx->count-1;
   1262                 state->ptr = ctx->ptr;
   1263             }
   1264 
   1265             /* cannot match more repeated items here.  make sure the
   1266                tail matches */
   1267             state->repeat = ctx->u.rep->prev;
   1268             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
   1269             RETURN_ON_SUCCESS(ret);
   1270             state->repeat = ctx->u.rep;
   1271             state->ptr = ctx->ptr;
   1272             RETURN_FAILURE;
   1273 
   1274         case SRE_OP_MIN_UNTIL:
   1275             /* minimizing repeat */
   1276             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
   1277 
   1278             ctx->u.rep = state->repeat;
   1279             if (!ctx->u.rep)
   1280                 RETURN_ERROR(SRE_ERROR_STATE);
   1281 
   1282             state->ptr = ctx->ptr;
   1283 
   1284             ctx->count = ctx->u.rep->count+1;
   1285 
   1286             TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
   1287                    ctx->ptr, ctx->count, ctx->u.rep->pattern));
   1288 
   1289             if (ctx->count < ctx->u.rep->pattern[1]) {
   1290                 /* not enough matches */
   1291                 ctx->u.rep->count = ctx->count;
   1292                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
   1293                         ctx->u.rep->pattern+3);
   1294                 if (ret) {
   1295                     RETURN_ON_ERROR(ret);
   1296                     RETURN_SUCCESS;
   1297                 }
   1298                 ctx->u.rep->count = ctx->count-1;
   1299                 state->ptr = ctx->ptr;
   1300                 RETURN_FAILURE;
   1301             }
   1302 
   1303             LASTMARK_SAVE();
   1304 
   1305             /* see if the tail matches */
   1306             state->repeat = ctx->u.rep->prev;
   1307             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
   1308             if (ret) {
   1309                 RETURN_ON_ERROR(ret);
   1310                 RETURN_SUCCESS;
   1311             }
   1312 
   1313             state->repeat = ctx->u.rep;
   1314             state->ptr = ctx->ptr;
   1315 
   1316             LASTMARK_RESTORE();
   1317 
   1318             if (ctx->count >= ctx->u.rep->pattern[2]
   1319                 && ctx->u.rep->pattern[2] != 65535)
   1320                 RETURN_FAILURE;
   1321 
   1322             ctx->u.rep->count = ctx->count;
   1323             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
   1324                     ctx->u.rep->pattern+3);
   1325             if (ret) {
   1326                 RETURN_ON_ERROR(ret);
   1327                 RETURN_SUCCESS;
   1328             }
   1329             ctx->u.rep->count = ctx->count-1;
   1330             state->ptr = ctx->ptr;
   1331             RETURN_FAILURE;
   1332 
   1333         case SRE_OP_GROUPREF:
   1334             /* match backreference */
   1335             TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
   1336                    ctx->ptr, ctx->pattern[0]));
   1337             i = ctx->pattern[0];
   1338             {
   1339                 Py_ssize_t groupref = i+i;
   1340                 if (groupref >= state->lastmark) {
   1341                     RETURN_FAILURE;
   1342                 } else {
   1343                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1344                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1345                     if (!p || !e || e < p)
   1346                         RETURN_FAILURE;
   1347                     while (p < e) {
   1348                         if (ctx->ptr >= end || *ctx->ptr != *p)
   1349                             RETURN_FAILURE;
   1350                         p++; ctx->ptr++;
   1351                     }
   1352                 }
   1353             }
   1354             ctx->pattern++;
   1355             break;
   1356 
   1357         case SRE_OP_GROUPREF_IGNORE:
   1358             /* match backreference */
   1359             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
   1360                    ctx->ptr, ctx->pattern[0]));
   1361             i = ctx->pattern[0];
   1362             {
   1363                 Py_ssize_t groupref = i+i;
   1364                 if (groupref >= state->lastmark) {
   1365                     RETURN_FAILURE;
   1366                 } else {
   1367                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1368                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1369                     if (!p || !e || e < p)
   1370                         RETURN_FAILURE;
   1371                     while (p < e) {
   1372                         if (ctx->ptr >= end ||
   1373                             state->lower(*ctx->ptr) != state->lower(*p))
   1374                             RETURN_FAILURE;
   1375                         p++; ctx->ptr++;
   1376                     }
   1377                 }
   1378             }
   1379             ctx->pattern++;
   1380             break;
   1381 
   1382         case SRE_OP_GROUPREF_EXISTS:
   1383             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
   1384                    ctx->ptr, ctx->pattern[0]));
   1385             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
   1386             i = ctx->pattern[0];
   1387             {
   1388                 Py_ssize_t groupref = i+i;
   1389                 if (groupref >= state->lastmark) {
   1390                     ctx->pattern += ctx->pattern[1];
   1391                     break;
   1392                 } else {
   1393                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1394                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1395                     if (!p || !e || e < p) {
   1396                         ctx->pattern += ctx->pattern[1];
   1397                         break;
   1398                     }
   1399                 }
   1400             }
   1401             ctx->pattern += 2;
   1402             break;
   1403 
   1404         case SRE_OP_ASSERT:
   1405             /* assert subpattern */
   1406             /* <ASSERT> <skip> <back> <pattern> */
   1407             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
   1408                    ctx->ptr, ctx->pattern[1]));
   1409             state->ptr = ctx->ptr - ctx->pattern[1];
   1410             if (state->ptr < state->beginning)
   1411                 RETURN_FAILURE;
   1412             DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
   1413             RETURN_ON_FAILURE(ret);
   1414             ctx->pattern += ctx->pattern[0];
   1415             break;
   1416 
   1417         case SRE_OP_ASSERT_NOT:
   1418             /* assert not subpattern */
   1419             /* <ASSERT_NOT> <skip> <back> <pattern> */
   1420             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
   1421                    ctx->ptr, ctx->pattern[1]));
   1422             state->ptr = ctx->ptr - ctx->pattern[1];
   1423             if (state->ptr >= state->beginning) {
   1424                 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
   1425                 if (ret) {
   1426                     RETURN_ON_ERROR(ret);
   1427                     RETURN_FAILURE;
   1428                 }
   1429             }
   1430             ctx->pattern += ctx->pattern[0];
   1431             break;
   1432 
   1433         case SRE_OP_FAILURE:
   1434             /* immediate failure */
   1435             TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
   1436             RETURN_FAILURE;
   1437 
   1438         default:
   1439             TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
   1440                    ctx->pattern[-1]));
   1441             RETURN_ERROR(SRE_ERROR_ILLEGAL);
   1442         }
   1443     }
   1444 
   1445 exit:
   1446     ctx_pos = ctx->last_ctx_pos;
   1447     jump = ctx->jump;
   1448     DATA_POP_DISCARD(ctx);
   1449     if (ctx_pos == -1)
   1450         return ret;
   1451     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1452 
   1453     switch (jump) {
   1454         case JUMP_MAX_UNTIL_2:
   1455             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
   1456             goto jump_max_until_2;
   1457         case JUMP_MAX_UNTIL_3:
   1458             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
   1459             goto jump_max_until_3;
   1460         case JUMP_MIN_UNTIL_2:
   1461             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
   1462             goto jump_min_until_2;
   1463         case JUMP_MIN_UNTIL_3:
   1464             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
   1465             goto jump_min_until_3;
   1466         case JUMP_BRANCH:
   1467             TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
   1468             goto jump_branch;
   1469         case JUMP_MAX_UNTIL_1:
   1470             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
   1471             goto jump_max_until_1;
   1472         case JUMP_MIN_UNTIL_1:
   1473             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
   1474             goto jump_min_until_1;
   1475         case JUMP_REPEAT:
   1476             TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
   1477             goto jump_repeat;
   1478         case JUMP_REPEAT_ONE_1:
   1479             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
   1480             goto jump_repeat_one_1;
   1481         case JUMP_REPEAT_ONE_2:
   1482             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
   1483             goto jump_repeat_one_2;
   1484         case JUMP_MIN_REPEAT_ONE:
   1485             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
   1486             goto jump_min_repeat_one;
   1487         case JUMP_ASSERT:
   1488             TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
   1489             goto jump_assert;
   1490         case JUMP_ASSERT_NOT:
   1491             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
   1492             goto jump_assert_not;
   1493         case JUMP_NONE:
   1494             TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
   1495             break;
   1496     }
   1497 
   1498     return ret; /* should never get here */
   1499 }
   1500 
   1501 LOCAL(Py_ssize_t)
   1502 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
   1503 {
   1504     SRE_CHAR* ptr = (SRE_CHAR *)state->start;
   1505     SRE_CHAR* end = (SRE_CHAR *)state->end;
   1506     Py_ssize_t status = 0;
   1507     Py_ssize_t prefix_len = 0;
   1508     Py_ssize_t prefix_skip = 0;
   1509     SRE_CODE* prefix = NULL;
   1510     SRE_CODE* charset = NULL;
   1511     SRE_CODE* overlap = NULL;
   1512     int flags = 0;
   1513 
   1514     if (pattern[0] == SRE_OP_INFO) {
   1515         /* optimization info block */
   1516         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
   1517 
   1518         flags = pattern[2];
   1519 
   1520         if (pattern[3] > 1) {
   1521             /* adjust end point (but make sure we leave at least one
   1522                character in there, so literal search will work) */
   1523             end -= pattern[3]-1;
   1524             if (end <= ptr)
   1525                 end = ptr+1;
   1526         }
   1527 
   1528         if (flags & SRE_INFO_PREFIX) {
   1529             /* pattern starts with a known prefix */
   1530             /* <length> <skip> <prefix data> <overlap data> */
   1531             prefix_len = pattern[5];
   1532             prefix_skip = pattern[6];
   1533             prefix = pattern + 7;
   1534             overlap = prefix + prefix_len - 1;
   1535         } else if (flags & SRE_INFO_CHARSET)
   1536             /* pattern starts with a character from a known set */
   1537             /* <charset> */
   1538             charset = pattern + 5;
   1539 
   1540         pattern += 1 + pattern[1];
   1541     }
   1542 
   1543     TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
   1544     TRACE(("charset = %p\n", charset));
   1545 
   1546 #if defined(USE_FAST_SEARCH)
   1547     if (prefix_len > 1) {
   1548         /* pattern starts with a known prefix.  use the overlap
   1549            table to skip forward as fast as we possibly can */
   1550         Py_ssize_t i = 0;
   1551         end = (SRE_CHAR *)state->end;
   1552         while (ptr < end) {
   1553             for (;;) {
   1554                 if ((SRE_CODE) ptr[0] != prefix[i]) {
   1555                     if (!i)
   1556                         break;
   1557                     else
   1558                         i = overlap[i];
   1559                 } else {
   1560                     if (++i == prefix_len) {
   1561                         /* found a potential match */
   1562                         TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
   1563                         state->start = ptr + 1 - prefix_len;
   1564                         state->ptr = ptr + 1 - prefix_len + prefix_skip;
   1565                         if (flags & SRE_INFO_LITERAL)
   1566                             return 1; /* we got all of it */
   1567                         status = SRE_MATCH(state, pattern + 2*prefix_skip);
   1568                         if (status != 0)
   1569                             return status;
   1570                         /* close but no cigar -- try again */
   1571                         i = overlap[i];
   1572                     }
   1573                     break;
   1574                 }
   1575             }
   1576             ptr++;
   1577         }
   1578         return 0;
   1579     }
   1580 #endif
   1581 
   1582     if (pattern[0] == SRE_OP_LITERAL) {
   1583         /* pattern starts with a literal character.  this is used
   1584            for short prefixes, and if fast search is disabled */
   1585         SRE_CODE chr = pattern[1];
   1586         end = (SRE_CHAR *)state->end;
   1587         for (;;) {
   1588             while (ptr < end && (SRE_CODE) ptr[0] != chr)
   1589                 ptr++;
   1590             if (ptr >= end)
   1591                 return 0;
   1592             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
   1593             state->start = ptr;
   1594             state->ptr = ++ptr;
   1595             if (flags & SRE_INFO_LITERAL)
   1596                 return 1; /* we got all of it */
   1597             status = SRE_MATCH(state, pattern + 2);
   1598             if (status != 0)
   1599                 break;
   1600         }
   1601     } else if (charset) {
   1602         /* pattern starts with a character from a known set */
   1603         end = (SRE_CHAR *)state->end;
   1604         for (;;) {
   1605             while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
   1606                 ptr++;
   1607             if (ptr >= end)
   1608                 return 0;
   1609             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
   1610             state->start = ptr;
   1611             state->ptr = ptr;
   1612             status = SRE_MATCH(state, pattern);
   1613             if (status != 0)
   1614                 break;
   1615             ptr++;
   1616         }
   1617     } else
   1618         /* general case */
   1619         while (ptr <= end) {
   1620             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
   1621             state->start = state->ptr = ptr++;
   1622             status = SRE_MATCH(state, pattern);
   1623             if (status != 0)
   1624                 break;
   1625         }
   1626 
   1627     return status;
   1628 }
   1629 
   1630 LOCAL(int)
   1631 SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
   1632 {
   1633     /* check if given string is a literal template (i.e. no escapes) */
   1634     while (len-- > 0)
   1635         if (*ptr++ == '\\')
   1636             return 0;
   1637     return 1;
   1638 }
   1639 
   1640 #if !defined(SRE_RECURSIVE)
   1641 
   1642 /* -------------------------------------------------------------------- */
   1643 /* factories and destructors */
   1644 
   1645 /* see sre.h for object declarations */
   1646 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
   1647 static PyObject*pattern_scanner(PatternObject*, PyObject*);
   1648 
   1649 static PyObject *
   1650 sre_codesize(PyObject* self, PyObject *unused)
   1651 {
   1652     return Py_BuildValue("l", sizeof(SRE_CODE));
   1653 }
   1654 
   1655 static PyObject *
   1656 sre_getlower(PyObject* self, PyObject* args)
   1657 {
   1658     int character, flags;
   1659     if (!PyArg_ParseTuple(args, "ii", &character, &flags))
   1660         return NULL;
   1661     if (flags & SRE_FLAG_LOCALE)
   1662         return Py_BuildValue("i", sre_lower_locale(character));
   1663     if (flags & SRE_FLAG_UNICODE)
   1664 #if defined(HAVE_UNICODE)
   1665         return Py_BuildValue("i", sre_lower_unicode(character));
   1666 #else
   1667         return Py_BuildValue("i", sre_lower_locale(character));
   1668 #endif
   1669     return Py_BuildValue("i", sre_lower(character));
   1670 }
   1671 
   1672 LOCAL(void)
   1673 state_reset(SRE_STATE* state)
   1674 {
   1675     /* FIXME: dynamic! */
   1676     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
   1677 
   1678     state->lastmark = -1;
   1679     state->lastindex = -1;
   1680 
   1681     state->repeat = NULL;
   1682 
   1683     data_stack_dealloc(state);
   1684 }
   1685 
   1686 static void*
   1687 getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
   1688 {
   1689     /* given a python object, return a data pointer, a length (in
   1690        characters), and a character size.  return NULL if the object
   1691        is not a string (or not compatible) */
   1692 
   1693     PyBufferProcs *buffer;
   1694     Py_ssize_t size, bytes;
   1695     int charsize;
   1696     void* ptr;
   1697 
   1698 #if defined(HAVE_UNICODE)
   1699     if (PyUnicode_Check(string)) {
   1700         /* unicode strings doesn't always support the buffer interface */
   1701         ptr = (void*) PyUnicode_AS_DATA(string);
   1702         /* bytes = PyUnicode_GET_DATA_SIZE(string); */
   1703         size = PyUnicode_GET_SIZE(string);
   1704         charsize = sizeof(Py_UNICODE);
   1705 
   1706     } else {
   1707 #endif
   1708 
   1709     /* get pointer to string buffer */
   1710     buffer = Py_TYPE(string)->tp_as_buffer;
   1711     if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
   1712         buffer->bf_getsegcount(string, NULL) != 1) {
   1713         PyErr_SetString(PyExc_TypeError, "expected string or buffer");
   1714         return NULL;
   1715     }
   1716 
   1717     /* determine buffer size */
   1718     bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
   1719     if (bytes < 0) {
   1720         PyErr_SetString(PyExc_TypeError, "buffer has negative size");
   1721         return NULL;
   1722     }
   1723 
   1724     /* determine character size */
   1725 #if PY_VERSION_HEX >= 0x01060000
   1726     size = PyObject_Size(string);
   1727 #else
   1728     size = PyObject_Length(string);
   1729 #endif
   1730 
   1731     if (PyString_Check(string) || bytes == size)
   1732         charsize = 1;
   1733 #if defined(HAVE_UNICODE)
   1734     else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
   1735         charsize = sizeof(Py_UNICODE);
   1736 #endif
   1737     else {
   1738         PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
   1739         return NULL;
   1740     }
   1741 
   1742 #if defined(HAVE_UNICODE)
   1743     }
   1744 #endif
   1745 
   1746     *p_length = size;
   1747     *p_charsize = charsize;
   1748 
   1749     return ptr;
   1750 }
   1751 
   1752 LOCAL(PyObject*)
   1753 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
   1754            Py_ssize_t start, Py_ssize_t end)
   1755 {
   1756     /* prepare state object */
   1757 
   1758     Py_ssize_t length;
   1759     int charsize;
   1760     void* ptr;
   1761 
   1762     memset(state, 0, sizeof(SRE_STATE));
   1763 
   1764     state->lastmark = -1;
   1765     state->lastindex = -1;
   1766 
   1767     ptr = getstring(string, &length, &charsize);
   1768     if (!ptr)
   1769         return NULL;
   1770 
   1771     /* adjust boundaries */
   1772     if (start < 0)
   1773         start = 0;
   1774     else if (start > length)
   1775         start = length;
   1776 
   1777     if (end < 0)
   1778         end = 0;
   1779     else if (end > length)
   1780         end = length;
   1781 
   1782     state->charsize = charsize;
   1783 
   1784     state->beginning = ptr;
   1785 
   1786     state->start = (void*) ((char*) ptr + start * state->charsize);
   1787     state->end = (void*) ((char*) ptr + end * state->charsize);
   1788 
   1789     Py_INCREF(string);
   1790     state->string = string;
   1791     state->pos = start;
   1792     state->endpos = end;
   1793 
   1794     if (pattern->flags & SRE_FLAG_LOCALE)
   1795         state->lower = sre_lower_locale;
   1796     else if (pattern->flags & SRE_FLAG_UNICODE)
   1797 #if defined(HAVE_UNICODE)
   1798         state->lower = sre_lower_unicode;
   1799 #else
   1800         state->lower = sre_lower_locale;
   1801 #endif
   1802     else
   1803         state->lower = sre_lower;
   1804 
   1805     return string;
   1806 }
   1807 
   1808 LOCAL(void)
   1809 state_fini(SRE_STATE* state)
   1810 {
   1811     Py_XDECREF(state->string);
   1812     data_stack_dealloc(state);
   1813 }
   1814 
   1815 /* calculate offset from start of string */
   1816 #define STATE_OFFSET(state, member)\
   1817     (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
   1818 
   1819 LOCAL(PyObject*)
   1820 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
   1821 {
   1822     Py_ssize_t i, j;
   1823 
   1824     index = (index - 1) * 2;
   1825 
   1826     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
   1827         if (empty)
   1828             /* want empty string */
   1829             i = j = 0;
   1830         else {
   1831             Py_INCREF(Py_None);
   1832             return Py_None;
   1833         }
   1834     } else {
   1835         i = STATE_OFFSET(state, state->mark[index]);
   1836         j = STATE_OFFSET(state, state->mark[index+1]);
   1837     }
   1838 
   1839     return PySequence_GetSlice(string, i, j);
   1840 }
   1841 
   1842 static void
   1843 pattern_error(int status)
   1844 {
   1845     switch (status) {
   1846     case SRE_ERROR_RECURSION_LIMIT:
   1847         PyErr_SetString(
   1848             PyExc_RuntimeError,
   1849             "maximum recursion limit exceeded"
   1850             );
   1851         break;
   1852     case SRE_ERROR_MEMORY:
   1853         PyErr_NoMemory();
   1854         break;
   1855     case SRE_ERROR_INTERRUPTED:
   1856     /* An exception has already been raised, so let it fly */
   1857         break;
   1858     default:
   1859         /* other error codes indicate compiler/engine bugs */
   1860         PyErr_SetString(
   1861             PyExc_RuntimeError,
   1862             "internal error in regular expression engine"
   1863             );
   1864     }
   1865 }
   1866 
   1867 static void
   1868 pattern_dealloc(PatternObject* self)
   1869 {
   1870     if (self->weakreflist != NULL)
   1871         PyObject_ClearWeakRefs((PyObject *) self);
   1872     Py_XDECREF(self->pattern);
   1873     Py_XDECREF(self->groupindex);
   1874     Py_XDECREF(self->indexgroup);
   1875     PyObject_DEL(self);
   1876 }
   1877 
   1878 static PyObject*
   1879 pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
   1880 {
   1881     SRE_STATE state;
   1882     int status;
   1883 
   1884     PyObject* string;
   1885     Py_ssize_t start = 0;
   1886     Py_ssize_t end = PY_SSIZE_T_MAX;
   1887     static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
   1888     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
   1889                                      &string, &start, &end))
   1890         return NULL;
   1891 
   1892     string = state_init(&state, self, string, start, end);
   1893     if (!string)
   1894         return NULL;
   1895 
   1896     state.ptr = state.start;
   1897 
   1898     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
   1899 
   1900     if (state.charsize == 1) {
   1901         status = sre_match(&state, PatternObject_GetCode(self));
   1902     } else {
   1903 #if defined(HAVE_UNICODE)
   1904         status = sre_umatch(&state, PatternObject_GetCode(self));
   1905 #endif
   1906     }
   1907 
   1908     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
   1909     if (PyErr_Occurred())
   1910         return NULL;
   1911 
   1912     state_fini(&state);
   1913 
   1914     return pattern_new_match(self, &state, status);
   1915 }
   1916 
   1917 static PyObject*
   1918 pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
   1919 {
   1920     SRE_STATE state;
   1921     int status;
   1922 
   1923     PyObject* string;
   1924     Py_ssize_t start = 0;
   1925     Py_ssize_t end = PY_SSIZE_T_MAX;
   1926     static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
   1927     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
   1928                                      &string, &start, &end))
   1929         return NULL;
   1930 
   1931     string = state_init(&state, self, string, start, end);
   1932     if (!string)
   1933         return NULL;
   1934 
   1935     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
   1936 
   1937     if (state.charsize == 1) {
   1938         status = sre_search(&state, PatternObject_GetCode(self));
   1939     } else {
   1940 #if defined(HAVE_UNICODE)
   1941         status = sre_usearch(&state, PatternObject_GetCode(self));
   1942 #endif
   1943     }
   1944 
   1945     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
   1946 
   1947     state_fini(&state);
   1948 
   1949     if (PyErr_Occurred())
   1950         return NULL;
   1951 
   1952     return pattern_new_match(self, &state, status);
   1953 }
   1954 
   1955 static PyObject*
   1956 call(char* module, char* function, PyObject* args)
   1957 {
   1958     PyObject* name;
   1959     PyObject* mod;
   1960     PyObject* func;
   1961     PyObject* result;
   1962 
   1963     if (!args)
   1964         return NULL;
   1965     name = PyString_FromString(module);
   1966     if (!name)
   1967         return NULL;
   1968     mod = PyImport_Import(name);
   1969     Py_DECREF(name);
   1970     if (!mod)
   1971         return NULL;
   1972     func = PyObject_GetAttrString(mod, function);
   1973     Py_DECREF(mod);
   1974     if (!func)
   1975         return NULL;
   1976     result = PyObject_CallObject(func, args);
   1977     Py_DECREF(func);
   1978     Py_DECREF(args);
   1979     return result;
   1980 }
   1981 
   1982 #ifdef USE_BUILTIN_COPY
   1983 static int
   1984 deepcopy(PyObject** object, PyObject* memo)
   1985 {
   1986     PyObject* copy;
   1987 
   1988     copy = call(
   1989         "copy", "deepcopy",
   1990         PyTuple_Pack(2, *object, memo)
   1991         );
   1992     if (!copy)
   1993         return 0;
   1994 
   1995     Py_DECREF(*object);
   1996     *object = copy;
   1997 
   1998     return 1; /* success */
   1999 }
   2000 #endif
   2001 
   2002 static PyObject*
   2003 join_list(PyObject* list, PyObject* string)
   2004 {
   2005     /* join list elements */
   2006 
   2007     PyObject* joiner;
   2008 #if PY_VERSION_HEX >= 0x01060000
   2009     PyObject* function;
   2010     PyObject* args;
   2011 #endif
   2012     PyObject* result;
   2013 
   2014     joiner = PySequence_GetSlice(string, 0, 0);
   2015     if (!joiner)
   2016         return NULL;
   2017 
   2018     if (PyList_GET_SIZE(list) == 0) {
   2019         Py_DECREF(list);
   2020         return joiner;
   2021     }
   2022 
   2023 #if PY_VERSION_HEX >= 0x01060000
   2024     function = PyObject_GetAttrString(joiner, "join");
   2025     if (!function) {
   2026         Py_DECREF(joiner);
   2027         return NULL;
   2028     }
   2029     args = PyTuple_New(1);
   2030     if (!args) {
   2031         Py_DECREF(function);
   2032         Py_DECREF(joiner);
   2033         return NULL;
   2034     }
   2035     PyTuple_SET_ITEM(args, 0, list);
   2036     result = PyObject_CallObject(function, args);
   2037     Py_DECREF(args); /* also removes list */
   2038     Py_DECREF(function);
   2039 #else
   2040     result = call(
   2041         "string", "join",
   2042         PyTuple_Pack(2, list, joiner)
   2043         );
   2044 #endif
   2045     Py_DECREF(joiner);
   2046 
   2047     return result;
   2048 }
   2049 
   2050 static PyObject*
   2051 pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
   2052 {
   2053     SRE_STATE state;
   2054     PyObject* list;
   2055     int status;
   2056     Py_ssize_t i, b, e;
   2057 
   2058     PyObject* string;
   2059     Py_ssize_t start = 0;
   2060     Py_ssize_t end = PY_SSIZE_T_MAX;
   2061     static char* kwlist[] = { "source", "pos", "endpos", NULL };
   2062     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
   2063                                      &string, &start, &end))
   2064         return NULL;
   2065 
   2066     string = state_init(&state, self, string, start, end);
   2067     if (!string)
   2068         return NULL;
   2069 
   2070     list = PyList_New(0);
   2071     if (!list) {
   2072         state_fini(&state);
   2073         return NULL;
   2074     }
   2075 
   2076     while (state.start <= state.end) {
   2077 
   2078         PyObject* item;
   2079 
   2080         state_reset(&state);
   2081 
   2082         state.ptr = state.start;
   2083 
   2084         if (state.charsize == 1) {
   2085             status = sre_search(&state, PatternObject_GetCode(self));
   2086         } else {
   2087 #if defined(HAVE_UNICODE)
   2088             status = sre_usearch(&state, PatternObject_GetCode(self));
   2089 #endif
   2090         }
   2091 
   2092         if (PyErr_Occurred())
   2093             goto error;
   2094 
   2095         if (status <= 0) {
   2096             if (status == 0)
   2097                 break;
   2098             pattern_error(status);
   2099             goto error;
   2100         }
   2101 
   2102         /* don't bother to build a match object */
   2103         switch (self->groups) {
   2104         case 0:
   2105             b = STATE_OFFSET(&state, state.start);
   2106             e = STATE_OFFSET(&state, state.ptr);
   2107             item = PySequence_GetSlice(string, b, e);
   2108             if (!item)
   2109                 goto error;
   2110             break;
   2111         case 1:
   2112             item = state_getslice(&state, 1, string, 1);
   2113             if (!item)
   2114                 goto error;
   2115             break;
   2116         default:
   2117             item = PyTuple_New(self->groups);
   2118             if (!item)
   2119                 goto error;
   2120             for (i = 0; i < self->groups; i++) {
   2121                 PyObject* o = state_getslice(&state, i+1, string, 1);
   2122                 if (!o) {
   2123                     Py_DECREF(item);
   2124                     goto error;
   2125                 }
   2126                 PyTuple_SET_ITEM(item, i, o);
   2127             }
   2128             break;
   2129         }
   2130 
   2131         status = PyList_Append(list, item);
   2132         Py_DECREF(item);
   2133         if (status < 0)
   2134             goto error;
   2135 
   2136         if (state.ptr == state.start)
   2137             state.start = (void*) ((char*) state.ptr + state.charsize);
   2138         else
   2139             state.start = state.ptr;
   2140 
   2141     }
   2142 
   2143     state_fini(&state);
   2144     return list;
   2145 
   2146 error:
   2147     Py_DECREF(list);
   2148     state_fini(&state);
   2149     return NULL;
   2150 
   2151 }
   2152 
   2153 #if PY_VERSION_HEX >= 0x02020000
   2154 static PyObject*
   2155 pattern_finditer(PatternObject* pattern, PyObject* args)
   2156 {
   2157     PyObject* scanner;
   2158     PyObject* search;
   2159     PyObject* iterator;
   2160 
   2161     scanner = pattern_scanner(pattern, args);
   2162     if (!scanner)
   2163         return NULL;
   2164 
   2165     search = PyObject_GetAttrString(scanner, "search");
   2166     Py_DECREF(scanner);
   2167     if (!search)
   2168         return NULL;
   2169 
   2170     iterator = PyCallIter_New(search, Py_None);
   2171     Py_DECREF(search);
   2172 
   2173     return iterator;
   2174 }
   2175 #endif
   2176 
   2177 static PyObject*
   2178 pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
   2179 {
   2180     SRE_STATE state;
   2181     PyObject* list;
   2182     PyObject* item;
   2183     int status;
   2184     Py_ssize_t n;
   2185     Py_ssize_t i;
   2186     void* last;
   2187 
   2188     PyObject* string;
   2189     Py_ssize_t maxsplit = 0;
   2190     static char* kwlist[] = { "source", "maxsplit", NULL };
   2191     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
   2192                                      &string, &maxsplit))
   2193         return NULL;
   2194 
   2195     string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
   2196     if (!string)
   2197         return NULL;
   2198 
   2199     list = PyList_New(0);
   2200     if (!list) {
   2201         state_fini(&state);
   2202         return NULL;
   2203     }
   2204 
   2205     n = 0;
   2206     last = state.start;
   2207 
   2208     while (!maxsplit || n < maxsplit) {
   2209 
   2210         state_reset(&state);
   2211 
   2212         state.ptr = state.start;
   2213 
   2214         if (state.charsize == 1) {
   2215             status = sre_search(&state, PatternObject_GetCode(self));
   2216         } else {
   2217 #if defined(HAVE_UNICODE)
   2218             status = sre_usearch(&state, PatternObject_GetCode(self));
   2219 #endif
   2220         }
   2221 
   2222         if (PyErr_Occurred())
   2223             goto error;
   2224 
   2225         if (status <= 0) {
   2226             if (status == 0)
   2227                 break;
   2228             pattern_error(status);
   2229             goto error;
   2230         }
   2231 
   2232         if (state.start == state.ptr) {
   2233             if (last == state.end)
   2234                 break;
   2235             /* skip one character */
   2236             state.start = (void*) ((char*) state.ptr + state.charsize);
   2237             continue;
   2238         }
   2239 
   2240         /* get segment before this match */
   2241         item = PySequence_GetSlice(
   2242             string, STATE_OFFSET(&state, last),
   2243             STATE_OFFSET(&state, state.start)
   2244             );
   2245         if (!item)
   2246             goto error;
   2247         status = PyList_Append(list, item);
   2248         Py_DECREF(item);
   2249         if (status < 0)
   2250             goto error;
   2251 
   2252         /* add groups (if any) */
   2253         for (i = 0; i < self->groups; i++) {
   2254             item = state_getslice(&state, i+1, string, 0);
   2255             if (!item)
   2256                 goto error;
   2257             status = PyList_Append(list, item);
   2258             Py_DECREF(item);
   2259             if (status < 0)
   2260                 goto error;
   2261         }
   2262 
   2263         n = n + 1;
   2264 
   2265         last = state.start = state.ptr;
   2266 
   2267     }
   2268 
   2269     /* get segment following last match (even if empty) */
   2270     item = PySequence_GetSlice(
   2271         string, STATE_OFFSET(&state, last), state.endpos
   2272         );
   2273     if (!item)
   2274         goto error;
   2275     status = PyList_Append(list, item);
   2276     Py_DECREF(item);
   2277     if (status < 0)
   2278         goto error;
   2279 
   2280     state_fini(&state);
   2281     return list;
   2282 
   2283 error:
   2284     Py_DECREF(list);
   2285     state_fini(&state);
   2286     return NULL;
   2287 
   2288 }
   2289 
   2290 static PyObject*
   2291 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
   2292              Py_ssize_t count, Py_ssize_t subn)
   2293 {
   2294     SRE_STATE state;
   2295     PyObject* list;
   2296     PyObject* item;
   2297     PyObject* filter;
   2298     PyObject* args;
   2299     PyObject* match;
   2300     void* ptr;
   2301     int status;
   2302     Py_ssize_t n;
   2303     Py_ssize_t i, b, e;
   2304     int bint;
   2305     int filter_is_callable;
   2306 
   2307     if (PyCallable_Check(ptemplate)) {
   2308         /* sub/subn takes either a function or a template */
   2309         filter = ptemplate;
   2310         Py_INCREF(filter);
   2311         filter_is_callable = 1;
   2312     } else {
   2313         /* if not callable, check if it's a literal string */
   2314         int literal;
   2315         ptr = getstring(ptemplate, &n, &bint);
   2316         b = bint;
   2317         if (ptr) {
   2318             if (b == 1) {
   2319                 literal = sre_literal_template((unsigned char *)ptr, n);
   2320             } else {
   2321 #if defined(HAVE_UNICODE)
   2322                 literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
   2323 #endif
   2324             }
   2325         } else {
   2326             PyErr_Clear();
   2327             literal = 0;
   2328         }
   2329         if (literal) {
   2330             filter = ptemplate;
   2331             Py_INCREF(filter);
   2332             filter_is_callable = 0;
   2333         } else {
   2334             /* not a literal; hand it over to the template compiler */
   2335             filter = call(
   2336                 SRE_PY_MODULE, "_subx",
   2337                 PyTuple_Pack(2, self, ptemplate)
   2338                 );
   2339             if (!filter)
   2340                 return NULL;
   2341             filter_is_callable = PyCallable_Check(filter);
   2342         }
   2343     }
   2344 
   2345     string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
   2346     if (!string) {
   2347         Py_DECREF(filter);
   2348         return NULL;
   2349     }
   2350 
   2351     list = PyList_New(0);
   2352     if (!list) {
   2353         Py_DECREF(filter);
   2354         state_fini(&state);
   2355         return NULL;
   2356     }
   2357 
   2358     n = i = 0;
   2359 
   2360     while (!count || n < count) {
   2361 
   2362         state_reset(&state);
   2363 
   2364         state.ptr = state.start;
   2365 
   2366         if (state.charsize == 1) {
   2367             status = sre_search(&state, PatternObject_GetCode(self));
   2368         } else {
   2369 #if defined(HAVE_UNICODE)
   2370             status = sre_usearch(&state, PatternObject_GetCode(self));
   2371 #endif
   2372         }
   2373 
   2374         if (PyErr_Occurred())
   2375             goto error;
   2376 
   2377         if (status <= 0) {
   2378             if (status == 0)
   2379                 break;
   2380             pattern_error(status);
   2381             goto error;
   2382         }
   2383 
   2384         b = STATE_OFFSET(&state, state.start);
   2385         e = STATE_OFFSET(&state, state.ptr);
   2386 
   2387         if (i < b) {
   2388             /* get segment before this match */
   2389             item = PySequence_GetSlice(string, i, b);
   2390             if (!item)
   2391                 goto error;
   2392             status = PyList_Append(list, item);
   2393             Py_DECREF(item);
   2394             if (status < 0)
   2395                 goto error;
   2396 
   2397         } else if (i == b && i == e && n > 0)
   2398             /* ignore empty match on latest position */
   2399             goto next;
   2400 
   2401         if (filter_is_callable) {
   2402             /* pass match object through filter */
   2403             match = pattern_new_match(self, &state, 1);
   2404             if (!match)
   2405                 goto error;
   2406             args = PyTuple_Pack(1, match);
   2407             if (!args) {
   2408                 Py_DECREF(match);
   2409                 goto error;
   2410             }
   2411             item = PyObject_CallObject(filter, args);
   2412             Py_DECREF(args);
   2413             Py_DECREF(match);
   2414             if (!item)
   2415                 goto error;
   2416         } else {
   2417             /* filter is literal string */
   2418             item = filter;
   2419             Py_INCREF(item);
   2420         }
   2421 
   2422         /* add to list */
   2423         if (item != Py_None) {
   2424             status = PyList_Append(list, item);
   2425             Py_DECREF(item);
   2426             if (status < 0)
   2427                 goto error;
   2428         }
   2429 
   2430         i = e;
   2431         n = n + 1;
   2432 
   2433 next:
   2434         /* move on */
   2435         if (state.ptr == state.start)
   2436             state.start = (void*) ((char*) state.ptr + state.charsize);
   2437         else
   2438             state.start = state.ptr;
   2439 
   2440     }
   2441 
   2442     /* get segment following last match */
   2443     if (i < state.endpos) {
   2444         item = PySequence_GetSlice(string, i, state.endpos);
   2445         if (!item)
   2446             goto error;
   2447         status = PyList_Append(list, item);
   2448         Py_DECREF(item);
   2449         if (status < 0)
   2450             goto error;
   2451     }
   2452 
   2453     state_fini(&state);
   2454 
   2455     Py_DECREF(filter);
   2456 
   2457     /* convert list to single string (also removes list) */
   2458     item = join_list(list, string);
   2459 
   2460     if (!item)
   2461         return NULL;
   2462 
   2463     if (subn)
   2464         return Py_BuildValue("Ni", item, n);
   2465 
   2466     return item;
   2467 
   2468 error:
   2469     Py_DECREF(list);
   2470     state_fini(&state);
   2471     Py_DECREF(filter);
   2472     return NULL;
   2473 
   2474 }
   2475 
   2476 static PyObject*
   2477 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
   2478 {
   2479     PyObject* ptemplate;
   2480     PyObject* string;
   2481     Py_ssize_t count = 0;
   2482     static char* kwlist[] = { "repl", "string", "count", NULL };
   2483     if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
   2484                                      &ptemplate, &string, &count))
   2485         return NULL;
   2486 
   2487     return pattern_subx(self, ptemplate, string, count, 0);
   2488 }
   2489 
   2490 static PyObject*
   2491 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
   2492 {
   2493     PyObject* ptemplate;
   2494     PyObject* string;
   2495     Py_ssize_t count = 0;
   2496     static char* kwlist[] = { "repl", "string", "count", NULL };
   2497     if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
   2498                                      &ptemplate, &string, &count))
   2499         return NULL;
   2500 
   2501     return pattern_subx(self, ptemplate, string, count, 1);
   2502 }
   2503 
   2504 static PyObject*
   2505 pattern_copy(PatternObject* self, PyObject *unused)
   2506 {
   2507 #ifdef USE_BUILTIN_COPY
   2508     PatternObject* copy;
   2509     int offset;
   2510 
   2511     copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
   2512     if (!copy)
   2513         return NULL;
   2514 
   2515     offset = offsetof(PatternObject, groups);
   2516 
   2517     Py_XINCREF(self->groupindex);
   2518     Py_XINCREF(self->indexgroup);
   2519     Py_XINCREF(self->pattern);
   2520 
   2521     memcpy((char*) copy + offset, (char*) self + offset,
   2522            sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
   2523     copy->weakreflist = NULL;
   2524 
   2525     return (PyObject*) copy;
   2526 #else
   2527     PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
   2528     return NULL;
   2529 #endif
   2530 }
   2531 
   2532 static PyObject*
   2533 pattern_deepcopy(PatternObject* self, PyObject* memo)
   2534 {
   2535 #ifdef USE_BUILTIN_COPY
   2536     PatternObject* copy;
   2537 
   2538     copy = (PatternObject*) pattern_copy(self);
   2539     if (!copy)
   2540         return NULL;
   2541 
   2542     if (!deepcopy(&copy->groupindex, memo) ||
   2543         !deepcopy(&copy->indexgroup, memo) ||
   2544         !deepcopy(&copy->pattern, memo)) {
   2545         Py_DECREF(copy);
   2546         return NULL;
   2547     }
   2548 
   2549 #else
   2550     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
   2551     return NULL;
   2552 #endif
   2553 }
   2554 
   2555 PyDoc_STRVAR(pattern_match_doc,
   2556 "match(string[, pos[, endpos]]) --> match object or None.\n\
   2557     Matches zero or more characters at the beginning of the string");
   2558 
   2559 PyDoc_STRVAR(pattern_search_doc,
   2560 "search(string[, pos[, endpos]]) --> match object or None.\n\
   2561     Scan through string looking for a match, and return a corresponding\n\
   2562     MatchObject instance. Return None if no position in the string matches.");
   2563 
   2564 PyDoc_STRVAR(pattern_split_doc,
   2565 "split(string[, maxsplit = 0])  --> list.\n\
   2566     Split string by the occurrences of pattern.");
   2567 
   2568 PyDoc_STRVAR(pattern_findall_doc,
   2569 "findall(string[, pos[, endpos]]) --> list.\n\
   2570    Return a list of all non-overlapping matches of pattern in string.");
   2571 
   2572 PyDoc_STRVAR(pattern_finditer_doc,
   2573 "finditer(string[, pos[, endpos]]) --> iterator.\n\
   2574     Return an iterator over all non-overlapping matches for the \n\
   2575     RE pattern in string. For each match, the iterator returns a\n\
   2576     match object.");
   2577 
   2578 PyDoc_STRVAR(pattern_sub_doc,
   2579 "sub(repl, string[, count = 0]) --> newstring\n\
   2580     Return the string obtained by replacing the leftmost non-overlapping\n\
   2581     occurrences of pattern in string by the replacement repl.");
   2582 
   2583 PyDoc_STRVAR(pattern_subn_doc,
   2584 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
   2585     Return the tuple (new_string, number_of_subs_made) found by replacing\n\
   2586     the leftmost non-overlapping occurrences of pattern with the\n\
   2587     replacement repl.");
   2588 
   2589 PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
   2590 
   2591 static PyMethodDef pattern_methods[] = {
   2592     {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
   2593         pattern_match_doc},
   2594     {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
   2595         pattern_search_doc},
   2596     {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
   2597         pattern_sub_doc},
   2598     {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
   2599         pattern_subn_doc},
   2600     {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
   2601         pattern_split_doc},
   2602     {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
   2603         pattern_findall_doc},
   2604 #if PY_VERSION_HEX >= 0x02020000
   2605     {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
   2606         pattern_finditer_doc},
   2607 #endif
   2608     {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
   2609     {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
   2610     {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
   2611     {NULL, NULL}
   2612 };
   2613 
   2614 #define PAT_OFF(x) offsetof(PatternObject, x)
   2615 static PyMemberDef pattern_members[] = {
   2616     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
   2617     {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
   2618     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
   2619     {"groupindex", T_OBJECT,    PAT_OFF(groupindex),    READONLY},
   2620     {NULL}  /* Sentinel */
   2621 };
   2622 
   2623 statichere PyTypeObject Pattern_Type = {
   2624     PyObject_HEAD_INIT(NULL)
   2625     0, "_" SRE_MODULE ".SRE_Pattern",
   2626     sizeof(PatternObject), sizeof(SRE_CODE),
   2627     (destructor)pattern_dealloc, /*tp_dealloc*/
   2628     0,                                  /* tp_print */
   2629     0,                                  /* tp_getattrn */
   2630     0,          /* tp_setattr */
   2631     0,          /* tp_compare */
   2632     0,          /* tp_repr */
   2633     0,          /* tp_as_number */
   2634     0,          /* tp_as_sequence */
   2635     0,          /* tp_as_mapping */
   2636     0,          /* tp_hash */
   2637     0,          /* tp_call */
   2638     0,          /* tp_str */
   2639     0,          /* tp_getattro */
   2640     0,          /* tp_setattro */
   2641     0,          /* tp_as_buffer */
   2642     Py_TPFLAGS_DEFAULT,           /* tp_flags */
   2643     pattern_doc,      /* tp_doc */
   2644     0,          /* tp_traverse */
   2645     0,          /* tp_clear */
   2646     0,          /* tp_richcompare */
   2647     offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
   2648     0,          /* tp_iter */
   2649     0,          /* tp_iternext */
   2650     pattern_methods,      /* tp_methods */
   2651     pattern_members,      /* tp_members */
   2652 };
   2653 
   2654 static int _validate(PatternObject *self); /* Forward */
   2655 
   2656 static PyObject *
   2657 _compile(PyObject* self_, PyObject* args)
   2658 {
   2659     /* "compile" pattern descriptor to pattern object */
   2660 
   2661     PatternObject* self;
   2662     Py_ssize_t i, n;
   2663 
   2664     PyObject* pattern;
   2665     int flags = 0;
   2666     PyObject* code;
   2667     Py_ssize_t groups = 0;
   2668     PyObject* groupindex = NULL;
   2669     PyObject* indexgroup = NULL;
   2670     if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
   2671                           &PyList_Type, &code, &groups,
   2672                           &groupindex, &indexgroup))
   2673         return NULL;
   2674 
   2675     n = PyList_GET_SIZE(code);
   2676     /* coverity[ampersand_in_size] */
   2677     self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
   2678     if (!self)
   2679         return NULL;
   2680     self->weakreflist = NULL;
   2681     self->pattern = NULL;
   2682     self->groupindex = NULL;
   2683     self->indexgroup = NULL;
   2684 
   2685     self->codesize = n;
   2686 
   2687     for (i = 0; i < n; i++) {
   2688         PyObject *o = PyList_GET_ITEM(code, i);
   2689         unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
   2690                                               : PyLong_AsUnsignedLong(o);
   2691         self->code[i] = (SRE_CODE) value;
   2692         if ((unsigned long) self->code[i] != value) {
   2693             PyErr_SetString(PyExc_OverflowError,
   2694                             "regular expression code size limit exceeded");
   2695             break;
   2696         }
   2697     }
   2698 
   2699     if (PyErr_Occurred()) {
   2700         Py_DECREF(self);
   2701         return NULL;
   2702     }
   2703 
   2704     Py_INCREF(pattern);
   2705     self->pattern = pattern;
   2706 
   2707     self->flags = flags;
   2708 
   2709     self->groups = groups;
   2710 
   2711     Py_XINCREF(groupindex);
   2712     self->groupindex = groupindex;
   2713 
   2714     Py_XINCREF(indexgroup);
   2715     self->indexgroup = indexgroup;
   2716 
   2717     self->weakreflist = NULL;
   2718 
   2719     if (!_validate(self)) {
   2720         Py_DECREF(self);
   2721         return NULL;
   2722     }
   2723 
   2724     return (PyObject*) self;
   2725 }
   2726 
   2727 /* -------------------------------------------------------------------- */
   2728 /* Code validation */
   2729 
   2730 /* To learn more about this code, have a look at the _compile() function in
   2731    Lib/sre_compile.py.  The validation functions below checks the code array
   2732    for conformance with the code patterns generated there.
   2733 
   2734    The nice thing about the generated code is that it is position-independent:
   2735    all jumps are relative jumps forward.  Also, jumps don't cross each other:
   2736    the target of a later jump is always earlier than the target of an earlier
   2737    jump.  IOW, this is okay:
   2738 
   2739    J---------J-------T--------T
   2740     \         \_____/        /
   2741      \______________________/
   2742 
   2743    but this is not:
   2744 
   2745    J---------J-------T--------T
   2746     \_________\_____/        /
   2747                \____________/
   2748 
   2749    It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4
   2750    bytes wide (the latter if Python is compiled for "wide" unicode support).
   2751 */
   2752 
   2753 /* Defining this one enables tracing of the validator */
   2754 #undef VVERBOSE
   2755 
   2756 /* Trace macro for the validator */
   2757 #if defined(VVERBOSE)
   2758 #define VTRACE(v) printf v
   2759 #else
   2760 #define VTRACE(v)
   2761 #endif
   2762 
   2763 /* Report failure */
   2764 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
   2765 
   2766 /* Extract opcode, argument, or skip count from code array */
   2767 #define GET_OP                                          \
   2768     do {                                                \
   2769         VTRACE(("%p: ", code));                         \
   2770         if (code >= end) FAIL;                          \
   2771         op = *code++;                                   \
   2772         VTRACE(("%lu (op)\n", (unsigned long)op));      \
   2773     } while (0)
   2774 #define GET_ARG                                         \
   2775     do {                                                \
   2776         VTRACE(("%p= ", code));                         \
   2777         if (code >= end) FAIL;                          \
   2778         arg = *code++;                                  \
   2779         VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
   2780     } while (0)
   2781 #define GET_SKIP_ADJ(adj)                               \
   2782     do {                                                \
   2783         VTRACE(("%p= ", code));                         \
   2784         if (code >= end) FAIL;                          \
   2785         skip = *code;                                   \
   2786         VTRACE(("%lu (skip to %p)\n",                   \
   2787                (unsigned long)skip, code+skip));        \
   2788         if (code+skip-adj < code || code+skip-adj > end)\
   2789             FAIL;                                       \
   2790         code++;                                         \
   2791     } while (0)
   2792 #define GET_SKIP GET_SKIP_ADJ(0)
   2793 
   2794 static int
   2795 _validate_charset(SRE_CODE *code, SRE_CODE *end)
   2796 {
   2797     /* Some variables are manipulated by the macros above */
   2798     SRE_CODE op;
   2799     SRE_CODE arg;
   2800     SRE_CODE offset;
   2801     int i;
   2802 
   2803     while (code < end) {
   2804         GET_OP;
   2805         switch (op) {
   2806 
   2807         case SRE_OP_NEGATE:
   2808             break;
   2809 
   2810         case SRE_OP_LITERAL:
   2811             GET_ARG;
   2812             break;
   2813 
   2814         case SRE_OP_RANGE:
   2815             GET_ARG;
   2816             GET_ARG;
   2817             break;
   2818 
   2819         case SRE_OP_CHARSET:
   2820             offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
   2821             if (code+offset < code || code+offset > end)
   2822                 FAIL;
   2823             code += offset;
   2824             break;
   2825 
   2826         case SRE_OP_BIGCHARSET:
   2827             GET_ARG; /* Number of blocks */
   2828             offset = 256/sizeof(SRE_CODE); /* 256-byte table */
   2829             if (code+offset < code || code+offset > end)
   2830                 FAIL;
   2831             /* Make sure that each byte points to a valid block */
   2832             for (i = 0; i < 256; i++) {
   2833                 if (((unsigned char *)code)[i] >= arg)
   2834                     FAIL;
   2835             }
   2836             code += offset;
   2837             offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
   2838             if (code+offset < code || code+offset > end)
   2839                 FAIL;
   2840             code += offset;
   2841             break;
   2842 
   2843         case SRE_OP_CATEGORY:
   2844             GET_ARG;
   2845             switch (arg) {
   2846             case SRE_CATEGORY_DIGIT:
   2847             case SRE_CATEGORY_NOT_DIGIT:
   2848             case SRE_CATEGORY_SPACE:
   2849             case SRE_CATEGORY_NOT_SPACE:
   2850             case SRE_CATEGORY_WORD:
   2851             case SRE_CATEGORY_NOT_WORD:
   2852             case SRE_CATEGORY_LINEBREAK:
   2853             case SRE_CATEGORY_NOT_LINEBREAK:
   2854             case SRE_CATEGORY_LOC_WORD:
   2855             case SRE_CATEGORY_LOC_NOT_WORD:
   2856             case SRE_CATEGORY_UNI_DIGIT:
   2857             case SRE_CATEGORY_UNI_NOT_DIGIT:
   2858             case SRE_CATEGORY_UNI_SPACE:
   2859             case SRE_CATEGORY_UNI_NOT_SPACE:
   2860             case SRE_CATEGORY_UNI_WORD:
   2861             case SRE_CATEGORY_UNI_NOT_WORD:
   2862             case SRE_CATEGORY_UNI_LINEBREAK:
   2863             case SRE_CATEGORY_UNI_NOT_LINEBREAK:
   2864                 break;
   2865             default:
   2866                 FAIL;
   2867             }
   2868             break;
   2869 
   2870         default:
   2871             FAIL;
   2872 
   2873         }
   2874     }
   2875 
   2876     return 1;
   2877 }
   2878 
   2879 static int
   2880 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
   2881 {
   2882     /* Some variables are manipulated by the macros above */
   2883     SRE_CODE op;
   2884     SRE_CODE arg;
   2885     SRE_CODE skip;
   2886 
   2887     VTRACE(("code=%p, end=%p\n", code, end));
   2888 
   2889     if (code > end)
   2890         FAIL;
   2891 
   2892     while (code < end) {
   2893         GET_OP;
   2894         switch (op) {
   2895 
   2896         case SRE_OP_MARK:
   2897             /* We don't check whether marks are properly nested; the
   2898                sre_match() code is robust even if they don't, and the worst
   2899                you can get is nonsensical match results. */
   2900             GET_ARG;
   2901             if (arg > 2*groups+1) {
   2902                 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
   2903                 FAIL;
   2904             }
   2905             break;
   2906 
   2907         case SRE_OP_LITERAL:
   2908         case SRE_OP_NOT_LITERAL:
   2909         case SRE_OP_LITERAL_IGNORE:
   2910         case SRE_OP_NOT_LITERAL_IGNORE:
   2911             GET_ARG;
   2912             /* The arg is just a character, nothing to check */
   2913             break;
   2914 
   2915         case SRE_OP_SUCCESS:
   2916         case SRE_OP_FAILURE:
   2917             /* Nothing to check; these normally end the matching process */
   2918             break;
   2919 
   2920         case SRE_OP_AT:
   2921             GET_ARG;
   2922             switch (arg) {
   2923             case SRE_AT_BEGINNING:
   2924             case SRE_AT_BEGINNING_STRING:
   2925             case SRE_AT_BEGINNING_LINE:
   2926             case SRE_AT_END:
   2927             case SRE_AT_END_LINE:
   2928             case SRE_AT_END_STRING:
   2929             case SRE_AT_BOUNDARY:
   2930             case SRE_AT_NON_BOUNDARY:
   2931             case SRE_AT_LOC_BOUNDARY:
   2932             case SRE_AT_LOC_NON_BOUNDARY:
   2933             case SRE_AT_UNI_BOUNDARY:
   2934             case SRE_AT_UNI_NON_BOUNDARY:
   2935                 break;
   2936             default:
   2937                 FAIL;
   2938             }
   2939             break;
   2940 
   2941         case SRE_OP_ANY:
   2942         case SRE_OP_ANY_ALL:
   2943             /* These have no operands */
   2944             break;
   2945 
   2946         case SRE_OP_IN:
   2947         case SRE_OP_IN_IGNORE:
   2948             GET_SKIP;
   2949             /* Stop 1 before the end; we check the FAILURE below */
   2950             if (!_validate_charset(code, code+skip-2))
   2951                 FAIL;
   2952             if (code[skip-2] != SRE_OP_FAILURE)
   2953                 FAIL;
   2954             code += skip-1;
   2955             break;
   2956 
   2957         case SRE_OP_INFO:
   2958             {
   2959                 /* A minimal info field is
   2960                    <INFO> <1=skip> <2=flags> <3=min> <4=max>;
   2961                    If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
   2962                    more follows. */
   2963                 SRE_CODE flags, i;
   2964                 SRE_CODE *newcode;
   2965                 GET_SKIP;
   2966                 newcode = code+skip-1;
   2967                 GET_ARG; flags = arg;
   2968                 GET_ARG; /* min */
   2969                 GET_ARG; /* max */
   2970                 /* Check that only valid flags are present */
   2971                 if ((flags & ~(SRE_INFO_PREFIX |
   2972                                SRE_INFO_LITERAL |
   2973                                SRE_INFO_CHARSET)) != 0)
   2974                     FAIL;
   2975                 /* PREFIX and CHARSET are mutually exclusive */
   2976                 if ((flags & SRE_INFO_PREFIX) &&
   2977                     (flags & SRE_INFO_CHARSET))
   2978                     FAIL;
   2979                 /* LITERAL implies PREFIX */
   2980                 if ((flags & SRE_INFO_LITERAL) &&
   2981                     !(flags & SRE_INFO_PREFIX))
   2982                     FAIL;
   2983                 /* Validate the prefix */
   2984                 if (flags & SRE_INFO_PREFIX) {
   2985                     SRE_CODE prefix_len;
   2986                     GET_ARG; prefix_len = arg;
   2987                     GET_ARG; /* prefix skip */
   2988                     /* Here comes the prefix string */
   2989                     if (code+prefix_len < code || code+prefix_len > newcode)
   2990                         FAIL;
   2991                     code += prefix_len;
   2992                     /* And here comes the overlap table */
   2993                     if (code+prefix_len < code || code+prefix_len > newcode)
   2994                         FAIL;
   2995                     /* Each overlap value should be < prefix_len */
   2996                     for (i = 0; i < prefix_len; i++) {
   2997                         if (code[i] >= prefix_len)
   2998                             FAIL;
   2999                     }
   3000                     code += prefix_len;
   3001                 }
   3002                 /* Validate the charset */
   3003                 if (flags & SRE_INFO_CHARSET) {
   3004                     if (!_validate_charset(code, newcode-1))
   3005                         FAIL;
   3006                     if (newcode[-1] != SRE_OP_FAILURE)
   3007                         FAIL;
   3008                     code = newcode;
   3009                 }
   3010                 else if (code != newcode) {
   3011                   VTRACE(("code=%p, newcode=%p\n", code, newcode));
   3012                     FAIL;
   3013                 }
   3014             }
   3015             break;
   3016 
   3017         case SRE_OP_BRANCH:
   3018             {
   3019                 SRE_CODE *target = NULL;
   3020                 for (;;) {
   3021                     GET_SKIP;
   3022                     if (skip == 0)
   3023                         break;
   3024                     /* Stop 2 before the end; we check the JUMP below */
   3025                     if (!_validate_inner(code, code+skip-3, groups))
   3026                         FAIL;
   3027                     code += skip-3;
   3028                     /* Check that it ends with a JUMP, and that each JUMP
   3029                        has the same target */
   3030                     GET_OP;
   3031                     if (op != SRE_OP_JUMP)
   3032                         FAIL;
   3033                     GET_SKIP;
   3034                     if (target == NULL)
   3035                         target = code+skip-1;
   3036                     else if (code+skip-1 != target)
   3037                         FAIL;
   3038                 }
   3039             }
   3040             break;
   3041 
   3042         case SRE_OP_REPEAT_ONE:
   3043         case SRE_OP_MIN_REPEAT_ONE:
   3044             {
   3045                 SRE_CODE min, max;
   3046                 GET_SKIP;
   3047                 GET_ARG; min = arg;
   3048                 GET_ARG; max = arg;
   3049                 if (min > max)
   3050                     FAIL;
   3051 #ifdef Py_UNICODE_WIDE
   3052                 if (max > 65535)
   3053                     FAIL;
   3054 #endif
   3055                 if (!_validate_inner(code, code+skip-4, groups))
   3056                     FAIL;
   3057                 code += skip-4;
   3058                 GET_OP;
   3059                 if (op != SRE_OP_SUCCESS)
   3060                     FAIL;
   3061             }
   3062             break;
   3063 
   3064         case SRE_OP_REPEAT:
   3065             {
   3066                 SRE_CODE min, max;
   3067                 GET_SKIP;
   3068                 GET_ARG; min = arg;
   3069                 GET_ARG; max = arg;
   3070                 if (min > max)
   3071                     FAIL;
   3072 #ifdef Py_UNICODE_WIDE
   3073                 if (max > 65535)
   3074                     FAIL;
   3075 #endif
   3076                 if (!_validate_inner(code, code+skip-3, groups))
   3077                     FAIL;
   3078                 code += skip-3;
   3079                 GET_OP;
   3080                 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
   3081                     FAIL;
   3082             }
   3083             break;
   3084 
   3085         case SRE_OP_GROUPREF:
   3086         case SRE_OP_GROUPREF_IGNORE:
   3087             GET_ARG;
   3088             if (arg >= groups)
   3089                 FAIL;
   3090             break;
   3091 
   3092         case SRE_OP_GROUPREF_EXISTS:
   3093             /* The regex syntax for this is: '(?(group)then|else)', where
   3094                'group' is either an integer group number or a group name,
   3095                'then' and 'else' are sub-regexes, and 'else' is optional. */
   3096             GET_ARG;
   3097             if (arg >= groups)
   3098                 FAIL;
   3099             GET_SKIP_ADJ(1);
   3100             code--; /* The skip is relative to the first arg! */
   3101             /* There are two possibilities here: if there is both a 'then'
   3102                part and an 'else' part, the generated code looks like:
   3103 
   3104                GROUPREF_EXISTS
   3105                <group>
   3106                <skipyes>
   3107                ...then part...
   3108                JUMP
   3109                <skipno>
   3110                (<skipyes> jumps here)
   3111                ...else part...
   3112                (<skipno> jumps here)
   3113 
   3114                If there is only a 'then' part, it looks like:
   3115 
   3116                GROUPREF_EXISTS
   3117                <group>
   3118                <skip>
   3119                ...then part...
   3120                (<skip> jumps here)
   3121 
   3122                There is no direct way to decide which it is, and we don't want
   3123                to allow arbitrary jumps anywhere in the code; so we just look
   3124                for a JUMP opcode preceding our skip target.
   3125             */
   3126             if (skip >= 3 && code+skip-3 >= code &&
   3127                 code[skip-3] == SRE_OP_JUMP)
   3128             {
   3129                 VTRACE(("both then and else parts present\n"));
   3130                 if (!_validate_inner(code+1, code+skip-3, groups))
   3131                     FAIL;
   3132                 code += skip-2; /* Position after JUMP, at <skipno> */
   3133                 GET_SKIP;
   3134                 if (!_validate_inner(code, code+skip-1, groups))
   3135                     FAIL;
   3136                 code += skip-1;
   3137             }
   3138             else {
   3139                 VTRACE(("only a then part present\n"));
   3140                 if (!_validate_inner(code+1, code+skip-1, groups))
   3141                     FAIL;
   3142                 code += skip-1;
   3143             }
   3144             break;
   3145 
   3146         case SRE_OP_ASSERT:
   3147         case SRE_OP_ASSERT_NOT:
   3148             GET_SKIP;
   3149             GET_ARG; /* 0 for lookahead, width for lookbehind */
   3150             code--; /* Back up over arg to simplify math below */
   3151             if (arg & 0x80000000)
   3152                 FAIL; /* Width too large */
   3153             /* Stop 1 before the end; we check the SUCCESS below */
   3154             if (!_validate_inner(code+1, code+skip-2, groups))
   3155                 FAIL;
   3156             code += skip-2;
   3157             GET_OP;
   3158             if (op != SRE_OP_SUCCESS)
   3159                 FAIL;
   3160             break;
   3161 
   3162         default:
   3163             FAIL;
   3164 
   3165         }
   3166     }
   3167 
   3168     VTRACE(("okay\n"));
   3169     return 1;
   3170 }
   3171 
   3172 static int
   3173 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
   3174 {
   3175     if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
   3176         FAIL;
   3177     if (groups == 0)  /* fix for simplejson */
   3178         groups = 100; /* 100 groups should always be safe */
   3179     return _validate_inner(code, end-1, groups);
   3180 }
   3181 
   3182 static int
   3183 _validate(PatternObject *self)
   3184 {
   3185     if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
   3186     {
   3187         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
   3188         return 0;
   3189     }
   3190     else
   3191         VTRACE(("Success!\n"));
   3192     return 1;
   3193 }
   3194 
   3195 /* -------------------------------------------------------------------- */
   3196 /* match methods */
   3197 
   3198 static void
   3199 match_dealloc(MatchObject* self)
   3200 {
   3201     Py_XDECREF(self->regs);
   3202     Py_XDECREF(self->string);
   3203     Py_DECREF(self->pattern);
   3204     PyObject_DEL(self);
   3205 }
   3206 
   3207 static PyObject*
   3208 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
   3209 {
   3210     if (index < 0 || index >= self->groups) {
   3211         /* raise IndexError if we were given a bad group number */
   3212         PyErr_SetString(
   3213             PyExc_IndexError,
   3214             "no such group"
   3215             );
   3216         return NULL;
   3217     }
   3218 
   3219     index *= 2;
   3220 
   3221     if (self->string == Py_None || self->mark[index] < 0) {
   3222         /* return default value if the string or group is undefined */
   3223         Py_INCREF(def);
   3224         return def;
   3225     }
   3226 
   3227     return PySequence_GetSlice(
   3228         self->string, self->mark[index], self->mark[index+1]
   3229         );
   3230 }
   3231 
   3232 static Py_ssize_t
   3233 match_getindex(MatchObject* self, PyObject* index)
   3234 {
   3235     Py_ssize_t i;
   3236 
   3237     if (PyInt_Check(index))
   3238         return PyInt_AsSsize_t(index);
   3239 
   3240     i = -1;
   3241 
   3242     if (self->pattern->groupindex) {
   3243         index = PyObject_GetItem(self->pattern->groupindex, index);
   3244         if (index) {
   3245             if (PyInt_Check(index) || PyLong_Check(index))
   3246                 i = PyInt_AsSsize_t(index);
   3247             Py_DECREF(index);
   3248         } else
   3249             PyErr_Clear();
   3250     }
   3251 
   3252     return i;
   3253 }
   3254 
   3255 static PyObject*
   3256 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
   3257 {
   3258     return match_getslice_by_index(self, match_getindex(self, index), def);
   3259 }
   3260 
   3261 static PyObject*
   3262 match_expand(MatchObject* self, PyObject* ptemplate)
   3263 {
   3264     /* delegate to Python code */
   3265     return call(
   3266         SRE_PY_MODULE, "_expand",
   3267         PyTuple_Pack(3, self->pattern, self, ptemplate)
   3268         );
   3269 }
   3270 
   3271 static PyObject*
   3272 match_group(MatchObject* self, PyObject* args)
   3273 {
   3274     PyObject* result;
   3275     Py_ssize_t i, size;
   3276 
   3277     size = PyTuple_GET_SIZE(args);
   3278 
   3279     switch (size) {
   3280     case 0:
   3281         result = match_getslice(self, Py_False, Py_None);
   3282         break;
   3283     case 1:
   3284         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
   3285         break;
   3286     default:
   3287         /* fetch multiple items */
   3288         result = PyTuple_New(size);
   3289         if (!result)
   3290             return NULL;
   3291         for (i = 0; i < size; i++) {
   3292             PyObject* item = match_getslice(
   3293                 self, PyTuple_GET_ITEM(args, i), Py_None
   3294                 );
   3295             if (!item) {
   3296                 Py_DECREF(result);
   3297                 return NULL;
   3298             }
   3299             PyTuple_SET_ITEM(result, i, item);
   3300         }
   3301         break;
   3302     }
   3303     return result;
   3304 }
   3305 
   3306 static PyObject*
   3307 match_groups(MatchObject* self, PyObject* args, PyObject* kw)
   3308 {
   3309     PyObject* result;
   3310     Py_ssize_t index;
   3311 
   3312     PyObject* def = Py_None;
   3313     static char* kwlist[] = { "default", NULL };
   3314     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
   3315         return NULL;
   3316 
   3317     result = PyTuple_New(self->groups-1);
   3318     if (!result)
   3319         return NULL;
   3320 
   3321     for (index = 1; index < self->groups; index++) {
   3322         PyObject* item;
   3323         item = match_getslice_by_index(self, index, def);
   3324         if (!item) {
   3325             Py_DECREF(result);
   3326             return NULL;
   3327         }
   3328         PyTuple_SET_ITEM(result, index-1, item);
   3329     }
   3330 
   3331     return result;
   3332 }
   3333 
   3334 static PyObject*
   3335 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
   3336 {
   3337     PyObject* result;
   3338     PyObject* keys;
   3339     Py_ssize_t index;
   3340 
   3341     PyObject* def = Py_None;
   3342     static char* kwlist[] = { "default", NULL };
   3343     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
   3344         return NULL;
   3345 
   3346     result = PyDict_New();
   3347     if (!result || !self->pattern->groupindex)
   3348         return result;
   3349 
   3350     keys = PyMapping_Keys(self->pattern->groupindex);
   3351     if (!keys)
   3352         goto failed;
   3353 
   3354     for (index = 0; index < PyList_GET_SIZE(keys); index++) {
   3355         int status;
   3356         PyObject* key;
   3357         PyObject* value;
   3358         key = PyList_GET_ITEM(keys, index);
   3359         if (!key)
   3360             goto failed;
   3361         value = match_getslice(self, key, def);
   3362         if (!value) {
   3363             Py_DECREF(key);
   3364             goto failed;
   3365         }
   3366         status = PyDict_SetItem(result, key, value);
   3367         Py_DECREF(value);
   3368         if (status < 0)
   3369             goto failed;
   3370     }
   3371 
   3372     Py_DECREF(keys);
   3373 
   3374     return result;
   3375 
   3376 failed:
   3377     Py_XDECREF(keys);
   3378     Py_DECREF(result);
   3379     return NULL;
   3380 }
   3381 
   3382 static PyObject*
   3383 match_start(MatchObject* self, PyObject* args)
   3384 {
   3385     Py_ssize_t index;
   3386 
   3387     PyObject* index_ = Py_False; /* zero */
   3388     if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
   3389         return NULL;
   3390 
   3391     index = match_getindex(self, index_);
   3392 
   3393     if (index < 0 || index >= self->groups) {
   3394         PyErr_SetString(
   3395             PyExc_IndexError,
   3396             "no such group"
   3397             );
   3398         return NULL;
   3399     }
   3400 
   3401     /* mark is -1 if group is undefined */
   3402     return Py_BuildValue("i", self->mark[index*2]);
   3403 }
   3404 
   3405 static PyObject*
   3406 match_end(MatchObject* self, PyObject* args)
   3407 {
   3408     Py_ssize_t index;
   3409 
   3410     PyObject* index_ = Py_False; /* zero */
   3411     if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
   3412         return NULL;
   3413 
   3414     index = match_getindex(self, index_);
   3415 
   3416     if (index < 0 || index >= self->groups) {
   3417         PyErr_SetString(
   3418             PyExc_IndexError,
   3419             "no such group"
   3420             );
   3421         return NULL;
   3422     }
   3423 
   3424     /* mark is -1 if group is undefined */
   3425     return Py_BuildValue("i", self->mark[index*2+1]);
   3426 }
   3427 
   3428 LOCAL(PyObject*)
   3429 _pair(Py_ssize_t i1, Py_ssize_t i2)
   3430 {
   3431     PyObject* pair;
   3432     PyObject* item;
   3433 
   3434     pair = PyTuple_New(2);
   3435     if (!pair)
   3436         return NULL;
   3437 
   3438     item = PyInt_FromSsize_t(i1);
   3439     if (!item)
   3440         goto error;
   3441     PyTuple_SET_ITEM(pair, 0, item);
   3442 
   3443     item = PyInt_FromSsize_t(i2);
   3444     if (!item)
   3445         goto error;
   3446     PyTuple_SET_ITEM(pair, 1, item);
   3447 
   3448     return pair;
   3449 
   3450   error:
   3451     Py_DECREF(pair);
   3452     return NULL;
   3453 }
   3454 
   3455 static PyObject*
   3456 match_span(MatchObject* self, PyObject* args)
   3457 {
   3458     Py_ssize_t index;
   3459 
   3460     PyObject* index_ = Py_False; /* zero */
   3461     if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
   3462         return NULL;
   3463 
   3464     index = match_getindex(self, index_);
   3465 
   3466     if (index < 0 || index >= self->groups) {
   3467         PyErr_SetString(
   3468             PyExc_IndexError,
   3469             "no such group"
   3470             );
   3471         return NULL;
   3472     }
   3473 
   3474     /* marks are -1 if group is undefined */
   3475     return _pair(self->mark[index*2], self->mark[index*2+1]);
   3476 }
   3477 
   3478 static PyObject*
   3479 match_regs(MatchObject* self)
   3480 {
   3481     PyObject* regs;
   3482     PyObject* item;
   3483     Py_ssize_t index;
   3484 
   3485     regs = PyTuple_New(self->groups);
   3486     if (!regs)
   3487         return NULL;
   3488 
   3489     for (index = 0; index < self->groups; index++) {
   3490         item = _pair(self->mark[index*2], self->mark[index*2+1]);
   3491         if (!item) {
   3492             Py_DECREF(regs);
   3493             return NULL;
   3494         }
   3495         PyTuple_SET_ITEM(regs, index, item);
   3496     }
   3497 
   3498     Py_INCREF(regs);
   3499     self->regs = regs;
   3500 
   3501     return regs;
   3502 }
   3503 
   3504 static PyObject*
   3505 match_copy(MatchObject* self, PyObject *unused)
   3506 {
   3507 #ifdef USE_BUILTIN_COPY
   3508     MatchObject* copy;
   3509     Py_ssize_t slots, offset;
   3510 
   3511     slots = 2 * (self->pattern->groups+1);
   3512 
   3513     copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
   3514     if (!copy)
   3515         return NULL;
   3516 
   3517     /* this value a constant, but any compiler should be able to
   3518        figure that out all by itself */
   3519     offset = offsetof(MatchObject, string);
   3520 
   3521     Py_XINCREF(self->pattern);
   3522     Py_XINCREF(self->string);
   3523     Py_XINCREF(self->regs);
   3524 
   3525     memcpy((char*) copy + offset, (char*) self + offset,
   3526            sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
   3527 
   3528     return (PyObject*) copy;
   3529 #else
   3530     PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
   3531     return NULL;
   3532 #endif
   3533 }
   3534 
   3535 static PyObject*
   3536 match_deepcopy(MatchObject* self, PyObject* memo)
   3537 {
   3538 #ifdef USE_BUILTIN_COPY
   3539     MatchObject* copy;
   3540 
   3541     copy = (MatchObject*) match_copy(self);
   3542     if (!copy)
   3543         return NULL;
   3544 
   3545     if (!deepcopy((PyObject**) &copy->pattern, memo) ||
   3546         !deepcopy(&copy->string, memo) ||
   3547         !deepcopy(&copy->regs, memo)) {
   3548         Py_DECREF(copy);
   3549         return NULL;
   3550     }
   3551 
   3552 #else
   3553     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
   3554     return NULL;
   3555 #endif
   3556 }
   3557 
   3558 static struct PyMethodDef match_methods[] = {
   3559     {"group", (PyCFunction) match_group, METH_VARARGS},
   3560     {"start", (PyCFunction) match_start, METH_VARARGS},
   3561     {"end", (PyCFunction) match_end, METH_VARARGS},
   3562     {"span", (PyCFunction) match_span, METH_VARARGS},
   3563     {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
   3564     {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
   3565     {"expand", (PyCFunction) match_expand, METH_O},
   3566     {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
   3567     {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
   3568     {NULL, NULL}
   3569 };
   3570 
   3571 static PyObject *
   3572 match_lastindex_get(MatchObject *self)
   3573 {
   3574     if (self->lastindex >= 0)
   3575   return Py_BuildValue("i", self->lastindex);
   3576     Py_INCREF(Py_None);
   3577     return Py_None;
   3578 }
   3579 
   3580 static PyObject *
   3581 match_lastgroup_get(MatchObject *self)
   3582 {
   3583     if (self->pattern->indexgroup && self->lastindex >= 0) {
   3584         PyObject* result = PySequence_GetItem(
   3585             self->pattern->indexgroup, self->lastindex
   3586             );
   3587         if (result)
   3588             return result;
   3589         PyErr_Clear();
   3590     }
   3591     Py_INCREF(Py_None);
   3592     return Py_None;
   3593 }
   3594 
   3595 static PyObject *
   3596 match_regs_get(MatchObject *self)
   3597 {
   3598     if (self->regs) {
   3599         Py_INCREF(self->regs);
   3600         return self->regs;
   3601     } else
   3602         return match_regs(self);
   3603 }
   3604 
   3605 static PyGetSetDef match_getset[] = {
   3606     {"lastindex", (getter)match_lastindex_get, (setter)NULL},
   3607     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
   3608     {"regs",      (getter)match_regs_get,      (setter)NULL},
   3609     {NULL}
   3610 };
   3611 
   3612 #define MATCH_OFF(x) offsetof(MatchObject, x)
   3613 static PyMemberDef match_members[] = {
   3614     {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY},
   3615     {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY},
   3616     {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY},
   3617     {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY},
   3618     {NULL}
   3619 };
   3620 
   3621 
   3622 /* FIXME: implement setattr("string", None) as a special case (to
   3623    detach the associated string, if any */
   3624 
   3625 static PyTypeObject Match_Type = {
   3626     PyVarObject_HEAD_INIT(NULL, 0)
   3627     "_" SRE_MODULE ".SRE_Match",
   3628     sizeof(MatchObject), sizeof(Py_ssize_t),
   3629     (destructor)match_dealloc,  /* tp_dealloc */
   3630     0,                          /* tp_print */
   3631     0,                          /* tp_getattr */
   3632     0,                          /* tp_setattr */
   3633     0,                          /* tp_compare */
   3634     0,                          /* tp_repr */
   3635     0,                          /* tp_as_number */
   3636     0,                          /* tp_as_sequence */
   3637     0,                          /* tp_as_mapping */
   3638     0,                          /* tp_hash */
   3639     0,                          /* tp_call */
   3640     0,                          /* tp_str */
   3641     0,                          /* tp_getattro */
   3642     0,                          /* tp_setattro */
   3643     0,                          /* tp_as_buffer */
   3644     Py_TPFLAGS_DEFAULT,
   3645     0,                          /* tp_doc */
   3646     0,                          /* tp_traverse */
   3647     0,                          /* tp_clear */
   3648     0,                          /* tp_richcompare */
   3649     0,                          /* tp_weaklistoffset */
   3650     0,                          /* tp_iter */
   3651     0,                          /* tp_iternext */
   3652     match_methods,    /* tp_methods */
   3653     match_members,    /* tp_members */
   3654     match_getset,           /* tp_getset */
   3655 };
   3656 
   3657 static PyObject*
   3658 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
   3659 {
   3660     /* create match object (from state object) */
   3661 
   3662     MatchObject* match;
   3663     Py_ssize_t i, j;
   3664     char* base;
   3665     int n;
   3666 
   3667     if (status > 0) {
   3668 
   3669         /* create match object (with room for extra group marks) */
   3670         /* coverity[ampersand_in_size] */
   3671         match = PyObject_NEW_VAR(MatchObject, &Match_Type,
   3672                                  2*(pattern->groups+1));
   3673         if (!match)
   3674             return NULL;
   3675 
   3676         Py_INCREF(pattern);
   3677         match->pattern = pattern;
   3678 
   3679         Py_INCREF(state->string);
   3680         match->string = state->string;
   3681 
   3682         match->regs = NULL;
   3683         match->groups = pattern->groups+1;
   3684 
   3685         /* fill in group slices */
   3686 
   3687         base = (char*) state->beginning;
   3688         n = state->charsize;
   3689 
   3690         match->mark[0] = ((char*) state->start - base) / n;
   3691         match->mark[1] = ((char*) state->ptr - base) / n;
   3692 
   3693         for (i = j = 0; i < pattern->groups; i++, j+=2)
   3694             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
   3695                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
   3696                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
   3697             } else
   3698                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
   3699 
   3700         match->pos = state->pos;
   3701         match->endpos = state->endpos;
   3702 
   3703         match->lastindex = state->lastindex;
   3704 
   3705         return (PyObject*) match;
   3706 
   3707     } else if (status == 0) {
   3708 
   3709         /* no match */
   3710         Py_INCREF(Py_None);
   3711         return Py_None;
   3712 
   3713     }
   3714 
   3715     /* internal error */
   3716     pattern_error(status);
   3717     return NULL;
   3718 }
   3719 
   3720 
   3721 /* -------------------------------------------------------------------- */
   3722 /* scanner methods (experimental) */
   3723 
   3724 static void
   3725 scanner_dealloc(ScannerObject* self)
   3726 {
   3727     state_fini(&self->state);
   3728     Py_XDECREF(self->pattern);
   3729     PyObject_DEL(self);
   3730 }
   3731 
   3732 static PyObject*
   3733 scanner_match(ScannerObject* self, PyObject *unused)
   3734 {
   3735     SRE_STATE* state = &self->state;
   3736     PyObject* match;
   3737     int status;
   3738 
   3739     state_reset(state);
   3740 
   3741     state->ptr = state->start;
   3742 
   3743     if (state->charsize == 1) {
   3744         status = sre_match(state, PatternObject_GetCode(self->pattern));
   3745     } else {
   3746 #if defined(HAVE_UNICODE)
   3747         status = sre_umatch(state, PatternObject_GetCode(self->pattern));
   3748 #endif
   3749     }
   3750     if (PyErr_Occurred())
   3751         return NULL;
   3752 
   3753     match = pattern_new_match((PatternObject*) self->pattern,
   3754                                state, status);
   3755 
   3756     if (status == 0 || state->ptr == state->start)
   3757         state->start = (void*) ((char*) state->ptr + state->charsize);
   3758     else
   3759         state->start = state->ptr;
   3760 
   3761     return match;
   3762 }
   3763 
   3764 
   3765 static PyObject*
   3766 scanner_search(ScannerObject* self, PyObject *unused)
   3767 {
   3768     SRE_STATE* state = &self->state;
   3769     PyObject* match;
   3770     int status;
   3771 
   3772     state_reset(state);
   3773 
   3774     state->ptr = state->start;
   3775 
   3776     if (state->charsize == 1) {
   3777         status = sre_search(state, PatternObject_GetCode(self->pattern));
   3778     } else {
   3779 #if defined(HAVE_UNICODE)
   3780         status = sre_usearch(state, PatternObject_GetCode(self->pattern));
   3781 #endif
   3782     }
   3783     if (PyErr_Occurred())
   3784         return NULL;
   3785 
   3786     match = pattern_new_match((PatternObject*) self->pattern,
   3787                                state, status);
   3788 
   3789     if (status == 0 || state->ptr == state->start)
   3790         state->start = (void*) ((char*) state->ptr + state->charsize);
   3791     else
   3792         state->start = state->ptr;
   3793 
   3794     return match;
   3795 }
   3796 
   3797 static PyMethodDef scanner_methods[] = {
   3798     {"match", (PyCFunction) scanner_match, METH_NOARGS},
   3799     {"search", (PyCFunction) scanner_search, METH_NOARGS},
   3800     {NULL, NULL}
   3801 };
   3802 
   3803 #define SCAN_OFF(x) offsetof(ScannerObject, x)
   3804 static PyMemberDef scanner_members[] = {
   3805     {"pattern", T_OBJECT, SCAN_OFF(pattern),  READONLY},
   3806     {NULL}  /* Sentinel */
   3807 };
   3808 
   3809 statichere PyTypeObject Scanner_Type = {
   3810     PyObject_HEAD_INIT(NULL)
   3811     0, "_" SRE_MODULE ".SRE_Scanner",
   3812     sizeof(ScannerObject), 0,
   3813     (destructor)scanner_dealloc, /*tp_dealloc*/
   3814     0,        /* tp_print */
   3815     0,        /* tp_getattr */
   3816     0,        /* tp_setattr */
   3817     0,        /* tp_reserved */
   3818     0,        /* tp_repr */
   3819     0,        /* tp_as_number */
   3820     0,        /* tp_as_sequence */
   3821     0,        /* tp_as_mapping */
   3822     0,        /* tp_hash */
   3823     0,        /* tp_call */
   3824     0,        /* tp_str */
   3825     0,        /* tp_getattro */
   3826     0,        /* tp_setattro */
   3827     0,        /* tp_as_buffer */
   3828     Py_TPFLAGS_DEFAULT,   /* tp_flags */
   3829     0,        /* tp_doc */
   3830     0,        /* tp_traverse */
   3831     0,        /* tp_clear */
   3832     0,        /* tp_richcompare */
   3833     0,        /* tp_weaklistoffset */
   3834     0,        /* tp_iter */
   3835     0,        /* tp_iternext */
   3836     scanner_methods,    /* tp_methods */
   3837     scanner_members,    /* tp_members */
   3838     0,        /* tp_getset */
   3839 };
   3840 
   3841 static PyObject*
   3842 pattern_scanner(PatternObject* pattern, PyObject* args)
   3843 {
   3844     /* create search state object */
   3845 
   3846     ScannerObject* self;
   3847 
   3848     PyObject* string;
   3849     Py_ssize_t start = 0;
   3850     Py_ssize_t end = PY_SSIZE_T_MAX;
   3851     if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
   3852         return NULL;
   3853 
   3854     /* create scanner object */
   3855     self = PyObject_NEW(ScannerObject, &Scanner_Type);
   3856     if (!self)
   3857         return NULL;
   3858     self->pattern = NULL;
   3859 
   3860     string = state_init(&self->state, pattern, string, start, end);
   3861     if (!string) {
   3862         Py_DECREF(self);
   3863         return NULL;
   3864     }
   3865 
   3866     Py_INCREF(pattern);
   3867     self->pattern = (PyObject*) pattern;
   3868 
   3869     return (PyObject*) self;
   3870 }
   3871 
   3872 static PyMethodDef _functions[] = {
   3873     {"compile", _compile, METH_VARARGS},
   3874     {"getcodesize", sre_codesize, METH_NOARGS},
   3875     {"getlower", sre_getlower, METH_VARARGS},
   3876     {NULL, NULL}
   3877 };
   3878 
   3879 #if PY_VERSION_HEX < 0x02030000
   3880 DL_EXPORT(void) init_sre(void)
   3881 #else
   3882 PyMODINIT_FUNC init_sre(void)
   3883 #endif
   3884 {
   3885     PyObject* m;
   3886     PyObject* d;
   3887     PyObject* x;
   3888 
   3889     /* Patch object types */
   3890     if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
   3891         PyType_Ready(&Scanner_Type))
   3892         return;
   3893 
   3894     m = Py_InitModule("_" SRE_MODULE, _functions);
   3895     if (m == NULL)
   3896         return;
   3897     d = PyModule_GetDict(m);
   3898 
   3899     x = PyInt_FromLong(SRE_MAGIC);
   3900     if (x) {
   3901         PyDict_SetItemString(d, "MAGIC", x);
   3902         Py_DECREF(x);
   3903     }
   3904 
   3905     x = PyInt_FromLong(sizeof(SRE_CODE));
   3906     if (x) {
   3907         PyDict_SetItemString(d, "CODESIZE", x);
   3908         Py_DECREF(x);
   3909     }
   3910 
   3911     x = PyString_FromString(copyright);
   3912     if (x) {
   3913         PyDict_SetItemString(d, "copyright", x);
   3914         Py_DECREF(x);
   3915     }
   3916 }
   3917 
   3918 #endif /* !defined(SRE_RECURSIVE) */
   3919 
   3920 /* vim:ts=4:sw=4:et
   3921 */
   3922