Home | History | Annotate | Download | only in Modules
      1 /*
      2  * Secret Labs' Regular Expression Engine
      3  *
      4  * regular expression matching engine
      5 
      6   Copyright (c) 2015, Daryl McDaniel. All rights reserved.<BR>
      7   Copyright (c) 2011, Intel Corporation. All rights reserved.<BR>
      8   This program and the accompanying materials are licensed and made available under
      9   the terms and conditions of the BSD License that accompanies this distribution.
     10   The full text of the license may be found at
     11   http://opensource.org/licenses/bsd-license.
     12 
     13   THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
     14   WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
     15  *
     16  * partial history:
     17  * 1999-10-24 fl  created (based on existing template matcher code)
     18  * 2000-03-06 fl  first alpha, sort of
     19  * 2000-08-01 fl  fixes for 1.6b1
     20  * 2000-08-07 fl  use PyOS_CheckStack() if available
     21  * 2000-09-20 fl  added expand method
     22  * 2001-03-20 fl  lots of fixes for 2.1b2
     23  * 2001-04-15 fl  export copyright as Python attribute, not global
     24  * 2001-04-28 fl  added __copy__ methods (work in progress)
     25  * 2001-05-14 fl  fixes for 1.5.2 compatibility
     26  * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
     27  * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
     28  * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
     29  * 2001-10-21 fl  added sub/subn primitive
     30  * 2001-10-24 fl  added finditer primitive (for 2.2 only)
     31  * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
     32  * 2002-11-09 fl  fixed empty sub/subn return type
     33  * 2003-04-18 mvl fully support 4-byte codes
     34  * 2003-10-17 gn  implemented non recursive scheme
     35  *
     36  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
     37  *
     38  * This version of the SRE library can be redistributed under CNRI's
     39  * Python 1.6 license.  For any other use, please contact Secret Labs
     40  * AB (info (at) pythonware.com).
     41  *
     42  * Portions of this engine have been developed in cooperation with
     43  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
     44  * other compatibility work.
     45  */
     46 
     47 /* Get rid of these macros to prevent collisions between EFI and Python in this file. */
     48 #undef  RETURN_ERROR
     49 #undef  RETURN_SUCCESS
     50 
     51 #ifndef SRE_RECURSIVE
     52 
     53 static char copyright[] =
     54     " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
     55 
     56 #define PY_SSIZE_T_CLEAN
     57 
     58 #include "Python.h"
     59 #include "structmember.h" /* offsetof */
     60 
     61 #include "sre.h"
     62 
     63 #include <ctype.h>
     64 
     65 /* name of this module, minus the leading underscore */
     66 #if !defined(SRE_MODULE)
     67 #define SRE_MODULE "sre"
     68 #endif
     69 
     70 #define SRE_PY_MODULE "re"
     71 
     72 /* defining this one enables tracing */
     73 #undef VERBOSE
     74 
     75 #if PY_VERSION_HEX >= 0x01060000
     76 #if PY_VERSION_HEX  < 0x02020000 || defined(Py_USING_UNICODE)
     77 /* defining this enables unicode support (default under 1.6a1 and later) */
     78 #define HAVE_UNICODE
     79 #endif
     80 #endif
     81 
     82 /* -------------------------------------------------------------------- */
     83 /* optional features */
     84 
     85 /* enables fast searching */
     86 #define USE_FAST_SEARCH
     87 
     88 /* enables aggressive inlining (always on for Visual C) */
     89 #undef USE_INLINE
     90 
     91 /* enables copy/deepcopy handling (work in progress) */
     92 #undef USE_BUILTIN_COPY
     93 
     94 #if PY_VERSION_HEX < 0x01060000
     95 #define PyObject_DEL(op) PyMem_DEL((op))
     96 #endif
     97 
     98 /* -------------------------------------------------------------------- */
     99 
    100 #if defined(_MSC_VER)
    101 #pragma optimize("gt", on) /* doesn't seem to make much difference... */
    102 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
    103 /* fastest possible local call under MSVC */
    104 #define LOCAL(type) static __inline type __fastcall
    105 #elif defined(USE_INLINE)
    106 #define LOCAL(type) static inline type
    107 #else
    108 #define LOCAL(type) static type
    109 #endif
    110 
    111 /* error codes */
    112 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
    113 #define SRE_ERROR_STATE -2 /* illegal state */
    114 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
    115 #define SRE_ERROR_MEMORY -9 /* out of memory */
    116 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
    117 
    118 #if defined(VERBOSE)
    119 #define TRACE(v) printf v
    120 #else
    121 #define TRACE(v)
    122 #endif
    123 
    124 /* -------------------------------------------------------------------- */
    125 /* search engine state */
    126 
    127 /* default character predicates (run sre_chars.py to regenerate tables) */
    128 
    129 #define SRE_DIGIT_MASK 1
    130 #define SRE_SPACE_MASK 2
    131 #define SRE_LINEBREAK_MASK 4
    132 #define SRE_ALNUM_MASK 8
    133 #define SRE_WORD_MASK 16
    134 
    135 /* FIXME: this assumes ASCII.  create tables in init_sre() instead */
    136 
    137 static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
    138 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
    139 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
    140 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    141 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
    142 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    143 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
    144 
    145 static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
    146 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
    147 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
    148 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
    149 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
    150 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
    151 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
    152 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
    153 120, 121, 122, 123, 124, 125, 126, 127 };
    154 
    155 #define SRE_IS_DIGIT(ch)\
    156     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
    157 #define SRE_IS_SPACE(ch)\
    158     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
    159 #define SRE_IS_LINEBREAK(ch)\
    160     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
    161 #define SRE_IS_ALNUM(ch)\
    162     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
    163 #define SRE_IS_WORD(ch)\
    164     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
    165 
    166 static unsigned int sre_lower(unsigned int ch)
    167 {
    168     return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
    169 }
    170 
    171 /* locale-specific character predicates */
    172 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
    173  * warnings when c's type supports only numbers < N+1 */
    174 #define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
    175 #define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
    176 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
    177 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
    178 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
    179 
    180 static unsigned int sre_lower_locale(unsigned int ch)
    181 {
    182     return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
    183 }
    184 
    185 /* unicode-specific character predicates */
    186 
    187 #if defined(HAVE_UNICODE)
    188 
    189 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
    190 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
    191 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
    192 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
    193 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
    194 
    195 static unsigned int sre_lower_unicode(unsigned int ch)
    196 {
    197     return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
    198 }
    199 
    200 #endif
    201 
    202 LOCAL(int)
    203 sre_category(SRE_CODE category, unsigned int ch)
    204 {
    205     switch (category) {
    206 
    207     case SRE_CATEGORY_DIGIT:
    208         return SRE_IS_DIGIT(ch);
    209     case SRE_CATEGORY_NOT_DIGIT:
    210         return !SRE_IS_DIGIT(ch);
    211     case SRE_CATEGORY_SPACE:
    212         return SRE_IS_SPACE(ch);
    213     case SRE_CATEGORY_NOT_SPACE:
    214         return !SRE_IS_SPACE(ch);
    215     case SRE_CATEGORY_WORD:
    216         return SRE_IS_WORD(ch);
    217     case SRE_CATEGORY_NOT_WORD:
    218         return !SRE_IS_WORD(ch);
    219     case SRE_CATEGORY_LINEBREAK:
    220         return SRE_IS_LINEBREAK(ch);
    221     case SRE_CATEGORY_NOT_LINEBREAK:
    222         return !SRE_IS_LINEBREAK(ch);
    223 
    224     case SRE_CATEGORY_LOC_WORD:
    225         return SRE_LOC_IS_WORD(ch);
    226     case SRE_CATEGORY_LOC_NOT_WORD:
    227         return !SRE_LOC_IS_WORD(ch);
    228 
    229 #if defined(HAVE_UNICODE)
    230     case SRE_CATEGORY_UNI_DIGIT:
    231         return SRE_UNI_IS_DIGIT(ch);
    232     case SRE_CATEGORY_UNI_NOT_DIGIT:
    233         return !SRE_UNI_IS_DIGIT(ch);
    234     case SRE_CATEGORY_UNI_SPACE:
    235         return SRE_UNI_IS_SPACE(ch);
    236     case SRE_CATEGORY_UNI_NOT_SPACE:
    237         return !SRE_UNI_IS_SPACE(ch);
    238     case SRE_CATEGORY_UNI_WORD:
    239         return SRE_UNI_IS_WORD(ch);
    240     case SRE_CATEGORY_UNI_NOT_WORD:
    241         return !SRE_UNI_IS_WORD(ch);
    242     case SRE_CATEGORY_UNI_LINEBREAK:
    243         return SRE_UNI_IS_LINEBREAK(ch);
    244     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
    245         return !SRE_UNI_IS_LINEBREAK(ch);
    246 #else
    247     case SRE_CATEGORY_UNI_DIGIT:
    248         return SRE_IS_DIGIT(ch);
    249     case SRE_CATEGORY_UNI_NOT_DIGIT:
    250         return !SRE_IS_DIGIT(ch);
    251     case SRE_CATEGORY_UNI_SPACE:
    252         return SRE_IS_SPACE(ch);
    253     case SRE_CATEGORY_UNI_NOT_SPACE:
    254         return !SRE_IS_SPACE(ch);
    255     case SRE_CATEGORY_UNI_WORD:
    256         return SRE_LOC_IS_WORD(ch);
    257     case SRE_CATEGORY_UNI_NOT_WORD:
    258         return !SRE_LOC_IS_WORD(ch);
    259     case SRE_CATEGORY_UNI_LINEBREAK:
    260         return SRE_IS_LINEBREAK(ch);
    261     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
    262         return !SRE_IS_LINEBREAK(ch);
    263 #endif
    264     }
    265     return 0;
    266 }
    267 
    268 /* helpers */
    269 
    270 static void
    271 data_stack_dealloc(SRE_STATE* state)
    272 {
    273     if (state->data_stack) {
    274         PyMem_FREE(state->data_stack);
    275         state->data_stack = NULL;
    276     }
    277     state->data_stack_size = state->data_stack_base = 0;
    278 }
    279 
    280 static int
    281 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
    282 {
    283     Py_ssize_t minsize, cursize;
    284     minsize = state->data_stack_base+size;
    285     cursize = state->data_stack_size;
    286     if (cursize < minsize) {
    287         void* stack;
    288         cursize = minsize+minsize/4+1024;
    289         TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
    290         stack = PyMem_REALLOC(state->data_stack, cursize);
    291         if (!stack) {
    292             data_stack_dealloc(state);
    293             return SRE_ERROR_MEMORY;
    294         }
    295         state->data_stack = (char *)stack;
    296         state->data_stack_size = cursize;
    297     }
    298     return 0;
    299 }
    300 
    301 /* generate 8-bit version */
    302 
    303 #define SRE_CHAR unsigned char
    304 #define SRE_AT sre_at
    305 #define SRE_COUNT sre_count
    306 #define SRE_CHARSET sre_charset
    307 #define SRE_INFO sre_info
    308 #define SRE_MATCH sre_match
    309 #define SRE_MATCH_CONTEXT sre_match_context
    310 #define SRE_SEARCH sre_search
    311 #define SRE_LITERAL_TEMPLATE sre_literal_template
    312 
    313 #if defined(HAVE_UNICODE)
    314 
    315 #define SRE_RECURSIVE
    316 #include "_sre.c"
    317 #undef SRE_RECURSIVE
    318 
    319 #undef SRE_LITERAL_TEMPLATE
    320 #undef SRE_SEARCH
    321 #undef SRE_MATCH
    322 #undef SRE_MATCH_CONTEXT
    323 #undef SRE_INFO
    324 #undef SRE_CHARSET
    325 #undef SRE_COUNT
    326 #undef SRE_AT
    327 #undef SRE_CHAR
    328 
    329 /* generate 16-bit unicode version */
    330 
    331 #define SRE_CHAR Py_UNICODE
    332 #define SRE_AT sre_uat
    333 #define SRE_COUNT sre_ucount
    334 #define SRE_CHARSET sre_ucharset
    335 #define SRE_INFO sre_uinfo
    336 #define SRE_MATCH sre_umatch
    337 #define SRE_MATCH_CONTEXT sre_umatch_context
    338 #define SRE_SEARCH sre_usearch
    339 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
    340 #endif
    341 
    342 #endif /* SRE_RECURSIVE */
    343 
    344 /* -------------------------------------------------------------------- */
    345 /* String matching engine */
    346 
    347 /* the following section is compiled twice, with different character
    348    settings */
    349 
    350 LOCAL(int)
    351 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
    352 {
    353     /* check if pointer is at given position */
    354 
    355     Py_ssize_t thisp, thatp;
    356 
    357     switch (at) {
    358 
    359     case SRE_AT_BEGINNING:
    360     case SRE_AT_BEGINNING_STRING:
    361         return ((void*) ptr == state->beginning);
    362 
    363     case SRE_AT_BEGINNING_LINE:
    364         return ((void*) ptr == state->beginning ||
    365                 SRE_IS_LINEBREAK((int) ptr[-1]));
    366 
    367     case SRE_AT_END:
    368         return (((void*) (ptr+1) == state->end &&
    369                  SRE_IS_LINEBREAK((int) ptr[0])) ||
    370                 ((void*) ptr == state->end));
    371 
    372     case SRE_AT_END_LINE:
    373         return ((void*) ptr == state->end ||
    374                 SRE_IS_LINEBREAK((int) ptr[0]));
    375 
    376     case SRE_AT_END_STRING:
    377         return ((void*) ptr == state->end);
    378 
    379     case SRE_AT_BOUNDARY:
    380         if (state->beginning == state->end)
    381             return 0;
    382         thatp = ((void*) ptr > state->beginning) ?
    383             SRE_IS_WORD((int) ptr[-1]) : 0;
    384         thisp = ((void*) ptr < state->end) ?
    385             SRE_IS_WORD((int) ptr[0]) : 0;
    386         return thisp != thatp;
    387 
    388     case SRE_AT_NON_BOUNDARY:
    389         if (state->beginning == state->end)
    390             return 0;
    391         thatp = ((void*) ptr > state->beginning) ?
    392             SRE_IS_WORD((int) ptr[-1]) : 0;
    393         thisp = ((void*) ptr < state->end) ?
    394             SRE_IS_WORD((int) ptr[0]) : 0;
    395         return thisp == thatp;
    396 
    397     case SRE_AT_LOC_BOUNDARY:
    398         if (state->beginning == state->end)
    399             return 0;
    400         thatp = ((void*) ptr > state->beginning) ?
    401             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
    402         thisp = ((void*) ptr < state->end) ?
    403             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
    404         return thisp != thatp;
    405 
    406     case SRE_AT_LOC_NON_BOUNDARY:
    407         if (state->beginning == state->end)
    408             return 0;
    409         thatp = ((void*) ptr > state->beginning) ?
    410             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
    411         thisp = ((void*) ptr < state->end) ?
    412             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
    413         return thisp == thatp;
    414 
    415 #if defined(HAVE_UNICODE)
    416     case SRE_AT_UNI_BOUNDARY:
    417         if (state->beginning == state->end)
    418             return 0;
    419         thatp = ((void*) ptr > state->beginning) ?
    420             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
    421         thisp = ((void*) ptr < state->end) ?
    422             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
    423         return thisp != thatp;
    424 
    425     case SRE_AT_UNI_NON_BOUNDARY:
    426         if (state->beginning == state->end)
    427             return 0;
    428         thatp = ((void*) ptr > state->beginning) ?
    429             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
    430         thisp = ((void*) ptr < state->end) ?
    431             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
    432         return thisp == thatp;
    433 #endif
    434 
    435     }
    436 
    437     return 0;
    438 }
    439 
    440 LOCAL(int)
    441 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
    442 {
    443     /* check if character is a member of the given set */
    444 
    445     int ok = 1;
    446 
    447     for (;;) {
    448         switch (*set++) {
    449 
    450         case SRE_OP_FAILURE:
    451             return !ok;
    452 
    453         case SRE_OP_LITERAL:
    454             /* <LITERAL> <code> */
    455             if (ch == set[0])
    456                 return ok;
    457             set++;
    458             break;
    459 
    460         case SRE_OP_CATEGORY:
    461             /* <CATEGORY> <code> */
    462             if (sre_category(set[0], (int) ch))
    463                 return ok;
    464             set += 1;
    465             break;
    466 
    467         case SRE_OP_CHARSET:
    468             if (sizeof(SRE_CODE) == 2) {
    469                 /* <CHARSET> <bitmap> (16 bits per code word) */
    470                 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
    471                     return ok;
    472                 set += 16;
    473             }
    474             else {
    475                 /* <CHARSET> <bitmap> (32 bits per code word) */
    476                 if (ch < 256 && (set[ch >> 5] & (1u << (ch & 31))))
    477                     return ok;
    478                 set += 8;
    479             }
    480             break;
    481 
    482         case SRE_OP_RANGE:
    483             /* <RANGE> <lower> <upper> */
    484             if (set[0] <= ch && ch <= set[1])
    485                 return ok;
    486             set += 2;
    487             break;
    488 
    489         case SRE_OP_NEGATE:
    490             ok = !ok;
    491             break;
    492 
    493         case SRE_OP_BIGCHARSET:
    494             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
    495         {
    496             Py_ssize_t count, block;
    497             count = *(set++);
    498 
    499             if (sizeof(SRE_CODE) == 2) {
    500                 block = ((unsigned char*)set)[ch >> 8];
    501                 set += 128;
    502                 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
    503                     return ok;
    504                 set += count*16;
    505             }
    506             else {
    507                 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
    508                  * warnings when c's type supports only numbers < N+1 */
    509                 if (!(ch & ~65535))
    510                     block = ((unsigned char*)set)[ch >> 8];
    511                 else
    512                     block = -1;
    513                 set += 64;
    514                 if (block >=0 &&
    515                     (set[block*8 + ((ch & 255)>>5)] & (1u << (ch & 31))))
    516                     return ok;
    517                 set += count*8;
    518             }
    519             break;
    520         }
    521 
    522         default:
    523             /* internal error -- there's not much we can do about it
    524                here, so let's just pretend it didn't match... */
    525             return 0;
    526         }
    527     }
    528 }
    529 
    530 LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
    531 
    532 LOCAL(Py_ssize_t)
    533 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
    534 {
    535     SRE_CODE chr;
    536     SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
    537     SRE_CHAR* end = (SRE_CHAR *)state->end;
    538     Py_ssize_t i;
    539 
    540     /* adjust end */
    541     if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
    542         end = ptr + maxcount;
    543 
    544     switch (pattern[0]) {
    545 
    546     case SRE_OP_IN:
    547         /* repeated set */
    548         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
    549         while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
    550             ptr++;
    551         break;
    552 
    553     case SRE_OP_ANY:
    554         /* repeated dot wildcard. */
    555         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
    556         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
    557             ptr++;
    558         break;
    559 
    560     case SRE_OP_ANY_ALL:
    561         /* repeated dot wildcard.  skip to the end of the target
    562            string, and backtrack from there */
    563         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
    564         ptr = end;
    565         break;
    566 
    567     case SRE_OP_LITERAL:
    568         /* repeated literal */
    569         chr = pattern[1];
    570         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
    571         while (ptr < end && (SRE_CODE) *ptr == chr)
    572             ptr++;
    573         break;
    574 
    575     case SRE_OP_LITERAL_IGNORE:
    576         /* repeated literal */
    577         chr = pattern[1];
    578         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
    579         while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
    580             ptr++;
    581         break;
    582 
    583     case SRE_OP_NOT_LITERAL:
    584         /* repeated non-literal */
    585         chr = pattern[1];
    586         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
    587         while (ptr < end && (SRE_CODE) *ptr != chr)
    588             ptr++;
    589         break;
    590 
    591     case SRE_OP_NOT_LITERAL_IGNORE:
    592         /* repeated non-literal */
    593         chr = pattern[1];
    594         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
    595         while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
    596             ptr++;
    597         break;
    598 
    599     default:
    600         /* repeated single character pattern */
    601         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
    602         while ((SRE_CHAR*) state->ptr < end) {
    603             i = SRE_MATCH(state, pattern);
    604             if (i < 0)
    605                 return i;
    606             if (!i)
    607                 break;
    608         }
    609         TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
    610                (SRE_CHAR*) state->ptr - ptr));
    611         return (SRE_CHAR*) state->ptr - ptr;
    612     }
    613 
    614     TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
    615            ptr - (SRE_CHAR*) state->ptr));
    616     return ptr - (SRE_CHAR*) state->ptr;
    617 }
    618 
    619 #if 0 /* not used in this release */
    620 LOCAL(int)
    621 SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
    622 {
    623     /* check if an SRE_OP_INFO block matches at the current position.
    624        returns the number of SRE_CODE objects to skip if successful, 0
    625        if no match */
    626 
    627     SRE_CHAR* end = state->end;
    628     SRE_CHAR* ptr = state->ptr;
    629     Py_ssize_t i;
    630 
    631     /* check minimal length */
    632     if (pattern[3] && (end - ptr) < pattern[3])
    633         return 0;
    634 
    635     /* check known prefix */
    636     if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
    637         /* <length> <skip> <prefix data> <overlap data> */
    638         for (i = 0; i < pattern[5]; i++)
    639             if ((SRE_CODE) ptr[i] != pattern[7 + i])
    640                 return 0;
    641         return pattern[0] + 2 * pattern[6];
    642     }
    643     return pattern[0];
    644 }
    645 #endif
    646 
    647 /* The macros below should be used to protect recursive SRE_MATCH()
    648  * calls that *failed* and do *not* return immediately (IOW, those
    649  * that will backtrack). Explaining:
    650  *
    651  * - Recursive SRE_MATCH() returned true: that's usually a success
    652  *   (besides atypical cases like ASSERT_NOT), therefore there's no
    653  *   reason to restore lastmark;
    654  *
    655  * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
    656  *   is returning to the caller: If the current SRE_MATCH() is the
    657  *   top function of the recursion, returning false will be a matching
    658  *   failure, and it doesn't matter where lastmark is pointing to.
    659  *   If it's *not* the top function, it will be a recursive SRE_MATCH()
    660  *   failure by itself, and the calling SRE_MATCH() will have to deal
    661  *   with the failure by the same rules explained here (it will restore
    662  *   lastmark by itself if necessary);
    663  *
    664  * - Recursive SRE_MATCH() returned false, and will continue the
    665  *   outside 'for' loop: must be protected when breaking, since the next
    666  *   OP could potentially depend on lastmark;
    667  *
    668  * - Recursive SRE_MATCH() returned false, and will be called again
    669  *   inside a local for/while loop: must be protected between each
    670  *   loop iteration, since the recursive SRE_MATCH() could do anything,
    671  *   and could potentially depend on lastmark.
    672  *
    673  * For more information, check the discussion at SF patch #712900.
    674  */
    675 #define LASTMARK_SAVE()     \
    676     do { \
    677         ctx->lastmark = state->lastmark; \
    678         ctx->lastindex = state->lastindex; \
    679     } while (0)
    680 #define LASTMARK_RESTORE()  \
    681     do { \
    682         state->lastmark = ctx->lastmark; \
    683         state->lastindex = ctx->lastindex; \
    684     } while (0)
    685 
    686 #define RETURN_ERROR(i) do { return i; } while(0)
    687 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
    688 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
    689 
    690 #define RETURN_ON_ERROR(i) \
    691     do { if (i < 0) RETURN_ERROR(i); } while (0)
    692 #define RETURN_ON_SUCCESS(i) \
    693     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
    694 #define RETURN_ON_FAILURE(i) \
    695     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
    696 
    697 #define SFY(x) #x
    698 
    699 #define DATA_STACK_ALLOC(state, type, ptr) \
    700 do { \
    701     alloc_pos = state->data_stack_base; \
    702     TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
    703            "(%" PY_FORMAT_SIZE_T "d)\n", \
    704            SFY(type), alloc_pos, sizeof(type))); \
    705     if (sizeof(type) > state->data_stack_size - alloc_pos) { \
    706         int j = data_stack_grow(state, sizeof(type)); \
    707         if (j < 0) return j; \
    708         if (ctx_pos != -1) \
    709             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    710     } \
    711     ptr = (type*)(state->data_stack+alloc_pos); \
    712     state->data_stack_base += sizeof(type); \
    713 } while (0)
    714 
    715 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
    716 do { \
    717     TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", SFY(type), pos)); \
    718     ptr = (type*)(state->data_stack+pos); \
    719 } while (0)
    720 
    721 #define DATA_STACK_PUSH(state, data, size) \
    722 do { \
    723     TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
    724            "(%" PY_FORMAT_SIZE_T "d)\n", \
    725            data, state->data_stack_base, size)); \
    726     if (size > state->data_stack_size - state->data_stack_base) { \
    727         int j = data_stack_grow(state, size); \
    728         if (j < 0) return j; \
    729         if (ctx_pos != -1) \
    730             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    731     } \
    732     memcpy(state->data_stack+state->data_stack_base, data, size); \
    733     state->data_stack_base += size; \
    734 } while (0)
    735 
    736 #define DATA_STACK_POP(state, data, size, discard) \
    737 do { \
    738     TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
    739            "(%" PY_FORMAT_SIZE_T "d)\n", \
    740            data, state->data_stack_base-size, size)); \
    741     memcpy(data, state->data_stack+state->data_stack_base-size, size); \
    742     if (discard) \
    743         state->data_stack_base -= size; \
    744 } while (0)
    745 
    746 #define DATA_STACK_POP_DISCARD(state, size) \
    747 do { \
    748     TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
    749            "(%" PY_FORMAT_SIZE_T "d)\n", \
    750            state->data_stack_base-size, size)); \
    751     state->data_stack_base -= size; \
    752 } while(0)
    753 
    754 #define DATA_PUSH(x) \
    755     DATA_STACK_PUSH(state, (x), sizeof(*(x)))
    756 #define DATA_POP(x) \
    757     DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
    758 #define DATA_POP_DISCARD(x) \
    759     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
    760 #define DATA_ALLOC(t,p) \
    761     DATA_STACK_ALLOC(state, t, p)
    762 #define DATA_LOOKUP_AT(t,p,pos) \
    763     DATA_STACK_LOOKUP_AT(state,t,p,pos)
    764 
    765 #define MARK_PUSH(lastmark) \
    766     do if (lastmark > 0) { \
    767         i = lastmark; /* ctx->lastmark may change if reallocated */ \
    768         DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
    769     } while (0)
    770 #define MARK_POP(lastmark) \
    771     do if (lastmark > 0) { \
    772         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
    773     } while (0)
    774 #define MARK_POP_KEEP(lastmark) \
    775     do if (lastmark > 0) { \
    776         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
    777     } while (0)
    778 #define MARK_POP_DISCARD(lastmark) \
    779     do if (lastmark > 0) { \
    780         DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
    781     } while (0)
    782 
    783 #define JUMP_NONE            0
    784 #define JUMP_MAX_UNTIL_1     1
    785 #define JUMP_MAX_UNTIL_2     2
    786 #define JUMP_MAX_UNTIL_3     3
    787 #define JUMP_MIN_UNTIL_1     4
    788 #define JUMP_MIN_UNTIL_2     5
    789 #define JUMP_MIN_UNTIL_3     6
    790 #define JUMP_REPEAT          7
    791 #define JUMP_REPEAT_ONE_1    8
    792 #define JUMP_REPEAT_ONE_2    9
    793 #define JUMP_MIN_REPEAT_ONE  10
    794 #define JUMP_BRANCH          11
    795 #define JUMP_ASSERT          12
    796 #define JUMP_ASSERT_NOT      13
    797 
    798 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
    799     DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
    800     nextctx->last_ctx_pos = ctx_pos; \
    801     nextctx->jump = jumpvalue; \
    802     nextctx->pattern = nextpattern; \
    803     ctx_pos = alloc_pos; \
    804     ctx = nextctx; \
    805     goto entrance; \
    806     jumplabel: \
    807     while (0) /* gcc doesn't like labels at end of scopes */ \
    808 
    809 typedef struct {
    810     Py_ssize_t last_ctx_pos;
    811     Py_ssize_t jump;
    812     SRE_CHAR* ptr;
    813     SRE_CODE* pattern;
    814     Py_ssize_t count;
    815     Py_ssize_t lastmark;
    816     Py_ssize_t lastindex;
    817     union {
    818         SRE_CODE chr;
    819         SRE_REPEAT* rep;
    820     } u;
    821 } SRE_MATCH_CONTEXT;
    822 
    823 /* check if string matches the given pattern.  returns <0 for
    824    error, 0 for failure, and 1 for success */
    825 LOCAL(Py_ssize_t)
    826 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
    827 {
    828     SRE_CHAR* end = (SRE_CHAR *)state->end;
    829     Py_ssize_t alloc_pos, ctx_pos = -1;
    830     Py_ssize_t i, ret = 0;
    831     Py_ssize_t jump;
    832     unsigned int sigcount=0;
    833 
    834     SRE_MATCH_CONTEXT* ctx;
    835     SRE_MATCH_CONTEXT* nextctx;
    836 
    837     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
    838 
    839     DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
    840     ctx->last_ctx_pos = -1;
    841     ctx->jump = JUMP_NONE;
    842     ctx->pattern = pattern;
    843     ctx_pos = alloc_pos;
    844 
    845 entrance:
    846 
    847     ctx->ptr = (SRE_CHAR *)state->ptr;
    848 
    849     if (ctx->pattern[0] == SRE_OP_INFO) {
    850         /* optimization info block */
    851         /* <INFO> <1=skip> <2=flags> <3=min> ... */
    852         if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
    853             TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
    854                    "need %" PY_FORMAT_SIZE_T "d)\n",
    855                    (end - ctx->ptr), (Py_ssize_t) ctx->pattern[3]));
    856             RETURN_FAILURE;
    857         }
    858         ctx->pattern += ctx->pattern[1] + 1;
    859     }
    860 
    861     for (;;) {
    862         ++sigcount;
    863         if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
    864             RETURN_ERROR(SRE_ERROR_INTERRUPTED);
    865 
    866         switch (*ctx->pattern++) {
    867 
    868         case SRE_OP_MARK:
    869             /* set mark */
    870             /* <MARK> <gid> */
    871             TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
    872                    ctx->ptr, ctx->pattern[0]));
    873             i = ctx->pattern[0];
    874             if (i & 1)
    875                 state->lastindex = i/2 + 1;
    876             if (i > state->lastmark) {
    877                 /* state->lastmark is the highest valid index in the
    878                    state->mark array.  If it is increased by more than 1,
    879                    the intervening marks must be set to NULL to signal
    880                    that these marks have not been encountered. */
    881                 Py_ssize_t j = state->lastmark + 1;
    882                 while (j < i)
    883                     state->mark[j++] = NULL;
    884                 state->lastmark = i;
    885             }
    886             state->mark[i] = ctx->ptr;
    887             ctx->pattern++;
    888             break;
    889 
    890         case SRE_OP_LITERAL:
    891             /* match literal string */
    892             /* <LITERAL> <code> */
    893             TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
    894                    ctx->ptr, *ctx->pattern));
    895             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
    896                 RETURN_FAILURE;
    897             ctx->pattern++;
    898             ctx->ptr++;
    899             break;
    900 
    901         case SRE_OP_NOT_LITERAL:
    902             /* match anything that is not literal character */
    903             /* <NOT_LITERAL> <code> */
    904             TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
    905                    ctx->ptr, *ctx->pattern));
    906             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
    907                 RETURN_FAILURE;
    908             ctx->pattern++;
    909             ctx->ptr++;
    910             break;
    911 
    912         case SRE_OP_SUCCESS:
    913             /* end of pattern */
    914             TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
    915             state->ptr = ctx->ptr;
    916             RETURN_SUCCESS;
    917 
    918         case SRE_OP_AT:
    919             /* match at given position */
    920             /* <AT> <code> */
    921             TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
    922             if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
    923                 RETURN_FAILURE;
    924             ctx->pattern++;
    925             break;
    926 
    927         case SRE_OP_CATEGORY:
    928             /* match at given category */
    929             /* <CATEGORY> <code> */
    930             TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
    931                    ctx->ptr, *ctx->pattern));
    932             if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
    933                 RETURN_FAILURE;
    934             ctx->pattern++;
    935             ctx->ptr++;
    936             break;
    937 
    938         case SRE_OP_ANY:
    939             /* match anything (except a newline) */
    940             /* <ANY> */
    941             TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
    942             if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
    943                 RETURN_FAILURE;
    944             ctx->ptr++;
    945             break;
    946 
    947         case SRE_OP_ANY_ALL:
    948             /* match anything */
    949             /* <ANY_ALL> */
    950             TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
    951             if (ctx->ptr >= end)
    952                 RETURN_FAILURE;
    953             ctx->ptr++;
    954             break;
    955 
    956         case SRE_OP_IN:
    957             /* match set member (or non_member) */
    958             /* <IN> <skip> <set> */
    959             TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
    960             if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
    961                 RETURN_FAILURE;
    962             ctx->pattern += ctx->pattern[0];
    963             ctx->ptr++;
    964             break;
    965 
    966         case SRE_OP_LITERAL_IGNORE:
    967             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
    968                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
    969             if (ctx->ptr >= end ||
    970                 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
    971                 RETURN_FAILURE;
    972             ctx->pattern++;
    973             ctx->ptr++;
    974             break;
    975 
    976         case SRE_OP_NOT_LITERAL_IGNORE:
    977             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
    978                    ctx->pattern, ctx->ptr, *ctx->pattern));
    979             if (ctx->ptr >= end ||
    980                 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
    981                 RETURN_FAILURE;
    982             ctx->pattern++;
    983             ctx->ptr++;
    984             break;
    985 
    986         case SRE_OP_IN_IGNORE:
    987             TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
    988             if (ctx->ptr >= end
    989                 || !SRE_CHARSET(ctx->pattern+1,
    990                                 (SRE_CODE)state->lower(*ctx->ptr)))
    991                 RETURN_FAILURE;
    992             ctx->pattern += ctx->pattern[0];
    993             ctx->ptr++;
    994             break;
    995 
    996         case SRE_OP_JUMP:
    997         case SRE_OP_INFO:
    998             /* jump forward */
    999             /* <JUMP> <offset> */
   1000             TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
   1001                    ctx->ptr, ctx->pattern[0]));
   1002             ctx->pattern += ctx->pattern[0];
   1003             break;
   1004 
   1005         case SRE_OP_BRANCH:
   1006             /* alternation */
   1007             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
   1008             TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
   1009             LASTMARK_SAVE();
   1010             ctx->u.rep = state->repeat;
   1011             if (ctx->u.rep)
   1012                 MARK_PUSH(ctx->lastmark);
   1013             for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
   1014                 if (ctx->pattern[1] == SRE_OP_LITERAL &&
   1015                     (ctx->ptr >= end ||
   1016                      (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
   1017                     continue;
   1018                 if (ctx->pattern[1] == SRE_OP_IN &&
   1019                     (ctx->ptr >= end ||
   1020                      !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
   1021                     continue;
   1022                 state->ptr = ctx->ptr;
   1023                 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
   1024                 if (ret) {
   1025                     if (ctx->u.rep)
   1026                         MARK_POP_DISCARD(ctx->lastmark);
   1027                     RETURN_ON_ERROR(ret);
   1028                     RETURN_SUCCESS;
   1029                 }
   1030                 if (ctx->u.rep)
   1031                     MARK_POP_KEEP(ctx->lastmark);
   1032                 LASTMARK_RESTORE();
   1033             }
   1034             if (ctx->u.rep)
   1035                 MARK_POP_DISCARD(ctx->lastmark);
   1036             RETURN_FAILURE;
   1037 
   1038         case SRE_OP_REPEAT_ONE:
   1039             /* match repeated sequence (maximizing regexp) */
   1040 
   1041             /* this operator only works if the repeated item is
   1042                exactly one character wide, and we're not already
   1043                collecting backtracking points.  for other cases,
   1044                use the MAX_REPEAT operator */
   1045 
   1046             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
   1047 
   1048             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
   1049                    ctx->pattern[1], ctx->pattern[2]));
   1050 
   1051             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
   1052                 RETURN_FAILURE; /* cannot match */
   1053 
   1054             state->ptr = ctx->ptr;
   1055 
   1056             ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
   1057             RETURN_ON_ERROR(ret);
   1058             DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1059             ctx->count = ret;
   1060             ctx->ptr += ctx->count;
   1061 
   1062             /* when we arrive here, count contains the number of
   1063                matches, and ctx->ptr points to the tail of the target
   1064                string.  check if the rest of the pattern matches,
   1065                and backtrack if not. */
   1066 
   1067             if (ctx->count < (Py_ssize_t) ctx->pattern[1])
   1068                 RETURN_FAILURE;
   1069 
   1070             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
   1071                 /* tail is empty.  we're finished */
   1072                 state->ptr = ctx->ptr;
   1073                 RETURN_SUCCESS;
   1074             }
   1075 
   1076             LASTMARK_SAVE();
   1077 
   1078             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
   1079                 /* tail starts with a literal. skip positions where
   1080                    the rest of the pattern cannot possibly match */
   1081                 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
   1082                 for (;;) {
   1083                     while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
   1084                            (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
   1085                         ctx->ptr--;
   1086                         ctx->count--;
   1087                     }
   1088                     if (ctx->count < (Py_ssize_t) ctx->pattern[1])
   1089                         break;
   1090                     state->ptr = ctx->ptr;
   1091                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
   1092                             ctx->pattern+ctx->pattern[0]);
   1093                     if (ret) {
   1094                         RETURN_ON_ERROR(ret);
   1095                         RETURN_SUCCESS;
   1096                     }
   1097 
   1098                     LASTMARK_RESTORE();
   1099 
   1100                     ctx->ptr--;
   1101                     ctx->count--;
   1102                 }
   1103 
   1104             } else {
   1105                 /* general case */
   1106                 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
   1107                     state->ptr = ctx->ptr;
   1108                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
   1109                             ctx->pattern+ctx->pattern[0]);
   1110                     if (ret) {
   1111                         RETURN_ON_ERROR(ret);
   1112                         RETURN_SUCCESS;
   1113                     }
   1114                     ctx->ptr--;
   1115                     ctx->count--;
   1116                     LASTMARK_RESTORE();
   1117                 }
   1118             }
   1119             RETURN_FAILURE;
   1120 
   1121         case SRE_OP_MIN_REPEAT_ONE:
   1122             /* match repeated sequence (minimizing regexp) */
   1123 
   1124             /* this operator only works if the repeated item is
   1125                exactly one character wide, and we're not already
   1126                collecting backtracking points.  for other cases,
   1127                use the MIN_REPEAT operator */
   1128 
   1129             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
   1130 
   1131             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
   1132                    ctx->pattern[1], ctx->pattern[2]));
   1133 
   1134             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
   1135                 RETURN_FAILURE; /* cannot match */
   1136 
   1137             state->ptr = ctx->ptr;
   1138 
   1139             if (ctx->pattern[1] == 0)
   1140                 ctx->count = 0;
   1141             else {
   1142                 /* count using pattern min as the maximum */
   1143                 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
   1144                 RETURN_ON_ERROR(ret);
   1145                 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1146                 if (ret < (Py_ssize_t) ctx->pattern[1])
   1147                     /* didn't match minimum number of times */
   1148                     RETURN_FAILURE;
   1149                 /* advance past minimum matches of repeat */
   1150                 ctx->count = ret;
   1151                 ctx->ptr += ctx->count;
   1152             }
   1153 
   1154             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
   1155                 /* tail is empty.  we're finished */
   1156                 state->ptr = ctx->ptr;
   1157                 RETURN_SUCCESS;
   1158 
   1159             } else {
   1160                 /* general case */
   1161                 LASTMARK_SAVE();
   1162                 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
   1163                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
   1164                     state->ptr = ctx->ptr;
   1165                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
   1166                             ctx->pattern+ctx->pattern[0]);
   1167                     if (ret) {
   1168                         RETURN_ON_ERROR(ret);
   1169                         RETURN_SUCCESS;
   1170                     }
   1171                     state->ptr = ctx->ptr;
   1172                     ret = SRE_COUNT(state, ctx->pattern+3, 1);
   1173                     RETURN_ON_ERROR(ret);
   1174                     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1175                     if (ret == 0)
   1176                         break;
   1177                     assert(ret == 1);
   1178                     ctx->ptr++;
   1179                     ctx->count++;
   1180                     LASTMARK_RESTORE();
   1181                 }
   1182             }
   1183             RETURN_FAILURE;
   1184 
   1185         case SRE_OP_REPEAT:
   1186             /* create repeat context.  all the hard work is done
   1187                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
   1188             /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
   1189             TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
   1190                    ctx->pattern[1], ctx->pattern[2]));
   1191 
   1192             /* install new repeat context */
   1193             ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
   1194             if (!ctx->u.rep) {
   1195                 PyErr_NoMemory();
   1196                 RETURN_FAILURE;
   1197             }
   1198             ctx->u.rep->count = -1;
   1199             ctx->u.rep->pattern = ctx->pattern;
   1200             ctx->u.rep->prev = state->repeat;
   1201             ctx->u.rep->last_ptr = NULL;
   1202             state->repeat = ctx->u.rep;
   1203 
   1204             state->ptr = ctx->ptr;
   1205             DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
   1206             state->repeat = ctx->u.rep->prev;
   1207             PyObject_FREE(ctx->u.rep);
   1208 
   1209             if (ret) {
   1210                 RETURN_ON_ERROR(ret);
   1211                 RETURN_SUCCESS;
   1212             }
   1213             RETURN_FAILURE;
   1214 
   1215         case SRE_OP_MAX_UNTIL:
   1216             /* maximizing repeat */
   1217             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
   1218 
   1219             /* FIXME: we probably need to deal with zero-width
   1220                matches in here... */
   1221 
   1222             ctx->u.rep = state->repeat;
   1223             if (!ctx->u.rep)
   1224                 RETURN_ERROR(SRE_ERROR_STATE);
   1225 
   1226             state->ptr = ctx->ptr;
   1227 
   1228             ctx->count = ctx->u.rep->count+1;
   1229 
   1230             TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
   1231                    ctx->ptr, ctx->count));
   1232 
   1233             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
   1234                 /* not enough matches */
   1235                 ctx->u.rep->count = ctx->count;
   1236                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
   1237                         ctx->u.rep->pattern+3);
   1238                 if (ret) {
   1239                     RETURN_ON_ERROR(ret);
   1240                     RETURN_SUCCESS;
   1241                 }
   1242                 ctx->u.rep->count = ctx->count-1;
   1243                 state->ptr = ctx->ptr;
   1244                 RETURN_FAILURE;
   1245             }
   1246 
   1247             if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
   1248                 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
   1249                 state->ptr != ctx->u.rep->last_ptr) {
   1250                 /* we may have enough matches, but if we can
   1251                    match another item, do so */
   1252                 ctx->u.rep->count = ctx->count;
   1253                 LASTMARK_SAVE();
   1254                 MARK_PUSH(ctx->lastmark);
   1255                 /* zero-width match protection */
   1256                 DATA_PUSH(&ctx->u.rep->last_ptr);
   1257                 ctx->u.rep->last_ptr = state->ptr;
   1258                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
   1259                         ctx->u.rep->pattern+3);
   1260                 DATA_POP(&ctx->u.rep->last_ptr);
   1261                 if (ret) {
   1262                     MARK_POP_DISCARD(ctx->lastmark);
   1263                     RETURN_ON_ERROR(ret);
   1264                     RETURN_SUCCESS;
   1265                 }
   1266                 MARK_POP(ctx->lastmark);
   1267                 LASTMARK_RESTORE();
   1268                 ctx->u.rep->count = ctx->count-1;
   1269                 state->ptr = ctx->ptr;
   1270             }
   1271 
   1272             /* cannot match more repeated items here.  make sure the
   1273                tail matches */
   1274             state->repeat = ctx->u.rep->prev;
   1275             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
   1276             RETURN_ON_SUCCESS(ret);
   1277             state->repeat = ctx->u.rep;
   1278             state->ptr = ctx->ptr;
   1279             RETURN_FAILURE;
   1280 
   1281         case SRE_OP_MIN_UNTIL:
   1282             /* minimizing repeat */
   1283             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
   1284 
   1285             ctx->u.rep = state->repeat;
   1286             if (!ctx->u.rep)
   1287                 RETURN_ERROR(SRE_ERROR_STATE);
   1288 
   1289             state->ptr = ctx->ptr;
   1290 
   1291             ctx->count = ctx->u.rep->count+1;
   1292 
   1293             TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
   1294                    ctx->ptr, ctx->count, ctx->u.rep->pattern));
   1295 
   1296             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
   1297                 /* not enough matches */
   1298                 ctx->u.rep->count = ctx->count;
   1299                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
   1300                         ctx->u.rep->pattern+3);
   1301                 if (ret) {
   1302                     RETURN_ON_ERROR(ret);
   1303                     RETURN_SUCCESS;
   1304                 }
   1305                 ctx->u.rep->count = ctx->count-1;
   1306                 state->ptr = ctx->ptr;
   1307                 RETURN_FAILURE;
   1308             }
   1309 
   1310             LASTMARK_SAVE();
   1311 
   1312             /* see if the tail matches */
   1313             state->repeat = ctx->u.rep->prev;
   1314             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
   1315             if (ret) {
   1316                 RETURN_ON_ERROR(ret);
   1317                 RETURN_SUCCESS;
   1318             }
   1319 
   1320             state->repeat = ctx->u.rep;
   1321             state->ptr = ctx->ptr;
   1322 
   1323             LASTMARK_RESTORE();
   1324 
   1325             if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
   1326                 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
   1327                 state->ptr == ctx->u.rep->last_ptr)
   1328                 RETURN_FAILURE;
   1329 
   1330             ctx->u.rep->count = ctx->count;
   1331             /* zero-width match protection */
   1332             DATA_PUSH(&ctx->u.rep->last_ptr);
   1333             ctx->u.rep->last_ptr = state->ptr;
   1334             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
   1335                     ctx->u.rep->pattern+3);
   1336             DATA_POP(&ctx->u.rep->last_ptr);
   1337             if (ret) {
   1338                 RETURN_ON_ERROR(ret);
   1339                 RETURN_SUCCESS;
   1340             }
   1341             ctx->u.rep->count = ctx->count-1;
   1342             state->ptr = ctx->ptr;
   1343             RETURN_FAILURE;
   1344 
   1345         case SRE_OP_GROUPREF:
   1346             /* match backreference */
   1347             TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
   1348                    ctx->ptr, ctx->pattern[0]));
   1349             i = ctx->pattern[0];
   1350             {
   1351                 Py_ssize_t groupref = i+i;
   1352                 if (groupref >= state->lastmark) {
   1353                     RETURN_FAILURE;
   1354                 } else {
   1355                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1356                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1357                     if (!p || !e || e < p)
   1358                         RETURN_FAILURE;
   1359                     while (p < e) {
   1360                         if (ctx->ptr >= end || *ctx->ptr != *p)
   1361                             RETURN_FAILURE;
   1362                         p++; ctx->ptr++;
   1363                     }
   1364                 }
   1365             }
   1366             ctx->pattern++;
   1367             break;
   1368 
   1369         case SRE_OP_GROUPREF_IGNORE:
   1370             /* match backreference */
   1371             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
   1372                    ctx->ptr, ctx->pattern[0]));
   1373             i = ctx->pattern[0];
   1374             {
   1375                 Py_ssize_t groupref = i+i;
   1376                 if (groupref >= state->lastmark) {
   1377                     RETURN_FAILURE;
   1378                 } else {
   1379                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1380                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1381                     if (!p || !e || e < p)
   1382                         RETURN_FAILURE;
   1383                     while (p < e) {
   1384                         if (ctx->ptr >= end ||
   1385                             state->lower(*ctx->ptr) != state->lower(*p))
   1386                             RETURN_FAILURE;
   1387                         p++; ctx->ptr++;
   1388                     }
   1389                 }
   1390             }
   1391             ctx->pattern++;
   1392             break;
   1393 
   1394         case SRE_OP_GROUPREF_EXISTS:
   1395             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
   1396                    ctx->ptr, ctx->pattern[0]));
   1397             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
   1398             i = ctx->pattern[0];
   1399             {
   1400                 Py_ssize_t groupref = i+i;
   1401                 if (groupref >= state->lastmark) {
   1402                     ctx->pattern += ctx->pattern[1];
   1403                     break;
   1404                 } else {
   1405                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1406                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1407                     if (!p || !e || e < p) {
   1408                         ctx->pattern += ctx->pattern[1];
   1409                         break;
   1410                     }
   1411                 }
   1412             }
   1413             ctx->pattern += 2;
   1414             break;
   1415 
   1416         case SRE_OP_ASSERT:
   1417             /* assert subpattern */
   1418             /* <ASSERT> <skip> <back> <pattern> */
   1419             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
   1420                    ctx->ptr, ctx->pattern[1]));
   1421             state->ptr = ctx->ptr - ctx->pattern[1];
   1422             if (state->ptr < state->beginning)
   1423                 RETURN_FAILURE;
   1424             DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
   1425             RETURN_ON_FAILURE(ret);
   1426             ctx->pattern += ctx->pattern[0];
   1427             break;
   1428 
   1429         case SRE_OP_ASSERT_NOT:
   1430             /* assert not subpattern */
   1431             /* <ASSERT_NOT> <skip> <back> <pattern> */
   1432             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
   1433                    ctx->ptr, ctx->pattern[1]));
   1434             state->ptr = ctx->ptr - ctx->pattern[1];
   1435             if (state->ptr >= state->beginning) {
   1436                 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
   1437                 if (ret) {
   1438                     RETURN_ON_ERROR(ret);
   1439                     RETURN_FAILURE;
   1440                 }
   1441             }
   1442             ctx->pattern += ctx->pattern[0];
   1443             break;
   1444 
   1445         case SRE_OP_FAILURE:
   1446             /* immediate failure */
   1447             TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
   1448             RETURN_FAILURE;
   1449 
   1450         default:
   1451             TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
   1452                    ctx->pattern[-1]));
   1453             RETURN_ERROR(SRE_ERROR_ILLEGAL);
   1454         }
   1455     }
   1456 
   1457 exit:
   1458     ctx_pos = ctx->last_ctx_pos;
   1459     jump = ctx->jump;
   1460     DATA_POP_DISCARD(ctx);
   1461     if (ctx_pos == -1)
   1462         return ret;
   1463     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
   1464 
   1465     switch (jump) {
   1466         case JUMP_MAX_UNTIL_2:
   1467             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
   1468             goto jump_max_until_2;
   1469         case JUMP_MAX_UNTIL_3:
   1470             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
   1471             goto jump_max_until_3;
   1472         case JUMP_MIN_UNTIL_2:
   1473             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
   1474             goto jump_min_until_2;
   1475         case JUMP_MIN_UNTIL_3:
   1476             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
   1477             goto jump_min_until_3;
   1478         case JUMP_BRANCH:
   1479             TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
   1480             goto jump_branch;
   1481         case JUMP_MAX_UNTIL_1:
   1482             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
   1483             goto jump_max_until_1;
   1484         case JUMP_MIN_UNTIL_1:
   1485             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
   1486             goto jump_min_until_1;
   1487         case JUMP_REPEAT:
   1488             TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
   1489             goto jump_repeat;
   1490         case JUMP_REPEAT_ONE_1:
   1491             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
   1492             goto jump_repeat_one_1;
   1493         case JUMP_REPEAT_ONE_2:
   1494             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
   1495             goto jump_repeat_one_2;
   1496         case JUMP_MIN_REPEAT_ONE:
   1497             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
   1498             goto jump_min_repeat_one;
   1499         case JUMP_ASSERT:
   1500             TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
   1501             goto jump_assert;
   1502         case JUMP_ASSERT_NOT:
   1503             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
   1504             goto jump_assert_not;
   1505         case JUMP_NONE:
   1506             TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
   1507                    ctx->ptr, ret));
   1508             break;
   1509     }
   1510 
   1511     return ret; /* should never get here */
   1512 }
   1513 
   1514 LOCAL(Py_ssize_t)
   1515 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
   1516 {
   1517     SRE_CHAR* ptr = (SRE_CHAR *)state->start;
   1518     SRE_CHAR* end = (SRE_CHAR *)state->end;
   1519     Py_ssize_t status = 0;
   1520     Py_ssize_t prefix_len = 0;
   1521     Py_ssize_t prefix_skip = 0;
   1522     SRE_CODE* prefix = NULL;
   1523     SRE_CODE* charset = NULL;
   1524     SRE_CODE* overlap = NULL;
   1525     int flags = 0;
   1526 
   1527     if (pattern[0] == SRE_OP_INFO) {
   1528         /* optimization info block */
   1529         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
   1530 
   1531         flags = pattern[2];
   1532 
   1533         if (pattern[3] > 1) {
   1534             /* adjust end point (but make sure we leave at least one
   1535                character in there, so literal search will work) */
   1536             end -= pattern[3]-1;
   1537             if (end <= ptr)
   1538                 end = ptr+1;
   1539         }
   1540 
   1541         if (flags & SRE_INFO_PREFIX) {
   1542             /* pattern starts with a known prefix */
   1543             /* <length> <skip> <prefix data> <overlap data> */
   1544             prefix_len = pattern[5];
   1545             prefix_skip = pattern[6];
   1546             prefix = pattern + 7;
   1547             overlap = prefix + prefix_len - 1;
   1548         } else if (flags & SRE_INFO_CHARSET)
   1549             /* pattern starts with a character from a known set */
   1550             /* <charset> */
   1551             charset = pattern + 5;
   1552 
   1553         pattern += 1 + pattern[1];
   1554     }
   1555 
   1556     TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
   1557            prefix, prefix_len, prefix_skip));
   1558     TRACE(("charset = %p\n", charset));
   1559 
   1560 #if defined(USE_FAST_SEARCH)
   1561     if (prefix_len > 1) {
   1562         /* pattern starts with a known prefix.  use the overlap
   1563            table to skip forward as fast as we possibly can */
   1564         Py_ssize_t i = 0;
   1565         end = (SRE_CHAR *)state->end;
   1566         while (ptr < end) {
   1567             for (;;) {
   1568                 if ((SRE_CODE) ptr[0] != prefix[i]) {
   1569                     if (!i)
   1570                         break;
   1571                     else
   1572                         i = overlap[i];
   1573                 } else {
   1574                     if (++i == prefix_len) {
   1575                         /* found a potential match */
   1576                         TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
   1577                         state->start = ptr + 1 - prefix_len;
   1578                         state->ptr = ptr + 1 - prefix_len + prefix_skip;
   1579                         if (flags & SRE_INFO_LITERAL)
   1580                             return 1; /* we got all of it */
   1581                         status = SRE_MATCH(state, pattern + 2*prefix_skip);
   1582                         if (status != 0)
   1583                             return status;
   1584                         /* close but no cigar -- try again */
   1585                         i = overlap[i];
   1586                     }
   1587                     break;
   1588                 }
   1589             }
   1590             ptr++;
   1591         }
   1592         return 0;
   1593     }
   1594 #endif
   1595 
   1596     if (pattern[0] == SRE_OP_LITERAL) {
   1597         /* pattern starts with a literal character.  this is used
   1598            for short prefixes, and if fast search is disabled */
   1599         SRE_CODE chr = pattern[1];
   1600         end = (SRE_CHAR *)state->end;
   1601         for (;;) {
   1602             while (ptr < end && (SRE_CODE) ptr[0] != chr)
   1603                 ptr++;
   1604             if (ptr >= end)
   1605                 return 0;
   1606             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
   1607             state->start = ptr;
   1608             state->ptr = ++ptr;
   1609             if (flags & SRE_INFO_LITERAL)
   1610                 return 1; /* we got all of it */
   1611             status = SRE_MATCH(state, pattern + 2);
   1612             if (status != 0)
   1613                 break;
   1614         }
   1615     } else if (charset) {
   1616         /* pattern starts with a character from a known set */
   1617         end = (SRE_CHAR *)state->end;
   1618         for (;;) {
   1619             while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
   1620                 ptr++;
   1621             if (ptr >= end)
   1622                 return 0;
   1623             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
   1624             state->start = ptr;
   1625             state->ptr = ptr;
   1626             status = SRE_MATCH(state, pattern);
   1627             if (status != 0)
   1628                 break;
   1629             ptr++;
   1630         }
   1631     } else
   1632         /* general case */
   1633         while (ptr <= end) {
   1634             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
   1635             state->start = state->ptr = ptr++;
   1636             status = SRE_MATCH(state, pattern);
   1637             if (status != 0)
   1638                 break;
   1639         }
   1640 
   1641     return status;
   1642 }
   1643 
   1644 LOCAL(int)
   1645 SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
   1646 {
   1647     /* check if given string is a literal template (i.e. no escapes) */
   1648     while (len-- > 0)
   1649         if (*ptr++ == '\\')
   1650             return 0;
   1651     return 1;
   1652 }
   1653 
   1654 #if !defined(SRE_RECURSIVE)
   1655 
   1656 /* -------------------------------------------------------------------- */
   1657 /* factories and destructors */
   1658 
   1659 /* see sre.h for object declarations */
   1660 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
   1661 static PyObject*pattern_scanner(PatternObject*, PyObject*);
   1662 
   1663 static PyObject *
   1664 sre_codesize(PyObject* self, PyObject *unused)
   1665 {
   1666     return PyInt_FromSize_t(sizeof(SRE_CODE));
   1667 }
   1668 
   1669 static PyObject *
   1670 sre_getlower(PyObject* self, PyObject* args)
   1671 {
   1672     int character, flags;
   1673     if (!PyArg_ParseTuple(args, "ii", &character, &flags))
   1674         return NULL;
   1675     if (flags & SRE_FLAG_LOCALE)
   1676         return Py_BuildValue("i", sre_lower_locale(character));
   1677     if (flags & SRE_FLAG_UNICODE)
   1678 #if defined(HAVE_UNICODE)
   1679         return Py_BuildValue("i", sre_lower_unicode(character));
   1680 #else
   1681         return Py_BuildValue("i", sre_lower_locale(character));
   1682 #endif
   1683     return Py_BuildValue("i", sre_lower(character));
   1684 }
   1685 
   1686 LOCAL(void)
   1687 state_reset(SRE_STATE* state)
   1688 {
   1689     /* FIXME: dynamic! */
   1690     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
   1691 
   1692     state->lastmark = -1;
   1693     state->lastindex = -1;
   1694 
   1695     state->repeat = NULL;
   1696 
   1697     data_stack_dealloc(state);
   1698 }
   1699 
   1700 static void*
   1701 getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
   1702 {
   1703     /* given a python object, return a data pointer, a length (in
   1704        characters), and a character size.  return NULL if the object
   1705        is not a string (or not compatible) */
   1706 
   1707     PyBufferProcs *buffer;
   1708     Py_ssize_t size, bytes;
   1709     int charsize;
   1710     void* ptr;
   1711 
   1712 #if defined(HAVE_UNICODE)
   1713     if (PyUnicode_Check(string)) {
   1714         /* unicode strings doesn't always support the buffer interface */
   1715         ptr = (void*) PyUnicode_AS_DATA(string);
   1716         /* bytes = PyUnicode_GET_DATA_SIZE(string); */
   1717         size = PyUnicode_GET_SIZE(string);
   1718         charsize = sizeof(Py_UNICODE);
   1719 
   1720     } else {
   1721 #endif
   1722 
   1723     /* get pointer to string buffer */
   1724     buffer = Py_TYPE(string)->tp_as_buffer;
   1725     if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
   1726         buffer->bf_getsegcount(string, NULL) != 1) {
   1727         PyErr_SetString(PyExc_TypeError, "expected string or buffer");
   1728         return NULL;
   1729     }
   1730 
   1731     /* determine buffer size */
   1732     bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
   1733     if (bytes < 0) {
   1734         PyErr_SetString(PyExc_TypeError, "buffer has negative size");
   1735         return NULL;
   1736     }
   1737 
   1738     /* determine character size */
   1739 #if PY_VERSION_HEX >= 0x01060000
   1740     size = PyObject_Size(string);
   1741 #else
   1742     size = PyObject_Length(string);
   1743 #endif
   1744 
   1745     if (PyString_Check(string) || bytes == size)
   1746         charsize = 1;
   1747 #if defined(HAVE_UNICODE)
   1748     else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
   1749         charsize = sizeof(Py_UNICODE);
   1750 #endif
   1751     else {
   1752         PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
   1753         return NULL;
   1754     }
   1755 
   1756 #if defined(HAVE_UNICODE)
   1757     }
   1758 #endif
   1759 
   1760     *p_length = size;
   1761     *p_charsize = charsize;
   1762 
   1763     return ptr;
   1764 }
   1765 
   1766 LOCAL(PyObject*)
   1767 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
   1768            Py_ssize_t start, Py_ssize_t end)
   1769 {
   1770     /* prepare state object */
   1771 
   1772     Py_ssize_t length;
   1773     int charsize;
   1774     void* ptr;
   1775 
   1776     memset(state, 0, sizeof(SRE_STATE));
   1777 
   1778     state->lastmark = -1;
   1779     state->lastindex = -1;
   1780 
   1781     ptr = getstring(string, &length, &charsize);
   1782     if (!ptr)
   1783         return NULL;
   1784 
   1785     /* adjust boundaries */
   1786     if (start < 0)
   1787         start = 0;
   1788     else if (start > length)
   1789         start = length;
   1790 
   1791     if (end < 0)
   1792         end = 0;
   1793     else if (end > length)
   1794         end = length;
   1795 
   1796     state->charsize = charsize;
   1797 
   1798     state->beginning = ptr;
   1799 
   1800     state->start = (void*) ((char*) ptr + start * state->charsize);
   1801     state->end = (void*) ((char*) ptr + end * state->charsize);
   1802 
   1803     Py_INCREF(string);
   1804     state->string = string;
   1805     state->pos = start;
   1806     state->endpos = end;
   1807 
   1808     if (pattern->flags & SRE_FLAG_LOCALE)
   1809         state->lower = sre_lower_locale;
   1810     else if (pattern->flags & SRE_FLAG_UNICODE)
   1811 #if defined(HAVE_UNICODE)
   1812         state->lower = sre_lower_unicode;
   1813 #else
   1814         state->lower = sre_lower_locale;
   1815 #endif
   1816     else
   1817         state->lower = sre_lower;
   1818 
   1819     return string;
   1820 }
   1821 
   1822 LOCAL(void)
   1823 state_fini(SRE_STATE* state)
   1824 {
   1825     Py_XDECREF(state->string);
   1826     data_stack_dealloc(state);
   1827 }
   1828 
   1829 /* calculate offset from start of string */
   1830 #define STATE_OFFSET(state, member)\
   1831     (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
   1832 
   1833 LOCAL(PyObject*)
   1834 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
   1835 {
   1836     Py_ssize_t i, j;
   1837 
   1838     index = (index - 1) * 2;
   1839 
   1840     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
   1841         if (empty)
   1842             /* want empty string */
   1843             i = j = 0;
   1844         else {
   1845             Py_INCREF(Py_None);
   1846             return Py_None;
   1847         }
   1848     } else {
   1849         i = STATE_OFFSET(state, state->mark[index]);
   1850         j = STATE_OFFSET(state, state->mark[index+1]);
   1851     }
   1852 
   1853     return PySequence_GetSlice(string, i, j);
   1854 }
   1855 
   1856 static void
   1857 pattern_error(int status)
   1858 {
   1859     switch (status) {
   1860     case SRE_ERROR_RECURSION_LIMIT:
   1861         PyErr_SetString(
   1862             PyExc_RuntimeError,
   1863             "maximum recursion limit exceeded"
   1864             );
   1865         break;
   1866     case SRE_ERROR_MEMORY:
   1867         PyErr_NoMemory();
   1868         break;
   1869     case SRE_ERROR_INTERRUPTED:
   1870     /* An exception has already been raised, so let it fly */
   1871         break;
   1872     default:
   1873         /* other error codes indicate compiler/engine bugs */
   1874         PyErr_SetString(
   1875             PyExc_RuntimeError,
   1876             "internal error in regular expression engine"
   1877             );
   1878     }
   1879 }
   1880 
   1881 static void
   1882 pattern_dealloc(PatternObject* self)
   1883 {
   1884     if (self->weakreflist != NULL)
   1885         PyObject_ClearWeakRefs((PyObject *) self);
   1886     Py_XDECREF(self->pattern);
   1887     Py_XDECREF(self->groupindex);
   1888     Py_XDECREF(self->indexgroup);
   1889     PyObject_DEL(self);
   1890 }
   1891 
   1892 static int
   1893 check_args_size(const char *name, PyObject* args, PyObject* kw, int n)
   1894 {
   1895     Py_ssize_t m = PyTuple_GET_SIZE(args) + (kw ? PyDict_Size(kw) : 0);
   1896     if (m <= n)
   1897         return 1;
   1898     PyErr_Format(PyExc_TypeError,
   1899                  "%s() takes at most %d positional arguments (%zd given)",
   1900                  name, n, m);
   1901     return 0;
   1902 }
   1903 
   1904 static PyObject*
   1905 fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
   1906 {
   1907     if (string2 != NULL) {
   1908         char buf[100];
   1909         if (string != NULL) {
   1910             PyErr_Format(PyExc_TypeError,
   1911                          "Argument given by name ('%s') and position (1)",
   1912                          oldname);
   1913             return NULL;
   1914         }
   1915         sprintf(buf, "The '%s' keyword parameter name is deprecated.  "
   1916                      "Use 'string' instead.", oldname);
   1917         if (PyErr_Warn(PyExc_DeprecationWarning, buf) < 0)
   1918             return NULL;
   1919         return string2;
   1920     }
   1921     if (string == NULL) {
   1922         PyErr_SetString(PyExc_TypeError,
   1923                         "Required argument 'string' (pos 1) not found");
   1924         return NULL;
   1925     }
   1926     return string;
   1927 }
   1928 
   1929 static PyObject*
   1930 pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
   1931 {
   1932     SRE_STATE state;
   1933     int status;
   1934 
   1935     PyObject *string = NULL, *string2 = NULL;
   1936     Py_ssize_t start = 0;
   1937     Py_ssize_t end = PY_SSIZE_T_MAX;
   1938     static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
   1939     if (!check_args_size("match", args, kw, 3))
   1940         return NULL;
   1941 
   1942     if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:match", kwlist,
   1943                                      &string, &start, &end, &string2))
   1944         return NULL;
   1945 
   1946     string = fix_string_param(string, string2, "pattern");
   1947     if (!string)
   1948         return NULL;
   1949 
   1950     string = state_init(&state, self, string, start, end);
   1951     if (!string)
   1952         return NULL;
   1953 
   1954     state.ptr = state.start;
   1955 
   1956     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
   1957 
   1958     if (state.charsize == 1) {
   1959         status = sre_match(&state, PatternObject_GetCode(self));
   1960     } else {
   1961 #if defined(HAVE_UNICODE)
   1962         status = sre_umatch(&state, PatternObject_GetCode(self));
   1963 #endif
   1964     }
   1965 
   1966     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
   1967     if (PyErr_Occurred())
   1968         return NULL;
   1969 
   1970     state_fini(&state);
   1971 
   1972     return pattern_new_match(self, &state, status);
   1973 }
   1974 
   1975 static PyObject*
   1976 pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
   1977 {
   1978     SRE_STATE state;
   1979     int status;
   1980 
   1981     PyObject *string = NULL, *string2 = NULL;
   1982     Py_ssize_t start = 0;
   1983     Py_ssize_t end = PY_SSIZE_T_MAX;
   1984     static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
   1985     if (!check_args_size("search", args, kw, 3))
   1986         return NULL;
   1987 
   1988     if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:search", kwlist,
   1989                                      &string, &start, &end, &string2))
   1990         return NULL;
   1991 
   1992     string = fix_string_param(string, string2, "pattern");
   1993     if (!string)
   1994         return NULL;
   1995 
   1996     string = state_init(&state, self, string, start, end);
   1997     if (!string)
   1998         return NULL;
   1999 
   2000     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
   2001 
   2002     if (state.charsize == 1) {
   2003         status = sre_search(&state, PatternObject_GetCode(self));
   2004     } else {
   2005 #if defined(HAVE_UNICODE)
   2006         status = sre_usearch(&state, PatternObject_GetCode(self));
   2007 #endif
   2008     }
   2009 
   2010     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
   2011 
   2012     state_fini(&state);
   2013 
   2014     if (PyErr_Occurred())
   2015         return NULL;
   2016 
   2017     return pattern_new_match(self, &state, status);
   2018 }
   2019 
   2020 static PyObject*
   2021 call(char* module, char* function, PyObject* args)
   2022 {
   2023     PyObject* name;
   2024     PyObject* mod;
   2025     PyObject* func;
   2026     PyObject* result;
   2027 
   2028     if (!args)
   2029         return NULL;
   2030     name = PyString_FromString(module);
   2031     if (!name)
   2032         return NULL;
   2033     mod = PyImport_Import(name);
   2034     Py_DECREF(name);
   2035     if (!mod)
   2036         return NULL;
   2037     func = PyObject_GetAttrString(mod, function);
   2038     Py_DECREF(mod);
   2039     if (!func)
   2040         return NULL;
   2041     result = PyObject_CallObject(func, args);
   2042     Py_DECREF(func);
   2043     Py_DECREF(args);
   2044     return result;
   2045 }
   2046 
   2047 #ifdef USE_BUILTIN_COPY
   2048 static int
   2049 deepcopy(PyObject** object, PyObject* memo)
   2050 {
   2051     PyObject* copy;
   2052 
   2053     copy = call(
   2054         "copy", "deepcopy",
   2055         PyTuple_Pack(2, *object, memo)
   2056         );
   2057     if (!copy)
   2058         return 0;
   2059 
   2060     Py_DECREF(*object);
   2061     *object = copy;
   2062 
   2063     return 1; /* success */
   2064 }
   2065 #endif
   2066 
   2067 static PyObject*
   2068 join_list(PyObject* list, PyObject* string)
   2069 {
   2070     /* join list elements */
   2071 
   2072     PyObject* joiner;
   2073 #if PY_VERSION_HEX >= 0x01060000
   2074     PyObject* function;
   2075     PyObject* args;
   2076 #endif
   2077     PyObject* result;
   2078 
   2079     joiner = PySequence_GetSlice(string, 0, 0);
   2080     if (!joiner)
   2081         return NULL;
   2082 
   2083     if (PyList_GET_SIZE(list) == 0) {
   2084         Py_DECREF(list);
   2085         return joiner;
   2086     }
   2087 
   2088 #if PY_VERSION_HEX >= 0x01060000
   2089     function = PyObject_GetAttrString(joiner, "join");
   2090     if (!function) {
   2091         Py_DECREF(joiner);
   2092         return NULL;
   2093     }
   2094     args = PyTuple_New(1);
   2095     if (!args) {
   2096         Py_DECREF(function);
   2097         Py_DECREF(joiner);
   2098         return NULL;
   2099     }
   2100     PyTuple_SET_ITEM(args, 0, list);
   2101     result = PyObject_CallObject(function, args);
   2102     Py_DECREF(args); /* also removes list */
   2103     Py_DECREF(function);
   2104 #else
   2105     result = call(
   2106         "string", "join",
   2107         PyTuple_Pack(2, list, joiner)
   2108         );
   2109 #endif
   2110     Py_DECREF(joiner);
   2111 
   2112     return result;
   2113 }
   2114 
   2115 static PyObject*
   2116 pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
   2117 {
   2118     SRE_STATE state;
   2119     PyObject* list;
   2120     int status;
   2121     Py_ssize_t i, b, e;
   2122 
   2123     PyObject *string = NULL, *string2 = NULL;
   2124     Py_ssize_t start = 0;
   2125     Py_ssize_t end = PY_SSIZE_T_MAX;
   2126     static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
   2127     if (!check_args_size("findall", args, kw, 3))
   2128         return NULL;
   2129 
   2130     if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:findall", kwlist,
   2131                                      &string, &start, &end, &string2))
   2132         return NULL;
   2133 
   2134     string = fix_string_param(string, string2, "source");
   2135     if (!string)
   2136         return NULL;
   2137 
   2138     string = state_init(&state, self, string, start, end);
   2139     if (!string)
   2140         return NULL;
   2141 
   2142     list = PyList_New(0);
   2143     if (!list) {
   2144         state_fini(&state);
   2145         return NULL;
   2146     }
   2147 
   2148     while (state.start <= state.end) {
   2149 
   2150         PyObject* item;
   2151 
   2152         state_reset(&state);
   2153 
   2154         state.ptr = state.start;
   2155 
   2156         if (state.charsize == 1) {
   2157             status = sre_search(&state, PatternObject_GetCode(self));
   2158         } else {
   2159 #if defined(HAVE_UNICODE)
   2160             status = sre_usearch(&state, PatternObject_GetCode(self));
   2161 #endif
   2162         }
   2163 
   2164   if (PyErr_Occurred())
   2165       goto error;
   2166 
   2167         if (status <= 0) {
   2168             if (status == 0)
   2169                 break;
   2170             pattern_error(status);
   2171             goto error;
   2172         }
   2173 
   2174         /* don't bother to build a match object */
   2175         switch (self->groups) {
   2176         case 0:
   2177             b = STATE_OFFSET(&state, state.start);
   2178             e = STATE_OFFSET(&state, state.ptr);
   2179             item = PySequence_GetSlice(string, b, e);
   2180             if (!item)
   2181                 goto error;
   2182             break;
   2183         case 1:
   2184             item = state_getslice(&state, 1, string, 1);
   2185             if (!item)
   2186                 goto error;
   2187             break;
   2188         default:
   2189             item = PyTuple_New(self->groups);
   2190             if (!item)
   2191                 goto error;
   2192             for (i = 0; i < self->groups; i++) {
   2193                 PyObject* o = state_getslice(&state, i+1, string, 1);
   2194                 if (!o) {
   2195                     Py_DECREF(item);
   2196                     goto error;
   2197                 }
   2198                 PyTuple_SET_ITEM(item, i, o);
   2199             }
   2200             break;
   2201         }
   2202 
   2203         status = PyList_Append(list, item);
   2204         Py_DECREF(item);
   2205         if (status < 0)
   2206             goto error;
   2207 
   2208         if (state.ptr == state.start)
   2209             state.start = (void*) ((char*) state.ptr + state.charsize);
   2210         else
   2211             state.start = state.ptr;
   2212 
   2213     }
   2214 
   2215     state_fini(&state);
   2216     return list;
   2217 
   2218 error:
   2219     Py_DECREF(list);
   2220     state_fini(&state);
   2221     return NULL;
   2222 
   2223 }
   2224 
   2225 #if PY_VERSION_HEX >= 0x02020000
   2226 static PyObject*
   2227 pattern_finditer(PatternObject* pattern, PyObject* args)
   2228 {
   2229     PyObject* scanner;
   2230     PyObject* search;
   2231     PyObject* iterator;
   2232 
   2233     scanner = pattern_scanner(pattern, args);
   2234     if (!scanner)
   2235         return NULL;
   2236 
   2237     search = PyObject_GetAttrString(scanner, "search");
   2238     Py_DECREF(scanner);
   2239     if (!search)
   2240         return NULL;
   2241 
   2242     iterator = PyCallIter_New(search, Py_None);
   2243     Py_DECREF(search);
   2244 
   2245     return iterator;
   2246 }
   2247 #endif
   2248 
   2249 static PyObject*
   2250 pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
   2251 {
   2252     SRE_STATE state;
   2253     PyObject* list;
   2254     PyObject* item;
   2255     int status;
   2256     Py_ssize_t n;
   2257     Py_ssize_t i;
   2258     void* last;
   2259 
   2260     PyObject *string = NULL, *string2 = NULL;
   2261     Py_ssize_t maxsplit = 0;
   2262     static char* kwlist[] = { "string", "maxsplit", "source", NULL };
   2263     if (!check_args_size("split", args, kw, 2))
   2264         return NULL;
   2265 
   2266     if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnO:split", kwlist,
   2267                                      &string, &maxsplit, &string2))
   2268         return NULL;
   2269 
   2270     string = fix_string_param(string, string2, "source");
   2271     if (!string)
   2272         return NULL;
   2273 
   2274     string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
   2275     if (!string)
   2276         return NULL;
   2277 
   2278     list = PyList_New(0);
   2279     if (!list) {
   2280         state_fini(&state);
   2281         return NULL;
   2282     }
   2283 
   2284     n = 0;
   2285     last = state.start;
   2286 
   2287     while (!maxsplit || n < maxsplit) {
   2288 
   2289         state_reset(&state);
   2290 
   2291         state.ptr = state.start;
   2292 
   2293         if (state.charsize == 1) {
   2294             status = sre_search(&state, PatternObject_GetCode(self));
   2295         } else {
   2296 #if defined(HAVE_UNICODE)
   2297             status = sre_usearch(&state, PatternObject_GetCode(self));
   2298 #endif
   2299         }
   2300 
   2301   if (PyErr_Occurred())
   2302       goto error;
   2303 
   2304         if (status <= 0) {
   2305             if (status == 0)
   2306                 break;
   2307             pattern_error(status);
   2308             goto error;
   2309         }
   2310 
   2311         if (state.start == state.ptr) {
   2312             if (last == state.end)
   2313                 break;
   2314             /* skip one character */
   2315             state.start = (void*) ((char*) state.ptr + state.charsize);
   2316             continue;
   2317         }
   2318 
   2319         /* get segment before this match */
   2320         item = PySequence_GetSlice(
   2321             string, STATE_OFFSET(&state, last),
   2322             STATE_OFFSET(&state, state.start)
   2323             );
   2324         if (!item)
   2325             goto error;
   2326         status = PyList_Append(list, item);
   2327         Py_DECREF(item);
   2328         if (status < 0)
   2329             goto error;
   2330 
   2331         /* add groups (if any) */
   2332         for (i = 0; i < self->groups; i++) {
   2333             item = state_getslice(&state, i+1, string, 0);
   2334             if (!item)
   2335                 goto error;
   2336             status = PyList_Append(list, item);
   2337             Py_DECREF(item);
   2338             if (status < 0)
   2339                 goto error;
   2340         }
   2341 
   2342         n = n + 1;
   2343 
   2344         last = state.start = state.ptr;
   2345 
   2346     }
   2347 
   2348     /* get segment following last match (even if empty) */
   2349     item = PySequence_GetSlice(
   2350         string, STATE_OFFSET(&state, last), state.endpos
   2351         );
   2352     if (!item)
   2353         goto error;
   2354     status = PyList_Append(list, item);
   2355     Py_DECREF(item);
   2356     if (status < 0)
   2357         goto error;
   2358 
   2359     state_fini(&state);
   2360     return list;
   2361 
   2362 error:
   2363     Py_DECREF(list);
   2364     state_fini(&state);
   2365     return NULL;
   2366 
   2367 }
   2368 
   2369 static PyObject*
   2370 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
   2371              Py_ssize_t count, Py_ssize_t subn)
   2372 {
   2373     SRE_STATE state;
   2374     PyObject* list;
   2375     PyObject* item;
   2376     PyObject* filter;
   2377     PyObject* args;
   2378     PyObject* match;
   2379     void* ptr;
   2380     int status;
   2381     Py_ssize_t n;
   2382     Py_ssize_t i, b, e;
   2383     int bint;
   2384     int filter_is_callable;
   2385 
   2386     if (PyCallable_Check(ptemplate)) {
   2387         /* sub/subn takes either a function or a template */
   2388         filter = ptemplate;
   2389         Py_INCREF(filter);
   2390         filter_is_callable = 1;
   2391     } else {
   2392         /* if not callable, check if it's a literal string */
   2393         int literal;
   2394         ptr = getstring(ptemplate, &n, &bint);
   2395         b = bint;
   2396         if (ptr) {
   2397             if (b == 1) {
   2398         literal = sre_literal_template((unsigned char *)ptr, n);
   2399             } else {
   2400 #if defined(HAVE_UNICODE)
   2401         literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
   2402 #endif
   2403             }
   2404         } else {
   2405             PyErr_Clear();
   2406             literal = 0;
   2407         }
   2408         if (literal) {
   2409             filter = ptemplate;
   2410             Py_INCREF(filter);
   2411             filter_is_callable = 0;
   2412         } else {
   2413             /* not a literal; hand it over to the template compiler */
   2414             filter = call(
   2415                 SRE_PY_MODULE, "_subx",
   2416                 PyTuple_Pack(2, self, ptemplate)
   2417                 );
   2418             if (!filter)
   2419                 return NULL;
   2420             filter_is_callable = PyCallable_Check(filter);
   2421         }
   2422     }
   2423 
   2424     string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
   2425     if (!string) {
   2426         Py_DECREF(filter);
   2427         return NULL;
   2428     }
   2429 
   2430     list = PyList_New(0);
   2431     if (!list) {
   2432         Py_DECREF(filter);
   2433         state_fini(&state);
   2434         return NULL;
   2435     }
   2436 
   2437     n = i = 0;
   2438 
   2439     while (!count || n < count) {
   2440 
   2441         state_reset(&state);
   2442 
   2443         state.ptr = state.start;
   2444 
   2445         if (state.charsize == 1) {
   2446             status = sre_search(&state, PatternObject_GetCode(self));
   2447         } else {
   2448 #if defined(HAVE_UNICODE)
   2449             status = sre_usearch(&state, PatternObject_GetCode(self));
   2450 #endif
   2451         }
   2452 
   2453   if (PyErr_Occurred())
   2454       goto error;
   2455 
   2456         if (status <= 0) {
   2457             if (status == 0)
   2458                 break;
   2459             pattern_error(status);
   2460             goto error;
   2461         }
   2462 
   2463         b = STATE_OFFSET(&state, state.start);
   2464         e = STATE_OFFSET(&state, state.ptr);
   2465 
   2466         if (i < b) {
   2467             /* get segment before this match */
   2468             item = PySequence_GetSlice(string, i, b);
   2469             if (!item)
   2470                 goto error;
   2471             status = PyList_Append(list, item);
   2472             Py_DECREF(item);
   2473             if (status < 0)
   2474                 goto error;
   2475 
   2476         } else if (i == b && i == e && n > 0)
   2477             /* ignore empty match on latest position */
   2478             goto next;
   2479 
   2480         if (filter_is_callable) {
   2481             /* pass match object through filter */
   2482             match = pattern_new_match(self, &state, 1);
   2483             if (!match)
   2484                 goto error;
   2485             args = PyTuple_Pack(1, match);
   2486             if (!args) {
   2487                 Py_DECREF(match);
   2488                 goto error;
   2489             }
   2490             item = PyObject_CallObject(filter, args);
   2491             Py_DECREF(args);
   2492             Py_DECREF(match);
   2493             if (!item)
   2494                 goto error;
   2495         } else {
   2496             /* filter is literal string */
   2497             item = filter;
   2498             Py_INCREF(item);
   2499         }
   2500 
   2501         /* add to list */
   2502         if (item != Py_None) {
   2503             status = PyList_Append(list, item);
   2504             Py_DECREF(item);
   2505             if (status < 0)
   2506                 goto error;
   2507         }
   2508 
   2509         i = e;
   2510         n = n + 1;
   2511 
   2512 next:
   2513         /* move on */
   2514         if (state.ptr == state.start)
   2515             state.start = (void*) ((char*) state.ptr + state.charsize);
   2516         else
   2517             state.start = state.ptr;
   2518 
   2519     }
   2520 
   2521     /* get segment following last match */
   2522     if (i < state.endpos) {
   2523         item = PySequence_GetSlice(string, i, state.endpos);
   2524         if (!item)
   2525             goto error;
   2526         status = PyList_Append(list, item);
   2527         Py_DECREF(item);
   2528         if (status < 0)
   2529             goto error;
   2530     }
   2531 
   2532     state_fini(&state);
   2533 
   2534     Py_DECREF(filter);
   2535 
   2536     /* convert list to single string (also removes list) */
   2537     item = join_list(list, string);
   2538 
   2539     if (!item)
   2540         return NULL;
   2541 
   2542     if (subn)
   2543         return Py_BuildValue("Nn", item, n);
   2544 
   2545     return item;
   2546 
   2547 error:
   2548     Py_DECREF(list);
   2549     state_fini(&state);
   2550     Py_DECREF(filter);
   2551     return NULL;
   2552 
   2553 }
   2554 
   2555 static PyObject*
   2556 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
   2557 {
   2558     PyObject* ptemplate;
   2559     PyObject* string;
   2560     Py_ssize_t count = 0;
   2561     static char* kwlist[] = { "repl", "string", "count", NULL };
   2562     if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
   2563                                      &ptemplate, &string, &count))
   2564         return NULL;
   2565 
   2566     return pattern_subx(self, ptemplate, string, count, 0);
   2567 }
   2568 
   2569 static PyObject*
   2570 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
   2571 {
   2572     PyObject* ptemplate;
   2573     PyObject* string;
   2574     Py_ssize_t count = 0;
   2575     static char* kwlist[] = { "repl", "string", "count", NULL };
   2576     if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
   2577                                      &ptemplate, &string, &count))
   2578         return NULL;
   2579 
   2580     return pattern_subx(self, ptemplate, string, count, 1);
   2581 }
   2582 
   2583 static PyObject*
   2584 pattern_copy(PatternObject* self, PyObject *unused)
   2585 {
   2586 #ifdef USE_BUILTIN_COPY
   2587     PatternObject* copy;
   2588     int offset;
   2589 
   2590     copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
   2591     if (!copy)
   2592         return NULL;
   2593 
   2594     offset = offsetof(PatternObject, groups);
   2595 
   2596     Py_XINCREF(self->groupindex);
   2597     Py_XINCREF(self->indexgroup);
   2598     Py_XINCREF(self->pattern);
   2599 
   2600     memcpy((char*) copy + offset, (char*) self + offset,
   2601            sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
   2602     copy->weakreflist = NULL;
   2603 
   2604     return (PyObject*) copy;
   2605 #else
   2606     PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
   2607     return NULL;
   2608 #endif
   2609 }
   2610 
   2611 static PyObject*
   2612 pattern_deepcopy(PatternObject* self, PyObject* memo)
   2613 {
   2614 #ifdef USE_BUILTIN_COPY
   2615     PatternObject* copy;
   2616 
   2617     copy = (PatternObject*) pattern_copy(self);
   2618     if (!copy)
   2619         return NULL;
   2620 
   2621     if (!deepcopy(&copy->groupindex, memo) ||
   2622         !deepcopy(&copy->indexgroup, memo) ||
   2623         !deepcopy(&copy->pattern, memo)) {
   2624         Py_DECREF(copy);
   2625         return NULL;
   2626     }
   2627 
   2628 #else
   2629     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
   2630     return NULL;
   2631 #endif
   2632 }
   2633 
   2634 PyDoc_STRVAR(pattern_match_doc,
   2635 "match(string[, pos[, endpos]]) --> match object or None.\n\
   2636     Matches zero or more characters at the beginning of the string");
   2637 
   2638 PyDoc_STRVAR(pattern_search_doc,
   2639 "search(string[, pos[, endpos]]) --> match object or None.\n\
   2640     Scan through string looking for a match, and return a corresponding\n\
   2641     match object instance. Return None if no position in the string matches.");
   2642 
   2643 PyDoc_STRVAR(pattern_split_doc,
   2644 "split(string[, maxsplit = 0])  --> list.\n\
   2645     Split string by the occurrences of pattern.");
   2646 
   2647 PyDoc_STRVAR(pattern_findall_doc,
   2648 "findall(string[, pos[, endpos]]) --> list.\n\
   2649    Return a list of all non-overlapping matches of pattern in string.");
   2650 
   2651 PyDoc_STRVAR(pattern_finditer_doc,
   2652 "finditer(string[, pos[, endpos]]) --> iterator.\n\
   2653     Return an iterator over all non-overlapping matches for the \n\
   2654     RE pattern in string. For each match, the iterator returns a\n\
   2655     match object.");
   2656 
   2657 PyDoc_STRVAR(pattern_sub_doc,
   2658 "sub(repl, string[, count = 0]) --> newstring\n\
   2659     Return the string obtained by replacing the leftmost non-overlapping\n\
   2660     occurrences of pattern in string by the replacement repl.");
   2661 
   2662 PyDoc_STRVAR(pattern_subn_doc,
   2663 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
   2664     Return the tuple (new_string, number_of_subs_made) found by replacing\n\
   2665     the leftmost non-overlapping occurrences of pattern with the\n\
   2666     replacement repl.");
   2667 
   2668 PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
   2669 
   2670 static PyMethodDef pattern_methods[] = {
   2671     {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
   2672   pattern_match_doc},
   2673     {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
   2674   pattern_search_doc},
   2675     {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
   2676   pattern_sub_doc},
   2677     {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
   2678   pattern_subn_doc},
   2679     {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
   2680   pattern_split_doc},
   2681     {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
   2682   pattern_findall_doc},
   2683 #if PY_VERSION_HEX >= 0x02020000
   2684     {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
   2685   pattern_finditer_doc},
   2686 #endif
   2687     {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
   2688     {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
   2689     {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
   2690     {NULL, NULL}
   2691 };
   2692 
   2693 #define PAT_OFF(x) offsetof(PatternObject, x)
   2694 static PyMemberDef pattern_members[] = {
   2695     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
   2696     {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
   2697     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
   2698     {"groupindex", T_OBJECT,    PAT_OFF(groupindex),    READONLY},
   2699     {NULL}  /* Sentinel */
   2700 };
   2701 
   2702 statichere PyTypeObject Pattern_Type = {
   2703     PyObject_HEAD_INIT(NULL)
   2704     0, "_" SRE_MODULE ".SRE_Pattern",
   2705     sizeof(PatternObject), sizeof(SRE_CODE),
   2706     (destructor)pattern_dealloc, /*tp_dealloc*/
   2707     0,                                  /* tp_print */
   2708     0,                                  /* tp_getattrn */
   2709     0,          /* tp_setattr */
   2710     0,          /* tp_compare */
   2711     0,          /* tp_repr */
   2712     0,          /* tp_as_number */
   2713     0,          /* tp_as_sequence */
   2714     0,          /* tp_as_mapping */
   2715     0,          /* tp_hash */
   2716     0,          /* tp_call */
   2717     0,          /* tp_str */
   2718     0,          /* tp_getattro */
   2719     0,          /* tp_setattro */
   2720     0,          /* tp_as_buffer */
   2721     Py_TPFLAGS_DEFAULT,           /* tp_flags */
   2722     pattern_doc,      /* tp_doc */
   2723     0,          /* tp_traverse */
   2724     0,          /* tp_clear */
   2725     0,          /* tp_richcompare */
   2726     offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
   2727     0,          /* tp_iter */
   2728     0,          /* tp_iternext */
   2729     pattern_methods,      /* tp_methods */
   2730     pattern_members,      /* tp_members */
   2731 };
   2732 
   2733 static int _validate(PatternObject *self); /* Forward */
   2734 
   2735 static PyObject *
   2736 _compile(PyObject* self_, PyObject* args)
   2737 {
   2738     /* "compile" pattern descriptor to pattern object */
   2739 
   2740     PatternObject* self;
   2741     Py_ssize_t i, n;
   2742 
   2743     PyObject* pattern;
   2744     int flags = 0;
   2745     PyObject* code;
   2746     Py_ssize_t groups = 0;
   2747     PyObject* groupindex = NULL;
   2748     PyObject* indexgroup = NULL;
   2749     if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
   2750                           &PyList_Type, &code, &groups,
   2751                           &groupindex, &indexgroup))
   2752         return NULL;
   2753 
   2754     n = PyList_GET_SIZE(code);
   2755     /* coverity[ampersand_in_size] */
   2756     self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
   2757     if (!self)
   2758         return NULL;
   2759     self->weakreflist = NULL;
   2760     self->pattern = NULL;
   2761     self->groupindex = NULL;
   2762     self->indexgroup = NULL;
   2763 
   2764     self->codesize = n;
   2765 
   2766     for (i = 0; i < n; i++) {
   2767         PyObject *o = PyList_GET_ITEM(code, i);
   2768         unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
   2769                                               : PyLong_AsUnsignedLong(o);
   2770         if (value == (unsigned long)-1 && PyErr_Occurred()) {
   2771             if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
   2772                 PyErr_SetString(PyExc_OverflowError,
   2773                                 "regular expression code size limit exceeded");
   2774             }
   2775             break;
   2776         }
   2777         self->code[i] = (SRE_CODE) value;
   2778         if ((unsigned long) self->code[i] != value) {
   2779             PyErr_SetString(PyExc_OverflowError,
   2780                             "regular expression code size limit exceeded");
   2781             break;
   2782         }
   2783     }
   2784 
   2785     if (PyErr_Occurred()) {
   2786         Py_DECREF(self);
   2787         return NULL;
   2788     }
   2789 
   2790     Py_INCREF(pattern);
   2791     self->pattern = pattern;
   2792 
   2793     self->flags = flags;
   2794 
   2795     self->groups = groups;
   2796 
   2797     Py_XINCREF(groupindex);
   2798     self->groupindex = groupindex;
   2799 
   2800     Py_XINCREF(indexgroup);
   2801     self->indexgroup = indexgroup;
   2802 
   2803     self->weakreflist = NULL;
   2804 
   2805     if (!_validate(self)) {
   2806         Py_DECREF(self);
   2807         return NULL;
   2808     }
   2809 
   2810     return (PyObject*) self;
   2811 }
   2812 
   2813 /* -------------------------------------------------------------------- */
   2814 /* Code validation */
   2815 
   2816 /* To learn more about this code, have a look at the _compile() function in
   2817    Lib/sre_compile.py.  The validation functions below checks the code array
   2818    for conformance with the code patterns generated there.
   2819 
   2820    The nice thing about the generated code is that it is position-independent:
   2821    all jumps are relative jumps forward.  Also, jumps don't cross each other:
   2822    the target of a later jump is always earlier than the target of an earlier
   2823    jump.  IOW, this is okay:
   2824 
   2825    J---------J-------T--------T
   2826     \         \_____/        /
   2827      \______________________/
   2828 
   2829    but this is not:
   2830 
   2831    J---------J-------T--------T
   2832     \_________\_____/        /
   2833                \____________/
   2834 
   2835    It also helps that SRE_CODE is always an unsigned type.
   2836 */
   2837 
   2838 /* Defining this one enables tracing of the validator */
   2839 #undef VVERBOSE
   2840 
   2841 /* Trace macro for the validator */
   2842 #if defined(VVERBOSE)
   2843 #define VTRACE(v) printf v
   2844 #else
   2845 #define VTRACE(v) do {} while(0)  /* do nothing */
   2846 #endif
   2847 
   2848 /* Report failure */
   2849 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
   2850 
   2851 /* Extract opcode, argument, or skip count from code array */
   2852 #define GET_OP                                          \
   2853     do {                                                \
   2854         VTRACE(("%p: ", code));                         \
   2855         if (code >= end) FAIL;                          \
   2856         op = *code++;                                   \
   2857         VTRACE(("%lu (op)\n", (unsigned long)op));      \
   2858     } while (0)
   2859 #define GET_ARG                                         \
   2860     do {                                                \
   2861         VTRACE(("%p= ", code));                         \
   2862         if (code >= end) FAIL;                          \
   2863         arg = *code++;                                  \
   2864         VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
   2865     } while (0)
   2866 #define GET_SKIP_ADJ(adj)                               \
   2867     do {                                                \
   2868         VTRACE(("%p= ", code));                         \
   2869         if (code >= end) FAIL;                          \
   2870         skip = *code;                                   \
   2871         VTRACE(("%lu (skip to %p)\n",                   \
   2872                (unsigned long)skip, code+skip));        \
   2873         if (skip-adj > end-code)                        \
   2874             FAIL;                                       \
   2875         code++;                                         \
   2876     } while (0)
   2877 #define GET_SKIP GET_SKIP_ADJ(0)
   2878 
   2879 static int
   2880 _validate_charset(SRE_CODE *code, SRE_CODE *end)
   2881 {
   2882     /* Some variables are manipulated by the macros above */
   2883     SRE_CODE op;
   2884     SRE_CODE arg;
   2885     SRE_CODE offset;
   2886     int i;
   2887 
   2888     while (code < end) {
   2889         GET_OP;
   2890         switch (op) {
   2891 
   2892         case SRE_OP_NEGATE:
   2893             break;
   2894 
   2895         case SRE_OP_LITERAL:
   2896             GET_ARG;
   2897             break;
   2898 
   2899         case SRE_OP_RANGE:
   2900             GET_ARG;
   2901             GET_ARG;
   2902             break;
   2903 
   2904         case SRE_OP_CHARSET:
   2905             offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
   2906             if (offset > end-code)
   2907                 FAIL;
   2908             code += offset;
   2909             break;
   2910 
   2911         case SRE_OP_BIGCHARSET:
   2912             GET_ARG; /* Number of blocks */
   2913             offset = 256/sizeof(SRE_CODE); /* 256-byte table */
   2914             if (offset > end-code)
   2915                 FAIL;
   2916             /* Make sure that each byte points to a valid block */
   2917             for (i = 0; i < 256; i++) {
   2918                 if (((unsigned char *)code)[i] >= arg)
   2919                     FAIL;
   2920             }
   2921             code += offset;
   2922             offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
   2923             if (offset > end-code)
   2924                 FAIL;
   2925             code += offset;
   2926             break;
   2927 
   2928         case SRE_OP_CATEGORY:
   2929             GET_ARG;
   2930             switch (arg) {
   2931             case SRE_CATEGORY_DIGIT:
   2932             case SRE_CATEGORY_NOT_DIGIT:
   2933             case SRE_CATEGORY_SPACE:
   2934             case SRE_CATEGORY_NOT_SPACE:
   2935             case SRE_CATEGORY_WORD:
   2936             case SRE_CATEGORY_NOT_WORD:
   2937             case SRE_CATEGORY_LINEBREAK:
   2938             case SRE_CATEGORY_NOT_LINEBREAK:
   2939             case SRE_CATEGORY_LOC_WORD:
   2940             case SRE_CATEGORY_LOC_NOT_WORD:
   2941             case SRE_CATEGORY_UNI_DIGIT:
   2942             case SRE_CATEGORY_UNI_NOT_DIGIT:
   2943             case SRE_CATEGORY_UNI_SPACE:
   2944             case SRE_CATEGORY_UNI_NOT_SPACE:
   2945             case SRE_CATEGORY_UNI_WORD:
   2946             case SRE_CATEGORY_UNI_NOT_WORD:
   2947             case SRE_CATEGORY_UNI_LINEBREAK:
   2948             case SRE_CATEGORY_UNI_NOT_LINEBREAK:
   2949                 break;
   2950             default:
   2951                 FAIL;
   2952             }
   2953             break;
   2954 
   2955         default:
   2956             FAIL;
   2957 
   2958         }
   2959     }
   2960 
   2961     return 1;
   2962 }
   2963 
   2964 static int
   2965 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
   2966 {
   2967     /* Some variables are manipulated by the macros above */
   2968     SRE_CODE op;
   2969     SRE_CODE arg;
   2970     SRE_CODE skip;
   2971 
   2972     VTRACE(("code=%p, end=%p\n", code, end));
   2973 
   2974     if (code > end)
   2975         FAIL;
   2976 
   2977     while (code < end) {
   2978         GET_OP;
   2979         switch (op) {
   2980 
   2981         case SRE_OP_MARK:
   2982             /* We don't check whether marks are properly nested; the
   2983                sre_match() code is robust even if they don't, and the worst
   2984                you can get is nonsensical match results. */
   2985             GET_ARG;
   2986             if (arg > 2*groups+1) {
   2987                 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
   2988                 FAIL;
   2989             }
   2990             break;
   2991 
   2992         case SRE_OP_LITERAL:
   2993         case SRE_OP_NOT_LITERAL:
   2994         case SRE_OP_LITERAL_IGNORE:
   2995         case SRE_OP_NOT_LITERAL_IGNORE:
   2996             GET_ARG;
   2997             /* The arg is just a character, nothing to check */
   2998             break;
   2999 
   3000         case SRE_OP_SUCCESS:
   3001         case SRE_OP_FAILURE:
   3002             /* Nothing to check; these normally end the matching process */
   3003             break;
   3004 
   3005         case SRE_OP_AT:
   3006             GET_ARG;
   3007             switch (arg) {
   3008             case SRE_AT_BEGINNING:
   3009             case SRE_AT_BEGINNING_STRING:
   3010             case SRE_AT_BEGINNING_LINE:
   3011             case SRE_AT_END:
   3012             case SRE_AT_END_LINE:
   3013             case SRE_AT_END_STRING:
   3014             case SRE_AT_BOUNDARY:
   3015             case SRE_AT_NON_BOUNDARY:
   3016             case SRE_AT_LOC_BOUNDARY:
   3017             case SRE_AT_LOC_NON_BOUNDARY:
   3018             case SRE_AT_UNI_BOUNDARY:
   3019             case SRE_AT_UNI_NON_BOUNDARY:
   3020                 break;
   3021             default:
   3022                 FAIL;
   3023             }
   3024             break;
   3025 
   3026         case SRE_OP_ANY:
   3027         case SRE_OP_ANY_ALL:
   3028             /* These have no operands */
   3029             break;
   3030 
   3031         case SRE_OP_IN:
   3032         case SRE_OP_IN_IGNORE:
   3033             GET_SKIP;
   3034             /* Stop 1 before the end; we check the FAILURE below */
   3035             if (!_validate_charset(code, code+skip-2))
   3036                 FAIL;
   3037             if (code[skip-2] != SRE_OP_FAILURE)
   3038                 FAIL;
   3039             code += skip-1;
   3040             break;
   3041 
   3042         case SRE_OP_INFO:
   3043             {
   3044                 /* A minimal info field is
   3045                    <INFO> <1=skip> <2=flags> <3=min> <4=max>;
   3046                    If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
   3047                    more follows. */
   3048                 SRE_CODE flags, i;
   3049                 SRE_CODE *newcode;
   3050                 GET_SKIP;
   3051                 newcode = code+skip-1;
   3052                 GET_ARG; flags = arg;
   3053                 GET_ARG; /* min */
   3054                 GET_ARG; /* max */
   3055                 /* Check that only valid flags are present */
   3056                 if ((flags & ~(SRE_INFO_PREFIX |
   3057                                SRE_INFO_LITERAL |
   3058                                SRE_INFO_CHARSET)) != 0)
   3059                     FAIL;
   3060                 /* PREFIX and CHARSET are mutually exclusive */
   3061                 if ((flags & SRE_INFO_PREFIX) &&
   3062                     (flags & SRE_INFO_CHARSET))
   3063                     FAIL;
   3064                 /* LITERAL implies PREFIX */
   3065                 if ((flags & SRE_INFO_LITERAL) &&
   3066                     !(flags & SRE_INFO_PREFIX))
   3067                     FAIL;
   3068                 /* Validate the prefix */
   3069                 if (flags & SRE_INFO_PREFIX) {
   3070                     SRE_CODE prefix_len;
   3071                     GET_ARG; prefix_len = arg;
   3072                     GET_ARG; /* prefix skip */
   3073                     /* Here comes the prefix string */
   3074                     if (prefix_len > newcode-code)
   3075                         FAIL;
   3076                     code += prefix_len;
   3077                     /* And here comes the overlap table */
   3078                     if (prefix_len > newcode-code)
   3079                         FAIL;
   3080                     /* Each overlap value should be < prefix_len */
   3081                     for (i = 0; i < prefix_len; i++) {
   3082                         if (code[i] >= prefix_len)
   3083                             FAIL;
   3084                     }
   3085                     code += prefix_len;
   3086                 }
   3087                 /* Validate the charset */
   3088                 if (flags & SRE_INFO_CHARSET) {
   3089                     if (!_validate_charset(code, newcode-1))
   3090                         FAIL;
   3091                     if (newcode[-1] != SRE_OP_FAILURE)
   3092                         FAIL;
   3093                     code = newcode;
   3094                 }
   3095                 else if (code != newcode) {
   3096                   VTRACE(("code=%p, newcode=%p\n", code, newcode));
   3097                     FAIL;
   3098                 }
   3099             }
   3100             break;
   3101 
   3102         case SRE_OP_BRANCH:
   3103             {
   3104                 SRE_CODE *target = NULL;
   3105                 for (;;) {
   3106                     GET_SKIP;
   3107                     if (skip == 0)
   3108                         break;
   3109                     /* Stop 2 before the end; we check the JUMP below */
   3110                     if (!_validate_inner(code, code+skip-3, groups))
   3111                         FAIL;
   3112                     code += skip-3;
   3113                     /* Check that it ends with a JUMP, and that each JUMP
   3114                        has the same target */
   3115                     GET_OP;
   3116                     if (op != SRE_OP_JUMP)
   3117                         FAIL;
   3118                     GET_SKIP;
   3119                     if (target == NULL)
   3120                         target = code+skip-1;
   3121                     else if (code+skip-1 != target)
   3122                         FAIL;
   3123                 }
   3124             }
   3125             break;
   3126 
   3127         case SRE_OP_REPEAT_ONE:
   3128         case SRE_OP_MIN_REPEAT_ONE:
   3129             {
   3130                 SRE_CODE min, max;
   3131                 GET_SKIP;
   3132                 GET_ARG; min = arg;
   3133                 GET_ARG; max = arg;
   3134                 if (min > max)
   3135                     FAIL;
   3136                 if (max > SRE_MAXREPEAT)
   3137                     FAIL;
   3138                 if (!_validate_inner(code, code+skip-4, groups))
   3139                     FAIL;
   3140                 code += skip-4;
   3141                 GET_OP;
   3142                 if (op != SRE_OP_SUCCESS)
   3143                     FAIL;
   3144             }
   3145             break;
   3146 
   3147         case SRE_OP_REPEAT:
   3148             {
   3149                 SRE_CODE min, max;
   3150                 GET_SKIP;
   3151                 GET_ARG; min = arg;
   3152                 GET_ARG; max = arg;
   3153                 if (min > max)
   3154                     FAIL;
   3155                 if (max > SRE_MAXREPEAT)
   3156                     FAIL;
   3157                 if (!_validate_inner(code, code+skip-3, groups))
   3158                     FAIL;
   3159                 code += skip-3;
   3160                 GET_OP;
   3161                 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
   3162                     FAIL;
   3163             }
   3164             break;
   3165 
   3166         case SRE_OP_GROUPREF:
   3167         case SRE_OP_GROUPREF_IGNORE:
   3168             GET_ARG;
   3169             if (arg >= groups)
   3170                 FAIL;
   3171             break;
   3172 
   3173         case SRE_OP_GROUPREF_EXISTS:
   3174             /* The regex syntax for this is: '(?(group)then|else)', where
   3175                'group' is either an integer group number or a group name,
   3176                'then' and 'else' are sub-regexes, and 'else' is optional. */
   3177             GET_ARG;
   3178             if (arg >= groups)
   3179                 FAIL;
   3180             GET_SKIP_ADJ(1);
   3181             code--; /* The skip is relative to the first arg! */
   3182             /* There are two possibilities here: if there is both a 'then'
   3183                part and an 'else' part, the generated code looks like:
   3184 
   3185                GROUPREF_EXISTS
   3186                <group>
   3187                <skipyes>
   3188                ...then part...
   3189                JUMP
   3190                <skipno>
   3191                (<skipyes> jumps here)
   3192                ...else part...
   3193                (<skipno> jumps here)
   3194 
   3195                If there is only a 'then' part, it looks like:
   3196 
   3197                GROUPREF_EXISTS
   3198                <group>
   3199                <skip>
   3200                ...then part...
   3201                (<skip> jumps here)
   3202 
   3203                There is no direct way to decide which it is, and we don't want
   3204                to allow arbitrary jumps anywhere in the code; so we just look
   3205                for a JUMP opcode preceding our skip target.
   3206             */
   3207             if (skip >= 3 && skip-3 < end-code &&
   3208                 code[skip-3] == SRE_OP_JUMP)
   3209             {
   3210                 VTRACE(("both then and else parts present\n"));
   3211                 if (!_validate_inner(code+1, code+skip-3, groups))
   3212                     FAIL;
   3213                 code += skip-2; /* Position after JUMP, at <skipno> */
   3214                 GET_SKIP;
   3215                 if (!_validate_inner(code, code+skip-1, groups))
   3216                     FAIL;
   3217                 code += skip-1;
   3218             }
   3219             else {
   3220                 VTRACE(("only a then part present\n"));
   3221                 if (!_validate_inner(code+1, code+skip-1, groups))
   3222                     FAIL;
   3223                 code += skip-1;
   3224             }
   3225             break;
   3226 
   3227         case SRE_OP_ASSERT:
   3228         case SRE_OP_ASSERT_NOT:
   3229             GET_SKIP;
   3230             GET_ARG; /* 0 for lookahead, width for lookbehind */
   3231             code--; /* Back up over arg to simplify math below */
   3232             if (arg & 0x80000000)
   3233                 FAIL; /* Width too large */
   3234             /* Stop 1 before the end; we check the SUCCESS below */
   3235             if (!_validate_inner(code+1, code+skip-2, groups))
   3236                 FAIL;
   3237             code += skip-2;
   3238             GET_OP;
   3239             if (op != SRE_OP_SUCCESS)
   3240                 FAIL;
   3241             break;
   3242 
   3243         default:
   3244             FAIL;
   3245 
   3246         }
   3247     }
   3248 
   3249     VTRACE(("okay\n"));
   3250     return 1;
   3251 }
   3252 
   3253 static int
   3254 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
   3255 {
   3256     if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
   3257         FAIL;
   3258     if (groups == 0)  /* fix for simplejson */
   3259         groups = 100; /* 100 groups should always be safe */
   3260     return _validate_inner(code, end-1, groups);
   3261 }
   3262 
   3263 static int
   3264 _validate(PatternObject *self)
   3265 {
   3266     if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
   3267     {
   3268         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
   3269         return 0;
   3270     }
   3271     else
   3272         VTRACE(("Success!\n"));
   3273     return 1;
   3274 }
   3275 
   3276 /* -------------------------------------------------------------------- */
   3277 /* match methods */
   3278 
   3279 static void
   3280 match_dealloc(MatchObject* self)
   3281 {
   3282     Py_XDECREF(self->regs);
   3283     Py_XDECREF(self->string);
   3284     Py_DECREF(self->pattern);
   3285     PyObject_DEL(self);
   3286 }
   3287 
   3288 static PyObject*
   3289 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
   3290 {
   3291     if (index < 0 || index >= self->groups) {
   3292         /* raise IndexError if we were given a bad group number */
   3293         PyErr_SetString(
   3294             PyExc_IndexError,
   3295             "no such group"
   3296             );
   3297         return NULL;
   3298     }
   3299 
   3300     index *= 2;
   3301 
   3302     if (self->string == Py_None || self->mark[index] < 0) {
   3303         /* return default value if the string or group is undefined */
   3304         Py_INCREF(def);
   3305         return def;
   3306     }
   3307 
   3308     return PySequence_GetSlice(
   3309         self->string, self->mark[index], self->mark[index+1]
   3310         );
   3311 }
   3312 
   3313 static Py_ssize_t
   3314 match_getindex(MatchObject* self, PyObject* index)
   3315 {
   3316     Py_ssize_t i;
   3317 
   3318     if (PyInt_Check(index) || PyLong_Check(index))
   3319         return PyInt_AsSsize_t(index);
   3320 
   3321     i = -1;
   3322 
   3323     if (self->pattern->groupindex) {
   3324         index = PyObject_GetItem(self->pattern->groupindex, index);
   3325         if (index) {
   3326             if (PyInt_Check(index) || PyLong_Check(index))
   3327                 i = PyInt_AsSsize_t(index);
   3328             Py_DECREF(index);
   3329         } else
   3330             PyErr_Clear();
   3331     }
   3332 
   3333     return i;
   3334 }
   3335 
   3336 static PyObject*
   3337 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
   3338 {
   3339     return match_getslice_by_index(self, match_getindex(self, index), def);
   3340 }
   3341 
   3342 static PyObject*
   3343 match_expand(MatchObject* self, PyObject* ptemplate)
   3344 {
   3345     /* delegate to Python code */
   3346     return call(
   3347         SRE_PY_MODULE, "_expand",
   3348         PyTuple_Pack(3, self->pattern, self, ptemplate)
   3349         );
   3350 }
   3351 
   3352 static PyObject*
   3353 match_group(MatchObject* self, PyObject* args)
   3354 {
   3355     PyObject* result;
   3356     Py_ssize_t i, size;
   3357 
   3358     size = PyTuple_GET_SIZE(args);
   3359 
   3360     switch (size) {
   3361     case 0:
   3362         result = match_getslice(self, Py_False, Py_None);
   3363         break;
   3364     case 1:
   3365         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
   3366         break;
   3367     default:
   3368         /* fetch multiple items */
   3369         result = PyTuple_New(size);
   3370         if (!result)
   3371             return NULL;
   3372         for (i = 0; i < size; i++) {
   3373             PyObject* item = match_getslice(
   3374                 self, PyTuple_GET_ITEM(args, i), Py_None
   3375                 );
   3376             if (!item) {
   3377                 Py_DECREF(result);
   3378                 return NULL;
   3379             }
   3380             PyTuple_SET_ITEM(result, i, item);
   3381         }
   3382         break;
   3383     }
   3384     return result;
   3385 }
   3386 
   3387 static PyObject*
   3388 match_groups(MatchObject* self, PyObject* args, PyObject* kw)
   3389 {
   3390     PyObject* result;
   3391     Py_ssize_t index;
   3392 
   3393     PyObject* def = Py_None;
   3394     static char* kwlist[] = { "default", NULL };
   3395     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
   3396         return NULL;
   3397 
   3398     result = PyTuple_New(self->groups-1);
   3399     if (!result)
   3400         return NULL;
   3401 
   3402     for (index = 1; index < self->groups; index++) {
   3403         PyObject* item;
   3404         item = match_getslice_by_index(self, index, def);
   3405         if (!item) {
   3406             Py_DECREF(result);
   3407             return NULL;
   3408         }
   3409         PyTuple_SET_ITEM(result, index-1, item);
   3410     }
   3411 
   3412     return result;
   3413 }
   3414 
   3415 static PyObject*
   3416 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
   3417 {
   3418     PyObject* result;
   3419     PyObject* keys;
   3420     Py_ssize_t index;
   3421 
   3422     PyObject* def = Py_None;
   3423     static char* kwlist[] = { "default", NULL };
   3424     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
   3425         return NULL;
   3426 
   3427     result = PyDict_New();
   3428     if (!result || !self->pattern->groupindex)
   3429         return result;
   3430 
   3431     keys = PyMapping_Keys(self->pattern->groupindex);
   3432     if (!keys)
   3433         goto failed;
   3434 
   3435     for (index = 0; index < PyList_GET_SIZE(keys); index++) {
   3436         int status;
   3437         PyObject* key;
   3438         PyObject* value;
   3439         key = PyList_GET_ITEM(keys, index);
   3440         if (!key)
   3441             goto failed;
   3442         value = match_getslice(self, key, def);
   3443         if (!value) {
   3444             Py_DECREF(key);
   3445             goto failed;
   3446         }
   3447         status = PyDict_SetItem(result, key, value);
   3448         Py_DECREF(value);
   3449         if (status < 0)
   3450             goto failed;
   3451     }
   3452 
   3453     Py_DECREF(keys);
   3454 
   3455     return result;
   3456 
   3457 failed:
   3458     Py_XDECREF(keys);
   3459     Py_DECREF(result);
   3460     return NULL;
   3461 }
   3462 
   3463 static PyObject*
   3464 match_start(MatchObject* self, PyObject* args)
   3465 {
   3466     Py_ssize_t index;
   3467 
   3468     PyObject* index_ = Py_False; /* zero */
   3469     if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
   3470         return NULL;
   3471 
   3472     index = match_getindex(self, index_);
   3473 
   3474     if (index < 0 || index >= self->groups) {
   3475         PyErr_SetString(
   3476             PyExc_IndexError,
   3477             "no such group"
   3478             );
   3479         return NULL;
   3480     }
   3481 
   3482     /* mark is -1 if group is undefined */
   3483     return PyInt_FromSsize_t(self->mark[index*2]);
   3484 }
   3485 
   3486 static PyObject*
   3487 match_end(MatchObject* self, PyObject* args)
   3488 {
   3489     Py_ssize_t index;
   3490 
   3491     PyObject* index_ = Py_False; /* zero */
   3492     if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
   3493         return NULL;
   3494 
   3495     index = match_getindex(self, index_);
   3496 
   3497     if (index < 0 || index >= self->groups) {
   3498         PyErr_SetString(
   3499             PyExc_IndexError,
   3500             "no such group"
   3501             );
   3502         return NULL;
   3503     }
   3504 
   3505     /* mark is -1 if group is undefined */
   3506     return PyInt_FromSsize_t(self->mark[index*2+1]);
   3507 }
   3508 
   3509 LOCAL(PyObject*)
   3510 _pair(Py_ssize_t i1, Py_ssize_t i2)
   3511 {
   3512     PyObject* pair;
   3513     PyObject* item;
   3514 
   3515     pair = PyTuple_New(2);
   3516     if (!pair)
   3517         return NULL;
   3518 
   3519     item = PyInt_FromSsize_t(i1);
   3520     if (!item)
   3521         goto error;
   3522     PyTuple_SET_ITEM(pair, 0, item);
   3523 
   3524     item = PyInt_FromSsize_t(i2);
   3525     if (!item)
   3526         goto error;
   3527     PyTuple_SET_ITEM(pair, 1, item);
   3528 
   3529     return pair;
   3530 
   3531   error:
   3532     Py_DECREF(pair);
   3533     return NULL;
   3534 }
   3535 
   3536 static PyObject*
   3537 match_span(MatchObject* self, PyObject* args)
   3538 {
   3539     Py_ssize_t index;
   3540 
   3541     PyObject* index_ = Py_False; /* zero */
   3542     if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
   3543         return NULL;
   3544 
   3545     index = match_getindex(self, index_);
   3546 
   3547     if (index < 0 || index >= self->groups) {
   3548         PyErr_SetString(
   3549             PyExc_IndexError,
   3550             "no such group"
   3551             );
   3552         return NULL;
   3553     }
   3554 
   3555     /* marks are -1 if group is undefined */
   3556     return _pair(self->mark[index*2], self->mark[index*2+1]);
   3557 }
   3558 
   3559 static PyObject*
   3560 match_regs(MatchObject* self)
   3561 {
   3562     PyObject* regs;
   3563     PyObject* item;
   3564     Py_ssize_t index;
   3565 
   3566     regs = PyTuple_New(self->groups);
   3567     if (!regs)
   3568         return NULL;
   3569 
   3570     for (index = 0; index < self->groups; index++) {
   3571         item = _pair(self->mark[index*2], self->mark[index*2+1]);
   3572         if (!item) {
   3573             Py_DECREF(regs);
   3574             return NULL;
   3575         }
   3576         PyTuple_SET_ITEM(regs, index, item);
   3577     }
   3578 
   3579     Py_INCREF(regs);
   3580     self->regs = regs;
   3581 
   3582     return regs;
   3583 }
   3584 
   3585 static PyObject*
   3586 match_copy(MatchObject* self, PyObject *unused)
   3587 {
   3588 #ifdef USE_BUILTIN_COPY
   3589     MatchObject* copy;
   3590     Py_ssize_t slots, offset;
   3591 
   3592     slots = 2 * (self->pattern->groups+1);
   3593 
   3594     copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
   3595     if (!copy)
   3596         return NULL;
   3597 
   3598     /* this value a constant, but any compiler should be able to
   3599        figure that out all by itself */
   3600     offset = offsetof(MatchObject, string);
   3601 
   3602     Py_XINCREF(self->pattern);
   3603     Py_XINCREF(self->string);
   3604     Py_XINCREF(self->regs);
   3605 
   3606     memcpy((char*) copy + offset, (char*) self + offset,
   3607            sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
   3608 
   3609     return (PyObject*) copy;
   3610 #else
   3611     PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
   3612     return NULL;
   3613 #endif
   3614 }
   3615 
   3616 static PyObject*
   3617 match_deepcopy(MatchObject* self, PyObject* memo)
   3618 {
   3619 #ifdef USE_BUILTIN_COPY
   3620     MatchObject* copy;
   3621 
   3622     copy = (MatchObject*) match_copy(self);
   3623     if (!copy)
   3624         return NULL;
   3625 
   3626     if (!deepcopy((PyObject**) &copy->pattern, memo) ||
   3627         !deepcopy(&copy->string, memo) ||
   3628         !deepcopy(&copy->regs, memo)) {
   3629         Py_DECREF(copy);
   3630         return NULL;
   3631     }
   3632 
   3633 #else
   3634     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
   3635     return NULL;
   3636 #endif
   3637 }
   3638 
   3639 PyDoc_STRVAR(match_doc,
   3640 "The result of re.match() and re.search().\n\
   3641 Match objects always have a boolean value of True.");
   3642 
   3643 PyDoc_STRVAR(match_group_doc,
   3644 "group([group1, ...]) -> str or tuple.\n\
   3645     Return subgroup(s) of the match by indices or names.\n\
   3646     For 0 returns the entire match.");
   3647 
   3648 PyDoc_STRVAR(match_start_doc,
   3649 "start([group=0]) -> int.\n\
   3650     Return index of the start of the substring matched by group.");
   3651 
   3652 PyDoc_STRVAR(match_end_doc,
   3653 "end([group=0]) -> int.\n\
   3654     Return index of the end of the substring matched by group.");
   3655 
   3656 PyDoc_STRVAR(match_span_doc,
   3657 "span([group]) -> tuple.\n\
   3658     For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
   3659 
   3660 PyDoc_STRVAR(match_groups_doc,
   3661 "groups([default=None]) -> tuple.\n\
   3662     Return a tuple containing all the subgroups of the match, from 1.\n\
   3663     The default argument is used for groups\n\
   3664     that did not participate in the match");
   3665 
   3666 PyDoc_STRVAR(match_groupdict_doc,
   3667 "groupdict([default=None]) -> dict.\n\
   3668     Return a dictionary containing all the named subgroups of the match,\n\
   3669     keyed by the subgroup name. The default argument is used for groups\n\
   3670     that did not participate in the match");
   3671 
   3672 PyDoc_STRVAR(match_expand_doc,
   3673 "expand(template) -> str.\n\
   3674     Return the string obtained by doing backslash substitution\n\
   3675     on the string template, as done by the sub() method.");
   3676 
   3677 static PyMethodDef match_methods[] = {
   3678     {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
   3679     {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
   3680     {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
   3681     {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
   3682     {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
   3683         match_groups_doc},
   3684     {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
   3685         match_groupdict_doc},
   3686     {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
   3687     {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
   3688     {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
   3689     {NULL, NULL}
   3690 };
   3691 
   3692 static PyObject *
   3693 match_lastindex_get(MatchObject *self)
   3694 {
   3695     if (self->lastindex >= 0)
   3696         return PyInt_FromSsize_t(self->lastindex);
   3697     Py_INCREF(Py_None);
   3698     return Py_None;
   3699 }
   3700 
   3701 static PyObject *
   3702 match_lastgroup_get(MatchObject *self)
   3703 {
   3704     if (self->pattern->indexgroup && self->lastindex >= 0) {
   3705         PyObject* result = PySequence_GetItem(
   3706             self->pattern->indexgroup, self->lastindex
   3707             );
   3708         if (result)
   3709             return result;
   3710         PyErr_Clear();
   3711     }
   3712     Py_INCREF(Py_None);
   3713     return Py_None;
   3714 }
   3715 
   3716 static PyObject *
   3717 match_regs_get(MatchObject *self)
   3718 {
   3719     if (self->regs) {
   3720         Py_INCREF(self->regs);
   3721         return self->regs;
   3722     } else
   3723         return match_regs(self);
   3724 }
   3725 
   3726 static PyGetSetDef match_getset[] = {
   3727     {"lastindex", (getter)match_lastindex_get, (setter)NULL},
   3728     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
   3729     {"regs",      (getter)match_regs_get,      (setter)NULL},
   3730     {NULL}
   3731 };
   3732 
   3733 #define MATCH_OFF(x) offsetof(MatchObject, x)
   3734 static PyMemberDef match_members[] = {
   3735     {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY},
   3736     {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY},
   3737     {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY},
   3738     {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY},
   3739     {NULL}
   3740 };
   3741 
   3742 
   3743 /* FIXME: implement setattr("string", None) as a special case (to
   3744    detach the associated string, if any */
   3745 
   3746 static PyTypeObject Match_Type = {
   3747     PyVarObject_HEAD_INIT(NULL, 0)
   3748     "_" SRE_MODULE ".SRE_Match",
   3749     sizeof(MatchObject), sizeof(Py_ssize_t),
   3750     (destructor)match_dealloc,  /* tp_dealloc */
   3751     0,                          /* tp_print */
   3752     0,                          /* tp_getattr */
   3753     0,                          /* tp_setattr */
   3754     0,                          /* tp_compare */
   3755     0,                          /* tp_repr */
   3756     0,                          /* tp_as_number */
   3757     0,                          /* tp_as_sequence */
   3758     0,                          /* tp_as_mapping */
   3759     0,                          /* tp_hash */
   3760     0,                          /* tp_call */
   3761     0,                          /* tp_str */
   3762     0,                          /* tp_getattro */
   3763     0,                          /* tp_setattro */
   3764     0,                          /* tp_as_buffer */
   3765     Py_TPFLAGS_DEFAULT,
   3766     match_doc,                  /* tp_doc */
   3767     0,                          /* tp_traverse */
   3768     0,                          /* tp_clear */
   3769     0,                          /* tp_richcompare */
   3770     0,                          /* tp_weaklistoffset */
   3771     0,                          /* tp_iter */
   3772     0,                          /* tp_iternext */
   3773     match_methods,    /* tp_methods */
   3774     match_members,    /* tp_members */
   3775     match_getset,           /* tp_getset */
   3776 };
   3777 
   3778 static PyObject*
   3779 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
   3780 {
   3781     /* create match object (from state object) */
   3782 
   3783     MatchObject* match;
   3784     Py_ssize_t i, j;
   3785     char* base;
   3786     int n;
   3787 
   3788     if (status > 0) {
   3789 
   3790         /* create match object (with room for extra group marks) */
   3791         /* coverity[ampersand_in_size] */
   3792         match = PyObject_NEW_VAR(MatchObject, &Match_Type,
   3793                                  2*(pattern->groups+1));
   3794         if (!match)
   3795             return NULL;
   3796 
   3797         Py_INCREF(pattern);
   3798         match->pattern = pattern;
   3799 
   3800         Py_INCREF(state->string);
   3801         match->string = state->string;
   3802 
   3803         match->regs = NULL;
   3804         match->groups = pattern->groups+1;
   3805 
   3806         /* fill in group slices */
   3807 
   3808         base = (char*) state->beginning;
   3809         n = state->charsize;
   3810 
   3811         match->mark[0] = ((char*) state->start - base) / n;
   3812         match->mark[1] = ((char*) state->ptr - base) / n;
   3813 
   3814         for (i = j = 0; i < pattern->groups; i++, j+=2)
   3815             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
   3816                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
   3817                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
   3818             } else
   3819                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
   3820 
   3821         match->pos = state->pos;
   3822         match->endpos = state->endpos;
   3823 
   3824         match->lastindex = state->lastindex;
   3825 
   3826         return (PyObject*) match;
   3827 
   3828     } else if (status == 0) {
   3829 
   3830         /* no match */
   3831         Py_INCREF(Py_None);
   3832         return Py_None;
   3833 
   3834     }
   3835 
   3836     /* internal error */
   3837     pattern_error(status);
   3838     return NULL;
   3839 }
   3840 
   3841 
   3842 /* -------------------------------------------------------------------- */
   3843 /* scanner methods (experimental) */
   3844 
   3845 static void
   3846 scanner_dealloc(ScannerObject* self)
   3847 {
   3848     state_fini(&self->state);
   3849     Py_XDECREF(self->pattern);
   3850     PyObject_DEL(self);
   3851 }
   3852 
   3853 static PyObject*
   3854 scanner_match(ScannerObject* self, PyObject *unused)
   3855 {
   3856     SRE_STATE* state = &self->state;
   3857     PyObject* match;
   3858     int status;
   3859 
   3860     state_reset(state);
   3861 
   3862     state->ptr = state->start;
   3863 
   3864     if (state->charsize == 1) {
   3865         status = sre_match(state, PatternObject_GetCode(self->pattern));
   3866     } else {
   3867 #if defined(HAVE_UNICODE)
   3868         status = sre_umatch(state, PatternObject_GetCode(self->pattern));
   3869 #endif
   3870     }
   3871     if (PyErr_Occurred())
   3872         return NULL;
   3873 
   3874     match = pattern_new_match((PatternObject*) self->pattern,
   3875                                state, status);
   3876 
   3877     if (status == 0 || state->ptr == state->start)
   3878         state->start = (void*) ((char*) state->ptr + state->charsize);
   3879     else
   3880         state->start = state->ptr;
   3881 
   3882     return match;
   3883 }
   3884 
   3885 
   3886 static PyObject*
   3887 scanner_search(ScannerObject* self, PyObject *unused)
   3888 {
   3889     SRE_STATE* state = &self->state;
   3890     PyObject* match;
   3891     int status;
   3892 
   3893     state_reset(state);
   3894 
   3895     state->ptr = state->start;
   3896 
   3897     if (state->charsize == 1) {
   3898         status = sre_search(state, PatternObject_GetCode(self->pattern));
   3899     } else {
   3900 #if defined(HAVE_UNICODE)
   3901         status = sre_usearch(state, PatternObject_GetCode(self->pattern));
   3902 #endif
   3903     }
   3904     if (PyErr_Occurred())
   3905         return NULL;
   3906 
   3907     match = pattern_new_match((PatternObject*) self->pattern,
   3908                                state, status);
   3909 
   3910     if (status == 0 || state->ptr == state->start)
   3911         state->start = (void*) ((char*) state->ptr + state->charsize);
   3912     else
   3913         state->start = state->ptr;
   3914 
   3915     return match;
   3916 }
   3917 
   3918 static PyMethodDef scanner_methods[] = {
   3919     {"match", (PyCFunction) scanner_match, METH_NOARGS},
   3920     {"search", (PyCFunction) scanner_search, METH_NOARGS},
   3921     {NULL, NULL}
   3922 };
   3923 
   3924 #define SCAN_OFF(x) offsetof(ScannerObject, x)
   3925 static PyMemberDef scanner_members[] = {
   3926     {"pattern", T_OBJECT, SCAN_OFF(pattern),  READONLY},
   3927     {NULL}  /* Sentinel */
   3928 };
   3929 
   3930 statichere PyTypeObject Scanner_Type = {
   3931     PyObject_HEAD_INIT(NULL)
   3932     0, "_" SRE_MODULE ".SRE_Scanner",
   3933     sizeof(ScannerObject), 0,
   3934     (destructor)scanner_dealloc, /*tp_dealloc*/
   3935     0,        /* tp_print */
   3936     0,        /* tp_getattr */
   3937     0,        /* tp_setattr */
   3938     0,        /* tp_reserved */
   3939     0,        /* tp_repr */
   3940     0,        /* tp_as_number */
   3941     0,        /* tp_as_sequence */
   3942     0,        /* tp_as_mapping */
   3943     0,        /* tp_hash */
   3944     0,        /* tp_call */
   3945     0,        /* tp_str */
   3946     0,        /* tp_getattro */
   3947     0,        /* tp_setattro */
   3948     0,        /* tp_as_buffer */
   3949     Py_TPFLAGS_DEFAULT,   /* tp_flags */
   3950     0,        /* tp_doc */
   3951     0,        /* tp_traverse */
   3952     0,        /* tp_clear */
   3953     0,        /* tp_richcompare */
   3954     0,        /* tp_weaklistoffset */
   3955     0,        /* tp_iter */
   3956     0,        /* tp_iternext */
   3957     scanner_methods,    /* tp_methods */
   3958     scanner_members,    /* tp_members */
   3959     0,        /* tp_getset */
   3960 };
   3961 
   3962 static PyObject*
   3963 pattern_scanner(PatternObject* pattern, PyObject* args)
   3964 {
   3965     /* create search state object */
   3966 
   3967     ScannerObject* self;
   3968 
   3969     PyObject* string;
   3970     Py_ssize_t start = 0;
   3971     Py_ssize_t end = PY_SSIZE_T_MAX;
   3972     if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
   3973         return NULL;
   3974 
   3975     /* create scanner object */
   3976     self = PyObject_NEW(ScannerObject, &Scanner_Type);
   3977     if (!self)
   3978         return NULL;
   3979     self->pattern = NULL;
   3980 
   3981     string = state_init(&self->state, pattern, string, start, end);
   3982     if (!string) {
   3983         Py_DECREF(self);
   3984         return NULL;
   3985     }
   3986 
   3987     Py_INCREF(pattern);
   3988     self->pattern = (PyObject*) pattern;
   3989 
   3990     return (PyObject*) self;
   3991 }
   3992 
   3993 static PyMethodDef _functions[] = {
   3994     {"compile", _compile, METH_VARARGS},
   3995     {"getcodesize", sre_codesize, METH_NOARGS},
   3996     {"getlower", sre_getlower, METH_VARARGS},
   3997     {NULL, NULL}
   3998 };
   3999 
   4000 #if PY_VERSION_HEX < 0x02030000
   4001 DL_EXPORT(void) init_sre(void)
   4002 #else
   4003 PyMODINIT_FUNC init_sre(void)
   4004 #endif
   4005 {
   4006     PyObject* m;
   4007     PyObject* d;
   4008     PyObject* x;
   4009 
   4010     /* Patch object types */
   4011     if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
   4012         PyType_Ready(&Scanner_Type))
   4013         return;
   4014 
   4015     m = Py_InitModule("_" SRE_MODULE, _functions);
   4016     if (m == NULL)
   4017       return;
   4018     d = PyModule_GetDict(m);
   4019 
   4020     x = PyInt_FromLong(SRE_MAGIC);
   4021     if (x) {
   4022         PyDict_SetItemString(d, "MAGIC", x);
   4023         Py_DECREF(x);
   4024     }
   4025 
   4026     x = PyInt_FromLong(sizeof(SRE_CODE));
   4027     if (x) {
   4028         PyDict_SetItemString(d, "CODESIZE", x);
   4029         Py_DECREF(x);
   4030     }
   4031 
   4032     x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
   4033     if (x) {
   4034         PyDict_SetItemString(d, "MAXREPEAT", x);
   4035         Py_DECREF(x);
   4036     }
   4037 
   4038     x = PyString_FromString(copyright);
   4039     if (x) {
   4040         PyDict_SetItemString(d, "copyright", x);
   4041         Py_DECREF(x);
   4042     }
   4043 }
   4044 
   4045 #endif /* !defined(SRE_RECURSIVE) */
   4046 
   4047 /* vim:ts=4:sw=4:et
   4048 */
   4049