Home | History | Annotate | Download | only in Modules
      1 /*
      2  * Secret Labs' Regular Expression Engine
      3  *
      4  * regular expression matching engine
      5  *
      6  * partial history:
      7  * 1999-10-24 fl   created (based on existing template matcher code)
      8  * 2000-03-06 fl   first alpha, sort of
      9  * 2000-08-01 fl   fixes for 1.6b1
     10  * 2000-08-07 fl   use PyOS_CheckStack() if available
     11  * 2000-09-20 fl   added expand method
     12  * 2001-03-20 fl   lots of fixes for 2.1b2
     13  * 2001-04-15 fl   export copyright as Python attribute, not global
     14  * 2001-04-28 fl   added __copy__ methods (work in progress)
     15  * 2001-05-14 fl   fixes for 1.5.2 compatibility
     16  * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
     17  * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
     18  * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
     19  * 2001-10-21 fl   added sub/subn primitive
     20  * 2001-10-24 fl   added finditer primitive (for 2.2 only)
     21  * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
     22  * 2002-11-09 fl   fixed empty sub/subn return type
     23  * 2003-04-18 mvl  fully support 4-byte codes
     24  * 2003-10-17 gn   implemented non recursive scheme
     25  * 2013-02-04 mrab added fullmatch primitive
     26  *
     27  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
     28  *
     29  * This version of the SRE library can be redistributed under CNRI's
     30  * Python 1.6 license.  For any other use, please contact Secret Labs
     31  * AB (info (at) pythonware.com).
     32  *
     33  * Portions of this engine have been developed in cooperation with
     34  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
     35  * other compatibility work.
     36  */
     37 
     38 static const char copyright[] =
     39     " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
     40 
     41 #define PY_SSIZE_T_CLEAN
     42 
     43 #include "Python.h"
     44 #include "structmember.h" /* offsetof */
     45 
     46 #include "sre.h"
     47 
/* Width of one SRE_CODE unit, computed as 8 times sizeof(SRE_CODE). */
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
     49 
     50 #include <ctype.h>
     51 
/* name of this module, minus the leading underscore; a build may
   pre-define SRE_MODULE to override the default */
#if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif

/* NOTE(review): presumably the name of the Python-level companion module
   handed to call(); its use is not visible in this part of the file --
   confirm */
#define SRE_PY_MODULE "re"
     58 
/* defining this one enables tracing: TRACE() expands to a printf call only
   when VERBOSE is defined, so it is explicitly undefined here to keep
   tracing off */
#undef VERBOSE
     61 
     62 /* -------------------------------------------------------------------- */
     63 
/* LOCAL(type) introduces a file-local inline function returning `type`.
   Under MSVC it additionally requests the __fastcall calling convention;
   everywhere else it is plain `static inline`. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif
     72 
/* error codes: negative status values. Callers in this file pass any
   negative status from the search routines to pattern_error(), which turns
   it into a Python exception. */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
     79 
/* TRACE((fmt, ...)) -- note the double parentheses: the whole argument list
   is pasted after `printf`.  Expands to nothing unless VERBOSE is defined. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
     85 
     86 /* -------------------------------------------------------------------- */
     87 /* search engine state */
     88 
/* ASCII-only character predicates.  Each one is false for any ch >= 128 and
   otherwise defers to the corresponding Py_IS* macro (SRE_IS_LINEBREAK only
   recognises '\n').  The argument is expanded more than once, so it must
   not have side effects. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 && Py_ISALNUM(ch))
/* word character: ASCII alphanumeric or underscore */
#define SRE_IS_WORD(ch)\
    ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
     99 
    100 static unsigned int sre_lower_ascii(unsigned int ch)
    101 {
    102     return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
    103 }
    104 
    105 static unsigned int sre_upper_ascii(unsigned int ch)
    106 {
    107     return ((ch) < 128 ? Py_TOUPPER(ch) : ch);
    108 }
    109 
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* SRE_LOC_IS_ALNUM: isalnum() for values that fit in 8 bits, 0 otherwise.
   SRE_LOC_IS_WORD: the above, or an underscore.  Both expand their
   argument more than once. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
    115 
    116 static unsigned int sre_lower_locale(unsigned int ch)
    117 {
    118     return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
    119 }
    120 
    121 static unsigned int sre_upper_locale(unsigned int ch)
    122 {
    123     return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
    124 }
    125 
/* unicode-specific character predicates: thin aliases for the matching
   Py_UNICODE_IS* macros.  SRE_UNI_IS_WORD is "alphanumeric or underscore"
   and expands its argument twice. */

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
    133 
    134 static unsigned int sre_lower_unicode(unsigned int ch)
    135 {
    136     return (unsigned int) Py_UNICODE_TOLOWER(ch);
    137 }
    138 
    139 static unsigned int sre_upper_unicode(unsigned int ch)
    140 {
    141     return (unsigned int) Py_UNICODE_TOUPPER(ch);
    142 }
    143 
    144 LOCAL(int)
    145 sre_category(SRE_CODE category, unsigned int ch)
    146 {
    147     switch (category) {
    148 
    149     case SRE_CATEGORY_DIGIT:
    150         return SRE_IS_DIGIT(ch);
    151     case SRE_CATEGORY_NOT_DIGIT:
    152         return !SRE_IS_DIGIT(ch);
    153     case SRE_CATEGORY_SPACE:
    154         return SRE_IS_SPACE(ch);
    155     case SRE_CATEGORY_NOT_SPACE:
    156         return !SRE_IS_SPACE(ch);
    157     case SRE_CATEGORY_WORD:
    158         return SRE_IS_WORD(ch);
    159     case SRE_CATEGORY_NOT_WORD:
    160         return !SRE_IS_WORD(ch);
    161     case SRE_CATEGORY_LINEBREAK:
    162         return SRE_IS_LINEBREAK(ch);
    163     case SRE_CATEGORY_NOT_LINEBREAK:
    164         return !SRE_IS_LINEBREAK(ch);
    165 
    166     case SRE_CATEGORY_LOC_WORD:
    167         return SRE_LOC_IS_WORD(ch);
    168     case SRE_CATEGORY_LOC_NOT_WORD:
    169         return !SRE_LOC_IS_WORD(ch);
    170 
    171     case SRE_CATEGORY_UNI_DIGIT:
    172         return SRE_UNI_IS_DIGIT(ch);
    173     case SRE_CATEGORY_UNI_NOT_DIGIT:
    174         return !SRE_UNI_IS_DIGIT(ch);
    175     case SRE_CATEGORY_UNI_SPACE:
    176         return SRE_UNI_IS_SPACE(ch);
    177     case SRE_CATEGORY_UNI_NOT_SPACE:
    178         return !SRE_UNI_IS_SPACE(ch);
    179     case SRE_CATEGORY_UNI_WORD:
    180         return SRE_UNI_IS_WORD(ch);
    181     case SRE_CATEGORY_UNI_NOT_WORD:
    182         return !SRE_UNI_IS_WORD(ch);
    183     case SRE_CATEGORY_UNI_LINEBREAK:
    184         return SRE_UNI_IS_LINEBREAK(ch);
    185     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
    186         return !SRE_UNI_IS_LINEBREAK(ch);
    187     }
    188     return 0;
    189 }
    190 
    191 LOCAL(int)
    192 char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
    193 {
    194     return ch == pattern
    195         || (SRE_CODE) sre_lower_locale(ch) == pattern
    196         || (SRE_CODE) sre_upper_locale(ch) == pattern;
    197 }
    198 
    199 
    200 /* helpers */
    201 
    202 static void
    203 data_stack_dealloc(SRE_STATE* state)
    204 {
    205     if (state->data_stack) {
    206         PyMem_FREE(state->data_stack);
    207         state->data_stack = NULL;
    208     }
    209     state->data_stack_size = state->data_stack_base = 0;
    210 }
    211 
    212 static int
    213 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
    214 {
    215     Py_ssize_t minsize, cursize;
    216     minsize = state->data_stack_base+size;
    217     cursize = state->data_stack_size;
    218     if (cursize < minsize) {
    219         void* stack;
    220         cursize = minsize+minsize/4+1024;
    221         TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
    222         stack = PyMem_REALLOC(state->data_stack, cursize);
    223         if (!stack) {
    224             data_stack_dealloc(state);
    225             return SRE_ERROR_MEMORY;
    226         }
    227         state->data_stack = (char *)stack;
    228         state->data_stack_size = cursize;
    229     }
    230     return 0;
    231 }
    232 
/* sre_lib.h is included three times, once per character width.  Before each
   include, SRE_CHAR names the character type, SIZEOF_SRE_CHAR its size in
   bytes, and SRE(F) the name-mangling scheme, which yields the
   sre_ucs1_*, sre_ucs2_* and sre_ucs4_* function families used later in
   this file.
   NOTE(review): the three macros are redefined here without an #undef, so
   sre_lib.h presumably undefines them at its end -- confirm. */

/* generate 8-bit version */

#define SRE_CHAR Py_UCS1
#define SIZEOF_SRE_CHAR 1
#define SRE(F) sre_ucs1_##F
#include "sre_lib.h"

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UCS2
#define SIZEOF_SRE_CHAR 2
#define SRE(F) sre_ucs2_##F
#include "sre_lib.h"

/* generate 32-bit unicode version */

#define SRE_CHAR Py_UCS4
#define SIZEOF_SRE_CHAR 4
#define SRE(F) sre_ucs4_##F
#include "sre_lib.h"
    253 
    254 /* -------------------------------------------------------------------- */
    255 /* factories and destructors */
    256 
/* see sre.h for object declarations */
/* forward declarations: both functions are called by the Pattern methods
   below before their definitions appear */
static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
    260 
    261 
    262 /*[clinic input]
    263 module _sre
    264 class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
    265 class _sre.SRE_Match "MatchObject *" "&Match_Type"
    266 class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
    267 [clinic start generated code]*/
    268 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
    269 
/* tentative definitions of the three type objects named in the clinic
   class declarations above */
static PyTypeObject Pattern_Type;
static PyTypeObject Match_Type;
static PyTypeObject Scanner_Type;
    273 
    274 /*[clinic input]
    275 _sre.getcodesize -> int
    276 [clinic start generated code]*/
    277 
    278 static int
    279 _sre_getcodesize_impl(PyObject *module)
    280 /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
    281 {
    282     return sizeof(SRE_CODE);
    283 }
    284 
    285 /*[clinic input]
    286 _sre.ascii_iscased -> bool
    287 
    288     character: int
    289     /
    290 
    291 [clinic start generated code]*/
    292 
    293 static int
    294 _sre_ascii_iscased_impl(PyObject *module, int character)
    295 /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
    296 {
    297     unsigned int ch = (unsigned int)character;
    298     return ch != sre_lower_ascii(ch) || ch != sre_upper_ascii(ch);
    299 }
    300 
    301 /*[clinic input]
    302 _sre.unicode_iscased -> bool
    303 
    304     character: int
    305     /
    306 
    307 [clinic start generated code]*/
    308 
    309 static int
    310 _sre_unicode_iscased_impl(PyObject *module, int character)
    311 /*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
    312 {
    313     unsigned int ch = (unsigned int)character;
    314     return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
    315 }
    316 
    317 /*[clinic input]
    318 _sre.ascii_tolower -> int
    319 
    320     character: int
    321     /
    322 
    323 [clinic start generated code]*/
    324 
    325 static int
    326 _sre_ascii_tolower_impl(PyObject *module, int character)
    327 /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
    328 {
    329     return sre_lower_ascii(character);
    330 }
    331 
    332 /*[clinic input]
    333 _sre.unicode_tolower -> int
    334 
    335     character: int
    336     /
    337 
    338 [clinic start generated code]*/
    339 
    340 static int
    341 _sre_unicode_tolower_impl(PyObject *module, int character)
    342 /*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
    343 {
    344     return sre_lower_unicode(character);
    345 }
    346 
    347 LOCAL(void)
    348 state_reset(SRE_STATE* state)
    349 {
    350     /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
    351     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
    352 
    353     state->lastmark = -1;
    354     state->lastindex = -1;
    355 
    356     state->repeat = NULL;
    357 
    358     data_stack_dealloc(state);
    359 }
    360 
    361 static void*
    362 getstring(PyObject* string, Py_ssize_t* p_length,
    363           int* p_isbytes, int* p_charsize,
    364           Py_buffer *view)
    365 {
    366     /* given a python object, return a data pointer, a length (in
    367        characters), and a character size.  return NULL if the object
    368        is not a string (or not compatible) */
    369 
    370     /* Unicode objects do not support the buffer API. So, get the data
    371        directly instead. */
    372     if (PyUnicode_Check(string)) {
    373         if (PyUnicode_READY(string) == -1)
    374             return NULL;
    375         *p_length = PyUnicode_GET_LENGTH(string);
    376         *p_charsize = PyUnicode_KIND(string);
    377         *p_isbytes = 0;
    378         return PyUnicode_DATA(string);
    379     }
    380 
    381     /* get pointer to byte string buffer */
    382     if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
    383         PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
    384         return NULL;
    385     }
    386 
    387     *p_length = view->len;
    388     *p_charsize = 1;
    389     *p_isbytes = 1;
    390 
    391     if (view->buf == NULL) {
    392         PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
    393         PyBuffer_Release(view);
    394         view->buf = NULL;
    395         return NULL;
    396     }
    397     return view->buf;
    398 }
    399 
    400 LOCAL(PyObject*)
    401 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
    402            Py_ssize_t start, Py_ssize_t end)
    403 {
    404     /* prepare state object */
    405 
    406     Py_ssize_t length;
    407     int isbytes, charsize;
    408     void* ptr;
    409 
    410     memset(state, 0, sizeof(SRE_STATE));
    411 
    412     state->mark = PyMem_New(void *, pattern->groups * 2);
    413     if (!state->mark) {
    414         PyErr_NoMemory();
    415         goto err;
    416     }
    417     state->lastmark = -1;
    418     state->lastindex = -1;
    419 
    420     state->buffer.buf = NULL;
    421     ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
    422     if (!ptr)
    423         goto err;
    424 
    425     if (isbytes && pattern->isbytes == 0) {
    426         PyErr_SetString(PyExc_TypeError,
    427                         "cannot use a string pattern on a bytes-like object");
    428         goto err;
    429     }
    430     if (!isbytes && pattern->isbytes > 0) {
    431         PyErr_SetString(PyExc_TypeError,
    432                         "cannot use a bytes pattern on a string-like object");
    433         goto err;
    434     }
    435 
    436     /* adjust boundaries */
    437     if (start < 0)
    438         start = 0;
    439     else if (start > length)
    440         start = length;
    441 
    442     if (end < 0)
    443         end = 0;
    444     else if (end > length)
    445         end = length;
    446 
    447     state->isbytes = isbytes;
    448     state->charsize = charsize;
    449     state->match_all = 0;
    450     state->must_advance = 0;
    451 
    452     state->beginning = ptr;
    453 
    454     state->start = (void*) ((char*) ptr + start * state->charsize);
    455     state->end = (void*) ((char*) ptr + end * state->charsize);
    456 
    457     Py_INCREF(string);
    458     state->string = string;
    459     state->pos = start;
    460     state->endpos = end;
    461 
    462     return string;
    463   err:
    464     PyMem_Del(state->mark);
    465     state->mark = NULL;
    466     if (state->buffer.buf)
    467         PyBuffer_Release(&state->buffer);
    468     return NULL;
    469 }
    470 
    471 LOCAL(void)
    472 state_fini(SRE_STATE* state)
    473 {
    474     if (state->buffer.buf)
    475         PyBuffer_Release(&state->buffer);
    476     Py_XDECREF(state->string);
    477     data_stack_dealloc(state);
    478     PyMem_Del(state->mark);
    479     state->mark = NULL;
    480 }
    481 
/* calculate offset from start of string: the byte distance between
   `member` and state->beginning, divided by the character width, i.e. a
   character index.  `state` is expanded twice. */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
    485 
    486 LOCAL(PyObject*)
    487 getslice(int isbytes, const void *ptr,
    488          PyObject* string, Py_ssize_t start, Py_ssize_t end)
    489 {
    490     if (isbytes) {
    491         if (PyBytes_CheckExact(string) &&
    492             start == 0 && end == PyBytes_GET_SIZE(string)) {
    493             Py_INCREF(string);
    494             return string;
    495         }
    496         return PyBytes_FromStringAndSize(
    497                 (const char *)ptr + start, end - start);
    498     }
    499     else {
    500         return PyUnicode_Substring(string, start, end);
    501     }
    502 }
    503 
    504 LOCAL(PyObject*)
    505 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
    506 {
    507     Py_ssize_t i, j;
    508 
    509     index = (index - 1) * 2;
    510 
    511     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
    512         if (empty)
    513             /* want empty string */
    514             i = j = 0;
    515         else {
    516             Py_RETURN_NONE;
    517         }
    518     } else {
    519         i = STATE_OFFSET(state, state->mark[index]);
    520         j = STATE_OFFSET(state, state->mark[index+1]);
    521     }
    522 
    523     return getslice(state->isbytes, state->beginning, string, i, j);
    524 }
    525 
    526 static void
    527 pattern_error(Py_ssize_t status)
    528 {
    529     switch (status) {
    530     case SRE_ERROR_RECURSION_LIMIT:
    531         /* This error code seems to be unused. */
    532         PyErr_SetString(
    533             PyExc_RecursionError,
    534             "maximum recursion limit exceeded"
    535             );
    536         break;
    537     case SRE_ERROR_MEMORY:
    538         PyErr_NoMemory();
    539         break;
    540     case SRE_ERROR_INTERRUPTED:
    541     /* An exception has already been raised, so let it fly */
    542         break;
    543     default:
    544         /* other error codes indicate compiler/engine bugs */
    545         PyErr_SetString(
    546             PyExc_RuntimeError,
    547             "internal error in regular expression engine"
    548             );
    549     }
    550 }
    551 
    552 static void
    553 pattern_dealloc(PatternObject* self)
    554 {
    555     if (self->weakreflist != NULL)
    556         PyObject_ClearWeakRefs((PyObject *) self);
    557     Py_XDECREF(self->pattern);
    558     Py_XDECREF(self->groupindex);
    559     Py_XDECREF(self->indexgroup);
    560     PyObject_DEL(self);
    561 }
    562 
    563 LOCAL(Py_ssize_t)
    564 sre_match(SRE_STATE* state, SRE_CODE* pattern)
    565 {
    566     if (state->charsize == 1)
    567         return sre_ucs1_match(state, pattern, 1);
    568     if (state->charsize == 2)
    569         return sre_ucs2_match(state, pattern, 1);
    570     assert(state->charsize == 4);
    571     return sre_ucs4_match(state, pattern, 1);
    572 }
    573 
    574 LOCAL(Py_ssize_t)
    575 sre_search(SRE_STATE* state, SRE_CODE* pattern)
    576 {
    577     if (state->charsize == 1)
    578         return sre_ucs1_search(state, pattern);
    579     if (state->charsize == 2)
    580         return sre_ucs2_search(state, pattern);
    581     assert(state->charsize == 4);
    582     return sre_ucs4_search(state, pattern);
    583 }
    584 
    585 /*[clinic input]
    586 _sre.SRE_Pattern.match
    587 
    588     string: object
    589     pos: Py_ssize_t = 0
    590     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    591 
    592 Matches zero or more characters at the beginning of the string.
    593 [clinic start generated code]*/
    594 
    595 static PyObject *
    596 _sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
    597                             Py_ssize_t pos, Py_ssize_t endpos)
    598 /*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
    599 {
    600     SRE_STATE state;
    601     Py_ssize_t status;
    602     PyObject *match;
    603 
    604     if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
    605         return NULL;
    606 
    607     state.ptr = state.start;
    608 
    609     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
    610 
    611     status = sre_match(&state, PatternObject_GetCode(self));
    612 
    613     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
    614     if (PyErr_Occurred()) {
    615         state_fini(&state);
    616         return NULL;
    617     }
    618 
    619     match = pattern_new_match(self, &state, status);
    620     state_fini(&state);
    621     return match;
    622 }
    623 
    624 /*[clinic input]
    625 _sre.SRE_Pattern.fullmatch
    626 
    627     string: object
    628     pos: Py_ssize_t = 0
    629     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    630 
    631 Matches against all of the string.
    632 [clinic start generated code]*/
    633 
    634 static PyObject *
    635 _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
    636                                 Py_ssize_t pos, Py_ssize_t endpos)
    637 /*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
    638 {
    639     SRE_STATE state;
    640     Py_ssize_t status;
    641     PyObject *match;
    642 
    643     if (!state_init(&state, self, string, pos, endpos))
    644         return NULL;
    645 
    646     state.ptr = state.start;
    647 
    648     TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
    649 
    650     state.match_all = 1;
    651     status = sre_match(&state, PatternObject_GetCode(self));
    652 
    653     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
    654     if (PyErr_Occurred()) {
    655         state_fini(&state);
    656         return NULL;
    657     }
    658 
    659     match = pattern_new_match(self, &state, status);
    660     state_fini(&state);
    661     return match;
    662 }
    663 
    664 /*[clinic input]
    665 _sre.SRE_Pattern.search
    666 
    667     string: object
    668     pos: Py_ssize_t = 0
    669     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    670 
    671 Scan through string looking for a match, and return a corresponding match object instance.
    672 
    673 Return None if no position in the string matches.
    674 [clinic start generated code]*/
    675 
    676 static PyObject *
    677 _sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
    678                              Py_ssize_t pos, Py_ssize_t endpos)
    679 /*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
    680 {
    681     SRE_STATE state;
    682     Py_ssize_t status;
    683     PyObject *match;
    684 
    685     if (!state_init(&state, self, string, pos, endpos))
    686         return NULL;
    687 
    688     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
    689 
    690     status = sre_search(&state, PatternObject_GetCode(self));
    691 
    692     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
    693 
    694     if (PyErr_Occurred()) {
    695         state_fini(&state);
    696         return NULL;
    697     }
    698 
    699     match = pattern_new_match(self, &state, status);
    700     state_fini(&state);
    701     return match;
    702 }
    703 
    704 static PyObject*
    705 call(const char* module, const char* function, PyObject* args)
    706 {
    707     PyObject* name;
    708     PyObject* mod;
    709     PyObject* func;
    710     PyObject* result;
    711 
    712     if (!args)
    713         return NULL;
    714     name = PyUnicode_FromString(module);
    715     if (!name)
    716         return NULL;
    717     mod = PyImport_Import(name);
    718     Py_DECREF(name);
    719     if (!mod)
    720         return NULL;
    721     func = PyObject_GetAttrString(mod, function);
    722     Py_DECREF(mod);
    723     if (!func)
    724         return NULL;
    725     result = PyObject_CallObject(func, args);
    726     Py_DECREF(func);
    727     Py_DECREF(args);
    728     return result;
    729 }
    730 
    731 /*[clinic input]
    732 _sre.SRE_Pattern.findall
    733 
    734     string: object
    735     pos: Py_ssize_t = 0
    736     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    737 
    738 Return a list of all non-overlapping matches of pattern in string.
    739 [clinic start generated code]*/
    740 
static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
                              Py_ssize_t pos, Py_ssize_t endpos)
/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
{
    /* Repeatedly search `string` and collect one list item per match:
       the matched text when the pattern has no groups, the text of group 1
       when it has exactly one, and otherwise a tuple with the text of every
       group.  Groups that did not take part in a match contribute an empty
       string (state_getslice is called with empty=1).
       Returns a new list, or NULL with an exception set. */
    SRE_STATE state;
    PyObject* list;
    Py_ssize_t status;
    Py_ssize_t i, b, e;

    if (!state_init(&state, self, string, pos, endpos))
        return NULL;

    list = PyList_New(0);
    if (!list) {
        state_fini(&state);
        return NULL;
    }

    while (state.start <= state.end) {

        PyObject* item;

        /* clear per-search bookkeeping left over from the previous round */
        state_reset(&state);

        state.ptr = state.start;

        status = sre_search(&state, PatternObject_GetCode(self));
        if (PyErr_Occurred())
            goto error;

        if (status <= 0) {
            /* 0: no further match; negative: engine error code */
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        /* don't bother to build a match object */
        switch (self->groups) {
        case 0:
            /* NOTE(review): relies on sre_search leaving state.start at the
               match start and state.ptr at the match end -- confirm in
               sre_lib.h */
            b = STATE_OFFSET(&state, state.start);
            e = STATE_OFFSET(&state, state.ptr);
            item = getslice(state.isbytes, state.beginning,
                            string, b, e);
            if (!item)
                goto error;
            break;
        case 1:
            item = state_getslice(&state, 1, string, 1);
            if (!item)
                goto error;
            break;
        default:
            item = PyTuple_New(self->groups);
            if (!item)
                goto error;
            for (i = 0; i < self->groups; i++) {
                PyObject* o = state_getslice(&state, i+1, string, 1);
                if (!o) {
                    Py_DECREF(item);
                    goto error;
                }
                /* the tuple takes over the reference to o */
                PyTuple_SET_ITEM(item, i, o);
            }
            break;
        }

        /* PyList_Append adds its own reference, so ours is dropped
           regardless of the outcome */
        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        /* flag an empty match (end == start) for the next search, then
           continue from where this match ended */
        state.must_advance = (state.ptr == state.start);
        state.start = state.ptr;
    }

    state_fini(&state);
    return list;

error:
    Py_DECREF(list);
    state_fini(&state);
    return NULL;

}
    827 
    828 /*[clinic input]
    829 _sre.SRE_Pattern.finditer
    830 
    831     string: object
    832     pos: Py_ssize_t = 0
    833     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    834 
    835 Return an iterator over all non-overlapping matches for the RE pattern in string.
    836 
    837 For each match, the iterator returns a match object.
    838 [clinic start generated code]*/
    839 
    840 static PyObject *
    841 _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
    842                                Py_ssize_t pos, Py_ssize_t endpos)
    843 /*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
    844 {
    845     PyObject* scanner;
    846     PyObject* search;
    847     PyObject* iterator;
    848 
    849     scanner = pattern_scanner(self, string, pos, endpos);
    850     if (!scanner)
    851         return NULL;
    852 
    853     search = PyObject_GetAttrString(scanner, "search");
    854     Py_DECREF(scanner);
    855     if (!search)
    856         return NULL;
    857 
    858     iterator = PyCallIter_New(search, Py_None);
    859     Py_DECREF(search);
    860 
    861     return iterator;
    862 }
    863 
    864 /*[clinic input]
    865 _sre.SRE_Pattern.scanner
    866 
    867     string: object
    868     pos: Py_ssize_t = 0
    869     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    870 
    871 [clinic start generated code]*/
    872 
    873 static PyObject *
    874 _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
    875                               Py_ssize_t pos, Py_ssize_t endpos)
    876 /*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
    877 {
    878     return pattern_scanner(self, string, pos, endpos);
    879 }
    880 
    881 /*[clinic input]
    882 _sre.SRE_Pattern.split
    883 
    884     string: object
    885     maxsplit: Py_ssize_t = 0
    886 
    887 Split string by the occurrences of pattern.
    888 [clinic start generated code]*/
    889 
static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
                            Py_ssize_t maxsplit)
/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
{
    /* Split `string` at each match of the pattern.  The result list holds
       the text between matches, and after each such piece the text of
       every group of that match (None for groups that did not take part,
       since state_getslice is called with empty=0).  A maxsplit of 0 means
       no limit; otherwise at most maxsplit matches are consumed.  The
       remainder after the last match is always appended, even if empty.
       Returns a new list, or NULL with an exception set. */
    SRE_STATE state;
    PyObject* list;
    PyObject* item;
    Py_ssize_t status;
    Py_ssize_t n;
    Py_ssize_t i;
    void* last;

    assert(self->codesize != 0);

    /* always operate on the whole string */
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
        return NULL;

    list = PyList_New(0);
    if (!list) {
        state_fini(&state);
        return NULL;
    }

    /* n counts splits performed; last points at the end of the previous
       match (initially the start of the string) */
    n = 0;
    last = state.start;

    while (!maxsplit || n < maxsplit) {

        /* clear per-search bookkeeping left over from the previous round */
        state_reset(&state);

        state.ptr = state.start;

        status = sre_search(&state, PatternObject_GetCode(self));
        if (PyErr_Occurred())
            goto error;

        if (status <= 0) {
            /* 0: no further match; negative: engine error code */
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        /* get segment before this match */
        item = getslice(state.isbytes, state.beginning,
            string, STATE_OFFSET(&state, last),
            STATE_OFFSET(&state, state.start)
            );
        if (!item)
            goto error;
        /* PyList_Append adds its own reference, so ours is dropped
           regardless of the outcome */
        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        /* add groups (if any) */
        for (i = 0; i < self->groups; i++) {
            item = state_getslice(&state, i+1, string, 0);
            if (!item)
                goto error;
            status = PyList_Append(list, item);
            Py_DECREF(item);
            if (status < 0)
                goto error;
        }

        n = n + 1;
        /* flag an empty match (end == start) for the next search, then
           continue from where this match ended */
        state.must_advance = (state.ptr == state.start);
        last = state.start = state.ptr;

    }

    /* get segment following last match (even if empty) */
    item = getslice(state.isbytes, state.beginning,
        string, STATE_OFFSET(&state, last), state.endpos
        );
    if (!item)
        goto error;
    status = PyList_Append(list, item);
    Py_DECREF(item);
    if (status < 0)
        goto error;

    state_fini(&state);
    return list;

error:
    Py_DECREF(list);
    state_fini(&state);
    return NULL;

}
    983 
    984 static PyObject*
    985 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
    986              Py_ssize_t count, Py_ssize_t subn)
    987 {
    988     SRE_STATE state;
    989     PyObject* list;
    990     PyObject* joiner;
    991     PyObject* item;
    992     PyObject* filter;
    993     PyObject* match;
    994     void* ptr;
    995     Py_ssize_t status;
    996     Py_ssize_t n;
    997     Py_ssize_t i, b, e;
    998     int isbytes, charsize;
    999     int filter_is_callable;
   1000     Py_buffer view;
   1001 
   1002     if (PyCallable_Check(ptemplate)) {
   1003         /* sub/subn takes either a function or a template */
   1004         filter = ptemplate;
   1005         Py_INCREF(filter);
   1006         filter_is_callable = 1;
   1007     } else {
   1008         /* if not callable, check if it's a literal string */
   1009         int literal;
   1010         view.buf = NULL;
   1011         ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
   1012         b = charsize;
   1013         if (ptr) {
   1014             if (charsize == 1)
   1015                 literal = memchr(ptr, '\\', n) == NULL;
   1016             else
   1017                 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
   1018         } else {
   1019             PyErr_Clear();
   1020             literal = 0;
   1021         }
   1022         if (view.buf)
   1023             PyBuffer_Release(&view);
   1024         if (literal) {
   1025             filter = ptemplate;
   1026             Py_INCREF(filter);
   1027             filter_is_callable = 0;
   1028         } else {
   1029             /* not a literal; hand it over to the template compiler */
   1030             filter = call(
   1031                 SRE_PY_MODULE, "_subx",
   1032                 PyTuple_Pack(2, self, ptemplate)
   1033                 );
   1034             if (!filter)
   1035                 return NULL;
   1036             filter_is_callable = PyCallable_Check(filter);
   1037         }
   1038     }
   1039 
   1040     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
   1041         Py_DECREF(filter);
   1042         return NULL;
   1043     }
   1044 
   1045     list = PyList_New(0);
   1046     if (!list) {
   1047         Py_DECREF(filter);
   1048         state_fini(&state);
   1049         return NULL;
   1050     }
   1051 
   1052     n = i = 0;
   1053 
   1054     while (!count || n < count) {
   1055 
   1056         state_reset(&state);
   1057 
   1058         state.ptr = state.start;
   1059 
   1060         status = sre_search(&state, PatternObject_GetCode(self));
   1061         if (PyErr_Occurred())
   1062             goto error;
   1063 
   1064         if (status <= 0) {
   1065             if (status == 0)
   1066                 break;
   1067             pattern_error(status);
   1068             goto error;
   1069         }
   1070 
   1071         b = STATE_OFFSET(&state, state.start);
   1072         e = STATE_OFFSET(&state, state.ptr);
   1073 
   1074         if (i < b) {
   1075             /* get segment before this match */
   1076             item = getslice(state.isbytes, state.beginning,
   1077                 string, i, b);
   1078             if (!item)
   1079                 goto error;
   1080             status = PyList_Append(list, item);
   1081             Py_DECREF(item);
   1082             if (status < 0)
   1083                 goto error;
   1084 
   1085         }
   1086 
   1087         if (filter_is_callable) {
   1088             /* pass match object through filter */
   1089             match = pattern_new_match(self, &state, 1);
   1090             if (!match)
   1091                 goto error;
   1092             item = PyObject_CallFunctionObjArgs(filter, match, NULL);
   1093             Py_DECREF(match);
   1094             if (!item)
   1095                 goto error;
   1096         } else {
   1097             /* filter is literal string */
   1098             item = filter;
   1099             Py_INCREF(item);
   1100         }
   1101 
   1102         /* add to list */
   1103         if (item != Py_None) {
   1104             status = PyList_Append(list, item);
   1105             Py_DECREF(item);
   1106             if (status < 0)
   1107                 goto error;
   1108         }
   1109 
   1110         i = e;
   1111         n = n + 1;
   1112         state.must_advance = (state.ptr == state.start);
   1113         state.start = state.ptr;
   1114     }
   1115 
   1116     /* get segment following last match */
   1117     if (i < state.endpos) {
   1118         item = getslice(state.isbytes, state.beginning,
   1119                         string, i, state.endpos);
   1120         if (!item)
   1121             goto error;
   1122         status = PyList_Append(list, item);
   1123         Py_DECREF(item);
   1124         if (status < 0)
   1125             goto error;
   1126     }
   1127 
   1128     state_fini(&state);
   1129 
   1130     Py_DECREF(filter);
   1131 
   1132     /* convert list to single string (also removes list) */
   1133     joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
   1134     if (!joiner) {
   1135         Py_DECREF(list);
   1136         return NULL;
   1137     }
   1138     if (PyList_GET_SIZE(list) == 0) {
   1139         Py_DECREF(list);
   1140         item = joiner;
   1141     }
   1142     else {
   1143         if (state.isbytes)
   1144             item = _PyBytes_Join(joiner, list);
   1145         else
   1146             item = PyUnicode_Join(joiner, list);
   1147         Py_DECREF(joiner);
   1148         Py_DECREF(list);
   1149         if (!item)
   1150             return NULL;
   1151     }
   1152 
   1153     if (subn)
   1154         return Py_BuildValue("Nn", item, n);
   1155 
   1156     return item;
   1157 
   1158 error:
   1159     Py_DECREF(list);
   1160     state_fini(&state);
   1161     Py_DECREF(filter);
   1162     return NULL;
   1163 
   1164 }
   1165 
   1166 /*[clinic input]
   1167 _sre.SRE_Pattern.sub
   1168 
   1169     repl: object
   1170     string: object
   1171     count: Py_ssize_t = 0
   1172 
   1173 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
   1174 [clinic start generated code]*/
   1175 
   1176 static PyObject *
   1177 _sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
   1178                           PyObject *string, Py_ssize_t count)
   1179 /*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
   1180 {
   1181     return pattern_subx(self, repl, string, count, 0);
   1182 }
   1183 
   1184 /*[clinic input]
   1185 _sre.SRE_Pattern.subn
   1186 
   1187     repl: object
   1188     string: object
   1189     count: Py_ssize_t = 0
   1190 
   1191 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
   1192 [clinic start generated code]*/
   1193 
   1194 static PyObject *
   1195 _sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
   1196                            PyObject *string, Py_ssize_t count)
   1197 /*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
   1198 {
   1199     return pattern_subx(self, repl, string, count, 1);
   1200 }
   1201 
   1202 /*[clinic input]
   1203 _sre.SRE_Pattern.__copy__
   1204 
   1205 [clinic start generated code]*/
   1206 
   1207 static PyObject *
   1208 _sre_SRE_Pattern___copy___impl(PatternObject *self)
   1209 /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
   1210 {
   1211     Py_INCREF(self);
   1212     return (PyObject *)self;
   1213 }
   1214 
   1215 /*[clinic input]
   1216 _sre.SRE_Pattern.__deepcopy__
   1217 
   1218     memo: object
   1219     /
   1220 
   1221 [clinic start generated code]*/
   1222 
   1223 static PyObject *
   1224 _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
   1225 /*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
   1226 {
   1227     Py_INCREF(self);
   1228     return (PyObject *)self;
   1229 }
   1230 
   1231 static PyObject *
   1232 pattern_repr(PatternObject *obj)
   1233 {
   1234     static const struct {
   1235         const char *name;
   1236         int value;
   1237     } flag_names[] = {
   1238         {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
   1239         {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
   1240         {"re.LOCALE", SRE_FLAG_LOCALE},
   1241         {"re.MULTILINE", SRE_FLAG_MULTILINE},
   1242         {"re.DOTALL", SRE_FLAG_DOTALL},
   1243         {"re.UNICODE", SRE_FLAG_UNICODE},
   1244         {"re.VERBOSE", SRE_FLAG_VERBOSE},
   1245         {"re.DEBUG", SRE_FLAG_DEBUG},
   1246         {"re.ASCII", SRE_FLAG_ASCII},
   1247     };
   1248     PyObject *result = NULL;
   1249     PyObject *flag_items;
   1250     size_t i;
   1251     int flags = obj->flags;
   1252 
   1253     /* Omit re.UNICODE for valid string patterns. */
   1254     if (obj->isbytes == 0 &&
   1255         (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
   1256          SRE_FLAG_UNICODE)
   1257         flags &= ~SRE_FLAG_UNICODE;
   1258 
   1259     flag_items = PyList_New(0);
   1260     if (!flag_items)
   1261         return NULL;
   1262 
   1263     for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
   1264         if (flags & flag_names[i].value) {
   1265             PyObject *item = PyUnicode_FromString(flag_names[i].name);
   1266             if (!item)
   1267                 goto done;
   1268 
   1269             if (PyList_Append(flag_items, item) < 0) {
   1270                 Py_DECREF(item);
   1271                 goto done;
   1272             }
   1273             Py_DECREF(item);
   1274             flags &= ~flag_names[i].value;
   1275         }
   1276     }
   1277     if (flags) {
   1278         PyObject *item = PyUnicode_FromFormat("0x%x", flags);
   1279         if (!item)
   1280             goto done;
   1281 
   1282         if (PyList_Append(flag_items, item) < 0) {
   1283             Py_DECREF(item);
   1284             goto done;
   1285         }
   1286         Py_DECREF(item);
   1287     }
   1288 
   1289     if (PyList_Size(flag_items) > 0) {
   1290         PyObject *flags_result;
   1291         PyObject *sep = PyUnicode_FromString("|");
   1292         if (!sep)
   1293             goto done;
   1294         flags_result = PyUnicode_Join(sep, flag_items);
   1295         Py_DECREF(sep);
   1296         if (!flags_result)
   1297             goto done;
   1298         result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
   1299                                       obj->pattern, flags_result);
   1300         Py_DECREF(flags_result);
   1301     }
   1302     else {
   1303         result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
   1304     }
   1305 
   1306 done:
   1307     Py_DECREF(flag_items);
   1308     return result;
   1309 }
   1310 
   1311 PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
   1312 
   1313 /* PatternObject's 'groupindex' method. */
   1314 static PyObject *
   1315 pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
   1316 {
   1317     if (self->groupindex == NULL)
   1318         return PyDict_New();
   1319     return PyDictProxy_New(self->groupindex);
   1320 }
   1321 
   1322 static int _validate(PatternObject *self); /* Forward */
   1323 
   1324 /*[clinic input]
   1325 _sre.compile
   1326 
   1327     pattern: object
   1328     flags: int
   1329     code: object(subclass_of='&PyList_Type')
   1330     groups: Py_ssize_t
   1331     groupindex: object(subclass_of='&PyDict_Type')
   1332     indexgroup: object(subclass_of='&PyTuple_Type')
   1333 
   1334 [clinic start generated code]*/
   1335 
   1336 static PyObject *
   1337 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
   1338                   PyObject *code, Py_ssize_t groups, PyObject *groupindex,
   1339                   PyObject *indexgroup)
   1340 /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
   1341 {
   1342     /* "compile" pattern descriptor to pattern object */
   1343 
   1344     PatternObject* self;
   1345     Py_ssize_t i, n;
   1346 
   1347     n = PyList_GET_SIZE(code);
   1348     /* coverity[ampersand_in_size] */
   1349     self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
   1350     if (!self)
   1351         return NULL;
   1352     self->weakreflist = NULL;
   1353     self->pattern = NULL;
   1354     self->groupindex = NULL;
   1355     self->indexgroup = NULL;
   1356 
   1357     self->codesize = n;
   1358 
   1359     for (i = 0; i < n; i++) {
   1360         PyObject *o = PyList_GET_ITEM(code, i);
   1361         unsigned long value = PyLong_AsUnsignedLong(o);
   1362         self->code[i] = (SRE_CODE) value;
   1363         if ((unsigned long) self->code[i] != value) {
   1364             PyErr_SetString(PyExc_OverflowError,
   1365                             "regular expression code size limit exceeded");
   1366             break;
   1367         }
   1368     }
   1369 
   1370     if (PyErr_Occurred()) {
   1371         Py_DECREF(self);
   1372         return NULL;
   1373     }
   1374 
   1375     if (pattern == Py_None) {
   1376         self->isbytes = -1;
   1377     }
   1378     else {
   1379         Py_ssize_t p_length;
   1380         int charsize;
   1381         Py_buffer view;
   1382         view.buf = NULL;
   1383         if (!getstring(pattern, &p_length, &self->isbytes,
   1384                        &charsize, &view)) {
   1385             Py_DECREF(self);
   1386             return NULL;
   1387         }
   1388         if (view.buf)
   1389             PyBuffer_Release(&view);
   1390     }
   1391 
   1392     Py_INCREF(pattern);
   1393     self->pattern = pattern;
   1394 
   1395     self->flags = flags;
   1396 
   1397     self->groups = groups;
   1398 
   1399     if (PyDict_GET_SIZE(groupindex) > 0) {
   1400         Py_INCREF(groupindex);
   1401         self->groupindex = groupindex;
   1402         if (PyTuple_GET_SIZE(indexgroup) > 0) {
   1403             Py_INCREF(indexgroup);
   1404             self->indexgroup = indexgroup;
   1405         }
   1406     }
   1407 
   1408     if (!_validate(self)) {
   1409         Py_DECREF(self);
   1410         return NULL;
   1411     }
   1412 
   1413     return (PyObject*) self;
   1414 }
   1415 
   1416 /* -------------------------------------------------------------------- */
   1417 /* Code validation */
   1418 
   1419 /* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   1421    for conformance with the code patterns generated there.
   1422 
   1423    The nice thing about the generated code is that it is position-independent:
   1424    all jumps are relative jumps forward.  Also, jumps don't cross each other:
   1425    the target of a later jump is always earlier than the target of an earlier
   1426    jump.  IOW, this is okay:
   1427 
   1428    J---------J-------T--------T
   1429     \         \_____/        /
   1430      \______________________/
   1431 
   1432    but this is not:
   1433 
   1434    J---------J-------T--------T
   1435     \_________\_____/        /
   1436                \____________/
   1437 
   1438    It also helps that SRE_CODE is always an unsigned type.
   1439 */
   1440 
   1441 /* Defining this one enables tracing of the validator */
   1442 #undef VVERBOSE
   1443 
   1444 /* Trace macro for the validator */
   1445 #if defined(VVERBOSE)
   1446 #define VTRACE(v) printf v
   1447 #else
   1448 #define VTRACE(v) do {} while(0)  /* do nothing */
   1449 #endif
   1450 
   1451 /* Report failure */
   1452 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
   1453 
   1454 /* Extract opcode, argument, or skip count from code array */
   1455 #define GET_OP                                          \
   1456     do {                                                \
   1457         VTRACE(("%p: ", code));                         \
   1458         if (code >= end) FAIL;                          \
   1459         op = *code++;                                   \
   1460         VTRACE(("%lu (op)\n", (unsigned long)op));      \
   1461     } while (0)
   1462 #define GET_ARG                                         \
   1463     do {                                                \
   1464         VTRACE(("%p= ", code));                         \
   1465         if (code >= end) FAIL;                          \
   1466         arg = *code++;                                  \
   1467         VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
   1468     } while (0)
   1469 #define GET_SKIP_ADJ(adj)                               \
   1470     do {                                                \
   1471         VTRACE(("%p= ", code));                         \
   1472         if (code >= end) FAIL;                          \
   1473         skip = *code;                                   \
   1474         VTRACE(("%lu (skip to %p)\n",                   \
   1475                (unsigned long)skip, code+skip));        \
   1476         if (skip-adj > (uintptr_t)(end - code))      \
   1477             FAIL;                                       \
   1478         code++;                                         \
   1479     } while (0)
   1480 #define GET_SKIP GET_SKIP_ADJ(0)
   1481 
   1482 static int
   1483 _validate_charset(SRE_CODE *code, SRE_CODE *end)
   1484 {
   1485     /* Some variables are manipulated by the macros above */
   1486     SRE_CODE op;
   1487     SRE_CODE arg;
   1488     SRE_CODE offset;
   1489     int i;
   1490 
   1491     while (code < end) {
   1492         GET_OP;
   1493         switch (op) {
   1494 
   1495         case SRE_OP_NEGATE:
   1496             break;
   1497 
   1498         case SRE_OP_LITERAL:
   1499             GET_ARG;
   1500             break;
   1501 
   1502         case SRE_OP_RANGE:
   1503         case SRE_OP_RANGE_UNI_IGNORE:
   1504             GET_ARG;
   1505             GET_ARG;
   1506             break;
   1507 
   1508         case SRE_OP_CHARSET:
   1509             offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
   1510             if (offset > (uintptr_t)(end - code))
   1511                 FAIL;
   1512             code += offset;
   1513             break;
   1514 
   1515         case SRE_OP_BIGCHARSET:
   1516             GET_ARG; /* Number of blocks */
   1517             offset = 256/sizeof(SRE_CODE); /* 256-byte table */
   1518             if (offset > (uintptr_t)(end - code))
   1519                 FAIL;
   1520             /* Make sure that each byte points to a valid block */
   1521             for (i = 0; i < 256; i++) {
   1522                 if (((unsigned char *)code)[i] >= arg)
   1523                     FAIL;
   1524             }
   1525             code += offset;
   1526             offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
   1527             if (offset > (uintptr_t)(end - code))
   1528                 FAIL;
   1529             code += offset;
   1530             break;
   1531 
   1532         case SRE_OP_CATEGORY:
   1533             GET_ARG;
   1534             switch (arg) {
   1535             case SRE_CATEGORY_DIGIT:
   1536             case SRE_CATEGORY_NOT_DIGIT:
   1537             case SRE_CATEGORY_SPACE:
   1538             case SRE_CATEGORY_NOT_SPACE:
   1539             case SRE_CATEGORY_WORD:
   1540             case SRE_CATEGORY_NOT_WORD:
   1541             case SRE_CATEGORY_LINEBREAK:
   1542             case SRE_CATEGORY_NOT_LINEBREAK:
   1543             case SRE_CATEGORY_LOC_WORD:
   1544             case SRE_CATEGORY_LOC_NOT_WORD:
   1545             case SRE_CATEGORY_UNI_DIGIT:
   1546             case SRE_CATEGORY_UNI_NOT_DIGIT:
   1547             case SRE_CATEGORY_UNI_SPACE:
   1548             case SRE_CATEGORY_UNI_NOT_SPACE:
   1549             case SRE_CATEGORY_UNI_WORD:
   1550             case SRE_CATEGORY_UNI_NOT_WORD:
   1551             case SRE_CATEGORY_UNI_LINEBREAK:
   1552             case SRE_CATEGORY_UNI_NOT_LINEBREAK:
   1553                 break;
   1554             default:
   1555                 FAIL;
   1556             }
   1557             break;
   1558 
   1559         default:
   1560             FAIL;
   1561 
   1562         }
   1563     }
   1564 
   1565     return 1;
   1566 }
   1567 
   1568 static int
   1569 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
   1570 {
   1571     /* Some variables are manipulated by the macros above */
   1572     SRE_CODE op;
   1573     SRE_CODE arg;
   1574     SRE_CODE skip;
   1575 
   1576     VTRACE(("code=%p, end=%p\n", code, end));
   1577 
   1578     if (code > end)
   1579         FAIL;
   1580 
   1581     while (code < end) {
   1582         GET_OP;
   1583         switch (op) {
   1584 
   1585         case SRE_OP_MARK:
   1586             /* We don't check whether marks are properly nested; the
   1587                sre_match() code is robust even if they don't, and the worst
   1588                you can get is nonsensical match results. */
   1589             GET_ARG;
   1590             if (arg > 2 * (size_t)groups + 1) {
   1591                 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
   1592                 FAIL;
   1593             }
   1594             break;
   1595 
   1596         case SRE_OP_LITERAL:
   1597         case SRE_OP_NOT_LITERAL:
   1598         case SRE_OP_LITERAL_IGNORE:
   1599         case SRE_OP_NOT_LITERAL_IGNORE:
   1600         case SRE_OP_LITERAL_UNI_IGNORE:
   1601         case SRE_OP_NOT_LITERAL_UNI_IGNORE:
   1602         case SRE_OP_LITERAL_LOC_IGNORE:
   1603         case SRE_OP_NOT_LITERAL_LOC_IGNORE:
   1604             GET_ARG;
   1605             /* The arg is just a character, nothing to check */
   1606             break;
   1607 
   1608         case SRE_OP_SUCCESS:
   1609         case SRE_OP_FAILURE:
   1610             /* Nothing to check; these normally end the matching process */
   1611             break;
   1612 
   1613         case SRE_OP_AT:
   1614             GET_ARG;
   1615             switch (arg) {
   1616             case SRE_AT_BEGINNING:
   1617             case SRE_AT_BEGINNING_STRING:
   1618             case SRE_AT_BEGINNING_LINE:
   1619             case SRE_AT_END:
   1620             case SRE_AT_END_LINE:
   1621             case SRE_AT_END_STRING:
   1622             case SRE_AT_BOUNDARY:
   1623             case SRE_AT_NON_BOUNDARY:
   1624             case SRE_AT_LOC_BOUNDARY:
   1625             case SRE_AT_LOC_NON_BOUNDARY:
   1626             case SRE_AT_UNI_BOUNDARY:
   1627             case SRE_AT_UNI_NON_BOUNDARY:
   1628                 break;
   1629             default:
   1630                 FAIL;
   1631             }
   1632             break;
   1633 
   1634         case SRE_OP_ANY:
   1635         case SRE_OP_ANY_ALL:
   1636             /* These have no operands */
   1637             break;
   1638 
   1639         case SRE_OP_IN:
   1640         case SRE_OP_IN_IGNORE:
   1641         case SRE_OP_IN_UNI_IGNORE:
   1642         case SRE_OP_IN_LOC_IGNORE:
   1643             GET_SKIP;
   1644             /* Stop 1 before the end; we check the FAILURE below */
   1645             if (!_validate_charset(code, code+skip-2))
   1646                 FAIL;
   1647             if (code[skip-2] != SRE_OP_FAILURE)
   1648                 FAIL;
   1649             code += skip-1;
   1650             break;
   1651 
   1652         case SRE_OP_INFO:
   1653             {
   1654                 /* A minimal info field is
   1655                    <INFO> <1=skip> <2=flags> <3=min> <4=max>;
   1656                    If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
   1657                    more follows. */
   1658                 SRE_CODE flags, i;
   1659                 SRE_CODE *newcode;
   1660                 GET_SKIP;
   1661                 newcode = code+skip-1;
   1662                 GET_ARG; flags = arg;
   1663                 GET_ARG;
   1664                 GET_ARG;
   1665                 /* Check that only valid flags are present */
   1666                 if ((flags & ~(SRE_INFO_PREFIX |
   1667                                SRE_INFO_LITERAL |
   1668                                SRE_INFO_CHARSET)) != 0)
   1669                     FAIL;
   1670                 /* PREFIX and CHARSET are mutually exclusive */
   1671                 if ((flags & SRE_INFO_PREFIX) &&
   1672                     (flags & SRE_INFO_CHARSET))
   1673                     FAIL;
   1674                 /* LITERAL implies PREFIX */
   1675                 if ((flags & SRE_INFO_LITERAL) &&
   1676                     !(flags & SRE_INFO_PREFIX))
   1677                     FAIL;
   1678                 /* Validate the prefix */
   1679                 if (flags & SRE_INFO_PREFIX) {
   1680                     SRE_CODE prefix_len;
   1681                     GET_ARG; prefix_len = arg;
   1682                     GET_ARG;
   1683                     /* Here comes the prefix string */
   1684                     if (prefix_len > (uintptr_t)(newcode - code))
   1685                         FAIL;
   1686                     code += prefix_len;
   1687                     /* And here comes the overlap table */
   1688                     if (prefix_len > (uintptr_t)(newcode - code))
   1689                         FAIL;
   1690                     /* Each overlap value should be < prefix_len */
   1691                     for (i = 0; i < prefix_len; i++) {
   1692                         if (code[i] >= prefix_len)
   1693                             FAIL;
   1694                     }
   1695                     code += prefix_len;
   1696                 }
   1697                 /* Validate the charset */
   1698                 if (flags & SRE_INFO_CHARSET) {
   1699                     if (!_validate_charset(code, newcode-1))
   1700                         FAIL;
   1701                     if (newcode[-1] != SRE_OP_FAILURE)
   1702                         FAIL;
   1703                     code = newcode;
   1704                 }
   1705                 else if (code != newcode) {
   1706                   VTRACE(("code=%p, newcode=%p\n", code, newcode));
   1707                     FAIL;
   1708                 }
   1709             }
   1710             break;
   1711 
   1712         case SRE_OP_BRANCH:
   1713             {
   1714                 SRE_CODE *target = NULL;
   1715                 for (;;) {
   1716                     GET_SKIP;
   1717                     if (skip == 0)
   1718                         break;
   1719                     /* Stop 2 before the end; we check the JUMP below */
   1720                     if (!_validate_inner(code, code+skip-3, groups))
   1721                         FAIL;
   1722                     code += skip-3;
   1723                     /* Check that it ends with a JUMP, and that each JUMP
   1724                        has the same target */
   1725                     GET_OP;
   1726                     if (op != SRE_OP_JUMP)
   1727                         FAIL;
   1728                     GET_SKIP;
   1729                     if (target == NULL)
   1730                         target = code+skip-1;
   1731                     else if (code+skip-1 != target)
   1732                         FAIL;
   1733                 }
   1734             }
   1735             break;
   1736 
   1737         case SRE_OP_REPEAT_ONE:
   1738         case SRE_OP_MIN_REPEAT_ONE:
   1739             {
   1740                 SRE_CODE min, max;
   1741                 GET_SKIP;
   1742                 GET_ARG; min = arg;
   1743                 GET_ARG; max = arg;
   1744                 if (min > max)
   1745                     FAIL;
   1746                 if (max > SRE_MAXREPEAT)
   1747                     FAIL;
   1748                 if (!_validate_inner(code, code+skip-4, groups))
   1749                     FAIL;
   1750                 code += skip-4;
   1751                 GET_OP;
   1752                 if (op != SRE_OP_SUCCESS)
   1753                     FAIL;
   1754             }
   1755             break;
   1756 
   1757         case SRE_OP_REPEAT:
   1758             {
   1759                 SRE_CODE min, max;
   1760                 GET_SKIP;
   1761                 GET_ARG; min = arg;
   1762                 GET_ARG; max = arg;
   1763                 if (min > max)
   1764                     FAIL;
   1765                 if (max > SRE_MAXREPEAT)
   1766                     FAIL;
   1767                 if (!_validate_inner(code, code+skip-3, groups))
   1768                     FAIL;
   1769                 code += skip-3;
   1770                 GET_OP;
   1771                 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
   1772                     FAIL;
   1773             }
   1774             break;
   1775 
   1776         case SRE_OP_GROUPREF:
   1777         case SRE_OP_GROUPREF_IGNORE:
   1778         case SRE_OP_GROUPREF_UNI_IGNORE:
   1779         case SRE_OP_GROUPREF_LOC_IGNORE:
   1780             GET_ARG;
   1781             if (arg >= (size_t)groups)
   1782                 FAIL;
   1783             break;
   1784 
   1785         case SRE_OP_GROUPREF_EXISTS:
   1786             /* The regex syntax for this is: '(?(group)then|else)', where
   1787                'group' is either an integer group number or a group name,
   1788                'then' and 'else' are sub-regexes, and 'else' is optional. */
   1789             GET_ARG;
   1790             if (arg >= (size_t)groups)
   1791                 FAIL;
   1792             GET_SKIP_ADJ(1);
   1793             code--; /* The skip is relative to the first arg! */
   1794             /* There are two possibilities here: if there is both a 'then'
   1795                part and an 'else' part, the generated code looks like:
   1796 
   1797                GROUPREF_EXISTS
   1798                <group>
   1799                <skipyes>
   1800                ...then part...
   1801                JUMP
   1802                <skipno>
   1803                (<skipyes> jumps here)
   1804                ...else part...
   1805                (<skipno> jumps here)
   1806 
   1807                If there is only a 'then' part, it looks like:
   1808 
   1809                GROUPREF_EXISTS
   1810                <group>
   1811                <skip>
   1812                ...then part...
   1813                (<skip> jumps here)
   1814 
   1815                There is no direct way to decide which it is, and we don't want
   1816                to allow arbitrary jumps anywhere in the code; so we just look
   1817                for a JUMP opcode preceding our skip target.
   1818             */
   1819             if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
   1820                 code[skip-3] == SRE_OP_JUMP)
   1821             {
   1822                 VTRACE(("both then and else parts present\n"));
   1823                 if (!_validate_inner(code+1, code+skip-3, groups))
   1824                     FAIL;
   1825                 code += skip-2; /* Position after JUMP, at <skipno> */
   1826                 GET_SKIP;
   1827                 if (!_validate_inner(code, code+skip-1, groups))
   1828                     FAIL;
   1829                 code += skip-1;
   1830             }
   1831             else {
   1832                 VTRACE(("only a then part present\n"));
   1833                 if (!_validate_inner(code+1, code+skip-1, groups))
   1834                     FAIL;
   1835                 code += skip-1;
   1836             }
   1837             break;
   1838 
   1839         case SRE_OP_ASSERT:
   1840         case SRE_OP_ASSERT_NOT:
   1841             GET_SKIP;
   1842             GET_ARG; /* 0 for lookahead, width for lookbehind */
   1843             code--; /* Back up over arg to simplify math below */
   1844             if (arg & 0x80000000)
   1845                 FAIL; /* Width too large */
   1846             /* Stop 1 before the end; we check the SUCCESS below */
   1847             if (!_validate_inner(code+1, code+skip-2, groups))
   1848                 FAIL;
   1849             code += skip-2;
   1850             GET_OP;
   1851             if (op != SRE_OP_SUCCESS)
   1852                 FAIL;
   1853             break;
   1854 
   1855         default:
   1856             FAIL;
   1857 
   1858         }
   1859     }
   1860 
   1861     VTRACE(("okay\n"));
   1862     return 1;
   1863 }
   1864 
   1865 static int
   1866 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
   1867 {
   1868     if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
   1869         code >= end || end[-1] != SRE_OP_SUCCESS)
   1870         FAIL;
   1871     return _validate_inner(code, end-1, groups);
   1872 }
   1873 
   1874 static int
   1875 _validate(PatternObject *self)
   1876 {
   1877     if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
   1878     {
   1879         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
   1880         return 0;
   1881     }
   1882     else
   1883         VTRACE(("Success!\n"));
   1884     return 1;
   1885 }
   1886 
   1887 /* -------------------------------------------------------------------- */
   1888 /* match methods */
   1889 
   1890 static void
   1891 match_dealloc(MatchObject* self)
   1892 {
   1893     Py_XDECREF(self->regs);
   1894     Py_XDECREF(self->string);
   1895     Py_DECREF(self->pattern);
   1896     PyObject_DEL(self);
   1897 }
   1898 
   1899 static PyObject*
   1900 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
   1901 {
   1902     Py_ssize_t length;
   1903     int isbytes, charsize;
   1904     Py_buffer view;
   1905     PyObject *result;
   1906     void* ptr;
   1907     Py_ssize_t i, j;
   1908 
   1909     if (index < 0 || index >= self->groups) {
   1910         /* raise IndexError if we were given a bad group number */
   1911         PyErr_SetString(
   1912             PyExc_IndexError,
   1913             "no such group"
   1914             );
   1915         return NULL;
   1916     }
   1917 
   1918     index *= 2;
   1919 
   1920     if (self->string == Py_None || self->mark[index] < 0) {
   1921         /* return default value if the string or group is undefined */
   1922         Py_INCREF(def);
   1923         return def;
   1924     }
   1925 
   1926     ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
   1927     if (ptr == NULL)
   1928         return NULL;
   1929 
   1930     i = self->mark[index];
   1931     j = self->mark[index+1];
   1932     i = Py_MIN(i, length);
   1933     j = Py_MIN(j, length);
   1934     result = getslice(isbytes, ptr, self->string, i, j);
   1935     if (isbytes && view.buf != NULL)
   1936         PyBuffer_Release(&view);
   1937     return result;
   1938 }
   1939 
   1940 static Py_ssize_t
   1941 match_getindex(MatchObject* self, PyObject* index)
   1942 {
   1943     Py_ssize_t i;
   1944 
   1945     if (index == NULL)
   1946         /* Default value */
   1947         return 0;
   1948 
   1949     if (PyIndex_Check(index)) {
   1950         return PyNumber_AsSsize_t(index, NULL);
   1951     }
   1952 
   1953     i = -1;
   1954 
   1955     if (self->pattern->groupindex) {
   1956         index = PyDict_GetItem(self->pattern->groupindex, index);
   1957         if (index && PyLong_Check(index)) {
   1958             i = PyLong_AsSsize_t(index);
   1959         }
   1960     }
   1961 
   1962     return i;
   1963 }
   1964 
   1965 static PyObject*
   1966 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
   1967 {
   1968     return match_getslice_by_index(self, match_getindex(self, index), def);
   1969 }
   1970 
   1971 /*[clinic input]
   1972 _sre.SRE_Match.expand
   1973 
   1974     template: object
   1975 
   1976 Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
   1977 [clinic start generated code]*/
   1978 
   1979 static PyObject *
   1980 _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
   1981 /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
   1982 {
   1983     /* delegate to Python code */
   1984     return call(
   1985         SRE_PY_MODULE, "_expand",
   1986         PyTuple_Pack(3, self->pattern, self, template)
   1987         );
   1988 }
   1989 
   1990 static PyObject*
   1991 match_group(MatchObject* self, PyObject* args)
   1992 {
   1993     PyObject* result;
   1994     Py_ssize_t i, size;
   1995 
   1996     size = PyTuple_GET_SIZE(args);
   1997 
   1998     switch (size) {
   1999     case 0:
   2000         result = match_getslice(self, _PyLong_Zero, Py_None);
   2001         break;
   2002     case 1:
   2003         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
   2004         break;
   2005     default:
   2006         /* fetch multiple items */
   2007         result = PyTuple_New(size);
   2008         if (!result)
   2009             return NULL;
   2010         for (i = 0; i < size; i++) {
   2011             PyObject* item = match_getslice(
   2012                 self, PyTuple_GET_ITEM(args, i), Py_None
   2013                 );
   2014             if (!item) {
   2015                 Py_DECREF(result);
   2016                 return NULL;
   2017             }
   2018             PyTuple_SET_ITEM(result, i, item);
   2019         }
   2020         break;
   2021     }
   2022     return result;
   2023 }
   2024 
   2025 static PyObject*
   2026 match_getitem(MatchObject* self, PyObject* name)
   2027 {
   2028     return match_getslice(self, name, Py_None);
   2029 }
   2030 
   2031 /*[clinic input]
   2032 _sre.SRE_Match.groups
   2033 
   2034     default: object = None
   2035         Is used for groups that did not participate in the match.
   2036 
   2037 Return a tuple containing all the subgroups of the match, from 1.
   2038 [clinic start generated code]*/
   2039 
   2040 static PyObject *
   2041 _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
   2042 /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
   2043 {
   2044     PyObject* result;
   2045     Py_ssize_t index;
   2046 
   2047     result = PyTuple_New(self->groups-1);
   2048     if (!result)
   2049         return NULL;
   2050 
   2051     for (index = 1; index < self->groups; index++) {
   2052         PyObject* item;
   2053         item = match_getslice_by_index(self, index, default_value);
   2054         if (!item) {
   2055             Py_DECREF(result);
   2056             return NULL;
   2057         }
   2058         PyTuple_SET_ITEM(result, index-1, item);
   2059     }
   2060 
   2061     return result;
   2062 }
   2063 
   2064 /*[clinic input]
   2065 _sre.SRE_Match.groupdict
   2066 
   2067     default: object = None
   2068         Is used for groups that did not participate in the match.
   2069 
   2070 Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
   2071 [clinic start generated code]*/
   2072 
   2073 static PyObject *
   2074 _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
   2075 /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
   2076 {
   2077     PyObject *result;
   2078     PyObject *key;
   2079     PyObject *value;
   2080     Py_ssize_t pos = 0;
   2081     Py_hash_t hash;
   2082 
   2083     result = PyDict_New();
   2084     if (!result || !self->pattern->groupindex)
   2085         return result;
   2086 
   2087     while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
   2088         int status;
   2089         Py_INCREF(key);
   2090         value = match_getslice(self, key, default_value);
   2091         if (!value) {
   2092             Py_DECREF(key);
   2093             goto failed;
   2094         }
   2095         status = _PyDict_SetItem_KnownHash(result, key, value, hash);
   2096         Py_DECREF(value);
   2097         Py_DECREF(key);
   2098         if (status < 0)
   2099             goto failed;
   2100     }
   2101 
   2102     return result;
   2103 
   2104 failed:
   2105     Py_DECREF(result);
   2106     return NULL;
   2107 }
   2108 
   2109 /*[clinic input]
   2110 _sre.SRE_Match.start -> Py_ssize_t
   2111 
   2112     group: object(c_default="NULL") = 0
   2113     /
   2114 
   2115 Return index of the start of the substring matched by group.
   2116 [clinic start generated code]*/
   2117 
   2118 static Py_ssize_t
   2119 _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
   2120 /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
   2121 {
   2122     Py_ssize_t index = match_getindex(self, group);
   2123 
   2124     if (index < 0 || index >= self->groups) {
   2125         PyErr_SetString(
   2126             PyExc_IndexError,
   2127             "no such group"
   2128             );
   2129         return -1;
   2130     }
   2131 
   2132     /* mark is -1 if group is undefined */
   2133     return self->mark[index*2];
   2134 }
   2135 
   2136 /*[clinic input]
   2137 _sre.SRE_Match.end -> Py_ssize_t
   2138 
   2139     group: object(c_default="NULL") = 0
   2140     /
   2141 
   2142 Return index of the end of the substring matched by group.
   2143 [clinic start generated code]*/
   2144 
   2145 static Py_ssize_t
   2146 _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
   2147 /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
   2148 {
   2149     Py_ssize_t index = match_getindex(self, group);
   2150 
   2151     if (index < 0 || index >= self->groups) {
   2152         PyErr_SetString(
   2153             PyExc_IndexError,
   2154             "no such group"
   2155             );
   2156         return -1;
   2157     }
   2158 
   2159     /* mark is -1 if group is undefined */
   2160     return self->mark[index*2+1];
   2161 }
   2162 
   2163 LOCAL(PyObject*)
   2164 _pair(Py_ssize_t i1, Py_ssize_t i2)
   2165 {
   2166     PyObject* pair;
   2167     PyObject* item;
   2168 
   2169     pair = PyTuple_New(2);
   2170     if (!pair)
   2171         return NULL;
   2172 
   2173     item = PyLong_FromSsize_t(i1);
   2174     if (!item)
   2175         goto error;
   2176     PyTuple_SET_ITEM(pair, 0, item);
   2177 
   2178     item = PyLong_FromSsize_t(i2);
   2179     if (!item)
   2180         goto error;
   2181     PyTuple_SET_ITEM(pair, 1, item);
   2182 
   2183     return pair;
   2184 
   2185   error:
   2186     Py_DECREF(pair);
   2187     return NULL;
   2188 }
   2189 
   2190 /*[clinic input]
   2191 _sre.SRE_Match.span
   2192 
   2193     group: object(c_default="NULL") = 0
   2194     /
   2195 
   2196 For match object m, return the 2-tuple (m.start(group), m.end(group)).
   2197 [clinic start generated code]*/
   2198 
   2199 static PyObject *
   2200 _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
   2201 /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
   2202 {
   2203     Py_ssize_t index = match_getindex(self, group);
   2204 
   2205     if (index < 0 || index >= self->groups) {
   2206         PyErr_SetString(
   2207             PyExc_IndexError,
   2208             "no such group"
   2209             );
   2210         return NULL;
   2211     }
   2212 
   2213     /* marks are -1 if group is undefined */
   2214     return _pair(self->mark[index*2], self->mark[index*2+1]);
   2215 }
   2216 
   2217 static PyObject*
   2218 match_regs(MatchObject* self)
   2219 {
   2220     PyObject* regs;
   2221     PyObject* item;
   2222     Py_ssize_t index;
   2223 
   2224     regs = PyTuple_New(self->groups);
   2225     if (!regs)
   2226         return NULL;
   2227 
   2228     for (index = 0; index < self->groups; index++) {
   2229         item = _pair(self->mark[index*2], self->mark[index*2+1]);
   2230         if (!item) {
   2231             Py_DECREF(regs);
   2232             return NULL;
   2233         }
   2234         PyTuple_SET_ITEM(regs, index, item);
   2235     }
   2236 
   2237     Py_INCREF(regs);
   2238     self->regs = regs;
   2239 
   2240     return regs;
   2241 }
   2242 
   2243 /*[clinic input]
   2244 _sre.SRE_Match.__copy__
   2245 
   2246 [clinic start generated code]*/
   2247 
   2248 static PyObject *
   2249 _sre_SRE_Match___copy___impl(MatchObject *self)
   2250 /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
   2251 {
   2252     Py_INCREF(self);
   2253     return (PyObject *)self;
   2254 }
   2255 
   2256 /*[clinic input]
   2257 _sre.SRE_Match.__deepcopy__
   2258 
   2259     memo: object
   2260     /
   2261 
   2262 [clinic start generated code]*/
   2263 
   2264 static PyObject *
   2265 _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
   2266 /*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
   2267 {
   2268     Py_INCREF(self);
   2269     return (PyObject *)self;
   2270 }
   2271 
   2272 PyDoc_STRVAR(match_doc,
   2273 "The result of re.match() and re.search().\n\
   2274 Match objects always have a boolean value of True.");
   2275 
   2276 PyDoc_STRVAR(match_group_doc,
   2277 "group([group1, ...]) -> str or tuple.\n\
   2278     Return subgroup(s) of the match by indices or names.\n\
   2279     For 0 returns the entire match.");
   2280 
   2281 static PyObject *
   2282 match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
   2283 {
   2284     if (self->lastindex >= 0)
   2285         return PyLong_FromSsize_t(self->lastindex);
   2286     Py_RETURN_NONE;
   2287 }
   2288 
   2289 static PyObject *
   2290 match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
   2291 {
   2292     if (self->pattern->indexgroup &&
   2293         self->lastindex >= 0 &&
   2294         self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
   2295     {
   2296         PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
   2297                                             self->lastindex);
   2298         Py_INCREF(result);
   2299         return result;
   2300     }
   2301     Py_RETURN_NONE;
   2302 }
   2303 
   2304 static PyObject *
   2305 match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
   2306 {
   2307     if (self->regs) {
   2308         Py_INCREF(self->regs);
   2309         return self->regs;
   2310     } else
   2311         return match_regs(self);
   2312 }
   2313 
   2314 static PyObject *
   2315 match_repr(MatchObject *self)
   2316 {
   2317     PyObject *result;
   2318     PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
   2319     if (group0 == NULL)
   2320         return NULL;
   2321     result = PyUnicode_FromFormat(
   2322             "<%s object; span=(%zd, %zd), match=%.50R>",
   2323             Py_TYPE(self)->tp_name,
   2324             self->mark[0], self->mark[1], group0);
   2325     Py_DECREF(group0);
   2326     return result;
   2327 }
   2328 
   2329 
   2330 static PyObject*
   2331 pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
   2332 {
   2333     /* create match object (from state object) */
   2334 
   2335     MatchObject* match;
   2336     Py_ssize_t i, j;
   2337     char* base;
   2338     int n;
   2339 
   2340     if (status > 0) {
   2341 
   2342         /* create match object (with room for extra group marks) */
   2343         /* coverity[ampersand_in_size] */
   2344         match = PyObject_NEW_VAR(MatchObject, &Match_Type,
   2345                                  2*(pattern->groups+1));
   2346         if (!match)
   2347             return NULL;
   2348 
   2349         Py_INCREF(pattern);
   2350         match->pattern = pattern;
   2351 
   2352         Py_INCREF(state->string);
   2353         match->string = state->string;
   2354 
   2355         match->regs = NULL;
   2356         match->groups = pattern->groups+1;
   2357 
   2358         /* fill in group slices */
   2359 
   2360         base = (char*) state->beginning;
   2361         n = state->charsize;
   2362 
   2363         match->mark[0] = ((char*) state->start - base) / n;
   2364         match->mark[1] = ((char*) state->ptr - base) / n;
   2365 
   2366         for (i = j = 0; i < pattern->groups; i++, j+=2)
   2367             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
   2368                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
   2369                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
   2370             } else
   2371                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
   2372 
   2373         match->pos = state->pos;
   2374         match->endpos = state->endpos;
   2375 
   2376         match->lastindex = state->lastindex;
   2377 
   2378         return (PyObject*) match;
   2379 
   2380     } else if (status == 0) {
   2381 
   2382         /* no match */
   2383         Py_RETURN_NONE;
   2384 
   2385     }
   2386 
   2387     /* internal error */
   2388     pattern_error(status);
   2389     return NULL;
   2390 }
   2391 
   2392 
   2393 /* -------------------------------------------------------------------- */
   2394 /* scanner methods (experimental) */
   2395 
   2396 static void
   2397 scanner_dealloc(ScannerObject* self)
   2398 {
   2399     state_fini(&self->state);
   2400     Py_XDECREF(self->pattern);
   2401     PyObject_DEL(self);
   2402 }
   2403 
   2404 /*[clinic input]
   2405 _sre.SRE_Scanner.match
   2406 
   2407 [clinic start generated code]*/
   2408 
   2409 static PyObject *
   2410 _sre_SRE_Scanner_match_impl(ScannerObject *self)
   2411 /*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
   2412 {
   2413     SRE_STATE* state = &self->state;
   2414     PyObject* match;
   2415     Py_ssize_t status;
   2416 
   2417     if (state->start == NULL)
   2418         Py_RETURN_NONE;
   2419 
   2420     state_reset(state);
   2421 
   2422     state->ptr = state->start;
   2423 
   2424     status = sre_match(state, PatternObject_GetCode(self->pattern));
   2425     if (PyErr_Occurred())
   2426         return NULL;
   2427 
   2428     match = pattern_new_match((PatternObject*) self->pattern,
   2429                                state, status);
   2430 
   2431     if (status == 0)
   2432         state->start = NULL;
   2433     else {
   2434         state->must_advance = (state->ptr == state->start);
   2435         state->start = state->ptr;
   2436     }
   2437 
   2438     return match;
   2439 }
   2440 
   2441 
   2442 /*[clinic input]
   2443 _sre.SRE_Scanner.search
   2444 
   2445 [clinic start generated code]*/
   2446 
   2447 static PyObject *
   2448 _sre_SRE_Scanner_search_impl(ScannerObject *self)
   2449 /*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
   2450 {
   2451     SRE_STATE* state = &self->state;
   2452     PyObject* match;
   2453     Py_ssize_t status;
   2454 
   2455     if (state->start == NULL)
   2456         Py_RETURN_NONE;
   2457 
   2458     state_reset(state);
   2459 
   2460     state->ptr = state->start;
   2461 
   2462     status = sre_search(state, PatternObject_GetCode(self->pattern));
   2463     if (PyErr_Occurred())
   2464         return NULL;
   2465 
   2466     match = pattern_new_match((PatternObject*) self->pattern,
   2467                                state, status);
   2468 
   2469     if (status == 0)
   2470         state->start = NULL;
   2471     else {
   2472         state->must_advance = (state->ptr == state->start);
   2473         state->start = state->ptr;
   2474     }
   2475 
   2476     return match;
   2477 }
   2478 
   2479 static PyObject *
   2480 pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
   2481 {
   2482     ScannerObject* scanner;
   2483 
   2484     /* create scanner object */
   2485     scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
   2486     if (!scanner)
   2487         return NULL;
   2488     scanner->pattern = NULL;
   2489 
   2490     /* create search state object */
   2491     if (!state_init(&scanner->state, self, string, pos, endpos)) {
   2492         Py_DECREF(scanner);
   2493         return NULL;
   2494     }
   2495 
   2496     Py_INCREF(self);
   2497     scanner->pattern = (PyObject*) self;
   2498 
   2499     return (PyObject*) scanner;
   2500 }
   2501 
   2502 static Py_hash_t
   2503 pattern_hash(PatternObject *self)
   2504 {
   2505     Py_hash_t hash, hash2;
   2506 
   2507     hash = PyObject_Hash(self->pattern);
   2508     if (hash == -1) {
   2509         return -1;
   2510     }
   2511 
   2512     hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
   2513     hash ^= hash2;
   2514 
   2515     hash ^= self->flags;
   2516     hash ^= self->isbytes;
   2517     hash ^= self->codesize;
   2518 
   2519     if (hash == -1) {
   2520         hash = -2;
   2521     }
   2522     return hash;
   2523 }
   2524 
   2525 static PyObject*
   2526 pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
   2527 {
   2528     PatternObject *left, *right;
   2529     int cmp;
   2530 
   2531     if (op != Py_EQ && op != Py_NE) {
   2532         Py_RETURN_NOTIMPLEMENTED;
   2533     }
   2534 
   2535     if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
   2536         Py_RETURN_NOTIMPLEMENTED;
   2537     }
   2538 
   2539     if (lefto == righto) {
   2540         /* a pattern is equal to itself */
   2541         return PyBool_FromLong(op == Py_EQ);
   2542     }
   2543 
   2544     left = (PatternObject *)lefto;
   2545     right = (PatternObject *)righto;
   2546 
   2547     cmp = (left->flags == right->flags
   2548            && left->isbytes == right->isbytes
   2549            && left->codesize == right->codesize);
   2550     if (cmp) {
   2551         /* Compare the code and the pattern because the same pattern can
   2552            produce different codes depending on the locale used to compile the
   2553            pattern when the re.LOCALE flag is used. Don't compare groups,
   2554            indexgroup nor groupindex: they are derivated from the pattern. */
   2555         cmp = (memcmp(left->code, right->code,
   2556                       sizeof(left->code[0]) * left->codesize) == 0);
   2557     }
   2558     if (cmp) {
   2559         cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
   2560                                        Py_EQ);
   2561         if (cmp < 0) {
   2562             return NULL;
   2563         }
   2564     }
   2565     if (op == Py_NE) {
   2566         cmp = !cmp;
   2567     }
   2568     return PyBool_FromLong(cmp);
   2569 }
   2570 
   2571 #include "clinic/_sre.c.h"
   2572 
   2573 static PyMethodDef pattern_methods[] = {
   2574     _SRE_SRE_PATTERN_MATCH_METHODDEF
   2575     _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
   2576     _SRE_SRE_PATTERN_SEARCH_METHODDEF
   2577     _SRE_SRE_PATTERN_SUB_METHODDEF
   2578     _SRE_SRE_PATTERN_SUBN_METHODDEF
   2579     _SRE_SRE_PATTERN_FINDALL_METHODDEF
   2580     _SRE_SRE_PATTERN_SPLIT_METHODDEF
   2581     _SRE_SRE_PATTERN_FINDITER_METHODDEF
   2582     _SRE_SRE_PATTERN_SCANNER_METHODDEF
   2583     _SRE_SRE_PATTERN___COPY___METHODDEF
   2584     _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
   2585     {NULL, NULL}
   2586 };
   2587 
   2588 static PyGetSetDef pattern_getset[] = {
   2589     {"groupindex", (getter)pattern_groupindex, (setter)NULL,
   2590       "A dictionary mapping group names to group numbers."},
   2591     {NULL}  /* Sentinel */
   2592 };
   2593 
   2594 #define PAT_OFF(x) offsetof(PatternObject, x)
   2595 static PyMemberDef pattern_members[] = {
   2596     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY,
   2597      "The pattern string from which the RE object was compiled."},
   2598     {"flags",      T_INT,       PAT_OFF(flags),         READONLY,
   2599      "The regex matching flags."},
   2600     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY,
   2601      "The number of capturing groups in the pattern."},
   2602     {NULL}  /* Sentinel */
   2603 };
   2604 
   2605 static PyTypeObject Pattern_Type = {
   2606     PyVarObject_HEAD_INIT(NULL, 0)
   2607     "re.Pattern",
   2608     sizeof(PatternObject), sizeof(SRE_CODE),
   2609     (destructor)pattern_dealloc,        /* tp_dealloc */
   2610     0,                                  /* tp_print */
   2611     0,                                  /* tp_getattr */
   2612     0,                                  /* tp_setattr */
   2613     0,                                  /* tp_reserved */
   2614     (reprfunc)pattern_repr,             /* tp_repr */
   2615     0,                                  /* tp_as_number */
   2616     0,                                  /* tp_as_sequence */
   2617     0,                                  /* tp_as_mapping */
   2618     (hashfunc)pattern_hash,             /* tp_hash */
   2619     0,                                  /* tp_call */
   2620     0,                                  /* tp_str */
   2621     0,                                  /* tp_getattro */
   2622     0,                                  /* tp_setattro */
   2623     0,                                  /* tp_as_buffer */
   2624     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
   2625     pattern_doc,                        /* tp_doc */
   2626     0,                                  /* tp_traverse */
   2627     0,                                  /* tp_clear */
   2628     pattern_richcompare,                /* tp_richcompare */
   2629     offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
   2630     0,                                  /* tp_iter */
   2631     0,                                  /* tp_iternext */
   2632     pattern_methods,                    /* tp_methods */
   2633     pattern_members,                    /* tp_members */
   2634     pattern_getset,                     /* tp_getset */
   2635 };
   2636 
   2637 /* Match objects do not support length or assignment, but do support
   2638    __getitem__. */
   2639 static PyMappingMethods match_as_mapping = {
   2640     NULL,
   2641     (binaryfunc)match_getitem,
   2642     NULL
   2643 };
   2644 
   2645 static PyMethodDef match_methods[] = {
   2646     {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
   2647     _SRE_SRE_MATCH_START_METHODDEF
   2648     _SRE_SRE_MATCH_END_METHODDEF
   2649     _SRE_SRE_MATCH_SPAN_METHODDEF
   2650     _SRE_SRE_MATCH_GROUPS_METHODDEF
   2651     _SRE_SRE_MATCH_GROUPDICT_METHODDEF
   2652     _SRE_SRE_MATCH_EXPAND_METHODDEF
   2653     _SRE_SRE_MATCH___COPY___METHODDEF
   2654     _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
   2655     {NULL, NULL}
   2656 };
   2657 
   2658 static PyGetSetDef match_getset[] = {
   2659     {"lastindex", (getter)match_lastindex_get, (setter)NULL,
   2660      "The integer index of the last matched capturing group."},
   2661     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
   2662      "The name of the last matched capturing group."},
   2663     {"regs",      (getter)match_regs_get,      (setter)NULL},
   2664     {NULL}
   2665 };
   2666 
   2667 #define MATCH_OFF(x) offsetof(MatchObject, x)
   2668 static PyMemberDef match_members[] = {
   2669     {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY,
   2670      "The string passed to match() or search()."},
   2671     {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY,
   2672      "The regular expression object."},
   2673     {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY,
   2674      "The index into the string at which the RE engine started looking for a match."},
   2675     {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY,
   2676      "The index into the string beyond which the RE engine will not go."},
   2677     {NULL}
   2678 };
   2679 
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
   2682 
   2683 static PyTypeObject Match_Type = {
   2684     PyVarObject_HEAD_INIT(NULL,0)
   2685     "re.Match",
   2686     sizeof(MatchObject), sizeof(Py_ssize_t),
   2687     (destructor)match_dealloc,  /* tp_dealloc */
   2688     0,                          /* tp_print */
   2689     0,                          /* tp_getattr */
   2690     0,                          /* tp_setattr */
   2691     0,                          /* tp_reserved */
   2692     (reprfunc)match_repr,       /* tp_repr */
   2693     0,                          /* tp_as_number */
   2694     0,                          /* tp_as_sequence */
   2695     &match_as_mapping,          /* tp_as_mapping */
   2696     0,                          /* tp_hash */
   2697     0,                          /* tp_call */
   2698     0,                          /* tp_str */
   2699     0,                          /* tp_getattro */
   2700     0,                          /* tp_setattro */
   2701     0,                          /* tp_as_buffer */
   2702     Py_TPFLAGS_DEFAULT,         /* tp_flags */
   2703     match_doc,                  /* tp_doc */
   2704     0,                          /* tp_traverse */
   2705     0,                          /* tp_clear */
   2706     0,                          /* tp_richcompare */
   2707     0,                          /* tp_weaklistoffset */
   2708     0,                          /* tp_iter */
   2709     0,                          /* tp_iternext */
   2710     match_methods,              /* tp_methods */
   2711     match_members,              /* tp_members */
   2712     match_getset,               /* tp_getset */
   2713 };
   2714 
   2715 static PyMethodDef scanner_methods[] = {
   2716     _SRE_SRE_SCANNER_MATCH_METHODDEF
   2717     _SRE_SRE_SCANNER_SEARCH_METHODDEF
   2718     {NULL, NULL}
   2719 };
   2720 
   2721 #define SCAN_OFF(x) offsetof(ScannerObject, x)
   2722 static PyMemberDef scanner_members[] = {
   2723     {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
   2724     {NULL}  /* Sentinel */
   2725 };
   2726 
   2727 static PyTypeObject Scanner_Type = {
   2728     PyVarObject_HEAD_INIT(NULL, 0)
   2729     "_" SRE_MODULE ".SRE_Scanner",
   2730     sizeof(ScannerObject), 0,
   2731     (destructor)scanner_dealloc,/* tp_dealloc */
   2732     0,                          /* tp_print */
   2733     0,                          /* tp_getattr */
   2734     0,                          /* tp_setattr */
   2735     0,                          /* tp_reserved */
   2736     0,                          /* tp_repr */
   2737     0,                          /* tp_as_number */
   2738     0,                          /* tp_as_sequence */
   2739     0,                          /* tp_as_mapping */
   2740     0,                          /* tp_hash */
   2741     0,                          /* tp_call */
   2742     0,                          /* tp_str */
   2743     0,                          /* tp_getattro */
   2744     0,                          /* tp_setattro */
   2745     0,                          /* tp_as_buffer */
   2746     Py_TPFLAGS_DEFAULT,         /* tp_flags */
   2747     0,                          /* tp_doc */
   2748     0,                          /* tp_traverse */
   2749     0,                          /* tp_clear */
   2750     0,                          /* tp_richcompare */
   2751     0,                          /* tp_weaklistoffset */
   2752     0,                          /* tp_iter */
   2753     0,                          /* tp_iternext */
   2754     scanner_methods,            /* tp_methods */
   2755     scanner_members,            /* tp_members */
   2756     0,                          /* tp_getset */
   2757 };
   2758 
   2759 static PyMethodDef _functions[] = {
   2760     _SRE_COMPILE_METHODDEF
   2761     _SRE_GETCODESIZE_METHODDEF
   2762     _SRE_ASCII_ISCASED_METHODDEF
   2763     _SRE_UNICODE_ISCASED_METHODDEF
   2764     _SRE_ASCII_TOLOWER_METHODDEF
   2765     _SRE_UNICODE_TOLOWER_METHODDEF
   2766     {NULL, NULL}
   2767 };
   2768 
   2769 static struct PyModuleDef sremodule = {
   2770         PyModuleDef_HEAD_INIT,
   2771         "_" SRE_MODULE,
   2772         NULL,
   2773         -1,
   2774         _functions,
   2775         NULL,
   2776         NULL,
   2777         NULL,
   2778         NULL
   2779 };
   2780 
   2781 PyMODINIT_FUNC PyInit__sre(void)
   2782 {
   2783     PyObject* m;
   2784     PyObject* d;
   2785     PyObject* x;
   2786 
   2787     /* Patch object types */
   2788     if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
   2789         PyType_Ready(&Scanner_Type))
   2790         return NULL;
   2791 
   2792     m = PyModule_Create(&sremodule);
   2793     if (m == NULL)
   2794         return NULL;
   2795     d = PyModule_GetDict(m);
   2796 
   2797     x = PyLong_FromLong(SRE_MAGIC);
   2798     if (x) {
   2799         PyDict_SetItemString(d, "MAGIC", x);
   2800         Py_DECREF(x);
   2801     }
   2802 
   2803     x = PyLong_FromLong(sizeof(SRE_CODE));
   2804     if (x) {
   2805         PyDict_SetItemString(d, "CODESIZE", x);
   2806         Py_DECREF(x);
   2807     }
   2808 
   2809     x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
   2810     if (x) {
   2811         PyDict_SetItemString(d, "MAXREPEAT", x);
   2812         Py_DECREF(x);
   2813     }
   2814 
   2815     x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
   2816     if (x) {
   2817         PyDict_SetItemString(d, "MAXGROUPS", x);
   2818         Py_DECREF(x);
   2819     }
   2820 
   2821     x = PyUnicode_FromString(copyright);
   2822     if (x) {
   2823         PyDict_SetItemString(d, "copyright", x);
   2824         Py_DECREF(x);
   2825     }
   2826     return m;
   2827 }
   2828 
   2829 /* vim:ts=4:sw=4:et
   2830 */
   2831