Home | History | Annotate | Download | only in Modules
      1 /*
      2  * Secret Labs' Regular Expression Engine
      3  *
      4  * regular expression matching engine
      5  *
      6  * partial history:
      7  * 1999-10-24 fl   created (based on existing template matcher code)
      8  * 2000-03-06 fl   first alpha, sort of
      9  * 2000-08-01 fl   fixes for 1.6b1
     10  * 2000-08-07 fl   use PyOS_CheckStack() if available
     11  * 2000-09-20 fl   added expand method
     12  * 2001-03-20 fl   lots of fixes for 2.1b2
     13  * 2001-04-15 fl   export copyright as Python attribute, not global
     14  * 2001-04-28 fl   added __copy__ methods (work in progress)
     15  * 2001-05-14 fl   fixes for 1.5.2 compatibility
     16  * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
     17  * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
     18  * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
     19  * 2001-10-21 fl   added sub/subn primitive
     20  * 2001-10-24 fl   added finditer primitive (for 2.2 only)
     21  * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
     22  * 2002-11-09 fl   fixed empty sub/subn return type
     23  * 2003-04-18 mvl  fully support 4-byte codes
     24  * 2003-10-17 gn   implemented non recursive scheme
     25  * 2013-02-04 mrab added fullmatch primitive
     26  *
     27  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
     28  *
     29  * This version of the SRE library can be redistributed under CNRI's
     30  * Python 1.6 license.  For any other use, please contact Secret Labs
     31  * AB (info (at) pythonware.com).
     32  *
     33  * Portions of this engine have been developed in cooperation with
     34  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
     35  * other compatibility work.
     36  */
     37 
     38 static const char copyright[] =
     39     " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
     40 
     41 #define PY_SSIZE_T_CLEAN
     42 
     43 #include "Python.h"
     44 #include "structmember.h" /* offsetof */
     45 
     46 #include "sre.h"
     47 
     48 #define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
     49 
     50 #include <ctype.h>
     51 
     52 /* name of this module, minus the leading underscore */
     53 #if !defined(SRE_MODULE)
     54 #define SRE_MODULE "sre"
     55 #endif
     56 
     57 #define SRE_PY_MODULE "re"
     58 
     59 /* defining this one enables tracing */
     60 #undef VERBOSE
     61 
     62 /* -------------------------------------------------------------------- */
     63 /* optional features */
     64 
     65 /* enables copy/deepcopy handling (work in progress) */
     66 #undef USE_BUILTIN_COPY
     67 
     68 /* -------------------------------------------------------------------- */
     69 
     70 #if defined(_MSC_VER)
     71 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
     72 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
     73 /* fastest possible local call under MSVC */
     74 #define LOCAL(type) static __inline type __fastcall
     75 #elif defined(USE_INLINE)
     76 #define LOCAL(type) static inline type
     77 #else
     78 #define LOCAL(type) static type
     79 #endif
     80 
     81 /* error codes */
     82 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
     83 #define SRE_ERROR_STATE -2 /* illegal state */
     84 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
     85 #define SRE_ERROR_MEMORY -9 /* out of memory */
     86 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
     87 
     88 #if defined(VERBOSE)
     89 #define TRACE(v) printf v
     90 #else
     91 #define TRACE(v)
     92 #endif
     93 
     94 /* -------------------------------------------------------------------- */
     95 /* search engine state */
     96 
     97 #define SRE_IS_DIGIT(ch)\
     98     ((ch) < 128 && Py_ISDIGIT(ch))
     99 #define SRE_IS_SPACE(ch)\
    100     ((ch) < 128 && Py_ISSPACE(ch))
    101 #define SRE_IS_LINEBREAK(ch)\
    102     ((ch) == '\n')
    103 #define SRE_IS_ALNUM(ch)\
    104     ((ch) < 128 && Py_ISALNUM(ch))
    105 #define SRE_IS_WORD(ch)\
    106     ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
    107 
    108 static unsigned int sre_lower(unsigned int ch)
    109 {
    110     return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
    111 }
    112 
    113 static unsigned int sre_upper(unsigned int ch)
    114 {
    115     return ((ch) < 128 ? Py_TOUPPER(ch) : ch);
    116 }
    117 
    118 /* locale-specific character predicates */
    119 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
    120  * warnings when c's type supports only numbers < N+1 */
    121 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
    122 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
    123 
    124 static unsigned int sre_lower_locale(unsigned int ch)
    125 {
    126     return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
    127 }
    128 
    129 static unsigned int sre_upper_locale(unsigned int ch)
    130 {
    131     return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
    132 }
    133 
    134 /* unicode-specific character predicates */
    135 
    136 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
    137 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
    138 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
    139 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
    140 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
    141 
    142 static unsigned int sre_lower_unicode(unsigned int ch)
    143 {
    144     return (unsigned int) Py_UNICODE_TOLOWER(ch);
    145 }
    146 
    147 static unsigned int sre_upper_unicode(unsigned int ch)
    148 {
    149     return (unsigned int) Py_UNICODE_TOUPPER(ch);
    150 }
    151 
    152 LOCAL(int)
    153 sre_category(SRE_CODE category, unsigned int ch)
    154 {
    155     switch (category) {
    156 
    157     case SRE_CATEGORY_DIGIT:
    158         return SRE_IS_DIGIT(ch);
    159     case SRE_CATEGORY_NOT_DIGIT:
    160         return !SRE_IS_DIGIT(ch);
    161     case SRE_CATEGORY_SPACE:
    162         return SRE_IS_SPACE(ch);
    163     case SRE_CATEGORY_NOT_SPACE:
    164         return !SRE_IS_SPACE(ch);
    165     case SRE_CATEGORY_WORD:
    166         return SRE_IS_WORD(ch);
    167     case SRE_CATEGORY_NOT_WORD:
    168         return !SRE_IS_WORD(ch);
    169     case SRE_CATEGORY_LINEBREAK:
    170         return SRE_IS_LINEBREAK(ch);
    171     case SRE_CATEGORY_NOT_LINEBREAK:
    172         return !SRE_IS_LINEBREAK(ch);
    173 
    174     case SRE_CATEGORY_LOC_WORD:
    175         return SRE_LOC_IS_WORD(ch);
    176     case SRE_CATEGORY_LOC_NOT_WORD:
    177         return !SRE_LOC_IS_WORD(ch);
    178 
    179     case SRE_CATEGORY_UNI_DIGIT:
    180         return SRE_UNI_IS_DIGIT(ch);
    181     case SRE_CATEGORY_UNI_NOT_DIGIT:
    182         return !SRE_UNI_IS_DIGIT(ch);
    183     case SRE_CATEGORY_UNI_SPACE:
    184         return SRE_UNI_IS_SPACE(ch);
    185     case SRE_CATEGORY_UNI_NOT_SPACE:
    186         return !SRE_UNI_IS_SPACE(ch);
    187     case SRE_CATEGORY_UNI_WORD:
    188         return SRE_UNI_IS_WORD(ch);
    189     case SRE_CATEGORY_UNI_NOT_WORD:
    190         return !SRE_UNI_IS_WORD(ch);
    191     case SRE_CATEGORY_UNI_LINEBREAK:
    192         return SRE_UNI_IS_LINEBREAK(ch);
    193     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
    194         return !SRE_UNI_IS_LINEBREAK(ch);
    195     }
    196     return 0;
    197 }
    198 
    199 /* helpers */
    200 
    201 static void
    202 data_stack_dealloc(SRE_STATE* state)
    203 {
    204     if (state->data_stack) {
    205         PyMem_FREE(state->data_stack);
    206         state->data_stack = NULL;
    207     }
    208     state->data_stack_size = state->data_stack_base = 0;
    209 }
    210 
    211 static int
    212 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
    213 {
    214     Py_ssize_t minsize, cursize;
    215     minsize = state->data_stack_base+size;
    216     cursize = state->data_stack_size;
    217     if (cursize < minsize) {
    218         void* stack;
    219         cursize = minsize+minsize/4+1024;
    220         TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
    221         stack = PyMem_REALLOC(state->data_stack, cursize);
    222         if (!stack) {
    223             data_stack_dealloc(state);
    224             return SRE_ERROR_MEMORY;
    225         }
    226         state->data_stack = (char *)stack;
    227         state->data_stack_size = cursize;
    228     }
    229     return 0;
    230 }
    231 
    232 /* generate 8-bit version */
    233 
    234 #define SRE_CHAR Py_UCS1
    235 #define SIZEOF_SRE_CHAR 1
    236 #define SRE(F) sre_ucs1_##F
    237 #include "sre_lib.h"
    238 
    239 /* generate 16-bit unicode version */
    240 
    241 #define SRE_CHAR Py_UCS2
    242 #define SIZEOF_SRE_CHAR 2
    243 #define SRE(F) sre_ucs2_##F
    244 #include "sre_lib.h"
    245 
    246 /* generate 32-bit unicode version */
    247 
    248 #define SRE_CHAR Py_UCS4
    249 #define SIZEOF_SRE_CHAR 4
    250 #define SRE(F) sre_ucs4_##F
    251 #include "sre_lib.h"
    252 
    253 /* -------------------------------------------------------------------- */
    254 /* factories and destructors */
    255 
    256 /* see sre.h for object declarations */
    257 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
    258 static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
    259 
    260 
    261 /*[clinic input]
    262 module _sre
    263 class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
    264 class _sre.SRE_Match "MatchObject *" "&Match_Type"
    265 class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
    266 [clinic start generated code]*/
    267 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
    268 
    269 static PyTypeObject Pattern_Type;
    270 static PyTypeObject Match_Type;
    271 static PyTypeObject Scanner_Type;
    272 
    273 /*[clinic input]
    274 _sre.getcodesize -> int
    275 [clinic start generated code]*/
    276 
    277 static int
    278 _sre_getcodesize_impl(PyObject *module)
    279 /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
    280 {
    281     return sizeof(SRE_CODE);
    282 }
    283 
    284 /*[clinic input]
    285 _sre.getlower -> int
    286 
    287     character: int
    288     flags: int
    289     /
    290 
    291 [clinic start generated code]*/
    292 
    293 static int
    294 _sre_getlower_impl(PyObject *module, int character, int flags)
    295 /*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/
    296 {
    297     if (flags & SRE_FLAG_LOCALE)
    298         return sre_lower_locale(character);
    299     if (flags & SRE_FLAG_UNICODE)
    300         return sre_lower_unicode(character);
    301     return sre_lower(character);
    302 }
    303 
    304 LOCAL(void)
    305 state_reset(SRE_STATE* state)
    306 {
    307     /* FIXME: dynamic! */
    308     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
    309 
    310     state->lastmark = -1;
    311     state->lastindex = -1;
    312 
    313     state->repeat = NULL;
    314 
    315     data_stack_dealloc(state);
    316 }
    317 
    318 static void*
    319 getstring(PyObject* string, Py_ssize_t* p_length,
    320           int* p_isbytes, int* p_charsize,
    321           Py_buffer *view)
    322 {
    323     /* given a python object, return a data pointer, a length (in
    324        characters), and a character size.  return NULL if the object
    325        is not a string (or not compatible) */
    326 
    327     /* Unicode objects do not support the buffer API. So, get the data
    328        directly instead. */
    329     if (PyUnicode_Check(string)) {
    330         if (PyUnicode_READY(string) == -1)
    331             return NULL;
    332         *p_length = PyUnicode_GET_LENGTH(string);
    333         *p_charsize = PyUnicode_KIND(string);
    334         *p_isbytes = 0;
    335         return PyUnicode_DATA(string);
    336     }
    337 
    338     /* get pointer to byte string buffer */
    339     if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
    340         PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
    341         return NULL;
    342     }
    343 
    344     *p_length = view->len;
    345     *p_charsize = 1;
    346     *p_isbytes = 1;
    347 
    348     if (view->buf == NULL) {
    349         PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
    350         PyBuffer_Release(view);
    351         view->buf = NULL;
    352         return NULL;
    353     }
    354     return view->buf;
    355 }
    356 
    357 LOCAL(PyObject*)
    358 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
    359            Py_ssize_t start, Py_ssize_t end)
    360 {
    361     /* prepare state object */
    362 
    363     Py_ssize_t length;
    364     int isbytes, charsize;
    365     void* ptr;
    366 
    367     memset(state, 0, sizeof(SRE_STATE));
    368 
    369     state->mark = PyMem_New(void *, pattern->groups * 2);
    370     if (!state->mark) {
    371         PyErr_NoMemory();
    372         goto err;
    373     }
    374     state->lastmark = -1;
    375     state->lastindex = -1;
    376 
    377     state->buffer.buf = NULL;
    378     ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
    379     if (!ptr)
    380         goto err;
    381 
    382     if (isbytes && pattern->isbytes == 0) {
    383         PyErr_SetString(PyExc_TypeError,
    384                         "cannot use a string pattern on a bytes-like object");
    385         goto err;
    386     }
    387     if (!isbytes && pattern->isbytes > 0) {
    388         PyErr_SetString(PyExc_TypeError,
    389                         "cannot use a bytes pattern on a string-like object");
    390         goto err;
    391     }
    392 
    393     /* adjust boundaries */
    394     if (start < 0)
    395         start = 0;
    396     else if (start > length)
    397         start = length;
    398 
    399     if (end < 0)
    400         end = 0;
    401     else if (end > length)
    402         end = length;
    403 
    404     state->isbytes = isbytes;
    405     state->charsize = charsize;
    406 
    407     state->beginning = ptr;
    408 
    409     state->start = (void*) ((char*) ptr + start * state->charsize);
    410     state->end = (void*) ((char*) ptr + end * state->charsize);
    411 
    412     Py_INCREF(string);
    413     state->string = string;
    414     state->pos = start;
    415     state->endpos = end;
    416 
    417     if (pattern->flags & SRE_FLAG_LOCALE) {
    418         state->lower = sre_lower_locale;
    419         state->upper = sre_upper_locale;
    420     }
    421     else if (pattern->flags & SRE_FLAG_UNICODE) {
    422         state->lower = sre_lower_unicode;
    423         state->upper = sre_upper_unicode;
    424     }
    425     else {
    426         state->lower = sre_lower;
    427         state->upper = sre_upper;
    428     }
    429 
    430     return string;
    431   err:
    432     PyMem_Del(state->mark);
    433     state->mark = NULL;
    434     if (state->buffer.buf)
    435         PyBuffer_Release(&state->buffer);
    436     return NULL;
    437 }
    438 
    439 LOCAL(void)
    440 state_fini(SRE_STATE* state)
    441 {
    442     if (state->buffer.buf)
    443         PyBuffer_Release(&state->buffer);
    444     Py_XDECREF(state->string);
    445     data_stack_dealloc(state);
    446     PyMem_Del(state->mark);
    447     state->mark = NULL;
    448 }
    449 
    450 /* calculate offset from start of string */
    451 #define STATE_OFFSET(state, member)\
    452     (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
    453 
    454 LOCAL(PyObject*)
    455 getslice(int isbytes, const void *ptr,
    456          PyObject* string, Py_ssize_t start, Py_ssize_t end)
    457 {
    458     if (isbytes) {
    459         if (PyBytes_CheckExact(string) &&
    460             start == 0 && end == PyBytes_GET_SIZE(string)) {
    461             Py_INCREF(string);
    462             return string;
    463         }
    464         return PyBytes_FromStringAndSize(
    465                 (const char *)ptr + start, end - start);
    466     }
    467     else {
    468         return PyUnicode_Substring(string, start, end);
    469     }
    470 }
    471 
    472 LOCAL(PyObject*)
    473 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
    474 {
    475     Py_ssize_t i, j;
    476 
    477     index = (index - 1) * 2;
    478 
    479     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
    480         if (empty)
    481             /* want empty string */
    482             i = j = 0;
    483         else {
    484             Py_INCREF(Py_None);
    485             return Py_None;
    486         }
    487     } else {
    488         i = STATE_OFFSET(state, state->mark[index]);
    489         j = STATE_OFFSET(state, state->mark[index+1]);
    490     }
    491 
    492     return getslice(state->isbytes, state->beginning, string, i, j);
    493 }
    494 
    495 static void
    496 pattern_error(Py_ssize_t status)
    497 {
    498     switch (status) {
    499     case SRE_ERROR_RECURSION_LIMIT:
    500         /* This error code seems to be unused. */
    501         PyErr_SetString(
    502             PyExc_RecursionError,
    503             "maximum recursion limit exceeded"
    504             );
    505         break;
    506     case SRE_ERROR_MEMORY:
    507         PyErr_NoMemory();
    508         break;
    509     case SRE_ERROR_INTERRUPTED:
    510     /* An exception has already been raised, so let it fly */
    511         break;
    512     default:
    513         /* other error codes indicate compiler/engine bugs */
    514         PyErr_SetString(
    515             PyExc_RuntimeError,
    516             "internal error in regular expression engine"
    517             );
    518     }
    519 }
    520 
    521 static void
    522 pattern_dealloc(PatternObject* self)
    523 {
    524     if (self->weakreflist != NULL)
    525         PyObject_ClearWeakRefs((PyObject *) self);
    526     Py_XDECREF(self->pattern);
    527     Py_XDECREF(self->groupindex);
    528     Py_XDECREF(self->indexgroup);
    529     PyObject_DEL(self);
    530 }
    531 
    532 LOCAL(Py_ssize_t)
    533 sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
    534 {
    535     if (state->charsize == 1)
    536         return sre_ucs1_match(state, pattern, match_all);
    537     if (state->charsize == 2)
    538         return sre_ucs2_match(state, pattern, match_all);
    539     assert(state->charsize == 4);
    540     return sre_ucs4_match(state, pattern, match_all);
    541 }
    542 
    543 LOCAL(Py_ssize_t)
    544 sre_search(SRE_STATE* state, SRE_CODE* pattern)
    545 {
    546     if (state->charsize == 1)
    547         return sre_ucs1_search(state, pattern);
    548     if (state->charsize == 2)
    549         return sre_ucs2_search(state, pattern);
    550     assert(state->charsize == 4);
    551     return sre_ucs4_search(state, pattern);
    552 }
    553 
    554 static PyObject *
    555 fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
    556 {
    557     if (string2 != NULL) {
    558         if (string != NULL) {
    559             PyErr_Format(PyExc_TypeError,
    560                          "Argument given by name ('%s') and position (1)",
    561                          oldname);
    562             return NULL;
    563         }
    564         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
    565                              "The '%s' keyword parameter name is deprecated.  "
    566                              "Use 'string' instead.", oldname) < 0)
    567             return NULL;
    568         return string2;
    569     }
    570     if (string == NULL) {
    571         PyErr_SetString(PyExc_TypeError,
    572                         "Required argument 'string' (pos 1) not found");
    573         return NULL;
    574     }
    575     return string;
    576 }
    577 
    578 /*[clinic input]
    579 _sre.SRE_Pattern.match
    580 
    581     string: object = NULL
    582     pos: Py_ssize_t = 0
    583     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    584     *
    585     pattern: object = NULL
    586 
    587 Matches zero or more characters at the beginning of the string.
    588 [clinic start generated code]*/
    589 
    590 static PyObject *
    591 _sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
    592                             Py_ssize_t pos, Py_ssize_t endpos,
    593                             PyObject *pattern)
    594 /*[clinic end generated code: output=74b4b1da3bb2d84e input=3d079aa99979b81d]*/
    595 {
    596     SRE_STATE state;
    597     Py_ssize_t status;
    598     PyObject *match;
    599 
    600     string = fix_string_param(string, pattern, "pattern");
    601     if (!string)
    602         return NULL;
    603     if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
    604         return NULL;
    605 
    606     state.ptr = state.start;
    607 
    608     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
    609 
    610     status = sre_match(&state, PatternObject_GetCode(self), 0);
    611 
    612     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
    613     if (PyErr_Occurred()) {
    614         state_fini(&state);
    615         return NULL;
    616     }
    617 
    618     match = pattern_new_match(self, &state, status);
    619     state_fini(&state);
    620     return match;
    621 }
    622 
    623 /*[clinic input]
    624 _sre.SRE_Pattern.fullmatch
    625 
    626     string: object = NULL
    627     pos: Py_ssize_t = 0
    628     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    629     *
    630     pattern: object = NULL
    631 
    632 Matches against all of the string
    633 [clinic start generated code]*/
    634 
    635 static PyObject *
    636 _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
    637                                 Py_ssize_t pos, Py_ssize_t endpos,
    638                                 PyObject *pattern)
    639 /*[clinic end generated code: output=1c98bc5da744ea94 input=d4228606cc12580f]*/
    640 {
    641     SRE_STATE state;
    642     Py_ssize_t status;
    643     PyObject *match;
    644 
    645     string = fix_string_param(string, pattern, "pattern");
    646     if (!string)
    647         return NULL;
    648 
    649     if (!state_init(&state, self, string, pos, endpos))
    650         return NULL;
    651 
    652     state.ptr = state.start;
    653 
    654     TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
    655 
    656     status = sre_match(&state, PatternObject_GetCode(self), 1);
    657 
    658     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
    659     if (PyErr_Occurred()) {
    660         state_fini(&state);
    661         return NULL;
    662     }
    663 
    664     match = pattern_new_match(self, &state, status);
    665     state_fini(&state);
    666     return match;
    667 }
    668 
    669 /*[clinic input]
    670 _sre.SRE_Pattern.search
    671 
    672     string: object = NULL
    673     pos: Py_ssize_t = 0
    674     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    675     *
    676     pattern: object = NULL
    677 
    678 Scan through string looking for a match, and return a corresponding match object instance.
    679 
    680 Return None if no position in the string matches.
    681 [clinic start generated code]*/
    682 
    683 static PyObject *
    684 _sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
    685                              Py_ssize_t pos, Py_ssize_t endpos,
    686                              PyObject *pattern)
    687 /*[clinic end generated code: output=3839394a18e5ea4f input=dab42720f4be3a4b]*/
    688 {
    689     SRE_STATE state;
    690     Py_ssize_t status;
    691     PyObject *match;
    692 
    693     string = fix_string_param(string, pattern, "pattern");
    694     if (!string)
    695         return NULL;
    696 
    697     if (!state_init(&state, self, string, pos, endpos))
    698         return NULL;
    699 
    700     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
    701 
    702     status = sre_search(&state, PatternObject_GetCode(self));
    703 
    704     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
    705 
    706     if (PyErr_Occurred()) {
    707         state_fini(&state);
    708         return NULL;
    709     }
    710 
    711     match = pattern_new_match(self, &state, status);
    712     state_fini(&state);
    713     return match;
    714 }
    715 
    716 static PyObject*
    717 call(const char* module, const char* function, PyObject* args)
    718 {
    719     PyObject* name;
    720     PyObject* mod;
    721     PyObject* func;
    722     PyObject* result;
    723 
    724     if (!args)
    725         return NULL;
    726     name = PyUnicode_FromString(module);
    727     if (!name)
    728         return NULL;
    729     mod = PyImport_Import(name);
    730     Py_DECREF(name);
    731     if (!mod)
    732         return NULL;
    733     func = PyObject_GetAttrString(mod, function);
    734     Py_DECREF(mod);
    735     if (!func)
    736         return NULL;
    737     result = PyObject_CallObject(func, args);
    738     Py_DECREF(func);
    739     Py_DECREF(args);
    740     return result;
    741 }
    742 
    743 #ifdef USE_BUILTIN_COPY
    744 static int
    745 deepcopy(PyObject** object, PyObject* memo)
    746 {
    747     PyObject* copy;
    748 
    749     copy = call(
    750         "copy", "deepcopy",
    751         PyTuple_Pack(2, *object, memo)
    752         );
    753     if (!copy)
    754         return 0;
    755 
    756     Py_SETREF(*object, copy);
    757 
    758     return 1; /* success */
    759 }
    760 #endif
    761 
    762 /*[clinic input]
    763 _sre.SRE_Pattern.findall
    764 
    765     string: object = NULL
    766     pos: Py_ssize_t = 0
    767     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    768     *
    769     source: object = NULL
    770 
    771 Return a list of all non-overlapping matches of pattern in string.
    772 [clinic start generated code]*/
    773 
    774 static PyObject *
    775 _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
    776                               Py_ssize_t pos, Py_ssize_t endpos,
    777                               PyObject *source)
    778 /*[clinic end generated code: output=51295498b300639d input=df688355c056b9de]*/
    779 {
    780     SRE_STATE state;
    781     PyObject* list;
    782     Py_ssize_t status;
    783     Py_ssize_t i, b, e;
    784 
    785     string = fix_string_param(string, source, "source");
    786     if (!string)
    787         return NULL;
    788 
    789     if (!state_init(&state, self, string, pos, endpos))
    790         return NULL;
    791 
    792     list = PyList_New(0);
    793     if (!list) {
    794         state_fini(&state);
    795         return NULL;
    796     }
    797 
    798     while (state.start <= state.end) {
    799 
    800         PyObject* item;
    801 
    802         state_reset(&state);
    803 
    804         state.ptr = state.start;
    805 
    806         status = sre_search(&state, PatternObject_GetCode(self));
    807         if (PyErr_Occurred())
    808             goto error;
    809 
    810         if (status <= 0) {
    811             if (status == 0)
    812                 break;
    813             pattern_error(status);
    814             goto error;
    815         }
    816 
    817         /* don't bother to build a match object */
    818         switch (self->groups) {
    819         case 0:
    820             b = STATE_OFFSET(&state, state.start);
    821             e = STATE_OFFSET(&state, state.ptr);
    822             item = getslice(state.isbytes, state.beginning,
    823                             string, b, e);
    824             if (!item)
    825                 goto error;
    826             break;
    827         case 1:
    828             item = state_getslice(&state, 1, string, 1);
    829             if (!item)
    830                 goto error;
    831             break;
    832         default:
    833             item = PyTuple_New(self->groups);
    834             if (!item)
    835                 goto error;
    836             for (i = 0; i < self->groups; i++) {
    837                 PyObject* o = state_getslice(&state, i+1, string, 1);
    838                 if (!o) {
    839                     Py_DECREF(item);
    840                     goto error;
    841                 }
    842                 PyTuple_SET_ITEM(item, i, o);
    843             }
    844             break;
    845         }
    846 
    847         status = PyList_Append(list, item);
    848         Py_DECREF(item);
    849         if (status < 0)
    850             goto error;
    851 
    852         if (state.ptr == state.start)
    853             state.start = (void*) ((char*) state.ptr + state.charsize);
    854         else
    855             state.start = state.ptr;
    856 
    857     }
    858 
    859     state_fini(&state);
    860     return list;
    861 
    862 error:
    863     Py_DECREF(list);
    864     state_fini(&state);
    865     return NULL;
    866 
    867 }
    868 
    869 /*[clinic input]
    870 _sre.SRE_Pattern.finditer
    871 
    872     string: object
    873     pos: Py_ssize_t = 0
    874     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    875 
    876 Return an iterator over all non-overlapping matches for the RE pattern in string.
    877 
    878 For each match, the iterator returns a match object.
    879 [clinic start generated code]*/
    880 
    881 static PyObject *
    882 _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
    883                                Py_ssize_t pos, Py_ssize_t endpos)
    884 /*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
    885 {
    886     PyObject* scanner;
    887     PyObject* search;
    888     PyObject* iterator;
    889 
    890     scanner = pattern_scanner(self, string, pos, endpos);
    891     if (!scanner)
    892         return NULL;
    893 
    894     search = PyObject_GetAttrString(scanner, "search");
    895     Py_DECREF(scanner);
    896     if (!search)
    897         return NULL;
    898 
    899     iterator = PyCallIter_New(search, Py_None);
    900     Py_DECREF(search);
    901 
    902     return iterator;
    903 }
    904 
    905 /*[clinic input]
    906 _sre.SRE_Pattern.scanner
    907 
    908     string: object
    909     pos: Py_ssize_t = 0
    910     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
    911 
    912 [clinic start generated code]*/
    913 
    914 static PyObject *
    915 _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
    916                               Py_ssize_t pos, Py_ssize_t endpos)
    917 /*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
    918 {
    919     return pattern_scanner(self, string, pos, endpos);
    920 }
    921 
    922 /*[clinic input]
    923 _sre.SRE_Pattern.split
    924 
    925     string: object = NULL
    926     maxsplit: Py_ssize_t = 0
    927     *
    928     source: object = NULL
    929 
    930 Split string by the occurrences of pattern.
    931 [clinic start generated code]*/
    932 
    933 static PyObject *
    934 _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
    935                             Py_ssize_t maxsplit, PyObject *source)
    936 /*[clinic end generated code: output=20bac2ff55b9f84c input=41e0b2e35e599d7b]*/
    937 {
    938     SRE_STATE state;
    939     PyObject* list;
    940     PyObject* item;
    941     Py_ssize_t status;
    942     Py_ssize_t n;
    943     Py_ssize_t i;
    944     void* last;
    945 
    946     string = fix_string_param(string, source, "source");
    947     if (!string)
    948         return NULL;
    949 
    950     assert(self->codesize != 0);
    951     if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
    952         if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
    953             PyErr_SetString(PyExc_ValueError,
    954                             "split() requires a non-empty pattern match.");
    955             return NULL;
    956         }
    957         if (PyErr_WarnEx(PyExc_FutureWarning,
    958                          "split() requires a non-empty pattern match.",
    959                          1) < 0)
    960             return NULL;
    961     }
    962 
    963     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
    964         return NULL;
    965 
    966     list = PyList_New(0);
    967     if (!list) {
    968         state_fini(&state);
    969         return NULL;
    970     }
    971 
    972     n = 0;
    973     last = state.start;
    974 
    975     while (!maxsplit || n < maxsplit) {
    976 
    977         state_reset(&state);
    978 
    979         state.ptr = state.start;
    980 
    981         status = sre_search(&state, PatternObject_GetCode(self));
    982         if (PyErr_Occurred())
    983             goto error;
    984 
    985         if (status <= 0) {
    986             if (status == 0)
    987                 break;
    988             pattern_error(status);
    989             goto error;
    990         }
    991 
    992         if (state.start == state.ptr) {
    993             if (last == state.end || state.ptr == state.end)
    994                 break;
    995             /* skip one character */
    996             state.start = (void*) ((char*) state.ptr + state.charsize);
    997             continue;
    998         }
    999 
   1000         /* get segment before this match */
   1001         item = getslice(state.isbytes, state.beginning,
   1002             string, STATE_OFFSET(&state, last),
   1003             STATE_OFFSET(&state, state.start)
   1004             );
   1005         if (!item)
   1006             goto error;
   1007         status = PyList_Append(list, item);
   1008         Py_DECREF(item);
   1009         if (status < 0)
   1010             goto error;
   1011 
   1012         /* add groups (if any) */
   1013         for (i = 0; i < self->groups; i++) {
   1014             item = state_getslice(&state, i+1, string, 0);
   1015             if (!item)
   1016                 goto error;
   1017             status = PyList_Append(list, item);
   1018             Py_DECREF(item);
   1019             if (status < 0)
   1020                 goto error;
   1021         }
   1022 
   1023         n = n + 1;
   1024 
   1025         last = state.start = state.ptr;
   1026 
   1027     }
   1028 
   1029     /* get segment following last match (even if empty) */
   1030     item = getslice(state.isbytes, state.beginning,
   1031         string, STATE_OFFSET(&state, last), state.endpos
   1032         );
   1033     if (!item)
   1034         goto error;
   1035     status = PyList_Append(list, item);
   1036     Py_DECREF(item);
   1037     if (status < 0)
   1038         goto error;
   1039 
   1040     state_fini(&state);
   1041     return list;
   1042 
   1043 error:
   1044     Py_DECREF(list);
   1045     state_fini(&state);
   1046     return NULL;
   1047 
   1048 }
   1049 
   1050 static PyObject*
   1051 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
   1052              Py_ssize_t count, Py_ssize_t subn)
   1053 {
   1054     SRE_STATE state;
   1055     PyObject* list;
   1056     PyObject* joiner;
   1057     PyObject* item;
   1058     PyObject* filter;
   1059     PyObject* match;
   1060     void* ptr;
   1061     Py_ssize_t status;
   1062     Py_ssize_t n;
   1063     Py_ssize_t i, b, e;
   1064     int isbytes, charsize;
   1065     int filter_is_callable;
   1066     Py_buffer view;
   1067 
   1068     if (PyCallable_Check(ptemplate)) {
   1069         /* sub/subn takes either a function or a template */
   1070         filter = ptemplate;
   1071         Py_INCREF(filter);
   1072         filter_is_callable = 1;
   1073     } else {
   1074         /* if not callable, check if it's a literal string */
   1075         int literal;
   1076         view.buf = NULL;
   1077         ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
   1078         b = charsize;
   1079         if (ptr) {
   1080             if (charsize == 1)
   1081                 literal = memchr(ptr, '\\', n) == NULL;
   1082             else
   1083                 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
   1084         } else {
   1085             PyErr_Clear();
   1086             literal = 0;
   1087         }
   1088         if (view.buf)
   1089             PyBuffer_Release(&view);
   1090         if (literal) {
   1091             filter = ptemplate;
   1092             Py_INCREF(filter);
   1093             filter_is_callable = 0;
   1094         } else {
   1095             /* not a literal; hand it over to the template compiler */
   1096             filter = call(
   1097                 SRE_PY_MODULE, "_subx",
   1098                 PyTuple_Pack(2, self, ptemplate)
   1099                 );
   1100             if (!filter)
   1101                 return NULL;
   1102             filter_is_callable = PyCallable_Check(filter);
   1103         }
   1104     }
   1105 
   1106     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
   1107         Py_DECREF(filter);
   1108         return NULL;
   1109     }
   1110 
   1111     list = PyList_New(0);
   1112     if (!list) {
   1113         Py_DECREF(filter);
   1114         state_fini(&state);
   1115         return NULL;
   1116     }
   1117 
   1118     n = i = 0;
   1119 
   1120     while (!count || n < count) {
   1121 
   1122         state_reset(&state);
   1123 
   1124         state.ptr = state.start;
   1125 
   1126         status = sre_search(&state, PatternObject_GetCode(self));
   1127         if (PyErr_Occurred())
   1128             goto error;
   1129 
   1130         if (status <= 0) {
   1131             if (status == 0)
   1132                 break;
   1133             pattern_error(status);
   1134             goto error;
   1135         }
   1136 
   1137         b = STATE_OFFSET(&state, state.start);
   1138         e = STATE_OFFSET(&state, state.ptr);
   1139 
   1140         if (i < b) {
   1141             /* get segment before this match */
   1142             item = getslice(state.isbytes, state.beginning,
   1143                 string, i, b);
   1144             if (!item)
   1145                 goto error;
   1146             status = PyList_Append(list, item);
   1147             Py_DECREF(item);
   1148             if (status < 0)
   1149                 goto error;
   1150 
   1151         } else if (i == b && i == e && n > 0)
   1152             /* ignore empty match on latest position */
   1153             goto next;
   1154 
   1155         if (filter_is_callable) {
   1156             /* pass match object through filter */
   1157             match = pattern_new_match(self, &state, 1);
   1158             if (!match)
   1159                 goto error;
   1160             item = _PyObject_CallArg1(filter, match);
   1161             Py_DECREF(match);
   1162             if (!item)
   1163                 goto error;
   1164         } else {
   1165             /* filter is literal string */
   1166             item = filter;
   1167             Py_INCREF(item);
   1168         }
   1169 
   1170         /* add to list */
   1171         if (item != Py_None) {
   1172             status = PyList_Append(list, item);
   1173             Py_DECREF(item);
   1174             if (status < 0)
   1175                 goto error;
   1176         }
   1177 
   1178         i = e;
   1179         n = n + 1;
   1180 
   1181 next:
   1182         /* move on */
   1183         if (state.ptr == state.end)
   1184             break;
   1185         if (state.ptr == state.start)
   1186             state.start = (void*) ((char*) state.ptr + state.charsize);
   1187         else
   1188             state.start = state.ptr;
   1189 
   1190     }
   1191 
   1192     /* get segment following last match */
   1193     if (i < state.endpos) {
   1194         item = getslice(state.isbytes, state.beginning,
   1195                         string, i, state.endpos);
   1196         if (!item)
   1197             goto error;
   1198         status = PyList_Append(list, item);
   1199         Py_DECREF(item);
   1200         if (status < 0)
   1201             goto error;
   1202     }
   1203 
   1204     state_fini(&state);
   1205 
   1206     Py_DECREF(filter);
   1207 
   1208     /* convert list to single string (also removes list) */
   1209     joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
   1210     if (!joiner) {
   1211         Py_DECREF(list);
   1212         return NULL;
   1213     }
   1214     if (PyList_GET_SIZE(list) == 0) {
   1215         Py_DECREF(list);
   1216         item = joiner;
   1217     }
   1218     else {
   1219         if (state.isbytes)
   1220             item = _PyBytes_Join(joiner, list);
   1221         else
   1222             item = PyUnicode_Join(joiner, list);
   1223         Py_DECREF(joiner);
   1224         Py_DECREF(list);
   1225         if (!item)
   1226             return NULL;
   1227     }
   1228 
   1229     if (subn)
   1230         return Py_BuildValue("Nn", item, n);
   1231 
   1232     return item;
   1233 
   1234 error:
   1235     Py_DECREF(list);
   1236     state_fini(&state);
   1237     Py_DECREF(filter);
   1238     return NULL;
   1239 
   1240 }
   1241 
   1242 /*[clinic input]
   1243 _sre.SRE_Pattern.sub
   1244 
   1245     repl: object
   1246     string: object
   1247     count: Py_ssize_t = 0
   1248 
   1249 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
   1250 [clinic start generated code]*/
   1251 
   1252 static PyObject *
   1253 _sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
   1254                           PyObject *string, Py_ssize_t count)
   1255 /*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
   1256 {
   1257     return pattern_subx(self, repl, string, count, 0);
   1258 }
   1259 
   1260 /*[clinic input]
   1261 _sre.SRE_Pattern.subn
   1262 
   1263     repl: object
   1264     string: object
   1265     count: Py_ssize_t = 0
   1266 
   1267 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
   1268 [clinic start generated code]*/
   1269 
   1270 static PyObject *
   1271 _sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
   1272                            PyObject *string, Py_ssize_t count)
   1273 /*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
   1274 {
   1275     return pattern_subx(self, repl, string, count, 1);
   1276 }
   1277 
   1278 /*[clinic input]
   1279 _sre.SRE_Pattern.__copy__
   1280 
   1281 [clinic start generated code]*/
   1282 
   1283 static PyObject *
   1284 _sre_SRE_Pattern___copy___impl(PatternObject *self)
   1285 /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
   1286 {
   1287 #ifdef USE_BUILTIN_COPY
   1288     PatternObject* copy;
   1289     int offset;
   1290 
   1291     copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
   1292     if (!copy)
   1293         return NULL;
   1294 
   1295     offset = offsetof(PatternObject, groups);
   1296 
   1297     Py_XINCREF(self->groupindex);
   1298     Py_XINCREF(self->indexgroup);
   1299     Py_XINCREF(self->pattern);
   1300 
   1301     memcpy((char*) copy + offset, (char*) self + offset,
   1302            sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
   1303     copy->weakreflist = NULL;
   1304 
   1305     return (PyObject*) copy;
   1306 #else
   1307     PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
   1308     return NULL;
   1309 #endif
   1310 }
   1311 
   1312 /*[clinic input]
   1313 _sre.SRE_Pattern.__deepcopy__
   1314 
   1315     memo: object
   1316 
   1317 [clinic start generated code]*/
   1318 
   1319 static PyObject *
   1320 _sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
   1321 /*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
   1322 {
   1323 #ifdef USE_BUILTIN_COPY
   1324     PatternObject* copy;
   1325 
   1326     copy = (PatternObject*) pattern_copy(self);
   1327     if (!copy)
   1328         return NULL;
   1329 
   1330     if (!deepcopy(&copy->groupindex, memo) ||
   1331         !deepcopy(&copy->indexgroup, memo) ||
   1332         !deepcopy(&copy->pattern, memo)) {
   1333         Py_DECREF(copy);
   1334         return NULL;
   1335     }
   1336 
   1337 #else
   1338     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
   1339     return NULL;
   1340 #endif
   1341 }
   1342 
   1343 static PyObject *
   1344 pattern_repr(PatternObject *obj)
   1345 {
   1346     static const struct {
   1347         const char *name;
   1348         int value;
   1349     } flag_names[] = {
   1350         {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
   1351         {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
   1352         {"re.LOCALE", SRE_FLAG_LOCALE},
   1353         {"re.MULTILINE", SRE_FLAG_MULTILINE},
   1354         {"re.DOTALL", SRE_FLAG_DOTALL},
   1355         {"re.UNICODE", SRE_FLAG_UNICODE},
   1356         {"re.VERBOSE", SRE_FLAG_VERBOSE},
   1357         {"re.DEBUG", SRE_FLAG_DEBUG},
   1358         {"re.ASCII", SRE_FLAG_ASCII},
   1359     };
   1360     PyObject *result = NULL;
   1361     PyObject *flag_items;
   1362     size_t i;
   1363     int flags = obj->flags;
   1364 
   1365     /* Omit re.UNICODE for valid string patterns. */
   1366     if (obj->isbytes == 0 &&
   1367         (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
   1368          SRE_FLAG_UNICODE)
   1369         flags &= ~SRE_FLAG_UNICODE;
   1370 
   1371     flag_items = PyList_New(0);
   1372     if (!flag_items)
   1373         return NULL;
   1374 
   1375     for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
   1376         if (flags & flag_names[i].value) {
   1377             PyObject *item = PyUnicode_FromString(flag_names[i].name);
   1378             if (!item)
   1379                 goto done;
   1380 
   1381             if (PyList_Append(flag_items, item) < 0) {
   1382                 Py_DECREF(item);
   1383                 goto done;
   1384             }
   1385             Py_DECREF(item);
   1386             flags &= ~flag_names[i].value;
   1387         }
   1388     }
   1389     if (flags) {
   1390         PyObject *item = PyUnicode_FromFormat("0x%x", flags);
   1391         if (!item)
   1392             goto done;
   1393 
   1394         if (PyList_Append(flag_items, item) < 0) {
   1395             Py_DECREF(item);
   1396             goto done;
   1397         }
   1398         Py_DECREF(item);
   1399     }
   1400 
   1401     if (PyList_Size(flag_items) > 0) {
   1402         PyObject *flags_result;
   1403         PyObject *sep = PyUnicode_FromString("|");
   1404         if (!sep)
   1405             goto done;
   1406         flags_result = PyUnicode_Join(sep, flag_items);
   1407         Py_DECREF(sep);
   1408         if (!flags_result)
   1409             goto done;
   1410         result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
   1411                                       obj->pattern, flags_result);
   1412         Py_DECREF(flags_result);
   1413     }
   1414     else {
   1415         result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
   1416     }
   1417 
   1418 done:
   1419     Py_DECREF(flag_items);
   1420     return result;
   1421 }
   1422 
   1423 PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
   1424 
   1425 /* PatternObject's 'groupindex' method. */
   1426 static PyObject *
   1427 pattern_groupindex(PatternObject *self)
   1428 {
   1429     return PyDictProxy_New(self->groupindex);
   1430 }
   1431 
   1432 static int _validate(PatternObject *self); /* Forward */
   1433 
   1434 /*[clinic input]
   1435 _sre.compile
   1436 
   1437     pattern: object
   1438     flags: int
   1439     code: object(subclass_of='&PyList_Type')
   1440     groups: Py_ssize_t
   1441     groupindex: object
   1442     indexgroup: object
   1443 
   1444 [clinic start generated code]*/
   1445 
   1446 static PyObject *
   1447 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
   1448                   PyObject *code, Py_ssize_t groups, PyObject *groupindex,
   1449                   PyObject *indexgroup)
   1450 /*[clinic end generated code: output=ef9c2b3693776404 input=7d059ec8ae1edb85]*/
   1451 {
   1452     /* "compile" pattern descriptor to pattern object */
   1453 
   1454     PatternObject* self;
   1455     Py_ssize_t i, n;
   1456 
   1457     n = PyList_GET_SIZE(code);
   1458     /* coverity[ampersand_in_size] */
   1459     self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
   1460     if (!self)
   1461         return NULL;
   1462     self->weakreflist = NULL;
   1463     self->pattern = NULL;
   1464     self->groupindex = NULL;
   1465     self->indexgroup = NULL;
   1466 
   1467     self->codesize = n;
   1468 
   1469     for (i = 0; i < n; i++) {
   1470         PyObject *o = PyList_GET_ITEM(code, i);
   1471         unsigned long value = PyLong_AsUnsignedLong(o);
   1472         self->code[i] = (SRE_CODE) value;
   1473         if ((unsigned long) self->code[i] != value) {
   1474             PyErr_SetString(PyExc_OverflowError,
   1475                             "regular expression code size limit exceeded");
   1476             break;
   1477         }
   1478     }
   1479 
   1480     if (PyErr_Occurred()) {
   1481         Py_DECREF(self);
   1482         return NULL;
   1483     }
   1484 
   1485     if (pattern == Py_None) {
   1486         self->isbytes = -1;
   1487     }
   1488     else {
   1489         Py_ssize_t p_length;
   1490         int charsize;
   1491         Py_buffer view;
   1492         view.buf = NULL;
   1493         if (!getstring(pattern, &p_length, &self->isbytes,
   1494                        &charsize, &view)) {
   1495             Py_DECREF(self);
   1496             return NULL;
   1497         }
   1498         if (view.buf)
   1499             PyBuffer_Release(&view);
   1500     }
   1501 
   1502     Py_INCREF(pattern);
   1503     self->pattern = pattern;
   1504 
   1505     self->flags = flags;
   1506 
   1507     self->groups = groups;
   1508 
   1509     Py_INCREF(groupindex);
   1510     self->groupindex = groupindex;
   1511 
   1512     Py_INCREF(indexgroup);
   1513     self->indexgroup = indexgroup;
   1514 
   1515     if (!_validate(self)) {
   1516         Py_DECREF(self);
   1517         return NULL;
   1518     }
   1519 
   1520     return (PyObject*) self;
   1521 }
   1522 
   1523 /* -------------------------------------------------------------------- */
   1524 /* Code validation */
   1525 
   1526 /* To learn more about this code, have a look at the _compile() function in
   1527    Lib/sre_compile.py.  The validation functions below checks the code array
   1528    for conformance with the code patterns generated there.
   1529 
   1530    The nice thing about the generated code is that it is position-independent:
   1531    all jumps are relative jumps forward.  Also, jumps don't cross each other:
   1532    the target of a later jump is always earlier than the target of an earlier
   1533    jump.  IOW, this is okay:
   1534 
   1535    J---------J-------T--------T
   1536     \         \_____/        /
   1537      \______________________/
   1538 
   1539    but this is not:
   1540 
   1541    J---------J-------T--------T
   1542     \_________\_____/        /
   1543                \____________/
   1544 
   1545    It also helps that SRE_CODE is always an unsigned type.
   1546 */
   1547 
   1548 /* Defining this one enables tracing of the validator */
   1549 #undef VVERBOSE
   1550 
   1551 /* Trace macro for the validator */
   1552 #if defined(VVERBOSE)
   1553 #define VTRACE(v) printf v
   1554 #else
   1555 #define VTRACE(v) do {} while(0)  /* do nothing */
   1556 #endif
   1557 
   1558 /* Report failure */
   1559 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
   1560 
   1561 /* Extract opcode, argument, or skip count from code array */
   1562 #define GET_OP                                          \
   1563     do {                                                \
   1564         VTRACE(("%p: ", code));                         \
   1565         if (code >= end) FAIL;                          \
   1566         op = *code++;                                   \
   1567         VTRACE(("%lu (op)\n", (unsigned long)op));      \
   1568     } while (0)
   1569 #define GET_ARG                                         \
   1570     do {                                                \
   1571         VTRACE(("%p= ", code));                         \
   1572         if (code >= end) FAIL;                          \
   1573         arg = *code++;                                  \
   1574         VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
   1575     } while (0)
   1576 #define GET_SKIP_ADJ(adj)                               \
   1577     do {                                                \
   1578         VTRACE(("%p= ", code));                         \
   1579         if (code >= end) FAIL;                          \
   1580         skip = *code;                                   \
   1581         VTRACE(("%lu (skip to %p)\n",                   \
   1582                (unsigned long)skip, code+skip));        \
   1583         if (skip-adj > (uintptr_t)(end - code))      \
   1584             FAIL;                                       \
   1585         code++;                                         \
   1586     } while (0)
   1587 #define GET_SKIP GET_SKIP_ADJ(0)
   1588 
   1589 static int
   1590 _validate_charset(SRE_CODE *code, SRE_CODE *end)
   1591 {
   1592     /* Some variables are manipulated by the macros above */
   1593     SRE_CODE op;
   1594     SRE_CODE arg;
   1595     SRE_CODE offset;
   1596     int i;
   1597 
   1598     while (code < end) {
   1599         GET_OP;
   1600         switch (op) {
   1601 
   1602         case SRE_OP_NEGATE:
   1603             break;
   1604 
   1605         case SRE_OP_LITERAL:
   1606             GET_ARG;
   1607             break;
   1608 
   1609         case SRE_OP_RANGE:
   1610         case SRE_OP_RANGE_IGNORE:
   1611             GET_ARG;
   1612             GET_ARG;
   1613             break;
   1614 
   1615         case SRE_OP_CHARSET:
   1616             offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
   1617             if (offset > (uintptr_t)(end - code))
   1618                 FAIL;
   1619             code += offset;
   1620             break;
   1621 
   1622         case SRE_OP_BIGCHARSET:
   1623             GET_ARG; /* Number of blocks */
   1624             offset = 256/sizeof(SRE_CODE); /* 256-byte table */
   1625             if (offset > (uintptr_t)(end - code))
   1626                 FAIL;
   1627             /* Make sure that each byte points to a valid block */
   1628             for (i = 0; i < 256; i++) {
   1629                 if (((unsigned char *)code)[i] >= arg)
   1630                     FAIL;
   1631             }
   1632             code += offset;
   1633             offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
   1634             if (offset > (uintptr_t)(end - code))
   1635                 FAIL;
   1636             code += offset;
   1637             break;
   1638 
   1639         case SRE_OP_CATEGORY:
   1640             GET_ARG;
   1641             switch (arg) {
   1642             case SRE_CATEGORY_DIGIT:
   1643             case SRE_CATEGORY_NOT_DIGIT:
   1644             case SRE_CATEGORY_SPACE:
   1645             case SRE_CATEGORY_NOT_SPACE:
   1646             case SRE_CATEGORY_WORD:
   1647             case SRE_CATEGORY_NOT_WORD:
   1648             case SRE_CATEGORY_LINEBREAK:
   1649             case SRE_CATEGORY_NOT_LINEBREAK:
   1650             case SRE_CATEGORY_LOC_WORD:
   1651             case SRE_CATEGORY_LOC_NOT_WORD:
   1652             case SRE_CATEGORY_UNI_DIGIT:
   1653             case SRE_CATEGORY_UNI_NOT_DIGIT:
   1654             case SRE_CATEGORY_UNI_SPACE:
   1655             case SRE_CATEGORY_UNI_NOT_SPACE:
   1656             case SRE_CATEGORY_UNI_WORD:
   1657             case SRE_CATEGORY_UNI_NOT_WORD:
   1658             case SRE_CATEGORY_UNI_LINEBREAK:
   1659             case SRE_CATEGORY_UNI_NOT_LINEBREAK:
   1660                 break;
   1661             default:
   1662                 FAIL;
   1663             }
   1664             break;
   1665 
   1666         default:
   1667             FAIL;
   1668 
   1669         }
   1670     }
   1671 
   1672     return 1;
   1673 }
   1674 
   1675 static int
   1676 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
   1677 {
   1678     /* Some variables are manipulated by the macros above */
   1679     SRE_CODE op;
   1680     SRE_CODE arg;
   1681     SRE_CODE skip;
   1682 
   1683     VTRACE(("code=%p, end=%p\n", code, end));
   1684 
   1685     if (code > end)
   1686         FAIL;
   1687 
   1688     while (code < end) {
   1689         GET_OP;
   1690         switch (op) {
   1691 
   1692         case SRE_OP_MARK:
   1693             /* We don't check whether marks are properly nested; the
   1694                sre_match() code is robust even if they don't, and the worst
   1695                you can get is nonsensical match results. */
   1696             GET_ARG;
   1697             if (arg > 2 * (size_t)groups + 1) {
   1698                 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
   1699                 FAIL;
   1700             }
   1701             break;
   1702 
   1703         case SRE_OP_LITERAL:
   1704         case SRE_OP_NOT_LITERAL:
   1705         case SRE_OP_LITERAL_IGNORE:
   1706         case SRE_OP_NOT_LITERAL_IGNORE:
   1707             GET_ARG;
   1708             /* The arg is just a character, nothing to check */
   1709             break;
   1710 
   1711         case SRE_OP_SUCCESS:
   1712         case SRE_OP_FAILURE:
   1713             /* Nothing to check; these normally end the matching process */
   1714             break;
   1715 
   1716         case SRE_OP_AT:
   1717             GET_ARG;
   1718             switch (arg) {
   1719             case SRE_AT_BEGINNING:
   1720             case SRE_AT_BEGINNING_STRING:
   1721             case SRE_AT_BEGINNING_LINE:
   1722             case SRE_AT_END:
   1723             case SRE_AT_END_LINE:
   1724             case SRE_AT_END_STRING:
   1725             case SRE_AT_BOUNDARY:
   1726             case SRE_AT_NON_BOUNDARY:
   1727             case SRE_AT_LOC_BOUNDARY:
   1728             case SRE_AT_LOC_NON_BOUNDARY:
   1729             case SRE_AT_UNI_BOUNDARY:
   1730             case SRE_AT_UNI_NON_BOUNDARY:
   1731                 break;
   1732             default:
   1733                 FAIL;
   1734             }
   1735             break;
   1736 
   1737         case SRE_OP_ANY:
   1738         case SRE_OP_ANY_ALL:
   1739             /* These have no operands */
   1740             break;
   1741 
   1742         case SRE_OP_IN:
   1743         case SRE_OP_IN_IGNORE:
   1744             GET_SKIP;
   1745             /* Stop 1 before the end; we check the FAILURE below */
   1746             if (!_validate_charset(code, code+skip-2))
   1747                 FAIL;
   1748             if (code[skip-2] != SRE_OP_FAILURE)
   1749                 FAIL;
   1750             code += skip-1;
   1751             break;
   1752 
   1753         case SRE_OP_INFO:
   1754             {
   1755                 /* A minimal info field is
   1756                    <INFO> <1=skip> <2=flags> <3=min> <4=max>;
   1757                    If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
   1758                    more follows. */
   1759                 SRE_CODE flags, i;
   1760                 SRE_CODE *newcode;
   1761                 GET_SKIP;
   1762                 newcode = code+skip-1;
   1763                 GET_ARG; flags = arg;
   1764                 GET_ARG;
   1765                 GET_ARG;
   1766                 /* Check that only valid flags are present */
   1767                 if ((flags & ~(SRE_INFO_PREFIX |
   1768                                SRE_INFO_LITERAL |
   1769                                SRE_INFO_CHARSET)) != 0)
   1770                     FAIL;
   1771                 /* PREFIX and CHARSET are mutually exclusive */
   1772                 if ((flags & SRE_INFO_PREFIX) &&
   1773                     (flags & SRE_INFO_CHARSET))
   1774                     FAIL;
   1775                 /* LITERAL implies PREFIX */
   1776                 if ((flags & SRE_INFO_LITERAL) &&
   1777                     !(flags & SRE_INFO_PREFIX))
   1778                     FAIL;
   1779                 /* Validate the prefix */
   1780                 if (flags & SRE_INFO_PREFIX) {
   1781                     SRE_CODE prefix_len;
   1782                     GET_ARG; prefix_len = arg;
   1783                     GET_ARG;
   1784                     /* Here comes the prefix string */
   1785                     if (prefix_len > (uintptr_t)(newcode - code))
   1786                         FAIL;
   1787                     code += prefix_len;
   1788                     /* And here comes the overlap table */
   1789                     if (prefix_len > (uintptr_t)(newcode - code))
   1790                         FAIL;
   1791                     /* Each overlap value should be < prefix_len */
   1792                     for (i = 0; i < prefix_len; i++) {
   1793                         if (code[i] >= prefix_len)
   1794                             FAIL;
   1795                     }
   1796                     code += prefix_len;
   1797                 }
   1798                 /* Validate the charset */
   1799                 if (flags & SRE_INFO_CHARSET) {
   1800                     if (!_validate_charset(code, newcode-1))
   1801                         FAIL;
   1802                     if (newcode[-1] != SRE_OP_FAILURE)
   1803                         FAIL;
   1804                     code = newcode;
   1805                 }
   1806                 else if (code != newcode) {
   1807                   VTRACE(("code=%p, newcode=%p\n", code, newcode));
   1808                     FAIL;
   1809                 }
   1810             }
   1811             break;
   1812 
   1813         case SRE_OP_BRANCH:
   1814             {
   1815                 SRE_CODE *target = NULL;
   1816                 for (;;) {
   1817                     GET_SKIP;
   1818                     if (skip == 0)
   1819                         break;
   1820                     /* Stop 2 before the end; we check the JUMP below */
   1821                     if (!_validate_inner(code, code+skip-3, groups))
   1822                         FAIL;
   1823                     code += skip-3;
   1824                     /* Check that it ends with a JUMP, and that each JUMP
   1825                        has the same target */
   1826                     GET_OP;
   1827                     if (op != SRE_OP_JUMP)
   1828                         FAIL;
   1829                     GET_SKIP;
   1830                     if (target == NULL)
   1831                         target = code+skip-1;
   1832                     else if (code+skip-1 != target)
   1833                         FAIL;
   1834                 }
   1835             }
   1836             break;
   1837 
   1838         case SRE_OP_REPEAT_ONE:
   1839         case SRE_OP_MIN_REPEAT_ONE:
   1840             {
   1841                 SRE_CODE min, max;
   1842                 GET_SKIP;
   1843                 GET_ARG; min = arg;
   1844                 GET_ARG; max = arg;
   1845                 if (min > max)
   1846                     FAIL;
   1847                 if (max > SRE_MAXREPEAT)
   1848                     FAIL;
   1849                 if (!_validate_inner(code, code+skip-4, groups))
   1850                     FAIL;
   1851                 code += skip-4;
   1852                 GET_OP;
   1853                 if (op != SRE_OP_SUCCESS)
   1854                     FAIL;
   1855             }
   1856             break;
   1857 
   1858         case SRE_OP_REPEAT:
   1859             {
   1860                 SRE_CODE min, max;
   1861                 GET_SKIP;
   1862                 GET_ARG; min = arg;
   1863                 GET_ARG; max = arg;
   1864                 if (min > max)
   1865                     FAIL;
   1866                 if (max > SRE_MAXREPEAT)
   1867                     FAIL;
   1868                 if (!_validate_inner(code, code+skip-3, groups))
   1869                     FAIL;
   1870                 code += skip-3;
   1871                 GET_OP;
   1872                 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
   1873                     FAIL;
   1874             }
   1875             break;
   1876 
   1877         case SRE_OP_GROUPREF:
   1878         case SRE_OP_GROUPREF_IGNORE:
   1879             GET_ARG;
   1880             if (arg >= (size_t)groups)
   1881                 FAIL;
   1882             break;
   1883 
   1884         case SRE_OP_GROUPREF_EXISTS:
   1885             /* The regex syntax for this is: '(?(group)then|else)', where
   1886                'group' is either an integer group number or a group name,
   1887                'then' and 'else' are sub-regexes, and 'else' is optional. */
   1888             GET_ARG;
   1889             if (arg >= (size_t)groups)
   1890                 FAIL;
   1891             GET_SKIP_ADJ(1);
   1892             code--; /* The skip is relative to the first arg! */
   1893             /* There are two possibilities here: if there is both a 'then'
   1894                part and an 'else' part, the generated code looks like:
   1895 
   1896                GROUPREF_EXISTS
   1897                <group>
   1898                <skipyes>
   1899                ...then part...
   1900                JUMP
   1901                <skipno>
   1902                (<skipyes> jumps here)
   1903                ...else part...
   1904                (<skipno> jumps here)
   1905 
   1906                If there is only a 'then' part, it looks like:
   1907 
   1908                GROUPREF_EXISTS
   1909                <group>
   1910                <skip>
   1911                ...then part...
   1912                (<skip> jumps here)
   1913 
   1914                There is no direct way to decide which it is, and we don't want
   1915                to allow arbitrary jumps anywhere in the code; so we just look
   1916                for a JUMP opcode preceding our skip target.
   1917             */
   1918             if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
   1919                 code[skip-3] == SRE_OP_JUMP)
   1920             {
   1921                 VTRACE(("both then and else parts present\n"));
   1922                 if (!_validate_inner(code+1, code+skip-3, groups))
   1923                     FAIL;
   1924                 code += skip-2; /* Position after JUMP, at <skipno> */
   1925                 GET_SKIP;
   1926                 if (!_validate_inner(code, code+skip-1, groups))
   1927                     FAIL;
   1928                 code += skip-1;
   1929             }
   1930             else {
   1931                 VTRACE(("only a then part present\n"));
   1932                 if (!_validate_inner(code+1, code+skip-1, groups))
   1933                     FAIL;
   1934                 code += skip-1;
   1935             }
   1936             break;
   1937 
   1938         case SRE_OP_ASSERT:
   1939         case SRE_OP_ASSERT_NOT:
   1940             GET_SKIP;
   1941             GET_ARG; /* 0 for lookahead, width for lookbehind */
   1942             code--; /* Back up over arg to simplify math below */
   1943             if (arg & 0x80000000)
   1944                 FAIL; /* Width too large */
   1945             /* Stop 1 before the end; we check the SUCCESS below */
   1946             if (!_validate_inner(code+1, code+skip-2, groups))
   1947                 FAIL;
   1948             code += skip-2;
   1949             GET_OP;
   1950             if (op != SRE_OP_SUCCESS)
   1951                 FAIL;
   1952             break;
   1953 
   1954         default:
   1955             FAIL;
   1956 
   1957         }
   1958     }
   1959 
   1960     VTRACE(("okay\n"));
   1961     return 1;
   1962 }
   1963 
   1964 static int
   1965 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
   1966 {
   1967     if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
   1968         code >= end || end[-1] != SRE_OP_SUCCESS)
   1969         FAIL;
   1970     return _validate_inner(code, end-1, groups);
   1971 }
   1972 
   1973 static int
   1974 _validate(PatternObject *self)
   1975 {
   1976     if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
   1977     {
   1978         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
   1979         return 0;
   1980     }
   1981     else
   1982         VTRACE(("Success!\n"));
   1983     return 1;
   1984 }
   1985 
   1986 /* -------------------------------------------------------------------- */
   1987 /* match methods */
   1988 
   1989 static void
   1990 match_dealloc(MatchObject* self)
   1991 {
   1992     Py_XDECREF(self->regs);
   1993     Py_XDECREF(self->string);
   1994     Py_DECREF(self->pattern);
   1995     PyObject_DEL(self);
   1996 }
   1997 
   1998 static PyObject*
   1999 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
   2000 {
   2001     Py_ssize_t length;
   2002     int isbytes, charsize;
   2003     Py_buffer view;
   2004     PyObject *result;
   2005     void* ptr;
   2006     Py_ssize_t i, j;
   2007 
   2008     if (index < 0 || index >= self->groups) {
   2009         /* raise IndexError if we were given a bad group number */
   2010         PyErr_SetString(
   2011             PyExc_IndexError,
   2012             "no such group"
   2013             );
   2014         return NULL;
   2015     }
   2016 
   2017     index *= 2;
   2018 
   2019     if (self->string == Py_None || self->mark[index] < 0) {
   2020         /* return default value if the string or group is undefined */
   2021         Py_INCREF(def);
   2022         return def;
   2023     }
   2024 
   2025     ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
   2026     if (ptr == NULL)
   2027         return NULL;
   2028 
   2029     i = self->mark[index];
   2030     j = self->mark[index+1];
   2031     i = Py_MIN(i, length);
   2032     j = Py_MIN(j, length);
   2033     result = getslice(isbytes, ptr, self->string, i, j);
   2034     if (isbytes && view.buf != NULL)
   2035         PyBuffer_Release(&view);
   2036     return result;
   2037 }
   2038 
   2039 static Py_ssize_t
   2040 match_getindex(MatchObject* self, PyObject* index)
   2041 {
   2042     Py_ssize_t i;
   2043 
   2044     if (index == NULL)
   2045         /* Default value */
   2046         return 0;
   2047 
   2048     if (PyIndex_Check(index)) {
   2049         return PyNumber_AsSsize_t(index, NULL);
   2050     }
   2051 
   2052     i = -1;
   2053 
   2054     if (self->pattern->groupindex) {
   2055         index = PyObject_GetItem(self->pattern->groupindex, index);
   2056         if (index) {
   2057             if (PyLong_Check(index))
   2058                 i = PyLong_AsSsize_t(index);
   2059             Py_DECREF(index);
   2060         } else
   2061             PyErr_Clear();
   2062     }
   2063 
   2064     return i;
   2065 }
   2066 
   2067 static PyObject*
   2068 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
   2069 {
   2070     return match_getslice_by_index(self, match_getindex(self, index), def);
   2071 }
   2072 
   2073 /*[clinic input]
   2074 _sre.SRE_Match.expand
   2075 
   2076     template: object
   2077 
   2078 Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
   2079 [clinic start generated code]*/
   2080 
   2081 static PyObject *
   2082 _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
   2083 /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
   2084 {
   2085     /* delegate to Python code */
   2086     return call(
   2087         SRE_PY_MODULE, "_expand",
   2088         PyTuple_Pack(3, self->pattern, self, template)
   2089         );
   2090 }
   2091 
   2092 static PyObject*
   2093 match_group(MatchObject* self, PyObject* args)
   2094 {
   2095     PyObject* result;
   2096     Py_ssize_t i, size;
   2097 
   2098     size = PyTuple_GET_SIZE(args);
   2099 
   2100     switch (size) {
   2101     case 0:
   2102         result = match_getslice(self, Py_False, Py_None);
   2103         break;
   2104     case 1:
   2105         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
   2106         break;
   2107     default:
   2108         /* fetch multiple items */
   2109         result = PyTuple_New(size);
   2110         if (!result)
   2111             return NULL;
   2112         for (i = 0; i < size; i++) {
   2113             PyObject* item = match_getslice(
   2114                 self, PyTuple_GET_ITEM(args, i), Py_None
   2115                 );
   2116             if (!item) {
   2117                 Py_DECREF(result);
   2118                 return NULL;
   2119             }
   2120             PyTuple_SET_ITEM(result, i, item);
   2121         }
   2122         break;
   2123     }
   2124     return result;
   2125 }
   2126 
   2127 static PyObject*
   2128 match_getitem(MatchObject* self, PyObject* name)
   2129 {
   2130     return match_getslice(self, name, Py_None);
   2131 }
   2132 
   2133 /*[clinic input]
   2134 _sre.SRE_Match.groups
   2135 
   2136     default: object = None
   2137         Is used for groups that did not participate in the match.
   2138 
   2139 Return a tuple containing all the subgroups of the match, from 1.
   2140 [clinic start generated code]*/
   2141 
   2142 static PyObject *
   2143 _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
   2144 /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
   2145 {
   2146     PyObject* result;
   2147     Py_ssize_t index;
   2148 
   2149     result = PyTuple_New(self->groups-1);
   2150     if (!result)
   2151         return NULL;
   2152 
   2153     for (index = 1; index < self->groups; index++) {
   2154         PyObject* item;
   2155         item = match_getslice_by_index(self, index, default_value);
   2156         if (!item) {
   2157             Py_DECREF(result);
   2158             return NULL;
   2159         }
   2160         PyTuple_SET_ITEM(result, index-1, item);
   2161     }
   2162 
   2163     return result;
   2164 }
   2165 
   2166 /*[clinic input]
   2167 _sre.SRE_Match.groupdict
   2168 
   2169     default: object = None
   2170         Is used for groups that did not participate in the match.
   2171 
   2172 Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
   2173 [clinic start generated code]*/
   2174 
   2175 static PyObject *
   2176 _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
   2177 /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
   2178 {
   2179     PyObject* result;
   2180     PyObject* keys;
   2181     Py_ssize_t index;
   2182 
   2183     result = PyDict_New();
   2184     if (!result || !self->pattern->groupindex)
   2185         return result;
   2186 
   2187     keys = PyMapping_Keys(self->pattern->groupindex);
   2188     if (!keys)
   2189         goto failed;
   2190 
   2191     for (index = 0; index < PyList_GET_SIZE(keys); index++) {
   2192         int status;
   2193         PyObject* key;
   2194         PyObject* value;
   2195         key = PyList_GET_ITEM(keys, index);
   2196         if (!key)
   2197             goto failed;
   2198         value = match_getslice(self, key, default_value);
   2199         if (!value)
   2200             goto failed;
   2201         status = PyDict_SetItem(result, key, value);
   2202         Py_DECREF(value);
   2203         if (status < 0)
   2204             goto failed;
   2205     }
   2206 
   2207     Py_DECREF(keys);
   2208 
   2209     return result;
   2210 
   2211 failed:
   2212     Py_XDECREF(keys);
   2213     Py_DECREF(result);
   2214     return NULL;
   2215 }
   2216 
   2217 /*[clinic input]
   2218 _sre.SRE_Match.start -> Py_ssize_t
   2219 
   2220     group: object(c_default="NULL") = 0
   2221     /
   2222 
   2223 Return index of the start of the substring matched by group.
   2224 [clinic start generated code]*/
   2225 
   2226 static Py_ssize_t
   2227 _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
   2228 /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
   2229 {
   2230     Py_ssize_t index = match_getindex(self, group);
   2231 
   2232     if (index < 0 || index >= self->groups) {
   2233         PyErr_SetString(
   2234             PyExc_IndexError,
   2235             "no such group"
   2236             );
   2237         return -1;
   2238     }
   2239 
   2240     /* mark is -1 if group is undefined */
   2241     return self->mark[index*2];
   2242 }
   2243 
   2244 /*[clinic input]
   2245 _sre.SRE_Match.end -> Py_ssize_t
   2246 
   2247     group: object(c_default="NULL") = 0
   2248     /
   2249 
   2250 Return index of the end of the substring matched by group.
   2251 [clinic start generated code]*/
   2252 
   2253 static Py_ssize_t
   2254 _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
   2255 /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
   2256 {
   2257     Py_ssize_t index = match_getindex(self, group);
   2258 
   2259     if (index < 0 || index >= self->groups) {
   2260         PyErr_SetString(
   2261             PyExc_IndexError,
   2262             "no such group"
   2263             );
   2264         return -1;
   2265     }
   2266 
   2267     /* mark is -1 if group is undefined */
   2268     return self->mark[index*2+1];
   2269 }
   2270 
   2271 LOCAL(PyObject*)
   2272 _pair(Py_ssize_t i1, Py_ssize_t i2)
   2273 {
   2274     PyObject* pair;
   2275     PyObject* item;
   2276 
   2277     pair = PyTuple_New(2);
   2278     if (!pair)
   2279         return NULL;
   2280 
   2281     item = PyLong_FromSsize_t(i1);
   2282     if (!item)
   2283         goto error;
   2284     PyTuple_SET_ITEM(pair, 0, item);
   2285 
   2286     item = PyLong_FromSsize_t(i2);
   2287     if (!item)
   2288         goto error;
   2289     PyTuple_SET_ITEM(pair, 1, item);
   2290 
   2291     return pair;
   2292 
   2293   error:
   2294     Py_DECREF(pair);
   2295     return NULL;
   2296 }
   2297 
   2298 /*[clinic input]
   2299 _sre.SRE_Match.span
   2300 
   2301     group: object(c_default="NULL") = 0
   2302     /
   2303 
   2304 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
   2305 [clinic start generated code]*/
   2306 
   2307 static PyObject *
   2308 _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
   2309 /*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
   2310 {
   2311     Py_ssize_t index = match_getindex(self, group);
   2312 
   2313     if (index < 0 || index >= self->groups) {
   2314         PyErr_SetString(
   2315             PyExc_IndexError,
   2316             "no such group"
   2317             );
   2318         return NULL;
   2319     }
   2320 
   2321     /* marks are -1 if group is undefined */
   2322     return _pair(self->mark[index*2], self->mark[index*2+1]);
   2323 }
   2324 
   2325 static PyObject*
   2326 match_regs(MatchObject* self)
   2327 {
   2328     PyObject* regs;
   2329     PyObject* item;
   2330     Py_ssize_t index;
   2331 
   2332     regs = PyTuple_New(self->groups);
   2333     if (!regs)
   2334         return NULL;
   2335 
   2336     for (index = 0; index < self->groups; index++) {
   2337         item = _pair(self->mark[index*2], self->mark[index*2+1]);
   2338         if (!item) {
   2339             Py_DECREF(regs);
   2340             return NULL;
   2341         }
   2342         PyTuple_SET_ITEM(regs, index, item);
   2343     }
   2344 
   2345     Py_INCREF(regs);
   2346     self->regs = regs;
   2347 
   2348     return regs;
   2349 }
   2350 
   2351 /*[clinic input]
   2352 _sre.SRE_Match.__copy__
   2353 
   2354 [clinic start generated code]*/
   2355 
   2356 static PyObject *
   2357 _sre_SRE_Match___copy___impl(MatchObject *self)
   2358 /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
   2359 {
   2360 #ifdef USE_BUILTIN_COPY
   2361     MatchObject* copy;
   2362     Py_ssize_t slots, offset;
   2363 
   2364     slots = 2 * (self->pattern->groups+1);
   2365 
   2366     copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
   2367     if (!copy)
   2368         return NULL;
   2369 
   2370     /* this value a constant, but any compiler should be able to
   2371        figure that out all by itself */
   2372     offset = offsetof(MatchObject, string);
   2373 
   2374     Py_XINCREF(self->pattern);
   2375     Py_XINCREF(self->string);
   2376     Py_XINCREF(self->regs);
   2377 
   2378     memcpy((char*) copy + offset, (char*) self + offset,
   2379            sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
   2380 
   2381     return (PyObject*) copy;
   2382 #else
   2383     PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
   2384     return NULL;
   2385 #endif
   2386 }
   2387 
   2388 /*[clinic input]
   2389 _sre.SRE_Match.__deepcopy__
   2390 
   2391     memo: object
   2392 
   2393 [clinic start generated code]*/
   2394 
   2395 static PyObject *
   2396 _sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
   2397 /*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
   2398 {
   2399 #ifdef USE_BUILTIN_COPY
   2400     MatchObject* copy;
   2401 
   2402     copy = (MatchObject*) match_copy(self);
   2403     if (!copy)
   2404         return NULL;
   2405 
   2406     if (!deepcopy((PyObject**) &copy->pattern, memo) ||
   2407         !deepcopy(&copy->string, memo) ||
   2408         !deepcopy(&copy->regs, memo)) {
   2409         Py_DECREF(copy);
   2410         return NULL;
   2411     }
   2412 
   2413 #else
   2414     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
   2415     return NULL;
   2416 #endif
   2417 }
   2418 
   2419 PyDoc_STRVAR(match_doc,
   2420 "The result of re.match() and re.search().\n\
   2421 Match objects always have a boolean value of True.");
   2422 
   2423 PyDoc_STRVAR(match_group_doc,
   2424 "group([group1, ...]) -> str or tuple.\n\
   2425     Return subgroup(s) of the match by indices or names.\n\
   2426     For 0 returns the entire match.");
   2427 
   2428 static PyObject *
   2429 match_lastindex_get(MatchObject *self)
   2430 {
   2431     if (self->lastindex >= 0)
   2432         return PyLong_FromSsize_t(self->lastindex);
   2433     Py_INCREF(Py_None);
   2434     return Py_None;
   2435 }
   2436 
   2437 static PyObject *
   2438 match_lastgroup_get(MatchObject *self)
   2439 {
   2440     if (self->pattern->indexgroup && self->lastindex >= 0) {
   2441         PyObject* result = PySequence_GetItem(
   2442             self->pattern->indexgroup, self->lastindex
   2443             );
   2444         if (result)
   2445             return result;
   2446         PyErr_Clear();
   2447     }
   2448     Py_INCREF(Py_None);
   2449     return Py_None;
   2450 }
   2451 
   2452 static PyObject *
   2453 match_regs_get(MatchObject *self)
   2454 {
   2455     if (self->regs) {
   2456         Py_INCREF(self->regs);
   2457         return self->regs;
   2458     } else
   2459         return match_regs(self);
   2460 }
   2461 
   2462 static PyObject *
   2463 match_repr(MatchObject *self)
   2464 {
   2465     PyObject *result;
   2466     PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
   2467     if (group0 == NULL)
   2468         return NULL;
   2469     result = PyUnicode_FromFormat(
   2470             "<%s object; span=(%d, %d), match=%.50R>",
   2471             Py_TYPE(self)->tp_name,
   2472             self->mark[0], self->mark[1], group0);
   2473     Py_DECREF(group0);
   2474     return result;
   2475 }
   2476 
   2477 
   2478 static PyObject*
   2479 pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
   2480 {
   2481     /* create match object (from state object) */
   2482 
   2483     MatchObject* match;
   2484     Py_ssize_t i, j;
   2485     char* base;
   2486     int n;
   2487 
   2488     if (status > 0) {
   2489 
   2490         /* create match object (with room for extra group marks) */
   2491         /* coverity[ampersand_in_size] */
   2492         match = PyObject_NEW_VAR(MatchObject, &Match_Type,
   2493                                  2*(pattern->groups+1));
   2494         if (!match)
   2495             return NULL;
   2496 
   2497         Py_INCREF(pattern);
   2498         match->pattern = pattern;
   2499 
   2500         Py_INCREF(state->string);
   2501         match->string = state->string;
   2502 
   2503         match->regs = NULL;
   2504         match->groups = pattern->groups+1;
   2505 
   2506         /* fill in group slices */
   2507 
   2508         base = (char*) state->beginning;
   2509         n = state->charsize;
   2510 
   2511         match->mark[0] = ((char*) state->start - base) / n;
   2512         match->mark[1] = ((char*) state->ptr - base) / n;
   2513 
   2514         for (i = j = 0; i < pattern->groups; i++, j+=2)
   2515             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
   2516                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
   2517                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
   2518             } else
   2519                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
   2520 
   2521         match->pos = state->pos;
   2522         match->endpos = state->endpos;
   2523 
   2524         match->lastindex = state->lastindex;
   2525 
   2526         return (PyObject*) match;
   2527 
   2528     } else if (status == 0) {
   2529 
   2530         /* no match */
   2531         Py_INCREF(Py_None);
   2532         return Py_None;
   2533 
   2534     }
   2535 
   2536     /* internal error */
   2537     pattern_error(status);
   2538     return NULL;
   2539 }
   2540 
   2541 
   2542 /* -------------------------------------------------------------------- */
   2543 /* scanner methods (experimental) */
   2544 
   2545 static void
   2546 scanner_dealloc(ScannerObject* self)
   2547 {
   2548     state_fini(&self->state);
   2549     Py_XDECREF(self->pattern);
   2550     PyObject_DEL(self);
   2551 }
   2552 
   2553 /*[clinic input]
   2554 _sre.SRE_Scanner.match
   2555 
   2556 [clinic start generated code]*/
   2557 
   2558 static PyObject *
   2559 _sre_SRE_Scanner_match_impl(ScannerObject *self)
   2560 /*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
   2561 {
   2562     SRE_STATE* state = &self->state;
   2563     PyObject* match;
   2564     Py_ssize_t status;
   2565 
   2566     if (state->start == NULL)
   2567         Py_RETURN_NONE;
   2568 
   2569     state_reset(state);
   2570 
   2571     state->ptr = state->start;
   2572 
   2573     status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
   2574     if (PyErr_Occurred())
   2575         return NULL;
   2576 
   2577     match = pattern_new_match((PatternObject*) self->pattern,
   2578                                state, status);
   2579 
   2580     if (status == 0)
   2581         state->start = NULL;
   2582     else if (state->ptr != state->start)
   2583         state->start = state->ptr;
   2584     else if (state->ptr != state->end)
   2585         state->start = (void*) ((char*) state->ptr + state->charsize);
   2586     else
   2587         state->start = NULL;
   2588 
   2589     return match;
   2590 }
   2591 
   2592 
   2593 /*[clinic input]
   2594 _sre.SRE_Scanner.search
   2595 
   2596 [clinic start generated code]*/
   2597 
   2598 static PyObject *
   2599 _sre_SRE_Scanner_search_impl(ScannerObject *self)
   2600 /*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
   2601 {
   2602     SRE_STATE* state = &self->state;
   2603     PyObject* match;
   2604     Py_ssize_t status;
   2605 
   2606     if (state->start == NULL)
   2607         Py_RETURN_NONE;
   2608 
   2609     state_reset(state);
   2610 
   2611     state->ptr = state->start;
   2612 
   2613     status = sre_search(state, PatternObject_GetCode(self->pattern));
   2614     if (PyErr_Occurred())
   2615         return NULL;
   2616 
   2617     match = pattern_new_match((PatternObject*) self->pattern,
   2618                                state, status);
   2619 
   2620     if (status == 0)
   2621         state->start = NULL;
   2622     else if (state->ptr != state->start)
   2623         state->start = state->ptr;
   2624     else if (state->ptr != state->end)
   2625         state->start = (void*) ((char*) state->ptr + state->charsize);
   2626     else
   2627         state->start = NULL;
   2628 
   2629     return match;
   2630 }
   2631 
   2632 static PyObject *
   2633 pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
   2634 {
   2635     ScannerObject* scanner;
   2636 
   2637     /* create scanner object */
   2638     scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
   2639     if (!scanner)
   2640         return NULL;
   2641     scanner->pattern = NULL;
   2642 
   2643     /* create search state object */
   2644     if (!state_init(&scanner->state, self, string, pos, endpos)) {
   2645         Py_DECREF(scanner);
   2646         return NULL;
   2647     }
   2648 
   2649     Py_INCREF(self);
   2650     scanner->pattern = (PyObject*) self;
   2651 
   2652     return (PyObject*) scanner;
   2653 }
   2654 
   2655 static Py_hash_t
   2656 pattern_hash(PatternObject *self)
   2657 {
   2658     Py_hash_t hash, hash2;
   2659 
   2660     hash = PyObject_Hash(self->pattern);
   2661     if (hash == -1) {
   2662         return -1;
   2663     }
   2664 
   2665     hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
   2666     hash ^= hash2;
   2667 
   2668     hash ^= self->flags;
   2669     hash ^= self->isbytes;
   2670     hash ^= self->codesize;
   2671 
   2672     if (hash == -1) {
   2673         hash = -2;
   2674     }
   2675     return hash;
   2676 }
   2677 
   2678 static PyObject*
   2679 pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
   2680 {
   2681     PatternObject *left, *right;
   2682     int cmp;
   2683 
   2684     if (op != Py_EQ && op != Py_NE) {
   2685         Py_RETURN_NOTIMPLEMENTED;
   2686     }
   2687 
   2688     if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
   2689         Py_RETURN_NOTIMPLEMENTED;
   2690     }
   2691 
   2692     if (lefto == righto) {
   2693         /* a pattern is equal to itself */
   2694         return PyBool_FromLong(op == Py_EQ);
   2695     }
   2696 
   2697     left = (PatternObject *)lefto;
   2698     right = (PatternObject *)righto;
   2699 
   2700     cmp = (left->flags == right->flags
   2701            && left->isbytes == right->isbytes
   2702            && left->codesize == right->codesize);
   2703     if (cmp) {
   2704         /* Compare the code and the pattern because the same pattern can
   2705            produce different codes depending on the locale used to compile the
   2706            pattern when the re.LOCALE flag is used. Don't compare groups,
   2707            indexgroup nor groupindex: they are derivated from the pattern. */
   2708         cmp = (memcmp(left->code, right->code,
   2709                       sizeof(left->code[0]) * left->codesize) == 0);
   2710     }
   2711     if (cmp) {
   2712         cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
   2713                                        Py_EQ);
   2714         if (cmp < 0) {
   2715             return NULL;
   2716         }
   2717     }
   2718     if (op == Py_NE) {
   2719         cmp = !cmp;
   2720     }
   2721     return PyBool_FromLong(cmp);
   2722 }
   2723 
   2724 #include "clinic/_sre.c.h"
   2725 
   2726 static PyMethodDef pattern_methods[] = {
   2727     _SRE_SRE_PATTERN_MATCH_METHODDEF
   2728     _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
   2729     _SRE_SRE_PATTERN_SEARCH_METHODDEF
   2730     _SRE_SRE_PATTERN_SUB_METHODDEF
   2731     _SRE_SRE_PATTERN_SUBN_METHODDEF
   2732     _SRE_SRE_PATTERN_FINDALL_METHODDEF
   2733     _SRE_SRE_PATTERN_SPLIT_METHODDEF
   2734     _SRE_SRE_PATTERN_FINDITER_METHODDEF
   2735     _SRE_SRE_PATTERN_SCANNER_METHODDEF
   2736     _SRE_SRE_PATTERN___COPY___METHODDEF
   2737     _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
   2738     {NULL, NULL}
   2739 };
   2740 
   2741 static PyGetSetDef pattern_getset[] = {
   2742     {"groupindex", (getter)pattern_groupindex, (setter)NULL,
   2743       "A dictionary mapping group names to group numbers."},
   2744     {NULL}  /* Sentinel */
   2745 };
   2746 
   2747 #define PAT_OFF(x) offsetof(PatternObject, x)
   2748 static PyMemberDef pattern_members[] = {
   2749     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
   2750     {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
   2751     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
   2752     {NULL}  /* Sentinel */
   2753 };
   2754 
   2755 static PyTypeObject Pattern_Type = {
   2756     PyVarObject_HEAD_INIT(NULL, 0)
   2757     "_" SRE_MODULE ".SRE_Pattern",
   2758     sizeof(PatternObject), sizeof(SRE_CODE),
   2759     (destructor)pattern_dealloc,        /* tp_dealloc */
   2760     0,                                  /* tp_print */
   2761     0,                                  /* tp_getattr */
   2762     0,                                  /* tp_setattr */
   2763     0,                                  /* tp_reserved */
   2764     (reprfunc)pattern_repr,             /* tp_repr */
   2765     0,                                  /* tp_as_number */
   2766     0,                                  /* tp_as_sequence */
   2767     0,                                  /* tp_as_mapping */
   2768     (hashfunc)pattern_hash,             /* tp_hash */
   2769     0,                                  /* tp_call */
   2770     0,                                  /* tp_str */
   2771     0,                                  /* tp_getattro */
   2772     0,                                  /* tp_setattro */
   2773     0,                                  /* tp_as_buffer */
   2774     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
   2775     pattern_doc,                        /* tp_doc */
   2776     0,                                  /* tp_traverse */
   2777     0,                                  /* tp_clear */
   2778     pattern_richcompare,                /* tp_richcompare */
   2779     offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
   2780     0,                                  /* tp_iter */
   2781     0,                                  /* tp_iternext */
   2782     pattern_methods,                    /* tp_methods */
   2783     pattern_members,                    /* tp_members */
   2784     pattern_getset,                     /* tp_getset */
   2785 };
   2786 
   2787 /* Match objects do not support length or assignment, but do support
   2788    __getitem__. */
   2789 static PyMappingMethods match_as_mapping = {
   2790     NULL,
   2791     (binaryfunc)match_getitem,
   2792     NULL
   2793 };
   2794 
   2795 static PyMethodDef match_methods[] = {
   2796     {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
   2797     _SRE_SRE_MATCH_START_METHODDEF
   2798     _SRE_SRE_MATCH_END_METHODDEF
   2799     _SRE_SRE_MATCH_SPAN_METHODDEF
   2800     _SRE_SRE_MATCH_GROUPS_METHODDEF
   2801     _SRE_SRE_MATCH_GROUPDICT_METHODDEF
   2802     _SRE_SRE_MATCH_EXPAND_METHODDEF
   2803     _SRE_SRE_MATCH___COPY___METHODDEF
   2804     _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
   2805     {NULL, NULL}
   2806 };
   2807 
   2808 static PyGetSetDef match_getset[] = {
   2809     {"lastindex", (getter)match_lastindex_get, (setter)NULL},
   2810     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
   2811     {"regs",      (getter)match_regs_get,      (setter)NULL},
   2812     {NULL}
   2813 };
   2814 
   2815 #define MATCH_OFF(x) offsetof(MatchObject, x)
   2816 static PyMemberDef match_members[] = {
   2817     {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY},
   2818     {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY},
   2819     {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY},
   2820     {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY},
   2821     {NULL}
   2822 };
   2823 
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
   2826 
   2827 static PyTypeObject Match_Type = {
   2828     PyVarObject_HEAD_INIT(NULL,0)
   2829     "_" SRE_MODULE ".SRE_Match",
   2830     sizeof(MatchObject), sizeof(Py_ssize_t),
   2831     (destructor)match_dealloc,  /* tp_dealloc */
   2832     0,                          /* tp_print */
   2833     0,                          /* tp_getattr */
   2834     0,                          /* tp_setattr */
   2835     0,                          /* tp_reserved */
   2836     (reprfunc)match_repr,       /* tp_repr */
   2837     0,                          /* tp_as_number */
   2838     0,                          /* tp_as_sequence */
   2839     &match_as_mapping,          /* tp_as_mapping */
   2840     0,                          /* tp_hash */
   2841     0,                          /* tp_call */
   2842     0,                          /* tp_str */
   2843     0,                          /* tp_getattro */
   2844     0,                          /* tp_setattro */
   2845     0,                          /* tp_as_buffer */
   2846     Py_TPFLAGS_DEFAULT,         /* tp_flags */
   2847     match_doc,                  /* tp_doc */
   2848     0,                          /* tp_traverse */
   2849     0,                          /* tp_clear */
   2850     0,                          /* tp_richcompare */
   2851     0,                          /* tp_weaklistoffset */
   2852     0,                          /* tp_iter */
   2853     0,                          /* tp_iternext */
   2854     match_methods,              /* tp_methods */
   2855     match_members,              /* tp_members */
   2856     match_getset,               /* tp_getset */
   2857 };
   2858 
   2859 static PyMethodDef scanner_methods[] = {
   2860     _SRE_SRE_SCANNER_MATCH_METHODDEF
   2861     _SRE_SRE_SCANNER_SEARCH_METHODDEF
   2862     {NULL, NULL}
   2863 };
   2864 
   2865 #define SCAN_OFF(x) offsetof(ScannerObject, x)
   2866 static PyMemberDef scanner_members[] = {
   2867     {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
   2868     {NULL}  /* Sentinel */
   2869 };
   2870 
   2871 static PyTypeObject Scanner_Type = {
   2872     PyVarObject_HEAD_INIT(NULL, 0)
   2873     "_" SRE_MODULE ".SRE_Scanner",
   2874     sizeof(ScannerObject), 0,
   2875     (destructor)scanner_dealloc,/* tp_dealloc */
   2876     0,                          /* tp_print */
   2877     0,                          /* tp_getattr */
   2878     0,                          /* tp_setattr */
   2879     0,                          /* tp_reserved */
   2880     0,                          /* tp_repr */
   2881     0,                          /* tp_as_number */
   2882     0,                          /* tp_as_sequence */
   2883     0,                          /* tp_as_mapping */
   2884     0,                          /* tp_hash */
   2885     0,                          /* tp_call */
   2886     0,                          /* tp_str */
   2887     0,                          /* tp_getattro */
   2888     0,                          /* tp_setattro */
   2889     0,                          /* tp_as_buffer */
   2890     Py_TPFLAGS_DEFAULT,         /* tp_flags */
   2891     0,                          /* tp_doc */
   2892     0,                          /* tp_traverse */
   2893     0,                          /* tp_clear */
   2894     0,                          /* tp_richcompare */
   2895     0,                          /* tp_weaklistoffset */
   2896     0,                          /* tp_iter */
   2897     0,                          /* tp_iternext */
   2898     scanner_methods,            /* tp_methods */
   2899     scanner_members,            /* tp_members */
   2900     0,                          /* tp_getset */
   2901 };
   2902 
   2903 static PyMethodDef _functions[] = {
   2904     _SRE_COMPILE_METHODDEF
   2905     _SRE_GETCODESIZE_METHODDEF
   2906     _SRE_GETLOWER_METHODDEF
   2907     {NULL, NULL}
   2908 };
   2909 
   2910 static struct PyModuleDef sremodule = {
   2911         PyModuleDef_HEAD_INIT,
   2912         "_" SRE_MODULE,
   2913         NULL,
   2914         -1,
   2915         _functions,
   2916         NULL,
   2917         NULL,
   2918         NULL,
   2919         NULL
   2920 };
   2921 
   2922 PyMODINIT_FUNC PyInit__sre(void)
   2923 {
   2924     PyObject* m;
   2925     PyObject* d;
   2926     PyObject* x;
   2927 
   2928     /* Patch object types */
   2929     if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
   2930         PyType_Ready(&Scanner_Type))
   2931         return NULL;
   2932 
   2933     m = PyModule_Create(&sremodule);
   2934     if (m == NULL)
   2935         return NULL;
   2936     d = PyModule_GetDict(m);
   2937 
   2938     x = PyLong_FromLong(SRE_MAGIC);
   2939     if (x) {
   2940         PyDict_SetItemString(d, "MAGIC", x);
   2941         Py_DECREF(x);
   2942     }
   2943 
   2944     x = PyLong_FromLong(sizeof(SRE_CODE));
   2945     if (x) {
   2946         PyDict_SetItemString(d, "CODESIZE", x);
   2947         Py_DECREF(x);
   2948     }
   2949 
   2950     x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
   2951     if (x) {
   2952         PyDict_SetItemString(d, "MAXREPEAT", x);
   2953         Py_DECREF(x);
   2954     }
   2955 
   2956     x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
   2957     if (x) {
   2958         PyDict_SetItemString(d, "MAXGROUPS", x);
   2959         Py_DECREF(x);
   2960     }
   2961 
   2962     x = PyUnicode_FromString(copyright);
   2963     if (x) {
   2964         PyDict_SetItemString(d, "copyright", x);
   2965         Py_DECREF(x);
   2966     }
   2967     return m;
   2968 }
   2969 
   2970 /* vim:ts=4:sw=4:et
   2971 */
   2972