Home | History | Annotate | Download | only in Modules
      1 /*
      2  * Secret Labs' Regular Expression Engine
      3  *
      4  * regular expression matching engine
      5  *
      6  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
      7  *
      8  * See the _sre.c file for information on usage and redistribution.
      9  */
     10 
     11 /* String matching engine */
     12 
     13 /* This file is included three times, with different character settings */
     14 
     15 LOCAL(int)
     16 SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
     17 {
     18     /* check if pointer is at given position */
     19 
     20     Py_ssize_t thisp, thatp;
     21 
     22     switch (at) {
     23 
     24     case SRE_AT_BEGINNING:
     25     case SRE_AT_BEGINNING_STRING:
     26         return ((void*) ptr == state->beginning);
     27 
     28     case SRE_AT_BEGINNING_LINE:
     29         return ((void*) ptr == state->beginning ||
     30                 SRE_IS_LINEBREAK((int) ptr[-1]));
     31 
     32     case SRE_AT_END:
     33         return (((SRE_CHAR *)state->end - ptr == 1 &&
     34                  SRE_IS_LINEBREAK((int) ptr[0])) ||
     35                 ((void*) ptr == state->end));
     36 
     37     case SRE_AT_END_LINE:
     38         return ((void*) ptr == state->end ||
     39                 SRE_IS_LINEBREAK((int) ptr[0]));
     40 
     41     case SRE_AT_END_STRING:
     42         return ((void*) ptr == state->end);
     43 
     44     case SRE_AT_BOUNDARY:
     45         if (state->beginning == state->end)
     46             return 0;
     47         thatp = ((void*) ptr > state->beginning) ?
     48             SRE_IS_WORD((int) ptr[-1]) : 0;
     49         thisp = ((void*) ptr < state->end) ?
     50             SRE_IS_WORD((int) ptr[0]) : 0;
     51         return thisp != thatp;
     52 
     53     case SRE_AT_NON_BOUNDARY:
     54         if (state->beginning == state->end)
     55             return 0;
     56         thatp = ((void*) ptr > state->beginning) ?
     57             SRE_IS_WORD((int) ptr[-1]) : 0;
     58         thisp = ((void*) ptr < state->end) ?
     59             SRE_IS_WORD((int) ptr[0]) : 0;
     60         return thisp == thatp;
     61 
     62     case SRE_AT_LOC_BOUNDARY:
     63         if (state->beginning == state->end)
     64             return 0;
     65         thatp = ((void*) ptr > state->beginning) ?
     66             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
     67         thisp = ((void*) ptr < state->end) ?
     68             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
     69         return thisp != thatp;
     70 
     71     case SRE_AT_LOC_NON_BOUNDARY:
     72         if (state->beginning == state->end)
     73             return 0;
     74         thatp = ((void*) ptr > state->beginning) ?
     75             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
     76         thisp = ((void*) ptr < state->end) ?
     77             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
     78         return thisp == thatp;
     79 
     80     case SRE_AT_UNI_BOUNDARY:
     81         if (state->beginning == state->end)
     82             return 0;
     83         thatp = ((void*) ptr > state->beginning) ?
     84             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
     85         thisp = ((void*) ptr < state->end) ?
     86             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
     87         return thisp != thatp;
     88 
     89     case SRE_AT_UNI_NON_BOUNDARY:
     90         if (state->beginning == state->end)
     91             return 0;
     92         thatp = ((void*) ptr > state->beginning) ?
     93             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
     94         thisp = ((void*) ptr < state->end) ?
     95             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
     96         return thisp == thatp;
     97 
     98     }
     99 
    100     return 0;
    101 }
    102 
    103 LOCAL(int)
    104 SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
    105 {
    106     /* check if character is a member of the given set */
    107 
    108     int ok = 1;
    109 
    110     for (;;) {
    111         switch (*set++) {
    112 
    113         case SRE_OP_FAILURE:
    114             return !ok;
    115 
    116         case SRE_OP_LITERAL:
    117             /* <LITERAL> <code> */
    118             if (ch == set[0])
    119                 return ok;
    120             set++;
    121             break;
    122 
    123         case SRE_OP_CATEGORY:
    124             /* <CATEGORY> <code> */
    125             if (sre_category(set[0], (int) ch))
    126                 return ok;
    127             set++;
    128             break;
    129 
    130         case SRE_OP_CHARSET:
    131             /* <CHARSET> <bitmap> */
    132             if (ch < 256 &&
    133                 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
    134                 return ok;
    135             set += 256/SRE_CODE_BITS;
    136             break;
    137 
    138         case SRE_OP_RANGE:
    139             /* <RANGE> <lower> <upper> */
    140             if (set[0] <= ch && ch <= set[1])
    141                 return ok;
    142             set += 2;
    143             break;
    144 
    145         case SRE_OP_RANGE_UNI_IGNORE:
    146             /* <RANGE_UNI_IGNORE> <lower> <upper> */
    147         {
    148             SRE_CODE uch;
    149             /* ch is already lower cased */
    150             if (set[0] <= ch && ch <= set[1])
    151                 return ok;
    152             uch = sre_upper_unicode(ch);
    153             if (set[0] <= uch && uch <= set[1])
    154                 return ok;
    155             set += 2;
    156             break;
    157         }
    158 
    159         case SRE_OP_NEGATE:
    160             ok = !ok;
    161             break;
    162 
    163         case SRE_OP_BIGCHARSET:
    164             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
    165         {
    166             Py_ssize_t count, block;
    167             count = *(set++);
    168 
    169             if (ch < 0x10000u)
    170                 block = ((unsigned char*)set)[ch >> 8];
    171             else
    172                 block = -1;
    173             set += 256/sizeof(SRE_CODE);
    174             if (block >=0 &&
    175                 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
    176                     (1u << (ch & (SRE_CODE_BITS-1)))))
    177                 return ok;
    178             set += count * (256/SRE_CODE_BITS);
    179             break;
    180         }
    181 
    182         default:
    183             /* internal error -- there's not much we can do about it
    184                here, so let's just pretend it didn't match... */
    185             return 0;
    186         }
    187     }
    188 }
    189 
    190 LOCAL(int)
    191 SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
    192 {
    193     SRE_CODE lo, up;
    194     lo = sre_lower_locale(ch);
    195     if (SRE(charset)(state, set, lo))
    196        return 1;
    197 
    198     up = sre_upper_locale(ch);
    199     return up != lo && SRE(charset)(state, set, up);
    200 }
    201 
    202 LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel);
    203 
    204 LOCAL(Py_ssize_t)
    205 SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
    206 {
    207     SRE_CODE chr;
    208     SRE_CHAR c;
    209     SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
    210     SRE_CHAR* end = (SRE_CHAR *)state->end;
    211     Py_ssize_t i;
    212 
    213     /* adjust end */
    214     if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
    215         end = ptr + maxcount;
    216 
    217     switch (pattern[0]) {
    218 
    219     case SRE_OP_IN:
    220         /* repeated set */
    221         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
    222         while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
    223             ptr++;
    224         break;
    225 
    226     case SRE_OP_ANY:
    227         /* repeated dot wildcard. */
    228         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
    229         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
    230             ptr++;
    231         break;
    232 
    233     case SRE_OP_ANY_ALL:
    234         /* repeated dot wildcard.  skip to the end of the target
    235            string, and backtrack from there */
    236         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
    237         ptr = end;
    238         break;
    239 
    240     case SRE_OP_LITERAL:
    241         /* repeated literal */
    242         chr = pattern[1];
    243         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
    244         c = (SRE_CHAR) chr;
    245 #if SIZEOF_SRE_CHAR < 4
    246         if ((SRE_CODE) c != chr)
    247             ; /* literal can't match: doesn't fit in char width */
    248         else
    249 #endif
    250         while (ptr < end && *ptr == c)
    251             ptr++;
    252         break;
    253 
    254     case SRE_OP_LITERAL_IGNORE:
    255         /* repeated literal */
    256         chr = pattern[1];
    257         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
    258         while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
    259             ptr++;
    260         break;
    261 
    262     case SRE_OP_LITERAL_UNI_IGNORE:
    263         /* repeated literal */
    264         chr = pattern[1];
    265         TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
    266         while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
    267             ptr++;
    268         break;
    269 
    270     case SRE_OP_LITERAL_LOC_IGNORE:
    271         /* repeated literal */
    272         chr = pattern[1];
    273         TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
    274         while (ptr < end && char_loc_ignore(chr, *ptr))
    275             ptr++;
    276         break;
    277 
    278     case SRE_OP_NOT_LITERAL:
    279         /* repeated non-literal */
    280         chr = pattern[1];
    281         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
    282         c = (SRE_CHAR) chr;
    283 #if SIZEOF_SRE_CHAR < 4
    284         if ((SRE_CODE) c != chr)
    285             ptr = end; /* literal can't match: doesn't fit in char width */
    286         else
    287 #endif
    288         while (ptr < end && *ptr != c)
    289             ptr++;
    290         break;
    291 
    292     case SRE_OP_NOT_LITERAL_IGNORE:
    293         /* repeated non-literal */
    294         chr = pattern[1];
    295         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
    296         while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
    297             ptr++;
    298         break;
    299 
    300     case SRE_OP_NOT_LITERAL_UNI_IGNORE:
    301         /* repeated non-literal */
    302         chr = pattern[1];
    303         TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
    304         while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
    305             ptr++;
    306         break;
    307 
    308     case SRE_OP_NOT_LITERAL_LOC_IGNORE:
    309         /* repeated non-literal */
    310         chr = pattern[1];
    311         TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
    312         while (ptr < end && !char_loc_ignore(chr, *ptr))
    313             ptr++;
    314         break;
    315 
    316     default:
    317         /* repeated single character pattern */
    318         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
    319         while ((SRE_CHAR*) state->ptr < end) {
    320             i = SRE(match)(state, pattern, 0);
    321             if (i < 0)
    322                 return i;
    323             if (!i)
    324                 break;
    325         }
    326         TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
    327                (SRE_CHAR*) state->ptr - ptr));
    328         return (SRE_CHAR*) state->ptr - ptr;
    329     }
    330 
    331     TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
    332            ptr - (SRE_CHAR*) state->ptr));
    333     return ptr - (SRE_CHAR*) state->ptr;
    334 }
    335 
    336 #if 0 /* not used in this release */
    337 LOCAL(int)
    338 SRE(info)(SRE_STATE* state, SRE_CODE* pattern)
    339 {
    340     /* check if an SRE_OP_INFO block matches at the current position.
    341        returns the number of SRE_CODE objects to skip if successful, 0
    342        if no match */
    343 
    344     SRE_CHAR* end = (SRE_CHAR*) state->end;
    345     SRE_CHAR* ptr = (SRE_CHAR*) state->ptr;
    346     Py_ssize_t i;
    347 
    348     /* check minimal length */
    349     if (pattern[3] && end - ptr < pattern[3])
    350         return 0;
    351 
    352     /* check known prefix */
    353     if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
    354         /* <length> <skip> <prefix data> <overlap data> */
    355         for (i = 0; i < pattern[5]; i++)
    356             if ((SRE_CODE) ptr[i] != pattern[7 + i])
    357                 return 0;
    358         return pattern[0] + 2 * pattern[6];
    359     }
    360     return pattern[0];
    361 }
    362 #endif
    363 
    364 /* The macros below should be used to protect recursive SRE(match)()
    365  * calls that *failed* and do *not* return immediately (IOW, those
    366  * that will backtrack). Explaining:
    367  *
    368  * - Recursive SRE(match)() returned true: that's usually a success
    369  *   (besides atypical cases like ASSERT_NOT), therefore there's no
    370  *   reason to restore lastmark;
    371  *
    372  * - Recursive SRE(match)() returned false but the current SRE(match)()
    373  *   is returning to the caller: If the current SRE(match)() is the
    374  *   top function of the recursion, returning false will be a matching
    375  *   failure, and it doesn't matter where lastmark is pointing to.
    376  *   If it's *not* the top function, it will be a recursive SRE(match)()
    377  *   failure by itself, and the calling SRE(match)() will have to deal
    378  *   with the failure by the same rules explained here (it will restore
    379  *   lastmark by itself if necessary);
    380  *
    381  * - Recursive SRE(match)() returned false, and will continue the
    382  *   outside 'for' loop: must be protected when breaking, since the next
    383  *   OP could potentially depend on lastmark;
    384  *
    385  * - Recursive SRE(match)() returned false, and will be called again
    386  *   inside a local for/while loop: must be protected between each
    387  *   loop iteration, since the recursive SRE(match)() could do anything,
    388  *   and could potentially depend on lastmark.
    389  *
    390  * For more information, check the discussion at SF patch #712900.
    391  */
    392 #define LASTMARK_SAVE()     \
    393     do { \
    394         ctx->lastmark = state->lastmark; \
    395         ctx->lastindex = state->lastindex; \
    396     } while (0)
    397 #define LASTMARK_RESTORE()  \
    398     do { \
    399         state->lastmark = ctx->lastmark; \
    400         state->lastindex = ctx->lastindex; \
    401     } while (0)
    402 
    403 #define RETURN_ERROR(i) do { return i; } while(0)
    404 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
    405 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
    406 
    407 #define RETURN_ON_ERROR(i) \
    408     do { if (i < 0) RETURN_ERROR(i); } while (0)
    409 #define RETURN_ON_SUCCESS(i) \
    410     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
    411 #define RETURN_ON_FAILURE(i) \
    412     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
    413 
    414 #define DATA_STACK_ALLOC(state, type, ptr) \
    415 do { \
    416     alloc_pos = state->data_stack_base; \
    417     TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
    418            "(%" PY_FORMAT_SIZE_T "d)\n", \
    419            Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
    420     if (sizeof(type) > state->data_stack_size - alloc_pos) { \
    421         int j = data_stack_grow(state, sizeof(type)); \
    422         if (j < 0) return j; \
    423         if (ctx_pos != -1) \
    424             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
    425     } \
    426     ptr = (type*)(state->data_stack+alloc_pos); \
    427     state->data_stack_base += sizeof(type); \
    428 } while (0)
    429 
    430 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
    431 do { \
    432     TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", Py_STRINGIFY(type), pos)); \
    433     ptr = (type*)(state->data_stack+pos); \
    434 } while (0)
    435 
    436 #define DATA_STACK_PUSH(state, data, size) \
    437 do { \
    438     TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
    439            "(%" PY_FORMAT_SIZE_T "d)\n", \
    440            data, state->data_stack_base, size)); \
    441     if (size > state->data_stack_size - state->data_stack_base) { \
    442         int j = data_stack_grow(state, size); \
    443         if (j < 0) return j; \
    444         if (ctx_pos != -1) \
    445             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
    446     } \
    447     memcpy(state->data_stack+state->data_stack_base, data, size); \
    448     state->data_stack_base += size; \
    449 } while (0)
    450 
    451 #define DATA_STACK_POP(state, data, size, discard) \
    452 do { \
    453     TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
    454            "(%" PY_FORMAT_SIZE_T "d)\n", \
    455            data, state->data_stack_base-size, size)); \
    456     memcpy(data, state->data_stack+state->data_stack_base-size, size); \
    457     if (discard) \
    458         state->data_stack_base -= size; \
    459 } while (0)
    460 
    461 #define DATA_STACK_POP_DISCARD(state, size) \
    462 do { \
    463     TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
    464            "(%" PY_FORMAT_SIZE_T "d)\n", \
    465            state->data_stack_base-size, size)); \
    466     state->data_stack_base -= size; \
    467 } while(0)
    468 
    469 #define DATA_PUSH(x) \
    470     DATA_STACK_PUSH(state, (x), sizeof(*(x)))
    471 #define DATA_POP(x) \
    472     DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
    473 #define DATA_POP_DISCARD(x) \
    474     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
    475 #define DATA_ALLOC(t,p) \
    476     DATA_STACK_ALLOC(state, t, p)
    477 #define DATA_LOOKUP_AT(t,p,pos) \
    478     DATA_STACK_LOOKUP_AT(state,t,p,pos)
    479 
    480 #define MARK_PUSH(lastmark) \
    481     do if (lastmark > 0) { \
    482         i = lastmark; /* ctx->lastmark may change if reallocated */ \
    483         DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
    484     } while (0)
    485 #define MARK_POP(lastmark) \
    486     do if (lastmark > 0) { \
    487         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
    488     } while (0)
    489 #define MARK_POP_KEEP(lastmark) \
    490     do if (lastmark > 0) { \
    491         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
    492     } while (0)
    493 #define MARK_POP_DISCARD(lastmark) \
    494     do if (lastmark > 0) { \
    495         DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
    496     } while (0)
    497 
    498 #define JUMP_NONE            0
    499 #define JUMP_MAX_UNTIL_1     1
    500 #define JUMP_MAX_UNTIL_2     2
    501 #define JUMP_MAX_UNTIL_3     3
    502 #define JUMP_MIN_UNTIL_1     4
    503 #define JUMP_MIN_UNTIL_2     5
    504 #define JUMP_MIN_UNTIL_3     6
    505 #define JUMP_REPEAT          7
    506 #define JUMP_REPEAT_ONE_1    8
    507 #define JUMP_REPEAT_ONE_2    9
    508 #define JUMP_MIN_REPEAT_ONE  10
    509 #define JUMP_BRANCH          11
    510 #define JUMP_ASSERT          12
    511 #define JUMP_ASSERT_NOT      13
    512 
    513 #define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
    514     DATA_ALLOC(SRE(match_context), nextctx); \
    515     nextctx->last_ctx_pos = ctx_pos; \
    516     nextctx->jump = jumpvalue; \
    517     nextctx->pattern = nextpattern; \
    518     nextctx->toplevel = toplevel_; \
    519     ctx_pos = alloc_pos; \
    520     ctx = nextctx; \
    521     goto entrance; \
    522     jumplabel: \
    523     while (0) /* gcc doesn't like labels at end of scopes */ \
    524 
    525 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
    526     DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
    527 
    528 #define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
    529     DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
    530 
    531 typedef struct {
    532     Py_ssize_t last_ctx_pos;
    533     Py_ssize_t jump;
    534     SRE_CHAR* ptr;
    535     SRE_CODE* pattern;
    536     Py_ssize_t count;
    537     Py_ssize_t lastmark;
    538     Py_ssize_t lastindex;
    539     union {
    540         SRE_CODE chr;
    541         SRE_REPEAT* rep;
    542     } u;
    543     int toplevel;
    544 } SRE(match_context);
    545 
    546 /* check if string matches the given pattern.  returns <0 for
    547    error, 0 for failure, and 1 for success */
    548 LOCAL(Py_ssize_t)
    549 SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel)
    550 {
    551     SRE_CHAR* end = (SRE_CHAR *)state->end;
    552     Py_ssize_t alloc_pos, ctx_pos = -1;
    553     Py_ssize_t i, ret = 0;
    554     Py_ssize_t jump;
    555     unsigned int sigcount=0;
    556 
    557     SRE(match_context)* ctx;
    558     SRE(match_context)* nextctx;
    559 
    560     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
    561 
    562     DATA_ALLOC(SRE(match_context), ctx);
    563     ctx->last_ctx_pos = -1;
    564     ctx->jump = JUMP_NONE;
    565     ctx->pattern = pattern;
    566     ctx->toplevel = toplevel;
    567     ctx_pos = alloc_pos;
    568 
    569 entrance:
    570 
    571     ctx->ptr = (SRE_CHAR *)state->ptr;
    572 
    573     if (ctx->pattern[0] == SRE_OP_INFO) {
    574         /* optimization info block */
    575         /* <INFO> <1=skip> <2=flags> <3=min> ... */
    576         if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
    577             TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
    578                    "need %" PY_FORMAT_SIZE_T "d)\n",
    579                    end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
    580             RETURN_FAILURE;
    581         }
    582         ctx->pattern += ctx->pattern[1] + 1;
    583     }
    584 
    585     for (;;) {
    586         ++sigcount;
    587         if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
    588             RETURN_ERROR(SRE_ERROR_INTERRUPTED);
    589 
    590         switch (*ctx->pattern++) {
    591 
    592         case SRE_OP_MARK:
    593             /* set mark */
    594             /* <MARK> <gid> */
    595             TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
    596                    ctx->ptr, ctx->pattern[0]));
    597             i = ctx->pattern[0];
    598             if (i & 1)
    599                 state->lastindex = i/2 + 1;
    600             if (i > state->lastmark) {
    601                 /* state->lastmark is the highest valid index in the
    602                    state->mark array.  If it is increased by more than 1,
    603                    the intervening marks must be set to NULL to signal
    604                    that these marks have not been encountered. */
    605                 Py_ssize_t j = state->lastmark + 1;
    606                 while (j < i)
    607                     state->mark[j++] = NULL;
    608                 state->lastmark = i;
    609             }
    610             state->mark[i] = ctx->ptr;
    611             ctx->pattern++;
    612             break;
    613 
    614         case SRE_OP_LITERAL:
    615             /* match literal string */
    616             /* <LITERAL> <code> */
    617             TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
    618                    ctx->ptr, *ctx->pattern));
    619             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
    620                 RETURN_FAILURE;
    621             ctx->pattern++;
    622             ctx->ptr++;
    623             break;
    624 
    625         case SRE_OP_NOT_LITERAL:
    626             /* match anything that is not literal character */
    627             /* <NOT_LITERAL> <code> */
    628             TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
    629                    ctx->ptr, *ctx->pattern));
    630             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
    631                 RETURN_FAILURE;
    632             ctx->pattern++;
    633             ctx->ptr++;
    634             break;
    635 
    636         case SRE_OP_SUCCESS:
    637             /* end of pattern */
    638             TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
    639             if (ctx->toplevel &&
    640                 ((state->match_all && ctx->ptr != state->end) ||
    641                  (state->must_advance && ctx->ptr == state->start)))
    642             {
    643                 RETURN_FAILURE;
    644             }
    645             state->ptr = ctx->ptr;
    646             RETURN_SUCCESS;
    647 
    648         case SRE_OP_AT:
    649             /* match at given position */
    650             /* <AT> <code> */
    651             TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
    652             if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
    653                 RETURN_FAILURE;
    654             ctx->pattern++;
    655             break;
    656 
    657         case SRE_OP_CATEGORY:
    658             /* match at given category */
    659             /* <CATEGORY> <code> */
    660             TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
    661                    ctx->ptr, *ctx->pattern));
    662             if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
    663                 RETURN_FAILURE;
    664             ctx->pattern++;
    665             ctx->ptr++;
    666             break;
    667 
    668         case SRE_OP_ANY:
    669             /* match anything (except a newline) */
    670             /* <ANY> */
    671             TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
    672             if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
    673                 RETURN_FAILURE;
    674             ctx->ptr++;
    675             break;
    676 
    677         case SRE_OP_ANY_ALL:
    678             /* match anything */
    679             /* <ANY_ALL> */
    680             TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
    681             if (ctx->ptr >= end)
    682                 RETURN_FAILURE;
    683             ctx->ptr++;
    684             break;
    685 
    686         case SRE_OP_IN:
    687             /* match set member (or non_member) */
    688             /* <IN> <skip> <set> */
    689             TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
    690             if (ctx->ptr >= end ||
    691                 !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr))
    692                 RETURN_FAILURE;
    693             ctx->pattern += ctx->pattern[0];
    694             ctx->ptr++;
    695             break;
    696 
    697         case SRE_OP_LITERAL_IGNORE:
    698             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
    699                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
    700             if (ctx->ptr >= end ||
    701                 sre_lower_ascii(*ctx->ptr) != *ctx->pattern)
    702                 RETURN_FAILURE;
    703             ctx->pattern++;
    704             ctx->ptr++;
    705             break;
    706 
    707         case SRE_OP_LITERAL_UNI_IGNORE:
    708             TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
    709                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
    710             if (ctx->ptr >= end ||
    711                 sre_lower_unicode(*ctx->ptr) != *ctx->pattern)
    712                 RETURN_FAILURE;
    713             ctx->pattern++;
    714             ctx->ptr++;
    715             break;
    716 
    717         case SRE_OP_LITERAL_LOC_IGNORE:
    718             TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
    719                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
    720             if (ctx->ptr >= end
    721                 || !char_loc_ignore(*ctx->pattern, *ctx->ptr))
    722                 RETURN_FAILURE;
    723             ctx->pattern++;
    724             ctx->ptr++;
    725             break;
    726 
    727         case SRE_OP_NOT_LITERAL_IGNORE:
    728             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
    729                    ctx->pattern, ctx->ptr, *ctx->pattern));
    730             if (ctx->ptr >= end ||
    731                 sre_lower_ascii(*ctx->ptr) == *ctx->pattern)
    732                 RETURN_FAILURE;
    733             ctx->pattern++;
    734             ctx->ptr++;
    735             break;
    736 
    737         case SRE_OP_NOT_LITERAL_UNI_IGNORE:
    738             TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
    739                    ctx->pattern, ctx->ptr, *ctx->pattern));
    740             if (ctx->ptr >= end ||
    741                 sre_lower_unicode(*ctx->ptr) == *ctx->pattern)
    742                 RETURN_FAILURE;
    743             ctx->pattern++;
    744             ctx->ptr++;
    745             break;
    746 
    747         case SRE_OP_NOT_LITERAL_LOC_IGNORE:
    748             TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
    749                    ctx->pattern, ctx->ptr, *ctx->pattern));
    750             if (ctx->ptr >= end
    751                 || char_loc_ignore(*ctx->pattern, *ctx->ptr))
    752                 RETURN_FAILURE;
    753             ctx->pattern++;
    754             ctx->ptr++;
    755             break;
    756 
    757         case SRE_OP_IN_IGNORE:
    758             TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
    759             if (ctx->ptr >= end
    760                 || !SRE(charset)(state, ctx->pattern+1,
    761                                  (SRE_CODE)sre_lower_ascii(*ctx->ptr)))
    762                 RETURN_FAILURE;
    763             ctx->pattern += ctx->pattern[0];
    764             ctx->ptr++;
    765             break;
    766 
    767         case SRE_OP_IN_UNI_IGNORE:
    768             TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr));
    769             if (ctx->ptr >= end
    770                 || !SRE(charset)(state, ctx->pattern+1,
    771                                  (SRE_CODE)sre_lower_unicode(*ctx->ptr)))
    772                 RETURN_FAILURE;
    773             ctx->pattern += ctx->pattern[0];
    774             ctx->ptr++;
    775             break;
    776 
    777         case SRE_OP_IN_LOC_IGNORE:
    778             TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr));
    779             if (ctx->ptr >= end
    780                 || !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr))
    781                 RETURN_FAILURE;
    782             ctx->pattern += ctx->pattern[0];
    783             ctx->ptr++;
    784             break;
    785 
    786         case SRE_OP_JUMP:
    787         case SRE_OP_INFO:
    788             /* jump forward */
    789             /* <JUMP> <offset> */
    790             TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
    791                    ctx->ptr, ctx->pattern[0]));
    792             ctx->pattern += ctx->pattern[0];
    793             break;
    794 
    795         case SRE_OP_BRANCH:
    796             /* alternation */
    797             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
    798             TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
    799             LASTMARK_SAVE();
    800             ctx->u.rep = state->repeat;
    801             if (ctx->u.rep)
    802                 MARK_PUSH(ctx->lastmark);
    803             for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
    804                 if (ctx->pattern[1] == SRE_OP_LITERAL &&
    805                     (ctx->ptr >= end ||
    806                      (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
    807                     continue;
    808                 if (ctx->pattern[1] == SRE_OP_IN &&
    809                     (ctx->ptr >= end ||
    810                      !SRE(charset)(state, ctx->pattern + 3,
    811                                    (SRE_CODE) *ctx->ptr)))
    812                     continue;
    813                 state->ptr = ctx->ptr;
    814                 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
    815                 if (ret) {
    816                     if (ctx->u.rep)
    817                         MARK_POP_DISCARD(ctx->lastmark);
    818                     RETURN_ON_ERROR(ret);
    819                     RETURN_SUCCESS;
    820                 }
    821                 if (ctx->u.rep)
    822                     MARK_POP_KEEP(ctx->lastmark);
    823                 LASTMARK_RESTORE();
    824             }
    825             if (ctx->u.rep)
    826                 MARK_POP_DISCARD(ctx->lastmark);
    827             RETURN_FAILURE;
    828 
    829         case SRE_OP_REPEAT_ONE:
    830             /* match repeated sequence (maximizing regexp) */
    831 
    832             /* this operator only works if the repeated item is
    833                exactly one character wide, and we're not already
    834                collecting backtracking points.  for other cases,
    835                use the MAX_REPEAT operator */
    836 
    837             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
    838 
    839             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
    840                    ctx->pattern[1], ctx->pattern[2]));
    841 
    842             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
    843                 RETURN_FAILURE; /* cannot match */
    844 
    845             state->ptr = ctx->ptr;
    846 
    847             ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
    848             RETURN_ON_ERROR(ret);
    849             DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
    850             ctx->count = ret;
    851             ctx->ptr += ctx->count;
    852 
    853             /* when we arrive here, count contains the number of
    854                matches, and ctx->ptr points to the tail of the target
    855                string.  check if the rest of the pattern matches,
    856                and backtrack if not. */
    857 
    858             if (ctx->count < (Py_ssize_t) ctx->pattern[1])
    859                 RETURN_FAILURE;
    860 
    861             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
    862                 ctx->ptr == state->end &&
    863                 !(ctx->toplevel && state->must_advance && ctx->ptr == state->start))
    864             {
    865                 /* tail is empty.  we're finished */
    866                 state->ptr = ctx->ptr;
    867                 RETURN_SUCCESS;
    868             }
    869 
    870             LASTMARK_SAVE();
    871 
    872             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
    873                 /* tail starts with a literal. skip positions where
    874                    the rest of the pattern cannot possibly match */
    875                 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
    876                 for (;;) {
    877                     while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
    878                            (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
    879                         ctx->ptr--;
    880                         ctx->count--;
    881                     }
    882                     if (ctx->count < (Py_ssize_t) ctx->pattern[1])
    883                         break;
    884                     state->ptr = ctx->ptr;
    885                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
    886                             ctx->pattern+ctx->pattern[0]);
    887                     if (ret) {
    888                         RETURN_ON_ERROR(ret);
    889                         RETURN_SUCCESS;
    890                     }
    891 
    892                     LASTMARK_RESTORE();
    893 
    894                     ctx->ptr--;
    895                     ctx->count--;
    896                 }
    897 
    898             } else {
    899                 /* general case */
    900                 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
    901                     state->ptr = ctx->ptr;
    902                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
    903                             ctx->pattern+ctx->pattern[0]);
    904                     if (ret) {
    905                         RETURN_ON_ERROR(ret);
    906                         RETURN_SUCCESS;
    907                     }
    908                     ctx->ptr--;
    909                     ctx->count--;
    910                     LASTMARK_RESTORE();
    911                 }
    912             }
    913             RETURN_FAILURE;
    914 
    915         case SRE_OP_MIN_REPEAT_ONE:
    916             /* match repeated sequence (minimizing regexp) */
    917 
    918             /* this operator only works if the repeated item is
    919                exactly one character wide, and we're not already
    920                collecting backtracking points.  for other cases,
    921                use the MIN_REPEAT operator */
    922 
    923             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
    924 
    925             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
    926                    ctx->pattern[1], ctx->pattern[2]));
    927 
    928             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
    929                 RETURN_FAILURE; /* cannot match */
    930 
    931             state->ptr = ctx->ptr;
    932 
    933             if (ctx->pattern[1] == 0)
    934                 ctx->count = 0;
    935             else {
    936                 /* count using pattern min as the maximum */
    937                 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
    938                 RETURN_ON_ERROR(ret);
    939                 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
    940                 if (ret < (Py_ssize_t) ctx->pattern[1])
    941                     /* didn't match minimum number of times */
    942                     RETURN_FAILURE;
    943                 /* advance past minimum matches of repeat */
    944                 ctx->count = ret;
    945                 ctx->ptr += ctx->count;
    946             }
    947 
    948             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
    949                 !(ctx->toplevel &&
    950                   ((state->match_all && ctx->ptr != state->end) ||
    951                    (state->must_advance && ctx->ptr == state->start))))
    952             {
    953                 /* tail is empty.  we're finished */
    954                 state->ptr = ctx->ptr;
    955                 RETURN_SUCCESS;
    956 
    957             } else {
    958                 /* general case */
    959                 LASTMARK_SAVE();
    960                 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
    961                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
    962                     state->ptr = ctx->ptr;
    963                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
    964                             ctx->pattern+ctx->pattern[0]);
    965                     if (ret) {
    966                         RETURN_ON_ERROR(ret);
    967                         RETURN_SUCCESS;
    968                     }
    969                     state->ptr = ctx->ptr;
    970                     ret = SRE(count)(state, ctx->pattern+3, 1);
    971                     RETURN_ON_ERROR(ret);
    972                     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
    973                     if (ret == 0)
    974                         break;
    975                     assert(ret == 1);
    976                     ctx->ptr++;
    977                     ctx->count++;
    978                     LASTMARK_RESTORE();
    979                 }
    980             }
    981             RETURN_FAILURE;
    982 
    983         case SRE_OP_REPEAT:
    984             /* create repeat context.  all the hard work is done
    985                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
    986             /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
    987             TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
    988                    ctx->pattern[1], ctx->pattern[2]));
    989 
    990             /* install new repeat context */
    991             ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
    992             if (!ctx->u.rep) {
    993                 PyErr_NoMemory();
    994                 RETURN_FAILURE;
    995             }
    996             ctx->u.rep->count = -1;
    997             ctx->u.rep->pattern = ctx->pattern;
    998             ctx->u.rep->prev = state->repeat;
    999             ctx->u.rep->last_ptr = NULL;
   1000             state->repeat = ctx->u.rep;
   1001 
   1002             state->ptr = ctx->ptr;
   1003             DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
   1004             state->repeat = ctx->u.rep->prev;
   1005             PyObject_FREE(ctx->u.rep);
   1006 
   1007             if (ret) {
   1008                 RETURN_ON_ERROR(ret);
   1009                 RETURN_SUCCESS;
   1010             }
   1011             RETURN_FAILURE;
   1012 
   1013         case SRE_OP_MAX_UNTIL:
   1014             /* maximizing repeat */
   1015             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
   1016 
   1017             /* FIXME: we probably need to deal with zero-width
   1018                matches in here... */
   1019 
   1020             ctx->u.rep = state->repeat;
   1021             if (!ctx->u.rep)
   1022                 RETURN_ERROR(SRE_ERROR_STATE);
   1023 
   1024             state->ptr = ctx->ptr;
   1025 
   1026             ctx->count = ctx->u.rep->count+1;
   1027 
   1028             TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
   1029                    ctx->ptr, ctx->count));
   1030 
   1031             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
   1032                 /* not enough matches */
   1033                 ctx->u.rep->count = ctx->count;
   1034                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
   1035                         ctx->u.rep->pattern+3);
   1036                 if (ret) {
   1037                     RETURN_ON_ERROR(ret);
   1038                     RETURN_SUCCESS;
   1039                 }
   1040                 ctx->u.rep->count = ctx->count-1;
   1041                 state->ptr = ctx->ptr;
   1042                 RETURN_FAILURE;
   1043             }
   1044 
   1045             if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
   1046                 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
   1047                 state->ptr != ctx->u.rep->last_ptr) {
   1048                 /* we may have enough matches, but if we can
   1049                    match another item, do so */
   1050                 ctx->u.rep->count = ctx->count;
   1051                 LASTMARK_SAVE();
   1052                 MARK_PUSH(ctx->lastmark);
   1053                 /* zero-width match protection */
   1054                 DATA_PUSH(&ctx->u.rep->last_ptr);
   1055                 ctx->u.rep->last_ptr = state->ptr;
   1056                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
   1057                         ctx->u.rep->pattern+3);
   1058                 DATA_POP(&ctx->u.rep->last_ptr);
   1059                 if (ret) {
   1060                     MARK_POP_DISCARD(ctx->lastmark);
   1061                     RETURN_ON_ERROR(ret);
   1062                     RETURN_SUCCESS;
   1063                 }
   1064                 MARK_POP(ctx->lastmark);
   1065                 LASTMARK_RESTORE();
   1066                 ctx->u.rep->count = ctx->count-1;
   1067                 state->ptr = ctx->ptr;
   1068             }
   1069 
   1070             /* cannot match more repeated items here.  make sure the
   1071                tail matches */
   1072             state->repeat = ctx->u.rep->prev;
   1073             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
   1074             RETURN_ON_SUCCESS(ret);
   1075             state->repeat = ctx->u.rep;
   1076             state->ptr = ctx->ptr;
   1077             RETURN_FAILURE;
   1078 
   1079         case SRE_OP_MIN_UNTIL:
   1080             /* minimizing repeat */
   1081             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
   1082 
   1083             ctx->u.rep = state->repeat;
   1084             if (!ctx->u.rep)
   1085                 RETURN_ERROR(SRE_ERROR_STATE);
   1086 
   1087             state->ptr = ctx->ptr;
   1088 
   1089             ctx->count = ctx->u.rep->count+1;
   1090 
   1091             TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
   1092                    ctx->ptr, ctx->count, ctx->u.rep->pattern));
   1093 
   1094             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
   1095                 /* not enough matches */
   1096                 ctx->u.rep->count = ctx->count;
   1097                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
   1098                         ctx->u.rep->pattern+3);
   1099                 if (ret) {
   1100                     RETURN_ON_ERROR(ret);
   1101                     RETURN_SUCCESS;
   1102                 }
   1103                 ctx->u.rep->count = ctx->count-1;
   1104                 state->ptr = ctx->ptr;
   1105                 RETURN_FAILURE;
   1106             }
   1107 
   1108             LASTMARK_SAVE();
   1109 
   1110             /* see if the tail matches */
   1111             state->repeat = ctx->u.rep->prev;
   1112             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
   1113             if (ret) {
   1114                 RETURN_ON_ERROR(ret);
   1115                 RETURN_SUCCESS;
   1116             }
   1117 
   1118             state->repeat = ctx->u.rep;
   1119             state->ptr = ctx->ptr;
   1120 
   1121             LASTMARK_RESTORE();
   1122 
   1123             if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
   1124                 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
   1125                 state->ptr == ctx->u.rep->last_ptr)
   1126                 RETURN_FAILURE;
   1127 
   1128             ctx->u.rep->count = ctx->count;
   1129             /* zero-width match protection */
   1130             DATA_PUSH(&ctx->u.rep->last_ptr);
   1131             ctx->u.rep->last_ptr = state->ptr;
   1132             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
   1133                     ctx->u.rep->pattern+3);
   1134             DATA_POP(&ctx->u.rep->last_ptr);
   1135             if (ret) {
   1136                 RETURN_ON_ERROR(ret);
   1137                 RETURN_SUCCESS;
   1138             }
   1139             ctx->u.rep->count = ctx->count-1;
   1140             state->ptr = ctx->ptr;
   1141             RETURN_FAILURE;
   1142 
   1143         case SRE_OP_GROUPREF:
   1144             /* match backreference */
   1145             TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
   1146                    ctx->ptr, ctx->pattern[0]));
   1147             i = ctx->pattern[0];
   1148             {
   1149                 Py_ssize_t groupref = i+i;
   1150                 if (groupref >= state->lastmark) {
   1151                     RETURN_FAILURE;
   1152                 } else {
   1153                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1154                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1155                     if (!p || !e || e < p)
   1156                         RETURN_FAILURE;
   1157                     while (p < e) {
   1158                         if (ctx->ptr >= end || *ctx->ptr != *p)
   1159                             RETURN_FAILURE;
   1160                         p++;
   1161                         ctx->ptr++;
   1162                     }
   1163                 }
   1164             }
   1165             ctx->pattern++;
   1166             break;
   1167 
   1168         case SRE_OP_GROUPREF_IGNORE:
   1169             /* match backreference */
   1170             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
   1171                    ctx->ptr, ctx->pattern[0]));
   1172             i = ctx->pattern[0];
   1173             {
   1174                 Py_ssize_t groupref = i+i;
   1175                 if (groupref >= state->lastmark) {
   1176                     RETURN_FAILURE;
   1177                 } else {
   1178                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1179                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1180                     if (!p || !e || e < p)
   1181                         RETURN_FAILURE;
   1182                     while (p < e) {
   1183                         if (ctx->ptr >= end ||
   1184                             sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p))
   1185                             RETURN_FAILURE;
   1186                         p++;
   1187                         ctx->ptr++;
   1188                     }
   1189                 }
   1190             }
   1191             ctx->pattern++;
   1192             break;
   1193 
   1194         case SRE_OP_GROUPREF_UNI_IGNORE:
   1195             /* match backreference */
   1196             TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern,
   1197                    ctx->ptr, ctx->pattern[0]));
   1198             i = ctx->pattern[0];
   1199             {
   1200                 Py_ssize_t groupref = i+i;
   1201                 if (groupref >= state->lastmark) {
   1202                     RETURN_FAILURE;
   1203                 } else {
   1204                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1205                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1206                     if (!p || !e || e < p)
   1207                         RETURN_FAILURE;
   1208                     while (p < e) {
   1209                         if (ctx->ptr >= end ||
   1210                             sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p))
   1211                             RETURN_FAILURE;
   1212                         p++;
   1213                         ctx->ptr++;
   1214                     }
   1215                 }
   1216             }
   1217             ctx->pattern++;
   1218             break;
   1219 
   1220         case SRE_OP_GROUPREF_LOC_IGNORE:
   1221             /* match backreference */
   1222             TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern,
   1223                    ctx->ptr, ctx->pattern[0]));
   1224             i = ctx->pattern[0];
   1225             {
   1226                 Py_ssize_t groupref = i+i;
   1227                 if (groupref >= state->lastmark) {
   1228                     RETURN_FAILURE;
   1229                 } else {
   1230                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1231                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1232                     if (!p || !e || e < p)
   1233                         RETURN_FAILURE;
   1234                     while (p < e) {
   1235                         if (ctx->ptr >= end ||
   1236                             sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p))
   1237                             RETURN_FAILURE;
   1238                         p++;
   1239                         ctx->ptr++;
   1240                     }
   1241                 }
   1242             }
   1243             ctx->pattern++;
   1244             break;
   1245 
   1246         case SRE_OP_GROUPREF_EXISTS:
   1247             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
   1248                    ctx->ptr, ctx->pattern[0]));
   1249             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
   1250             i = ctx->pattern[0];
   1251             {
   1252                 Py_ssize_t groupref = i+i;
   1253                 if (groupref >= state->lastmark) {
   1254                     ctx->pattern += ctx->pattern[1];
   1255                     break;
   1256                 } else {
   1257                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
   1258                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
   1259                     if (!p || !e || e < p) {
   1260                         ctx->pattern += ctx->pattern[1];
   1261                         break;
   1262                     }
   1263                 }
   1264             }
   1265             ctx->pattern += 2;
   1266             break;
   1267 
   1268         case SRE_OP_ASSERT:
   1269             /* assert subpattern */
   1270             /* <ASSERT> <skip> <back> <pattern> */
   1271             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
   1272                    ctx->ptr, ctx->pattern[1]));
   1273             if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
   1274                 RETURN_FAILURE;
   1275             state->ptr = ctx->ptr - ctx->pattern[1];
   1276             DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
   1277             RETURN_ON_FAILURE(ret);
   1278             ctx->pattern += ctx->pattern[0];
   1279             break;
   1280 
   1281         case SRE_OP_ASSERT_NOT:
   1282             /* assert not subpattern */
   1283             /* <ASSERT_NOT> <skip> <back> <pattern> */
   1284             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
   1285                    ctx->ptr, ctx->pattern[1]));
   1286             if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
   1287                 state->ptr = ctx->ptr - ctx->pattern[1];
   1288                 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
   1289                 if (ret) {
   1290                     RETURN_ON_ERROR(ret);
   1291                     RETURN_FAILURE;
   1292                 }
   1293             }
   1294             ctx->pattern += ctx->pattern[0];
   1295             break;
   1296 
   1297         case SRE_OP_FAILURE:
   1298             /* immediate failure */
   1299             TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
   1300             RETURN_FAILURE;
   1301 
   1302         default:
   1303             TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
   1304                    ctx->pattern[-1]));
   1305             RETURN_ERROR(SRE_ERROR_ILLEGAL);
   1306         }
   1307     }
   1308 
   1309 exit:
   1310     ctx_pos = ctx->last_ctx_pos;
   1311     jump = ctx->jump;
   1312     DATA_POP_DISCARD(ctx);
   1313     if (ctx_pos == -1)
   1314         return ret;
   1315     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
   1316 
   1317     switch (jump) {
   1318         case JUMP_MAX_UNTIL_2:
   1319             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
   1320             goto jump_max_until_2;
   1321         case JUMP_MAX_UNTIL_3:
   1322             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
   1323             goto jump_max_until_3;
   1324         case JUMP_MIN_UNTIL_2:
   1325             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
   1326             goto jump_min_until_2;
   1327         case JUMP_MIN_UNTIL_3:
   1328             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
   1329             goto jump_min_until_3;
   1330         case JUMP_BRANCH:
   1331             TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
   1332             goto jump_branch;
   1333         case JUMP_MAX_UNTIL_1:
   1334             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
   1335             goto jump_max_until_1;
   1336         case JUMP_MIN_UNTIL_1:
   1337             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
   1338             goto jump_min_until_1;
   1339         case JUMP_REPEAT:
   1340             TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
   1341             goto jump_repeat;
   1342         case JUMP_REPEAT_ONE_1:
   1343             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
   1344             goto jump_repeat_one_1;
   1345         case JUMP_REPEAT_ONE_2:
   1346             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
   1347             goto jump_repeat_one_2;
   1348         case JUMP_MIN_REPEAT_ONE:
   1349             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
   1350             goto jump_min_repeat_one;
   1351         case JUMP_ASSERT:
   1352             TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
   1353             goto jump_assert;
   1354         case JUMP_ASSERT_NOT:
   1355             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
   1356             goto jump_assert_not;
   1357         case JUMP_NONE:
   1358             TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
   1359                    ctx->ptr, ret));
   1360             break;
   1361     }
   1362 
   1363     return ret; /* should never get here */
   1364 }
   1365 
   1366 /* need to reset capturing groups between two SRE(match) callings in loops */
   1367 #define RESET_CAPTURE_GROUP() \
   1368     do { state->lastmark = state->lastindex = -1; } while (0)
   1369 
   1370 LOCAL(Py_ssize_t)
   1371 SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
   1372 {
   1373     SRE_CHAR* ptr = (SRE_CHAR *)state->start;
   1374     SRE_CHAR* end = (SRE_CHAR *)state->end;
   1375     Py_ssize_t status = 0;
   1376     Py_ssize_t prefix_len = 0;
   1377     Py_ssize_t prefix_skip = 0;
   1378     SRE_CODE* prefix = NULL;
   1379     SRE_CODE* charset = NULL;
   1380     SRE_CODE* overlap = NULL;
   1381     int flags = 0;
   1382 
   1383     if (ptr > end)
   1384         return 0;
   1385 
   1386     if (pattern[0] == SRE_OP_INFO) {
   1387         /* optimization info block */
   1388         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
   1389 
   1390         flags = pattern[2];
   1391 
   1392         if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
   1393             TRACE(("reject (got %u chars, need %u)\n",
   1394                    (unsigned int)(end - ptr), pattern[3]));
   1395             return 0;
   1396         }
   1397         if (pattern[3] > 1) {
   1398             /* adjust end point (but make sure we leave at least one
   1399                character in there, so literal search will work) */
   1400             end -= pattern[3] - 1;
   1401             if (end <= ptr)
   1402                 end = ptr;
   1403         }
   1404 
   1405         if (flags & SRE_INFO_PREFIX) {
   1406             /* pattern starts with a known prefix */
   1407             /* <length> <skip> <prefix data> <overlap data> */
   1408             prefix_len = pattern[5];
   1409             prefix_skip = pattern[6];
   1410             prefix = pattern + 7;
   1411             overlap = prefix + prefix_len - 1;
   1412         } else if (flags & SRE_INFO_CHARSET)
   1413             /* pattern starts with a character from a known set */
   1414             /* <charset> */
   1415             charset = pattern + 5;
   1416 
   1417         pattern += 1 + pattern[1];
   1418     }
   1419 
   1420     TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
   1421            prefix, prefix_len, prefix_skip));
   1422     TRACE(("charset = %p\n", charset));
   1423 
   1424     if (prefix_len == 1) {
   1425         /* pattern starts with a literal character */
   1426         SRE_CHAR c = (SRE_CHAR) prefix[0];
   1427 #if SIZEOF_SRE_CHAR < 4
   1428         if ((SRE_CODE) c != prefix[0])
   1429             return 0; /* literal can't match: doesn't fit in char width */
   1430 #endif
   1431         end = (SRE_CHAR *)state->end;
   1432         state->must_advance = 0;
   1433         while (ptr < end) {
   1434             while (*ptr != c) {
   1435                 if (++ptr >= end)
   1436                     return 0;
   1437             }
   1438             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
   1439             state->start = ptr;
   1440             state->ptr = ptr + prefix_skip;
   1441             if (flags & SRE_INFO_LITERAL)
   1442                 return 1; /* we got all of it */
   1443             status = SRE(match)(state, pattern + 2*prefix_skip, 0);
   1444             if (status != 0)
   1445                 return status;
   1446             ++ptr;
   1447             RESET_CAPTURE_GROUP();
   1448         }
   1449         return 0;
   1450     }
   1451 
   1452     if (prefix_len > 1) {
   1453         /* pattern starts with a known prefix.  use the overlap
   1454            table to skip forward as fast as we possibly can */
   1455         Py_ssize_t i = 0;
   1456 
   1457         end = (SRE_CHAR *)state->end;
   1458         if (prefix_len > end - ptr)
   1459             return 0;
   1460 #if SIZEOF_SRE_CHAR < 4
   1461         for (i = 0; i < prefix_len; i++)
   1462             if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
   1463                 return 0; /* literal can't match: doesn't fit in char width */
   1464 #endif
   1465         while (ptr < end) {
   1466             SRE_CHAR c = (SRE_CHAR) prefix[0];
   1467             while (*ptr++ != c) {
   1468                 if (ptr >= end)
   1469                     return 0;
   1470             }
   1471             if (ptr >= end)
   1472                 return 0;
   1473 
   1474             i = 1;
   1475             state->must_advance = 0;
   1476             do {
   1477                 if (*ptr == (SRE_CHAR) prefix[i]) {
   1478                     if (++i != prefix_len) {
   1479                         if (++ptr >= end)
   1480                             return 0;
   1481                         continue;
   1482                     }
   1483                     /* found a potential match */
   1484                     TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
   1485                     state->start = ptr - (prefix_len - 1);
   1486                     state->ptr = ptr - (prefix_len - prefix_skip - 1);
   1487                     if (flags & SRE_INFO_LITERAL)
   1488                         return 1; /* we got all of it */
   1489                     status = SRE(match)(state, pattern + 2*prefix_skip, 0);
   1490                     if (status != 0)
   1491                         return status;
   1492                     /* close but no cigar -- try again */
   1493                     if (++ptr >= end)
   1494                         return 0;
   1495                     RESET_CAPTURE_GROUP();
   1496                 }
   1497                 i = overlap[i];
   1498             } while (i != 0);
   1499         }
   1500         return 0;
   1501     }
   1502 
   1503     if (charset) {
   1504         /* pattern starts with a character from a known set */
   1505         end = (SRE_CHAR *)state->end;
   1506         state->must_advance = 0;
   1507         for (;;) {
   1508             while (ptr < end && !SRE(charset)(state, charset, *ptr))
   1509                 ptr++;
   1510             if (ptr >= end)
   1511                 return 0;
   1512             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
   1513             state->start = ptr;
   1514             state->ptr = ptr;
   1515             status = SRE(match)(state, pattern, 0);
   1516             if (status != 0)
   1517                 break;
   1518             ptr++;
   1519             RESET_CAPTURE_GROUP();
   1520         }
   1521     } else {
   1522         /* general case */
   1523         assert(ptr <= end);
   1524         TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
   1525         state->start = state->ptr = ptr;
   1526         status = SRE(match)(state, pattern, 1);
   1527         state->must_advance = 0;
   1528         while (status == 0 && ptr < end) {
   1529             ptr++;
   1530             RESET_CAPTURE_GROUP();
   1531             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
   1532             state->start = state->ptr = ptr;
   1533             status = SRE(match)(state, pattern, 0);
   1534         }
   1535     }
   1536 
   1537     return status;
   1538 }
   1539 
   1540 #undef SRE_CHAR
   1541 #undef SIZEOF_SRE_CHAR
   1542 #undef SRE
   1543 
   1544 /* vim:ts=4:sw=4:et
   1545 */
   1546