/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine

    Copyright (c) 2015, Daryl McDaniel. All rights reserved.<BR>
    Copyright (c) 2011, Intel Corporation. All rights reserved.<BR>
    This program and the accompanying materials are licensed and made available under
    the terms and conditions of the BSD License that accompanies this distribution.
    The full text of the license may be found at
    http://opensource.org/licenses/bsd-license.

    THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
    WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
 *
 * partial history:
 * 1999-10-24 fl  created (based on existing template matcher code)
 * 2000-03-06 fl  first alpha, sort of
 * 2000-08-01 fl  fixes for 1.6b1
 * 2000-08-07 fl  use PyOS_CheckStack() if available
 * 2000-09-20 fl  added expand method
 * 2001-03-20 fl  lots of fixes for 2.1b2
 * 2001-04-15 fl  export copyright as Python attribute, not global
 * 2001-04-28 fl  added __copy__ methods (work in progress)
 * 2001-05-14 fl  fixes for 1.5.2 compatibility
 * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl  added sub/subn primitive
 * 2001-10-24 fl  added finditer primitive (for 2.2 only)
 * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl  fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn  implemented non recursive scheme
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info (at) pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.
Hewlett-Packard provided funding for 1.6 integration and 44 * other compatibility work. 45 */ 46 47 /* Get rid of these macros to prevent collisions between EFI and Python in this file. */ 48 #undef RETURN_ERROR 49 #undef RETURN_SUCCESS 50 51 #ifndef SRE_RECURSIVE 52 53 static char copyright[] = 54 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB "; 55 56 #define PY_SSIZE_T_CLEAN 57 58 #include "Python.h" 59 #include "structmember.h" /* offsetof */ 60 61 #include "sre.h" 62 63 #include <ctype.h> 64 65 /* name of this module, minus the leading underscore */ 66 #if !defined(SRE_MODULE) 67 #define SRE_MODULE "sre" 68 #endif 69 70 #define SRE_PY_MODULE "re" 71 72 /* defining this one enables tracing */ 73 #undef VERBOSE 74 75 #if PY_VERSION_HEX >= 0x01060000 76 #if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE) 77 /* defining this enables unicode support (default under 1.6a1 and later) */ 78 #define HAVE_UNICODE 79 #endif 80 #endif 81 82 /* -------------------------------------------------------------------- */ 83 /* optional features */ 84 85 /* enables fast searching */ 86 #define USE_FAST_SEARCH 87 88 /* enables aggressive inlining (always on for Visual C) */ 89 #undef USE_INLINE 90 91 /* enables copy/deepcopy handling (work in progress) */ 92 #undef USE_BUILTIN_COPY 93 94 #if PY_VERSION_HEX < 0x01060000 95 #define PyObject_DEL(op) PyMem_DEL((op)) 96 #endif 97 98 /* -------------------------------------------------------------------- */ 99 100 #if defined(_MSC_VER) 101 #pragma optimize("gt", on) /* doesn't seem to make much difference... 
*/ 102 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */ 103 /* fastest possible local call under MSVC */ 104 #define LOCAL(type) static __inline type __fastcall 105 #elif defined(USE_INLINE) 106 #define LOCAL(type) static inline type 107 #else 108 #define LOCAL(type) static type 109 #endif 110 111 /* error codes */ 112 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */ 113 #define SRE_ERROR_STATE -2 /* illegal state */ 114 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */ 115 #define SRE_ERROR_MEMORY -9 /* out of memory */ 116 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ 117 118 #if defined(VERBOSE) 119 #define TRACE(v) printf v 120 #else 121 #define TRACE(v) 122 #endif 123 124 /* -------------------------------------------------------------------- */ 125 /* search engine state */ 126 127 /* default character predicates (run sre_chars.py to regenerate tables) */ 128 129 #define SRE_DIGIT_MASK 1 130 #define SRE_SPACE_MASK 2 131 #define SRE_LINEBREAK_MASK 4 132 #define SRE_ALNUM_MASK 8 133 #define SRE_WORD_MASK 16 134 135 /* FIXME: this assumes ASCII. 
create tables in init_sre() instead */ 136 137 static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 138 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 139 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 140 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 141 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 142 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 143 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 }; 144 145 static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 146 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 147 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 148 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 149 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 150 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 151 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 152 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 153 120, 121, 122, 123, 124, 125, 126, 127 }; 154 155 #define SRE_IS_DIGIT(ch)\ 156 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0) 157 #define SRE_IS_SPACE(ch)\ 158 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0) 159 #define SRE_IS_LINEBREAK(ch)\ 160 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0) 161 #define SRE_IS_ALNUM(ch)\ 162 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0) 163 #define SRE_IS_WORD(ch)\ 164 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0) 165 166 static unsigned int sre_lower(unsigned int ch) 167 { 168 return ((ch) < 128 ? 
(unsigned int)sre_char_lower[ch] : ch); 169 } 170 171 /* locale-specific character predicates */ 172 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids 173 * warnings when c's type supports only numbers < N+1 */ 174 #define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0) 175 #define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0) 176 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n') 177 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0) 178 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_') 179 180 static unsigned int sre_lower_locale(unsigned int ch) 181 { 182 return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch); 183 } 184 185 /* unicode-specific character predicates */ 186 187 #if defined(HAVE_UNICODE) 188 189 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch)) 190 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch)) 191 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch)) 192 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch)) 193 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_') 194 195 static unsigned int sre_lower_unicode(unsigned int ch) 196 { 197 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch)); 198 } 199 200 #endif 201 202 LOCAL(int) 203 sre_category(SRE_CODE category, unsigned int ch) 204 { 205 switch (category) { 206 207 case SRE_CATEGORY_DIGIT: 208 return SRE_IS_DIGIT(ch); 209 case SRE_CATEGORY_NOT_DIGIT: 210 return !SRE_IS_DIGIT(ch); 211 case SRE_CATEGORY_SPACE: 212 return SRE_IS_SPACE(ch); 213 case SRE_CATEGORY_NOT_SPACE: 214 return !SRE_IS_SPACE(ch); 215 case SRE_CATEGORY_WORD: 216 return SRE_IS_WORD(ch); 217 case SRE_CATEGORY_NOT_WORD: 218 return !SRE_IS_WORD(ch); 219 case SRE_CATEGORY_LINEBREAK: 220 return SRE_IS_LINEBREAK(ch); 221 case SRE_CATEGORY_NOT_LINEBREAK: 222 return !SRE_IS_LINEBREAK(ch); 223 224 case SRE_CATEGORY_LOC_WORD: 225 return SRE_LOC_IS_WORD(ch); 226 case SRE_CATEGORY_LOC_NOT_WORD: 
227 return !SRE_LOC_IS_WORD(ch); 228 229 #if defined(HAVE_UNICODE) 230 case SRE_CATEGORY_UNI_DIGIT: 231 return SRE_UNI_IS_DIGIT(ch); 232 case SRE_CATEGORY_UNI_NOT_DIGIT: 233 return !SRE_UNI_IS_DIGIT(ch); 234 case SRE_CATEGORY_UNI_SPACE: 235 return SRE_UNI_IS_SPACE(ch); 236 case SRE_CATEGORY_UNI_NOT_SPACE: 237 return !SRE_UNI_IS_SPACE(ch); 238 case SRE_CATEGORY_UNI_WORD: 239 return SRE_UNI_IS_WORD(ch); 240 case SRE_CATEGORY_UNI_NOT_WORD: 241 return !SRE_UNI_IS_WORD(ch); 242 case SRE_CATEGORY_UNI_LINEBREAK: 243 return SRE_UNI_IS_LINEBREAK(ch); 244 case SRE_CATEGORY_UNI_NOT_LINEBREAK: 245 return !SRE_UNI_IS_LINEBREAK(ch); 246 #else 247 case SRE_CATEGORY_UNI_DIGIT: 248 return SRE_IS_DIGIT(ch); 249 case SRE_CATEGORY_UNI_NOT_DIGIT: 250 return !SRE_IS_DIGIT(ch); 251 case SRE_CATEGORY_UNI_SPACE: 252 return SRE_IS_SPACE(ch); 253 case SRE_CATEGORY_UNI_NOT_SPACE: 254 return !SRE_IS_SPACE(ch); 255 case SRE_CATEGORY_UNI_WORD: 256 return SRE_LOC_IS_WORD(ch); 257 case SRE_CATEGORY_UNI_NOT_WORD: 258 return !SRE_LOC_IS_WORD(ch); 259 case SRE_CATEGORY_UNI_LINEBREAK: 260 return SRE_IS_LINEBREAK(ch); 261 case SRE_CATEGORY_UNI_NOT_LINEBREAK: 262 return !SRE_IS_LINEBREAK(ch); 263 #endif 264 } 265 return 0; 266 } 267 268 /* helpers */ 269 270 static void 271 data_stack_dealloc(SRE_STATE* state) 272 { 273 if (state->data_stack) { 274 PyMem_FREE(state->data_stack); 275 state->data_stack = NULL; 276 } 277 state->data_stack_size = state->data_stack_base = 0; 278 } 279 280 static int 281 data_stack_grow(SRE_STATE* state, Py_ssize_t size) 282 { 283 Py_ssize_t minsize, cursize; 284 minsize = state->data_stack_base+size; 285 cursize = state->data_stack_size; 286 if (cursize < minsize) { 287 void* stack; 288 cursize = minsize+minsize/4+1024; 289 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize)); 290 stack = PyMem_REALLOC(state->data_stack, cursize); 291 if (!stack) { 292 data_stack_dealloc(state); 293 return SRE_ERROR_MEMORY; 294 } 295 state->data_stack = (char *)stack; 296 
state->data_stack_size = cursize; 297 } 298 return 0; 299 } 300 301 /* generate 8-bit version */ 302 303 #define SRE_CHAR unsigned char 304 #define SRE_AT sre_at 305 #define SRE_COUNT sre_count 306 #define SRE_CHARSET sre_charset 307 #define SRE_INFO sre_info 308 #define SRE_MATCH sre_match 309 #define SRE_MATCH_CONTEXT sre_match_context 310 #define SRE_SEARCH sre_search 311 #define SRE_LITERAL_TEMPLATE sre_literal_template 312 313 #if defined(HAVE_UNICODE) 314 315 #define SRE_RECURSIVE 316 #include "_sre.c" 317 #undef SRE_RECURSIVE 318 319 #undef SRE_LITERAL_TEMPLATE 320 #undef SRE_SEARCH 321 #undef SRE_MATCH 322 #undef SRE_MATCH_CONTEXT 323 #undef SRE_INFO 324 #undef SRE_CHARSET 325 #undef SRE_COUNT 326 #undef SRE_AT 327 #undef SRE_CHAR 328 329 /* generate 16-bit unicode version */ 330 331 #define SRE_CHAR Py_UNICODE 332 #define SRE_AT sre_uat 333 #define SRE_COUNT sre_ucount 334 #define SRE_CHARSET sre_ucharset 335 #define SRE_INFO sre_uinfo 336 #define SRE_MATCH sre_umatch 337 #define SRE_MATCH_CONTEXT sre_umatch_context 338 #define SRE_SEARCH sre_usearch 339 #define SRE_LITERAL_TEMPLATE sre_uliteral_template 340 #endif 341 342 #endif /* SRE_RECURSIVE */ 343 344 /* -------------------------------------------------------------------- */ 345 /* String matching engine */ 346 347 /* the following section is compiled twice, with different character 348 settings */ 349 350 LOCAL(int) 351 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) 352 { 353 /* check if pointer is at given position */ 354 355 Py_ssize_t thisp, thatp; 356 357 switch (at) { 358 359 case SRE_AT_BEGINNING: 360 case SRE_AT_BEGINNING_STRING: 361 return ((void*) ptr == state->beginning); 362 363 case SRE_AT_BEGINNING_LINE: 364 return ((void*) ptr == state->beginning || 365 SRE_IS_LINEBREAK((int) ptr[-1])); 366 367 case SRE_AT_END: 368 return (((void*) (ptr+1) == state->end && 369 SRE_IS_LINEBREAK((int) ptr[0])) || 370 ((void*) ptr == state->end)); 371 372 case SRE_AT_END_LINE: 373 return ((void*) 
ptr == state->end || 374 SRE_IS_LINEBREAK((int) ptr[0])); 375 376 case SRE_AT_END_STRING: 377 return ((void*) ptr == state->end); 378 379 case SRE_AT_BOUNDARY: 380 if (state->beginning == state->end) 381 return 0; 382 thatp = ((void*) ptr > state->beginning) ? 383 SRE_IS_WORD((int) ptr[-1]) : 0; 384 thisp = ((void*) ptr < state->end) ? 385 SRE_IS_WORD((int) ptr[0]) : 0; 386 return thisp != thatp; 387 388 case SRE_AT_NON_BOUNDARY: 389 if (state->beginning == state->end) 390 return 0; 391 thatp = ((void*) ptr > state->beginning) ? 392 SRE_IS_WORD((int) ptr[-1]) : 0; 393 thisp = ((void*) ptr < state->end) ? 394 SRE_IS_WORD((int) ptr[0]) : 0; 395 return thisp == thatp; 396 397 case SRE_AT_LOC_BOUNDARY: 398 if (state->beginning == state->end) 399 return 0; 400 thatp = ((void*) ptr > state->beginning) ? 401 SRE_LOC_IS_WORD((int) ptr[-1]) : 0; 402 thisp = ((void*) ptr < state->end) ? 403 SRE_LOC_IS_WORD((int) ptr[0]) : 0; 404 return thisp != thatp; 405 406 case SRE_AT_LOC_NON_BOUNDARY: 407 if (state->beginning == state->end) 408 return 0; 409 thatp = ((void*) ptr > state->beginning) ? 410 SRE_LOC_IS_WORD((int) ptr[-1]) : 0; 411 thisp = ((void*) ptr < state->end) ? 412 SRE_LOC_IS_WORD((int) ptr[0]) : 0; 413 return thisp == thatp; 414 415 #if defined(HAVE_UNICODE) 416 case SRE_AT_UNI_BOUNDARY: 417 if (state->beginning == state->end) 418 return 0; 419 thatp = ((void*) ptr > state->beginning) ? 420 SRE_UNI_IS_WORD((int) ptr[-1]) : 0; 421 thisp = ((void*) ptr < state->end) ? 422 SRE_UNI_IS_WORD((int) ptr[0]) : 0; 423 return thisp != thatp; 424 425 case SRE_AT_UNI_NON_BOUNDARY: 426 if (state->beginning == state->end) 427 return 0; 428 thatp = ((void*) ptr > state->beginning) ? 429 SRE_UNI_IS_WORD((int) ptr[-1]) : 0; 430 thisp = ((void*) ptr < state->end) ? 
431 SRE_UNI_IS_WORD((int) ptr[0]) : 0; 432 return thisp == thatp; 433 #endif 434 435 } 436 437 return 0; 438 } 439 440 LOCAL(int) 441 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch) 442 { 443 /* check if character is a member of the given set */ 444 445 int ok = 1; 446 447 for (;;) { 448 switch (*set++) { 449 450 case SRE_OP_FAILURE: 451 return !ok; 452 453 case SRE_OP_LITERAL: 454 /* <LITERAL> <code> */ 455 if (ch == set[0]) 456 return ok; 457 set++; 458 break; 459 460 case SRE_OP_CATEGORY: 461 /* <CATEGORY> <code> */ 462 if (sre_category(set[0], (int) ch)) 463 return ok; 464 set += 1; 465 break; 466 467 case SRE_OP_CHARSET: 468 if (sizeof(SRE_CODE) == 2) { 469 /* <CHARSET> <bitmap> (16 bits per code word) */ 470 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15)))) 471 return ok; 472 set += 16; 473 } 474 else { 475 /* <CHARSET> <bitmap> (32 bits per code word) */ 476 if (ch < 256 && (set[ch >> 5] & (1u << (ch & 31)))) 477 return ok; 478 set += 8; 479 } 480 break; 481 482 case SRE_OP_RANGE: 483 /* <RANGE> <lower> <upper> */ 484 if (set[0] <= ch && ch <= set[1]) 485 return ok; 486 set += 2; 487 break; 488 489 case SRE_OP_NEGATE: 490 ok = !ok; 491 break; 492 493 case SRE_OP_BIGCHARSET: 494 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ 495 { 496 Py_ssize_t count, block; 497 count = *(set++); 498 499 if (sizeof(SRE_CODE) == 2) { 500 block = ((unsigned char*)set)[ch >> 8]; 501 set += 128; 502 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15))) 503 return ok; 504 set += count*16; 505 } 506 else { 507 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids 508 * warnings when c's type supports only numbers < N+1 */ 509 if (!(ch & ~65535)) 510 block = ((unsigned char*)set)[ch >> 8]; 511 else 512 block = -1; 513 set += 64; 514 if (block >=0 && 515 (set[block*8 + ((ch & 255)>>5)] & (1u << (ch & 31)))) 516 return ok; 517 set += count*8; 518 } 519 break; 520 } 521 522 default: 523 /* internal error -- there's not much we can do about it 524 here, so let's just 
pretend it didn't match... */ 525 return 0; 526 } 527 } 528 } 529 530 LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern); 531 532 LOCAL(Py_ssize_t) 533 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) 534 { 535 SRE_CODE chr; 536 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr; 537 SRE_CHAR* end = (SRE_CHAR *)state->end; 538 Py_ssize_t i; 539 540 /* adjust end */ 541 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) 542 end = ptr + maxcount; 543 544 switch (pattern[0]) { 545 546 case SRE_OP_IN: 547 /* repeated set */ 548 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); 549 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr)) 550 ptr++; 551 break; 552 553 case SRE_OP_ANY: 554 /* repeated dot wildcard. */ 555 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); 556 while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) 557 ptr++; 558 break; 559 560 case SRE_OP_ANY_ALL: 561 /* repeated dot wildcard. skip to the end of the target 562 string, and backtrack from there */ 563 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); 564 ptr = end; 565 break; 566 567 case SRE_OP_LITERAL: 568 /* repeated literal */ 569 chr = pattern[1]; 570 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); 571 while (ptr < end && (SRE_CODE) *ptr == chr) 572 ptr++; 573 break; 574 575 case SRE_OP_LITERAL_IGNORE: 576 /* repeated literal */ 577 chr = pattern[1]; 578 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); 579 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr) 580 ptr++; 581 break; 582 583 case SRE_OP_NOT_LITERAL: 584 /* repeated non-literal */ 585 chr = pattern[1]; 586 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); 587 while (ptr < end && (SRE_CODE) *ptr != chr) 588 ptr++; 589 break; 590 591 case SRE_OP_NOT_LITERAL_IGNORE: 592 /* repeated non-literal */ 593 chr = pattern[1]; 594 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); 595 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr) 596 ptr++; 597 break; 598 599 default: 600 
/* repeated single character pattern */ 601 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); 602 while ((SRE_CHAR*) state->ptr < end) { 603 i = SRE_MATCH(state, pattern); 604 if (i < 0) 605 return i; 606 if (!i) 607 break; 608 } 609 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, 610 (SRE_CHAR*) state->ptr - ptr)); 611 return (SRE_CHAR*) state->ptr - ptr; 612 } 613 614 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, 615 ptr - (SRE_CHAR*) state->ptr)); 616 return ptr - (SRE_CHAR*) state->ptr; 617 } 618 619 #if 0 /* not used in this release */ 620 LOCAL(int) 621 SRE_INFO(SRE_STATE* state, SRE_CODE* pattern) 622 { 623 /* check if an SRE_OP_INFO block matches at the current position. 624 returns the number of SRE_CODE objects to skip if successful, 0 625 if no match */ 626 627 SRE_CHAR* end = state->end; 628 SRE_CHAR* ptr = state->ptr; 629 Py_ssize_t i; 630 631 /* check minimal length */ 632 if (pattern[3] && (end - ptr) < pattern[3]) 633 return 0; 634 635 /* check known prefix */ 636 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) { 637 /* <length> <skip> <prefix data> <overlap data> */ 638 for (i = 0; i < pattern[5]; i++) 639 if ((SRE_CODE) ptr[i] != pattern[7 + i]) 640 return 0; 641 return pattern[0] + 2 * pattern[6]; 642 } 643 return pattern[0]; 644 } 645 #endif 646 647 /* The macros below should be used to protect recursive SRE_MATCH() 648 * calls that *failed* and do *not* return immediately (IOW, those 649 * that will backtrack). Explaining: 650 * 651 * - Recursive SRE_MATCH() returned true: that's usually a success 652 * (besides atypical cases like ASSERT_NOT), therefore there's no 653 * reason to restore lastmark; 654 * 655 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH() 656 * is returning to the caller: If the current SRE_MATCH() is the 657 * top function of the recursion, returning false will be a matching 658 * failure, and it doesn't matter where lastmark is pointing to. 
659 * If it's *not* the top function, it will be a recursive SRE_MATCH() 660 * failure by itself, and the calling SRE_MATCH() will have to deal 661 * with the failure by the same rules explained here (it will restore 662 * lastmark by itself if necessary); 663 * 664 * - Recursive SRE_MATCH() returned false, and will continue the 665 * outside 'for' loop: must be protected when breaking, since the next 666 * OP could potentially depend on lastmark; 667 * 668 * - Recursive SRE_MATCH() returned false, and will be called again 669 * inside a local for/while loop: must be protected between each 670 * loop iteration, since the recursive SRE_MATCH() could do anything, 671 * and could potentially depend on lastmark. 672 * 673 * For more information, check the discussion at SF patch #712900. 674 */ 675 #define LASTMARK_SAVE() \ 676 do { \ 677 ctx->lastmark = state->lastmark; \ 678 ctx->lastindex = state->lastindex; \ 679 } while (0) 680 #define LASTMARK_RESTORE() \ 681 do { \ 682 state->lastmark = ctx->lastmark; \ 683 state->lastindex = ctx->lastindex; \ 684 } while (0) 685 686 #define RETURN_ERROR(i) do { return i; } while(0) 687 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0) 688 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0) 689 690 #define RETURN_ON_ERROR(i) \ 691 do { if (i < 0) RETURN_ERROR(i); } while (0) 692 #define RETURN_ON_SUCCESS(i) \ 693 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0) 694 #define RETURN_ON_FAILURE(i) \ 695 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0) 696 697 #define SFY(x) #x 698 699 #define DATA_STACK_ALLOC(state, type, ptr) \ 700 do { \ 701 alloc_pos = state->data_stack_base; \ 702 TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \ 703 "(%" PY_FORMAT_SIZE_T "d)\n", \ 704 SFY(type), alloc_pos, sizeof(type))); \ 705 if (sizeof(type) > state->data_stack_size - alloc_pos) { \ 706 int j = data_stack_grow(state, sizeof(type)); \ 707 if (j < 0) return j; \ 708 if (ctx_pos != -1) \ 709 
DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \ 710 } \ 711 ptr = (type*)(state->data_stack+alloc_pos); \ 712 state->data_stack_base += sizeof(type); \ 713 } while (0) 714 715 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \ 716 do { \ 717 TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", SFY(type), pos)); \ 718 ptr = (type*)(state->data_stack+pos); \ 719 } while (0) 720 721 #define DATA_STACK_PUSH(state, data, size) \ 722 do { \ 723 TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \ 724 "(%" PY_FORMAT_SIZE_T "d)\n", \ 725 data, state->data_stack_base, size)); \ 726 if (size > state->data_stack_size - state->data_stack_base) { \ 727 int j = data_stack_grow(state, size); \ 728 if (j < 0) return j; \ 729 if (ctx_pos != -1) \ 730 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \ 731 } \ 732 memcpy(state->data_stack+state->data_stack_base, data, size); \ 733 state->data_stack_base += size; \ 734 } while (0) 735 736 #define DATA_STACK_POP(state, data, size, discard) \ 737 do { \ 738 TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \ 739 "(%" PY_FORMAT_SIZE_T "d)\n", \ 740 data, state->data_stack_base-size, size)); \ 741 memcpy(data, state->data_stack+state->data_stack_base-size, size); \ 742 if (discard) \ 743 state->data_stack_base -= size; \ 744 } while (0) 745 746 #define DATA_STACK_POP_DISCARD(state, size) \ 747 do { \ 748 TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \ 749 "(%" PY_FORMAT_SIZE_T "d)\n", \ 750 state->data_stack_base-size, size)); \ 751 state->data_stack_base -= size; \ 752 } while(0) 753 754 #define DATA_PUSH(x) \ 755 DATA_STACK_PUSH(state, (x), sizeof(*(x))) 756 #define DATA_POP(x) \ 757 DATA_STACK_POP(state, (x), sizeof(*(x)), 1) 758 #define DATA_POP_DISCARD(x) \ 759 DATA_STACK_POP_DISCARD(state, sizeof(*(x))) 760 #define DATA_ALLOC(t,p) \ 761 DATA_STACK_ALLOC(state, t, p) 762 #define DATA_LOOKUP_AT(t,p,pos) \ 763 DATA_STACK_LOOKUP_AT(state,t,p,pos) 764 765 #define MARK_PUSH(lastmark) \ 766 do if 
(lastmark > 0) { \ 767 i = lastmark; /* ctx->lastmark may change if reallocated */ \ 768 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \ 769 } while (0) 770 #define MARK_POP(lastmark) \ 771 do if (lastmark > 0) { \ 772 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \ 773 } while (0) 774 #define MARK_POP_KEEP(lastmark) \ 775 do if (lastmark > 0) { \ 776 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \ 777 } while (0) 778 #define MARK_POP_DISCARD(lastmark) \ 779 do if (lastmark > 0) { \ 780 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \ 781 } while (0) 782 783 #define JUMP_NONE 0 784 #define JUMP_MAX_UNTIL_1 1 785 #define JUMP_MAX_UNTIL_2 2 786 #define JUMP_MAX_UNTIL_3 3 787 #define JUMP_MIN_UNTIL_1 4 788 #define JUMP_MIN_UNTIL_2 5 789 #define JUMP_MIN_UNTIL_3 6 790 #define JUMP_REPEAT 7 791 #define JUMP_REPEAT_ONE_1 8 792 #define JUMP_REPEAT_ONE_2 9 793 #define JUMP_MIN_REPEAT_ONE 10 794 #define JUMP_BRANCH 11 795 #define JUMP_ASSERT 12 796 #define JUMP_ASSERT_NOT 13 797 798 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ 799 DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \ 800 nextctx->last_ctx_pos = ctx_pos; \ 801 nextctx->jump = jumpvalue; \ 802 nextctx->pattern = nextpattern; \ 803 ctx_pos = alloc_pos; \ 804 ctx = nextctx; \ 805 goto entrance; \ 806 jumplabel: \ 807 while (0) /* gcc doesn't like labels at end of scopes */ \ 808 809 typedef struct { 810 Py_ssize_t last_ctx_pos; 811 Py_ssize_t jump; 812 SRE_CHAR* ptr; 813 SRE_CODE* pattern; 814 Py_ssize_t count; 815 Py_ssize_t lastmark; 816 Py_ssize_t lastindex; 817 union { 818 SRE_CODE chr; 819 SRE_REPEAT* rep; 820 } u; 821 } SRE_MATCH_CONTEXT; 822 823 /* check if string matches the given pattern. 
returns <0 for 824 error, 0 for failure, and 1 for success */ 825 LOCAL(Py_ssize_t) 826 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) 827 { 828 SRE_CHAR* end = (SRE_CHAR *)state->end; 829 Py_ssize_t alloc_pos, ctx_pos = -1; 830 Py_ssize_t i, ret = 0; 831 Py_ssize_t jump; 832 unsigned int sigcount=0; 833 834 SRE_MATCH_CONTEXT* ctx; 835 SRE_MATCH_CONTEXT* nextctx; 836 837 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); 838 839 DATA_ALLOC(SRE_MATCH_CONTEXT, ctx); 840 ctx->last_ctx_pos = -1; 841 ctx->jump = JUMP_NONE; 842 ctx->pattern = pattern; 843 ctx_pos = alloc_pos; 844 845 entrance: 846 847 ctx->ptr = (SRE_CHAR *)state->ptr; 848 849 if (ctx->pattern[0] == SRE_OP_INFO) { 850 /* optimization info block */ 851 /* <INFO> <1=skip> <2=flags> <3=min> ... */ 852 if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) { 853 TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, " 854 "need %" PY_FORMAT_SIZE_T "d)\n", 855 (end - ctx->ptr), (Py_ssize_t) ctx->pattern[3])); 856 RETURN_FAILURE; 857 } 858 ctx->pattern += ctx->pattern[1] + 1; 859 } 860 861 for (;;) { 862 ++sigcount; 863 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) 864 RETURN_ERROR(SRE_ERROR_INTERRUPTED); 865 866 switch (*ctx->pattern++) { 867 868 case SRE_OP_MARK: 869 /* set mark */ 870 /* <MARK> <gid> */ 871 TRACE(("|%p|%p|MARK %d\n", ctx->pattern, 872 ctx->ptr, ctx->pattern[0])); 873 i = ctx->pattern[0]; 874 if (i & 1) 875 state->lastindex = i/2 + 1; 876 if (i > state->lastmark) { 877 /* state->lastmark is the highest valid index in the 878 state->mark array. If it is increased by more than 1, 879 the intervening marks must be set to NULL to signal 880 that these marks have not been encountered. 
*/ 881 Py_ssize_t j = state->lastmark + 1; 882 while (j < i) 883 state->mark[j++] = NULL; 884 state->lastmark = i; 885 } 886 state->mark[i] = ctx->ptr; 887 ctx->pattern++; 888 break; 889 890 case SRE_OP_LITERAL: 891 /* match literal string */ 892 /* <LITERAL> <code> */ 893 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, 894 ctx->ptr, *ctx->pattern)); 895 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0]) 896 RETURN_FAILURE; 897 ctx->pattern++; 898 ctx->ptr++; 899 break; 900 901 case SRE_OP_NOT_LITERAL: 902 /* match anything that is not literal character */ 903 /* <NOT_LITERAL> <code> */ 904 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, 905 ctx->ptr, *ctx->pattern)); 906 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0]) 907 RETURN_FAILURE; 908 ctx->pattern++; 909 ctx->ptr++; 910 break; 911 912 case SRE_OP_SUCCESS: 913 /* end of pattern */ 914 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); 915 state->ptr = ctx->ptr; 916 RETURN_SUCCESS; 917 918 case SRE_OP_AT: 919 /* match at given position */ 920 /* <AT> <code> */ 921 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); 922 if (!SRE_AT(state, ctx->ptr, *ctx->pattern)) 923 RETURN_FAILURE; 924 ctx->pattern++; 925 break; 926 927 case SRE_OP_CATEGORY: 928 /* match at given category */ 929 /* <CATEGORY> <code> */ 930 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, 931 ctx->ptr, *ctx->pattern)); 932 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0])) 933 RETURN_FAILURE; 934 ctx->pattern++; 935 ctx->ptr++; 936 break; 937 938 case SRE_OP_ANY: 939 /* match anything (except a newline) */ 940 /* <ANY> */ 941 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); 942 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) 943 RETURN_FAILURE; 944 ctx->ptr++; 945 break; 946 947 case SRE_OP_ANY_ALL: 948 /* match anything */ 949 /* <ANY_ALL> */ 950 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); 951 if (ctx->ptr >= end) 952 RETURN_FAILURE; 953 ctx->ptr++; 954 break; 955 956 
case SRE_OP_IN: 957 /* match set member (or non_member) */ 958 /* <IN> <skip> <set> */ 959 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); 960 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr)) 961 RETURN_FAILURE; 962 ctx->pattern += ctx->pattern[0]; 963 ctx->ptr++; 964 break; 965 966 case SRE_OP_LITERAL_IGNORE: 967 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", 968 ctx->pattern, ctx->ptr, ctx->pattern[0])); 969 if (ctx->ptr >= end || 970 state->lower(*ctx->ptr) != state->lower(*ctx->pattern)) 971 RETURN_FAILURE; 972 ctx->pattern++; 973 ctx->ptr++; 974 break; 975 976 case SRE_OP_NOT_LITERAL_IGNORE: 977 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", 978 ctx->pattern, ctx->ptr, *ctx->pattern)); 979 if (ctx->ptr >= end || 980 state->lower(*ctx->ptr) == state->lower(*ctx->pattern)) 981 RETURN_FAILURE; 982 ctx->pattern++; 983 ctx->ptr++; 984 break; 985 986 case SRE_OP_IN_IGNORE: 987 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); 988 if (ctx->ptr >= end 989 || !SRE_CHARSET(ctx->pattern+1, 990 (SRE_CODE)state->lower(*ctx->ptr))) 991 RETURN_FAILURE; 992 ctx->pattern += ctx->pattern[0]; 993 ctx->ptr++; 994 break; 995 996 case SRE_OP_JUMP: 997 case SRE_OP_INFO: 998 /* jump forward */ 999 /* <JUMP> <offset> */ 1000 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, 1001 ctx->ptr, ctx->pattern[0])); 1002 ctx->pattern += ctx->pattern[0]; 1003 break; 1004 1005 case SRE_OP_BRANCH: 1006 /* alternation */ 1007 /* <BRANCH> <0=skip> code <JUMP> ... 
<NULL> */ 1008 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); 1009 LASTMARK_SAVE(); 1010 ctx->u.rep = state->repeat; 1011 if (ctx->u.rep) 1012 MARK_PUSH(ctx->lastmark); 1013 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) { 1014 if (ctx->pattern[1] == SRE_OP_LITERAL && 1015 (ctx->ptr >= end || 1016 (SRE_CODE) *ctx->ptr != ctx->pattern[2])) 1017 continue; 1018 if (ctx->pattern[1] == SRE_OP_IN && 1019 (ctx->ptr >= end || 1020 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr))) 1021 continue; 1022 state->ptr = ctx->ptr; 1023 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); 1024 if (ret) { 1025 if (ctx->u.rep) 1026 MARK_POP_DISCARD(ctx->lastmark); 1027 RETURN_ON_ERROR(ret); 1028 RETURN_SUCCESS; 1029 } 1030 if (ctx->u.rep) 1031 MARK_POP_KEEP(ctx->lastmark); 1032 LASTMARK_RESTORE(); 1033 } 1034 if (ctx->u.rep) 1035 MARK_POP_DISCARD(ctx->lastmark); 1036 RETURN_FAILURE; 1037 1038 case SRE_OP_REPEAT_ONE: 1039 /* match repeated sequence (maximizing regexp) */ 1040 1041 /* this operator only works if the repeated item is 1042 exactly one character wide, and we're not already 1043 collecting backtracking points. for other cases, 1044 use the MAX_REPEAT operator */ 1045 1046 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ 1047 1048 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, 1049 ctx->pattern[1], ctx->pattern[2])); 1050 1051 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) 1052 RETURN_FAILURE; /* cannot match */ 1053 1054 state->ptr = ctx->ptr; 1055 1056 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]); 1057 RETURN_ON_ERROR(ret); 1058 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); 1059 ctx->count = ret; 1060 ctx->ptr += ctx->count; 1061 1062 /* when we arrive here, count contains the number of 1063 matches, and ctx->ptr points to the tail of the target 1064 string. check if the rest of the pattern matches, 1065 and backtrack if not. 
*/ 1066 1067 if (ctx->count < (Py_ssize_t) ctx->pattern[1]) 1068 RETURN_FAILURE; 1069 1070 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) { 1071 /* tail is empty. we're finished */ 1072 state->ptr = ctx->ptr; 1073 RETURN_SUCCESS; 1074 } 1075 1076 LASTMARK_SAVE(); 1077 1078 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) { 1079 /* tail starts with a literal. skip positions where 1080 the rest of the pattern cannot possibly match */ 1081 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1]; 1082 for (;;) { 1083 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] && 1084 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) { 1085 ctx->ptr--; 1086 ctx->count--; 1087 } 1088 if (ctx->count < (Py_ssize_t) ctx->pattern[1]) 1089 break; 1090 state->ptr = ctx->ptr; 1091 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, 1092 ctx->pattern+ctx->pattern[0]); 1093 if (ret) { 1094 RETURN_ON_ERROR(ret); 1095 RETURN_SUCCESS; 1096 } 1097 1098 LASTMARK_RESTORE(); 1099 1100 ctx->ptr--; 1101 ctx->count--; 1102 } 1103 1104 } else { 1105 /* general case */ 1106 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { 1107 state->ptr = ctx->ptr; 1108 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, 1109 ctx->pattern+ctx->pattern[0]); 1110 if (ret) { 1111 RETURN_ON_ERROR(ret); 1112 RETURN_SUCCESS; 1113 } 1114 ctx->ptr--; 1115 ctx->count--; 1116 LASTMARK_RESTORE(); 1117 } 1118 } 1119 RETURN_FAILURE; 1120 1121 case SRE_OP_MIN_REPEAT_ONE: 1122 /* match repeated sequence (minimizing regexp) */ 1123 1124 /* this operator only works if the repeated item is 1125 exactly one character wide, and we're not already 1126 collecting backtracking points. 
for other cases, 1127 use the MIN_REPEAT operator */ 1128 1129 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ 1130 1131 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, 1132 ctx->pattern[1], ctx->pattern[2])); 1133 1134 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) 1135 RETURN_FAILURE; /* cannot match */ 1136 1137 state->ptr = ctx->ptr; 1138 1139 if (ctx->pattern[1] == 0) 1140 ctx->count = 0; 1141 else { 1142 /* count using pattern min as the maximum */ 1143 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]); 1144 RETURN_ON_ERROR(ret); 1145 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); 1146 if (ret < (Py_ssize_t) ctx->pattern[1]) 1147 /* didn't match minimum number of times */ 1148 RETURN_FAILURE; 1149 /* advance past minimum matches of repeat */ 1150 ctx->count = ret; 1151 ctx->ptr += ctx->count; 1152 } 1153 1154 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) { 1155 /* tail is empty. we're finished */ 1156 state->ptr = ctx->ptr; 1157 RETURN_SUCCESS; 1158 1159 } else { 1160 /* general case */ 1161 LASTMARK_SAVE(); 1162 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT 1163 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { 1164 state->ptr = ctx->ptr; 1165 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, 1166 ctx->pattern+ctx->pattern[0]); 1167 if (ret) { 1168 RETURN_ON_ERROR(ret); 1169 RETURN_SUCCESS; 1170 } 1171 state->ptr = ctx->ptr; 1172 ret = SRE_COUNT(state, ctx->pattern+3, 1); 1173 RETURN_ON_ERROR(ret); 1174 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); 1175 if (ret == 0) 1176 break; 1177 assert(ret == 1); 1178 ctx->ptr++; 1179 ctx->count++; 1180 LASTMARK_RESTORE(); 1181 } 1182 } 1183 RETURN_FAILURE; 1184 1185 case SRE_OP_REPEAT: 1186 /* create repeat context. 
all the hard work is done 1187 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ 1188 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */ 1189 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr, 1190 ctx->pattern[1], ctx->pattern[2])); 1191 1192 /* install new repeat context */ 1193 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep)); 1194 if (!ctx->u.rep) { 1195 PyErr_NoMemory(); 1196 RETURN_FAILURE; 1197 } 1198 ctx->u.rep->count = -1; 1199 ctx->u.rep->pattern = ctx->pattern; 1200 ctx->u.rep->prev = state->repeat; 1201 ctx->u.rep->last_ptr = NULL; 1202 state->repeat = ctx->u.rep; 1203 1204 state->ptr = ctx->ptr; 1205 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); 1206 state->repeat = ctx->u.rep->prev; 1207 PyObject_FREE(ctx->u.rep); 1208 1209 if (ret) { 1210 RETURN_ON_ERROR(ret); 1211 RETURN_SUCCESS; 1212 } 1213 RETURN_FAILURE; 1214 1215 case SRE_OP_MAX_UNTIL: 1216 /* maximizing repeat */ 1217 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ 1218 1219 /* FIXME: we probably need to deal with zero-width 1220 matches in here... 
*/ 1221 1222 ctx->u.rep = state->repeat; 1223 if (!ctx->u.rep) 1224 RETURN_ERROR(SRE_ERROR_STATE); 1225 1226 state->ptr = ctx->ptr; 1227 1228 ctx->count = ctx->u.rep->count+1; 1229 1230 TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, 1231 ctx->ptr, ctx->count)); 1232 1233 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { 1234 /* not enough matches */ 1235 ctx->u.rep->count = ctx->count; 1236 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, 1237 ctx->u.rep->pattern+3); 1238 if (ret) { 1239 RETURN_ON_ERROR(ret); 1240 RETURN_SUCCESS; 1241 } 1242 ctx->u.rep->count = ctx->count-1; 1243 state->ptr = ctx->ptr; 1244 RETURN_FAILURE; 1245 } 1246 1247 if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || 1248 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && 1249 state->ptr != ctx->u.rep->last_ptr) { 1250 /* we may have enough matches, but if we can 1251 match another item, do so */ 1252 ctx->u.rep->count = ctx->count; 1253 LASTMARK_SAVE(); 1254 MARK_PUSH(ctx->lastmark); 1255 /* zero-width match protection */ 1256 DATA_PUSH(&ctx->u.rep->last_ptr); 1257 ctx->u.rep->last_ptr = state->ptr; 1258 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, 1259 ctx->u.rep->pattern+3); 1260 DATA_POP(&ctx->u.rep->last_ptr); 1261 if (ret) { 1262 MARK_POP_DISCARD(ctx->lastmark); 1263 RETURN_ON_ERROR(ret); 1264 RETURN_SUCCESS; 1265 } 1266 MARK_POP(ctx->lastmark); 1267 LASTMARK_RESTORE(); 1268 ctx->u.rep->count = ctx->count-1; 1269 state->ptr = ctx->ptr; 1270 } 1271 1272 /* cannot match more repeated items here. 
make sure the 1273 tail matches */ 1274 state->repeat = ctx->u.rep->prev; 1275 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); 1276 RETURN_ON_SUCCESS(ret); 1277 state->repeat = ctx->u.rep; 1278 state->ptr = ctx->ptr; 1279 RETURN_FAILURE; 1280 1281 case SRE_OP_MIN_UNTIL: 1282 /* minimizing repeat */ 1283 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ 1284 1285 ctx->u.rep = state->repeat; 1286 if (!ctx->u.rep) 1287 RETURN_ERROR(SRE_ERROR_STATE); 1288 1289 state->ptr = ctx->ptr; 1290 1291 ctx->count = ctx->u.rep->count+1; 1292 1293 TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern, 1294 ctx->ptr, ctx->count, ctx->u.rep->pattern)); 1295 1296 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { 1297 /* not enough matches */ 1298 ctx->u.rep->count = ctx->count; 1299 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, 1300 ctx->u.rep->pattern+3); 1301 if (ret) { 1302 RETURN_ON_ERROR(ret); 1303 RETURN_SUCCESS; 1304 } 1305 ctx->u.rep->count = ctx->count-1; 1306 state->ptr = ctx->ptr; 1307 RETURN_FAILURE; 1308 } 1309 1310 LASTMARK_SAVE(); 1311 1312 /* see if the tail matches */ 1313 state->repeat = ctx->u.rep->prev; 1314 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); 1315 if (ret) { 1316 RETURN_ON_ERROR(ret); 1317 RETURN_SUCCESS; 1318 } 1319 1320 state->repeat = ctx->u.rep; 1321 state->ptr = ctx->ptr; 1322 1323 LASTMARK_RESTORE(); 1324 1325 if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] 1326 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || 1327 state->ptr == ctx->u.rep->last_ptr) 1328 RETURN_FAILURE; 1329 1330 ctx->u.rep->count = ctx->count; 1331 /* zero-width match protection */ 1332 DATA_PUSH(&ctx->u.rep->last_ptr); 1333 ctx->u.rep->last_ptr = state->ptr; 1334 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, 1335 ctx->u.rep->pattern+3); 1336 DATA_POP(&ctx->u.rep->last_ptr); 1337 if (ret) { 1338 RETURN_ON_ERROR(ret); 1339 RETURN_SUCCESS; 1340 } 1341 ctx->u.rep->count = ctx->count-1; 1342 state->ptr = ctx->ptr; 1343 
RETURN_FAILURE; 1344 1345 case SRE_OP_GROUPREF: 1346 /* match backreference */ 1347 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, 1348 ctx->ptr, ctx->pattern[0])); 1349 i = ctx->pattern[0]; 1350 { 1351 Py_ssize_t groupref = i+i; 1352 if (groupref >= state->lastmark) { 1353 RETURN_FAILURE; 1354 } else { 1355 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 1356 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 1357 if (!p || !e || e < p) 1358 RETURN_FAILURE; 1359 while (p < e) { 1360 if (ctx->ptr >= end || *ctx->ptr != *p) 1361 RETURN_FAILURE; 1362 p++; ctx->ptr++; 1363 } 1364 } 1365 } 1366 ctx->pattern++; 1367 break; 1368 1369 case SRE_OP_GROUPREF_IGNORE: 1370 /* match backreference */ 1371 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, 1372 ctx->ptr, ctx->pattern[0])); 1373 i = ctx->pattern[0]; 1374 { 1375 Py_ssize_t groupref = i+i; 1376 if (groupref >= state->lastmark) { 1377 RETURN_FAILURE; 1378 } else { 1379 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 1380 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 1381 if (!p || !e || e < p) 1382 RETURN_FAILURE; 1383 while (p < e) { 1384 if (ctx->ptr >= end || 1385 state->lower(*ctx->ptr) != state->lower(*p)) 1386 RETURN_FAILURE; 1387 p++; ctx->ptr++; 1388 } 1389 } 1390 } 1391 ctx->pattern++; 1392 break; 1393 1394 case SRE_OP_GROUPREF_EXISTS: 1395 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, 1396 ctx->ptr, ctx->pattern[0])); 1397 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... 
*/ 1398 i = ctx->pattern[0]; 1399 { 1400 Py_ssize_t groupref = i+i; 1401 if (groupref >= state->lastmark) { 1402 ctx->pattern += ctx->pattern[1]; 1403 break; 1404 } else { 1405 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 1406 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 1407 if (!p || !e || e < p) { 1408 ctx->pattern += ctx->pattern[1]; 1409 break; 1410 } 1411 } 1412 } 1413 ctx->pattern += 2; 1414 break; 1415 1416 case SRE_OP_ASSERT: 1417 /* assert subpattern */ 1418 /* <ASSERT> <skip> <back> <pattern> */ 1419 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, 1420 ctx->ptr, ctx->pattern[1])); 1421 state->ptr = ctx->ptr - ctx->pattern[1]; 1422 if (state->ptr < state->beginning) 1423 RETURN_FAILURE; 1424 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2); 1425 RETURN_ON_FAILURE(ret); 1426 ctx->pattern += ctx->pattern[0]; 1427 break; 1428 1429 case SRE_OP_ASSERT_NOT: 1430 /* assert not subpattern */ 1431 /* <ASSERT_NOT> <skip> <back> <pattern> */ 1432 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, 1433 ctx->ptr, ctx->pattern[1])); 1434 state->ptr = ctx->ptr - ctx->pattern[1]; 1435 if (state->ptr >= state->beginning) { 1436 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); 1437 if (ret) { 1438 RETURN_ON_ERROR(ret); 1439 RETURN_FAILURE; 1440 } 1441 } 1442 ctx->pattern += ctx->pattern[0]; 1443 break; 1444 1445 case SRE_OP_FAILURE: 1446 /* immediate failure */ 1447 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); 1448 RETURN_FAILURE; 1449 1450 default: 1451 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, 1452 ctx->pattern[-1])); 1453 RETURN_ERROR(SRE_ERROR_ILLEGAL); 1454 } 1455 } 1456 1457 exit: 1458 ctx_pos = ctx->last_ctx_pos; 1459 jump = ctx->jump; 1460 DATA_POP_DISCARD(ctx); 1461 if (ctx_pos == -1) 1462 return ret; 1463 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); 1464 1465 switch (jump) { 1466 case JUMP_MAX_UNTIL_2: 1467 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr)); 1468 goto jump_max_until_2; 1469 case JUMP_MAX_UNTIL_3: 1470 
TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
            goto jump_max_until_3;
        case JUMP_MIN_UNTIL_2:
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
            goto jump_min_until_2;
        case JUMP_MIN_UNTIL_3:
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
            goto jump_min_until_3;
        case JUMP_BRANCH:
            TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
            goto jump_branch;
        case JUMP_MAX_UNTIL_1:
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
            goto jump_max_until_1;
        case JUMP_MIN_UNTIL_1:
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
            goto jump_min_until_1;
        case JUMP_REPEAT:
            TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
            goto jump_repeat;
        case JUMP_REPEAT_ONE_1:
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
            goto jump_repeat_one_1;
        case JUMP_REPEAT_ONE_2:
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
            goto jump_repeat_one_2;
        case JUMP_MIN_REPEAT_ONE:
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
            goto jump_min_repeat_one;
        case JUMP_ASSERT:
            TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
            goto jump_assert;
        case JUMP_ASSERT_NOT:
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
            goto jump_assert_not;
        case JUMP_NONE:
            TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
                   ctx->ptr, ret));
            break;
    }

    return ret; /* should never get here */
}

/* Scan the subject for the first position at which the pattern matches.
 *
 * state    supplies the scan window (state->start .. state->end); on a hit,
 *          state->start is set to the candidate position and state->ptr is
 *          set before SRE_MATCH is invoked (or, for fully literal patterns,
 *          to the position just past the literal).
 * pattern  compiled code; may begin with an <INFO> block carrying search
 *          hints (flags, minimum width, literal prefix or leading charset).
 *
 * Returns 0 when no position matches, 1 when a fully literal pattern was
 * found without running the matcher, and otherwise the first non-zero
 * status produced by SRE_MATCH.
 */
LOCAL(Py_ssize_t)
SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
{
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
    SRE_CHAR* end = (SRE_CHAR *)state->end;
    Py_ssize_t status = 0;
    Py_ssize_t prefix_len = 0;
    Py_ssize_t prefix_skip = 0;
    SRE_CODE* prefix = NULL;
    SRE_CODE* charset = NULL;
    SRE_CODE* overlap = NULL;
    int flags = 0;

    if (pattern[0] == SRE_OP_INFO) {
        /* optimization info block */
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */

        flags = pattern[2];

        if (pattern[3] > 1) {
            /* adjust end point (but make sure we leave at least one
               character in there, so literal search will work) */
            /* NOTE: every specialised scan below reloads end from
               state->end, so this adjustment is only seen by the
               general-case loop at the bottom */
            end -= pattern[3]-1;
            if (end <= ptr)
                end = ptr+1;
        }

        if (flags & SRE_INFO_PREFIX) {
            /* pattern starts with a known prefix */
            /* <length> <skip> <prefix data> <overlap data> */
            prefix_len = pattern[5];
            prefix_skip = pattern[6];
            prefix = pattern + 7;
            /* the overlap table follows the prefix data; the -1 bias lets
               the scan index it with i in 1..prefix_len */
            overlap = prefix + prefix_len - 1;
        } else if (flags & SRE_INFO_CHARSET)
            /* pattern starts with a character from a known set */
            /* <charset> */
            charset = pattern + 5;

        /* step over the whole INFO block to the first real opcode */
        pattern += 1 + pattern[1];
    }

    TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
           prefix, prefix_len, prefix_skip));
    TRACE(("charset = %p\n", charset));

#if defined(USE_FAST_SEARCH)
    if (prefix_len > 1) {
        /* pattern starts with a known prefix.  use the overlap
           table to skip forward as fast as we possibly can */
        /* i is the number of prefix codes matched so far */
        Py_ssize_t i = 0;
        end = (SRE_CHAR *)state->end;
        while (ptr < end) {
            for (;;) {
                if ((SRE_CODE) ptr[0] != prefix[i]) {
                    if (!i)
                        break;
                    else
                        /* mismatch after a partial match: fall back to
                           the recorded overlap and retry this character */
                        i = overlap[i];
                } else {
                    if (++i == prefix_len) {
                        /* found a potential match */
                        TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
                        state->start = ptr + 1 - prefix_len;
                        state->ptr = ptr + 1 - prefix_len + prefix_skip;
                        if (flags & SRE_INFO_LITERAL)
                            return 1; /* we got all of it */
                        /* presumably each skipped prefix item occupies two
                           codes (<LITERAL> <char>) -- TODO confirm against
                           the pattern compiler */
                        status = SRE_MATCH(state, pattern + 2*prefix_skip);
                        if (status != 0)
                            return status;
                        /* close but no cigar -- try again */
                        i = overlap[i];
                    }
                    break;
                }
            }
            ptr++;
        }
        return 0;
    }
#endif

    if (pattern[0] == SRE_OP_LITERAL) {
        /* pattern starts with a literal character.  this is used
           for short prefixes, and if fast search is disabled */
        SRE_CODE chr = pattern[1];
        end = (SRE_CHAR *)state->end;
        for (;;) {
            while (ptr < end && (SRE_CODE) ptr[0] != chr)
                ptr++;
            if (ptr >= end)
                return 0;
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
            state->start = ptr;
            /* the literal itself is already verified, so matching resumes
               one character later and two codes into the pattern */
            state->ptr = ++ptr;
            if (flags & SRE_INFO_LITERAL)
                return 1; /* we got all of it */
            status = SRE_MATCH(state, pattern + 2);
            if (status != 0)
                break;
        }
    } else if (charset) {
        /* pattern starts with a character from a known set */
        end = (SRE_CHAR *)state->end;
        for (;;) {
            while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
                ptr++;
            if (ptr >= end)
                return 0;
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
            state->start = ptr;
            state->ptr = ptr;
            status = SRE_MATCH(state, pattern);
            if (status != 0)
                break;
            ptr++;
        }
    } else
        /* general case */
        /* ptr == end is tried as well, so a pattern that can match the
           empty string may match at the very end of the window */
        while (ptr <= end) {
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
            state->start
= state->ptr = ptr++; 1636 status = SRE_MATCH(state, pattern); 1637 if (status != 0) 1638 break; 1639 } 1640 1641 return status; 1642 } 1643 1644 LOCAL(int) 1645 SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len) 1646 { 1647 /* check if given string is a literal template (i.e. no escapes) */ 1648 while (len-- > 0) 1649 if (*ptr++ == '\\') 1650 return 0; 1651 return 1; 1652 } 1653 1654 #if !defined(SRE_RECURSIVE) 1655 1656 /* -------------------------------------------------------------------- */ 1657 /* factories and destructors */ 1658 1659 /* see sre.h for object declarations */ 1660 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int); 1661 static PyObject*pattern_scanner(PatternObject*, PyObject*); 1662 1663 static PyObject * 1664 sre_codesize(PyObject* self, PyObject *unused) 1665 { 1666 return PyInt_FromSize_t(sizeof(SRE_CODE)); 1667 } 1668 1669 static PyObject * 1670 sre_getlower(PyObject* self, PyObject* args) 1671 { 1672 int character, flags; 1673 if (!PyArg_ParseTuple(args, "ii", &character, &flags)) 1674 return NULL; 1675 if (flags & SRE_FLAG_LOCALE) 1676 return Py_BuildValue("i", sre_lower_locale(character)); 1677 if (flags & SRE_FLAG_UNICODE) 1678 #if defined(HAVE_UNICODE) 1679 return Py_BuildValue("i", sre_lower_unicode(character)); 1680 #else 1681 return Py_BuildValue("i", sre_lower_locale(character)); 1682 #endif 1683 return Py_BuildValue("i", sre_lower(character)); 1684 } 1685 1686 LOCAL(void) 1687 state_reset(SRE_STATE* state) 1688 { 1689 /* FIXME: dynamic! */ 1690 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/ 1691 1692 state->lastmark = -1; 1693 state->lastindex = -1; 1694 1695 state->repeat = NULL; 1696 1697 data_stack_dealloc(state); 1698 } 1699 1700 static void* 1701 getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize) 1702 { 1703 /* given a python object, return a data pointer, a length (in 1704 characters), and a character size. 
return NULL if the object 1705 is not a string (or not compatible) */ 1706 1707 PyBufferProcs *buffer; 1708 Py_ssize_t size, bytes; 1709 int charsize; 1710 void* ptr; 1711 1712 #if defined(HAVE_UNICODE) 1713 if (PyUnicode_Check(string)) { 1714 /* unicode strings doesn't always support the buffer interface */ 1715 ptr = (void*) PyUnicode_AS_DATA(string); 1716 /* bytes = PyUnicode_GET_DATA_SIZE(string); */ 1717 size = PyUnicode_GET_SIZE(string); 1718 charsize = sizeof(Py_UNICODE); 1719 1720 } else { 1721 #endif 1722 1723 /* get pointer to string buffer */ 1724 buffer = Py_TYPE(string)->tp_as_buffer; 1725 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount || 1726 buffer->bf_getsegcount(string, NULL) != 1) { 1727 PyErr_SetString(PyExc_TypeError, "expected string or buffer"); 1728 return NULL; 1729 } 1730 1731 /* determine buffer size */ 1732 bytes = buffer->bf_getreadbuffer(string, 0, &ptr); 1733 if (bytes < 0) { 1734 PyErr_SetString(PyExc_TypeError, "buffer has negative size"); 1735 return NULL; 1736 } 1737 1738 /* determine character size */ 1739 #if PY_VERSION_HEX >= 0x01060000 1740 size = PyObject_Size(string); 1741 #else 1742 size = PyObject_Length(string); 1743 #endif 1744 1745 if (PyString_Check(string) || bytes == size) 1746 charsize = 1; 1747 #if defined(HAVE_UNICODE) 1748 else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE))) 1749 charsize = sizeof(Py_UNICODE); 1750 #endif 1751 else { 1752 PyErr_SetString(PyExc_TypeError, "buffer size mismatch"); 1753 return NULL; 1754 } 1755 1756 #if defined(HAVE_UNICODE) 1757 } 1758 #endif 1759 1760 *p_length = size; 1761 *p_charsize = charsize; 1762 1763 return ptr; 1764 } 1765 1766 LOCAL(PyObject*) 1767 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, 1768 Py_ssize_t start, Py_ssize_t end) 1769 { 1770 /* prepare state object */ 1771 1772 Py_ssize_t length; 1773 int charsize; 1774 void* ptr; 1775 1776 memset(state, 0, sizeof(SRE_STATE)); 1777 1778 state->lastmark = -1; 1779 
state->lastindex = -1; 1780 1781 ptr = getstring(string, &length, &charsize); 1782 if (!ptr) 1783 return NULL; 1784 1785 /* adjust boundaries */ 1786 if (start < 0) 1787 start = 0; 1788 else if (start > length) 1789 start = length; 1790 1791 if (end < 0) 1792 end = 0; 1793 else if (end > length) 1794 end = length; 1795 1796 state->charsize = charsize; 1797 1798 state->beginning = ptr; 1799 1800 state->start = (void*) ((char*) ptr + start * state->charsize); 1801 state->end = (void*) ((char*) ptr + end * state->charsize); 1802 1803 Py_INCREF(string); 1804 state->string = string; 1805 state->pos = start; 1806 state->endpos = end; 1807 1808 if (pattern->flags & SRE_FLAG_LOCALE) 1809 state->lower = sre_lower_locale; 1810 else if (pattern->flags & SRE_FLAG_UNICODE) 1811 #if defined(HAVE_UNICODE) 1812 state->lower = sre_lower_unicode; 1813 #else 1814 state->lower = sre_lower_locale; 1815 #endif 1816 else 1817 state->lower = sre_lower; 1818 1819 return string; 1820 } 1821 1822 LOCAL(void) 1823 state_fini(SRE_STATE* state) 1824 { 1825 Py_XDECREF(state->string); 1826 data_stack_dealloc(state); 1827 } 1828 1829 /* calculate offset from start of string */ 1830 #define STATE_OFFSET(state, member)\ 1831 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize) 1832 1833 LOCAL(PyObject*) 1834 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) 1835 { 1836 Py_ssize_t i, j; 1837 1838 index = (index - 1) * 2; 1839 1840 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) { 1841 if (empty) 1842 /* want empty string */ 1843 i = j = 0; 1844 else { 1845 Py_INCREF(Py_None); 1846 return Py_None; 1847 } 1848 } else { 1849 i = STATE_OFFSET(state, state->mark[index]); 1850 j = STATE_OFFSET(state, state->mark[index+1]); 1851 } 1852 1853 return PySequence_GetSlice(string, i, j); 1854 } 1855 1856 static void 1857 pattern_error(int status) 1858 { 1859 switch (status) { 1860 case SRE_ERROR_RECURSION_LIMIT: 1861 
PyErr_SetString( 1862 PyExc_RuntimeError, 1863 "maximum recursion limit exceeded" 1864 ); 1865 break; 1866 case SRE_ERROR_MEMORY: 1867 PyErr_NoMemory(); 1868 break; 1869 case SRE_ERROR_INTERRUPTED: 1870 /* An exception has already been raised, so let it fly */ 1871 break; 1872 default: 1873 /* other error codes indicate compiler/engine bugs */ 1874 PyErr_SetString( 1875 PyExc_RuntimeError, 1876 "internal error in regular expression engine" 1877 ); 1878 } 1879 } 1880 1881 static void 1882 pattern_dealloc(PatternObject* self) 1883 { 1884 if (self->weakreflist != NULL) 1885 PyObject_ClearWeakRefs((PyObject *) self); 1886 Py_XDECREF(self->pattern); 1887 Py_XDECREF(self->groupindex); 1888 Py_XDECREF(self->indexgroup); 1889 PyObject_DEL(self); 1890 } 1891 1892 static int 1893 check_args_size(const char *name, PyObject* args, PyObject* kw, int n) 1894 { 1895 Py_ssize_t m = PyTuple_GET_SIZE(args) + (kw ? PyDict_Size(kw) : 0); 1896 if (m <= n) 1897 return 1; 1898 PyErr_Format(PyExc_TypeError, 1899 "%s() takes at most %d positional arguments (%zd given)", 1900 name, n, m); 1901 return 0; 1902 } 1903 1904 static PyObject* 1905 fix_string_param(PyObject *string, PyObject *string2, const char *oldname) 1906 { 1907 if (string2 != NULL) { 1908 char buf[100]; 1909 if (string != NULL) { 1910 PyErr_Format(PyExc_TypeError, 1911 "Argument given by name ('%s') and position (1)", 1912 oldname); 1913 return NULL; 1914 } 1915 sprintf(buf, "The '%s' keyword parameter name is deprecated. 
" 1916 "Use 'string' instead.", oldname); 1917 if (PyErr_Warn(PyExc_DeprecationWarning, buf) < 0) 1918 return NULL; 1919 return string2; 1920 } 1921 if (string == NULL) { 1922 PyErr_SetString(PyExc_TypeError, 1923 "Required argument 'string' (pos 1) not found"); 1924 return NULL; 1925 } 1926 return string; 1927 } 1928 1929 static PyObject* 1930 pattern_match(PatternObject* self, PyObject* args, PyObject* kw) 1931 { 1932 SRE_STATE state; 1933 int status; 1934 1935 PyObject *string = NULL, *string2 = NULL; 1936 Py_ssize_t start = 0; 1937 Py_ssize_t end = PY_SSIZE_T_MAX; 1938 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL }; 1939 if (!check_args_size("match", args, kw, 3)) 1940 return NULL; 1941 1942 if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:match", kwlist, 1943 &string, &start, &end, &string2)) 1944 return NULL; 1945 1946 string = fix_string_param(string, string2, "pattern"); 1947 if (!string) 1948 return NULL; 1949 1950 string = state_init(&state, self, string, start, end); 1951 if (!string) 1952 return NULL; 1953 1954 state.ptr = state.start; 1955 1956 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); 1957 1958 if (state.charsize == 1) { 1959 status = sre_match(&state, PatternObject_GetCode(self)); 1960 } else { 1961 #if defined(HAVE_UNICODE) 1962 status = sre_umatch(&state, PatternObject_GetCode(self)); 1963 #endif 1964 } 1965 1966 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); 1967 if (PyErr_Occurred()) 1968 return NULL; 1969 1970 state_fini(&state); 1971 1972 return pattern_new_match(self, &state, status); 1973 } 1974 1975 static PyObject* 1976 pattern_search(PatternObject* self, PyObject* args, PyObject* kw) 1977 { 1978 SRE_STATE state; 1979 int status; 1980 1981 PyObject *string = NULL, *string2 = NULL; 1982 Py_ssize_t start = 0; 1983 Py_ssize_t end = PY_SSIZE_T_MAX; 1984 static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL }; 1985 if (!check_args_size("search", args, kw, 3)) 1986 
return NULL; 1987 1988 if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:search", kwlist, 1989 &string, &start, &end, &string2)) 1990 return NULL; 1991 1992 string = fix_string_param(string, string2, "pattern"); 1993 if (!string) 1994 return NULL; 1995 1996 string = state_init(&state, self, string, start, end); 1997 if (!string) 1998 return NULL; 1999 2000 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr)); 2001 2002 if (state.charsize == 1) { 2003 status = sre_search(&state, PatternObject_GetCode(self)); 2004 } else { 2005 #if defined(HAVE_UNICODE) 2006 status = sre_usearch(&state, PatternObject_GetCode(self)); 2007 #endif 2008 } 2009 2010 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); 2011 2012 state_fini(&state); 2013 2014 if (PyErr_Occurred()) 2015 return NULL; 2016 2017 return pattern_new_match(self, &state, status); 2018 } 2019 2020 static PyObject* 2021 call(char* module, char* function, PyObject* args) 2022 { 2023 PyObject* name; 2024 PyObject* mod; 2025 PyObject* func; 2026 PyObject* result; 2027 2028 if (!args) 2029 return NULL; 2030 name = PyString_FromString(module); 2031 if (!name) 2032 return NULL; 2033 mod = PyImport_Import(name); 2034 Py_DECREF(name); 2035 if (!mod) 2036 return NULL; 2037 func = PyObject_GetAttrString(mod, function); 2038 Py_DECREF(mod); 2039 if (!func) 2040 return NULL; 2041 result = PyObject_CallObject(func, args); 2042 Py_DECREF(func); 2043 Py_DECREF(args); 2044 return result; 2045 } 2046 2047 #ifdef USE_BUILTIN_COPY 2048 static int 2049 deepcopy(PyObject** object, PyObject* memo) 2050 { 2051 PyObject* copy; 2052 2053 copy = call( 2054 "copy", "deepcopy", 2055 PyTuple_Pack(2, *object, memo) 2056 ); 2057 if (!copy) 2058 return 0; 2059 2060 Py_DECREF(*object); 2061 *object = copy; 2062 2063 return 1; /* success */ 2064 } 2065 #endif 2066 2067 static PyObject* 2068 join_list(PyObject* list, PyObject* string) 2069 { 2070 /* join list elements */ 2071 2072 PyObject* joiner; 2073 #if PY_VERSION_HEX >= 
0x01060000 2074 PyObject* function; 2075 PyObject* args; 2076 #endif 2077 PyObject* result; 2078 2079 joiner = PySequence_GetSlice(string, 0, 0); 2080 if (!joiner) 2081 return NULL; 2082 2083 if (PyList_GET_SIZE(list) == 0) { 2084 Py_DECREF(list); 2085 return joiner; 2086 } 2087 2088 #if PY_VERSION_HEX >= 0x01060000 2089 function = PyObject_GetAttrString(joiner, "join"); 2090 if (!function) { 2091 Py_DECREF(joiner); 2092 return NULL; 2093 } 2094 args = PyTuple_New(1); 2095 if (!args) { 2096 Py_DECREF(function); 2097 Py_DECREF(joiner); 2098 return NULL; 2099 } 2100 PyTuple_SET_ITEM(args, 0, list); 2101 result = PyObject_CallObject(function, args); 2102 Py_DECREF(args); /* also removes list */ 2103 Py_DECREF(function); 2104 #else 2105 result = call( 2106 "string", "join", 2107 PyTuple_Pack(2, list, joiner) 2108 ); 2109 #endif 2110 Py_DECREF(joiner); 2111 2112 return result; 2113 } 2114 2115 static PyObject* 2116 pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) 2117 { 2118 SRE_STATE state; 2119 PyObject* list; 2120 int status; 2121 Py_ssize_t i, b, e; 2122 2123 PyObject *string = NULL, *string2 = NULL; 2124 Py_ssize_t start = 0; 2125 Py_ssize_t end = PY_SSIZE_T_MAX; 2126 static char* kwlist[] = { "string", "pos", "endpos", "source", NULL }; 2127 if (!check_args_size("findall", args, kw, 3)) 2128 return NULL; 2129 2130 if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:findall", kwlist, 2131 &string, &start, &end, &string2)) 2132 return NULL; 2133 2134 string = fix_string_param(string, string2, "source"); 2135 if (!string) 2136 return NULL; 2137 2138 string = state_init(&state, self, string, start, end); 2139 if (!string) 2140 return NULL; 2141 2142 list = PyList_New(0); 2143 if (!list) { 2144 state_fini(&state); 2145 return NULL; 2146 } 2147 2148 while (state.start <= state.end) { 2149 2150 PyObject* item; 2151 2152 state_reset(&state); 2153 2154 state.ptr = state.start; 2155 2156 if (state.charsize == 1) { 2157 status = sre_search(&state, 
PatternObject_GetCode(self)); 2158 } else { 2159 #if defined(HAVE_UNICODE) 2160 status = sre_usearch(&state, PatternObject_GetCode(self)); 2161 #endif 2162 } 2163 2164 if (PyErr_Occurred()) 2165 goto error; 2166 2167 if (status <= 0) { 2168 if (status == 0) 2169 break; 2170 pattern_error(status); 2171 goto error; 2172 } 2173 2174 /* don't bother to build a match object */ 2175 switch (self->groups) { 2176 case 0: 2177 b = STATE_OFFSET(&state, state.start); 2178 e = STATE_OFFSET(&state, state.ptr); 2179 item = PySequence_GetSlice(string, b, e); 2180 if (!item) 2181 goto error; 2182 break; 2183 case 1: 2184 item = state_getslice(&state, 1, string, 1); 2185 if (!item) 2186 goto error; 2187 break; 2188 default: 2189 item = PyTuple_New(self->groups); 2190 if (!item) 2191 goto error; 2192 for (i = 0; i < self->groups; i++) { 2193 PyObject* o = state_getslice(&state, i+1, string, 1); 2194 if (!o) { 2195 Py_DECREF(item); 2196 goto error; 2197 } 2198 PyTuple_SET_ITEM(item, i, o); 2199 } 2200 break; 2201 } 2202 2203 status = PyList_Append(list, item); 2204 Py_DECREF(item); 2205 if (status < 0) 2206 goto error; 2207 2208 if (state.ptr == state.start) 2209 state.start = (void*) ((char*) state.ptr + state.charsize); 2210 else 2211 state.start = state.ptr; 2212 2213 } 2214 2215 state_fini(&state); 2216 return list; 2217 2218 error: 2219 Py_DECREF(list); 2220 state_fini(&state); 2221 return NULL; 2222 2223 } 2224 2225 #if PY_VERSION_HEX >= 0x02020000 2226 static PyObject* 2227 pattern_finditer(PatternObject* pattern, PyObject* args) 2228 { 2229 PyObject* scanner; 2230 PyObject* search; 2231 PyObject* iterator; 2232 2233 scanner = pattern_scanner(pattern, args); 2234 if (!scanner) 2235 return NULL; 2236 2237 search = PyObject_GetAttrString(scanner, "search"); 2238 Py_DECREF(scanner); 2239 if (!search) 2240 return NULL; 2241 2242 iterator = PyCallIter_New(search, Py_None); 2243 Py_DECREF(search); 2244 2245 return iterator; 2246 } 2247 #endif 2248 2249 static PyObject* 2250 
pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
{
    /* pattern.split(string[, maxsplit]): split string at each non-empty
       match of the pattern.  The result list holds the text between
       matches and, after each match, one entry per group of the pattern
       (None for a group that did not take part).  'source' is accepted
       as a deprecated keyword alias for 'string'.  Returns a new list,
       or NULL with an exception set. */
    SRE_STATE state;
    PyObject* list;
    PyObject* item;
    int status;
    Py_ssize_t n;
    Py_ssize_t i;
    void* last;

    PyObject *string = NULL, *string2 = NULL;
    Py_ssize_t maxsplit = 0;
    static char* kwlist[] = { "string", "maxsplit", "source", NULL };
    if (!check_args_size("split", args, kw, 2))
        return NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnO:split", kwlist,
                                     &string, &maxsplit, &string2))
        return NULL;

    string = fix_string_param(string, string2, "source");
    if (!string)
        return NULL;

    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
    if (!string)
        return NULL;

    list = PyList_New(0);
    if (!list) {
        state_fini(&state);
        return NULL;
    }

    /* n counts the splits performed; last is the end of the previous
       match, i.e. where the next output segment begins */
    n = 0;
    last = state.start;

    /* maxsplit == 0 means "no limit" */
    while (!maxsplit || n < maxsplit) {

        state_reset(&state);

        state.ptr = state.start;

        if (state.charsize == 1) {
            status = sre_search(&state, PatternObject_GetCode(self));
        } else {
#if defined(HAVE_UNICODE)
            status = sre_usearch(&state, PatternObject_GetCode(self));
#endif
        }

        if (PyErr_Occurred())
            goto error;

        if (status <= 0) {
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        /* an empty match never produces a split: step past it and
           search again, leaving last untouched */
        if (state.start == state.ptr) {
            if (last == state.end)
                break;
            /* skip one character */
            state.start = (void*) ((char*) state.ptr + state.charsize);
            continue;
        }

        /* get segment before this match */
        item = PySequence_GetSlice(
            string, STATE_OFFSET(&state, last),
            STATE_OFFSET(&state, state.start)
            );
        if (!item)
            goto error;
        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        /* add groups (if any) */
        for (i = 0; i < self->groups; i++) {
            item
                = state_getslice(&state, i+1, string, 0);
            if (!item)
                goto error;
            status = PyList_Append(list, item);
            Py_DECREF(item);
            if (status < 0)
                goto error;
        }

        n = n + 1;

        /* resume searching right after this match */
        last = state.start = state.ptr;

    }

    /* get segment following last match (even if empty) */
    item = PySequence_GetSlice(
        string, STATE_OFFSET(&state, last), state.endpos
        );
    if (!item)
        goto error;
    status = PyList_Append(list, item);
    Py_DECREF(item);
    if (status < 0)
        goto error;

    state_fini(&state);
    return list;

error:
    /* common failure exit: drop the partial result and the state */
    Py_DECREF(list);
    state_fini(&state);
    return NULL;

}

static PyObject*
pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
             Py_ssize_t count, Py_ssize_t subn)
{
    SRE_STATE state;
    PyObject* list;
    PyObject* item;
    PyObject* filter;
    PyObject* args;
    PyObject* match;
    void* ptr;
    int status;
    Py_ssize_t n;
    Py_ssize_t i, b, e;
    int bint;
    int filter_is_callable;

    if (PyCallable_Check(ptemplate)) {
        /* sub/subn takes either a function or a template */
        filter = ptemplate;
        Py_INCREF(filter);
        filter_is_callable = 1;
    } else {
        /* if not callable, check if it's a literal string */
        int literal;
        ptr = getstring(ptemplate, &n, &bint);
        b = bint;
        if (ptr) {
            if (b == 1) {
                literal = sre_literal_template((unsigned char *)ptr, n);
            } else {
#if defined(HAVE_UNICODE)
                literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
#endif
            }
        } else {
            PyErr_Clear();
            literal = 0;
        }
        if (literal) {
            filter = ptemplate;
            Py_INCREF(filter);
            filter_is_callable = 0;
        } else {
            /* not a literal; hand it over to the template compiler */
            filter = call(
                SRE_PY_MODULE, "_subx",
                PyTuple_Pack(2, self, ptemplate)
                );
            if (!filter)
2419 return NULL; 2420 filter_is_callable = PyCallable_Check(filter); 2421 } 2422 } 2423 2424 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX); 2425 if (!string) { 2426 Py_DECREF(filter); 2427 return NULL; 2428 } 2429 2430 list = PyList_New(0); 2431 if (!list) { 2432 Py_DECREF(filter); 2433 state_fini(&state); 2434 return NULL; 2435 } 2436 2437 n = i = 0; 2438 2439 while (!count || n < count) { 2440 2441 state_reset(&state); 2442 2443 state.ptr = state.start; 2444 2445 if (state.charsize == 1) { 2446 status = sre_search(&state, PatternObject_GetCode(self)); 2447 } else { 2448 #if defined(HAVE_UNICODE) 2449 status = sre_usearch(&state, PatternObject_GetCode(self)); 2450 #endif 2451 } 2452 2453 if (PyErr_Occurred()) 2454 goto error; 2455 2456 if (status <= 0) { 2457 if (status == 0) 2458 break; 2459 pattern_error(status); 2460 goto error; 2461 } 2462 2463 b = STATE_OFFSET(&state, state.start); 2464 e = STATE_OFFSET(&state, state.ptr); 2465 2466 if (i < b) { 2467 /* get segment before this match */ 2468 item = PySequence_GetSlice(string, i, b); 2469 if (!item) 2470 goto error; 2471 status = PyList_Append(list, item); 2472 Py_DECREF(item); 2473 if (status < 0) 2474 goto error; 2475 2476 } else if (i == b && i == e && n > 0) 2477 /* ignore empty match on latest position */ 2478 goto next; 2479 2480 if (filter_is_callable) { 2481 /* pass match object through filter */ 2482 match = pattern_new_match(self, &state, 1); 2483 if (!match) 2484 goto error; 2485 args = PyTuple_Pack(1, match); 2486 if (!args) { 2487 Py_DECREF(match); 2488 goto error; 2489 } 2490 item = PyObject_CallObject(filter, args); 2491 Py_DECREF(args); 2492 Py_DECREF(match); 2493 if (!item) 2494 goto error; 2495 } else { 2496 /* filter is literal string */ 2497 item = filter; 2498 Py_INCREF(item); 2499 } 2500 2501 /* add to list */ 2502 if (item != Py_None) { 2503 status = PyList_Append(list, item); 2504 Py_DECREF(item); 2505 if (status < 0) 2506 goto error; 2507 } 2508 2509 i = e; 2510 n = n + 
1; 2511 2512 next: 2513 /* move on */ 2514 if (state.ptr == state.start) 2515 state.start = (void*) ((char*) state.ptr + state.charsize); 2516 else 2517 state.start = state.ptr; 2518 2519 } 2520 2521 /* get segment following last match */ 2522 if (i < state.endpos) { 2523 item = PySequence_GetSlice(string, i, state.endpos); 2524 if (!item) 2525 goto error; 2526 status = PyList_Append(list, item); 2527 Py_DECREF(item); 2528 if (status < 0) 2529 goto error; 2530 } 2531 2532 state_fini(&state); 2533 2534 Py_DECREF(filter); 2535 2536 /* convert list to single string (also removes list) */ 2537 item = join_list(list, string); 2538 2539 if (!item) 2540 return NULL; 2541 2542 if (subn) 2543 return Py_BuildValue("Nn", item, n); 2544 2545 return item; 2546 2547 error: 2548 Py_DECREF(list); 2549 state_fini(&state); 2550 Py_DECREF(filter); 2551 return NULL; 2552 2553 } 2554 2555 static PyObject* 2556 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw) 2557 { 2558 PyObject* ptemplate; 2559 PyObject* string; 2560 Py_ssize_t count = 0; 2561 static char* kwlist[] = { "repl", "string", "count", NULL }; 2562 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist, 2563 &ptemplate, &string, &count)) 2564 return NULL; 2565 2566 return pattern_subx(self, ptemplate, string, count, 0); 2567 } 2568 2569 static PyObject* 2570 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw) 2571 { 2572 PyObject* ptemplate; 2573 PyObject* string; 2574 Py_ssize_t count = 0; 2575 static char* kwlist[] = { "repl", "string", "count", NULL }; 2576 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist, 2577 &ptemplate, &string, &count)) 2578 return NULL; 2579 2580 return pattern_subx(self, ptemplate, string, count, 1); 2581 } 2582 2583 static PyObject* 2584 pattern_copy(PatternObject* self, PyObject *unused) 2585 { 2586 #ifdef USE_BUILTIN_COPY 2587 PatternObject* copy; 2588 int offset; 2589 2590 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize); 2591 
if (!copy) 2592 return NULL; 2593 2594 offset = offsetof(PatternObject, groups); 2595 2596 Py_XINCREF(self->groupindex); 2597 Py_XINCREF(self->indexgroup); 2598 Py_XINCREF(self->pattern); 2599 2600 memcpy((char*) copy + offset, (char*) self + offset, 2601 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset); 2602 copy->weakreflist = NULL; 2603 2604 return (PyObject*) copy; 2605 #else 2606 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object"); 2607 return NULL; 2608 #endif 2609 } 2610 2611 static PyObject* 2612 pattern_deepcopy(PatternObject* self, PyObject* memo) 2613 { 2614 #ifdef USE_BUILTIN_COPY 2615 PatternObject* copy; 2616 2617 copy = (PatternObject*) pattern_copy(self); 2618 if (!copy) 2619 return NULL; 2620 2621 if (!deepcopy(©->groupindex, memo) || 2622 !deepcopy(©->indexgroup, memo) || 2623 !deepcopy(©->pattern, memo)) { 2624 Py_DECREF(copy); 2625 return NULL; 2626 } 2627 2628 #else 2629 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object"); 2630 return NULL; 2631 #endif 2632 } 2633 2634 PyDoc_STRVAR(pattern_match_doc, 2635 "match(string[, pos[, endpos]]) --> match object or None.\n\ 2636 Matches zero or more characters at the beginning of the string"); 2637 2638 PyDoc_STRVAR(pattern_search_doc, 2639 "search(string[, pos[, endpos]]) --> match object or None.\n\ 2640 Scan through string looking for a match, and return a corresponding\n\ 2641 match object instance. 
Return None if no position in the string matches."); 2642 2643 PyDoc_STRVAR(pattern_split_doc, 2644 "split(string[, maxsplit = 0]) --> list.\n\ 2645 Split string by the occurrences of pattern."); 2646 2647 PyDoc_STRVAR(pattern_findall_doc, 2648 "findall(string[, pos[, endpos]]) --> list.\n\ 2649 Return a list of all non-overlapping matches of pattern in string."); 2650 2651 PyDoc_STRVAR(pattern_finditer_doc, 2652 "finditer(string[, pos[, endpos]]) --> iterator.\n\ 2653 Return an iterator over all non-overlapping matches for the \n\ 2654 RE pattern in string. For each match, the iterator returns a\n\ 2655 match object."); 2656 2657 PyDoc_STRVAR(pattern_sub_doc, 2658 "sub(repl, string[, count = 0]) --> newstring\n\ 2659 Return the string obtained by replacing the leftmost non-overlapping\n\ 2660 occurrences of pattern in string by the replacement repl."); 2661 2662 PyDoc_STRVAR(pattern_subn_doc, 2663 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\ 2664 Return the tuple (new_string, number_of_subs_made) found by replacing\n\ 2665 the leftmost non-overlapping occurrences of pattern with the\n\ 2666 replacement repl."); 2667 2668 PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects"); 2669 2670 static PyMethodDef pattern_methods[] = { 2671 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS, 2672 pattern_match_doc}, 2673 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS, 2674 pattern_search_doc}, 2675 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS, 2676 pattern_sub_doc}, 2677 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS, 2678 pattern_subn_doc}, 2679 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS, 2680 pattern_split_doc}, 2681 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS, 2682 pattern_findall_doc}, 2683 #if PY_VERSION_HEX >= 0x02020000 2684 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS, 2685 pattern_finditer_doc}, 2686 
#endif 2687 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS}, 2688 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS}, 2689 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O}, 2690 {NULL, NULL} 2691 }; 2692 2693 #define PAT_OFF(x) offsetof(PatternObject, x) 2694 static PyMemberDef pattern_members[] = { 2695 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY}, 2696 {"flags", T_INT, PAT_OFF(flags), READONLY}, 2697 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY}, 2698 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY}, 2699 {NULL} /* Sentinel */ 2700 }; 2701 2702 statichere PyTypeObject Pattern_Type = { 2703 PyObject_HEAD_INIT(NULL) 2704 0, "_" SRE_MODULE ".SRE_Pattern", 2705 sizeof(PatternObject), sizeof(SRE_CODE), 2706 (destructor)pattern_dealloc, /*tp_dealloc*/ 2707 0, /* tp_print */ 2708 0, /* tp_getattrn */ 2709 0, /* tp_setattr */ 2710 0, /* tp_compare */ 2711 0, /* tp_repr */ 2712 0, /* tp_as_number */ 2713 0, /* tp_as_sequence */ 2714 0, /* tp_as_mapping */ 2715 0, /* tp_hash */ 2716 0, /* tp_call */ 2717 0, /* tp_str */ 2718 0, /* tp_getattro */ 2719 0, /* tp_setattro */ 2720 0, /* tp_as_buffer */ 2721 Py_TPFLAGS_DEFAULT, /* tp_flags */ 2722 pattern_doc, /* tp_doc */ 2723 0, /* tp_traverse */ 2724 0, /* tp_clear */ 2725 0, /* tp_richcompare */ 2726 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */ 2727 0, /* tp_iter */ 2728 0, /* tp_iternext */ 2729 pattern_methods, /* tp_methods */ 2730 pattern_members, /* tp_members */ 2731 }; 2732 2733 static int _validate(PatternObject *self); /* Forward */ 2734 2735 static PyObject * 2736 _compile(PyObject* self_, PyObject* args) 2737 { 2738 /* "compile" pattern descriptor to pattern object */ 2739 2740 PatternObject* self; 2741 Py_ssize_t i, n; 2742 2743 PyObject* pattern; 2744 int flags = 0; 2745 PyObject* code; 2746 Py_ssize_t groups = 0; 2747 PyObject* groupindex = NULL; 2748 PyObject* indexgroup = NULL; 2749 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags, 2750 
&PyList_Type, &code, &groups, 2751 &groupindex, &indexgroup)) 2752 return NULL; 2753 2754 n = PyList_GET_SIZE(code); 2755 /* coverity[ampersand_in_size] */ 2756 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n); 2757 if (!self) 2758 return NULL; 2759 self->weakreflist = NULL; 2760 self->pattern = NULL; 2761 self->groupindex = NULL; 2762 self->indexgroup = NULL; 2763 2764 self->codesize = n; 2765 2766 for (i = 0; i < n; i++) { 2767 PyObject *o = PyList_GET_ITEM(code, i); 2768 unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o) 2769 : PyLong_AsUnsignedLong(o); 2770 if (value == (unsigned long)-1 && PyErr_Occurred()) { 2771 if (PyErr_ExceptionMatches(PyExc_OverflowError)) { 2772 PyErr_SetString(PyExc_OverflowError, 2773 "regular expression code size limit exceeded"); 2774 } 2775 break; 2776 } 2777 self->code[i] = (SRE_CODE) value; 2778 if ((unsigned long) self->code[i] != value) { 2779 PyErr_SetString(PyExc_OverflowError, 2780 "regular expression code size limit exceeded"); 2781 break; 2782 } 2783 } 2784 2785 if (PyErr_Occurred()) { 2786 Py_DECREF(self); 2787 return NULL; 2788 } 2789 2790 Py_INCREF(pattern); 2791 self->pattern = pattern; 2792 2793 self->flags = flags; 2794 2795 self->groups = groups; 2796 2797 Py_XINCREF(groupindex); 2798 self->groupindex = groupindex; 2799 2800 Py_XINCREF(indexgroup); 2801 self->indexgroup = indexgroup; 2802 2803 self->weakreflist = NULL; 2804 2805 if (!_validate(self)) { 2806 Py_DECREF(self); 2807 return NULL; 2808 } 2809 2810 return (PyObject*) self; 2811 } 2812 2813 /* -------------------------------------------------------------------- */ 2814 /* Code validation */ 2815 2816 /* To learn more about this code, have a look at the _compile() function in 2817 Lib/sre_compile.py. The validation functions below checks the code array 2818 for conformance with the code patterns generated there. 
2819 2820 The nice thing about the generated code is that it is position-independent: 2821 all jumps are relative jumps forward. Also, jumps don't cross each other: 2822 the target of a later jump is always earlier than the target of an earlier 2823 jump. IOW, this is okay: 2824 2825 J---------J-------T--------T 2826 \ \_____/ / 2827 \______________________/ 2828 2829 but this is not: 2830 2831 J---------J-------T--------T 2832 \_________\_____/ / 2833 \____________/ 2834 2835 It also helps that SRE_CODE is always an unsigned type. 2836 */ 2837 2838 /* Defining this one enables tracing of the validator */ 2839 #undef VVERBOSE 2840 2841 /* Trace macro for the validator */ 2842 #if defined(VVERBOSE) 2843 #define VTRACE(v) printf v 2844 #else 2845 #define VTRACE(v) do {} while(0) /* do nothing */ 2846 #endif 2847 2848 /* Report failure */ 2849 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0) 2850 2851 /* Extract opcode, argument, or skip count from code array */ 2852 #define GET_OP \ 2853 do { \ 2854 VTRACE(("%p: ", code)); \ 2855 if (code >= end) FAIL; \ 2856 op = *code++; \ 2857 VTRACE(("%lu (op)\n", (unsigned long)op)); \ 2858 } while (0) 2859 #define GET_ARG \ 2860 do { \ 2861 VTRACE(("%p= ", code)); \ 2862 if (code >= end) FAIL; \ 2863 arg = *code++; \ 2864 VTRACE(("%lu (arg)\n", (unsigned long)arg)); \ 2865 } while (0) 2866 #define GET_SKIP_ADJ(adj) \ 2867 do { \ 2868 VTRACE(("%p= ", code)); \ 2869 if (code >= end) FAIL; \ 2870 skip = *code; \ 2871 VTRACE(("%lu (skip to %p)\n", \ 2872 (unsigned long)skip, code+skip)); \ 2873 if (skip-adj > end-code) \ 2874 FAIL; \ 2875 code++; \ 2876 } while (0) 2877 #define GET_SKIP GET_SKIP_ADJ(0) 2878 2879 static int 2880 _validate_charset(SRE_CODE *code, SRE_CODE *end) 2881 { 2882 /* Some variables are manipulated by the macros above */ 2883 SRE_CODE op; 2884 SRE_CODE arg; 2885 SRE_CODE offset; 2886 int i; 2887 2888 while (code < end) { 2889 GET_OP; 2890 switch (op) { 2891 2892 case SRE_OP_NEGATE: 
2893 break; 2894 2895 case SRE_OP_LITERAL: 2896 GET_ARG; 2897 break; 2898 2899 case SRE_OP_RANGE: 2900 GET_ARG; 2901 GET_ARG; 2902 break; 2903 2904 case SRE_OP_CHARSET: 2905 offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */ 2906 if (offset > end-code) 2907 FAIL; 2908 code += offset; 2909 break; 2910 2911 case SRE_OP_BIGCHARSET: 2912 GET_ARG; /* Number of blocks */ 2913 offset = 256/sizeof(SRE_CODE); /* 256-byte table */ 2914 if (offset > end-code) 2915 FAIL; 2916 /* Make sure that each byte points to a valid block */ 2917 for (i = 0; i < 256; i++) { 2918 if (((unsigned char *)code)[i] >= arg) 2919 FAIL; 2920 } 2921 code += offset; 2922 offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */ 2923 if (offset > end-code) 2924 FAIL; 2925 code += offset; 2926 break; 2927 2928 case SRE_OP_CATEGORY: 2929 GET_ARG; 2930 switch (arg) { 2931 case SRE_CATEGORY_DIGIT: 2932 case SRE_CATEGORY_NOT_DIGIT: 2933 case SRE_CATEGORY_SPACE: 2934 case SRE_CATEGORY_NOT_SPACE: 2935 case SRE_CATEGORY_WORD: 2936 case SRE_CATEGORY_NOT_WORD: 2937 case SRE_CATEGORY_LINEBREAK: 2938 case SRE_CATEGORY_NOT_LINEBREAK: 2939 case SRE_CATEGORY_LOC_WORD: 2940 case SRE_CATEGORY_LOC_NOT_WORD: 2941 case SRE_CATEGORY_UNI_DIGIT: 2942 case SRE_CATEGORY_UNI_NOT_DIGIT: 2943 case SRE_CATEGORY_UNI_SPACE: 2944 case SRE_CATEGORY_UNI_NOT_SPACE: 2945 case SRE_CATEGORY_UNI_WORD: 2946 case SRE_CATEGORY_UNI_NOT_WORD: 2947 case SRE_CATEGORY_UNI_LINEBREAK: 2948 case SRE_CATEGORY_UNI_NOT_LINEBREAK: 2949 break; 2950 default: 2951 FAIL; 2952 } 2953 break; 2954 2955 default: 2956 FAIL; 2957 2958 } 2959 } 2960 2961 return 1; 2962 } 2963 2964 static int 2965 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) 2966 { 2967 /* Some variables are manipulated by the macros above */ 2968 SRE_CODE op; 2969 SRE_CODE arg; 2970 SRE_CODE skip; 2971 2972 VTRACE(("code=%p, end=%p\n", code, end)); 2973 2974 if (code > end) 2975 FAIL; 2976 2977 while (code < end) { 2978 GET_OP; 2979 switch (op) { 2980 2981 case 
SRE_OP_MARK: 2982 /* We don't check whether marks are properly nested; the 2983 sre_match() code is robust even if they don't, and the worst 2984 you can get is nonsensical match results. */ 2985 GET_ARG; 2986 if (arg > 2*groups+1) { 2987 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups)); 2988 FAIL; 2989 } 2990 break; 2991 2992 case SRE_OP_LITERAL: 2993 case SRE_OP_NOT_LITERAL: 2994 case SRE_OP_LITERAL_IGNORE: 2995 case SRE_OP_NOT_LITERAL_IGNORE: 2996 GET_ARG; 2997 /* The arg is just a character, nothing to check */ 2998 break; 2999 3000 case SRE_OP_SUCCESS: 3001 case SRE_OP_FAILURE: 3002 /* Nothing to check; these normally end the matching process */ 3003 break; 3004 3005 case SRE_OP_AT: 3006 GET_ARG; 3007 switch (arg) { 3008 case SRE_AT_BEGINNING: 3009 case SRE_AT_BEGINNING_STRING: 3010 case SRE_AT_BEGINNING_LINE: 3011 case SRE_AT_END: 3012 case SRE_AT_END_LINE: 3013 case SRE_AT_END_STRING: 3014 case SRE_AT_BOUNDARY: 3015 case SRE_AT_NON_BOUNDARY: 3016 case SRE_AT_LOC_BOUNDARY: 3017 case SRE_AT_LOC_NON_BOUNDARY: 3018 case SRE_AT_UNI_BOUNDARY: 3019 case SRE_AT_UNI_NON_BOUNDARY: 3020 break; 3021 default: 3022 FAIL; 3023 } 3024 break; 3025 3026 case SRE_OP_ANY: 3027 case SRE_OP_ANY_ALL: 3028 /* These have no operands */ 3029 break; 3030 3031 case SRE_OP_IN: 3032 case SRE_OP_IN_IGNORE: 3033 GET_SKIP; 3034 /* Stop 1 before the end; we check the FAILURE below */ 3035 if (!_validate_charset(code, code+skip-2)) 3036 FAIL; 3037 if (code[skip-2] != SRE_OP_FAILURE) 3038 FAIL; 3039 code += skip-1; 3040 break; 3041 3042 case SRE_OP_INFO: 3043 { 3044 /* A minimal info field is 3045 <INFO> <1=skip> <2=flags> <3=min> <4=max>; 3046 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags, 3047 more follows. 
*/ 3048 SRE_CODE flags, i; 3049 SRE_CODE *newcode; 3050 GET_SKIP; 3051 newcode = code+skip-1; 3052 GET_ARG; flags = arg; 3053 GET_ARG; /* min */ 3054 GET_ARG; /* max */ 3055 /* Check that only valid flags are present */ 3056 if ((flags & ~(SRE_INFO_PREFIX | 3057 SRE_INFO_LITERAL | 3058 SRE_INFO_CHARSET)) != 0) 3059 FAIL; 3060 /* PREFIX and CHARSET are mutually exclusive */ 3061 if ((flags & SRE_INFO_PREFIX) && 3062 (flags & SRE_INFO_CHARSET)) 3063 FAIL; 3064 /* LITERAL implies PREFIX */ 3065 if ((flags & SRE_INFO_LITERAL) && 3066 !(flags & SRE_INFO_PREFIX)) 3067 FAIL; 3068 /* Validate the prefix */ 3069 if (flags & SRE_INFO_PREFIX) { 3070 SRE_CODE prefix_len; 3071 GET_ARG; prefix_len = arg; 3072 GET_ARG; /* prefix skip */ 3073 /* Here comes the prefix string */ 3074 if (prefix_len > newcode-code) 3075 FAIL; 3076 code += prefix_len; 3077 /* And here comes the overlap table */ 3078 if (prefix_len > newcode-code) 3079 FAIL; 3080 /* Each overlap value should be < prefix_len */ 3081 for (i = 0; i < prefix_len; i++) { 3082 if (code[i] >= prefix_len) 3083 FAIL; 3084 } 3085 code += prefix_len; 3086 } 3087 /* Validate the charset */ 3088 if (flags & SRE_INFO_CHARSET) { 3089 if (!_validate_charset(code, newcode-1)) 3090 FAIL; 3091 if (newcode[-1] != SRE_OP_FAILURE) 3092 FAIL; 3093 code = newcode; 3094 } 3095 else if (code != newcode) { 3096 VTRACE(("code=%p, newcode=%p\n", code, newcode)); 3097 FAIL; 3098 } 3099 } 3100 break; 3101 3102 case SRE_OP_BRANCH: 3103 { 3104 SRE_CODE *target = NULL; 3105 for (;;) { 3106 GET_SKIP; 3107 if (skip == 0) 3108 break; 3109 /* Stop 2 before the end; we check the JUMP below */ 3110 if (!_validate_inner(code, code+skip-3, groups)) 3111 FAIL; 3112 code += skip-3; 3113 /* Check that it ends with a JUMP, and that each JUMP 3114 has the same target */ 3115 GET_OP; 3116 if (op != SRE_OP_JUMP) 3117 FAIL; 3118 GET_SKIP; 3119 if (target == NULL) 3120 target = code+skip-1; 3121 else if (code+skip-1 != target) 3122 FAIL; 3123 } 3124 } 3125 break; 3126 
3127 case SRE_OP_REPEAT_ONE: 3128 case SRE_OP_MIN_REPEAT_ONE: 3129 { 3130 SRE_CODE min, max; 3131 GET_SKIP; 3132 GET_ARG; min = arg; 3133 GET_ARG; max = arg; 3134 if (min > max) 3135 FAIL; 3136 if (max > SRE_MAXREPEAT) 3137 FAIL; 3138 if (!_validate_inner(code, code+skip-4, groups)) 3139 FAIL; 3140 code += skip-4; 3141 GET_OP; 3142 if (op != SRE_OP_SUCCESS) 3143 FAIL; 3144 } 3145 break; 3146 3147 case SRE_OP_REPEAT: 3148 { 3149 SRE_CODE min, max; 3150 GET_SKIP; 3151 GET_ARG; min = arg; 3152 GET_ARG; max = arg; 3153 if (min > max) 3154 FAIL; 3155 if (max > SRE_MAXREPEAT) 3156 FAIL; 3157 if (!_validate_inner(code, code+skip-3, groups)) 3158 FAIL; 3159 code += skip-3; 3160 GET_OP; 3161 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL) 3162 FAIL; 3163 } 3164 break; 3165 3166 case SRE_OP_GROUPREF: 3167 case SRE_OP_GROUPREF_IGNORE: 3168 GET_ARG; 3169 if (arg >= groups) 3170 FAIL; 3171 break; 3172 3173 case SRE_OP_GROUPREF_EXISTS: 3174 /* The regex syntax for this is: '(?(group)then|else)', where 3175 'group' is either an integer group number or a group name, 3176 'then' and 'else' are sub-regexes, and 'else' is optional. */ 3177 GET_ARG; 3178 if (arg >= groups) 3179 FAIL; 3180 GET_SKIP_ADJ(1); 3181 code--; /* The skip is relative to the first arg! */ 3182 /* There are two possibilities here: if there is both a 'then' 3183 part and an 'else' part, the generated code looks like: 3184 3185 GROUPREF_EXISTS 3186 <group> 3187 <skipyes> 3188 ...then part... 3189 JUMP 3190 <skipno> 3191 (<skipyes> jumps here) 3192 ...else part... 3193 (<skipno> jumps here) 3194 3195 If there is only a 'then' part, it looks like: 3196 3197 GROUPREF_EXISTS 3198 <group> 3199 <skip> 3200 ...then part... 3201 (<skip> jumps here) 3202 3203 There is no direct way to decide which it is, and we don't want 3204 to allow arbitrary jumps anywhere in the code; so we just look 3205 for a JUMP opcode preceding our skip target. 
3206 */ 3207 if (skip >= 3 && skip-3 < end-code && 3208 code[skip-3] == SRE_OP_JUMP) 3209 { 3210 VTRACE(("both then and else parts present\n")); 3211 if (!_validate_inner(code+1, code+skip-3, groups)) 3212 FAIL; 3213 code += skip-2; /* Position after JUMP, at <skipno> */ 3214 GET_SKIP; 3215 if (!_validate_inner(code, code+skip-1, groups)) 3216 FAIL; 3217 code += skip-1; 3218 } 3219 else { 3220 VTRACE(("only a then part present\n")); 3221 if (!_validate_inner(code+1, code+skip-1, groups)) 3222 FAIL; 3223 code += skip-1; 3224 } 3225 break; 3226 3227 case SRE_OP_ASSERT: 3228 case SRE_OP_ASSERT_NOT: 3229 GET_SKIP; 3230 GET_ARG; /* 0 for lookahead, width for lookbehind */ 3231 code--; /* Back up over arg to simplify math below */ 3232 if (arg & 0x80000000) 3233 FAIL; /* Width too large */ 3234 /* Stop 1 before the end; we check the SUCCESS below */ 3235 if (!_validate_inner(code+1, code+skip-2, groups)) 3236 FAIL; 3237 code += skip-2; 3238 GET_OP; 3239 if (op != SRE_OP_SUCCESS) 3240 FAIL; 3241 break; 3242 3243 default: 3244 FAIL; 3245 3246 } 3247 } 3248 3249 VTRACE(("okay\n")); 3250 return 1; 3251 } 3252 3253 static int 3254 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) 3255 { 3256 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS) 3257 FAIL; 3258 if (groups == 0) /* fix for simplejson */ 3259 groups = 100; /* 100 groups should always be safe */ 3260 return _validate_inner(code, end-1, groups); 3261 } 3262 3263 static int 3264 _validate(PatternObject *self) 3265 { 3266 if (!_validate_outer(self->code, self->code+self->codesize, self->groups)) 3267 { 3268 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code"); 3269 return 0; 3270 } 3271 else 3272 VTRACE(("Success!\n")); 3273 return 1; 3274 } 3275 3276 /* -------------------------------------------------------------------- */ 3277 /* match methods */ 3278 3279 static void 3280 match_dealloc(MatchObject* self) 3281 { 3282 Py_XDECREF(self->regs); 3283 Py_XDECREF(self->string); 
3284 Py_DECREF(self->pattern); 3285 PyObject_DEL(self); 3286 } 3287 3288 static PyObject* 3289 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) 3290 { 3291 if (index < 0 || index >= self->groups) { 3292 /* raise IndexError if we were given a bad group number */ 3293 PyErr_SetString( 3294 PyExc_IndexError, 3295 "no such group" 3296 ); 3297 return NULL; 3298 } 3299 3300 index *= 2; 3301 3302 if (self->string == Py_None || self->mark[index] < 0) { 3303 /* return default value if the string or group is undefined */ 3304 Py_INCREF(def); 3305 return def; 3306 } 3307 3308 return PySequence_GetSlice( 3309 self->string, self->mark[index], self->mark[index+1] 3310 ); 3311 } 3312 3313 static Py_ssize_t 3314 match_getindex(MatchObject* self, PyObject* index) 3315 { 3316 Py_ssize_t i; 3317 3318 if (PyInt_Check(index) || PyLong_Check(index)) 3319 return PyInt_AsSsize_t(index); 3320 3321 i = -1; 3322 3323 if (self->pattern->groupindex) { 3324 index = PyObject_GetItem(self->pattern->groupindex, index); 3325 if (index) { 3326 if (PyInt_Check(index) || PyLong_Check(index)) 3327 i = PyInt_AsSsize_t(index); 3328 Py_DECREF(index); 3329 } else 3330 PyErr_Clear(); 3331 } 3332 3333 return i; 3334 } 3335 3336 static PyObject* 3337 match_getslice(MatchObject* self, PyObject* index, PyObject* def) 3338 { 3339 return match_getslice_by_index(self, match_getindex(self, index), def); 3340 } 3341 3342 static PyObject* 3343 match_expand(MatchObject* self, PyObject* ptemplate) 3344 { 3345 /* delegate to Python code */ 3346 return call( 3347 SRE_PY_MODULE, "_expand", 3348 PyTuple_Pack(3, self->pattern, self, ptemplate) 3349 ); 3350 } 3351 3352 static PyObject* 3353 match_group(MatchObject* self, PyObject* args) 3354 { 3355 PyObject* result; 3356 Py_ssize_t i, size; 3357 3358 size = PyTuple_GET_SIZE(args); 3359 3360 switch (size) { 3361 case 0: 3362 result = match_getslice(self, Py_False, Py_None); 3363 break; 3364 case 1: 3365 result = match_getslice(self, 
PyTuple_GET_ITEM(args, 0), Py_None); 3366 break; 3367 default: 3368 /* fetch multiple items */ 3369 result = PyTuple_New(size); 3370 if (!result) 3371 return NULL; 3372 for (i = 0; i < size; i++) { 3373 PyObject* item = match_getslice( 3374 self, PyTuple_GET_ITEM(args, i), Py_None 3375 ); 3376 if (!item) { 3377 Py_DECREF(result); 3378 return NULL; 3379 } 3380 PyTuple_SET_ITEM(result, i, item); 3381 } 3382 break; 3383 } 3384 return result; 3385 } 3386 3387 static PyObject* 3388 match_groups(MatchObject* self, PyObject* args, PyObject* kw) 3389 { 3390 PyObject* result; 3391 Py_ssize_t index; 3392 3393 PyObject* def = Py_None; 3394 static char* kwlist[] = { "default", NULL }; 3395 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def)) 3396 return NULL; 3397 3398 result = PyTuple_New(self->groups-1); 3399 if (!result) 3400 return NULL; 3401 3402 for (index = 1; index < self->groups; index++) { 3403 PyObject* item; 3404 item = match_getslice_by_index(self, index, def); 3405 if (!item) { 3406 Py_DECREF(result); 3407 return NULL; 3408 } 3409 PyTuple_SET_ITEM(result, index-1, item); 3410 } 3411 3412 return result; 3413 } 3414 3415 static PyObject* 3416 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw) 3417 { 3418 PyObject* result; 3419 PyObject* keys; 3420 Py_ssize_t index; 3421 3422 PyObject* def = Py_None; 3423 static char* kwlist[] = { "default", NULL }; 3424 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def)) 3425 return NULL; 3426 3427 result = PyDict_New(); 3428 if (!result || !self->pattern->groupindex) 3429 return result; 3430 3431 keys = PyMapping_Keys(self->pattern->groupindex); 3432 if (!keys) 3433 goto failed; 3434 3435 for (index = 0; index < PyList_GET_SIZE(keys); index++) { 3436 int status; 3437 PyObject* key; 3438 PyObject* value; 3439 key = PyList_GET_ITEM(keys, index); 3440 if (!key) 3441 goto failed; 3442 value = match_getslice(self, key, def); 3443 if (!value) { 3444 Py_DECREF(key); 3445 goto failed; 
3446 } 3447 status = PyDict_SetItem(result, key, value); 3448 Py_DECREF(value); 3449 if (status < 0) 3450 goto failed; 3451 } 3452 3453 Py_DECREF(keys); 3454 3455 return result; 3456 3457 failed: 3458 Py_XDECREF(keys); 3459 Py_DECREF(result); 3460 return NULL; 3461 } 3462 3463 static PyObject* 3464 match_start(MatchObject* self, PyObject* args) 3465 { 3466 Py_ssize_t index; 3467 3468 PyObject* index_ = Py_False; /* zero */ 3469 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_)) 3470 return NULL; 3471 3472 index = match_getindex(self, index_); 3473 3474 if (index < 0 || index >= self->groups) { 3475 PyErr_SetString( 3476 PyExc_IndexError, 3477 "no such group" 3478 ); 3479 return NULL; 3480 } 3481 3482 /* mark is -1 if group is undefined */ 3483 return PyInt_FromSsize_t(self->mark[index*2]); 3484 } 3485 3486 static PyObject* 3487 match_end(MatchObject* self, PyObject* args) 3488 { 3489 Py_ssize_t index; 3490 3491 PyObject* index_ = Py_False; /* zero */ 3492 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_)) 3493 return NULL; 3494 3495 index = match_getindex(self, index_); 3496 3497 if (index < 0 || index >= self->groups) { 3498 PyErr_SetString( 3499 PyExc_IndexError, 3500 "no such group" 3501 ); 3502 return NULL; 3503 } 3504 3505 /* mark is -1 if group is undefined */ 3506 return PyInt_FromSsize_t(self->mark[index*2+1]); 3507 } 3508 3509 LOCAL(PyObject*) 3510 _pair(Py_ssize_t i1, Py_ssize_t i2) 3511 { 3512 PyObject* pair; 3513 PyObject* item; 3514 3515 pair = PyTuple_New(2); 3516 if (!pair) 3517 return NULL; 3518 3519 item = PyInt_FromSsize_t(i1); 3520 if (!item) 3521 goto error; 3522 PyTuple_SET_ITEM(pair, 0, item); 3523 3524 item = PyInt_FromSsize_t(i2); 3525 if (!item) 3526 goto error; 3527 PyTuple_SET_ITEM(pair, 1, item); 3528 3529 return pair; 3530 3531 error: 3532 Py_DECREF(pair); 3533 return NULL; 3534 } 3535 3536 static PyObject* 3537 match_span(MatchObject* self, PyObject* args) 3538 { 3539 Py_ssize_t index; 3540 3541 PyObject* index_ = Py_False; /* zero 
*/ 3542 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_)) 3543 return NULL; 3544 3545 index = match_getindex(self, index_); 3546 3547 if (index < 0 || index >= self->groups) { 3548 PyErr_SetString( 3549 PyExc_IndexError, 3550 "no such group" 3551 ); 3552 return NULL; 3553 } 3554 3555 /* marks are -1 if group is undefined */ 3556 return _pair(self->mark[index*2], self->mark[index*2+1]); 3557 } 3558 3559 static PyObject* 3560 match_regs(MatchObject* self) 3561 { 3562 PyObject* regs; 3563 PyObject* item; 3564 Py_ssize_t index; 3565 3566 regs = PyTuple_New(self->groups); 3567 if (!regs) 3568 return NULL; 3569 3570 for (index = 0; index < self->groups; index++) { 3571 item = _pair(self->mark[index*2], self->mark[index*2+1]); 3572 if (!item) { 3573 Py_DECREF(regs); 3574 return NULL; 3575 } 3576 PyTuple_SET_ITEM(regs, index, item); 3577 } 3578 3579 Py_INCREF(regs); 3580 self->regs = regs; 3581 3582 return regs; 3583 } 3584 3585 static PyObject* 3586 match_copy(MatchObject* self, PyObject *unused) 3587 { 3588 #ifdef USE_BUILTIN_COPY 3589 MatchObject* copy; 3590 Py_ssize_t slots, offset; 3591 3592 slots = 2 * (self->pattern->groups+1); 3593 3594 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots); 3595 if (!copy) 3596 return NULL; 3597 3598 /* this value a constant, but any compiler should be able to 3599 figure that out all by itself */ 3600 offset = offsetof(MatchObject, string); 3601 3602 Py_XINCREF(self->pattern); 3603 Py_XINCREF(self->string); 3604 Py_XINCREF(self->regs); 3605 3606 memcpy((char*) copy + offset, (char*) self + offset, 3607 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset); 3608 3609 return (PyObject*) copy; 3610 #else 3611 PyErr_SetString(PyExc_TypeError, "cannot copy this match object"); 3612 return NULL; 3613 #endif 3614 } 3615 3616 static PyObject* 3617 match_deepcopy(MatchObject* self, PyObject* memo) 3618 { 3619 #ifdef USE_BUILTIN_COPY 3620 MatchObject* copy; 3621 3622 copy = (MatchObject*) match_copy(self); 3623 if (!copy) 3624 
return NULL; 3625 3626 if (!deepcopy((PyObject**) ©->pattern, memo) || 3627 !deepcopy(©->string, memo) || 3628 !deepcopy(©->regs, memo)) { 3629 Py_DECREF(copy); 3630 return NULL; 3631 } 3632 3633 #else 3634 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object"); 3635 return NULL; 3636 #endif 3637 } 3638 3639 PyDoc_STRVAR(match_doc, 3640 "The result of re.match() and re.search().\n\ 3641 Match objects always have a boolean value of True."); 3642 3643 PyDoc_STRVAR(match_group_doc, 3644 "group([group1, ...]) -> str or tuple.\n\ 3645 Return subgroup(s) of the match by indices or names.\n\ 3646 For 0 returns the entire match."); 3647 3648 PyDoc_STRVAR(match_start_doc, 3649 "start([group=0]) -> int.\n\ 3650 Return index of the start of the substring matched by group."); 3651 3652 PyDoc_STRVAR(match_end_doc, 3653 "end([group=0]) -> int.\n\ 3654 Return index of the end of the substring matched by group."); 3655 3656 PyDoc_STRVAR(match_span_doc, 3657 "span([group]) -> tuple.\n\ 3658 For MatchObject m, return the 2-tuple (m.start(group), m.end(group))."); 3659 3660 PyDoc_STRVAR(match_groups_doc, 3661 "groups([default=None]) -> tuple.\n\ 3662 Return a tuple containing all the subgroups of the match, from 1.\n\ 3663 The default argument is used for groups\n\ 3664 that did not participate in the match"); 3665 3666 PyDoc_STRVAR(match_groupdict_doc, 3667 "groupdict([default=None]) -> dict.\n\ 3668 Return a dictionary containing all the named subgroups of the match,\n\ 3669 keyed by the subgroup name. 
The default argument is used for groups\n\ 3670 that did not participate in the match"); 3671 3672 PyDoc_STRVAR(match_expand_doc, 3673 "expand(template) -> str.\n\ 3674 Return the string obtained by doing backslash substitution\n\ 3675 on the string template, as done by the sub() method."); 3676 3677 static PyMethodDef match_methods[] = { 3678 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc}, 3679 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc}, 3680 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc}, 3681 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc}, 3682 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS, 3683 match_groups_doc}, 3684 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS, 3685 match_groupdict_doc}, 3686 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc}, 3687 {"__copy__", (PyCFunction) match_copy, METH_NOARGS}, 3688 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O}, 3689 {NULL, NULL} 3690 }; 3691 3692 static PyObject * 3693 match_lastindex_get(MatchObject *self) 3694 { 3695 if (self->lastindex >= 0) 3696 return PyInt_FromSsize_t(self->lastindex); 3697 Py_INCREF(Py_None); 3698 return Py_None; 3699 } 3700 3701 static PyObject * 3702 match_lastgroup_get(MatchObject *self) 3703 { 3704 if (self->pattern->indexgroup && self->lastindex >= 0) { 3705 PyObject* result = PySequence_GetItem( 3706 self->pattern->indexgroup, self->lastindex 3707 ); 3708 if (result) 3709 return result; 3710 PyErr_Clear(); 3711 } 3712 Py_INCREF(Py_None); 3713 return Py_None; 3714 } 3715 3716 static PyObject * 3717 match_regs_get(MatchObject *self) 3718 { 3719 if (self->regs) { 3720 Py_INCREF(self->regs); 3721 return self->regs; 3722 } else 3723 return match_regs(self); 3724 } 3725 3726 static PyGetSetDef match_getset[] = { 3727 {"lastindex", (getter)match_lastindex_get, (setter)NULL}, 3728 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL}, 3729 {"regs", 
(getter)match_regs_get, (setter)NULL}, 3730 {NULL} 3731 }; 3732 3733 #define MATCH_OFF(x) offsetof(MatchObject, x) 3734 static PyMemberDef match_members[] = { 3735 {"string", T_OBJECT, MATCH_OFF(string), READONLY}, 3736 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY}, 3737 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY}, 3738 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY}, 3739 {NULL} 3740 }; 3741 3742 3743 /* FIXME: implement setattr("string", None) as a special case (to 3744 detach the associated string, if any */ 3745 3746 static PyTypeObject Match_Type = { 3747 PyVarObject_HEAD_INIT(NULL, 0) 3748 "_" SRE_MODULE ".SRE_Match", 3749 sizeof(MatchObject), sizeof(Py_ssize_t), 3750 (destructor)match_dealloc, /* tp_dealloc */ 3751 0, /* tp_print */ 3752 0, /* tp_getattr */ 3753 0, /* tp_setattr */ 3754 0, /* tp_compare */ 3755 0, /* tp_repr */ 3756 0, /* tp_as_number */ 3757 0, /* tp_as_sequence */ 3758 0, /* tp_as_mapping */ 3759 0, /* tp_hash */ 3760 0, /* tp_call */ 3761 0, /* tp_str */ 3762 0, /* tp_getattro */ 3763 0, /* tp_setattro */ 3764 0, /* tp_as_buffer */ 3765 Py_TPFLAGS_DEFAULT, 3766 match_doc, /* tp_doc */ 3767 0, /* tp_traverse */ 3768 0, /* tp_clear */ 3769 0, /* tp_richcompare */ 3770 0, /* tp_weaklistoffset */ 3771 0, /* tp_iter */ 3772 0, /* tp_iternext */ 3773 match_methods, /* tp_methods */ 3774 match_members, /* tp_members */ 3775 match_getset, /* tp_getset */ 3776 }; 3777 3778 static PyObject* 3779 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status) 3780 { 3781 /* create match object (from state object) */ 3782 3783 MatchObject* match; 3784 Py_ssize_t i, j; 3785 char* base; 3786 int n; 3787 3788 if (status > 0) { 3789 3790 /* create match object (with room for extra group marks) */ 3791 /* coverity[ampersand_in_size] */ 3792 match = PyObject_NEW_VAR(MatchObject, &Match_Type, 3793 2*(pattern->groups+1)); 3794 if (!match) 3795 return NULL; 3796 3797 Py_INCREF(pattern); 3798 match->pattern = pattern; 3799 3800 
Py_INCREF(state->string); 3801 match->string = state->string; 3802 3803 match->regs = NULL; 3804 match->groups = pattern->groups+1; 3805 3806 /* fill in group slices */ 3807 3808 base = (char*) state->beginning; 3809 n = state->charsize; 3810 3811 match->mark[0] = ((char*) state->start - base) / n; 3812 match->mark[1] = ((char*) state->ptr - base) / n; 3813 3814 for (i = j = 0; i < pattern->groups; i++, j+=2) 3815 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { 3816 match->mark[j+2] = ((char*) state->mark[j] - base) / n; 3817 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; 3818 } else 3819 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ 3820 3821 match->pos = state->pos; 3822 match->endpos = state->endpos; 3823 3824 match->lastindex = state->lastindex; 3825 3826 return (PyObject*) match; 3827 3828 } else if (status == 0) { 3829 3830 /* no match */ 3831 Py_INCREF(Py_None); 3832 return Py_None; 3833 3834 } 3835 3836 /* internal error */ 3837 pattern_error(status); 3838 return NULL; 3839 } 3840 3841 3842 /* -------------------------------------------------------------------- */ 3843 /* scanner methods (experimental) */ 3844 3845 static void 3846 scanner_dealloc(ScannerObject* self) 3847 { 3848 state_fini(&self->state); 3849 Py_XDECREF(self->pattern); 3850 PyObject_DEL(self); 3851 } 3852 3853 static PyObject* 3854 scanner_match(ScannerObject* self, PyObject *unused) 3855 { 3856 SRE_STATE* state = &self->state; 3857 PyObject* match; 3858 int status; 3859 3860 state_reset(state); 3861 3862 state->ptr = state->start; 3863 3864 if (state->charsize == 1) { 3865 status = sre_match(state, PatternObject_GetCode(self->pattern)); 3866 } else { 3867 #if defined(HAVE_UNICODE) 3868 status = sre_umatch(state, PatternObject_GetCode(self->pattern)); 3869 #endif 3870 } 3871 if (PyErr_Occurred()) 3872 return NULL; 3873 3874 match = pattern_new_match((PatternObject*) self->pattern, 3875 state, status); 3876 3877 if (status == 0 || state->ptr == 
state->start) 3878 state->start = (void*) ((char*) state->ptr + state->charsize); 3879 else 3880 state->start = state->ptr; 3881 3882 return match; 3883 } 3884 3885 3886 static PyObject* 3887 scanner_search(ScannerObject* self, PyObject *unused) 3888 { 3889 SRE_STATE* state = &self->state; 3890 PyObject* match; 3891 int status; 3892 3893 state_reset(state); 3894 3895 state->ptr = state->start; 3896 3897 if (state->charsize == 1) { 3898 status = sre_search(state, PatternObject_GetCode(self->pattern)); 3899 } else { 3900 #if defined(HAVE_UNICODE) 3901 status = sre_usearch(state, PatternObject_GetCode(self->pattern)); 3902 #endif 3903 } 3904 if (PyErr_Occurred()) 3905 return NULL; 3906 3907 match = pattern_new_match((PatternObject*) self->pattern, 3908 state, status); 3909 3910 if (status == 0 || state->ptr == state->start) 3911 state->start = (void*) ((char*) state->ptr + state->charsize); 3912 else 3913 state->start = state->ptr; 3914 3915 return match; 3916 } 3917 3918 static PyMethodDef scanner_methods[] = { 3919 {"match", (PyCFunction) scanner_match, METH_NOARGS}, 3920 {"search", (PyCFunction) scanner_search, METH_NOARGS}, 3921 {NULL, NULL} 3922 }; 3923 3924 #define SCAN_OFF(x) offsetof(ScannerObject, x) 3925 static PyMemberDef scanner_members[] = { 3926 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY}, 3927 {NULL} /* Sentinel */ 3928 }; 3929 3930 statichere PyTypeObject Scanner_Type = { 3931 PyObject_HEAD_INIT(NULL) 3932 0, "_" SRE_MODULE ".SRE_Scanner", 3933 sizeof(ScannerObject), 0, 3934 (destructor)scanner_dealloc, /*tp_dealloc*/ 3935 0, /* tp_print */ 3936 0, /* tp_getattr */ 3937 0, /* tp_setattr */ 3938 0, /* tp_reserved */ 3939 0, /* tp_repr */ 3940 0, /* tp_as_number */ 3941 0, /* tp_as_sequence */ 3942 0, /* tp_as_mapping */ 3943 0, /* tp_hash */ 3944 0, /* tp_call */ 3945 0, /* tp_str */ 3946 0, /* tp_getattro */ 3947 0, /* tp_setattro */ 3948 0, /* tp_as_buffer */ 3949 Py_TPFLAGS_DEFAULT, /* tp_flags */ 3950 0, /* tp_doc */ 3951 0, /* tp_traverse */ 
3952 0, /* tp_clear */ 3953 0, /* tp_richcompare */ 3954 0, /* tp_weaklistoffset */ 3955 0, /* tp_iter */ 3956 0, /* tp_iternext */ 3957 scanner_methods, /* tp_methods */ 3958 scanner_members, /* tp_members */ 3959 0, /* tp_getset */ 3960 }; 3961 3962 static PyObject* 3963 pattern_scanner(PatternObject* pattern, PyObject* args) 3964 { 3965 /* create search state object */ 3966 3967 ScannerObject* self; 3968 3969 PyObject* string; 3970 Py_ssize_t start = 0; 3971 Py_ssize_t end = PY_SSIZE_T_MAX; 3972 if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end)) 3973 return NULL; 3974 3975 /* create scanner object */ 3976 self = PyObject_NEW(ScannerObject, &Scanner_Type); 3977 if (!self) 3978 return NULL; 3979 self->pattern = NULL; 3980 3981 string = state_init(&self->state, pattern, string, start, end); 3982 if (!string) { 3983 Py_DECREF(self); 3984 return NULL; 3985 } 3986 3987 Py_INCREF(pattern); 3988 self->pattern = (PyObject*) pattern; 3989 3990 return (PyObject*) self; 3991 } 3992 3993 static PyMethodDef _functions[] = { 3994 {"compile", _compile, METH_VARARGS}, 3995 {"getcodesize", sre_codesize, METH_NOARGS}, 3996 {"getlower", sre_getlower, METH_VARARGS}, 3997 {NULL, NULL} 3998 }; 3999 4000 #if PY_VERSION_HEX < 0x02030000 4001 DL_EXPORT(void) init_sre(void) 4002 #else 4003 PyMODINIT_FUNC init_sre(void) 4004 #endif 4005 { 4006 PyObject* m; 4007 PyObject* d; 4008 PyObject* x; 4009 4010 /* Patch object types */ 4011 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) || 4012 PyType_Ready(&Scanner_Type)) 4013 return; 4014 4015 m = Py_InitModule("_" SRE_MODULE, _functions); 4016 if (m == NULL) 4017 return; 4018 d = PyModule_GetDict(m); 4019 4020 x = PyInt_FromLong(SRE_MAGIC); 4021 if (x) { 4022 PyDict_SetItemString(d, "MAGIC", x); 4023 Py_DECREF(x); 4024 } 4025 4026 x = PyInt_FromLong(sizeof(SRE_CODE)); 4027 if (x) { 4028 PyDict_SetItemString(d, "CODESIZE", x); 4029 Py_DECREF(x); 4030 } 4031 4032 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT); 4033 if (x) 
{ 4034 PyDict_SetItemString(d, "MAXREPEAT", x); 4035 Py_DECREF(x); 4036 } 4037 4038 x = PyString_FromString(copyright); 4039 if (x) { 4040 PyDict_SetItemString(d, "copyright", x); 4041 Py_DECREF(x); 4042 } 4043 } 4044 4045 #endif /* !defined(SRE_RECURSIVE) */ 4046 4047 /* vim:ts=4:sw=4:et 4048 */ 4049