1 /* 2 * Secret Labs' Regular Expression Engine 3 * 4 * regular expression matching engine 5 * 6 * partial history: 7 * 1999-10-24 fl created (based on existing template matcher code) 8 * 2000-03-06 fl first alpha, sort of 9 * 2000-08-01 fl fixes for 1.6b1 10 * 2000-08-07 fl use PyOS_CheckStack() if available 11 * 2000-09-20 fl added expand method 12 * 2001-03-20 fl lots of fixes for 2.1b2 13 * 2001-04-15 fl export copyright as Python attribute, not global 14 * 2001-04-28 fl added __copy__ methods (work in progress) 15 * 2001-05-14 fl fixes for 1.5.2 compatibility 16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) 17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller) 18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1 19 * 2001-10-21 fl added sub/subn primitive 20 * 2001-10-24 fl added finditer primitive (for 2.2 only) 21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) 22 * 2002-11-09 fl fixed empty sub/subn return type 23 * 2003-04-18 mvl fully support 4-byte codes 24 * 2003-10-17 gn implemented non recursive scheme 25 * 2013-02-04 mrab added fullmatch primitive 26 * 27 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. 28 * 29 * This version of the SRE library can be redistributed under CNRI's 30 * Python 1.6 license. For any other use, please contact Secret Labs 31 * AB (info (at) pythonware.com). 32 * 33 * Portions of this engine have been developed in cooperation with 34 * CNRI. Hewlett-Packard provided funding for 1.6 integration and 35 * other compatibility work. 36 */ 37 38 static const char copyright[] = 39 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB "; 40 41 #define PY_SSIZE_T_CLEAN 42 43 #include "Python.h" 44 #include "structmember.h" /* offsetof */ 45 46 #include "sre.h" 47 48 #define SRE_CODE_BITS (8 * sizeof(SRE_CODE)) 49 50 #include <ctype.h> 51 52 /* name of this module, minus the leading underscore */ 53 #if !defined(SRE_MODULE) 54 #define SRE_MODULE "sre" 55 #endif 56 57 #define SRE_PY_MODULE "re" 58 59 /* defining this one enables tracing */ 60 #undef VERBOSE 61 62 /* -------------------------------------------------------------------- */ 63 /* optional features */ 64 65 /* enables copy/deepcopy handling (work in progress) */ 66 #undef USE_BUILTIN_COPY 67 68 /* -------------------------------------------------------------------- */ 69 70 #if defined(_MSC_VER) 71 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */ 72 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */ 73 /* fastest possible local call under MSVC */ 74 #define LOCAL(type) static __inline type __fastcall 75 #elif defined(USE_INLINE) 76 #define LOCAL(type) static inline type 77 #else 78 #define LOCAL(type) static type 79 #endif 80 81 /* error codes */ 82 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */ 83 #define SRE_ERROR_STATE -2 /* illegal state */ 84 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */ 85 #define SRE_ERROR_MEMORY -9 /* out of memory */ 86 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ 87 88 #if defined(VERBOSE) 89 #define TRACE(v) printf v 90 #else 91 #define TRACE(v) 92 #endif 93 94 /* -------------------------------------------------------------------- */ 95 /* search engine state */ 96 97 #define SRE_IS_DIGIT(ch)\ 98 ((ch) < 128 && Py_ISDIGIT(ch)) 99 #define SRE_IS_SPACE(ch)\ 100 ((ch) < 128 && Py_ISSPACE(ch)) 101 #define SRE_IS_LINEBREAK(ch)\ 102 ((ch) == '\n') 103 #define SRE_IS_ALNUM(ch)\ 104 ((ch) < 128 && Py_ISALNUM(ch)) 105 #define SRE_IS_WORD(ch)\ 106 ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_')) 107 108 static unsigned int sre_lower(unsigned int ch) 109 { 110 return ((ch) < 128 ? Py_TOLOWER(ch) : ch); 111 } 112 113 static unsigned int sre_upper(unsigned int ch) 114 { 115 return ((ch) < 128 ? Py_TOUPPER(ch) : ch); 116 } 117 118 /* locale-specific character predicates */ 119 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids 120 * warnings when c's type supports only numbers < N+1 */ 121 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0) 122 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_') 123 124 static unsigned int sre_lower_locale(unsigned int ch) 125 { 126 return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch); 127 } 128 129 static unsigned int sre_upper_locale(unsigned int ch) 130 { 131 return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch); 132 } 133 134 /* unicode-specific character predicates */ 135 136 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch) 137 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch) 138 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch) 139 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch) 140 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_') 141 142 static unsigned int sre_lower_unicode(unsigned int ch) 143 { 144 return (unsigned int) Py_UNICODE_TOLOWER(ch); 145 } 146 147 static unsigned int sre_upper_unicode(unsigned int ch) 148 { 149 return (unsigned int) Py_UNICODE_TOUPPER(ch); 150 } 151 152 LOCAL(int) 153 sre_category(SRE_CODE category, unsigned int ch) 154 { 155 switch (category) { 156 157 case SRE_CATEGORY_DIGIT: 158 return SRE_IS_DIGIT(ch); 159 case SRE_CATEGORY_NOT_DIGIT: 160 return !SRE_IS_DIGIT(ch); 161 case SRE_CATEGORY_SPACE: 162 return SRE_IS_SPACE(ch); 163 case SRE_CATEGORY_NOT_SPACE: 164 return !SRE_IS_SPACE(ch); 165 case SRE_CATEGORY_WORD: 166 return SRE_IS_WORD(ch); 167 case SRE_CATEGORY_NOT_WORD: 168 return !SRE_IS_WORD(ch); 169 case SRE_CATEGORY_LINEBREAK: 170 return SRE_IS_LINEBREAK(ch); 171 case SRE_CATEGORY_NOT_LINEBREAK: 172 return !SRE_IS_LINEBREAK(ch); 173 174 case SRE_CATEGORY_LOC_WORD: 175 return SRE_LOC_IS_WORD(ch); 176 case SRE_CATEGORY_LOC_NOT_WORD: 177 return !SRE_LOC_IS_WORD(ch); 178 179 case SRE_CATEGORY_UNI_DIGIT: 180 return SRE_UNI_IS_DIGIT(ch); 181 case SRE_CATEGORY_UNI_NOT_DIGIT: 182 return !SRE_UNI_IS_DIGIT(ch); 183 case SRE_CATEGORY_UNI_SPACE: 184 return SRE_UNI_IS_SPACE(ch); 185 case SRE_CATEGORY_UNI_NOT_SPACE: 186 return !SRE_UNI_IS_SPACE(ch); 187 case SRE_CATEGORY_UNI_WORD: 188 return SRE_UNI_IS_WORD(ch); 189 case SRE_CATEGORY_UNI_NOT_WORD: 190 return !SRE_UNI_IS_WORD(ch); 191 case SRE_CATEGORY_UNI_LINEBREAK: 192 return SRE_UNI_IS_LINEBREAK(ch); 193 case SRE_CATEGORY_UNI_NOT_LINEBREAK: 194 return !SRE_UNI_IS_LINEBREAK(ch); 195 } 196 return 0; 197 } 198 199 /* helpers */ 200 201 static void 202 data_stack_dealloc(SRE_STATE* state) 203 { 204 if (state->data_stack) { 205 PyMem_FREE(state->data_stack); 206 state->data_stack = NULL; 207 } 208 state->data_stack_size = state->data_stack_base = 0; 209 } 210 211 static int 212 data_stack_grow(SRE_STATE* state, Py_ssize_t size) 213 { 214 Py_ssize_t minsize, cursize; 215 minsize = state->data_stack_base+size; 216 cursize = state->data_stack_size; 217 if (cursize < minsize) { 218 void* stack; 219 cursize = minsize+minsize/4+1024; 220 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize)); 221 stack = PyMem_REALLOC(state->data_stack, cursize); 222 if (!stack) { 223 data_stack_dealloc(state); 224 return SRE_ERROR_MEMORY; 225 } 226 state->data_stack = (char *)stack; 227 state->data_stack_size = cursize; 228 } 229 return 0; 230 } 231 232 /* generate 8-bit version */ 233 234 #define SRE_CHAR Py_UCS1 235 #define SIZEOF_SRE_CHAR 1 236 #define SRE(F) sre_ucs1_##F 237 #include "sre_lib.h" 238 239 /* generate 16-bit unicode version */ 240 241 #define SRE_CHAR Py_UCS2 242 #define SIZEOF_SRE_CHAR 2 243 #define SRE(F) sre_ucs2_##F 244 #include "sre_lib.h" 245 246 /* generate 32-bit unicode version */ 247 248 #define SRE_CHAR Py_UCS4 249 #define SIZEOF_SRE_CHAR 4 250 #define SRE(F) sre_ucs4_##F 251 #include "sre_lib.h" 252 253 /* -------------------------------------------------------------------- */ 254 /* factories and destructors */ 255 256 /* see sre.h for object declarations */ 257 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t); 258 static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t); 259 260 261 /*[clinic input] 262 module _sre 263 class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type" 264 class _sre.SRE_Match "MatchObject *" "&Match_Type" 265 class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type" 266 [clinic start generated code]*/ 267 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/ 268 269 static PyTypeObject Pattern_Type; 270 static PyTypeObject Match_Type; 271 static PyTypeObject Scanner_Type; 272 273 /*[clinic input] 274 _sre.getcodesize -> int 275 [clinic start generated code]*/ 276 277 static int 278 _sre_getcodesize_impl(PyObject *module) 279 /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/ 280 { 281 return sizeof(SRE_CODE); 282 } 283 284 /*[clinic input] 285 _sre.getlower -> int 286 287 character: int 288 flags: int 289 / 290 291 [clinic start generated code]*/ 292 293 static int 294 _sre_getlower_impl(PyObject *module, int character, int flags) 295 /*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/ 296 { 297 if (flags & SRE_FLAG_LOCALE) 298 return sre_lower_locale(character); 299 if (flags & SRE_FLAG_UNICODE) 300 return sre_lower_unicode(character); 301 return sre_lower(character); 302 } 303 304 LOCAL(void) 305 state_reset(SRE_STATE* state) 306 { 307 /* FIXME: dynamic! */ 308 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/ 309 310 state->lastmark = -1; 311 state->lastindex = -1; 312 313 state->repeat = NULL; 314 315 data_stack_dealloc(state); 316 } 317 318 static void* 319 getstring(PyObject* string, Py_ssize_t* p_length, 320 int* p_isbytes, int* p_charsize, 321 Py_buffer *view) 322 { 323 /* given a python object, return a data pointer, a length (in 324 characters), and a character size. return NULL if the object 325 is not a string (or not compatible) */ 326 327 /* Unicode objects do not support the buffer API. So, get the data 328 directly instead. */ 329 if (PyUnicode_Check(string)) { 330 if (PyUnicode_READY(string) == -1) 331 return NULL; 332 *p_length = PyUnicode_GET_LENGTH(string); 333 *p_charsize = PyUnicode_KIND(string); 334 *p_isbytes = 0; 335 return PyUnicode_DATA(string); 336 } 337 338 /* get pointer to byte string buffer */ 339 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) { 340 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object"); 341 return NULL; 342 } 343 344 *p_length = view->len; 345 *p_charsize = 1; 346 *p_isbytes = 1; 347 348 if (view->buf == NULL) { 349 PyErr_SetString(PyExc_ValueError, "Buffer is NULL"); 350 PyBuffer_Release(view); 351 view->buf = NULL; 352 return NULL; 353 } 354 return view->buf; 355 } 356 357 LOCAL(PyObject*) 358 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, 359 Py_ssize_t start, Py_ssize_t end) 360 { 361 /* prepare state object */ 362 363 Py_ssize_t length; 364 int isbytes, charsize; 365 void* ptr; 366 367 memset(state, 0, sizeof(SRE_STATE)); 368 369 state->mark = PyMem_New(void *, pattern->groups * 2); 370 if (!state->mark) { 371 PyErr_NoMemory(); 372 goto err; 373 } 374 state->lastmark = -1; 375 state->lastindex = -1; 376 377 state->buffer.buf = NULL; 378 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer); 379 if (!ptr) 380 goto err; 381 382 if (isbytes && pattern->isbytes == 0) { 383 PyErr_SetString(PyExc_TypeError, 384 "cannot use a string pattern on a bytes-like object"); 385 goto err; 386 } 387 if (!isbytes && pattern->isbytes > 0) { 388 PyErr_SetString(PyExc_TypeError, 389 "cannot use a bytes pattern on a string-like object"); 390 goto err; 391 } 392 393 /* adjust boundaries */ 394 if (start < 0) 395 start = 0; 396 else if (start > length) 397 start = length; 398 399 if (end < 0) 400 end = 0; 401 else if (end > length) 402 end = length; 403 404 state->isbytes = isbytes; 405 state->charsize = charsize; 406 407 state->beginning = ptr; 408 409 state->start = (void*) ((char*) ptr + start * state->charsize); 410 state->end = (void*) ((char*) ptr + end * state->charsize); 411 412 Py_INCREF(string); 413 state->string = string; 414 state->pos = start; 415 state->endpos = end; 416 417 if (pattern->flags & SRE_FLAG_LOCALE) { 418 state->lower = sre_lower_locale; 419 state->upper = sre_upper_locale; 420 } 421 else if (pattern->flags & SRE_FLAG_UNICODE) { 422 state->lower = sre_lower_unicode; 423 state->upper = sre_upper_unicode; 424 } 425 else { 426 state->lower = sre_lower; 427 state->upper = sre_upper; 428 } 429 430 return string; 431 err: 432 PyMem_Del(state->mark); 433 state->mark = NULL; 434 if (state->buffer.buf) 435 PyBuffer_Release(&state->buffer); 436 return NULL; 437 } 438 439 LOCAL(void) 440 state_fini(SRE_STATE* state) 441 { 442 if (state->buffer.buf) 443 PyBuffer_Release(&state->buffer); 444 Py_XDECREF(state->string); 445 data_stack_dealloc(state); 446 PyMem_Del(state->mark); 447 state->mark = NULL; 448 } 449 450 /* calculate offset from start of string */ 451 #define STATE_OFFSET(state, member)\ 452 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize) 453 454 LOCAL(PyObject*) 455 getslice(int isbytes, const void *ptr, 456 PyObject* string, Py_ssize_t start, Py_ssize_t end) 457 { 458 if (isbytes) { 459 if (PyBytes_CheckExact(string) && 460 start == 0 && end == PyBytes_GET_SIZE(string)) { 461 Py_INCREF(string); 462 return string; 463 } 464 return PyBytes_FromStringAndSize( 465 (const char *)ptr + start, end - start); 466 } 467 else { 468 return PyUnicode_Substring(string, start, end); 469 } 470 } 471 472 LOCAL(PyObject*) 473 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) 474 { 475 Py_ssize_t i, j; 476 477 index = (index - 1) * 2; 478 479 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) { 480 if (empty) 481 /* want empty string */ 482 i = j = 0; 483 else { 484 Py_INCREF(Py_None); 485 return Py_None; 486 } 487 } else { 488 i = STATE_OFFSET(state, state->mark[index]); 489 j = STATE_OFFSET(state, state->mark[index+1]); 490 } 491 492 return getslice(state->isbytes, state->beginning, string, i, j); 493 } 494 495 static void 496 pattern_error(Py_ssize_t status) 497 { 498 switch (status) { 499 case SRE_ERROR_RECURSION_LIMIT: 500 /* This error code seems to be unused. */ 501 PyErr_SetString( 502 PyExc_RecursionError, 503 "maximum recursion limit exceeded" 504 ); 505 break; 506 case SRE_ERROR_MEMORY: 507 PyErr_NoMemory(); 508 break; 509 case SRE_ERROR_INTERRUPTED: 510 /* An exception has already been raised, so let it fly */ 511 break; 512 default: 513 /* other error codes indicate compiler/engine bugs */ 514 PyErr_SetString( 515 PyExc_RuntimeError, 516 "internal error in regular expression engine" 517 ); 518 } 519 } 520 521 static void 522 pattern_dealloc(PatternObject* self) 523 { 524 if (self->weakreflist != NULL) 525 PyObject_ClearWeakRefs((PyObject *) self); 526 Py_XDECREF(self->pattern); 527 Py_XDECREF(self->groupindex); 528 Py_XDECREF(self->indexgroup); 529 PyObject_DEL(self); 530 } 531 532 LOCAL(Py_ssize_t) 533 sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all) 534 { 535 if (state->charsize == 1) 536 return sre_ucs1_match(state, pattern, match_all); 537 if (state->charsize == 2) 538 return sre_ucs2_match(state, pattern, match_all); 539 assert(state->charsize == 4); 540 return sre_ucs4_match(state, pattern, match_all); 541 } 542 543 LOCAL(Py_ssize_t) 544 sre_search(SRE_STATE* state, SRE_CODE* pattern) 545 { 546 if (state->charsize == 1) 547 return sre_ucs1_search(state, pattern); 548 if (state->charsize == 2) 549 return sre_ucs2_search(state, pattern); 550 assert(state->charsize == 4); 551 return sre_ucs4_search(state, pattern); 552 } 553 554 static PyObject * 555 fix_string_param(PyObject *string, PyObject *string2, const char *oldname) 556 { 557 if (string2 != NULL) { 558 if (string != NULL) { 559 PyErr_Format(PyExc_TypeError, 560 "Argument given by name ('%s') and position (1)", 561 oldname); 562 return NULL; 563 } 564 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, 565 "The '%s' keyword parameter name is deprecated. " 566 "Use 'string' instead.", oldname) < 0) 567 return NULL; 568 return string2; 569 } 570 if (string == NULL) { 571 PyErr_SetString(PyExc_TypeError, 572 "Required argument 'string' (pos 1) not found"); 573 return NULL; 574 } 575 return string; 576 } 577 578 /*[clinic input] 579 _sre.SRE_Pattern.match 580 581 string: object = NULL 582 pos: Py_ssize_t = 0 583 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 584 * 585 pattern: object = NULL 586 587 Matches zero or more characters at the beginning of the string. 588 [clinic start generated code]*/ 589 590 static PyObject * 591 _sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string, 592 Py_ssize_t pos, Py_ssize_t endpos, 593 PyObject *pattern) 594 /*[clinic end generated code: output=74b4b1da3bb2d84e input=3d079aa99979b81d]*/ 595 { 596 SRE_STATE state; 597 Py_ssize_t status; 598 PyObject *match; 599 600 string = fix_string_param(string, pattern, "pattern"); 601 if (!string) 602 return NULL; 603 if (!state_init(&state, (PatternObject *)self, string, pos, endpos)) 604 return NULL; 605 606 state.ptr = state.start; 607 608 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); 609 610 status = sre_match(&state, PatternObject_GetCode(self), 0); 611 612 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); 613 if (PyErr_Occurred()) { 614 state_fini(&state); 615 return NULL; 616 } 617 618 match = pattern_new_match(self, &state, status); 619 state_fini(&state); 620 return match; 621 } 622 623 /*[clinic input] 624 _sre.SRE_Pattern.fullmatch 625 626 string: object = NULL 627 pos: Py_ssize_t = 0 628 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 629 * 630 pattern: object = NULL 631 632 Matches against all of the string 633 [clinic start generated code]*/ 634 635 static PyObject * 636 _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string, 637 Py_ssize_t pos, Py_ssize_t endpos, 638 PyObject *pattern) 639 /*[clinic end generated code: output=1c98bc5da744ea94 input=d4228606cc12580f]*/ 640 { 641 SRE_STATE state; 642 Py_ssize_t status; 643 PyObject *match; 644 645 string = fix_string_param(string, pattern, "pattern"); 646 if (!string) 647 return NULL; 648 649 if (!state_init(&state, self, string, pos, endpos)) 650 return NULL; 651 652 state.ptr = state.start; 653 654 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr)); 655 656 status = sre_match(&state, PatternObject_GetCode(self), 1); 657 658 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); 659 if (PyErr_Occurred()) { 660 state_fini(&state); 661 return NULL; 662 } 663 664 match = pattern_new_match(self, &state, status); 665 state_fini(&state); 666 return match; 667 } 668 669 /*[clinic input] 670 _sre.SRE_Pattern.search 671 672 string: object = NULL 673 pos: Py_ssize_t = 0 674 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 675 * 676 pattern: object = NULL 677 678 Scan through string looking for a match, and return a corresponding match object instance. 679 680 Return None if no position in the string matches. 681 [clinic start generated code]*/ 682 683 static PyObject * 684 _sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string, 685 Py_ssize_t pos, Py_ssize_t endpos, 686 PyObject *pattern) 687 /*[clinic end generated code: output=3839394a18e5ea4f input=dab42720f4be3a4b]*/ 688 { 689 SRE_STATE state; 690 Py_ssize_t status; 691 PyObject *match; 692 693 string = fix_string_param(string, pattern, "pattern"); 694 if (!string) 695 return NULL; 696 697 if (!state_init(&state, self, string, pos, endpos)) 698 return NULL; 699 700 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr)); 701 702 status = sre_search(&state, PatternObject_GetCode(self)); 703 704 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); 705 706 if (PyErr_Occurred()) { 707 state_fini(&state); 708 return NULL; 709 } 710 711 match = pattern_new_match(self, &state, status); 712 state_fini(&state); 713 return match; 714 } 715 716 static PyObject* 717 call(const char* module, const char* function, PyObject* args) 718 { 719 PyObject* name; 720 PyObject* mod; 721 PyObject* func; 722 PyObject* result; 723 724 if (!args) 725 return NULL; 726 name = PyUnicode_FromString(module); 727 if (!name) 728 return NULL; 729 mod = PyImport_Import(name); 730 Py_DECREF(name); 731 if (!mod) 732 return NULL; 733 func = PyObject_GetAttrString(mod, function); 734 Py_DECREF(mod); 735 if (!func) 736 return NULL; 737 result = PyObject_CallObject(func, args); 738 Py_DECREF(func); 739 Py_DECREF(args); 740 return result; 741 } 742 743 #ifdef USE_BUILTIN_COPY 744 static int 745 deepcopy(PyObject** object, PyObject* memo) 746 { 747 PyObject* copy; 748 749 copy = call( 750 "copy", "deepcopy", 751 PyTuple_Pack(2, *object, memo) 752 ); 753 if (!copy) 754 return 0; 755 756 Py_SETREF(*object, copy); 757 758 return 1; /* success */ 759 } 760 #endif 761 762 /*[clinic input] 763 _sre.SRE_Pattern.findall 764 765 string: object = NULL 766 pos: Py_ssize_t = 0 767 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 768 * 769 source: object = NULL 770 771 Return a list of all non-overlapping matches of pattern in string. 772 [clinic start generated code]*/ 773 774 static PyObject * 775 _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, 776 Py_ssize_t pos, Py_ssize_t endpos, 777 PyObject *source) 778 /*[clinic end generated code: output=51295498b300639d input=df688355c056b9de]*/ 779 { 780 SRE_STATE state; 781 PyObject* list; 782 Py_ssize_t status; 783 Py_ssize_t i, b, e; 784 785 string = fix_string_param(string, source, "source"); 786 if (!string) 787 return NULL; 788 789 if (!state_init(&state, self, string, pos, endpos)) 790 return NULL; 791 792 list = PyList_New(0); 793 if (!list) { 794 state_fini(&state); 795 return NULL; 796 } 797 798 while (state.start <= state.end) { 799 800 PyObject* item; 801 802 state_reset(&state); 803 804 state.ptr = state.start; 805 806 status = sre_search(&state, PatternObject_GetCode(self)); 807 if (PyErr_Occurred()) 808 goto error; 809 810 if (status <= 0) { 811 if (status == 0) 812 break; 813 pattern_error(status); 814 goto error; 815 } 816 817 /* don't bother to build a match object */ 818 switch (self->groups) { 819 case 0: 820 b = STATE_OFFSET(&state, state.start); 821 e = STATE_OFFSET(&state, state.ptr); 822 item = getslice(state.isbytes, state.beginning, 823 string, b, e); 824 if (!item) 825 goto error; 826 break; 827 case 1: 828 item = state_getslice(&state, 1, string, 1); 829 if (!item) 830 goto error; 831 break; 832 default: 833 item = PyTuple_New(self->groups); 834 if (!item) 835 goto error; 836 for (i = 0; i < self->groups; i++) { 837 PyObject* o = state_getslice(&state, i+1, string, 1); 838 if (!o) { 839 Py_DECREF(item); 840 goto error; 841 } 842 PyTuple_SET_ITEM(item, i, o); 843 } 844 break; 845 } 846 847 status = PyList_Append(list, item); 848 Py_DECREF(item); 849 if (status < 0) 850 goto error; 851 852 if (state.ptr == state.start) 853 state.start = (void*) ((char*) state.ptr + state.charsize); 854 else 855 state.start = state.ptr; 856 857 } 858 859 state_fini(&state); 860 return list; 861 862 error: 863 Py_DECREF(list); 864 state_fini(&state); 865 return NULL; 866 867 } 868 869 /*[clinic input] 870 _sre.SRE_Pattern.finditer 871 872 string: object 873 pos: Py_ssize_t = 0 874 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 875 876 Return an iterator over all non-overlapping matches for the RE pattern in string. 877 878 For each match, the iterator returns a match object. 879 [clinic start generated code]*/ 880 881 static PyObject * 882 _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string, 883 Py_ssize_t pos, Py_ssize_t endpos) 884 /*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/ 885 { 886 PyObject* scanner; 887 PyObject* search; 888 PyObject* iterator; 889 890 scanner = pattern_scanner(self, string, pos, endpos); 891 if (!scanner) 892 return NULL; 893 894 search = PyObject_GetAttrString(scanner, "search"); 895 Py_DECREF(scanner); 896 if (!search) 897 return NULL; 898 899 iterator = PyCallIter_New(search, Py_None); 900 Py_DECREF(search); 901 902 return iterator; 903 } 904 905 /*[clinic input] 906 _sre.SRE_Pattern.scanner 907 908 string: object 909 pos: Py_ssize_t = 0 910 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 911 912 [clinic start generated code]*/ 913 914 static PyObject * 915 _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string, 916 Py_ssize_t pos, Py_ssize_t endpos) 917 /*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/ 918 { 919 return pattern_scanner(self, string, pos, endpos); 920 } 921 922 /*[clinic input] 923 _sre.SRE_Pattern.split 924 925 string: object = NULL 926 maxsplit: Py_ssize_t = 0 927 * 928 source: object = NULL 929 930 Split string by the occurrences of pattern. 931 [clinic start generated code]*/ 932 933 static PyObject * 934 _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, 935 Py_ssize_t maxsplit, PyObject *source) 936 /*[clinic end generated code: output=20bac2ff55b9f84c input=41e0b2e35e599d7b]*/ 937 { 938 SRE_STATE state; 939 PyObject* list; 940 PyObject* item; 941 Py_ssize_t status; 942 Py_ssize_t n; 943 Py_ssize_t i; 944 void* last; 945 946 string = fix_string_param(string, source, "source"); 947 if (!string) 948 return NULL; 949 950 assert(self->codesize != 0); 951 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) { 952 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) { 953 PyErr_SetString(PyExc_ValueError, 954 "split() requires a non-empty pattern match."); 955 return NULL; 956 } 957 if (PyErr_WarnEx(PyExc_FutureWarning, 958 "split() requires a non-empty pattern match.", 959 1) < 0) 960 return NULL; 961 } 962 963 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) 964 return NULL; 965 966 list = PyList_New(0); 967 if (!list) { 968 state_fini(&state); 969 return NULL; 970 } 971 972 n = 0; 973 last = state.start; 974 975 while (!maxsplit || n < maxsplit) { 976 977 state_reset(&state); 978 979 state.ptr = state.start; 980 981 status = sre_search(&state, PatternObject_GetCode(self)); 982 if (PyErr_Occurred()) 983 goto error; 984 985 if (status <= 0) { 986 if (status == 0) 987 break; 988 pattern_error(status); 989 goto error; 990 } 991 992 if (state.start == state.ptr) { 993 if (last == state.end || state.ptr == state.end) 994 break; 995 /* skip one character */ 996 state.start = (void*) ((char*) state.ptr + state.charsize); 997 continue; 998 } 999 1000 /* get segment before this match */ 1001 item = getslice(state.isbytes, state.beginning, 1002 string, STATE_OFFSET(&state, last), 1003 STATE_OFFSET(&state, state.start) 1004 ); 1005 if (!item) 1006 goto error; 1007 status = PyList_Append(list, item); 1008 Py_DECREF(item); 1009 if (status < 0) 1010 goto error; 1011 1012 /* add groups (if any) */ 1013 for (i = 0; i < self->groups; i++) { 1014 item = state_getslice(&state, i+1, string, 0); 1015 if (!item) 1016 goto error; 1017 status = PyList_Append(list, item); 1018 Py_DECREF(item); 1019 if (status < 0) 1020 goto error; 1021 } 1022 1023 n = n + 1; 1024 1025 last = state.start = state.ptr; 1026 1027 } 1028 1029 /* get segment following last match (even if empty) */ 1030 item = getslice(state.isbytes, state.beginning, 1031 string, STATE_OFFSET(&state, last), state.endpos 1032 ); 1033 if (!item) 1034 goto error; 1035 status = PyList_Append(list, item); 1036 Py_DECREF(item); 1037 if (status < 0) 1038 goto error; 1039 1040 state_fini(&state); 1041 return list; 1042 1043 error: 1044 Py_DECREF(list); 1045 state_fini(&state); 1046 return NULL; 1047 1048 } 1049 1050 static PyObject* 1051 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, 1052 Py_ssize_t count, Py_ssize_t subn) 1053 { 1054 SRE_STATE state; 1055 PyObject* list; 1056 PyObject* joiner; 1057 PyObject* item; 1058 PyObject* filter; 1059 PyObject* match; 1060 void* ptr; 1061 Py_ssize_t status; 1062 Py_ssize_t n; 1063 Py_ssize_t i, b, e; 1064 int isbytes, charsize; 1065 int filter_is_callable; 1066 Py_buffer view; 1067 1068 if (PyCallable_Check(ptemplate)) { 1069 /* sub/subn takes either a function or a template */ 1070 filter = ptemplate; 1071 Py_INCREF(filter); 1072 filter_is_callable = 1; 1073 } else { 1074 /* if not callable, check if it's a literal string */ 1075 int literal; 1076 view.buf = NULL; 1077 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view); 1078 b = charsize; 1079 if (ptr) { 1080 if (charsize == 1) 1081 literal = memchr(ptr, '\\', n) == NULL; 1082 else 1083 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1; 1084 } else { 1085 PyErr_Clear(); 1086 literal = 0; 1087 } 1088 if (view.buf) 1089 PyBuffer_Release(&view); 1090 if (literal) { 1091 filter = ptemplate; 1092 Py_INCREF(filter); 1093 filter_is_callable = 0; 1094 } else { 1095 /* not a literal; hand it over to the template compiler */ 1096 filter = call( 1097 SRE_PY_MODULE, "_subx", 1098 PyTuple_Pack(2, self, ptemplate) 1099 ); 1100 if (!filter) 1101 return NULL; 1102 filter_is_callable = PyCallable_Check(filter); 1103 } 1104 } 1105 1106 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) { 1107 Py_DECREF(filter); 1108 return NULL; 1109 } 1110 1111 list = PyList_New(0); 1112 if (!list) { 1113 Py_DECREF(filter); 1114 state_fini(&state); 1115 return NULL; 1116 } 1117 1118 n = i = 0; 1119 1120 while (!count || n < count) { 1121 1122 state_reset(&state); 1123 1124 state.ptr = state.start; 1125 1126 status = sre_search(&state, PatternObject_GetCode(self)); 1127 if (PyErr_Occurred()) 1128 goto error; 1129 1130 if (status <= 0) { 1131 if (status == 0) 1132 break; 1133 pattern_error(status); 1134 goto error; 1135 } 1136 1137 b = STATE_OFFSET(&state, state.start); 1138 e = STATE_OFFSET(&state, state.ptr); 1139 1140 if (i < b) { 1141 /* get segment before this match */ 1142 item = getslice(state.isbytes, state.beginning, 1143 string, i, b); 1144 if (!item) 1145 goto error; 1146 status = PyList_Append(list, item); 1147 Py_DECREF(item); 1148 if (status < 0) 1149 goto error; 1150 1151 } else if (i == b && i == e && n > 0) 1152 /* ignore empty match on latest position */ 1153 goto next; 1154 1155 if (filter_is_callable) { 1156 /* pass match object through filter */ 1157 match = pattern_new_match(self, &state, 1); 1158 if (!match) 1159 goto error; 1160 item = _PyObject_CallArg1(filter, match); 1161 Py_DECREF(match); 1162 if (!item) 1163 goto error; 1164 } else { 1165 /* filter is literal string */ 1166 item = filter; 1167 Py_INCREF(item); 1168 } 1169 1170 /* add to list */ 1171 if (item != Py_None) { 1172 status = PyList_Append(list, item); 1173 Py_DECREF(item); 1174 if (status < 0) 1175 goto error; 1176 } 1177 1178 i = e; 1179 n = n + 1; 1180 1181 next: 1182 /* move on */ 1183 if (state.ptr == state.end) 1184 break; 1185 if (state.ptr == state.start) 1186 state.start = (void*) ((char*) state.ptr + state.charsize); 1187 else 1188 state.start = state.ptr; 1189 1190 } 1191 1192 /* get segment following last match */ 1193 if (i < state.endpos) { 1194 item = getslice(state.isbytes, state.beginning, 1195 string, i, state.endpos); 1196 if (!item) 1197 goto error; 1198 status = PyList_Append(list, item); 1199 Py_DECREF(item); 1200 if (status < 0) 1201 goto error; 1202 } 1203 1204 state_fini(&state); 1205 1206 Py_DECREF(filter); 1207 1208 /* convert list to single string (also removes list) */ 1209 joiner = getslice(state.isbytes, state.beginning, string, 0, 0); 1210 if (!joiner) { 1211 Py_DECREF(list); 1212 return NULL; 1213 } 1214 if (PyList_GET_SIZE(list) == 0) { 1215 Py_DECREF(list); 1216 item = joiner; 1217 } 1218 else { 1219 if (state.isbytes) 1220 item = _PyBytes_Join(joiner, list); 1221 else 1222 item = PyUnicode_Join(joiner, list); 1223 Py_DECREF(joiner); 1224 Py_DECREF(list); 1225 if (!item) 1226 return NULL; 1227 } 1228 1229 if (subn) 1230 return Py_BuildValue("Nn", item, n); 1231 1232 return item; 1233 1234 error: 1235 Py_DECREF(list); 1236 state_fini(&state); 1237 Py_DECREF(filter); 1238 return NULL; 1239 1240 } 1241 1242 /*[clinic input] 1243 _sre.SRE_Pattern.sub 1244 1245 repl: object 1246 string: object 1247 count: Py_ssize_t = 0 1248 1249 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl. 1250 [clinic start generated code]*/ 1251 1252 static PyObject * 1253 _sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl, 1254 PyObject *string, Py_ssize_t count) 1255 /*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/ 1256 { 1257 return pattern_subx(self, repl, string, count, 0); 1258 } 1259 1260 /*[clinic input] 1261 _sre.SRE_Pattern.subn 1262 1263 repl: object 1264 string: object 1265 count: Py_ssize_t = 0 1266 1267 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl. 1268 [clinic start generated code]*/ 1269 1270 static PyObject * 1271 _sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl, 1272 PyObject *string, Py_ssize_t count) 1273 /*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/ 1274 { 1275 return pattern_subx(self, repl, string, count, 1); 1276 } 1277 1278 /*[clinic input] 1279 _sre.SRE_Pattern.__copy__ 1280 1281 [clinic start generated code]*/ 1282 1283 static PyObject * 1284 _sre_SRE_Pattern___copy___impl(PatternObject *self) 1285 /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/ 1286 { 1287 #ifdef USE_BUILTIN_COPY 1288 PatternObject* copy; 1289 int offset; 1290 1291 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize); 1292 if (!copy) 1293 return NULL; 1294 1295 offset = offsetof(PatternObject, groups); 1296 1297 Py_XINCREF(self->groupindex); 1298 Py_XINCREF(self->indexgroup); 1299 Py_XINCREF(self->pattern); 1300 1301 memcpy((char*) copy + offset, (char*) self + offset, 1302 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset); 1303 copy->weakreflist = NULL; 1304 1305 return (PyObject*) copy; 1306 #else 1307 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object"); 1308 return NULL; 1309 #endif 1310 } 1311 1312 /*[clinic input] 1313 _sre.SRE_Pattern.__deepcopy__ 1314 1315 memo: object 1316 1317 [clinic start generated code]*/ 1318 1319 static PyObject * 1320 _sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo) 1321 /*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/ 1322 { 1323 #ifdef USE_BUILTIN_COPY 1324 PatternObject* copy; 1325 1326 copy = (PatternObject*) pattern_copy(self); 1327 if (!copy) 1328 return NULL; 1329 1330 if (!deepcopy(©->groupindex, memo) || 1331 !deepcopy(©->indexgroup, memo) || 1332 !deepcopy(©->pattern, memo)) { 1333 Py_DECREF(copy); 1334 return NULL; 1335 } 1336 1337 #else 1338 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object"); 1339 return NULL; 1340 #endif 1341 } 1342 1343 static PyObject * 1344 pattern_repr(PatternObject *obj) 1345 { 1346 static const struct { 1347 const char *name; 1348 int value; 1349 } flag_names[] = { 1350 {"re.TEMPLATE", SRE_FLAG_TEMPLATE}, 1351 {"re.IGNORECASE", SRE_FLAG_IGNORECASE}, 1352 {"re.LOCALE", SRE_FLAG_LOCALE}, 1353 {"re.MULTILINE", SRE_FLAG_MULTILINE}, 1354 {"re.DOTALL", SRE_FLAG_DOTALL}, 1355 {"re.UNICODE", SRE_FLAG_UNICODE}, 1356 {"re.VERBOSE", SRE_FLAG_VERBOSE}, 1357 {"re.DEBUG", SRE_FLAG_DEBUG}, 1358 {"re.ASCII", SRE_FLAG_ASCII}, 1359 }; 1360 PyObject *result = NULL; 1361 PyObject *flag_items; 1362 size_t i; 1363 int flags = obj->flags; 1364 1365 /* Omit re.UNICODE for valid string patterns. */ 1366 if (obj->isbytes == 0 && 1367 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) == 1368 SRE_FLAG_UNICODE) 1369 flags &= ~SRE_FLAG_UNICODE; 1370 1371 flag_items = PyList_New(0); 1372 if (!flag_items) 1373 return NULL; 1374 1375 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) { 1376 if (flags & flag_names[i].value) { 1377 PyObject *item = PyUnicode_FromString(flag_names[i].name); 1378 if (!item) 1379 goto done; 1380 1381 if (PyList_Append(flag_items, item) < 0) { 1382 Py_DECREF(item); 1383 goto done; 1384 } 1385 Py_DECREF(item); 1386 flags &= ~flag_names[i].value; 1387 } 1388 } 1389 if (flags) { 1390 PyObject *item = PyUnicode_FromFormat("0x%x", flags); 1391 if (!item) 1392 goto done; 1393 1394 if (PyList_Append(flag_items, item) < 0) { 1395 Py_DECREF(item); 1396 goto done; 1397 } 1398 Py_DECREF(item); 1399 } 1400 1401 if (PyList_Size(flag_items) > 0) { 1402 PyObject *flags_result; 1403 PyObject *sep = PyUnicode_FromString("|"); 1404 if (!sep) 1405 goto done; 1406 flags_result = PyUnicode_Join(sep, flag_items); 1407 Py_DECREF(sep); 1408 if (!flags_result) 1409 goto done; 1410 result = PyUnicode_FromFormat("re.compile(%.200R, %S)", 1411 obj->pattern, flags_result); 1412 Py_DECREF(flags_result); 1413 } 1414 else { 1415 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern); 1416 } 1417 1418 done: 1419 Py_DECREF(flag_items); 1420 return result; 1421 } 1422 1423 PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects"); 1424 1425 /* PatternObject's 'groupindex' method. */ 1426 static PyObject * 1427 pattern_groupindex(PatternObject *self) 1428 { 1429 return PyDictProxy_New(self->groupindex); 1430 } 1431 1432 static int _validate(PatternObject *self); /* Forward */ 1433 1434 /*[clinic input] 1435 _sre.compile 1436 1437 pattern: object 1438 flags: int 1439 code: object(subclass_of='&PyList_Type') 1440 groups: Py_ssize_t 1441 groupindex: object 1442 indexgroup: object 1443 1444 [clinic start generated code]*/ 1445 1446 static PyObject * 1447 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags, 1448 PyObject *code, Py_ssize_t groups, PyObject *groupindex, 1449 PyObject *indexgroup) 1450 /*[clinic end generated code: output=ef9c2b3693776404 input=7d059ec8ae1edb85]*/ 1451 { 1452 /* "compile" pattern descriptor to pattern object */ 1453 1454 PatternObject* self; 1455 Py_ssize_t i, n; 1456 1457 n = PyList_GET_SIZE(code); 1458 /* coverity[ampersand_in_size] */ 1459 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n); 1460 if (!self) 1461 return NULL; 1462 self->weakreflist = NULL; 1463 self->pattern = NULL; 1464 self->groupindex = NULL; 1465 self->indexgroup = NULL; 1466 1467 self->codesize = n; 1468 1469 for (i = 0; i < n; i++) { 1470 PyObject *o = PyList_GET_ITEM(code, i); 1471 unsigned long value = PyLong_AsUnsignedLong(o); 1472 self->code[i] = (SRE_CODE) value; 1473 if ((unsigned long) self->code[i] != value) { 1474 PyErr_SetString(PyExc_OverflowError, 1475 "regular expression code size limit exceeded"); 1476 break; 1477 } 1478 } 1479 1480 if (PyErr_Occurred()) { 1481 Py_DECREF(self); 1482 return NULL; 1483 } 1484 1485 if (pattern == Py_None) { 1486 self->isbytes = -1; 1487 } 1488 else { 1489 Py_ssize_t p_length; 1490 int charsize; 1491 Py_buffer view; 1492 view.buf = NULL; 1493 if (!getstring(pattern, &p_length, &self->isbytes, 1494 &charsize, &view)) { 1495 Py_DECREF(self); 1496 return NULL; 1497 } 1498 if (view.buf) 1499 PyBuffer_Release(&view); 1500 } 1501 1502 Py_INCREF(pattern); 1503 self->pattern = pattern; 1504 1505 self->flags = flags; 1506 1507 self->groups = groups; 1508 1509 Py_INCREF(groupindex); 1510 self->groupindex = groupindex; 1511 1512 Py_INCREF(indexgroup); 1513 self->indexgroup = indexgroup; 1514 1515 if (!_validate(self)) { 1516 Py_DECREF(self); 1517 return NULL; 1518 } 1519 1520 return (PyObject*) self; 1521 } 1522 1523 /* -------------------------------------------------------------------- */ 1524 /* Code validation */ 1525 1526 /* To learn more about this code, have a look at the _compile() function in 1527 Lib/sre_compile.py. The validation functions below checks the code array 1528 for conformance with the code patterns generated there. 1529 1530 The nice thing about the generated code is that it is position-independent: 1531 all jumps are relative jumps forward. Also, jumps don't cross each other: 1532 the target of a later jump is always earlier than the target of an earlier 1533 jump. IOW, this is okay: 1534 1535 J---------J-------T--------T 1536 \ \_____/ / 1537 \______________________/ 1538 1539 but this is not: 1540 1541 J---------J-------T--------T 1542 \_________\_____/ / 1543 \____________/ 1544 1545 It also helps that SRE_CODE is always an unsigned type. 1546 */ 1547 1548 /* Defining this one enables tracing of the validator */ 1549 #undef VVERBOSE 1550 1551 /* Trace macro for the validator */ 1552 #if defined(VVERBOSE) 1553 #define VTRACE(v) printf v 1554 #else 1555 #define VTRACE(v) do {} while(0) /* do nothing */ 1556 #endif 1557 1558 /* Report failure */ 1559 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0) 1560 1561 /* Extract opcode, argument, or skip count from code array */ 1562 #define GET_OP \ 1563 do { \ 1564 VTRACE(("%p: ", code)); \ 1565 if (code >= end) FAIL; \ 1566 op = *code++; \ 1567 VTRACE(("%lu (op)\n", (unsigned long)op)); \ 1568 } while (0) 1569 #define GET_ARG \ 1570 do { \ 1571 VTRACE(("%p= ", code)); \ 1572 if (code >= end) FAIL; \ 1573 arg = *code++; \ 1574 VTRACE(("%lu (arg)\n", (unsigned long)arg)); \ 1575 } while (0) 1576 #define GET_SKIP_ADJ(adj) \ 1577 do { \ 1578 VTRACE(("%p= ", code)); \ 1579 if (code >= end) FAIL; \ 1580 skip = *code; \ 1581 VTRACE(("%lu (skip to %p)\n", \ 1582 (unsigned long)skip, code+skip)); \ 1583 if (skip-adj > (uintptr_t)(end - code)) \ 1584 FAIL; \ 1585 code++; \ 1586 } while (0) 1587 #define GET_SKIP GET_SKIP_ADJ(0) 1588 1589 static int 1590 _validate_charset(SRE_CODE *code, SRE_CODE *end) 1591 { 1592 /* Some variables are manipulated by the macros above */ 1593 SRE_CODE op; 1594 SRE_CODE arg; 1595 SRE_CODE offset; 1596 int i; 1597 1598 while (code < end) { 1599 GET_OP; 1600 switch (op) { 1601 1602 case SRE_OP_NEGATE: 1603 break; 1604 1605 case SRE_OP_LITERAL: 1606 GET_ARG; 1607 break; 1608 1609 case SRE_OP_RANGE: 1610 case SRE_OP_RANGE_IGNORE: 1611 GET_ARG; 1612 GET_ARG; 1613 break; 1614 1615 case SRE_OP_CHARSET: 1616 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */ 1617 if (offset > (uintptr_t)(end - code)) 1618 FAIL; 1619 code += offset; 1620 break; 1621 1622 case SRE_OP_BIGCHARSET: 1623 GET_ARG; /* Number of blocks */ 1624 offset = 256/sizeof(SRE_CODE); /* 256-byte table */ 1625 if (offset > (uintptr_t)(end - code)) 1626 FAIL; 1627 /* Make sure that each byte points to a valid block */ 1628 for (i = 0; i < 256; i++) { 1629 if (((unsigned char *)code)[i] >= arg) 1630 FAIL; 1631 } 1632 code += offset; 1633 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */ 1634 if (offset > (uintptr_t)(end - code)) 1635 FAIL; 1636 code += offset; 1637 break; 1638 1639 case SRE_OP_CATEGORY: 1640 GET_ARG; 1641 switch (arg) { 1642 case SRE_CATEGORY_DIGIT: 1643 case SRE_CATEGORY_NOT_DIGIT: 1644 case SRE_CATEGORY_SPACE: 1645 case SRE_CATEGORY_NOT_SPACE: 1646 case SRE_CATEGORY_WORD: 1647 case SRE_CATEGORY_NOT_WORD: 1648 case SRE_CATEGORY_LINEBREAK: 1649 case SRE_CATEGORY_NOT_LINEBREAK: 1650 case SRE_CATEGORY_LOC_WORD: 1651 case SRE_CATEGORY_LOC_NOT_WORD: 1652 case SRE_CATEGORY_UNI_DIGIT: 1653 case SRE_CATEGORY_UNI_NOT_DIGIT: 1654 case SRE_CATEGORY_UNI_SPACE: 1655 case SRE_CATEGORY_UNI_NOT_SPACE: 1656 case SRE_CATEGORY_UNI_WORD: 1657 case SRE_CATEGORY_UNI_NOT_WORD: 1658 case SRE_CATEGORY_UNI_LINEBREAK: 1659 case SRE_CATEGORY_UNI_NOT_LINEBREAK: 1660 break; 1661 default: 1662 FAIL; 1663 } 1664 break; 1665 1666 default: 1667 FAIL; 1668 1669 } 1670 } 1671 1672 return 1; 1673 } 1674 1675 static int 1676 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) 1677 { 1678 /* Some variables are manipulated by the macros above */ 1679 SRE_CODE op; 1680 SRE_CODE arg; 1681 SRE_CODE skip; 1682 1683 VTRACE(("code=%p, end=%p\n", code, end)); 1684 1685 if (code > end) 1686 FAIL; 1687 1688 while (code < end) { 1689 GET_OP; 1690 switch (op) { 1691 1692 case SRE_OP_MARK: 1693 /* We don't check whether marks are properly nested; the 1694 sre_match() code is robust even if they don't, and the worst 1695 you can get is nonsensical match results. */ 1696 GET_ARG; 1697 if (arg > 2 * (size_t)groups + 1) { 1698 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups)); 1699 FAIL; 1700 } 1701 break; 1702 1703 case SRE_OP_LITERAL: 1704 case SRE_OP_NOT_LITERAL: 1705 case SRE_OP_LITERAL_IGNORE: 1706 case SRE_OP_NOT_LITERAL_IGNORE: 1707 GET_ARG; 1708 /* The arg is just a character, nothing to check */ 1709 break; 1710 1711 case SRE_OP_SUCCESS: 1712 case SRE_OP_FAILURE: 1713 /* Nothing to check; these normally end the matching process */ 1714 break; 1715 1716 case SRE_OP_AT: 1717 GET_ARG; 1718 switch (arg) { 1719 case SRE_AT_BEGINNING: 1720 case SRE_AT_BEGINNING_STRING: 1721 case SRE_AT_BEGINNING_LINE: 1722 case SRE_AT_END: 1723 case SRE_AT_END_LINE: 1724 case SRE_AT_END_STRING: 1725 case SRE_AT_BOUNDARY: 1726 case SRE_AT_NON_BOUNDARY: 1727 case SRE_AT_LOC_BOUNDARY: 1728 case SRE_AT_LOC_NON_BOUNDARY: 1729 case SRE_AT_UNI_BOUNDARY: 1730 case SRE_AT_UNI_NON_BOUNDARY: 1731 break; 1732 default: 1733 FAIL; 1734 } 1735 break; 1736 1737 case SRE_OP_ANY: 1738 case SRE_OP_ANY_ALL: 1739 /* These have no operands */ 1740 break; 1741 1742 case SRE_OP_IN: 1743 case SRE_OP_IN_IGNORE: 1744 GET_SKIP; 1745 /* Stop 1 before the end; we check the FAILURE below */ 1746 if (!_validate_charset(code, code+skip-2)) 1747 FAIL; 1748 if (code[skip-2] != SRE_OP_FAILURE) 1749 FAIL; 1750 code += skip-1; 1751 break; 1752 1753 case SRE_OP_INFO: 1754 { 1755 /* A minimal info field is 1756 <INFO> <1=skip> <2=flags> <3=min> <4=max>; 1757 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags, 1758 more follows. */ 1759 SRE_CODE flags, i; 1760 SRE_CODE *newcode; 1761 GET_SKIP; 1762 newcode = code+skip-1; 1763 GET_ARG; flags = arg; 1764 GET_ARG; 1765 GET_ARG; 1766 /* Check that only valid flags are present */ 1767 if ((flags & ~(SRE_INFO_PREFIX | 1768 SRE_INFO_LITERAL | 1769 SRE_INFO_CHARSET)) != 0) 1770 FAIL; 1771 /* PREFIX and CHARSET are mutually exclusive */ 1772 if ((flags & SRE_INFO_PREFIX) && 1773 (flags & SRE_INFO_CHARSET)) 1774 FAIL; 1775 /* LITERAL implies PREFIX */ 1776 if ((flags & SRE_INFO_LITERAL) && 1777 !(flags & SRE_INFO_PREFIX)) 1778 FAIL; 1779 /* Validate the prefix */ 1780 if (flags & SRE_INFO_PREFIX) { 1781 SRE_CODE prefix_len; 1782 GET_ARG; prefix_len = arg; 1783 GET_ARG; 1784 /* Here comes the prefix string */ 1785 if (prefix_len > (uintptr_t)(newcode - code)) 1786 FAIL; 1787 code += prefix_len; 1788 /* And here comes the overlap table */ 1789 if (prefix_len > (uintptr_t)(newcode - code)) 1790 FAIL; 1791 /* Each overlap value should be < prefix_len */ 1792 for (i = 0; i < prefix_len; i++) { 1793 if (code[i] >= prefix_len) 1794 FAIL; 1795 } 1796 code += prefix_len; 1797 } 1798 /* Validate the charset */ 1799 if (flags & SRE_INFO_CHARSET) { 1800 if (!_validate_charset(code, newcode-1)) 1801 FAIL; 1802 if (newcode[-1] != SRE_OP_FAILURE) 1803 FAIL; 1804 code = newcode; 1805 } 1806 else if (code != newcode) { 1807 VTRACE(("code=%p, newcode=%p\n", code, newcode)); 1808 FAIL; 1809 } 1810 } 1811 break; 1812 1813 case SRE_OP_BRANCH: 1814 { 1815 SRE_CODE *target = NULL; 1816 for (;;) { 1817 GET_SKIP; 1818 if (skip == 0) 1819 break; 1820 /* Stop 2 before the end; we check the JUMP below */ 1821 if (!_validate_inner(code, code+skip-3, groups)) 1822 FAIL; 1823 code += skip-3; 1824 /* Check that it ends with a JUMP, and that each JUMP 1825 has the same target */ 1826 GET_OP; 1827 if (op != SRE_OP_JUMP) 1828 FAIL; 1829 GET_SKIP; 1830 if (target == NULL) 1831 target = code+skip-1; 1832 else if (code+skip-1 != target) 1833 FAIL; 1834 } 1835 } 1836 break; 1837 1838 case SRE_OP_REPEAT_ONE: 1839 case SRE_OP_MIN_REPEAT_ONE: 1840 { 1841 SRE_CODE min, max; 1842 GET_SKIP; 1843 GET_ARG; min = arg; 1844 GET_ARG; max = arg; 1845 if (min > max) 1846 FAIL; 1847 if (max > SRE_MAXREPEAT) 1848 FAIL; 1849 if (!_validate_inner(code, code+skip-4, groups)) 1850 FAIL; 1851 code += skip-4; 1852 GET_OP; 1853 if (op != SRE_OP_SUCCESS) 1854 FAIL; 1855 } 1856 break; 1857 1858 case SRE_OP_REPEAT: 1859 { 1860 SRE_CODE min, max; 1861 GET_SKIP; 1862 GET_ARG; min = arg; 1863 GET_ARG; max = arg; 1864 if (min > max) 1865 FAIL; 1866 if (max > SRE_MAXREPEAT) 1867 FAIL; 1868 if (!_validate_inner(code, code+skip-3, groups)) 1869 FAIL; 1870 code += skip-3; 1871 GET_OP; 1872 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL) 1873 FAIL; 1874 } 1875 break; 1876 1877 case SRE_OP_GROUPREF: 1878 case SRE_OP_GROUPREF_IGNORE: 1879 GET_ARG; 1880 if (arg >= (size_t)groups) 1881 FAIL; 1882 break; 1883 1884 case SRE_OP_GROUPREF_EXISTS: 1885 /* The regex syntax for this is: '(?(group)then|else)', where 1886 'group' is either an integer group number or a group name, 1887 'then' and 'else' are sub-regexes, and 'else' is optional. */ 1888 GET_ARG; 1889 if (arg >= (size_t)groups) 1890 FAIL; 1891 GET_SKIP_ADJ(1); 1892 code--; /* The skip is relative to the first arg! */ 1893 /* There are two possibilities here: if there is both a 'then' 1894 part and an 'else' part, the generated code looks like: 1895 1896 GROUPREF_EXISTS 1897 <group> 1898 <skipyes> 1899 ...then part... 1900 JUMP 1901 <skipno> 1902 (<skipyes> jumps here) 1903 ...else part... 1904 (<skipno> jumps here) 1905 1906 If there is only a 'then' part, it looks like: 1907 1908 GROUPREF_EXISTS 1909 <group> 1910 <skip> 1911 ...then part... 1912 (<skip> jumps here) 1913 1914 There is no direct way to decide which it is, and we don't want 1915 to allow arbitrary jumps anywhere in the code; so we just look 1916 for a JUMP opcode preceding our skip target. 1917 */ 1918 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) && 1919 code[skip-3] == SRE_OP_JUMP) 1920 { 1921 VTRACE(("both then and else parts present\n")); 1922 if (!_validate_inner(code+1, code+skip-3, groups)) 1923 FAIL; 1924 code += skip-2; /* Position after JUMP, at <skipno> */ 1925 GET_SKIP; 1926 if (!_validate_inner(code, code+skip-1, groups)) 1927 FAIL; 1928 code += skip-1; 1929 } 1930 else { 1931 VTRACE(("only a then part present\n")); 1932 if (!_validate_inner(code+1, code+skip-1, groups)) 1933 FAIL; 1934 code += skip-1; 1935 } 1936 break; 1937 1938 case SRE_OP_ASSERT: 1939 case SRE_OP_ASSERT_NOT: 1940 GET_SKIP; 1941 GET_ARG; /* 0 for lookahead, width for lookbehind */ 1942 code--; /* Back up over arg to simplify math below */ 1943 if (arg & 0x80000000) 1944 FAIL; /* Width too large */ 1945 /* Stop 1 before the end; we check the SUCCESS below */ 1946 if (!_validate_inner(code+1, code+skip-2, groups)) 1947 FAIL; 1948 code += skip-2; 1949 GET_OP; 1950 if (op != SRE_OP_SUCCESS) 1951 FAIL; 1952 break; 1953 1954 default: 1955 FAIL; 1956 1957 } 1958 } 1959 1960 VTRACE(("okay\n")); 1961 return 1; 1962 } 1963 1964 static int 1965 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) 1966 { 1967 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS || 1968 code >= end || end[-1] != SRE_OP_SUCCESS) 1969 FAIL; 1970 return _validate_inner(code, end-1, groups); 1971 } 1972 1973 static int 1974 _validate(PatternObject *self) 1975 { 1976 if (!_validate_outer(self->code, self->code+self->codesize, self->groups)) 1977 { 1978 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code"); 1979 return 0; 1980 } 1981 else 1982 VTRACE(("Success!\n")); 1983 return 1; 1984 } 1985 1986 /* -------------------------------------------------------------------- */ 1987 /* match methods */ 1988 1989 static void 1990 match_dealloc(MatchObject* self) 1991 { 1992 Py_XDECREF(self->regs); 1993 Py_XDECREF(self->string); 1994 Py_DECREF(self->pattern); 1995 PyObject_DEL(self); 1996 } 1997 1998 static PyObject* 1999 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) 2000 { 2001 Py_ssize_t length; 2002 int isbytes, charsize; 2003 Py_buffer view; 2004 PyObject *result; 2005 void* ptr; 2006 Py_ssize_t i, j; 2007 2008 if (index < 0 || index >= self->groups) { 2009 /* raise IndexError if we were given a bad group number */ 2010 PyErr_SetString( 2011 PyExc_IndexError, 2012 "no such group" 2013 ); 2014 return NULL; 2015 } 2016 2017 index *= 2; 2018 2019 if (self->string == Py_None || self->mark[index] < 0) { 2020 /* return default value if the string or group is undefined */ 2021 Py_INCREF(def); 2022 return def; 2023 } 2024 2025 ptr = getstring(self->string, &length, &isbytes, &charsize, &view); 2026 if (ptr == NULL) 2027 return NULL; 2028 2029 i = self->mark[index]; 2030 j = self->mark[index+1]; 2031 i = Py_MIN(i, length); 2032 j = Py_MIN(j, length); 2033 result = getslice(isbytes, ptr, self->string, i, j); 2034 if (isbytes && view.buf != NULL) 2035 PyBuffer_Release(&view); 2036 return result; 2037 } 2038 2039 static Py_ssize_t 2040 match_getindex(MatchObject* self, PyObject* index) 2041 { 2042 Py_ssize_t i; 2043 2044 if (index == NULL) 2045 /* Default value */ 2046 return 0; 2047 2048 if (PyIndex_Check(index)) { 2049 return PyNumber_AsSsize_t(index, NULL); 2050 } 2051 2052 i = -1; 2053 2054 if (self->pattern->groupindex) { 2055 index = PyObject_GetItem(self->pattern->groupindex, index); 2056 if (index) { 2057 if (PyLong_Check(index)) 2058 i = PyLong_AsSsize_t(index); 2059 Py_DECREF(index); 2060 } else 2061 PyErr_Clear(); 2062 } 2063 2064 return i; 2065 } 2066 2067 static PyObject* 2068 match_getslice(MatchObject* self, PyObject* index, PyObject* def) 2069 { 2070 return match_getslice_by_index(self, match_getindex(self, index), def); 2071 } 2072 2073 /*[clinic input] 2074 _sre.SRE_Match.expand 2075 2076 template: object 2077 2078 Return the string obtained by doing backslash substitution on the string template, as done by the sub() method. 2079 [clinic start generated code]*/ 2080 2081 static PyObject * 2082 _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template) 2083 /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/ 2084 { 2085 /* delegate to Python code */ 2086 return call( 2087 SRE_PY_MODULE, "_expand", 2088 PyTuple_Pack(3, self->pattern, self, template) 2089 ); 2090 } 2091 2092 static PyObject* 2093 match_group(MatchObject* self, PyObject* args) 2094 { 2095 PyObject* result; 2096 Py_ssize_t i, size; 2097 2098 size = PyTuple_GET_SIZE(args); 2099 2100 switch (size) { 2101 case 0: 2102 result = match_getslice(self, Py_False, Py_None); 2103 break; 2104 case 1: 2105 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None); 2106 break; 2107 default: 2108 /* fetch multiple items */ 2109 result = PyTuple_New(size); 2110 if (!result) 2111 return NULL; 2112 for (i = 0; i < size; i++) { 2113 PyObject* item = match_getslice( 2114 self, PyTuple_GET_ITEM(args, i), Py_None 2115 ); 2116 if (!item) { 2117 Py_DECREF(result); 2118 return NULL; 2119 } 2120 PyTuple_SET_ITEM(result, i, item); 2121 } 2122 break; 2123 } 2124 return result; 2125 } 2126 2127 static PyObject* 2128 match_getitem(MatchObject* self, PyObject* name) 2129 { 2130 return match_getslice(self, name, Py_None); 2131 } 2132 2133 /*[clinic input] 2134 _sre.SRE_Match.groups 2135 2136 default: object = None 2137 Is used for groups that did not participate in the match. 2138 2139 Return a tuple containing all the subgroups of the match, from 1. 2140 [clinic start generated code]*/ 2141 2142 static PyObject * 2143 _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value) 2144 /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/ 2145 { 2146 PyObject* result; 2147 Py_ssize_t index; 2148 2149 result = PyTuple_New(self->groups-1); 2150 if (!result) 2151 return NULL; 2152 2153 for (index = 1; index < self->groups; index++) { 2154 PyObject* item; 2155 item = match_getslice_by_index(self, index, default_value); 2156 if (!item) { 2157 Py_DECREF(result); 2158 return NULL; 2159 } 2160 PyTuple_SET_ITEM(result, index-1, item); 2161 } 2162 2163 return result; 2164 } 2165 2166 /*[clinic input] 2167 _sre.SRE_Match.groupdict 2168 2169 default: object = None 2170 Is used for groups that did not participate in the match. 2171 2172 Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name. 2173 [clinic start generated code]*/ 2174 2175 static PyObject * 2176 _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value) 2177 /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/ 2178 { 2179 PyObject* result; 2180 PyObject* keys; 2181 Py_ssize_t index; 2182 2183 result = PyDict_New(); 2184 if (!result || !self->pattern->groupindex) 2185 return result; 2186 2187 keys = PyMapping_Keys(self->pattern->groupindex); 2188 if (!keys) 2189 goto failed; 2190 2191 for (index = 0; index < PyList_GET_SIZE(keys); index++) { 2192 int status; 2193 PyObject* key; 2194 PyObject* value; 2195 key = PyList_GET_ITEM(keys, index); 2196 if (!key) 2197 goto failed; 2198 value = match_getslice(self, key, default_value); 2199 if (!value) 2200 goto failed; 2201 status = PyDict_SetItem(result, key, value); 2202 Py_DECREF(value); 2203 if (status < 0) 2204 goto failed; 2205 } 2206 2207 Py_DECREF(keys); 2208 2209 return result; 2210 2211 failed: 2212 Py_XDECREF(keys); 2213 Py_DECREF(result); 2214 return NULL; 2215 } 2216 2217 /*[clinic input] 2218 _sre.SRE_Match.start -> Py_ssize_t 2219 2220 group: object(c_default="NULL") = 0 2221 / 2222 2223 Return index of the start of the substring matched by group. 2224 [clinic start generated code]*/ 2225 2226 static Py_ssize_t 2227 _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group) 2228 /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/ 2229 { 2230 Py_ssize_t index = match_getindex(self, group); 2231 2232 if (index < 0 || index >= self->groups) { 2233 PyErr_SetString( 2234 PyExc_IndexError, 2235 "no such group" 2236 ); 2237 return -1; 2238 } 2239 2240 /* mark is -1 if group is undefined */ 2241 return self->mark[index*2]; 2242 } 2243 2244 /*[clinic input] 2245 _sre.SRE_Match.end -> Py_ssize_t 2246 2247 group: object(c_default="NULL") = 0 2248 / 2249 2250 Return index of the end of the substring matched by group. 2251 [clinic start generated code]*/ 2252 2253 static Py_ssize_t 2254 _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group) 2255 /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/ 2256 { 2257 Py_ssize_t index = match_getindex(self, group); 2258 2259 if (index < 0 || index >= self->groups) { 2260 PyErr_SetString( 2261 PyExc_IndexError, 2262 "no such group" 2263 ); 2264 return -1; 2265 } 2266 2267 /* mark is -1 if group is undefined */ 2268 return self->mark[index*2+1]; 2269 } 2270 2271 LOCAL(PyObject*) 2272 _pair(Py_ssize_t i1, Py_ssize_t i2) 2273 { 2274 PyObject* pair; 2275 PyObject* item; 2276 2277 pair = PyTuple_New(2); 2278 if (!pair) 2279 return NULL; 2280 2281 item = PyLong_FromSsize_t(i1); 2282 if (!item) 2283 goto error; 2284 PyTuple_SET_ITEM(pair, 0, item); 2285 2286 item = PyLong_FromSsize_t(i2); 2287 if (!item) 2288 goto error; 2289 PyTuple_SET_ITEM(pair, 1, item); 2290 2291 return pair; 2292 2293 error: 2294 Py_DECREF(pair); 2295 return NULL; 2296 } 2297 2298 /*[clinic input] 2299 _sre.SRE_Match.span 2300 2301 group: object(c_default="NULL") = 0 2302 / 2303 2304 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)). 2305 [clinic start generated code]*/ 2306 2307 static PyObject * 2308 _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group) 2309 /*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/ 2310 { 2311 Py_ssize_t index = match_getindex(self, group); 2312 2313 if (index < 0 || index >= self->groups) { 2314 PyErr_SetString( 2315 PyExc_IndexError, 2316 "no such group" 2317 ); 2318 return NULL; 2319 } 2320 2321 /* marks are -1 if group is undefined */ 2322 return _pair(self->mark[index*2], self->mark[index*2+1]); 2323 } 2324 2325 static PyObject* 2326 match_regs(MatchObject* self) 2327 { 2328 PyObject* regs; 2329 PyObject* item; 2330 Py_ssize_t index; 2331 2332 regs = PyTuple_New(self->groups); 2333 if (!regs) 2334 return NULL; 2335 2336 for (index = 0; index < self->groups; index++) { 2337 item = _pair(self->mark[index*2], self->mark[index*2+1]); 2338 if (!item) { 2339 Py_DECREF(regs); 2340 return NULL; 2341 } 2342 PyTuple_SET_ITEM(regs, index, item); 2343 } 2344 2345 Py_INCREF(regs); 2346 self->regs = regs; 2347 2348 return regs; 2349 } 2350 2351 /*[clinic input] 2352 _sre.SRE_Match.__copy__ 2353 2354 [clinic start generated code]*/ 2355 2356 static PyObject * 2357 _sre_SRE_Match___copy___impl(MatchObject *self) 2358 /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/ 2359 { 2360 #ifdef USE_BUILTIN_COPY 2361 MatchObject* copy; 2362 Py_ssize_t slots, offset; 2363 2364 slots = 2 * (self->pattern->groups+1); 2365 2366 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots); 2367 if (!copy) 2368 return NULL; 2369 2370 /* this value a constant, but any compiler should be able to 2371 figure that out all by itself */ 2372 offset = offsetof(MatchObject, string); 2373 2374 Py_XINCREF(self->pattern); 2375 Py_XINCREF(self->string); 2376 Py_XINCREF(self->regs); 2377 2378 memcpy((char*) copy + offset, (char*) self + offset, 2379 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset); 2380 2381 return (PyObject*) copy; 2382 #else 2383 PyErr_SetString(PyExc_TypeError, "cannot copy this match object"); 2384 return NULL; 2385 #endif 2386 } 2387 2388 /*[clinic input] 2389 _sre.SRE_Match.__deepcopy__ 2390 2391 memo: object 2392 2393 [clinic start generated code]*/ 2394 2395 static PyObject * 2396 _sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo) 2397 /*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/ 2398 { 2399 #ifdef USE_BUILTIN_COPY 2400 MatchObject* copy; 2401 2402 copy = (MatchObject*) match_copy(self); 2403 if (!copy) 2404 return NULL; 2405 2406 if (!deepcopy((PyObject**) ©->pattern, memo) || 2407 !deepcopy(©->string, memo) || 2408 !deepcopy(©->regs, memo)) { 2409 Py_DECREF(copy); 2410 return NULL; 2411 } 2412 2413 #else 2414 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object"); 2415 return NULL; 2416 #endif 2417 } 2418 2419 PyDoc_STRVAR(match_doc, 2420 "The result of re.match() and re.search().\n\ 2421 Match objects always have a boolean value of True."); 2422 2423 PyDoc_STRVAR(match_group_doc, 2424 "group([group1, ...]) -> str or tuple.\n\ 2425 Return subgroup(s) of the match by indices or names.\n\ 2426 For 0 returns the entire match."); 2427 2428 static PyObject * 2429 match_lastindex_get(MatchObject *self) 2430 { 2431 if (self->lastindex >= 0) 2432 return PyLong_FromSsize_t(self->lastindex); 2433 Py_INCREF(Py_None); 2434 return Py_None; 2435 } 2436 2437 static PyObject * 2438 match_lastgroup_get(MatchObject *self) 2439 { 2440 if (self->pattern->indexgroup && self->lastindex >= 0) { 2441 PyObject* result = PySequence_GetItem( 2442 self->pattern->indexgroup, self->lastindex 2443 ); 2444 if (result) 2445 return result; 2446 PyErr_Clear(); 2447 } 2448 Py_INCREF(Py_None); 2449 return Py_None; 2450 } 2451 2452 static PyObject * 2453 match_regs_get(MatchObject *self) 2454 { 2455 if (self->regs) { 2456 Py_INCREF(self->regs); 2457 return self->regs; 2458 } else 2459 return match_regs(self); 2460 } 2461 2462 static PyObject * 2463 match_repr(MatchObject *self) 2464 { 2465 PyObject *result; 2466 PyObject *group0 = match_getslice_by_index(self, 0, Py_None); 2467 if (group0 == NULL) 2468 return NULL; 2469 result = PyUnicode_FromFormat( 2470 "<%s object; span=(%d, %d), match=%.50R>", 2471 Py_TYPE(self)->tp_name, 2472 self->mark[0], self->mark[1], group0); 2473 Py_DECREF(group0); 2474 return result; 2475 } 2476 2477 2478 static PyObject* 2479 pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status) 2480 { 2481 /* create match object (from state object) */ 2482 2483 MatchObject* match; 2484 Py_ssize_t i, j; 2485 char* base; 2486 int n; 2487 2488 if (status > 0) { 2489 2490 /* create match object (with room for extra group marks) */ 2491 /* coverity[ampersand_in_size] */ 2492 match = PyObject_NEW_VAR(MatchObject, &Match_Type, 2493 2*(pattern->groups+1)); 2494 if (!match) 2495 return NULL; 2496 2497 Py_INCREF(pattern); 2498 match->pattern = pattern; 2499 2500 Py_INCREF(state->string); 2501 match->string = state->string; 2502 2503 match->regs = NULL; 2504 match->groups = pattern->groups+1; 2505 2506 /* fill in group slices */ 2507 2508 base = (char*) state->beginning; 2509 n = state->charsize; 2510 2511 match->mark[0] = ((char*) state->start - base) / n; 2512 match->mark[1] = ((char*) state->ptr - base) / n; 2513 2514 for (i = j = 0; i < pattern->groups; i++, j+=2) 2515 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { 2516 match->mark[j+2] = ((char*) state->mark[j] - base) / n; 2517 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; 2518 } else 2519 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ 2520 2521 match->pos = state->pos; 2522 match->endpos = state->endpos; 2523 2524 match->lastindex = state->lastindex; 2525 2526 return (PyObject*) match; 2527 2528 } else if (status == 0) { 2529 2530 /* no match */ 2531 Py_INCREF(Py_None); 2532 return Py_None; 2533 2534 } 2535 2536 /* internal error */ 2537 pattern_error(status); 2538 return NULL; 2539 } 2540 2541 2542 /* -------------------------------------------------------------------- */ 2543 /* scanner methods (experimental) */ 2544 2545 static void 2546 scanner_dealloc(ScannerObject* self) 2547 { 2548 state_fini(&self->state); 2549 Py_XDECREF(self->pattern); 2550 PyObject_DEL(self); 2551 } 2552 2553 /*[clinic input] 2554 _sre.SRE_Scanner.match 2555 2556 [clinic start generated code]*/ 2557 2558 static PyObject * 2559 _sre_SRE_Scanner_match_impl(ScannerObject *self) 2560 /*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/ 2561 { 2562 SRE_STATE* state = &self->state; 2563 PyObject* match; 2564 Py_ssize_t status; 2565 2566 if (state->start == NULL) 2567 Py_RETURN_NONE; 2568 2569 state_reset(state); 2570 2571 state->ptr = state->start; 2572 2573 status = sre_match(state, PatternObject_GetCode(self->pattern), 0); 2574 if (PyErr_Occurred()) 2575 return NULL; 2576 2577 match = pattern_new_match((PatternObject*) self->pattern, 2578 state, status); 2579 2580 if (status == 0) 2581 state->start = NULL; 2582 else if (state->ptr != state->start) 2583 state->start = state->ptr; 2584 else if (state->ptr != state->end) 2585 state->start = (void*) ((char*) state->ptr + state->charsize); 2586 else 2587 state->start = NULL; 2588 2589 return match; 2590 } 2591 2592 2593 /*[clinic input] 2594 _sre.SRE_Scanner.search 2595 2596 [clinic start generated code]*/ 2597 2598 static PyObject * 2599 _sre_SRE_Scanner_search_impl(ScannerObject *self) 2600 /*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/ 2601 { 2602 SRE_STATE* state = &self->state; 2603 PyObject* match; 2604 Py_ssize_t status; 2605 2606 if (state->start == NULL) 2607 Py_RETURN_NONE; 2608 2609 state_reset(state); 2610 2611 state->ptr = state->start; 2612 2613 status = sre_search(state, PatternObject_GetCode(self->pattern)); 2614 if (PyErr_Occurred()) 2615 return NULL; 2616 2617 match = pattern_new_match((PatternObject*) self->pattern, 2618 state, status); 2619 2620 if (status == 0) 2621 state->start = NULL; 2622 else if (state->ptr != state->start) 2623 state->start = state->ptr; 2624 else if (state->ptr != state->end) 2625 state->start = (void*) ((char*) state->ptr + state->charsize); 2626 else 2627 state->start = NULL; 2628 2629 return match; 2630 } 2631 2632 static PyObject * 2633 pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos) 2634 { 2635 ScannerObject* scanner; 2636 2637 /* create scanner object */ 2638 scanner = PyObject_NEW(ScannerObject, &Scanner_Type); 2639 if (!scanner) 2640 return NULL; 2641 scanner->pattern = NULL; 2642 2643 /* create search state object */ 2644 if (!state_init(&scanner->state, self, string, pos, endpos)) { 2645 Py_DECREF(scanner); 2646 return NULL; 2647 } 2648 2649 Py_INCREF(self); 2650 scanner->pattern = (PyObject*) self; 2651 2652 return (PyObject*) scanner; 2653 } 2654 2655 static Py_hash_t 2656 pattern_hash(PatternObject *self) 2657 { 2658 Py_hash_t hash, hash2; 2659 2660 hash = PyObject_Hash(self->pattern); 2661 if (hash == -1) { 2662 return -1; 2663 } 2664 2665 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize); 2666 hash ^= hash2; 2667 2668 hash ^= self->flags; 2669 hash ^= self->isbytes; 2670 hash ^= self->codesize; 2671 2672 if (hash == -1) { 2673 hash = -2; 2674 } 2675 return hash; 2676 } 2677 2678 static PyObject* 2679 pattern_richcompare(PyObject *lefto, PyObject *righto, int op) 2680 { 2681 PatternObject *left, *right; 2682 int cmp; 2683 2684 if (op != Py_EQ && op != Py_NE) { 2685 Py_RETURN_NOTIMPLEMENTED; 2686 } 2687 2688 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) { 2689 Py_RETURN_NOTIMPLEMENTED; 2690 } 2691 2692 if (lefto == righto) { 2693 /* a pattern is equal to itself */ 2694 return PyBool_FromLong(op == Py_EQ); 2695 } 2696 2697 left = (PatternObject *)lefto; 2698 right = (PatternObject *)righto; 2699 2700 cmp = (left->flags == right->flags 2701 && left->isbytes == right->isbytes 2702 && left->codesize == right->codesize); 2703 if (cmp) { 2704 /* Compare the code and the pattern because the same pattern can 2705 produce different codes depending on the locale used to compile the 2706 pattern when the re.LOCALE flag is used. Don't compare groups, 2707 indexgroup nor groupindex: they are derivated from the pattern. */ 2708 cmp = (memcmp(left->code, right->code, 2709 sizeof(left->code[0]) * left->codesize) == 0); 2710 } 2711 if (cmp) { 2712 cmp = PyObject_RichCompareBool(left->pattern, right->pattern, 2713 Py_EQ); 2714 if (cmp < 0) { 2715 return NULL; 2716 } 2717 } 2718 if (op == Py_NE) { 2719 cmp = !cmp; 2720 } 2721 return PyBool_FromLong(cmp); 2722 } 2723 2724 #include "clinic/_sre.c.h" 2725 2726 static PyMethodDef pattern_methods[] = { 2727 _SRE_SRE_PATTERN_MATCH_METHODDEF 2728 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF 2729 _SRE_SRE_PATTERN_SEARCH_METHODDEF 2730 _SRE_SRE_PATTERN_SUB_METHODDEF 2731 _SRE_SRE_PATTERN_SUBN_METHODDEF 2732 _SRE_SRE_PATTERN_FINDALL_METHODDEF 2733 _SRE_SRE_PATTERN_SPLIT_METHODDEF 2734 _SRE_SRE_PATTERN_FINDITER_METHODDEF 2735 _SRE_SRE_PATTERN_SCANNER_METHODDEF 2736 _SRE_SRE_PATTERN___COPY___METHODDEF 2737 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF 2738 {NULL, NULL} 2739 }; 2740 2741 static PyGetSetDef pattern_getset[] = { 2742 {"groupindex", (getter)pattern_groupindex, (setter)NULL, 2743 "A dictionary mapping group names to group numbers."}, 2744 {NULL} /* Sentinel */ 2745 }; 2746 2747 #define PAT_OFF(x) offsetof(PatternObject, x) 2748 static PyMemberDef pattern_members[] = { 2749 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY}, 2750 {"flags", T_INT, PAT_OFF(flags), READONLY}, 2751 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY}, 2752 {NULL} /* Sentinel */ 2753 }; 2754 2755 static PyTypeObject Pattern_Type = { 2756 PyVarObject_HEAD_INIT(NULL, 0) 2757 "_" SRE_MODULE ".SRE_Pattern", 2758 sizeof(PatternObject), sizeof(SRE_CODE), 2759 (destructor)pattern_dealloc, /* tp_dealloc */ 2760 0, /* tp_print */ 2761 0, /* tp_getattr */ 2762 0, /* tp_setattr */ 2763 0, /* tp_reserved */ 2764 (reprfunc)pattern_repr, /* tp_repr */ 2765 0, /* tp_as_number */ 2766 0, /* tp_as_sequence */ 2767 0, /* tp_as_mapping */ 2768 (hashfunc)pattern_hash, /* tp_hash */ 2769 0, /* tp_call */ 2770 0, /* tp_str */ 2771 0, /* tp_getattro */ 2772 0, /* tp_setattro */ 2773 0, /* tp_as_buffer */ 2774 Py_TPFLAGS_DEFAULT, /* tp_flags */ 2775 pattern_doc, /* tp_doc */ 2776 0, /* tp_traverse */ 2777 0, /* tp_clear */ 2778 pattern_richcompare, /* tp_richcompare */ 2779 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */ 2780 0, /* tp_iter */ 2781 0, /* tp_iternext */ 2782 pattern_methods, /* tp_methods */ 2783 pattern_members, /* tp_members */ 2784 pattern_getset, /* tp_getset */ 2785 }; 2786 2787 /* Match objects do not support length or assignment, but do support 2788 __getitem__. */ 2789 static PyMappingMethods match_as_mapping = { 2790 NULL, 2791 (binaryfunc)match_getitem, 2792 NULL 2793 }; 2794 2795 static PyMethodDef match_methods[] = { 2796 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc}, 2797 _SRE_SRE_MATCH_START_METHODDEF 2798 _SRE_SRE_MATCH_END_METHODDEF 2799 _SRE_SRE_MATCH_SPAN_METHODDEF 2800 _SRE_SRE_MATCH_GROUPS_METHODDEF 2801 _SRE_SRE_MATCH_GROUPDICT_METHODDEF 2802 _SRE_SRE_MATCH_EXPAND_METHODDEF 2803 _SRE_SRE_MATCH___COPY___METHODDEF 2804 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF 2805 {NULL, NULL} 2806 }; 2807 2808 static PyGetSetDef match_getset[] = { 2809 {"lastindex", (getter)match_lastindex_get, (setter)NULL}, 2810 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL}, 2811 {"regs", (getter)match_regs_get, (setter)NULL}, 2812 {NULL} 2813 }; 2814 2815 #define MATCH_OFF(x) offsetof(MatchObject, x) 2816 static PyMemberDef match_members[] = { 2817 {"string", T_OBJECT, MATCH_OFF(string), READONLY}, 2818 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY}, 2819 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY}, 2820 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY}, 2821 {NULL} 2822 }; 2823 2824 /* FIXME: implement setattr("string", None) as a special case (to 2825 detach the associated string, if any */ 2826 2827 static PyTypeObject Match_Type = { 2828 PyVarObject_HEAD_INIT(NULL,0) 2829 "_" SRE_MODULE ".SRE_Match", 2830 sizeof(MatchObject), sizeof(Py_ssize_t), 2831 (destructor)match_dealloc, /* tp_dealloc */ 2832 0, /* tp_print */ 2833 0, /* tp_getattr */ 2834 0, /* tp_setattr */ 2835 0, /* tp_reserved */ 2836 (reprfunc)match_repr, /* tp_repr */ 2837 0, /* tp_as_number */ 2838 0, /* tp_as_sequence */ 2839 &match_as_mapping, /* tp_as_mapping */ 2840 0, /* tp_hash */ 2841 0, /* tp_call */ 2842 0, /* tp_str */ 2843 0, /* tp_getattro */ 2844 0, /* tp_setattro */ 2845 0, /* tp_as_buffer */ 2846 Py_TPFLAGS_DEFAULT, /* tp_flags */ 2847 match_doc, /* tp_doc */ 2848 0, /* tp_traverse */ 2849 0, /* tp_clear */ 2850 0, /* tp_richcompare */ 2851 0, /* tp_weaklistoffset */ 2852 0, /* tp_iter */ 2853 0, /* tp_iternext */ 2854 match_methods, /* tp_methods */ 2855 match_members, /* tp_members */ 2856 match_getset, /* tp_getset */ 2857 }; 2858 2859 static PyMethodDef scanner_methods[] = { 2860 _SRE_SRE_SCANNER_MATCH_METHODDEF 2861 _SRE_SRE_SCANNER_SEARCH_METHODDEF 2862 {NULL, NULL} 2863 }; 2864 2865 #define SCAN_OFF(x) offsetof(ScannerObject, x) 2866 static PyMemberDef scanner_members[] = { 2867 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY}, 2868 {NULL} /* Sentinel */ 2869 }; 2870 2871 static PyTypeObject Scanner_Type = { 2872 PyVarObject_HEAD_INIT(NULL, 0) 2873 "_" SRE_MODULE ".SRE_Scanner", 2874 sizeof(ScannerObject), 0, 2875 (destructor)scanner_dealloc,/* tp_dealloc */ 2876 0, /* tp_print */ 2877 0, /* tp_getattr */ 2878 0, /* tp_setattr */ 2879 0, /* tp_reserved */ 2880 0, /* tp_repr */ 2881 0, /* tp_as_number */ 2882 0, /* tp_as_sequence */ 2883 0, /* tp_as_mapping */ 2884 0, /* tp_hash */ 2885 0, /* tp_call */ 2886 0, /* tp_str */ 2887 0, /* tp_getattro */ 2888 0, /* tp_setattro */ 2889 0, /* tp_as_buffer */ 2890 Py_TPFLAGS_DEFAULT, /* tp_flags */ 2891 0, /* tp_doc */ 2892 0, /* tp_traverse */ 2893 0, /* tp_clear */ 2894 0, /* tp_richcompare */ 2895 0, /* tp_weaklistoffset */ 2896 0, /* tp_iter */ 2897 0, /* tp_iternext */ 2898 scanner_methods, /* tp_methods */ 2899 scanner_members, /* tp_members */ 2900 0, /* tp_getset */ 2901 }; 2902 2903 static PyMethodDef _functions[] = { 2904 _SRE_COMPILE_METHODDEF 2905 _SRE_GETCODESIZE_METHODDEF 2906 _SRE_GETLOWER_METHODDEF 2907 {NULL, NULL} 2908 }; 2909 2910 static struct PyModuleDef sremodule = { 2911 PyModuleDef_HEAD_INIT, 2912 "_" SRE_MODULE, 2913 NULL, 2914 -1, 2915 _functions, 2916 NULL, 2917 NULL, 2918 NULL, 2919 NULL 2920 }; 2921 2922 PyMODINIT_FUNC PyInit__sre(void) 2923 { 2924 PyObject* m; 2925 PyObject* d; 2926 PyObject* x; 2927 2928 /* Patch object types */ 2929 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) || 2930 PyType_Ready(&Scanner_Type)) 2931 return NULL; 2932 2933 m = PyModule_Create(&sremodule); 2934 if (m == NULL) 2935 return NULL; 2936 d = PyModule_GetDict(m); 2937 2938 x = PyLong_FromLong(SRE_MAGIC); 2939 if (x) { 2940 PyDict_SetItemString(d, "MAGIC", x); 2941 Py_DECREF(x); 2942 } 2943 2944 x = PyLong_FromLong(sizeof(SRE_CODE)); 2945 if (x) { 2946 PyDict_SetItemString(d, "CODESIZE", x); 2947 Py_DECREF(x); 2948 } 2949 2950 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT); 2951 if (x) { 2952 PyDict_SetItemString(d, "MAXREPEAT", x); 2953 Py_DECREF(x); 2954 } 2955 2956 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS); 2957 if (x) { 2958 PyDict_SetItemString(d, "MAXGROUPS", x); 2959 Py_DECREF(x); 2960 } 2961 2962 x = PyUnicode_FromString(copyright); 2963 if (x) { 2964 PyDict_SetItemString(d, "copyright", x); 2965 Py_DECREF(x); 2966 } 2967 return m; 2968 } 2969 2970 /* vim:ts=4:sw=4:et 2971 */ 2972