1 /*-------------------------------------------------------------------- 2 * Licensed to PSF under a Contributor Agreement. 3 * See http://www.python.org/psf/license for licensing details. 4 * 5 * _elementtree - C accelerator for xml.etree.ElementTree 6 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved. 7 * Copyright (c) 1999-2009 by Fredrik Lundh. 8 * 9 * info (at) pythonware.com 10 * http://www.pythonware.com 11 *-------------------------------------------------------------------- 12 */ 13 14 #define PY_SSIZE_T_CLEAN 15 16 #include "Python.h" 17 #include "structmember.h" 18 19 /* -------------------------------------------------------------------- */ 20 /* configuration */ 21 22 /* An element can hold this many children without extra memory 23 allocations. */ 24 #define STATIC_CHILDREN 4 25 26 /* For best performance, chose a value so that 80-90% of all nodes 27 have no more than the given number of children. Set this to zero 28 to minimize the size of the element structure itself (this only 29 helps if you have lots of leaf nodes with attributes). */ 30 31 /* Also note that pymalloc always allocates blocks in multiples of 32 eight bytes. For the current C version of ElementTree, this means 33 that the number of children should be an even number, at least on 34 32-bit platforms. */ 35 36 /* -------------------------------------------------------------------- */ 37 38 #if 0 39 static int memory = 0; 40 #define ALLOC(size, comment)\ 41 do { memory += size; printf("%8d - %s\n", memory, comment); } while (0) 42 #define RELEASE(size, comment)\ 43 do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0) 44 #else 45 #define ALLOC(size, comment) 46 #define RELEASE(size, comment) 47 #endif 48 49 /* compiler tweaks */ 50 #if defined(_MSC_VER) 51 #define LOCAL(type) static __inline type __fastcall 52 #else 53 #define LOCAL(type) static type 54 #endif 55 56 /* macros used to store 'join' flags in string object pointers. note 57 that all use of text and tail as object pointers must be wrapped in 58 JOIN_OBJ. see comments in the ElementObject definition for more 59 info. */ 60 #define JOIN_GET(p) ((uintptr_t) (p) & 1) 61 #define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag))) 62 #define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1)) 63 64 /* Py_SETREF for a PyObject* that uses a join flag. */ 65 Py_LOCAL_INLINE(void) 66 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr) 67 { 68 PyObject *tmp = JOIN_OBJ(*p); 69 *p = new_joined_ptr; 70 Py_DECREF(tmp); 71 } 72 73 /* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by 74 * reference since this function sets it to NULL. 75 */ 76 static void _clear_joined_ptr(PyObject **p) 77 { 78 if (*p) { 79 _set_joined_ptr(p, NULL); 80 } 81 } 82 83 /* Types defined by this extension */ 84 static PyTypeObject Element_Type; 85 static PyTypeObject ElementIter_Type; 86 static PyTypeObject TreeBuilder_Type; 87 static PyTypeObject XMLParser_Type; 88 89 90 /* Per-module state; PEP 3121 */ 91 typedef struct { 92 PyObject *parseerror_obj; 93 PyObject *deepcopy_obj; 94 PyObject *elementpath_obj; 95 } elementtreestate; 96 97 static struct PyModuleDef elementtreemodule; 98 99 /* Given a module object (assumed to be _elementtree), get its per-module 100 * state. 101 */ 102 #define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod)) 103 104 /* Find the module instance imported in the currently running sub-interpreter 105 * and get its state. 106 */ 107 #define ET_STATE_GLOBAL \ 108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule))) 109 110 static int 111 elementtree_clear(PyObject *m) 112 { 113 elementtreestate *st = ET_STATE(m); 114 Py_CLEAR(st->parseerror_obj); 115 Py_CLEAR(st->deepcopy_obj); 116 Py_CLEAR(st->elementpath_obj); 117 return 0; 118 } 119 120 static int 121 elementtree_traverse(PyObject *m, visitproc visit, void *arg) 122 { 123 elementtreestate *st = ET_STATE(m); 124 Py_VISIT(st->parseerror_obj); 125 Py_VISIT(st->deepcopy_obj); 126 Py_VISIT(st->elementpath_obj); 127 return 0; 128 } 129 130 static void 131 elementtree_free(void *m) 132 { 133 elementtree_clear((PyObject *)m); 134 } 135 136 /* helpers */ 137 138 LOCAL(PyObject*) 139 list_join(PyObject* list) 140 { 141 /* join list elements */ 142 PyObject* joiner; 143 PyObject* result; 144 145 joiner = PyUnicode_FromStringAndSize("", 0); 146 if (!joiner) 147 return NULL; 148 result = PyUnicode_Join(joiner, list); 149 Py_DECREF(joiner); 150 return result; 151 } 152 153 /* Is the given object an empty dictionary? 154 */ 155 static int 156 is_empty_dict(PyObject *obj) 157 { 158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0; 159 } 160 161 162 /* -------------------------------------------------------------------- */ 163 /* the Element type */ 164 165 typedef struct { 166 167 /* attributes (a dictionary object), or None if no attributes */ 168 PyObject* attrib; 169 170 /* child elements */ 171 Py_ssize_t length; /* actual number of items */ 172 Py_ssize_t allocated; /* allocated items */ 173 174 /* this either points to _children or to a malloced buffer */ 175 PyObject* *children; 176 177 PyObject* _children[STATIC_CHILDREN]; 178 179 } ElementObjectExtra; 180 181 typedef struct { 182 PyObject_HEAD 183 184 /* element tag (a string). */ 185 PyObject* tag; 186 187 /* text before first child. note that this is a tagged pointer; 188 use JOIN_OBJ to get the object pointer. the join flag is used 189 to distinguish lists created by the tree builder from lists 190 assigned to the attribute by application code; the former 191 should be joined before being returned to the user, the latter 192 should be left intact. */ 193 PyObject* text; 194 195 /* text after this element, in parent. note that this is a tagged 196 pointer; use JOIN_OBJ to get the object pointer. */ 197 PyObject* tail; 198 199 ElementObjectExtra* extra; 200 201 PyObject *weakreflist; /* For tp_weaklistoffset */ 202 203 } ElementObject; 204 205 206 #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type) 207 #define Element_Check(op) PyObject_TypeCheck(op, &Element_Type) 208 209 210 /* -------------------------------------------------------------------- */ 211 /* Element constructors and destructor */ 212 213 LOCAL(int) 214 create_extra(ElementObject* self, PyObject* attrib) 215 { 216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); 217 if (!self->extra) { 218 PyErr_NoMemory(); 219 return -1; 220 } 221 222 if (!attrib) 223 attrib = Py_None; 224 225 Py_INCREF(attrib); 226 self->extra->attrib = attrib; 227 228 self->extra->length = 0; 229 self->extra->allocated = STATIC_CHILDREN; 230 self->extra->children = self->extra->_children; 231 232 return 0; 233 } 234 235 LOCAL(void) 236 dealloc_extra(ElementObjectExtra *extra) 237 { 238 Py_ssize_t i; 239 240 if (!extra) 241 return; 242 243 Py_DECREF(extra->attrib); 244 245 for (i = 0; i < extra->length; i++) 246 Py_DECREF(extra->children[i]); 247 248 if (extra->children != extra->_children) 249 PyObject_Free(extra->children); 250 251 PyObject_Free(extra); 252 } 253 254 LOCAL(void) 255 clear_extra(ElementObject* self) 256 { 257 ElementObjectExtra *myextra; 258 259 if (!self->extra) 260 return; 261 262 /* Avoid DECREFs calling into this code again (cycles, etc.) 263 */ 264 myextra = self->extra; 265 self->extra = NULL; 266 267 dealloc_extra(myextra); 268 } 269 270 /* Convenience internal function to create new Element objects with the given 271 * tag and attributes. 272 */ 273 LOCAL(PyObject*) 274 create_new_element(PyObject* tag, PyObject* attrib) 275 { 276 ElementObject* self; 277 278 self = PyObject_GC_New(ElementObject, &Element_Type); 279 if (self == NULL) 280 return NULL; 281 self->extra = NULL; 282 283 Py_INCREF(tag); 284 self->tag = tag; 285 286 Py_INCREF(Py_None); 287 self->text = Py_None; 288 289 Py_INCREF(Py_None); 290 self->tail = Py_None; 291 292 self->weakreflist = NULL; 293 294 ALLOC(sizeof(ElementObject), "create element"); 295 PyObject_GC_Track(self); 296 297 if (attrib != Py_None && !is_empty_dict(attrib)) { 298 if (create_extra(self, attrib) < 0) { 299 Py_DECREF(self); 300 return NULL; 301 } 302 } 303 304 return (PyObject*) self; 305 } 306 307 static PyObject * 308 element_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 309 { 310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0); 311 if (e != NULL) { 312 Py_INCREF(Py_None); 313 e->tag = Py_None; 314 315 Py_INCREF(Py_None); 316 e->text = Py_None; 317 318 Py_INCREF(Py_None); 319 e->tail = Py_None; 320 321 e->extra = NULL; 322 e->weakreflist = NULL; 323 } 324 return (PyObject *)e; 325 } 326 327 /* Helper function for extracting the attrib dictionary from a keywords dict. 328 * This is required by some constructors/functions in this module that can 329 * either accept attrib as a keyword argument or all attributes splashed 330 * directly into *kwds. 331 * 332 * Return a dictionary with the content of kwds merged into the content of 333 * attrib. If there is no attrib keyword, return a copy of kwds. 334 */ 335 static PyObject* 336 get_attrib_from_keywords(PyObject *kwds) 337 { 338 PyObject *attrib_str = PyUnicode_FromString("attrib"); 339 if (attrib_str == NULL) { 340 return NULL; 341 } 342 PyObject *attrib = PyDict_GetItem(kwds, attrib_str); 343 344 if (attrib) { 345 /* If attrib was found in kwds, copy its value and remove it from 346 * kwds 347 */ 348 if (!PyDict_Check(attrib)) { 349 Py_DECREF(attrib_str); 350 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s", 351 Py_TYPE(attrib)->tp_name); 352 return NULL; 353 } 354 attrib = PyDict_Copy(attrib); 355 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) { 356 Py_DECREF(attrib); 357 attrib = NULL; 358 } 359 } else { 360 attrib = PyDict_New(); 361 } 362 363 Py_DECREF(attrib_str); 364 365 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) { 366 Py_DECREF(attrib); 367 return NULL; 368 } 369 return attrib; 370 } 371 372 /*[clinic input] 373 module _elementtree 374 class _elementtree.Element "ElementObject *" "&Element_Type" 375 class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type" 376 class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type" 377 [clinic start generated code]*/ 378 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/ 379 380 static int 381 element_init(PyObject *self, PyObject *args, PyObject *kwds) 382 { 383 PyObject *tag; 384 PyObject *attrib = NULL; 385 ElementObject *self_elem; 386 387 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib)) 388 return -1; 389 390 if (attrib) { 391 /* attrib passed as positional arg */ 392 attrib = PyDict_Copy(attrib); 393 if (!attrib) 394 return -1; 395 if (kwds) { 396 if (PyDict_Update(attrib, kwds) < 0) { 397 Py_DECREF(attrib); 398 return -1; 399 } 400 } 401 } else if (kwds) { 402 /* have keywords args */ 403 attrib = get_attrib_from_keywords(kwds); 404 if (!attrib) 405 return -1; 406 } 407 408 self_elem = (ElementObject *)self; 409 410 if (attrib != NULL && !is_empty_dict(attrib)) { 411 if (create_extra(self_elem, attrib) < 0) { 412 Py_DECREF(attrib); 413 return -1; 414 } 415 } 416 417 /* We own a reference to attrib here and it's no longer needed. */ 418 Py_XDECREF(attrib); 419 420 /* Replace the objects already pointed to by tag, text and tail. */ 421 Py_INCREF(tag); 422 Py_XSETREF(self_elem->tag, tag); 423 424 Py_INCREF(Py_None); 425 _set_joined_ptr(&self_elem->text, Py_None); 426 427 Py_INCREF(Py_None); 428 _set_joined_ptr(&self_elem->tail, Py_None); 429 430 return 0; 431 } 432 433 LOCAL(int) 434 element_resize(ElementObject* self, Py_ssize_t extra) 435 { 436 Py_ssize_t size; 437 PyObject* *children; 438 439 assert(extra >= 0); 440 /* make sure self->children can hold the given number of extra 441 elements. set an exception and return -1 if allocation failed */ 442 443 if (!self->extra) { 444 if (create_extra(self, NULL) < 0) 445 return -1; 446 } 447 448 size = self->extra->length + extra; /* never overflows */ 449 450 if (size > self->extra->allocated) { 451 /* use Python 2.4's list growth strategy */ 452 size = (size >> 3) + (size < 9 ? 3 : 6) + size; 453 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children" 454 * which needs at least 4 bytes. 455 * Although it's a false alarm always assume at least one child to 456 * be safe. 457 */ 458 size = size ? size : 1; 459 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*)) 460 goto nomemory; 461 if (self->extra->children != self->extra->_children) { 462 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer 463 * "children", which needs at least 4 bytes. Although it's a 464 * false alarm always assume at least one child to be safe. 465 */ 466 children = PyObject_Realloc(self->extra->children, 467 size * sizeof(PyObject*)); 468 if (!children) 469 goto nomemory; 470 } else { 471 children = PyObject_Malloc(size * sizeof(PyObject*)); 472 if (!children) 473 goto nomemory; 474 /* copy existing children from static area to malloc buffer */ 475 memcpy(children, self->extra->children, 476 self->extra->length * sizeof(PyObject*)); 477 } 478 self->extra->children = children; 479 self->extra->allocated = size; 480 } 481 482 return 0; 483 484 nomemory: 485 PyErr_NoMemory(); 486 return -1; 487 } 488 489 LOCAL(int) 490 element_add_subelement(ElementObject* self, PyObject* element) 491 { 492 /* add a child element to a parent */ 493 494 if (element_resize(self, 1) < 0) 495 return -1; 496 497 Py_INCREF(element); 498 self->extra->children[self->extra->length] = element; 499 500 self->extra->length++; 501 502 return 0; 503 } 504 505 LOCAL(PyObject*) 506 element_get_attrib(ElementObject* self) 507 { 508 /* return borrowed reference to attrib dictionary */ 509 /* note: this function assumes that the extra section exists */ 510 511 PyObject* res = self->extra->attrib; 512 513 if (res == Py_None) { 514 /* create missing dictionary */ 515 res = PyDict_New(); 516 if (!res) 517 return NULL; 518 Py_DECREF(Py_None); 519 self->extra->attrib = res; 520 } 521 522 return res; 523 } 524 525 LOCAL(PyObject*) 526 element_get_text(ElementObject* self) 527 { 528 /* return borrowed reference to text attribute */ 529 530 PyObject *res = self->text; 531 532 if (JOIN_GET(res)) { 533 res = JOIN_OBJ(res); 534 if (PyList_CheckExact(res)) { 535 PyObject *tmp = list_join(res); 536 if (!tmp) 537 return NULL; 538 self->text = tmp; 539 Py_DECREF(res); 540 res = tmp; 541 } 542 } 543 544 return res; 545 } 546 547 LOCAL(PyObject*) 548 element_get_tail(ElementObject* self) 549 { 550 /* return borrowed reference to text attribute */ 551 552 PyObject *res = self->tail; 553 554 if (JOIN_GET(res)) { 555 res = JOIN_OBJ(res); 556 if (PyList_CheckExact(res)) { 557 PyObject *tmp = list_join(res); 558 if (!tmp) 559 return NULL; 560 self->tail = tmp; 561 Py_DECREF(res); 562 res = tmp; 563 } 564 } 565 566 return res; 567 } 568 569 static PyObject* 570 subelement(PyObject *self, PyObject *args, PyObject *kwds) 571 { 572 PyObject* elem; 573 574 ElementObject* parent; 575 PyObject* tag; 576 PyObject* attrib = NULL; 577 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement", 578 &Element_Type, &parent, &tag, 579 &PyDict_Type, &attrib)) { 580 return NULL; 581 } 582 583 if (attrib) { 584 /* attrib passed as positional arg */ 585 attrib = PyDict_Copy(attrib); 586 if (!attrib) 587 return NULL; 588 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) { 589 Py_DECREF(attrib); 590 return NULL; 591 } 592 } else if (kwds) { 593 /* have keyword args */ 594 attrib = get_attrib_from_keywords(kwds); 595 if (!attrib) 596 return NULL; 597 } else { 598 /* no attrib arg, no kwds, so no attribute */ 599 Py_INCREF(Py_None); 600 attrib = Py_None; 601 } 602 603 elem = create_new_element(tag, attrib); 604 Py_DECREF(attrib); 605 if (elem == NULL) 606 return NULL; 607 608 if (element_add_subelement(parent, elem) < 0) { 609 Py_DECREF(elem); 610 return NULL; 611 } 612 613 return elem; 614 } 615 616 static int 617 element_gc_traverse(ElementObject *self, visitproc visit, void *arg) 618 { 619 Py_VISIT(self->tag); 620 Py_VISIT(JOIN_OBJ(self->text)); 621 Py_VISIT(JOIN_OBJ(self->tail)); 622 623 if (self->extra) { 624 Py_ssize_t i; 625 Py_VISIT(self->extra->attrib); 626 627 for (i = 0; i < self->extra->length; ++i) 628 Py_VISIT(self->extra->children[i]); 629 } 630 return 0; 631 } 632 633 static int 634 element_gc_clear(ElementObject *self) 635 { 636 Py_CLEAR(self->tag); 637 _clear_joined_ptr(&self->text); 638 _clear_joined_ptr(&self->tail); 639 640 /* After dropping all references from extra, it's no longer valid anyway, 641 * so fully deallocate it. 642 */ 643 clear_extra(self); 644 return 0; 645 } 646 647 static void 648 element_dealloc(ElementObject* self) 649 { 650 /* bpo-31095: UnTrack is needed before calling any callbacks */ 651 PyObject_GC_UnTrack(self); 652 Py_TRASHCAN_SAFE_BEGIN(self) 653 654 if (self->weakreflist != NULL) 655 PyObject_ClearWeakRefs((PyObject *) self); 656 657 /* element_gc_clear clears all references and deallocates extra 658 */ 659 element_gc_clear(self); 660 661 RELEASE(sizeof(ElementObject), "destroy element"); 662 Py_TYPE(self)->tp_free((PyObject *)self); 663 Py_TRASHCAN_SAFE_END(self) 664 } 665 666 /* -------------------------------------------------------------------- */ 667 668 /*[clinic input] 669 _elementtree.Element.append 670 671 subelement: object(subclass_of='&Element_Type') 672 / 673 674 [clinic start generated code]*/ 675 676 static PyObject * 677 _elementtree_Element_append_impl(ElementObject *self, PyObject *subelement) 678 /*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/ 679 { 680 if (element_add_subelement(self, subelement) < 0) 681 return NULL; 682 683 Py_RETURN_NONE; 684 } 685 686 /*[clinic input] 687 _elementtree.Element.clear 688 689 [clinic start generated code]*/ 690 691 static PyObject * 692 _elementtree_Element_clear_impl(ElementObject *self) 693 /*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/ 694 { 695 clear_extra(self); 696 697 Py_INCREF(Py_None); 698 _set_joined_ptr(&self->text, Py_None); 699 700 Py_INCREF(Py_None); 701 _set_joined_ptr(&self->tail, Py_None); 702 703 Py_RETURN_NONE; 704 } 705 706 /*[clinic input] 707 _elementtree.Element.__copy__ 708 709 [clinic start generated code]*/ 710 711 static PyObject * 712 _elementtree_Element___copy___impl(ElementObject *self) 713 /*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/ 714 { 715 Py_ssize_t i; 716 ElementObject* element; 717 718 element = (ElementObject*) create_new_element( 719 self->tag, (self->extra) ? self->extra->attrib : Py_None); 720 if (!element) 721 return NULL; 722 723 Py_INCREF(JOIN_OBJ(self->text)); 724 _set_joined_ptr(&element->text, self->text); 725 726 Py_INCREF(JOIN_OBJ(self->tail)); 727 _set_joined_ptr(&element->tail, self->tail); 728 729 assert(!element->extra || !element->extra->length); 730 if (self->extra) { 731 if (element_resize(element, self->extra->length) < 0) { 732 Py_DECREF(element); 733 return NULL; 734 } 735 736 for (i = 0; i < self->extra->length; i++) { 737 Py_INCREF(self->extra->children[i]); 738 element->extra->children[i] = self->extra->children[i]; 739 } 740 741 assert(!element->extra->length); 742 element->extra->length = self->extra->length; 743 } 744 745 return (PyObject*) element; 746 } 747 748 /* Helper for a deep copy. */ 749 LOCAL(PyObject *) deepcopy(PyObject *, PyObject *); 750 751 /*[clinic input] 752 _elementtree.Element.__deepcopy__ 753 754 memo: object(subclass_of="&PyDict_Type") 755 / 756 757 [clinic start generated code]*/ 758 759 static PyObject * 760 _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) 761 /*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/ 762 { 763 Py_ssize_t i; 764 ElementObject* element; 765 PyObject* tag; 766 PyObject* attrib; 767 PyObject* text; 768 PyObject* tail; 769 PyObject* id; 770 771 tag = deepcopy(self->tag, memo); 772 if (!tag) 773 return NULL; 774 775 if (self->extra) { 776 attrib = deepcopy(self->extra->attrib, memo); 777 if (!attrib) { 778 Py_DECREF(tag); 779 return NULL; 780 } 781 } else { 782 Py_INCREF(Py_None); 783 attrib = Py_None; 784 } 785 786 element = (ElementObject*) create_new_element(tag, attrib); 787 788 Py_DECREF(tag); 789 Py_DECREF(attrib); 790 791 if (!element) 792 return NULL; 793 794 text = deepcopy(JOIN_OBJ(self->text), memo); 795 if (!text) 796 goto error; 797 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text))); 798 799 tail = deepcopy(JOIN_OBJ(self->tail), memo); 800 if (!tail) 801 goto error; 802 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail))); 803 804 assert(!element->extra || !element->extra->length); 805 if (self->extra) { 806 if (element_resize(element, self->extra->length) < 0) 807 goto error; 808 809 for (i = 0; i < self->extra->length; i++) { 810 PyObject* child = deepcopy(self->extra->children[i], memo); 811 if (!child) { 812 element->extra->length = i; 813 goto error; 814 } 815 element->extra->children[i] = child; 816 } 817 818 assert(!element->extra->length); 819 element->extra->length = self->extra->length; 820 } 821 822 /* add object to memo dictionary (so deepcopy won't visit it again) */ 823 id = PyLong_FromSsize_t((uintptr_t) self); 824 if (!id) 825 goto error; 826 827 i = PyDict_SetItem(memo, id, (PyObject*) element); 828 829 Py_DECREF(id); 830 831 if (i < 0) 832 goto error; 833 834 return (PyObject*) element; 835 836 error: 837 Py_DECREF(element); 838 return NULL; 839 } 840 841 LOCAL(PyObject *) 842 deepcopy(PyObject *object, PyObject *memo) 843 { 844 /* do a deep copy of the given object */ 845 elementtreestate *st; 846 PyObject *stack[2]; 847 848 /* Fast paths */ 849 if (object == Py_None || PyUnicode_CheckExact(object)) { 850 Py_INCREF(object); 851 return object; 852 } 853 854 if (Py_REFCNT(object) == 1) { 855 if (PyDict_CheckExact(object)) { 856 PyObject *key, *value; 857 Py_ssize_t pos = 0; 858 int simple = 1; 859 while (PyDict_Next(object, &pos, &key, &value)) { 860 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) { 861 simple = 0; 862 break; 863 } 864 } 865 if (simple) 866 return PyDict_Copy(object); 867 /* Fall through to general case */ 868 } 869 else if (Element_CheckExact(object)) { 870 return _elementtree_Element___deepcopy___impl( 871 (ElementObject *)object, memo); 872 } 873 } 874 875 /* General case */ 876 st = ET_STATE_GLOBAL; 877 if (!st->deepcopy_obj) { 878 PyErr_SetString(PyExc_RuntimeError, 879 "deepcopy helper not found"); 880 return NULL; 881 } 882 883 stack[0] = object; 884 stack[1] = memo; 885 return _PyObject_FastCall(st->deepcopy_obj, stack, 2); 886 } 887 888 889 /*[clinic input] 890 _elementtree.Element.__sizeof__ -> Py_ssize_t 891 892 [clinic start generated code]*/ 893 894 static Py_ssize_t 895 _elementtree_Element___sizeof___impl(ElementObject *self) 896 /*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/ 897 { 898 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self)); 899 if (self->extra) { 900 result += sizeof(ElementObjectExtra); 901 if (self->extra->children != self->extra->_children) 902 result += sizeof(PyObject*) * self->extra->allocated; 903 } 904 return result; 905 } 906 907 /* dict keys for getstate/setstate. */ 908 #define PICKLED_TAG "tag" 909 #define PICKLED_CHILDREN "_children" 910 #define PICKLED_ATTRIB "attrib" 911 #define PICKLED_TAIL "tail" 912 #define PICKLED_TEXT "text" 913 914 /* __getstate__ returns a fabricated instance dict as in the pure-Python 915 * Element implementation, for interoperability/interchangeability. This 916 * makes the pure-Python implementation details an API, but (a) there aren't 917 * any unnecessary structures there; and (b) it buys compatibility with 3.2 918 * pickles. See issue #16076. 919 */ 920 /*[clinic input] 921 _elementtree.Element.__getstate__ 922 923 [clinic start generated code]*/ 924 925 static PyObject * 926 _elementtree_Element___getstate___impl(ElementObject *self) 927 /*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/ 928 { 929 Py_ssize_t i, noattrib; 930 PyObject *instancedict = NULL, *children; 931 932 /* Build a list of children. */ 933 children = PyList_New(self->extra ? self->extra->length : 0); 934 if (!children) 935 return NULL; 936 for (i = 0; i < PyList_GET_SIZE(children); i++) { 937 PyObject *child = self->extra->children[i]; 938 Py_INCREF(child); 939 PyList_SET_ITEM(children, i, child); 940 } 941 942 /* Construct the state object. */ 943 noattrib = (self->extra == NULL || self->extra->attrib == Py_None); 944 if (noattrib) 945 instancedict = Py_BuildValue("{sOsOs{}sOsO}", 946 PICKLED_TAG, self->tag, 947 PICKLED_CHILDREN, children, 948 PICKLED_ATTRIB, 949 PICKLED_TEXT, JOIN_OBJ(self->text), 950 PICKLED_TAIL, JOIN_OBJ(self->tail)); 951 else 952 instancedict = Py_BuildValue("{sOsOsOsOsO}", 953 PICKLED_TAG, self->tag, 954 PICKLED_CHILDREN, children, 955 PICKLED_ATTRIB, self->extra->attrib, 956 PICKLED_TEXT, JOIN_OBJ(self->text), 957 PICKLED_TAIL, JOIN_OBJ(self->tail)); 958 if (instancedict) { 959 Py_DECREF(children); 960 return instancedict; 961 } 962 else { 963 for (i = 0; i < PyList_GET_SIZE(children); i++) 964 Py_DECREF(PyList_GET_ITEM(children, i)); 965 Py_DECREF(children); 966 967 return NULL; 968 } 969 } 970 971 static PyObject * 972 element_setstate_from_attributes(ElementObject *self, 973 PyObject *tag, 974 PyObject *attrib, 975 PyObject *text, 976 PyObject *tail, 977 PyObject *children) 978 { 979 Py_ssize_t i, nchildren; 980 ElementObjectExtra *oldextra = NULL; 981 982 if (!tag) { 983 PyErr_SetString(PyExc_TypeError, "tag may not be NULL"); 984 return NULL; 985 } 986 987 Py_INCREF(tag); 988 Py_XSETREF(self->tag, tag); 989 990 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None; 991 Py_INCREF(JOIN_OBJ(text)); 992 _set_joined_ptr(&self->text, text); 993 994 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None; 995 Py_INCREF(JOIN_OBJ(tail)); 996 _set_joined_ptr(&self->tail, tail); 997 998 /* Handle ATTRIB and CHILDREN. */ 999 if (!children && !attrib) { 1000 Py_RETURN_NONE; 1001 } 1002 1003 /* Compute 'nchildren'. */ 1004 if (children) { 1005 if (!PyList_Check(children)) { 1006 PyErr_SetString(PyExc_TypeError, "'_children' is not a list"); 1007 return NULL; 1008 } 1009 nchildren = PyList_GET_SIZE(children); 1010 1011 /* (Re-)allocate 'extra'. 1012 Avoid DECREFs calling into this code again (cycles, etc.) 1013 */ 1014 oldextra = self->extra; 1015 self->extra = NULL; 1016 if (element_resize(self, nchildren)) { 1017 assert(!self->extra || !self->extra->length); 1018 clear_extra(self); 1019 self->extra = oldextra; 1020 return NULL; 1021 } 1022 assert(self->extra); 1023 assert(self->extra->allocated >= nchildren); 1024 if (oldextra) { 1025 assert(self->extra->attrib == Py_None); 1026 self->extra->attrib = oldextra->attrib; 1027 oldextra->attrib = Py_None; 1028 } 1029 1030 /* Copy children */ 1031 for (i = 0; i < nchildren; i++) { 1032 self->extra->children[i] = PyList_GET_ITEM(children, i); 1033 Py_INCREF(self->extra->children[i]); 1034 } 1035 1036 assert(!self->extra->length); 1037 self->extra->length = nchildren; 1038 } 1039 else { 1040 if (element_resize(self, 0)) { 1041 return NULL; 1042 } 1043 } 1044 1045 /* Stash attrib. */ 1046 if (attrib) { 1047 Py_INCREF(attrib); 1048 Py_XSETREF(self->extra->attrib, attrib); 1049 } 1050 dealloc_extra(oldextra); 1051 1052 Py_RETURN_NONE; 1053 } 1054 1055 /* __setstate__ for Element instance from the Python implementation. 1056 * 'state' should be the instance dict. 1057 */ 1058 1059 static PyObject * 1060 element_setstate_from_Python(ElementObject *self, PyObject *state) 1061 { 1062 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT, 1063 PICKLED_TAIL, PICKLED_CHILDREN, 0}; 1064 PyObject *args; 1065 PyObject *tag, *attrib, *text, *tail, *children; 1066 PyObject *retval; 1067 1068 tag = attrib = text = tail = children = NULL; 1069 args = PyTuple_New(0); 1070 if (!args) 1071 return NULL; 1072 1073 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag, 1074 &attrib, &text, &tail, &children)) 1075 retval = element_setstate_from_attributes(self, tag, attrib, text, 1076 tail, children); 1077 else 1078 retval = NULL; 1079 1080 Py_DECREF(args); 1081 return retval; 1082 } 1083 1084 /*[clinic input] 1085 _elementtree.Element.__setstate__ 1086 1087 state: object 1088 / 1089 1090 [clinic start generated code]*/ 1091 1092 static PyObject * 1093 _elementtree_Element___setstate__(ElementObject *self, PyObject *state) 1094 /*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/ 1095 { 1096 if (!PyDict_CheckExact(state)) { 1097 PyErr_Format(PyExc_TypeError, 1098 "Don't know how to unpickle \"%.200R\" as an Element", 1099 state); 1100 return NULL; 1101 } 1102 else 1103 return element_setstate_from_Python(self, state); 1104 } 1105 1106 LOCAL(int) 1107 checkpath(PyObject* tag) 1108 { 1109 Py_ssize_t i; 1110 int check = 1; 1111 1112 /* check if a tag contains an xpath character */ 1113 1114 #define PATHCHAR(ch) \ 1115 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.') 1116 1117 if (PyUnicode_Check(tag)) { 1118 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag); 1119 void *data = PyUnicode_DATA(tag); 1120 unsigned int kind = PyUnicode_KIND(tag); 1121 for (i = 0; i < len; i++) { 1122 Py_UCS4 ch = PyUnicode_READ(kind, data, i); 1123 if (ch == '{') 1124 check = 0; 1125 else if (ch == '}') 1126 check = 1; 1127 else if (check && PATHCHAR(ch)) 1128 return 1; 1129 } 1130 return 0; 1131 } 1132 if (PyBytes_Check(tag)) { 1133 char *p = PyBytes_AS_STRING(tag); 1134 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) { 1135 if (p[i] == '{') 1136 check = 0; 1137 else if (p[i] == '}') 1138 check = 1; 1139 else if (check && PATHCHAR(p[i])) 1140 return 1; 1141 } 1142 return 0; 1143 } 1144 1145 return 1; /* unknown type; might be path expression */ 1146 } 1147 1148 /*[clinic input] 1149 _elementtree.Element.extend 1150 1151 elements: object 1152 / 1153 1154 [clinic start generated code]*/ 1155 1156 static PyObject * 1157 _elementtree_Element_extend(ElementObject *self, PyObject *elements) 1158 /*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/ 1159 { 1160 PyObject* seq; 1161 Py_ssize_t i; 1162 1163 seq = PySequence_Fast(elements, ""); 1164 if (!seq) { 1165 PyErr_Format( 1166 PyExc_TypeError, 1167 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name 1168 ); 1169 return NULL; 1170 } 1171 1172 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) { 1173 PyObject* element = PySequence_Fast_GET_ITEM(seq, i); 1174 Py_INCREF(element); 1175 if (!Element_Check(element)) { 1176 PyErr_Format( 1177 PyExc_TypeError, 1178 "expected an Element, not \"%.200s\"", 1179 Py_TYPE(element)->tp_name); 1180 Py_DECREF(seq); 1181 Py_DECREF(element); 1182 return NULL; 1183 } 1184 1185 if (element_add_subelement(self, element) < 0) { 1186 Py_DECREF(seq); 1187 Py_DECREF(element); 1188 return NULL; 1189 } 1190 Py_DECREF(element); 1191 } 1192 1193 Py_DECREF(seq); 1194 1195 Py_RETURN_NONE; 1196 } 1197 1198 /*[clinic input] 1199 _elementtree.Element.find 1200 1201 path: object 1202 namespaces: object = None 1203 1204 [clinic start generated code]*/ 1205 1206 static PyObject * 1207 _elementtree_Element_find_impl(ElementObject *self, PyObject *path, 1208 PyObject *namespaces) 1209 /*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/ 1210 { 1211 Py_ssize_t i; 1212 elementtreestate *st = ET_STATE_GLOBAL; 1213 1214 if (checkpath(path) || namespaces != Py_None) { 1215 _Py_IDENTIFIER(find); 1216 return _PyObject_CallMethodIdObjArgs( 1217 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL 1218 ); 1219 } 1220 1221 if (!self->extra) 1222 Py_RETURN_NONE; 1223 1224 for (i = 0; i < self->extra->length; i++) { 1225 PyObject* item = self->extra->children[i]; 1226 int rc; 1227 if (!Element_Check(item)) 1228 continue; 1229 Py_INCREF(item); 1230 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ); 1231 if (rc > 0) 1232 return item; 1233 Py_DECREF(item); 1234 if (rc < 0) 1235 return NULL; 1236 } 1237 1238 Py_RETURN_NONE; 1239 } 1240 1241 /*[clinic input] 1242 _elementtree.Element.findtext 1243 1244 path: object 1245 default: object = None 1246 namespaces: object = None 1247 1248 [clinic start generated code]*/ 1249 1250 static PyObject * 1251 _elementtree_Element_findtext_impl(ElementObject *self, PyObject *path, 1252 PyObject *default_value, 1253 PyObject *namespaces) 1254 /*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/ 1255 { 1256 Py_ssize_t i; 1257 _Py_IDENTIFIER(findtext); 1258 elementtreestate *st = ET_STATE_GLOBAL; 1259 1260 if (checkpath(path) || namespaces != Py_None) 1261 return _PyObject_CallMethodIdObjArgs( 1262 st->elementpath_obj, &PyId_findtext, 1263 self, path, default_value, namespaces, NULL 1264 ); 1265 1266 if (!self->extra) { 1267 Py_INCREF(default_value); 1268 return default_value; 1269 } 1270 1271 for (i = 0; i < self->extra->length; i++) { 1272 PyObject *item = self->extra->children[i]; 1273 int rc; 1274 if (!Element_Check(item)) 1275 continue; 1276 Py_INCREF(item); 1277 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ); 1278 if (rc > 0) { 1279 PyObject* text = element_get_text((ElementObject*)item); 1280 if (text == Py_None) { 1281 Py_DECREF(item); 1282 return PyUnicode_New(0, 0); 1283 } 1284 Py_XINCREF(text); 1285 Py_DECREF(item); 1286 return text; 1287 } 1288 Py_DECREF(item); 1289 if (rc < 0) 1290 return NULL; 1291 } 1292 1293 Py_INCREF(default_value); 1294 return default_value; 1295 } 1296 1297 /*[clinic input] 1298 _elementtree.Element.findall 1299 1300 path: object 1301 namespaces: object = None 1302 1303 [clinic start generated code]*/ 1304 1305 static PyObject * 1306 _elementtree_Element_findall_impl(ElementObject *self, PyObject *path, 1307 PyObject *namespaces) 1308 /*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/ 1309 { 1310 Py_ssize_t i; 1311 PyObject* out; 1312 elementtreestate *st = ET_STATE_GLOBAL; 1313 1314 if (checkpath(path) || namespaces != Py_None) { 1315 _Py_IDENTIFIER(findall); 1316 return _PyObject_CallMethodIdObjArgs( 1317 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL 1318 ); 1319 } 1320 1321 out = PyList_New(0); 1322 if (!out) 1323 return NULL; 1324 1325 if (!self->extra) 1326 return out; 1327 1328 for (i = 0; i < self->extra->length; i++) { 1329 PyObject* item = self->extra->children[i]; 1330 int rc; 1331 if (!Element_Check(item)) 1332 continue; 1333 Py_INCREF(item); 1334 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ); 1335 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) { 1336 Py_DECREF(item); 1337 Py_DECREF(out); 1338 return NULL; 1339 } 1340 Py_DECREF(item); 1341 } 1342 1343 return out; 1344 } 1345 1346 /*[clinic input] 1347 _elementtree.Element.iterfind 1348 1349 path: object 1350 namespaces: object = None 1351 1352 [clinic start generated code]*/ 1353 1354 static PyObject * 1355 _elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path, 1356 PyObject *namespaces) 1357 /*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/ 1358 { 1359 PyObject* tag = path; 1360 _Py_IDENTIFIER(iterfind); 1361 elementtreestate *st = ET_STATE_GLOBAL; 1362 1363 return _PyObject_CallMethodIdObjArgs( 1364 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL); 1365 } 1366 1367 /*[clinic input] 1368 _elementtree.Element.get 1369 1370 key: object 1371 default: object = None 1372 1373 [clinic start generated code]*/ 1374 1375 static PyObject * 1376 _elementtree_Element_get_impl(ElementObject *self, PyObject *key, 1377 PyObject *default_value) 1378 /*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/ 1379 { 1380 PyObject* value; 1381 1382 if (!self->extra || self->extra->attrib == Py_None) 1383 value = default_value; 1384 else { 1385 value = PyDict_GetItem(self->extra->attrib, key); 1386 if (!value) 1387 value = default_value; 1388 } 1389 1390 Py_INCREF(value); 1391 return value; 1392 } 1393 1394 /*[clinic input] 1395 _elementtree.Element.getchildren 1396 1397 [clinic start generated code]*/ 1398 1399 static PyObject * 1400 _elementtree_Element_getchildren_impl(ElementObject *self) 1401 /*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/ 1402 { 1403 Py_ssize_t i; 1404 PyObject* list; 1405 1406 if (PyErr_WarnEx(PyExc_DeprecationWarning, 1407 "This method will be removed in future versions. " 1408 "Use 'list(elem)' or iteration over elem instead.", 1409 1) < 0) { 1410 return NULL; 1411 } 1412 1413 if (!self->extra) 1414 return PyList_New(0); 1415 1416 list = PyList_New(self->extra->length); 1417 if (!list) 1418 return NULL; 1419 1420 for (i = 0; i < self->extra->length; i++) { 1421 PyObject* item = self->extra->children[i]; 1422 Py_INCREF(item); 1423 PyList_SET_ITEM(list, i, item); 1424 } 1425 1426 return list; 1427 } 1428 1429 1430 static PyObject * 1431 create_elementiter(ElementObject *self, PyObject *tag, int gettext); 1432 1433 1434 /*[clinic input] 1435 _elementtree.Element.iter 1436 1437 tag: object = None 1438 1439 [clinic start generated code]*/ 1440 1441 static PyObject * 1442 _elementtree_Element_iter_impl(ElementObject *self, PyObject *tag) 1443 /*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/ 1444 { 1445 if (PyUnicode_Check(tag)) { 1446 if (PyUnicode_READY(tag) < 0) 1447 return NULL; 1448 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*') 1449 tag = Py_None; 1450 } 1451 else if (PyBytes_Check(tag)) { 1452 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*') 1453 tag = Py_None; 1454 } 1455 1456 return create_elementiter(self, tag, 0); 1457 } 1458 1459 1460 /*[clinic input] 1461 _elementtree.Element.getiterator 1462 1463 tag: object = None 1464 1465 [clinic start generated code]*/ 1466 1467 static PyObject * 1468 _elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag) 1469 /*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/ 1470 { 1471 /* Change for a DeprecationWarning in 1.4 */ 1472 if (PyErr_WarnEx(PyExc_PendingDeprecationWarning, 1473 "This method will be removed in future versions. " 1474 "Use 'tree.iter()' or 'list(tree.iter())' instead.", 1475 1) < 0) { 1476 return NULL; 1477 } 1478 return _elementtree_Element_iter_impl(self, tag); 1479 } 1480 1481 1482 /*[clinic input] 1483 _elementtree.Element.itertext 1484 1485 [clinic start generated code]*/ 1486 1487 static PyObject * 1488 _elementtree_Element_itertext_impl(ElementObject *self) 1489 /*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/ 1490 { 1491 return create_elementiter(self, Py_None, 1); 1492 } 1493 1494 1495 static PyObject* 1496 element_getitem(PyObject* self_, Py_ssize_t index) 1497 { 1498 ElementObject* self = (ElementObject*) self_; 1499 1500 if (!self->extra || index < 0 || index >= self->extra->length) { 1501 PyErr_SetString( 1502 PyExc_IndexError, 1503 "child index out of range" 1504 ); 1505 return NULL; 1506 } 1507 1508 Py_INCREF(self->extra->children[index]); 1509 return self->extra->children[index]; 1510 } 1511 1512 /*[clinic input] 1513 _elementtree.Element.insert 1514 1515 index: Py_ssize_t 1516 subelement: object(subclass_of='&Element_Type') 1517 / 1518 1519 [clinic start generated code]*/ 1520 1521 static PyObject * 1522 _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index, 1523 PyObject *subelement) 1524 /*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/ 1525 { 1526 Py_ssize_t i; 1527 1528 if (!self->extra) { 1529 if (create_extra(self, NULL) < 0) 1530 return NULL; 1531 } 1532 1533 if (index < 0) { 1534 index += self->extra->length; 1535 if (index < 0) 1536 index = 0; 1537 } 1538 if (index > self->extra->length) 1539 index = self->extra->length; 1540 1541 if (element_resize(self, 1) < 0) 1542 return NULL; 1543 1544 for (i = self->extra->length; i > index; i--) 1545 self->extra->children[i] = self->extra->children[i-1]; 1546 1547 Py_INCREF(subelement); 1548 self->extra->children[index] = subelement; 1549 1550 self->extra->length++; 1551 1552 Py_RETURN_NONE; 1553 } 1554 1555 /*[clinic input] 1556 _elementtree.Element.items 1557 1558 [clinic start generated code]*/ 1559 1560 static PyObject * 1561 _elementtree_Element_items_impl(ElementObject *self) 1562 /*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/ 1563 { 1564 if (!self->extra || self->extra->attrib == Py_None) 1565 return PyList_New(0); 1566 1567 return PyDict_Items(self->extra->attrib); 1568 } 1569 1570 /*[clinic input] 1571 _elementtree.Element.keys 1572 1573 [clinic start generated code]*/ 1574 1575 static PyObject * 1576 _elementtree_Element_keys_impl(ElementObject *self) 1577 /*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/ 1578 { 1579 if (!self->extra || self->extra->attrib == Py_None) 1580 return PyList_New(0); 1581 1582 return PyDict_Keys(self->extra->attrib); 1583 } 1584 1585 static Py_ssize_t 1586 element_length(ElementObject* self) 1587 { 1588 if (!self->extra) 1589 return 0; 1590 1591 return self->extra->length; 1592 } 1593 1594 /*[clinic input] 1595 _elementtree.Element.makeelement 1596 1597 tag: object 1598 attrib: object 1599 / 1600 1601 [clinic start generated code]*/ 1602 1603 static PyObject * 1604 _elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag, 1605 PyObject *attrib) 1606 /*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/ 1607 { 1608 PyObject* elem; 1609 1610 attrib = PyDict_Copy(attrib); 1611 if (!attrib) 1612 return NULL; 1613 1614 elem = create_new_element(tag, attrib); 1615 1616 Py_DECREF(attrib); 1617 1618 return elem; 1619 } 1620 1621 /*[clinic input] 1622 _elementtree.Element.remove 1623 1624 subelement: object(subclass_of='&Element_Type') 1625 / 1626 1627 [clinic start generated code]*/ 1628 1629 static PyObject * 1630 _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) 1631 /*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/ 1632 { 1633 Py_ssize_t i; 1634 int rc; 1635 PyObject *found; 1636 1637 if (!self->extra) { 1638 /* element has no children, so raise exception */ 1639 PyErr_SetString( 1640 PyExc_ValueError, 1641 "list.remove(x): x not in list" 1642 ); 1643 return NULL; 1644 } 1645 1646 for (i = 0; i < self->extra->length; i++) { 1647 if (self->extra->children[i] == subelement) 1648 break; 1649 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ); 1650 if (rc > 0) 1651 break; 1652 if (rc < 0) 1653 return NULL; 1654 } 1655 1656 if (i >= self->extra->length) { 1657 /* subelement is not in children, so raise exception */ 1658 PyErr_SetString( 1659 PyExc_ValueError, 1660 "list.remove(x): x not in list" 1661 ); 1662 return NULL; 1663 } 1664 1665 found = self->extra->children[i]; 1666 1667 self->extra->length--; 1668 for (; i < self->extra->length; i++) 1669 self->extra->children[i] = self->extra->children[i+1]; 1670 1671 Py_DECREF(found); 1672 Py_RETURN_NONE; 1673 } 1674 1675 static PyObject* 1676 element_repr(ElementObject* self) 1677 { 1678 int status; 1679 1680 if (self->tag == NULL) 1681 return PyUnicode_FromFormat("<Element at %p>", self); 1682 1683 status = Py_ReprEnter((PyObject *)self); 1684 if (status == 0) { 1685 PyObject *res; 1686 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self); 1687 Py_ReprLeave((PyObject *)self); 1688 return res; 1689 } 1690 if (status > 0) 1691 PyErr_Format(PyExc_RuntimeError, 1692 "reentrant call inside %s.__repr__", 1693 Py_TYPE(self)->tp_name); 1694 return NULL; 1695 } 1696 1697 /*[clinic input] 1698 _elementtree.Element.set 1699 1700 key: object 1701 value: object 1702 / 1703 1704 [clinic start generated code]*/ 1705 1706 static PyObject * 1707 _elementtree_Element_set_impl(ElementObject *self, PyObject *key, 1708 PyObject *value) 1709 /*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/ 1710 { 1711 PyObject* attrib; 1712 1713 if (!self->extra) { 1714 if (create_extra(self, NULL) < 0) 1715 return NULL; 1716 } 1717 1718 attrib = element_get_attrib(self); 1719 if (!attrib) 1720 return NULL; 1721 1722 if (PyDict_SetItem(attrib, key, value) < 0) 1723 return NULL; 1724 1725 Py_RETURN_NONE; 1726 } 1727 1728 static int 1729 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) 1730 { 1731 ElementObject* self = (ElementObject*) self_; 1732 Py_ssize_t i; 1733 PyObject* old; 1734 1735 if (!self->extra || index < 0 || index >= self->extra->length) { 1736 PyErr_SetString( 1737 PyExc_IndexError, 1738 "child assignment index out of range"); 1739 return -1; 1740 } 1741 1742 old = self->extra->children[index]; 1743 1744 if (item) { 1745 Py_INCREF(item); 1746 self->extra->children[index] = item; 1747 } else { 1748 self->extra->length--; 1749 for (i = index; i < self->extra->length; i++) 1750 self->extra->children[i] = self->extra->children[i+1]; 1751 } 1752 1753 Py_DECREF(old); 1754 1755 return 0; 1756 } 1757 1758 static PyObject* 1759 element_subscr(PyObject* self_, PyObject* item) 1760 { 1761 ElementObject* self = (ElementObject*) self_; 1762 1763 if (PyIndex_Check(item)) { 1764 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); 1765 1766 if (i == -1 && PyErr_Occurred()) { 1767 return NULL; 1768 } 1769 if (i < 0 && self->extra) 1770 i += self->extra->length; 1771 return element_getitem(self_, i); 1772 } 1773 else if (PySlice_Check(item)) { 1774 Py_ssize_t start, stop, step, slicelen, cur, i; 1775 PyObject* list; 1776 1777 if (!self->extra) 1778 return PyList_New(0); 1779 1780 if (PySlice_Unpack(item, &start, &stop, &step) < 0) { 1781 return NULL; 1782 } 1783 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop, 1784 step); 1785 1786 if (slicelen <= 0) 1787 return PyList_New(0); 1788 else { 1789 list = PyList_New(slicelen); 1790 if (!list) 1791 return NULL; 1792 1793 for (cur = start, i = 0; i < slicelen; 1794 cur += step, i++) { 1795 PyObject* item = self->extra->children[cur]; 1796 Py_INCREF(item); 1797 PyList_SET_ITEM(list, i, item); 1798 } 1799 1800 return list; 1801 } 1802 } 1803 else { 1804 PyErr_SetString(PyExc_TypeError, 1805 "element indices must be integers"); 1806 return NULL; 1807 } 1808 } 1809 1810 static int 1811 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) 1812 { 1813 ElementObject* self = (ElementObject*) self_; 1814 1815 if (PyIndex_Check(item)) { 1816 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); 1817 1818 if (i == -1 && PyErr_Occurred()) { 1819 return -1; 1820 } 1821 if (i < 0 && self->extra) 1822 i += self->extra->length; 1823 return element_setitem(self_, i, value); 1824 } 1825 else if (PySlice_Check(item)) { 1826 Py_ssize_t start, stop, step, slicelen, newlen, cur, i; 1827 1828 PyObject* recycle = NULL; 1829 PyObject* seq; 1830 1831 if (!self->extra) { 1832 if (create_extra(self, NULL) < 0) 1833 return -1; 1834 } 1835 1836 if (PySlice_Unpack(item, &start, &stop, &step) < 0) { 1837 return -1; 1838 } 1839 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop, 1840 step); 1841 1842 if (value == NULL) { 1843 /* Delete slice */ 1844 size_t cur; 1845 Py_ssize_t i; 1846 1847 if (slicelen <= 0) 1848 return 0; 1849 1850 /* Since we're deleting, the direction of the range doesn't matter, 1851 * so for simplicity make it always ascending. 1852 */ 1853 if (step < 0) { 1854 stop = start + 1; 1855 start = stop + step * (slicelen - 1) - 1; 1856 step = -step; 1857 } 1858 1859 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *)); 1860 1861 /* recycle is a list that will contain all the children 1862 * scheduled for removal. 1863 */ 1864 if (!(recycle = PyList_New(slicelen))) { 1865 return -1; 1866 } 1867 1868 /* This loop walks over all the children that have to be deleted, 1869 * with cur pointing at them. num_moved is the amount of children 1870 * until the next deleted child that have to be "shifted down" to 1871 * occupy the deleted's places. 1872 * Note that in the ith iteration, shifting is done i+i places down 1873 * because i children were already removed. 1874 */ 1875 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) { 1876 /* Compute how many children have to be moved, clipping at the 1877 * list end. 1878 */ 1879 Py_ssize_t num_moved = step - 1; 1880 if (cur + step >= (size_t)self->extra->length) { 1881 num_moved = self->extra->length - cur - 1; 1882 } 1883 1884 PyList_SET_ITEM(recycle, i, self->extra->children[cur]); 1885 1886 memmove( 1887 self->extra->children + cur - i, 1888 self->extra->children + cur + 1, 1889 num_moved * sizeof(PyObject *)); 1890 } 1891 1892 /* Leftover "tail" after the last removed child */ 1893 cur = start + (size_t)slicelen * step; 1894 if (cur < (size_t)self->extra->length) { 1895 memmove( 1896 self->extra->children + cur - slicelen, 1897 self->extra->children + cur, 1898 (self->extra->length - cur) * sizeof(PyObject *)); 1899 } 1900 1901 self->extra->length -= slicelen; 1902 1903 /* Discard the recycle list with all the deleted sub-elements */ 1904 Py_DECREF(recycle); 1905 return 0; 1906 } 1907 1908 /* A new slice is actually being assigned */ 1909 seq = PySequence_Fast(value, ""); 1910 if (!seq) { 1911 PyErr_Format( 1912 PyExc_TypeError, 1913 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name 1914 ); 1915 return -1; 1916 } 1917 newlen = PySequence_Fast_GET_SIZE(seq); 1918 1919 if (step != 1 && newlen != slicelen) 1920 { 1921 Py_DECREF(seq); 1922 PyErr_Format(PyExc_ValueError, 1923 "attempt to assign sequence of size %zd " 1924 "to extended slice of size %zd", 1925 newlen, slicelen 1926 ); 1927 return -1; 1928 } 1929 1930 /* Resize before creating the recycle bin, to prevent refleaks. */ 1931 if (newlen > slicelen) { 1932 if (element_resize(self, newlen - slicelen) < 0) { 1933 Py_DECREF(seq); 1934 return -1; 1935 } 1936 } 1937 1938 if (slicelen > 0) { 1939 /* to avoid recursive calls to this method (via decref), move 1940 old items to the recycle bin here, and get rid of them when 1941 we're done modifying the element */ 1942 recycle = PyList_New(slicelen); 1943 if (!recycle) { 1944 Py_DECREF(seq); 1945 return -1; 1946 } 1947 for (cur = start, i = 0; i < slicelen; 1948 cur += step, i++) 1949 PyList_SET_ITEM(recycle, i, self->extra->children[cur]); 1950 } 1951 1952 if (newlen < slicelen) { 1953 /* delete slice */ 1954 for (i = stop; i < self->extra->length; i++) 1955 self->extra->children[i + newlen - slicelen] = self->extra->children[i]; 1956 } else if (newlen > slicelen) { 1957 /* insert slice */ 1958 for (i = self->extra->length-1; i >= stop; i--) 1959 self->extra->children[i + newlen - slicelen] = self->extra->children[i]; 1960 } 1961 1962 /* replace the slice */ 1963 for (cur = start, i = 0; i < newlen; 1964 cur += step, i++) { 1965 PyObject* element = PySequence_Fast_GET_ITEM(seq, i); 1966 Py_INCREF(element); 1967 self->extra->children[cur] = element; 1968 } 1969 1970 self->extra->length += newlen - slicelen; 1971 1972 Py_DECREF(seq); 1973 1974 /* discard the recycle bin, and everything in it */ 1975 Py_XDECREF(recycle); 1976 1977 return 0; 1978 } 1979 else { 1980 PyErr_SetString(PyExc_TypeError, 1981 "element indices must be integers"); 1982 return -1; 1983 } 1984 } 1985 1986 static PyObject* 1987 element_tag_getter(ElementObject *self, void *closure) 1988 { 1989 PyObject *res = self->tag; 1990 Py_INCREF(res); 1991 return res; 1992 } 1993 1994 static PyObject* 1995 element_text_getter(ElementObject *self, void *closure) 1996 { 1997 PyObject *res = element_get_text(self); 1998 Py_XINCREF(res); 1999 return res; 2000 } 2001 2002 static PyObject* 2003 element_tail_getter(ElementObject *self, void *closure) 2004 { 2005 PyObject *res = element_get_tail(self); 2006 Py_XINCREF(res); 2007 return res; 2008 } 2009 2010 static PyObject* 2011 element_attrib_getter(ElementObject *self, void *closure) 2012 { 2013 PyObject *res; 2014 if (!self->extra) { 2015 if (create_extra(self, NULL) < 0) 2016 return NULL; 2017 } 2018 res = element_get_attrib(self); 2019 Py_XINCREF(res); 2020 return res; 2021 } 2022 2023 /* macro for setter validation */ 2024 #define _VALIDATE_ATTR_VALUE(V) \ 2025 if ((V) == NULL) { \ 2026 PyErr_SetString( \ 2027 PyExc_AttributeError, \ 2028 "can't delete element attribute"); \ 2029 return -1; \ 2030 } 2031 2032 static int 2033 element_tag_setter(ElementObject *self, PyObject *value, void *closure) 2034 { 2035 _VALIDATE_ATTR_VALUE(value); 2036 Py_INCREF(value); 2037 Py_SETREF(self->tag, value); 2038 return 0; 2039 } 2040 2041 static int 2042 element_text_setter(ElementObject *self, PyObject *value, void *closure) 2043 { 2044 _VALIDATE_ATTR_VALUE(value); 2045 Py_INCREF(value); 2046 _set_joined_ptr(&self->text, value); 2047 return 0; 2048 } 2049 2050 static int 2051 element_tail_setter(ElementObject *self, PyObject *value, void *closure) 2052 { 2053 _VALIDATE_ATTR_VALUE(value); 2054 Py_INCREF(value); 2055 _set_joined_ptr(&self->tail, value); 2056 return 0; 2057 } 2058 2059 static int 2060 element_attrib_setter(ElementObject *self, PyObject *value, void *closure) 2061 { 2062 _VALIDATE_ATTR_VALUE(value); 2063 if (!self->extra) { 2064 if (create_extra(self, NULL) < 0) 2065 return -1; 2066 } 2067 Py_INCREF(value); 2068 Py_SETREF(self->extra->attrib, value); 2069 return 0; 2070 } 2071 2072 static PySequenceMethods element_as_sequence = { 2073 (lenfunc) element_length, 2074 0, /* sq_concat */ 2075 0, /* sq_repeat */ 2076 element_getitem, 2077 0, 2078 element_setitem, 2079 0, 2080 }; 2081 2082 /******************************* Element iterator ****************************/ 2083 2084 /* ElementIterObject represents the iteration state over an XML element in 2085 * pre-order traversal. To keep track of which sub-element should be returned 2086 * next, a stack of parents is maintained. This is a standard stack-based 2087 * iterative pre-order traversal of a tree. 2088 * The stack is managed using a continuous array. 2089 * Each stack item contains the saved parent to which we should return after 2090 * the current one is exhausted, and the next child to examine in that parent. 2091 */ 2092 typedef struct ParentLocator_t { 2093 ElementObject *parent; 2094 Py_ssize_t child_index; 2095 } ParentLocator; 2096 2097 typedef struct { 2098 PyObject_HEAD 2099 ParentLocator *parent_stack; 2100 Py_ssize_t parent_stack_used; 2101 Py_ssize_t parent_stack_size; 2102 ElementObject *root_element; 2103 PyObject *sought_tag; 2104 int gettext; 2105 } ElementIterObject; 2106 2107 2108 static void 2109 elementiter_dealloc(ElementIterObject *it) 2110 { 2111 Py_ssize_t i = it->parent_stack_used; 2112 it->parent_stack_used = 0; 2113 /* bpo-31095: UnTrack is needed before calling any callbacks */ 2114 PyObject_GC_UnTrack(it); 2115 while (i--) 2116 Py_XDECREF(it->parent_stack[i].parent); 2117 PyMem_Free(it->parent_stack); 2118 2119 Py_XDECREF(it->sought_tag); 2120 Py_XDECREF(it->root_element); 2121 2122 PyObject_GC_Del(it); 2123 } 2124 2125 static int 2126 elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg) 2127 { 2128 Py_ssize_t i = it->parent_stack_used; 2129 while (i--) 2130 Py_VISIT(it->parent_stack[i].parent); 2131 2132 Py_VISIT(it->root_element); 2133 Py_VISIT(it->sought_tag); 2134 return 0; 2135 } 2136 2137 /* Helper function for elementiter_next. Add a new parent to the parent stack. 2138 */ 2139 static int 2140 parent_stack_push_new(ElementIterObject *it, ElementObject *parent) 2141 { 2142 ParentLocator *item; 2143 2144 if (it->parent_stack_used >= it->parent_stack_size) { 2145 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */ 2146 ParentLocator *parent_stack = it->parent_stack; 2147 PyMem_Resize(parent_stack, ParentLocator, new_size); 2148 if (parent_stack == NULL) 2149 return -1; 2150 it->parent_stack = parent_stack; 2151 it->parent_stack_size = new_size; 2152 } 2153 item = it->parent_stack + it->parent_stack_used++; 2154 Py_INCREF(parent); 2155 item->parent = parent; 2156 item->child_index = 0; 2157 return 0; 2158 } 2159 2160 static PyObject * 2161 elementiter_next(ElementIterObject *it) 2162 { 2163 /* Sub-element iterator. 2164 * 2165 * A short note on gettext: this function serves both the iter() and 2166 * itertext() methods to avoid code duplication. However, there are a few 2167 * small differences in the way these iterations work. Namely: 2168 * - itertext() only yields text from nodes that have it, and continues 2169 * iterating when a node doesn't have text (so it doesn't return any 2170 * node like iter()) 2171 * - itertext() also has to handle tail, after finishing with all the 2172 * children of a node. 2173 */ 2174 int rc; 2175 ElementObject *elem; 2176 PyObject *text; 2177 2178 while (1) { 2179 /* Handle the case reached in the beginning and end of iteration, where 2180 * the parent stack is empty. If root_element is NULL and we're here, the 2181 * iterator is exhausted. 2182 */ 2183 if (!it->parent_stack_used) { 2184 if (!it->root_element) { 2185 PyErr_SetNone(PyExc_StopIteration); 2186 return NULL; 2187 } 2188 2189 elem = it->root_element; /* steals a reference */ 2190 it->root_element = NULL; 2191 } 2192 else { 2193 /* See if there are children left to traverse in the current parent. If 2194 * yes, visit the next child. If not, pop the stack and try again. 2195 */ 2196 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1]; 2197 Py_ssize_t child_index = item->child_index; 2198 ElementObjectExtra *extra; 2199 elem = item->parent; 2200 extra = elem->extra; 2201 if (!extra || child_index >= extra->length) { 2202 it->parent_stack_used--; 2203 /* Note that extra condition on it->parent_stack_used here; 2204 * this is because itertext() is supposed to only return *inner* 2205 * text, not text following the element it began iteration with. 2206 */ 2207 if (it->gettext && it->parent_stack_used) { 2208 text = element_get_tail(elem); 2209 goto gettext; 2210 } 2211 Py_DECREF(elem); 2212 continue; 2213 } 2214 2215 if (!Element_Check(extra->children[child_index])) { 2216 PyErr_Format(PyExc_AttributeError, 2217 "'%.100s' object has no attribute 'iter'", 2218 Py_TYPE(extra->children[child_index])->tp_name); 2219 return NULL; 2220 } 2221 elem = (ElementObject *)extra->children[child_index]; 2222 item->child_index++; 2223 Py_INCREF(elem); 2224 } 2225 2226 if (parent_stack_push_new(it, elem) < 0) { 2227 Py_DECREF(elem); 2228 PyErr_NoMemory(); 2229 return NULL; 2230 } 2231 if (it->gettext) { 2232 text = element_get_text(elem); 2233 goto gettext; 2234 } 2235 2236 if (it->sought_tag == Py_None) 2237 return (PyObject *)elem; 2238 2239 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ); 2240 if (rc > 0) 2241 return (PyObject *)elem; 2242 2243 Py_DECREF(elem); 2244 if (rc < 0) 2245 return NULL; 2246 continue; 2247 2248 gettext: 2249 if (!text) { 2250 Py_DECREF(elem); 2251 return NULL; 2252 } 2253 if (text == Py_None) { 2254 Py_DECREF(elem); 2255 } 2256 else { 2257 Py_INCREF(text); 2258 Py_DECREF(elem); 2259 rc = PyObject_IsTrue(text); 2260 if (rc > 0) 2261 return text; 2262 Py_DECREF(text); 2263 if (rc < 0) 2264 return NULL; 2265 } 2266 } 2267 2268 return NULL; 2269 } 2270 2271 2272 static PyTypeObject ElementIter_Type = { 2273 PyVarObject_HEAD_INIT(NULL, 0) 2274 /* Using the module's name since the pure-Python implementation does not 2275 have such a type. */ 2276 "_elementtree._element_iterator", /* tp_name */ 2277 sizeof(ElementIterObject), /* tp_basicsize */ 2278 0, /* tp_itemsize */ 2279 /* methods */ 2280 (destructor)elementiter_dealloc, /* tp_dealloc */ 2281 0, /* tp_print */ 2282 0, /* tp_getattr */ 2283 0, /* tp_setattr */ 2284 0, /* tp_reserved */ 2285 0, /* tp_repr */ 2286 0, /* tp_as_number */ 2287 0, /* tp_as_sequence */ 2288 0, /* tp_as_mapping */ 2289 0, /* tp_hash */ 2290 0, /* tp_call */ 2291 0, /* tp_str */ 2292 0, /* tp_getattro */ 2293 0, /* tp_setattro */ 2294 0, /* tp_as_buffer */ 2295 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 2296 0, /* tp_doc */ 2297 (traverseproc)elementiter_traverse, /* tp_traverse */ 2298 0, /* tp_clear */ 2299 0, /* tp_richcompare */ 2300 0, /* tp_weaklistoffset */ 2301 PyObject_SelfIter, /* tp_iter */ 2302 (iternextfunc)elementiter_next, /* tp_iternext */ 2303 0, /* tp_methods */ 2304 0, /* tp_members */ 2305 0, /* tp_getset */ 2306 0, /* tp_base */ 2307 0, /* tp_dict */ 2308 0, /* tp_descr_get */ 2309 0, /* tp_descr_set */ 2310 0, /* tp_dictoffset */ 2311 0, /* tp_init */ 2312 0, /* tp_alloc */ 2313 0, /* tp_new */ 2314 }; 2315 2316 #define INIT_PARENT_STACK_SIZE 8 2317 2318 static PyObject * 2319 create_elementiter(ElementObject *self, PyObject *tag, int gettext) 2320 { 2321 ElementIterObject *it; 2322 2323 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type); 2324 if (!it) 2325 return NULL; 2326 2327 Py_INCREF(tag); 2328 it->sought_tag = tag; 2329 it->gettext = gettext; 2330 Py_INCREF(self); 2331 it->root_element = self; 2332 2333 PyObject_GC_Track(it); 2334 2335 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE); 2336 if (it->parent_stack == NULL) { 2337 Py_DECREF(it); 2338 PyErr_NoMemory(); 2339 return NULL; 2340 } 2341 it->parent_stack_used = 0; 2342 it->parent_stack_size = INIT_PARENT_STACK_SIZE; 2343 2344 return (PyObject *)it; 2345 } 2346 2347 2348 /* ==================================================================== */ 2349 /* the tree builder type */ 2350 2351 typedef struct { 2352 PyObject_HEAD 2353 2354 PyObject *root; /* root node (first created node) */ 2355 2356 PyObject *this; /* current node */ 2357 PyObject *last; /* most recently created node */ 2358 2359 PyObject *data; /* data collector (string or list), or NULL */ 2360 2361 PyObject *stack; /* element stack */ 2362 Py_ssize_t index; /* current stack size (0 means empty) */ 2363 2364 PyObject *element_factory; 2365 2366 /* element tracing */ 2367 PyObject *events_append; /* the append method of the list of events, or NULL */ 2368 PyObject *start_event_obj; /* event objects (NULL to ignore) */ 2369 PyObject *end_event_obj; 2370 PyObject *start_ns_event_obj; 2371 PyObject *end_ns_event_obj; 2372 } TreeBuilderObject; 2373 2374 #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type) 2375 2376 /* -------------------------------------------------------------------- */ 2377 /* constructor and destructor */ 2378 2379 static PyObject * 2380 treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 2381 { 2382 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0); 2383 if (t != NULL) { 2384 t->root = NULL; 2385 2386 Py_INCREF(Py_None); 2387 t->this = Py_None; 2388 Py_INCREF(Py_None); 2389 t->last = Py_None; 2390 2391 t->data = NULL; 2392 t->element_factory = NULL; 2393 t->stack = PyList_New(20); 2394 if (!t->stack) { 2395 Py_DECREF(t->this); 2396 Py_DECREF(t->last); 2397 Py_DECREF((PyObject *) t); 2398 return NULL; 2399 } 2400 t->index = 0; 2401 2402 t->events_append = NULL; 2403 t->start_event_obj = t->end_event_obj = NULL; 2404 t->start_ns_event_obj = t->end_ns_event_obj = NULL; 2405 } 2406 return (PyObject *)t; 2407 } 2408 2409 /*[clinic input] 2410 _elementtree.TreeBuilder.__init__ 2411 2412 element_factory: object = NULL 2413 2414 [clinic start generated code]*/ 2415 2416 static int 2417 _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self, 2418 PyObject *element_factory) 2419 /*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/ 2420 { 2421 if (element_factory) { 2422 Py_INCREF(element_factory); 2423 Py_XSETREF(self->element_factory, element_factory); 2424 } 2425 2426 return 0; 2427 } 2428 2429 static int 2430 treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) 2431 { 2432 Py_VISIT(self->end_ns_event_obj); 2433 Py_VISIT(self->start_ns_event_obj); 2434 Py_VISIT(self->end_event_obj); 2435 Py_VISIT(self->start_event_obj); 2436 Py_VISIT(self->events_append); 2437 Py_VISIT(self->root); 2438 Py_VISIT(self->this); 2439 Py_VISIT(self->last); 2440 Py_VISIT(self->data); 2441 Py_VISIT(self->stack); 2442 Py_VISIT(self->element_factory); 2443 return 0; 2444 } 2445 2446 static int 2447 treebuilder_gc_clear(TreeBuilderObject *self) 2448 { 2449 Py_CLEAR(self->end_ns_event_obj); 2450 Py_CLEAR(self->start_ns_event_obj); 2451 Py_CLEAR(self->end_event_obj); 2452 Py_CLEAR(self->start_event_obj); 2453 Py_CLEAR(self->events_append); 2454 Py_CLEAR(self->stack); 2455 Py_CLEAR(self->data); 2456 Py_CLEAR(self->last); 2457 Py_CLEAR(self->this); 2458 Py_CLEAR(self->element_factory); 2459 Py_CLEAR(self->root); 2460 return 0; 2461 } 2462 2463 static void 2464 treebuilder_dealloc(TreeBuilderObject *self) 2465 { 2466 PyObject_GC_UnTrack(self); 2467 treebuilder_gc_clear(self); 2468 Py_TYPE(self)->tp_free((PyObject *)self); 2469 } 2470 2471 /* -------------------------------------------------------------------- */ 2472 /* helpers for handling of arbitrary element-like objects */ 2473 2474 static int 2475 treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data, 2476 PyObject **dest, _Py_Identifier *name) 2477 { 2478 if (Element_CheckExact(element)) { 2479 PyObject *tmp = JOIN_OBJ(*dest); 2480 *dest = JOIN_SET(*data, PyList_CheckExact(*data)); 2481 *data = NULL; 2482 Py_DECREF(tmp); 2483 return 0; 2484 } 2485 else { 2486 PyObject *joined = list_join(*data); 2487 int r; 2488 if (joined == NULL) 2489 return -1; 2490 r = _PyObject_SetAttrId(element, name, joined); 2491 Py_DECREF(joined); 2492 if (r < 0) 2493 return -1; 2494 Py_CLEAR(*data); 2495 return 0; 2496 } 2497 } 2498 2499 LOCAL(int) 2500 treebuilder_flush_data(TreeBuilderObject* self) 2501 { 2502 PyObject *element = self->last; 2503 2504 if (!self->data) { 2505 return 0; 2506 } 2507 2508 if (self->this == element) { 2509 _Py_IDENTIFIER(text); 2510 return treebuilder_set_element_text_or_tail( 2511 element, &self->data, 2512 &((ElementObject *) element)->text, &PyId_text); 2513 } 2514 else { 2515 _Py_IDENTIFIER(tail); 2516 return treebuilder_set_element_text_or_tail( 2517 element, &self->data, 2518 &((ElementObject *) element)->tail, &PyId_tail); 2519 } 2520 } 2521 2522 static int 2523 treebuilder_add_subelement(PyObject *element, PyObject *child) 2524 { 2525 _Py_IDENTIFIER(append); 2526 if (Element_CheckExact(element)) { 2527 ElementObject *elem = (ElementObject *) element; 2528 return element_add_subelement(elem, child); 2529 } 2530 else { 2531 PyObject *res; 2532 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL); 2533 if (res == NULL) 2534 return -1; 2535 Py_DECREF(res); 2536 return 0; 2537 } 2538 } 2539 2540 LOCAL(int) 2541 treebuilder_append_event(TreeBuilderObject *self, PyObject *action, 2542 PyObject *node) 2543 { 2544 if (action != NULL) { 2545 PyObject *res; 2546 PyObject *event = PyTuple_Pack(2, action, node); 2547 if (event == NULL) 2548 return -1; 2549 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL); 2550 Py_DECREF(event); 2551 if (res == NULL) 2552 return -1; 2553 Py_DECREF(res); 2554 } 2555 return 0; 2556 } 2557 2558 /* -------------------------------------------------------------------- */ 2559 /* handlers */ 2560 2561 LOCAL(PyObject*) 2562 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, 2563 PyObject* attrib) 2564 { 2565 PyObject* node; 2566 PyObject* this; 2567 elementtreestate *st = ET_STATE_GLOBAL; 2568 2569 if (treebuilder_flush_data(self) < 0) { 2570 return NULL; 2571 } 2572 2573 if (!self->element_factory || self->element_factory == Py_None) { 2574 node = create_new_element(tag, attrib); 2575 } else if (attrib == Py_None) { 2576 attrib = PyDict_New(); 2577 if (!attrib) 2578 return NULL; 2579 node = PyObject_CallFunctionObjArgs(self->element_factory, 2580 tag, attrib, NULL); 2581 Py_DECREF(attrib); 2582 } 2583 else { 2584 node = PyObject_CallFunctionObjArgs(self->element_factory, 2585 tag, attrib, NULL); 2586 } 2587 if (!node) { 2588 return NULL; 2589 } 2590 2591 this = self->this; 2592 2593 if (this != Py_None) { 2594 if (treebuilder_add_subelement(this, node) < 0) 2595 goto error; 2596 } else { 2597 if (self->root) { 2598 PyErr_SetString( 2599 st->parseerror_obj, 2600 "multiple elements on top level" 2601 ); 2602 goto error; 2603 } 2604 Py_INCREF(node); 2605 self->root = node; 2606 } 2607 2608 if (self->index < PyList_GET_SIZE(self->stack)) { 2609 if (PyList_SetItem(self->stack, self->index, this) < 0) 2610 goto error; 2611 Py_INCREF(this); 2612 } else { 2613 if (PyList_Append(self->stack, this) < 0) 2614 goto error; 2615 } 2616 self->index++; 2617 2618 Py_INCREF(node); 2619 Py_SETREF(self->this, node); 2620 Py_INCREF(node); 2621 Py_SETREF(self->last, node); 2622 2623 if (treebuilder_append_event(self, self->start_event_obj, node) < 0) 2624 goto error; 2625 2626 return node; 2627 2628 error: 2629 Py_DECREF(node); 2630 return NULL; 2631 } 2632 2633 LOCAL(PyObject*) 2634 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) 2635 { 2636 if (!self->data) { 2637 if (self->last == Py_None) { 2638 /* ignore calls to data before the first call to start */ 2639 Py_RETURN_NONE; 2640 } 2641 /* store the first item as is */ 2642 Py_INCREF(data); self->data = data; 2643 } else { 2644 /* more than one item; use a list to collect items */ 2645 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 && 2646 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) { 2647 /* XXX this code path unused in Python 3? */ 2648 /* expat often generates single character data sections; handle 2649 the most common case by resizing the existing string... */ 2650 Py_ssize_t size = PyBytes_GET_SIZE(self->data); 2651 if (_PyBytes_Resize(&self->data, size + 1) < 0) 2652 return NULL; 2653 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0]; 2654 } else if (PyList_CheckExact(self->data)) { 2655 if (PyList_Append(self->data, data) < 0) 2656 return NULL; 2657 } else { 2658 PyObject* list = PyList_New(2); 2659 if (!list) 2660 return NULL; 2661 PyList_SET_ITEM(list, 0, self->data); 2662 Py_INCREF(data); PyList_SET_ITEM(list, 1, data); 2663 self->data = list; 2664 } 2665 } 2666 2667 Py_RETURN_NONE; 2668 } 2669 2670 LOCAL(PyObject*) 2671 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) 2672 { 2673 PyObject* item; 2674 2675 if (treebuilder_flush_data(self) < 0) { 2676 return NULL; 2677 } 2678 2679 if (self->index == 0) { 2680 PyErr_SetString( 2681 PyExc_IndexError, 2682 "pop from empty stack" 2683 ); 2684 return NULL; 2685 } 2686 2687 item = self->last; 2688 self->last = self->this; 2689 self->index--; 2690 self->this = PyList_GET_ITEM(self->stack, self->index); 2691 Py_INCREF(self->this); 2692 Py_DECREF(item); 2693 2694 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0) 2695 return NULL; 2696 2697 Py_INCREF(self->last); 2698 return (PyObject*) self->last; 2699 } 2700 2701 /* -------------------------------------------------------------------- */ 2702 /* methods (in alphabetical order) */ 2703 2704 /*[clinic input] 2705 _elementtree.TreeBuilder.data 2706 2707 data: object 2708 / 2709 2710 [clinic start generated code]*/ 2711 2712 static PyObject * 2713 _elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data) 2714 /*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/ 2715 { 2716 return treebuilder_handle_data(self, data); 2717 } 2718 2719 /*[clinic input] 2720 _elementtree.TreeBuilder.end 2721 2722 tag: object 2723 / 2724 2725 [clinic start generated code]*/ 2726 2727 static PyObject * 2728 _elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag) 2729 /*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/ 2730 { 2731 return treebuilder_handle_end(self, tag); 2732 } 2733 2734 LOCAL(PyObject*) 2735 treebuilder_done(TreeBuilderObject* self) 2736 { 2737 PyObject* res; 2738 2739 /* FIXME: check stack size? */ 2740 2741 if (self->root) 2742 res = self->root; 2743 else 2744 res = Py_None; 2745 2746 Py_INCREF(res); 2747 return res; 2748 } 2749 2750 /*[clinic input] 2751 _elementtree.TreeBuilder.close 2752 2753 [clinic start generated code]*/ 2754 2755 static PyObject * 2756 _elementtree_TreeBuilder_close_impl(TreeBuilderObject *self) 2757 /*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/ 2758 { 2759 return treebuilder_done(self); 2760 } 2761 2762 /*[clinic input] 2763 _elementtree.TreeBuilder.start 2764 2765 tag: object 2766 attrs: object = None 2767 / 2768 2769 [clinic start generated code]*/ 2770 2771 static PyObject * 2772 _elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag, 2773 PyObject *attrs) 2774 /*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/ 2775 { 2776 return treebuilder_handle_start(self, tag, attrs); 2777 } 2778 2779 /* ==================================================================== */ 2780 /* the expat interface */ 2781 2782 #include "expat.h" 2783 #include "pyexpat.h" 2784 2785 /* The PyExpat_CAPI structure is an immutable dispatch table, so it can be 2786 * cached globally without being in per-module state. 2787 */ 2788 static struct PyExpat_CAPI *expat_capi; 2789 #define EXPAT(func) (expat_capi->func) 2790 2791 static XML_Memory_Handling_Suite ExpatMemoryHandler = { 2792 PyObject_Malloc, PyObject_Realloc, PyObject_Free}; 2793 2794 typedef struct { 2795 PyObject_HEAD 2796 2797 XML_Parser parser; 2798 2799 PyObject *target; 2800 PyObject *entity; 2801 2802 PyObject *names; 2803 2804 PyObject *handle_start; 2805 PyObject *handle_data; 2806 PyObject *handle_end; 2807 2808 PyObject *handle_comment; 2809 PyObject *handle_pi; 2810 PyObject *handle_doctype; 2811 2812 PyObject *handle_close; 2813 2814 } XMLParserObject; 2815 2816 static PyObject* 2817 _elementtree_XMLParser_doctype(XMLParserObject *self, PyObject *const *args, Py_ssize_t nargs); 2818 static PyObject * 2819 _elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name, 2820 PyObject *pubid, PyObject *system); 2821 2822 /* helpers */ 2823 2824 LOCAL(PyObject*) 2825 makeuniversal(XMLParserObject* self, const char* string) 2826 { 2827 /* convert a UTF-8 tag/attribute name from the expat parser 2828 to a universal name string */ 2829 2830 Py_ssize_t size = (Py_ssize_t) strlen(string); 2831 PyObject* key; 2832 PyObject* value; 2833 2834 /* look the 'raw' name up in the names dictionary */ 2835 key = PyBytes_FromStringAndSize(string, size); 2836 if (!key) 2837 return NULL; 2838 2839 value = PyDict_GetItem(self->names, key); 2840 2841 if (value) { 2842 Py_INCREF(value); 2843 } else { 2844 /* new name. convert to universal name, and decode as 2845 necessary */ 2846 2847 PyObject* tag; 2848 char* p; 2849 Py_ssize_t i; 2850 2851 /* look for namespace separator */ 2852 for (i = 0; i < size; i++) 2853 if (string[i] == '}') 2854 break; 2855 if (i != size) { 2856 /* convert to universal name */ 2857 tag = PyBytes_FromStringAndSize(NULL, size+1); 2858 if (tag == NULL) { 2859 Py_DECREF(key); 2860 return NULL; 2861 } 2862 p = PyBytes_AS_STRING(tag); 2863 p[0] = '{'; 2864 memcpy(p+1, string, size); 2865 size++; 2866 } else { 2867 /* plain name; use key as tag */ 2868 Py_INCREF(key); 2869 tag = key; 2870 } 2871 2872 /* decode universal name */ 2873 p = PyBytes_AS_STRING(tag); 2874 value = PyUnicode_DecodeUTF8(p, size, "strict"); 2875 Py_DECREF(tag); 2876 if (!value) { 2877 Py_DECREF(key); 2878 return NULL; 2879 } 2880 2881 /* add to names dictionary */ 2882 if (PyDict_SetItem(self->names, key, value) < 0) { 2883 Py_DECREF(key); 2884 Py_DECREF(value); 2885 return NULL; 2886 } 2887 } 2888 2889 Py_DECREF(key); 2890 return value; 2891 } 2892 2893 /* Set the ParseError exception with the given parameters. 2894 * If message is not NULL, it's used as the error string. Otherwise, the 2895 * message string is the default for the given error_code. 2896 */ 2897 static void 2898 expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column, 2899 const char *message) 2900 { 2901 PyObject *errmsg, *error, *position, *code; 2902 elementtreestate *st = ET_STATE_GLOBAL; 2903 2904 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd", 2905 message ? message : EXPAT(ErrorString)(error_code), 2906 line, column); 2907 if (errmsg == NULL) 2908 return; 2909 2910 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL); 2911 Py_DECREF(errmsg); 2912 if (!error) 2913 return; 2914 2915 /* Add code and position attributes */ 2916 code = PyLong_FromLong((long)error_code); 2917 if (!code) { 2918 Py_DECREF(error); 2919 return; 2920 } 2921 if (PyObject_SetAttrString(error, "code", code) == -1) { 2922 Py_DECREF(error); 2923 Py_DECREF(code); 2924 return; 2925 } 2926 Py_DECREF(code); 2927 2928 position = Py_BuildValue("(nn)", line, column); 2929 if (!position) { 2930 Py_DECREF(error); 2931 return; 2932 } 2933 if (PyObject_SetAttrString(error, "position", position) == -1) { 2934 Py_DECREF(error); 2935 Py_DECREF(position); 2936 return; 2937 } 2938 Py_DECREF(position); 2939 2940 PyErr_SetObject(st->parseerror_obj, error); 2941 Py_DECREF(error); 2942 } 2943 2944 /* -------------------------------------------------------------------- */ 2945 /* handlers */ 2946 2947 static void 2948 expat_default_handler(XMLParserObject* self, const XML_Char* data_in, 2949 int data_len) 2950 { 2951 PyObject* key; 2952 PyObject* value; 2953 PyObject* res; 2954 2955 if (data_len < 2 || data_in[0] != '&') 2956 return; 2957 2958 if (PyErr_Occurred()) 2959 return; 2960 2961 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict"); 2962 if (!key) 2963 return; 2964 2965 value = PyDict_GetItem(self->entity, key); 2966 2967 if (value) { 2968 if (TreeBuilder_CheckExact(self->target)) 2969 res = treebuilder_handle_data( 2970 (TreeBuilderObject*) self->target, value 2971 ); 2972 else if (self->handle_data) 2973 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL); 2974 else 2975 res = NULL; 2976 Py_XDECREF(res); 2977 } else if (!PyErr_Occurred()) { 2978 /* Report the first error, not the last */ 2979 char message[128] = "undefined entity "; 2980 strncat(message, data_in, data_len < 100?data_len:100); 2981 expat_set_error( 2982 XML_ERROR_UNDEFINED_ENTITY, 2983 EXPAT(GetErrorLineNumber)(self->parser), 2984 EXPAT(GetErrorColumnNumber)(self->parser), 2985 message 2986 ); 2987 } 2988 2989 Py_DECREF(key); 2990 } 2991 2992 static void 2993 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, 2994 const XML_Char **attrib_in) 2995 { 2996 PyObject* res; 2997 PyObject* tag; 2998 PyObject* attrib; 2999 int ok; 3000 3001 if (PyErr_Occurred()) 3002 return; 3003 3004 /* tag name */ 3005 tag = makeuniversal(self, tag_in); 3006 if (!tag) 3007 return; /* parser will look for errors */ 3008 3009 /* attributes */ 3010 if (attrib_in[0]) { 3011 attrib = PyDict_New(); 3012 if (!attrib) { 3013 Py_DECREF(tag); 3014 return; 3015 } 3016 while (attrib_in[0] && attrib_in[1]) { 3017 PyObject* key = makeuniversal(self, attrib_in[0]); 3018 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict"); 3019 if (!key || !value) { 3020 Py_XDECREF(value); 3021 Py_XDECREF(key); 3022 Py_DECREF(attrib); 3023 Py_DECREF(tag); 3024 return; 3025 } 3026 ok = PyDict_SetItem(attrib, key, value); 3027 Py_DECREF(value); 3028 Py_DECREF(key); 3029 if (ok < 0) { 3030 Py_DECREF(attrib); 3031 Py_DECREF(tag); 3032 return; 3033 } 3034 attrib_in += 2; 3035 } 3036 } else { 3037 Py_INCREF(Py_None); 3038 attrib = Py_None; 3039 } 3040 3041 if (TreeBuilder_CheckExact(self->target)) { 3042 /* shortcut */ 3043 res = treebuilder_handle_start((TreeBuilderObject*) self->target, 3044 tag, attrib); 3045 } 3046 else if (self->handle_start) { 3047 if (attrib == Py_None) { 3048 Py_DECREF(attrib); 3049 attrib = PyDict_New(); 3050 if (!attrib) { 3051 Py_DECREF(tag); 3052 return; 3053 } 3054 } 3055 res = PyObject_CallFunctionObjArgs(self->handle_start, 3056 tag, attrib, NULL); 3057 } else 3058 res = NULL; 3059 3060 Py_DECREF(tag); 3061 Py_DECREF(attrib); 3062 3063 Py_XDECREF(res); 3064 } 3065 3066 static void 3067 expat_data_handler(XMLParserObject* self, const XML_Char* data_in, 3068 int data_len) 3069 { 3070 PyObject* data; 3071 PyObject* res; 3072 3073 if (PyErr_Occurred()) 3074 return; 3075 3076 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict"); 3077 if (!data) 3078 return; /* parser will look for errors */ 3079 3080 if (TreeBuilder_CheckExact(self->target)) 3081 /* shortcut */ 3082 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data); 3083 else if (self->handle_data) 3084 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL); 3085 else 3086 res = NULL; 3087 3088 Py_DECREF(data); 3089 3090 Py_XDECREF(res); 3091 } 3092 3093 static void 3094 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in) 3095 { 3096 PyObject* tag; 3097 PyObject* res = NULL; 3098 3099 if (PyErr_Occurred()) 3100 return; 3101 3102 if (TreeBuilder_CheckExact(self->target)) 3103 /* shortcut */ 3104 /* the standard tree builder doesn't look at the end tag */ 3105 res = treebuilder_handle_end( 3106 (TreeBuilderObject*) self->target, Py_None 3107 ); 3108 else if (self->handle_end) { 3109 tag = makeuniversal(self, tag_in); 3110 if (tag) { 3111 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL); 3112 Py_DECREF(tag); 3113 } 3114 } 3115 3116 Py_XDECREF(res); 3117 } 3118 3119 static void 3120 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix, 3121 const XML_Char *uri) 3122 { 3123 TreeBuilderObject *target = (TreeBuilderObject*) self->target; 3124 PyObject *parcel; 3125 3126 if (PyErr_Occurred()) 3127 return; 3128 3129 if (!target->events_append || !target->start_ns_event_obj) 3130 return; 3131 3132 if (!uri) 3133 uri = ""; 3134 if (!prefix) 3135 prefix = ""; 3136 3137 parcel = Py_BuildValue("ss", prefix, uri); 3138 if (!parcel) 3139 return; 3140 treebuilder_append_event(target, target->start_ns_event_obj, parcel); 3141 Py_DECREF(parcel); 3142 } 3143 3144 static void 3145 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) 3146 { 3147 TreeBuilderObject *target = (TreeBuilderObject*) self->target; 3148 3149 if (PyErr_Occurred()) 3150 return; 3151 3152 if (!target->events_append) 3153 return; 3154 3155 treebuilder_append_event(target, target->end_ns_event_obj, Py_None); 3156 } 3157 3158 static void 3159 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) 3160 { 3161 PyObject* comment; 3162 PyObject* res; 3163 3164 if (PyErr_Occurred()) 3165 return; 3166 3167 if (self->handle_comment) { 3168 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict"); 3169 if (comment) { 3170 res = PyObject_CallFunctionObjArgs(self->handle_comment, 3171 comment, NULL); 3172 Py_XDECREF(res); 3173 Py_DECREF(comment); 3174 } 3175 } 3176 } 3177 3178 static void 3179 expat_start_doctype_handler(XMLParserObject *self, 3180 const XML_Char *doctype_name, 3181 const XML_Char *sysid, 3182 const XML_Char *pubid, 3183 int has_internal_subset) 3184 { 3185 PyObject *self_pyobj = (PyObject *)self; 3186 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj; 3187 PyObject *parser_doctype = NULL; 3188 PyObject *res = NULL; 3189 3190 if (PyErr_Occurred()) 3191 return; 3192 3193 doctype_name_obj = makeuniversal(self, doctype_name); 3194 if (!doctype_name_obj) 3195 return; 3196 3197 if (sysid) { 3198 sysid_obj = makeuniversal(self, sysid); 3199 if (!sysid_obj) { 3200 Py_DECREF(doctype_name_obj); 3201 return; 3202 } 3203 } else { 3204 Py_INCREF(Py_None); 3205 sysid_obj = Py_None; 3206 } 3207 3208 if (pubid) { 3209 pubid_obj = makeuniversal(self, pubid); 3210 if (!pubid_obj) { 3211 Py_DECREF(doctype_name_obj); 3212 Py_DECREF(sysid_obj); 3213 return; 3214 } 3215 } else { 3216 Py_INCREF(Py_None); 3217 pubid_obj = Py_None; 3218 } 3219 3220 /* If the target has a handler for doctype, call it. */ 3221 if (self->handle_doctype) { 3222 res = PyObject_CallFunctionObjArgs(self->handle_doctype, 3223 doctype_name_obj, pubid_obj, 3224 sysid_obj, NULL); 3225 Py_CLEAR(res); 3226 } 3227 else { 3228 /* Now see if the parser itself has a doctype method. If yes and it's 3229 * a custom method, call it but warn about deprecation. If it's only 3230 * the vanilla XMLParser method, do nothing. 3231 */ 3232 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype"); 3233 if (parser_doctype && 3234 !(PyCFunction_Check(parser_doctype) && 3235 PyCFunction_GET_SELF(parser_doctype) == self_pyobj && 3236 PyCFunction_GET_FUNCTION(parser_doctype) == 3237 (PyCFunction) _elementtree_XMLParser_doctype)) { 3238 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj, 3239 pubid_obj, sysid_obj); 3240 if (!res) 3241 goto clear; 3242 Py_DECREF(res); 3243 res = PyObject_CallFunctionObjArgs(parser_doctype, 3244 doctype_name_obj, pubid_obj, 3245 sysid_obj, NULL); 3246 Py_CLEAR(res); 3247 } 3248 } 3249 3250 clear: 3251 Py_XDECREF(parser_doctype); 3252 Py_DECREF(doctype_name_obj); 3253 Py_DECREF(pubid_obj); 3254 Py_DECREF(sysid_obj); 3255 } 3256 3257 static void 3258 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, 3259 const XML_Char* data_in) 3260 { 3261 PyObject* target; 3262 PyObject* data; 3263 PyObject* res; 3264 3265 if (PyErr_Occurred()) 3266 return; 3267 3268 if (self->handle_pi) { 3269 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); 3270 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); 3271 if (target && data) { 3272 res = PyObject_CallFunctionObjArgs(self->handle_pi, 3273 target, data, NULL); 3274 Py_XDECREF(res); 3275 Py_DECREF(data); 3276 Py_DECREF(target); 3277 } else { 3278 Py_XDECREF(data); 3279 Py_XDECREF(target); 3280 } 3281 } 3282 } 3283 3284 /* -------------------------------------------------------------------- */ 3285 3286 static PyObject * 3287 xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 3288 { 3289 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0); 3290 if (self) { 3291 self->parser = NULL; 3292 self->target = self->entity = self->names = NULL; 3293 self->handle_start = self->handle_data = self->handle_end = NULL; 3294 self->handle_comment = self->handle_pi = self->handle_close = NULL; 3295 self->handle_doctype = NULL; 3296 } 3297 return (PyObject *)self; 3298 } 3299 3300 static int 3301 ignore_attribute_error(PyObject *value) 3302 { 3303 if (value == NULL) { 3304 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { 3305 return -1; 3306 } 3307 PyErr_Clear(); 3308 } 3309 return 0; 3310 } 3311 3312 /*[clinic input] 3313 _elementtree.XMLParser.__init__ 3314 3315 html: object = NULL 3316 target: object = NULL 3317 encoding: str(accept={str, NoneType}) = NULL 3318 3319 [clinic start generated code]*/ 3320 3321 static int 3322 _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html, 3323 PyObject *target, const char *encoding) 3324 /*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/ 3325 { 3326 if (html != NULL) { 3327 if (PyErr_WarnEx(PyExc_DeprecationWarning, 3328 "The html argument of XMLParser() is deprecated", 3329 1) < 0) { 3330 return -1; 3331 } 3332 } 3333 3334 self->entity = PyDict_New(); 3335 if (!self->entity) 3336 return -1; 3337 3338 self->names = PyDict_New(); 3339 if (!self->names) { 3340 Py_CLEAR(self->entity); 3341 return -1; 3342 } 3343 3344 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}"); 3345 if (!self->parser) { 3346 Py_CLEAR(self->entity); 3347 Py_CLEAR(self->names); 3348 PyErr_NoMemory(); 3349 return -1; 3350 } 3351 /* expat < 2.1.0 has no XML_SetHashSalt() */ 3352 if (EXPAT(SetHashSalt) != NULL) { 3353 EXPAT(SetHashSalt)(self->parser, 3354 (unsigned long)_Py_HashSecret.expat.hashsalt); 3355 } 3356 3357 if (target) { 3358 Py_INCREF(target); 3359 } else { 3360 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL); 3361 if (!target) { 3362 Py_CLEAR(self->entity); 3363 Py_CLEAR(self->names); 3364 return -1; 3365 } 3366 } 3367 self->target = target; 3368 3369 self->handle_start = PyObject_GetAttrString(target, "start"); 3370 if (ignore_attribute_error(self->handle_start)) { 3371 return -1; 3372 } 3373 self->handle_data = PyObject_GetAttrString(target, "data"); 3374 if (ignore_attribute_error(self->handle_data)) { 3375 return -1; 3376 } 3377 self->handle_end = PyObject_GetAttrString(target, "end"); 3378 if (ignore_attribute_error(self->handle_end)) { 3379 return -1; 3380 } 3381 self->handle_comment = PyObject_GetAttrString(target, "comment"); 3382 if (ignore_attribute_error(self->handle_comment)) { 3383 return -1; 3384 } 3385 self->handle_pi = PyObject_GetAttrString(target, "pi"); 3386 if (ignore_attribute_error(self->handle_pi)) { 3387 return -1; 3388 } 3389 self->handle_close = PyObject_GetAttrString(target, "close"); 3390 if (ignore_attribute_error(self->handle_close)) { 3391 return -1; 3392 } 3393 self->handle_doctype = PyObject_GetAttrString(target, "doctype"); 3394 if (ignore_attribute_error(self->handle_doctype)) { 3395 return -1; 3396 } 3397 3398 /* configure parser */ 3399 EXPAT(SetUserData)(self->parser, self); 3400 EXPAT(SetElementHandler)( 3401 self->parser, 3402 (XML_StartElementHandler) expat_start_handler, 3403 (XML_EndElementHandler) expat_end_handler 3404 ); 3405 EXPAT(SetDefaultHandlerExpand)( 3406 self->parser, 3407 (XML_DefaultHandler) expat_default_handler 3408 ); 3409 EXPAT(SetCharacterDataHandler)( 3410 self->parser, 3411 (XML_CharacterDataHandler) expat_data_handler 3412 ); 3413 if (self->handle_comment) 3414 EXPAT(SetCommentHandler)( 3415 self->parser, 3416 (XML_CommentHandler) expat_comment_handler 3417 ); 3418 if (self->handle_pi) 3419 EXPAT(SetProcessingInstructionHandler)( 3420 self->parser, 3421 (XML_ProcessingInstructionHandler) expat_pi_handler 3422 ); 3423 EXPAT(SetStartDoctypeDeclHandler)( 3424 self->parser, 3425 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler 3426 ); 3427 EXPAT(SetUnknownEncodingHandler)( 3428 self->parser, 3429 EXPAT(DefaultUnknownEncodingHandler), NULL 3430 ); 3431 3432 return 0; 3433 } 3434 3435 static int 3436 xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg) 3437 { 3438 Py_VISIT(self->handle_close); 3439 Py_VISIT(self->handle_pi); 3440 Py_VISIT(self->handle_comment); 3441 Py_VISIT(self->handle_end); 3442 Py_VISIT(self->handle_data); 3443 Py_VISIT(self->handle_start); 3444 3445 Py_VISIT(self->target); 3446 Py_VISIT(self->entity); 3447 Py_VISIT(self->names); 3448 3449 return 0; 3450 } 3451 3452 static int 3453 xmlparser_gc_clear(XMLParserObject *self) 3454 { 3455 if (self->parser != NULL) { 3456 XML_Parser parser = self->parser; 3457 self->parser = NULL; 3458 EXPAT(ParserFree)(parser); 3459 } 3460 3461 Py_CLEAR(self->handle_close); 3462 Py_CLEAR(self->handle_pi); 3463 Py_CLEAR(self->handle_comment); 3464 Py_CLEAR(self->handle_end); 3465 Py_CLEAR(self->handle_data); 3466 Py_CLEAR(self->handle_start); 3467 Py_CLEAR(self->handle_doctype); 3468 3469 Py_CLEAR(self->target); 3470 Py_CLEAR(self->entity); 3471 Py_CLEAR(self->names); 3472 3473 return 0; 3474 } 3475 3476 static void 3477 xmlparser_dealloc(XMLParserObject* self) 3478 { 3479 PyObject_GC_UnTrack(self); 3480 xmlparser_gc_clear(self); 3481 Py_TYPE(self)->tp_free((PyObject *)self); 3482 } 3483 3484 LOCAL(PyObject*) 3485 expat_parse(XMLParserObject* self, const char* data, int data_len, int final) 3486 { 3487 int ok; 3488 3489 assert(!PyErr_Occurred()); 3490 ok = EXPAT(Parse)(self->parser, data, data_len, final); 3491 3492 if (PyErr_Occurred()) 3493 return NULL; 3494 3495 if (!ok) { 3496 expat_set_error( 3497 EXPAT(GetErrorCode)(self->parser), 3498 EXPAT(GetErrorLineNumber)(self->parser), 3499 EXPAT(GetErrorColumnNumber)(self->parser), 3500 NULL 3501 ); 3502 return NULL; 3503 } 3504 3505 Py_RETURN_NONE; 3506 } 3507 3508 /*[clinic input] 3509 _elementtree.XMLParser.close 3510 3511 [clinic start generated code]*/ 3512 3513 static PyObject * 3514 _elementtree_XMLParser_close_impl(XMLParserObject *self) 3515 /*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/ 3516 { 3517 /* end feeding data to parser */ 3518 3519 PyObject* res; 3520 res = expat_parse(self, "", 0, 1); 3521 if (!res) 3522 return NULL; 3523 3524 if (TreeBuilder_CheckExact(self->target)) { 3525 Py_DECREF(res); 3526 return treebuilder_done((TreeBuilderObject*) self->target); 3527 } 3528 else if (self->handle_close) { 3529 Py_DECREF(res); 3530 return _PyObject_CallNoArg(self->handle_close); 3531 } 3532 else { 3533 return res; 3534 } 3535 } 3536 3537 /*[clinic input] 3538 _elementtree.XMLParser.feed 3539 3540 data: object 3541 / 3542 3543 [clinic start generated code]*/ 3544 3545 static PyObject * 3546 _elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data) 3547 /*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/ 3548 { 3549 /* feed data to parser */ 3550 3551 if (PyUnicode_Check(data)) { 3552 Py_ssize_t data_len; 3553 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len); 3554 if (data_ptr == NULL) 3555 return NULL; 3556 if (data_len > INT_MAX) { 3557 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); 3558 return NULL; 3559 } 3560 /* Explicitly set UTF-8 encoding. Return code ignored. */ 3561 (void)EXPAT(SetEncoding)(self->parser, "utf-8"); 3562 return expat_parse(self, data_ptr, (int)data_len, 0); 3563 } 3564 else { 3565 Py_buffer view; 3566 PyObject *res; 3567 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0) 3568 return NULL; 3569 if (view.len > INT_MAX) { 3570 PyBuffer_Release(&view); 3571 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); 3572 return NULL; 3573 } 3574 res = expat_parse(self, view.buf, (int)view.len, 0); 3575 PyBuffer_Release(&view); 3576 return res; 3577 } 3578 } 3579 3580 /*[clinic input] 3581 _elementtree.XMLParser._parse_whole 3582 3583 file: object 3584 / 3585 3586 [clinic start generated code]*/ 3587 3588 static PyObject * 3589 _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file) 3590 /*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/ 3591 { 3592 /* (internal) parse the whole input, until end of stream */ 3593 PyObject* reader; 3594 PyObject* buffer; 3595 PyObject* temp; 3596 PyObject* res; 3597 3598 reader = PyObject_GetAttrString(file, "read"); 3599 if (!reader) 3600 return NULL; 3601 3602 /* read from open file object */ 3603 for (;;) { 3604 3605 buffer = PyObject_CallFunction(reader, "i", 64*1024); 3606 3607 if (!buffer) { 3608 /* read failed (e.g. due to KeyboardInterrupt) */ 3609 Py_DECREF(reader); 3610 return NULL; 3611 } 3612 3613 if (PyUnicode_CheckExact(buffer)) { 3614 /* A unicode object is encoded into bytes using UTF-8 */ 3615 if (PyUnicode_GET_LENGTH(buffer) == 0) { 3616 Py_DECREF(buffer); 3617 break; 3618 } 3619 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass"); 3620 Py_DECREF(buffer); 3621 if (!temp) { 3622 /* Propagate exception from PyUnicode_AsEncodedString */ 3623 Py_DECREF(reader); 3624 return NULL; 3625 } 3626 buffer = temp; 3627 } 3628 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) { 3629 Py_DECREF(buffer); 3630 break; 3631 } 3632 3633 if (PyBytes_GET_SIZE(buffer) > INT_MAX) { 3634 Py_DECREF(buffer); 3635 Py_DECREF(reader); 3636 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); 3637 return NULL; 3638 } 3639 res = expat_parse( 3640 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0 3641 ); 3642 3643 Py_DECREF(buffer); 3644 3645 if (!res) { 3646 Py_DECREF(reader); 3647 return NULL; 3648 } 3649 Py_DECREF(res); 3650 3651 } 3652 3653 Py_DECREF(reader); 3654 3655 res = expat_parse(self, "", 0, 1); 3656 3657 if (res && TreeBuilder_CheckExact(self->target)) { 3658 Py_DECREF(res); 3659 return treebuilder_done((TreeBuilderObject*) self->target); 3660 } 3661 3662 return res; 3663 } 3664 3665 /*[clinic input] 3666 _elementtree.XMLParser.doctype 3667 3668 name: object 3669 pubid: object 3670 system: object 3671 / 3672 3673 [clinic start generated code]*/ 3674 3675 static PyObject * 3676 _elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name, 3677 PyObject *pubid, PyObject *system) 3678 /*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/ 3679 { 3680 if (PyErr_WarnEx(PyExc_DeprecationWarning, 3681 "This method of XMLParser is deprecated. Define" 3682 " doctype() method on the TreeBuilder target.", 3683 1) < 0) { 3684 return NULL; 3685 } 3686 Py_RETURN_NONE; 3687 } 3688 3689 /*[clinic input] 3690 _elementtree.XMLParser._setevents 3691 3692 events_queue: object 3693 events_to_report: object = None 3694 / 3695 3696 [clinic start generated code]*/ 3697 3698 static PyObject * 3699 _elementtree_XMLParser__setevents_impl(XMLParserObject *self, 3700 PyObject *events_queue, 3701 PyObject *events_to_report) 3702 /*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/ 3703 { 3704 /* activate element event reporting */ 3705 Py_ssize_t i; 3706 TreeBuilderObject *target; 3707 PyObject *events_append, *events_seq; 3708 3709 if (!TreeBuilder_CheckExact(self->target)) { 3710 PyErr_SetString( 3711 PyExc_TypeError, 3712 "event handling only supported for ElementTree.TreeBuilder " 3713 "targets" 3714 ); 3715 return NULL; 3716 } 3717 3718 target = (TreeBuilderObject*) self->target; 3719 3720 events_append = PyObject_GetAttrString(events_queue, "append"); 3721 if (events_append == NULL) 3722 return NULL; 3723 Py_XSETREF(target->events_append, events_append); 3724 3725 /* clear out existing events */ 3726 Py_CLEAR(target->start_event_obj); 3727 Py_CLEAR(target->end_event_obj); 3728 Py_CLEAR(target->start_ns_event_obj); 3729 Py_CLEAR(target->end_ns_event_obj); 3730 3731 if (events_to_report == Py_None) { 3732 /* default is "end" only */ 3733 target->end_event_obj = PyUnicode_FromString("end"); 3734 Py_RETURN_NONE; 3735 } 3736 3737 if (!(events_seq = PySequence_Fast(events_to_report, 3738 "events must be a sequence"))) { 3739 return NULL; 3740 } 3741 3742 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) { 3743 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i); 3744 const char *event_name = NULL; 3745 if (PyUnicode_Check(event_name_obj)) { 3746 event_name = PyUnicode_AsUTF8(event_name_obj); 3747 } else if (PyBytes_Check(event_name_obj)) { 3748 event_name = PyBytes_AS_STRING(event_name_obj); 3749 } 3750 if (event_name == NULL) { 3751 Py_DECREF(events_seq); 3752 PyErr_Format(PyExc_ValueError, "invalid events sequence"); 3753 return NULL; 3754 } 3755 3756 Py_INCREF(event_name_obj); 3757 if (strcmp(event_name, "start") == 0) { 3758 Py_XSETREF(target->start_event_obj, event_name_obj); 3759 } else if (strcmp(event_name, "end") == 0) { 3760 Py_XSETREF(target->end_event_obj, event_name_obj); 3761 } else if (strcmp(event_name, "start-ns") == 0) { 3762 Py_XSETREF(target->start_ns_event_obj, event_name_obj); 3763 EXPAT(SetNamespaceDeclHandler)( 3764 self->parser, 3765 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, 3766 (XML_EndNamespaceDeclHandler) expat_end_ns_handler 3767 ); 3768 } else if (strcmp(event_name, "end-ns") == 0) { 3769 Py_XSETREF(target->end_ns_event_obj, event_name_obj); 3770 EXPAT(SetNamespaceDeclHandler)( 3771 self->parser, 3772 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, 3773 (XML_EndNamespaceDeclHandler) expat_end_ns_handler 3774 ); 3775 } else { 3776 Py_DECREF(event_name_obj); 3777 Py_DECREF(events_seq); 3778 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name); 3779 return NULL; 3780 } 3781 } 3782 3783 Py_DECREF(events_seq); 3784 Py_RETURN_NONE; 3785 } 3786 3787 static PyObject* 3788 xmlparser_getattro(XMLParserObject* self, PyObject* nameobj) 3789 { 3790 if (PyUnicode_Check(nameobj)) { 3791 PyObject* res; 3792 if (_PyUnicode_EqualToASCIIString(nameobj, "entity")) 3793 res = self->entity; 3794 else if (_PyUnicode_EqualToASCIIString(nameobj, "target")) 3795 res = self->target; 3796 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) { 3797 return PyUnicode_FromFormat( 3798 "Expat %d.%d.%d", XML_MAJOR_VERSION, 3799 XML_MINOR_VERSION, XML_MICRO_VERSION); 3800 } 3801 else 3802 goto generic; 3803 3804 Py_INCREF(res); 3805 return res; 3806 } 3807 generic: 3808 return PyObject_GenericGetAttr((PyObject*) self, nameobj); 3809 } 3810 3811 #include "clinic/_elementtree.c.h" 3812 3813 static PyMethodDef element_methods[] = { 3814 3815 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF 3816 3817 _ELEMENTTREE_ELEMENT_GET_METHODDEF 3818 _ELEMENTTREE_ELEMENT_SET_METHODDEF 3819 3820 _ELEMENTTREE_ELEMENT_FIND_METHODDEF 3821 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF 3822 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF 3823 3824 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF 3825 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF 3826 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF 3827 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF 3828 3829 _ELEMENTTREE_ELEMENT_ITER_METHODDEF 3830 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF 3831 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF 3832 3833 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF 3834 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF 3835 3836 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF 3837 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF 3838 3839 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF 3840 3841 _ELEMENTTREE_ELEMENT___COPY___METHODDEF 3842 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF 3843 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF 3844 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF 3845 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF 3846 3847 {NULL, NULL} 3848 }; 3849 3850 static PyMappingMethods element_as_mapping = { 3851 (lenfunc) element_length, 3852 (binaryfunc) element_subscr, 3853 (objobjargproc) element_ass_subscr, 3854 }; 3855 3856 static PyGetSetDef element_getsetlist[] = { 3857 {"tag", 3858 (getter)element_tag_getter, 3859 (setter)element_tag_setter, 3860 "A string identifying what kind of data this element represents"}, 3861 {"text", 3862 (getter)element_text_getter, 3863 (setter)element_text_setter, 3864 "A string of text directly after the start tag, or None"}, 3865 {"tail", 3866 (getter)element_tail_getter, 3867 (setter)element_tail_setter, 3868 "A string of text directly after the end tag, or None"}, 3869 {"attrib", 3870 (getter)element_attrib_getter, 3871 (setter)element_attrib_setter, 3872 "A dictionary containing the element's attributes"}, 3873 {NULL}, 3874 }; 3875 3876 static PyTypeObject Element_Type = { 3877 PyVarObject_HEAD_INIT(NULL, 0) 3878 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0, 3879 /* methods */ 3880 (destructor)element_dealloc, /* tp_dealloc */ 3881 0, /* tp_print */ 3882 0, /* tp_getattr */ 3883 0, /* tp_setattr */ 3884 0, /* tp_reserved */ 3885 (reprfunc)element_repr, /* tp_repr */ 3886 0, /* tp_as_number */ 3887 &element_as_sequence, /* tp_as_sequence */ 3888 &element_as_mapping, /* tp_as_mapping */ 3889 0, /* tp_hash */ 3890 0, /* tp_call */ 3891 0, /* tp_str */ 3892 PyObject_GenericGetAttr, /* tp_getattro */ 3893 0, /* tp_setattro */ 3894 0, /* tp_as_buffer */ 3895 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 3896 /* tp_flags */ 3897 0, /* tp_doc */ 3898 (traverseproc)element_gc_traverse, /* tp_traverse */ 3899 (inquiry)element_gc_clear, /* tp_clear */ 3900 0, /* tp_richcompare */ 3901 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */ 3902 0, /* tp_iter */ 3903 0, /* tp_iternext */ 3904 element_methods, /* tp_methods */ 3905 0, /* tp_members */ 3906 element_getsetlist, /* tp_getset */ 3907 0, /* tp_base */ 3908 0, /* tp_dict */ 3909 0, /* tp_descr_get */ 3910 0, /* tp_descr_set */ 3911 0, /* tp_dictoffset */ 3912 (initproc)element_init, /* tp_init */ 3913 PyType_GenericAlloc, /* tp_alloc */ 3914 element_new, /* tp_new */ 3915 0, /* tp_free */ 3916 }; 3917 3918 static PyMethodDef treebuilder_methods[] = { 3919 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF 3920 _ELEMENTTREE_TREEBUILDER_START_METHODDEF 3921 _ELEMENTTREE_TREEBUILDER_END_METHODDEF 3922 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF 3923 {NULL, NULL} 3924 }; 3925 3926 static PyTypeObject TreeBuilder_Type = { 3927 PyVarObject_HEAD_INIT(NULL, 0) 3928 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0, 3929 /* methods */ 3930 (destructor)treebuilder_dealloc, /* tp_dealloc */ 3931 0, /* tp_print */ 3932 0, /* tp_getattr */ 3933 0, /* tp_setattr */ 3934 0, /* tp_reserved */ 3935 0, /* tp_repr */ 3936 0, /* tp_as_number */ 3937 0, /* tp_as_sequence */ 3938 0, /* tp_as_mapping */ 3939 0, /* tp_hash */ 3940 0, /* tp_call */ 3941 0, /* tp_str */ 3942 0, /* tp_getattro */ 3943 0, /* tp_setattro */ 3944 0, /* tp_as_buffer */ 3945 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 3946 /* tp_flags */ 3947 0, /* tp_doc */ 3948 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */ 3949 (inquiry)treebuilder_gc_clear, /* tp_clear */ 3950 0, /* tp_richcompare */ 3951 0, /* tp_weaklistoffset */ 3952 0, /* tp_iter */ 3953 0, /* tp_iternext */ 3954 treebuilder_methods, /* tp_methods */ 3955 0, /* tp_members */ 3956 0, /* tp_getset */ 3957 0, /* tp_base */ 3958 0, /* tp_dict */ 3959 0, /* tp_descr_get */ 3960 0, /* tp_descr_set */ 3961 0, /* tp_dictoffset */ 3962 _elementtree_TreeBuilder___init__, /* tp_init */ 3963 PyType_GenericAlloc, /* tp_alloc */ 3964 treebuilder_new, /* tp_new */ 3965 0, /* tp_free */ 3966 }; 3967 3968 static PyMethodDef xmlparser_methods[] = { 3969 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF 3970 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF 3971 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF 3972 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF 3973 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF 3974 {NULL, NULL} 3975 }; 3976 3977 static PyTypeObject XMLParser_Type = { 3978 PyVarObject_HEAD_INIT(NULL, 0) 3979 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0, 3980 /* methods */ 3981 (destructor)xmlparser_dealloc, /* tp_dealloc */ 3982 0, /* tp_print */ 3983 0, /* tp_getattr */ 3984 0, /* tp_setattr */ 3985 0, /* tp_reserved */ 3986 0, /* tp_repr */ 3987 0, /* tp_as_number */ 3988 0, /* tp_as_sequence */ 3989 0, /* tp_as_mapping */ 3990 0, /* tp_hash */ 3991 0, /* tp_call */ 3992 0, /* tp_str */ 3993 (getattrofunc)xmlparser_getattro, /* tp_getattro */ 3994 0, /* tp_setattro */ 3995 0, /* tp_as_buffer */ 3996 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 3997 /* tp_flags */ 3998 0, /* tp_doc */ 3999 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */ 4000 (inquiry)xmlparser_gc_clear, /* tp_clear */ 4001 0, /* tp_richcompare */ 4002 0, /* tp_weaklistoffset */ 4003 0, /* tp_iter */ 4004 0, /* tp_iternext */ 4005 xmlparser_methods, /* tp_methods */ 4006 0, /* tp_members */ 4007 0, /* tp_getset */ 4008 0, /* tp_base */ 4009 0, /* tp_dict */ 4010 0, /* tp_descr_get */ 4011 0, /* tp_descr_set */ 4012 0, /* tp_dictoffset */ 4013 _elementtree_XMLParser___init__, /* tp_init */ 4014 PyType_GenericAlloc, /* tp_alloc */ 4015 xmlparser_new, /* tp_new */ 4016 0, /* tp_free */ 4017 }; 4018 4019 /* ==================================================================== */ 4020 /* python module interface */ 4021 4022 static PyMethodDef _functions[] = { 4023 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS}, 4024 {NULL, NULL} 4025 }; 4026 4027 4028 static struct PyModuleDef elementtreemodule = { 4029 PyModuleDef_HEAD_INIT, 4030 "_elementtree", 4031 NULL, 4032 sizeof(elementtreestate), 4033 _functions, 4034 NULL, 4035 elementtree_traverse, 4036 elementtree_clear, 4037 elementtree_free 4038 }; 4039 4040 PyMODINIT_FUNC 4041 PyInit__elementtree(void) 4042 { 4043 PyObject *m, *temp; 4044 elementtreestate *st; 4045 4046 m = PyState_FindModule(&elementtreemodule); 4047 if (m) { 4048 Py_INCREF(m); 4049 return m; 4050 } 4051 4052 /* Initialize object types */ 4053 if (PyType_Ready(&ElementIter_Type) < 0) 4054 return NULL; 4055 if (PyType_Ready(&TreeBuilder_Type) < 0) 4056 return NULL; 4057 if (PyType_Ready(&Element_Type) < 0) 4058 return NULL; 4059 if (PyType_Ready(&XMLParser_Type) < 0) 4060 return NULL; 4061 4062 m = PyModule_Create(&elementtreemodule); 4063 if (!m) 4064 return NULL; 4065 st = ET_STATE(m); 4066 4067 if (!(temp = PyImport_ImportModule("copy"))) 4068 return NULL; 4069 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy"); 4070 Py_XDECREF(temp); 4071 4072 if (st->deepcopy_obj == NULL) { 4073 return NULL; 4074 } 4075 4076 assert(!PyErr_Occurred()); 4077 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath"))) 4078 return NULL; 4079 4080 /* link against pyexpat */ 4081 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0); 4082 if (expat_capi) { 4083 /* check that it's usable */ 4084 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 || 4085 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) || 4086 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION || 4087 expat_capi->MINOR_VERSION != XML_MINOR_VERSION || 4088 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) { 4089 PyErr_SetString(PyExc_ImportError, 4090 "pyexpat version is incompatible"); 4091 return NULL; 4092 } 4093 } else { 4094 return NULL; 4095 } 4096 4097 st->parseerror_obj = PyErr_NewException( 4098 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL 4099 ); 4100 Py_INCREF(st->parseerror_obj); 4101 PyModule_AddObject(m, "ParseError", st->parseerror_obj); 4102 4103 Py_INCREF((PyObject *)&Element_Type); 4104 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type); 4105 4106 Py_INCREF((PyObject *)&TreeBuilder_Type); 4107 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type); 4108 4109 Py_INCREF((PyObject *)&XMLParser_Type); 4110 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type); 4111 4112 return m; 4113 } 4114