1 /* 2 * ElementTree 3 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $ 4 * 5 * elementtree accelerator 6 * 7 * History: 8 * 1999-06-20 fl created (as part of sgmlop) 9 * 2001-05-29 fl effdom edition 10 * 2003-02-27 fl elementtree edition (alpha) 11 * 2004-06-03 fl updates for elementtree 1.2 12 * 2005-01-05 fl major optimization effort 13 * 2005-01-11 fl first public release (cElementTree 0.8) 14 * 2005-01-12 fl split element object into base and extras 15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9) 16 * 2005-01-17 fl added treebuilder close method 17 * 2005-01-17 fl fixed crash in getchildren 18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3) 19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8) 20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0) 21 * 2005-01-28 fl added remove method (1.0.1) 22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2) 23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers 24 * 2005-03-26 fl added Comment and PI support to XMLParser 25 * 2005-03-27 fl event optimizations; complain about bogus events 26 * 2005-08-08 fl fixed read error handling in parse 27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3) 28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4) 29 * 2005-12-16 fl added support for non-standard encodings 30 * 2006-03-08 fl fixed a couple of potential null-refs and leaks 31 * 2006-03-12 fl merge in 2.5 ssize_t changes 32 * 2007-08-25 fl call custom builder's close method from XMLParser 33 * 2007-08-31 fl added iter, extend from ET 1.3 34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc 35 * 2007-09-03 fl fixed handling of negative insert indexes 36 * 2007-09-04 fl added itertext from ET 1.3 37 * 2007-09-06 fl added position attribute to ParseError exception 38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic) 39 * 
40 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved. 41 * Copyright (c) 1999-2009 by Fredrik Lundh. 42 * 43 * info (at) pythonware.com 44 * http://www.pythonware.com 45 */ 46 47 /* Licensed to PSF under a Contributor Agreement. */ 48 /* See http://www.python.org/psf/license for licensing details. */ 49 50 #include "Python.h" 51 52 #define VERSION "1.0.6" 53 54 /* -------------------------------------------------------------------- */ 55 /* configuration */ 56 57 /* Leave defined to include the expat-based XMLParser type */ 58 #define USE_EXPAT 59 60 /* Define to do all expat calls via pyexpat's embedded expat library */ 61 /* #define USE_PYEXPAT_CAPI */ 62 63 /* An element can hold this many children without extra memory 64 allocations. */ 65 #define STATIC_CHILDREN 4 66 67 /* For best performance, chose a value so that 80-90% of all nodes 68 have no more than the given number of children. Set this to zero 69 to minimize the size of the element structure itself (this only 70 helps if you have lots of leaf nodes with attributes). */ 71 72 /* Also note that pymalloc always allocates blocks in multiples of 73 eight bytes. For the current version of cElementTree, this means 74 that the number of children should be an even number, at least on 75 32-bit platforms. 
*/ 76 77 /* -------------------------------------------------------------------- */ 78 79 #if 0 80 static int memory = 0; 81 #define ALLOC(size, comment)\ 82 do { memory += size; printf("%8d - %s\n", memory, comment); } while (0) 83 #define RELEASE(size, comment)\ 84 do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0) 85 #else 86 #define ALLOC(size, comment) 87 #define RELEASE(size, comment) 88 #endif 89 90 /* compiler tweaks */ 91 #if defined(_MSC_VER) 92 #define LOCAL(type) static __inline type __fastcall 93 #else 94 #define LOCAL(type) static type 95 #endif 96 97 /* compatibility macros */ 98 #if (PY_VERSION_HEX < 0x02060000) 99 #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) 100 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) 101 #endif 102 103 #if (PY_VERSION_HEX < 0x02050000) 104 typedef int Py_ssize_t; 105 #define lenfunc inquiry 106 #endif 107 108 #if (PY_VERSION_HEX < 0x02040000) 109 #define PyDict_CheckExact PyDict_Check 110 111 #if !defined(Py_RETURN_NONE) 112 #define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None 113 #endif 114 #endif 115 116 /* macros used to store 'join' flags in string object pointers. note 117 that all use of text and tail as object pointers must be wrapped in 118 JOIN_OBJ. see comments in the ElementObject definition for more 119 info. 
*/ 120 #define JOIN_GET(p) ((Py_uintptr_t) (p) & 1) 121 #define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag))) 122 #define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1)) 123 124 /* glue functions (see the init function for details) */ 125 static PyObject* elementtree_parseerror_obj; 126 static PyObject* elementtree_copyelement_obj; 127 static PyObject* elementtree_deepcopy_obj; 128 static PyObject* elementtree_iter_obj; 129 static PyObject* elementtree_itertext_obj; 130 static PyObject* elementpath_obj; 131 132 /* helpers */ 133 134 LOCAL(PyObject*) 135 deepcopy(PyObject* object, PyObject* memo) 136 { 137 /* do a deep copy of the given object */ 138 139 PyObject* args; 140 PyObject* result; 141 142 if (!elementtree_deepcopy_obj) { 143 PyErr_SetString( 144 PyExc_RuntimeError, 145 "deepcopy helper not found" 146 ); 147 return NULL; 148 } 149 150 args = PyTuple_New(2); 151 if (!args) 152 return NULL; 153 154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object); 155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo); 156 157 result = PyObject_CallObject(elementtree_deepcopy_obj, args); 158 159 Py_DECREF(args); 160 161 return result; 162 } 163 164 LOCAL(PyObject*) 165 list_join(PyObject* list) 166 { 167 /* join list elements (destroying the list in the process) */ 168 169 PyObject* joiner; 170 PyObject* function; 171 PyObject* args; 172 PyObject* result; 173 174 switch (PyList_GET_SIZE(list)) { 175 case 0: 176 Py_DECREF(list); 177 return PyString_FromString(""); 178 case 1: 179 result = PyList_GET_ITEM(list, 0); 180 Py_INCREF(result); 181 Py_DECREF(list); 182 return result; 183 } 184 185 /* two or more elements: slice out a suitable separator from the 186 first member, and use that to join the entire list */ 187 188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0); 189 if (!joiner) 190 return NULL; 191 192 function = PyObject_GetAttrString(joiner, "join"); 193 if (!function) { 194 Py_DECREF(joiner); 195 return 
NULL; 196 } 197 198 args = PyTuple_New(1); 199 if (!args) 200 return NULL; 201 202 PyTuple_SET_ITEM(args, 0, list); 203 204 result = PyObject_CallObject(function, args); 205 206 Py_DECREF(args); /* also removes list */ 207 Py_DECREF(function); 208 Py_DECREF(joiner); 209 210 return result; 211 } 212 213 /* -------------------------------------------------------------------- */ 214 /* the element type */ 215 216 typedef struct { 217 218 /* attributes (a dictionary object), or None if no attributes */ 219 PyObject* attrib; 220 221 /* child elements */ 222 int length; /* actual number of items */ 223 int allocated; /* allocated items */ 224 225 /* this either points to _children or to a malloced buffer */ 226 PyObject* *children; 227 228 PyObject* _children[STATIC_CHILDREN]; 229 230 } ElementObjectExtra; 231 232 typedef struct { 233 PyObject_HEAD 234 235 /* element tag (a string). */ 236 PyObject* tag; 237 238 /* text before first child. note that this is a tagged pointer; 239 use JOIN_OBJ to get the object pointer. the join flag is used 240 to distinguish lists created by the tree builder from lists 241 assigned to the attribute by application code; the former 242 should be joined before being returned to the user, the latter 243 should be left intact. */ 244 PyObject* text; 245 246 /* text after this element, in parent. note that this is a tagged 247 pointer; use JOIN_OBJ to get the object pointer. 
*/ 248 PyObject* tail; 249 250 ElementObjectExtra* extra; 251 252 } ElementObject; 253 254 staticforward PyTypeObject Element_Type; 255 256 #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type) 257 258 /* -------------------------------------------------------------------- */ 259 /* element constructor and destructor */ 260 261 LOCAL(int) 262 element_new_extra(ElementObject* self, PyObject* attrib) 263 { 264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); 265 if (!self->extra) 266 return -1; 267 268 if (!attrib) 269 attrib = Py_None; 270 271 Py_INCREF(attrib); 272 self->extra->attrib = attrib; 273 274 self->extra->length = 0; 275 self->extra->allocated = STATIC_CHILDREN; 276 self->extra->children = self->extra->_children; 277 278 return 0; 279 } 280 281 LOCAL(void) 282 element_dealloc_extra(ElementObject* self) 283 { 284 int i; 285 286 Py_DECREF(self->extra->attrib); 287 288 for (i = 0; i < self->extra->length; i++) 289 Py_DECREF(self->extra->children[i]); 290 291 if (self->extra->children != self->extra->_children) 292 PyObject_Free(self->extra->children); 293 294 PyObject_Free(self->extra); 295 } 296 297 LOCAL(PyObject*) 298 element_new(PyObject* tag, PyObject* attrib) 299 { 300 ElementObject* self; 301 302 self = PyObject_New(ElementObject, &Element_Type); 303 if (self == NULL) 304 return NULL; 305 306 /* use None for empty dictionaries */ 307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib)) 308 attrib = Py_None; 309 310 self->extra = NULL; 311 312 if (attrib != Py_None) { 313 314 if (element_new_extra(self, attrib) < 0) { 315 PyObject_Del(self); 316 return NULL; 317 } 318 319 self->extra->length = 0; 320 self->extra->allocated = STATIC_CHILDREN; 321 self->extra->children = self->extra->_children; 322 323 } 324 325 Py_INCREF(tag); 326 self->tag = tag; 327 328 Py_INCREF(Py_None); 329 self->text = Py_None; 330 331 Py_INCREF(Py_None); 332 self->tail = Py_None; 333 334 ALLOC(sizeof(ElementObject), "create element"); 335 336 return 
(PyObject*) self; 337 } 338 339 LOCAL(int) 340 element_resize(ElementObject* self, int extra) 341 { 342 int size; 343 PyObject* *children; 344 345 /* make sure self->children can hold the given number of extra 346 elements. set an exception and return -1 if allocation failed */ 347 348 if (!self->extra) 349 element_new_extra(self, NULL); 350 351 size = self->extra->length + extra; 352 353 if (size > self->extra->allocated) { 354 /* use Python 2.4's list growth strategy */ 355 size = (size >> 3) + (size < 9 ? 3 : 6) + size; 356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children" 357 * which needs at least 4 bytes. 358 * Although it's a false alarm always assume at least one child to 359 * be safe. 360 */ 361 size = size ? size : 1; 362 if (self->extra->children != self->extra->_children) { 363 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer 364 * "children", which needs at least 4 bytes. Although it's a 365 * false alarm always assume at least one child to be safe. 
366 */ 367 children = PyObject_Realloc(self->extra->children, 368 size * sizeof(PyObject*)); 369 if (!children) 370 goto nomemory; 371 } else { 372 children = PyObject_Malloc(size * sizeof(PyObject*)); 373 if (!children) 374 goto nomemory; 375 /* copy existing children from static area to malloc buffer */ 376 memcpy(children, self->extra->children, 377 self->extra->length * sizeof(PyObject*)); 378 } 379 self->extra->children = children; 380 self->extra->allocated = size; 381 } 382 383 return 0; 384 385 nomemory: 386 PyErr_NoMemory(); 387 return -1; 388 } 389 390 LOCAL(int) 391 element_add_subelement(ElementObject* self, PyObject* element) 392 { 393 /* add a child element to a parent */ 394 395 if (element_resize(self, 1) < 0) 396 return -1; 397 398 Py_INCREF(element); 399 self->extra->children[self->extra->length] = element; 400 401 self->extra->length++; 402 403 return 0; 404 } 405 406 LOCAL(PyObject*) 407 element_get_attrib(ElementObject* self) 408 { 409 /* return borrowed reference to attrib dictionary */ 410 /* note: this function assumes that the extra section exists */ 411 412 PyObject* res = self->extra->attrib; 413 414 if (res == Py_None) { 415 Py_DECREF(res); 416 /* create missing dictionary */ 417 res = PyDict_New(); 418 if (!res) 419 return NULL; 420 self->extra->attrib = res; 421 } 422 423 return res; 424 } 425 426 LOCAL(PyObject*) 427 element_get_text(ElementObject* self) 428 { 429 /* return borrowed reference to text attribute */ 430 431 PyObject* res = self->text; 432 433 if (JOIN_GET(res)) { 434 res = JOIN_OBJ(res); 435 if (PyList_CheckExact(res)) { 436 res = list_join(res); 437 if (!res) 438 return NULL; 439 self->text = res; 440 } 441 } 442 443 return res; 444 } 445 446 LOCAL(PyObject*) 447 element_get_tail(ElementObject* self) 448 { 449 /* return borrowed reference to text attribute */ 450 451 PyObject* res = self->tail; 452 453 if (JOIN_GET(res)) { 454 res = JOIN_OBJ(res); 455 if (PyList_CheckExact(res)) { 456 res = list_join(res); 457 if (!res) 
458 return NULL; 459 self->tail = res; 460 } 461 } 462 463 return res; 464 } 465 466 static PyObject* 467 element(PyObject* self, PyObject* args, PyObject* kw) 468 { 469 PyObject* elem; 470 471 PyObject* tag; 472 PyObject* attrib = NULL; 473 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, 474 &PyDict_Type, &attrib)) 475 return NULL; 476 477 if (attrib || kw) { 478 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); 479 if (!attrib) 480 return NULL; 481 if (kw) 482 PyDict_Update(attrib, kw); 483 } else { 484 Py_INCREF(Py_None); 485 attrib = Py_None; 486 } 487 488 elem = element_new(tag, attrib); 489 490 Py_DECREF(attrib); 491 492 return elem; 493 } 494 495 static PyObject* 496 subelement(PyObject* self, PyObject* args, PyObject* kw) 497 { 498 PyObject* elem; 499 500 ElementObject* parent; 501 PyObject* tag; 502 PyObject* attrib = NULL; 503 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement", 504 &Element_Type, &parent, &tag, 505 &PyDict_Type, &attrib)) 506 return NULL; 507 508 if (attrib || kw) { 509 attrib = (attrib) ? 
PyDict_Copy(attrib) : PyDict_New(); 510 if (!attrib) 511 return NULL; 512 if (kw) 513 PyDict_Update(attrib, kw); 514 } else { 515 Py_INCREF(Py_None); 516 attrib = Py_None; 517 } 518 519 elem = element_new(tag, attrib); 520 521 Py_DECREF(attrib); 522 523 if (element_add_subelement(parent, elem) < 0) { 524 Py_DECREF(elem); 525 return NULL; 526 } 527 528 return elem; 529 } 530 531 static void 532 element_dealloc(ElementObject* self) 533 { 534 if (self->extra) 535 element_dealloc_extra(self); 536 537 /* discard attributes */ 538 Py_DECREF(self->tag); 539 Py_DECREF(JOIN_OBJ(self->text)); 540 Py_DECREF(JOIN_OBJ(self->tail)); 541 542 RELEASE(sizeof(ElementObject), "destroy element"); 543 544 PyObject_Del(self); 545 } 546 547 /* -------------------------------------------------------------------- */ 548 /* methods (in alphabetical order) */ 549 550 static PyObject* 551 element_append(ElementObject* self, PyObject* args) 552 { 553 PyObject* element; 554 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element)) 555 return NULL; 556 557 if (element_add_subelement(self, element) < 0) 558 return NULL; 559 560 Py_RETURN_NONE; 561 } 562 563 static PyObject* 564 element_clear(ElementObject* self, PyObject* args) 565 { 566 if (!PyArg_ParseTuple(args, ":clear")) 567 return NULL; 568 569 if (self->extra) { 570 element_dealloc_extra(self); 571 self->extra = NULL; 572 } 573 574 Py_INCREF(Py_None); 575 Py_DECREF(JOIN_OBJ(self->text)); 576 self->text = Py_None; 577 578 Py_INCREF(Py_None); 579 Py_DECREF(JOIN_OBJ(self->tail)); 580 self->tail = Py_None; 581 582 Py_RETURN_NONE; 583 } 584 585 static PyObject* 586 element_copy(ElementObject* self, PyObject* args) 587 { 588 int i; 589 ElementObject* element; 590 591 if (!PyArg_ParseTuple(args, ":__copy__")) 592 return NULL; 593 594 element = (ElementObject*) element_new( 595 self->tag, (self->extra) ? 
self->extra->attrib : Py_None 596 ); 597 if (!element) 598 return NULL; 599 600 Py_DECREF(JOIN_OBJ(element->text)); 601 element->text = self->text; 602 Py_INCREF(JOIN_OBJ(element->text)); 603 604 Py_DECREF(JOIN_OBJ(element->tail)); 605 element->tail = self->tail; 606 Py_INCREF(JOIN_OBJ(element->tail)); 607 608 if (self->extra) { 609 610 if (element_resize(element, self->extra->length) < 0) { 611 Py_DECREF(element); 612 return NULL; 613 } 614 615 for (i = 0; i < self->extra->length; i++) { 616 Py_INCREF(self->extra->children[i]); 617 element->extra->children[i] = self->extra->children[i]; 618 } 619 620 element->extra->length = self->extra->length; 621 622 } 623 624 return (PyObject*) element; 625 } 626 627 static PyObject* 628 element_deepcopy(ElementObject* self, PyObject* args) 629 { 630 int i; 631 ElementObject* element; 632 PyObject* tag; 633 PyObject* attrib; 634 PyObject* text; 635 PyObject* tail; 636 PyObject* id; 637 638 PyObject* memo; 639 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo)) 640 return NULL; 641 642 tag = deepcopy(self->tag, memo); 643 if (!tag) 644 return NULL; 645 646 if (self->extra) { 647 attrib = deepcopy(self->extra->attrib, memo); 648 if (!attrib) { 649 Py_DECREF(tag); 650 return NULL; 651 } 652 } else { 653 Py_INCREF(Py_None); 654 attrib = Py_None; 655 } 656 657 element = (ElementObject*) element_new(tag, attrib); 658 659 Py_DECREF(tag); 660 Py_DECREF(attrib); 661 662 if (!element) 663 return NULL; 664 665 text = deepcopy(JOIN_OBJ(self->text), memo); 666 if (!text) 667 goto error; 668 Py_DECREF(element->text); 669 element->text = JOIN_SET(text, JOIN_GET(self->text)); 670 671 tail = deepcopy(JOIN_OBJ(self->tail), memo); 672 if (!tail) 673 goto error; 674 Py_DECREF(element->tail); 675 element->tail = JOIN_SET(tail, JOIN_GET(self->tail)); 676 677 if (self->extra) { 678 679 if (element_resize(element, self->extra->length) < 0) 680 goto error; 681 682 for (i = 0; i < self->extra->length; i++) { 683 PyObject* child = 
deepcopy(self->extra->children[i], memo); 684 if (!child) { 685 element->extra->length = i; 686 goto error; 687 } 688 element->extra->children[i] = child; 689 } 690 691 element->extra->length = self->extra->length; 692 693 } 694 695 /* add object to memo dictionary (so deepcopy won't visit it again) */ 696 id = PyInt_FromLong((Py_uintptr_t) self); 697 if (!id) 698 goto error; 699 700 i = PyDict_SetItem(memo, id, (PyObject*) element); 701 702 Py_DECREF(id); 703 704 if (i < 0) 705 goto error; 706 707 return (PyObject*) element; 708 709 error: 710 Py_DECREF(element); 711 return NULL; 712 } 713 714 LOCAL(int) 715 checkpath(PyObject* tag) 716 { 717 Py_ssize_t i; 718 int check = 1; 719 720 /* check if a tag contains an xpath character */ 721 722 #define PATHCHAR(ch) \ 723 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.') 724 725 #if defined(Py_USING_UNICODE) 726 if (PyUnicode_Check(tag)) { 727 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag); 728 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) { 729 if (p[i] == '{') 730 check = 0; 731 else if (p[i] == '}') 732 check = 1; 733 else if (check && PATHCHAR(p[i])) 734 return 1; 735 } 736 return 0; 737 } 738 #endif 739 if (PyString_Check(tag)) { 740 char *p = PyString_AS_STRING(tag); 741 for (i = 0; i < PyString_GET_SIZE(tag); i++) { 742 if (p[i] == '{') 743 check = 0; 744 else if (p[i] == '}') 745 check = 1; 746 else if (check && PATHCHAR(p[i])) 747 return 1; 748 } 749 return 0; 750 } 751 752 return 1; /* unknown type; might be path expression */ 753 } 754 755 static PyObject* 756 element_extend(ElementObject* self, PyObject* args) 757 { 758 PyObject* seq; 759 Py_ssize_t i, seqlen = 0; 760 761 PyObject* seq_in; 762 if (!PyArg_ParseTuple(args, "O:extend", &seq_in)) 763 return NULL; 764 765 seq = PySequence_Fast(seq_in, ""); 766 if (!seq) { 767 PyErr_Format( 768 PyExc_TypeError, 769 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name 770 ); 771 return NULL; 772 } 773 774 seqlen = PySequence_Size(seq); 775 for (i = 
0; i < seqlen; i++) { 776 PyObject* element = PySequence_Fast_GET_ITEM(seq, i); 777 if (element_add_subelement(self, element) < 0) { 778 Py_DECREF(seq); 779 return NULL; 780 } 781 } 782 783 Py_DECREF(seq); 784 785 Py_RETURN_NONE; 786 } 787 788 static PyObject* 789 element_find(ElementObject* self, PyObject* args) 790 { 791 int i; 792 793 PyObject* tag; 794 PyObject* namespaces = Py_None; 795 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces)) 796 return NULL; 797 798 if (checkpath(tag) || namespaces != Py_None) 799 return PyObject_CallMethod( 800 elementpath_obj, "find", "OOO", self, tag, namespaces 801 ); 802 803 if (!self->extra) 804 Py_RETURN_NONE; 805 806 for (i = 0; i < self->extra->length; i++) { 807 PyObject* item = self->extra->children[i]; 808 if (Element_CheckExact(item) && 809 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) { 810 Py_INCREF(item); 811 return item; 812 } 813 } 814 815 Py_RETURN_NONE; 816 } 817 818 static PyObject* 819 element_findtext(ElementObject* self, PyObject* args) 820 { 821 int i; 822 823 PyObject* tag; 824 PyObject* default_value = Py_None; 825 PyObject* namespaces = Py_None; 826 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces)) 827 return NULL; 828 829 if (checkpath(tag) || namespaces != Py_None) 830 return PyObject_CallMethod( 831 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces 832 ); 833 834 if (!self->extra) { 835 Py_INCREF(default_value); 836 return default_value; 837 } 838 839 for (i = 0; i < self->extra->length; i++) { 840 ElementObject* item = (ElementObject*) self->extra->children[i]; 841 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) { 842 PyObject* text = element_get_text(item); 843 if (text == Py_None) 844 return PyString_FromString(""); 845 Py_XINCREF(text); 846 return text; 847 } 848 } 849 850 Py_INCREF(default_value); 851 return default_value; 852 } 853 854 static PyObject* 855 element_findall(ElementObject* self, PyObject* 
args) 856 { 857 int i; 858 PyObject* out; 859 860 PyObject* tag; 861 PyObject* namespaces = Py_None; 862 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces)) 863 return NULL; 864 865 if (checkpath(tag) || namespaces != Py_None) 866 return PyObject_CallMethod( 867 elementpath_obj, "findall", "OOO", self, tag, namespaces 868 ); 869 870 out = PyList_New(0); 871 if (!out) 872 return NULL; 873 874 if (!self->extra) 875 return out; 876 877 for (i = 0; i < self->extra->length; i++) { 878 PyObject* item = self->extra->children[i]; 879 if (Element_CheckExact(item) && 880 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) { 881 if (PyList_Append(out, item) < 0) { 882 Py_DECREF(out); 883 return NULL; 884 } 885 } 886 } 887 888 return out; 889 } 890 891 static PyObject* 892 element_iterfind(ElementObject* self, PyObject* args) 893 { 894 PyObject* tag; 895 PyObject* namespaces = Py_None; 896 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces)) 897 return NULL; 898 899 return PyObject_CallMethod( 900 elementpath_obj, "iterfind", "OOO", self, tag, namespaces 901 ); 902 } 903 904 static PyObject* 905 element_get(ElementObject* self, PyObject* args) 906 { 907 PyObject* value; 908 909 PyObject* key; 910 PyObject* default_value = Py_None; 911 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value)) 912 return NULL; 913 914 if (!self->extra || self->extra->attrib == Py_None) 915 value = default_value; 916 else { 917 value = PyDict_GetItem(self->extra->attrib, key); 918 if (!value) 919 value = default_value; 920 } 921 922 Py_INCREF(value); 923 return value; 924 } 925 926 static PyObject* 927 element_getchildren(ElementObject* self, PyObject* args) 928 { 929 int i; 930 PyObject* list; 931 932 /* FIXME: report as deprecated? 
*/ 933 934 if (!PyArg_ParseTuple(args, ":getchildren")) 935 return NULL; 936 937 if (!self->extra) 938 return PyList_New(0); 939 940 list = PyList_New(self->extra->length); 941 if (!list) 942 return NULL; 943 944 for (i = 0; i < self->extra->length; i++) { 945 PyObject* item = self->extra->children[i]; 946 Py_INCREF(item); 947 PyList_SET_ITEM(list, i, item); 948 } 949 950 return list; 951 } 952 953 static PyObject* 954 element_iter(ElementObject* self, PyObject* args) 955 { 956 PyObject* result; 957 958 PyObject* tag = Py_None; 959 if (!PyArg_ParseTuple(args, "|O:iter", &tag)) 960 return NULL; 961 962 if (!elementtree_iter_obj) { 963 PyErr_SetString( 964 PyExc_RuntimeError, 965 "iter helper not found" 966 ); 967 return NULL; 968 } 969 970 args = PyTuple_New(2); 971 if (!args) 972 return NULL; 973 974 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); 975 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag); 976 977 result = PyObject_CallObject(elementtree_iter_obj, args); 978 979 Py_DECREF(args); 980 981 return result; 982 } 983 984 985 static PyObject* 986 element_itertext(ElementObject* self, PyObject* args) 987 { 988 PyObject* result; 989 990 if (!PyArg_ParseTuple(args, ":itertext")) 991 return NULL; 992 993 if (!elementtree_itertext_obj) { 994 PyErr_SetString( 995 PyExc_RuntimeError, 996 "itertext helper not found" 997 ); 998 return NULL; 999 } 1000 1001 args = PyTuple_New(1); 1002 if (!args) 1003 return NULL; 1004 1005 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); 1006 1007 result = PyObject_CallObject(elementtree_itertext_obj, args); 1008 1009 Py_DECREF(args); 1010 1011 return result; 1012 } 1013 1014 static PyObject* 1015 element_getitem(PyObject* self_, Py_ssize_t index) 1016 { 1017 ElementObject* self = (ElementObject*) self_; 1018 1019 if (!self->extra || index < 0 || index >= self->extra->length) { 1020 PyErr_SetString( 1021 PyExc_IndexError, 1022 "child index out of range" 1023 ); 1024 return NULL; 1025 } 1026 1027 
Py_INCREF(self->extra->children[index]); 1028 return self->extra->children[index]; 1029 } 1030 1031 static PyObject* 1032 element_insert(ElementObject* self, PyObject* args) 1033 { 1034 int i; 1035 1036 int index; 1037 PyObject* element; 1038 if (!PyArg_ParseTuple(args, "iO!:insert", &index, 1039 &Element_Type, &element)) 1040 return NULL; 1041 1042 if (!self->extra) 1043 element_new_extra(self, NULL); 1044 1045 if (index < 0) { 1046 index += self->extra->length; 1047 if (index < 0) 1048 index = 0; 1049 } 1050 if (index > self->extra->length) 1051 index = self->extra->length; 1052 1053 if (element_resize(self, 1) < 0) 1054 return NULL; 1055 1056 for (i = self->extra->length; i > index; i--) 1057 self->extra->children[i] = self->extra->children[i-1]; 1058 1059 Py_INCREF(element); 1060 self->extra->children[index] = element; 1061 1062 self->extra->length++; 1063 1064 Py_RETURN_NONE; 1065 } 1066 1067 static PyObject* 1068 element_items(ElementObject* self, PyObject* args) 1069 { 1070 if (!PyArg_ParseTuple(args, ":items")) 1071 return NULL; 1072 1073 if (!self->extra || self->extra->attrib == Py_None) 1074 return PyList_New(0); 1075 1076 return PyDict_Items(self->extra->attrib); 1077 } 1078 1079 static PyObject* 1080 element_keys(ElementObject* self, PyObject* args) 1081 { 1082 if (!PyArg_ParseTuple(args, ":keys")) 1083 return NULL; 1084 1085 if (!self->extra || self->extra->attrib == Py_None) 1086 return PyList_New(0); 1087 1088 return PyDict_Keys(self->extra->attrib); 1089 } 1090 1091 static Py_ssize_t 1092 element_length(ElementObject* self) 1093 { 1094 if (!self->extra) 1095 return 0; 1096 1097 return self->extra->length; 1098 } 1099 1100 static PyObject* 1101 element_makeelement(PyObject* self, PyObject* args, PyObject* kw) 1102 { 1103 PyObject* elem; 1104 1105 PyObject* tag; 1106 PyObject* attrib; 1107 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib)) 1108 return NULL; 1109 1110 attrib = PyDict_Copy(attrib); 1111 if (!attrib) 1112 return NULL; 1113 
1114 elem = element_new(tag, attrib); 1115 1116 Py_DECREF(attrib); 1117 1118 return elem; 1119 } 1120 1121 static PyObject* 1122 element_reduce(ElementObject* self, PyObject* args) 1123 { 1124 if (!PyArg_ParseTuple(args, ":__reduce__")) 1125 return NULL; 1126 1127 /* Hack alert: This method is used to work around a __copy__ 1128 problem on certain 2.3 and 2.4 versions. To save time and 1129 simplify the code, we create the copy in here, and use a dummy 1130 copyelement helper to trick the copy module into doing the 1131 right thing. */ 1132 1133 if (!elementtree_copyelement_obj) { 1134 PyErr_SetString( 1135 PyExc_RuntimeError, 1136 "copyelement helper not found" 1137 ); 1138 return NULL; 1139 } 1140 1141 return Py_BuildValue( 1142 "O(N)", elementtree_copyelement_obj, element_copy(self, args) 1143 ); 1144 } 1145 1146 static PyObject* 1147 element_remove(ElementObject* self, PyObject* args) 1148 { 1149 int i; 1150 1151 PyObject* element; 1152 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element)) 1153 return NULL; 1154 1155 if (!self->extra) { 1156 /* element has no children, so raise exception */ 1157 PyErr_SetString( 1158 PyExc_ValueError, 1159 "list.remove(x): x not in list" 1160 ); 1161 return NULL; 1162 } 1163 1164 for (i = 0; i < self->extra->length; i++) { 1165 if (self->extra->children[i] == element) 1166 break; 1167 if (PyObject_Compare(self->extra->children[i], element) == 0) 1168 break; 1169 } 1170 1171 if (i == self->extra->length) { 1172 /* element is not in children, so raise exception */ 1173 PyErr_SetString( 1174 PyExc_ValueError, 1175 "list.remove(x): x not in list" 1176 ); 1177 return NULL; 1178 } 1179 1180 Py_DECREF(self->extra->children[i]); 1181 1182 self->extra->length--; 1183 1184 for (; i < self->extra->length; i++) 1185 self->extra->children[i] = self->extra->children[i+1]; 1186 1187 Py_RETURN_NONE; 1188 } 1189 1190 static PyObject* 1191 element_repr(ElementObject* self) 1192 { 1193 PyObject *repr, *tag; 1194 1195 tag = 
PyObject_Repr(self->tag); 1196 if (!tag) 1197 return NULL; 1198 1199 repr = PyString_FromFormat("<Element %s at %p>", 1200 PyString_AS_STRING(tag), self); 1201 1202 Py_DECREF(tag); 1203 1204 return repr; 1205 } 1206 1207 static PyObject* 1208 element_set(ElementObject* self, PyObject* args) 1209 { 1210 PyObject* attrib; 1211 1212 PyObject* key; 1213 PyObject* value; 1214 if (!PyArg_ParseTuple(args, "OO:set", &key, &value)) 1215 return NULL; 1216 1217 if (!self->extra) 1218 element_new_extra(self, NULL); 1219 1220 attrib = element_get_attrib(self); 1221 if (!attrib) 1222 return NULL; 1223 1224 if (PyDict_SetItem(attrib, key, value) < 0) 1225 return NULL; 1226 1227 Py_RETURN_NONE; 1228 } 1229 1230 static int 1231 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) 1232 { 1233 ElementObject* self = (ElementObject*) self_; 1234 int i; 1235 PyObject* old; 1236 1237 if (!self->extra || index < 0 || index >= self->extra->length) { 1238 PyErr_SetString( 1239 PyExc_IndexError, 1240 "child assignment index out of range"); 1241 return -1; 1242 } 1243 1244 old = self->extra->children[index]; 1245 1246 if (item) { 1247 Py_INCREF(item); 1248 self->extra->children[index] = item; 1249 } else { 1250 self->extra->length--; 1251 for (i = index; i < self->extra->length; i++) 1252 self->extra->children[i] = self->extra->children[i+1]; 1253 } 1254 1255 Py_DECREF(old); 1256 1257 return 0; 1258 } 1259 1260 static PyObject* 1261 element_subscr(PyObject* self_, PyObject* item) 1262 { 1263 ElementObject* self = (ElementObject*) self_; 1264 1265 #if (PY_VERSION_HEX < 0x02050000) 1266 if (PyInt_Check(item) || PyLong_Check(item)) { 1267 long i = PyInt_AsLong(item); 1268 #else 1269 if (PyIndex_Check(item)) { 1270 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); 1271 #endif 1272 1273 if (i == -1 && PyErr_Occurred()) { 1274 return NULL; 1275 } 1276 if (i < 0 && self->extra) 1277 i += self->extra->length; 1278 return element_getitem(self_, i); 1279 } 1280 else if 
(PySlice_Check(item)) { 1281 Py_ssize_t start, stop, step, slicelen, cur, i; 1282 PyObject* list; 1283 1284 if (!self->extra) 1285 return PyList_New(0); 1286 1287 if (PySlice_GetIndicesEx((PySliceObject *)item, 1288 self->extra->length, 1289 &start, &stop, &step, &slicelen) < 0) { 1290 return NULL; 1291 } 1292 1293 if (slicelen <= 0) 1294 return PyList_New(0); 1295 else { 1296 list = PyList_New(slicelen); 1297 if (!list) 1298 return NULL; 1299 1300 for (cur = start, i = 0; i < slicelen; 1301 cur += step, i++) { 1302 PyObject* item = self->extra->children[cur]; 1303 Py_INCREF(item); 1304 PyList_SET_ITEM(list, i, item); 1305 } 1306 1307 return list; 1308 } 1309 } 1310 else { 1311 PyErr_SetString(PyExc_TypeError, 1312 "element indices must be integers"); 1313 return NULL; 1314 } 1315 } 1316 1317 static int 1318 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) 1319 { 1320 ElementObject* self = (ElementObject*) self_; 1321 1322 #if (PY_VERSION_HEX < 0x02050000) 1323 if (PyInt_Check(item) || PyLong_Check(item)) { 1324 long i = PyInt_AsLong(item); 1325 #else 1326 if (PyIndex_Check(item)) { 1327 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); 1328 #endif 1329 1330 if (i == -1 && PyErr_Occurred()) { 1331 return -1; 1332 } 1333 if (i < 0 && self->extra) 1334 i += self->extra->length; 1335 return element_setitem(self_, i, value); 1336 } 1337 else if (PySlice_Check(item)) { 1338 Py_ssize_t start, stop, step, slicelen, newlen, cur, i; 1339 1340 PyObject* recycle = NULL; 1341 PyObject* seq = NULL; 1342 1343 if (!self->extra) 1344 element_new_extra(self, NULL); 1345 1346 if (PySlice_GetIndicesEx((PySliceObject *)item, 1347 self->extra->length, 1348 &start, &stop, &step, &slicelen) < 0) { 1349 return -1; 1350 } 1351 1352 if (value == NULL) 1353 newlen = 0; 1354 else { 1355 seq = PySequence_Fast(value, ""); 1356 if (!seq) { 1357 PyErr_Format( 1358 PyExc_TypeError, 1359 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name 1360 ); 1361 return 
-1; 1362 } 1363 newlen = PySequence_Size(seq); 1364 } 1365 1366 if (step != 1 && newlen != slicelen) 1367 { 1368 PyErr_Format(PyExc_ValueError, 1369 #if (PY_VERSION_HEX < 0x02050000) 1370 "attempt to assign sequence of size %d " 1371 "to extended slice of size %d", 1372 #else 1373 "attempt to assign sequence of size %zd " 1374 "to extended slice of size %zd", 1375 #endif 1376 newlen, slicelen 1377 ); 1378 return -1; 1379 } 1380 1381 1382 /* Resize before creating the recycle bin, to prevent refleaks. */ 1383 if (newlen > slicelen) { 1384 if (element_resize(self, newlen - slicelen) < 0) { 1385 if (seq) { 1386 Py_DECREF(seq); 1387 } 1388 return -1; 1389 } 1390 } 1391 1392 if (slicelen > 0) { 1393 /* to avoid recursive calls to this method (via decref), move 1394 old items to the recycle bin here, and get rid of them when 1395 we're done modifying the element */ 1396 recycle = PyList_New(slicelen); 1397 if (!recycle) { 1398 if (seq) { 1399 Py_DECREF(seq); 1400 } 1401 return -1; 1402 } 1403 for (cur = start, i = 0; i < slicelen; 1404 cur += step, i++) 1405 PyList_SET_ITEM(recycle, i, self->extra->children[cur]); 1406 } 1407 1408 if (newlen < slicelen) { 1409 /* delete slice */ 1410 for (i = stop; i < self->extra->length; i++) 1411 self->extra->children[i + newlen - slicelen] = self->extra->children[i]; 1412 } else if (newlen > slicelen) { 1413 /* insert slice */ 1414 for (i = self->extra->length-1; i >= stop; i--) 1415 self->extra->children[i + newlen - slicelen] = self->extra->children[i]; 1416 } 1417 1418 /* replace the slice */ 1419 for (cur = start, i = 0; i < newlen; 1420 cur += step, i++) { 1421 PyObject* element = PySequence_Fast_GET_ITEM(seq, i); 1422 Py_INCREF(element); 1423 self->extra->children[cur] = element; 1424 } 1425 1426 self->extra->length += newlen - slicelen; 1427 1428 if (seq) { 1429 Py_DECREF(seq); 1430 } 1431 1432 /* discard the recycle bin, and everything in it */ 1433 Py_XDECREF(recycle); 1434 1435 return 0; 1436 } 1437 else { 1438 
PyErr_SetString(PyExc_TypeError, 1439 "element indices must be integers"); 1440 return -1; 1441 } 1442 } 1443 1444 static PyMethodDef element_methods[] = { 1445 1446 {"clear", (PyCFunction) element_clear, METH_VARARGS}, 1447 1448 {"get", (PyCFunction) element_get, METH_VARARGS}, 1449 {"set", (PyCFunction) element_set, METH_VARARGS}, 1450 1451 {"find", (PyCFunction) element_find, METH_VARARGS}, 1452 {"findtext", (PyCFunction) element_findtext, METH_VARARGS}, 1453 {"findall", (PyCFunction) element_findall, METH_VARARGS}, 1454 1455 {"append", (PyCFunction) element_append, METH_VARARGS}, 1456 {"extend", (PyCFunction) element_extend, METH_VARARGS}, 1457 {"insert", (PyCFunction) element_insert, METH_VARARGS}, 1458 {"remove", (PyCFunction) element_remove, METH_VARARGS}, 1459 1460 {"iter", (PyCFunction) element_iter, METH_VARARGS}, 1461 {"itertext", (PyCFunction) element_itertext, METH_VARARGS}, 1462 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS}, 1463 1464 {"getiterator", (PyCFunction) element_iter, METH_VARARGS}, 1465 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS}, 1466 1467 {"items", (PyCFunction) element_items, METH_VARARGS}, 1468 {"keys", (PyCFunction) element_keys, METH_VARARGS}, 1469 1470 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS}, 1471 1472 {"__copy__", (PyCFunction) element_copy, METH_VARARGS}, 1473 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS}, 1474 1475 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on 1476 C objects correctly, so we have to fake it using a __reduce__- 1477 based hack (see the element_reduce implementation above for 1478 details). */ 1479 1480 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're 1481 using a runtime test to figure out if we need to fake things 1482 or now (see the init code below). The following entry is 1483 enabled only if the hack is needed. 
*/ 1484 1485 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS}, 1486 1487 {NULL, NULL} 1488 }; 1489 1490 static PyObject* 1491 element_getattr(ElementObject* self, char* name) 1492 { 1493 PyObject* res; 1494 1495 /* handle common attributes first */ 1496 if (strcmp(name, "tag") == 0) { 1497 res = self->tag; 1498 Py_INCREF(res); 1499 return res; 1500 } else if (strcmp(name, "text") == 0) { 1501 res = element_get_text(self); 1502 Py_INCREF(res); 1503 return res; 1504 } 1505 1506 /* methods */ 1507 res = Py_FindMethod(element_methods, (PyObject*) self, name); 1508 if (res) 1509 return res; 1510 1511 PyErr_Clear(); 1512 1513 /* less common attributes */ 1514 if (strcmp(name, "tail") == 0) { 1515 res = element_get_tail(self); 1516 } else if (strcmp(name, "attrib") == 0) { 1517 if (!self->extra) 1518 element_new_extra(self, NULL); 1519 res = element_get_attrib(self); 1520 } else { 1521 PyErr_SetString(PyExc_AttributeError, name); 1522 return NULL; 1523 } 1524 1525 if (!res) 1526 return NULL; 1527 1528 Py_INCREF(res); 1529 return res; 1530 } 1531 1532 static int 1533 element_setattr(ElementObject* self, const char* name, PyObject* value) 1534 { 1535 if (value == NULL) { 1536 PyErr_SetString( 1537 PyExc_AttributeError, 1538 "can't delete element attributes" 1539 ); 1540 return -1; 1541 } 1542 1543 if (strcmp(name, "tag") == 0) { 1544 Py_DECREF(self->tag); 1545 self->tag = value; 1546 Py_INCREF(self->tag); 1547 } else if (strcmp(name, "text") == 0) { 1548 Py_DECREF(JOIN_OBJ(self->text)); 1549 self->text = value; 1550 Py_INCREF(self->text); 1551 } else if (strcmp(name, "tail") == 0) { 1552 Py_DECREF(JOIN_OBJ(self->tail)); 1553 self->tail = value; 1554 Py_INCREF(self->tail); 1555 } else if (strcmp(name, "attrib") == 0) { 1556 if (!self->extra) 1557 element_new_extra(self, NULL); 1558 Py_DECREF(self->extra->attrib); 1559 self->extra->attrib = value; 1560 Py_INCREF(self->extra->attrib); 1561 } else { 1562 PyErr_SetString(PyExc_AttributeError, name); 1563 return -1; 
/* Sequence protocol table for Element objects: length, indexed read and
   indexed assignment/deletion of children.  The slice slots are left
   empty here. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* sq_slice */
    element_setitem, /* sq_ass_item */
    0, /* sq_ass_slice */
};
destructor */ 1631 1632 LOCAL(PyObject*) 1633 treebuilder_new(void) 1634 { 1635 TreeBuilderObject* self; 1636 1637 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type); 1638 if (self == NULL) 1639 return NULL; 1640 1641 self->root = NULL; 1642 1643 Py_INCREF(Py_None); 1644 self->this = (ElementObject*) Py_None; 1645 1646 Py_INCREF(Py_None); 1647 self->last = (ElementObject*) Py_None; 1648 1649 self->data = NULL; 1650 1651 self->stack = PyList_New(20); 1652 self->index = 0; 1653 1654 self->events = NULL; 1655 self->start_event_obj = self->end_event_obj = NULL; 1656 self->start_ns_event_obj = self->end_ns_event_obj = NULL; 1657 1658 ALLOC(sizeof(TreeBuilderObject), "create treebuilder"); 1659 1660 return (PyObject*) self; 1661 } 1662 1663 static PyObject* 1664 treebuilder(PyObject* self_, PyObject* args) 1665 { 1666 if (!PyArg_ParseTuple(args, ":TreeBuilder")) 1667 return NULL; 1668 1669 return treebuilder_new(); 1670 } 1671 1672 static void 1673 treebuilder_dealloc(TreeBuilderObject* self) 1674 { 1675 Py_XDECREF(self->end_ns_event_obj); 1676 Py_XDECREF(self->start_ns_event_obj); 1677 Py_XDECREF(self->end_event_obj); 1678 Py_XDECREF(self->start_event_obj); 1679 Py_XDECREF(self->events); 1680 Py_DECREF(self->stack); 1681 Py_XDECREF(self->data); 1682 Py_DECREF(self->last); 1683 Py_DECREF(self->this); 1684 Py_XDECREF(self->root); 1685 1686 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder"); 1687 1688 PyObject_Del(self); 1689 } 1690 1691 /* -------------------------------------------------------------------- */ 1692 /* handlers */ 1693 1694 LOCAL(PyObject*) 1695 treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding, 1696 PyObject* standalone) 1697 { 1698 Py_RETURN_NONE; 1699 } 1700 1701 LOCAL(PyObject*) 1702 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, 1703 PyObject* attrib) 1704 { 1705 PyObject* node; 1706 PyObject* this; 1707 1708 if (self->data) { 1709 if (self->this == self->last) { 1710 
Py_DECREF(JOIN_OBJ(self->last->text)); 1711 self->last->text = JOIN_SET( 1712 self->data, PyList_CheckExact(self->data) 1713 ); 1714 } else { 1715 Py_DECREF(JOIN_OBJ(self->last->tail)); 1716 self->last->tail = JOIN_SET( 1717 self->data, PyList_CheckExact(self->data) 1718 ); 1719 } 1720 self->data = NULL; 1721 } 1722 1723 node = element_new(tag, attrib); 1724 if (!node) 1725 return NULL; 1726 1727 this = (PyObject*) self->this; 1728 1729 if (this != Py_None) { 1730 if (element_add_subelement((ElementObject*) this, node) < 0) 1731 goto error; 1732 } else { 1733 if (self->root) { 1734 PyErr_SetString( 1735 elementtree_parseerror_obj, 1736 "multiple elements on top level" 1737 ); 1738 goto error; 1739 } 1740 Py_INCREF(node); 1741 self->root = node; 1742 } 1743 1744 if (self->index < PyList_GET_SIZE(self->stack)) { 1745 if (PyList_SetItem(self->stack, self->index, this) < 0) 1746 goto error; 1747 Py_INCREF(this); 1748 } else { 1749 if (PyList_Append(self->stack, this) < 0) 1750 goto error; 1751 } 1752 self->index++; 1753 1754 Py_DECREF(this); 1755 Py_INCREF(node); 1756 self->this = (ElementObject*) node; 1757 1758 Py_DECREF(self->last); 1759 Py_INCREF(node); 1760 self->last = (ElementObject*) node; 1761 1762 if (self->start_event_obj) { 1763 PyObject* res; 1764 PyObject* action = self->start_event_obj; 1765 res = PyTuple_New(2); 1766 if (res) { 1767 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); 1768 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node); 1769 PyList_Append(self->events, res); 1770 Py_DECREF(res); 1771 } else 1772 PyErr_Clear(); /* FIXME: propagate error */ 1773 } 1774 1775 return node; 1776 1777 error: 1778 Py_DECREF(node); 1779 return NULL; 1780 } 1781 1782 LOCAL(PyObject*) 1783 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) 1784 { 1785 if (!self->data) { 1786 if (self->last == (ElementObject*) Py_None) { 1787 /* ignore calls to data before the first call to start */ 1788 Py_RETURN_NONE; 1789 } 1790 /* store 
the first item as is */ 1791 Py_INCREF(data); self->data = data; 1792 } else { 1793 /* more than one item; use a list to collect items */ 1794 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 && 1795 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) { 1796 /* expat often generates single character data sections; handle 1797 the most common case by resizing the existing string... */ 1798 Py_ssize_t size = PyString_GET_SIZE(self->data); 1799 if (_PyString_Resize(&self->data, size + 1) < 0) 1800 return NULL; 1801 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0]; 1802 } else if (PyList_CheckExact(self->data)) { 1803 if (PyList_Append(self->data, data) < 0) 1804 return NULL; 1805 } else { 1806 PyObject* list = PyList_New(2); 1807 if (!list) 1808 return NULL; 1809 PyList_SET_ITEM(list, 0, self->data); 1810 Py_INCREF(data); PyList_SET_ITEM(list, 1, data); 1811 self->data = list; 1812 } 1813 } 1814 1815 Py_RETURN_NONE; 1816 } 1817 1818 LOCAL(PyObject*) 1819 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) 1820 { 1821 PyObject* item; 1822 1823 if (self->data) { 1824 if (self->this == self->last) { 1825 Py_DECREF(JOIN_OBJ(self->last->text)); 1826 self->last->text = JOIN_SET( 1827 self->data, PyList_CheckExact(self->data) 1828 ); 1829 } else { 1830 Py_DECREF(JOIN_OBJ(self->last->tail)); 1831 self->last->tail = JOIN_SET( 1832 self->data, PyList_CheckExact(self->data) 1833 ); 1834 } 1835 self->data = NULL; 1836 } 1837 1838 if (self->index == 0) { 1839 PyErr_SetString( 1840 PyExc_IndexError, 1841 "pop from empty stack" 1842 ); 1843 return NULL; 1844 } 1845 1846 self->index--; 1847 1848 item = PyList_GET_ITEM(self->stack, self->index); 1849 Py_INCREF(item); 1850 1851 Py_DECREF(self->last); 1852 1853 self->last = (ElementObject*) self->this; 1854 self->this = (ElementObject*) item; 1855 1856 if (self->end_event_obj) { 1857 PyObject* res; 1858 PyObject* action = self->end_event_obj; 1859 PyObject* node = (PyObject*) 
self->last; 1860 res = PyTuple_New(2); 1861 if (res) { 1862 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); 1863 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node); 1864 PyList_Append(self->events, res); 1865 Py_DECREF(res); 1866 } else 1867 PyErr_Clear(); /* FIXME: propagate error */ 1868 } 1869 1870 Py_INCREF(self->last); 1871 return (PyObject*) self->last; 1872 } 1873 1874 LOCAL(void) 1875 treebuilder_handle_namespace(TreeBuilderObject* self, int start, 1876 PyObject *prefix, PyObject *uri) 1877 { 1878 PyObject* res; 1879 PyObject* action; 1880 PyObject* parcel; 1881 1882 if (!self->events) 1883 return; 1884 1885 if (start) { 1886 if (!self->start_ns_event_obj) 1887 return; 1888 action = self->start_ns_event_obj; 1889 parcel = Py_BuildValue("OO", prefix, uri); 1890 if (!parcel) 1891 return; 1892 Py_INCREF(action); 1893 } else { 1894 if (!self->end_ns_event_obj) 1895 return; 1896 action = self->end_ns_event_obj; 1897 Py_INCREF(action); 1898 parcel = Py_None; 1899 Py_INCREF(parcel); 1900 } 1901 1902 res = PyTuple_New(2); 1903 1904 if (res) { 1905 PyTuple_SET_ITEM(res, 0, action); 1906 PyTuple_SET_ITEM(res, 1, parcel); 1907 PyList_Append(self->events, res); 1908 Py_DECREF(res); 1909 } else 1910 PyErr_Clear(); /* FIXME: propagate error */ 1911 } 1912 1913 /* -------------------------------------------------------------------- */ 1914 /* methods (in alphabetical order) */ 1915 1916 static PyObject* 1917 treebuilder_data(TreeBuilderObject* self, PyObject* args) 1918 { 1919 PyObject* data; 1920 if (!PyArg_ParseTuple(args, "O:data", &data)) 1921 return NULL; 1922 1923 return treebuilder_handle_data(self, data); 1924 } 1925 1926 static PyObject* 1927 treebuilder_end(TreeBuilderObject* self, PyObject* args) 1928 { 1929 PyObject* tag; 1930 if (!PyArg_ParseTuple(args, "O:end", &tag)) 1931 return NULL; 1932 1933 return treebuilder_handle_end(self, tag); 1934 } 1935 1936 LOCAL(PyObject*) 1937 treebuilder_done(TreeBuilderObject* self) 1938 { 1939 
PyObject* res; 1940 1941 /* FIXME: check stack size? */ 1942 1943 if (self->root) 1944 res = self->root; 1945 else 1946 res = Py_None; 1947 1948 Py_INCREF(res); 1949 return res; 1950 } 1951 1952 static PyObject* 1953 treebuilder_close(TreeBuilderObject* self, PyObject* args) 1954 { 1955 if (!PyArg_ParseTuple(args, ":close")) 1956 return NULL; 1957 1958 return treebuilder_done(self); 1959 } 1960 1961 static PyObject* 1962 treebuilder_start(TreeBuilderObject* self, PyObject* args) 1963 { 1964 PyObject* tag; 1965 PyObject* attrib = Py_None; 1966 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib)) 1967 return NULL; 1968 1969 return treebuilder_handle_start(self, tag, attrib); 1970 } 1971 1972 static PyObject* 1973 treebuilder_xml(TreeBuilderObject* self, PyObject* args) 1974 { 1975 PyObject* encoding; 1976 PyObject* standalone; 1977 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone)) 1978 return NULL; 1979 1980 return treebuilder_handle_xml(self, encoding, standalone); 1981 } 1982 1983 static PyMethodDef treebuilder_methods[] = { 1984 {"data", (PyCFunction) treebuilder_data, METH_VARARGS}, 1985 {"start", (PyCFunction) treebuilder_start, METH_VARARGS}, 1986 {"end", (PyCFunction) treebuilder_end, METH_VARARGS}, 1987 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS}, 1988 {"close", (PyCFunction) treebuilder_close, METH_VARARGS}, 1989 {NULL, NULL} 1990 }; 1991 1992 static PyObject* 1993 treebuilder_getattr(TreeBuilderObject* self, char* name) 1994 { 1995 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name); 1996 } 1997 1998 statichere PyTypeObject TreeBuilder_Type = { 1999 PyObject_HEAD_INIT(NULL) 2000 0, "TreeBuilder", sizeof(TreeBuilderObject), 0, 2001 /* methods */ 2002 (destructor)treebuilder_dealloc, /* tp_dealloc */ 2003 0, /* tp_print */ 2004 (getattrfunc)treebuilder_getattr, /* tp_getattr */ 2005 }; 2006 2007 /* ==================================================================== */ 2008 /* the expat interface */ 2009 2010 #if 
defined(USE_EXPAT) 2011 2012 #include "expat.h" 2013 2014 #if defined(USE_PYEXPAT_CAPI) 2015 #include "pyexpat.h" 2016 static struct PyExpat_CAPI* expat_capi; 2017 #define EXPAT(func) (expat_capi->func) 2018 #else 2019 #define EXPAT(func) (XML_##func) 2020 #endif 2021 2022 typedef struct { 2023 PyObject_HEAD 2024 2025 XML_Parser parser; 2026 2027 PyObject* target; 2028 PyObject* entity; 2029 2030 PyObject* names; 2031 2032 PyObject* handle_xml; 2033 2034 PyObject* handle_start; 2035 PyObject* handle_data; 2036 PyObject* handle_end; 2037 2038 PyObject* handle_comment; 2039 PyObject* handle_pi; 2040 2041 PyObject* handle_close; 2042 2043 } XMLParserObject; 2044 2045 staticforward PyTypeObject XMLParser_Type; 2046 2047 /* helpers */ 2048 2049 #if defined(Py_USING_UNICODE) 2050 LOCAL(int) 2051 checkstring(const char* string, int size) 2052 { 2053 int i; 2054 2055 /* check if an 8-bit string contains UTF-8 characters */ 2056 for (i = 0; i < size; i++) 2057 if (string[i] & 0x80) 2058 return 1; 2059 2060 return 0; 2061 } 2062 #endif 2063 2064 LOCAL(PyObject*) 2065 makestring(const char* string, int size) 2066 { 2067 /* convert a UTF-8 string to either a 7-bit ascii string or a 2068 Unicode string */ 2069 2070 #if defined(Py_USING_UNICODE) 2071 if (checkstring(string, size)) 2072 return PyUnicode_DecodeUTF8(string, size, "strict"); 2073 #endif 2074 2075 return PyString_FromStringAndSize(string, size); 2076 } 2077 2078 LOCAL(PyObject*) 2079 makeuniversal(XMLParserObject* self, const char* string) 2080 { 2081 /* convert a UTF-8 tag/attribute name from the expat parser 2082 to a universal name string */ 2083 2084 int size = strlen(string); 2085 PyObject* key; 2086 PyObject* value; 2087 2088 /* look the 'raw' name up in the names dictionary */ 2089 key = PyString_FromStringAndSize(string, size); 2090 if (!key) 2091 return NULL; 2092 2093 value = PyDict_GetItem(self->names, key); 2094 2095 if (value) { 2096 Py_INCREF(value); 2097 } else { 2098 /* new name. 
convert to universal name, and decode as 2099 necessary */ 2100 2101 PyObject* tag; 2102 char* p; 2103 int i; 2104 2105 /* look for namespace separator */ 2106 for (i = 0; i < size; i++) 2107 if (string[i] == '}') 2108 break; 2109 if (i != size) { 2110 /* convert to universal name */ 2111 tag = PyString_FromStringAndSize(NULL, size+1); 2112 p = PyString_AS_STRING(tag); 2113 p[0] = '{'; 2114 memcpy(p+1, string, size); 2115 size++; 2116 } else { 2117 /* plain name; use key as tag */ 2118 Py_INCREF(key); 2119 tag = key; 2120 } 2121 2122 /* decode universal name */ 2123 #if defined(Py_USING_UNICODE) 2124 /* inline makestring, to avoid duplicating the source string if 2125 it's not an utf-8 string */ 2126 p = PyString_AS_STRING(tag); 2127 if (checkstring(p, size)) { 2128 value = PyUnicode_DecodeUTF8(p, size, "strict"); 2129 Py_DECREF(tag); 2130 if (!value) { 2131 Py_DECREF(key); 2132 return NULL; 2133 } 2134 } else 2135 #endif 2136 value = tag; /* use tag as is */ 2137 2138 /* add to names dictionary */ 2139 if (PyDict_SetItem(self->names, key, value) < 0) { 2140 Py_DECREF(key); 2141 Py_DECREF(value); 2142 return NULL; 2143 } 2144 } 2145 2146 Py_DECREF(key); 2147 return value; 2148 } 2149 2150 static void 2151 expat_set_error(const char* message, int line, int column) 2152 { 2153 PyObject *error; 2154 PyObject *position; 2155 char buffer[256]; 2156 2157 sprintf(buffer, "%s: line %d, column %d", message, line, column); 2158 2159 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer); 2160 if (!error) 2161 return; 2162 2163 /* add position attribute */ 2164 position = Py_BuildValue("(ii)", line, column); 2165 if (!position) { 2166 Py_DECREF(error); 2167 return; 2168 } 2169 if (PyObject_SetAttrString(error, "position", position) == -1) { 2170 Py_DECREF(error); 2171 Py_DECREF(position); 2172 return; 2173 } 2174 Py_DECREF(position); 2175 2176 PyErr_SetObject(elementtree_parseerror_obj, error); 2177 Py_DECREF(error); 2178 } 2179 2180 /* 
-------------------------------------------------------------------- */ 2181 /* handlers */ 2182 2183 static void 2184 expat_default_handler(XMLParserObject* self, const XML_Char* data_in, 2185 int data_len) 2186 { 2187 PyObject* key; 2188 PyObject* value; 2189 PyObject* res; 2190 2191 if (data_len < 2 || data_in[0] != '&') 2192 return; 2193 2194 key = makestring(data_in + 1, data_len - 2); 2195 if (!key) 2196 return; 2197 2198 value = PyDict_GetItem(self->entity, key); 2199 2200 if (value) { 2201 if (TreeBuilder_CheckExact(self->target)) 2202 res = treebuilder_handle_data( 2203 (TreeBuilderObject*) self->target, value 2204 ); 2205 else if (self->handle_data) 2206 res = PyObject_CallFunction(self->handle_data, "O", value); 2207 else 2208 res = NULL; 2209 Py_XDECREF(res); 2210 } else if (!PyErr_Occurred()) { 2211 /* Report the first error, not the last */ 2212 char message[128]; 2213 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key)); 2214 expat_set_error( 2215 message, 2216 EXPAT(GetErrorLineNumber)(self->parser), 2217 EXPAT(GetErrorColumnNumber)(self->parser) 2218 ); 2219 } 2220 2221 Py_DECREF(key); 2222 } 2223 2224 static void 2225 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, 2226 const XML_Char **attrib_in) 2227 { 2228 PyObject* res; 2229 PyObject* tag; 2230 PyObject* attrib; 2231 int ok; 2232 2233 /* tag name */ 2234 tag = makeuniversal(self, tag_in); 2235 if (!tag) 2236 return; /* parser will look for errors */ 2237 2238 /* attributes */ 2239 if (attrib_in[0]) { 2240 attrib = PyDict_New(); 2241 if (!attrib) 2242 return; 2243 while (attrib_in[0] && attrib_in[1]) { 2244 PyObject* key = makeuniversal(self, attrib_in[0]); 2245 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1])); 2246 if (!key || !value) { 2247 Py_XDECREF(value); 2248 Py_XDECREF(key); 2249 Py_DECREF(attrib); 2250 return; 2251 } 2252 ok = PyDict_SetItem(attrib, key, value); 2253 Py_DECREF(value); 2254 Py_DECREF(key); 2255 if (ok < 0) { 2256 
Py_DECREF(attrib); 2257 return; 2258 } 2259 attrib_in += 2; 2260 } 2261 } else { 2262 Py_INCREF(Py_None); 2263 attrib = Py_None; 2264 } 2265 2266 if (TreeBuilder_CheckExact(self->target)) 2267 /* shortcut */ 2268 res = treebuilder_handle_start((TreeBuilderObject*) self->target, 2269 tag, attrib); 2270 else if (self->handle_start) { 2271 if (attrib == Py_None) { 2272 Py_DECREF(attrib); 2273 attrib = PyDict_New(); 2274 if (!attrib) 2275 return; 2276 } 2277 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib); 2278 } else 2279 res = NULL; 2280 2281 Py_DECREF(tag); 2282 Py_DECREF(attrib); 2283 2284 Py_XDECREF(res); 2285 } 2286 2287 static void 2288 expat_data_handler(XMLParserObject* self, const XML_Char* data_in, 2289 int data_len) 2290 { 2291 PyObject* data; 2292 PyObject* res; 2293 2294 data = makestring(data_in, data_len); 2295 if (!data) 2296 return; /* parser will look for errors */ 2297 2298 if (TreeBuilder_CheckExact(self->target)) 2299 /* shortcut */ 2300 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data); 2301 else if (self->handle_data) 2302 res = PyObject_CallFunction(self->handle_data, "O", data); 2303 else 2304 res = NULL; 2305 2306 Py_DECREF(data); 2307 2308 Py_XDECREF(res); 2309 } 2310 2311 static void 2312 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in) 2313 { 2314 PyObject* tag; 2315 PyObject* res = NULL; 2316 2317 if (TreeBuilder_CheckExact(self->target)) 2318 /* shortcut */ 2319 /* the standard tree builder doesn't look at the end tag */ 2320 res = treebuilder_handle_end( 2321 (TreeBuilderObject*) self->target, Py_None 2322 ); 2323 else if (self->handle_end) { 2324 tag = makeuniversal(self, tag_in); 2325 if (tag) { 2326 res = PyObject_CallFunction(self->handle_end, "O", tag); 2327 Py_DECREF(tag); 2328 } 2329 } 2330 2331 Py_XDECREF(res); 2332 } 2333 2334 static void 2335 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix, 2336 const XML_Char *uri) 2337 { 2338 PyObject* sprefix = 
NULL; 2339 PyObject* suri = NULL; 2340 2341 suri = makestring(uri, strlen(uri)); 2342 if (!suri) 2343 return; 2344 2345 if (prefix) 2346 sprefix = makestring(prefix, strlen(prefix)); 2347 else 2348 sprefix = PyString_FromStringAndSize("", 0); 2349 if (!sprefix) { 2350 Py_DECREF(suri); 2351 return; 2352 } 2353 2354 treebuilder_handle_namespace( 2355 (TreeBuilderObject*) self->target, 1, sprefix, suri 2356 ); 2357 2358 Py_DECREF(sprefix); 2359 Py_DECREF(suri); 2360 } 2361 2362 static void 2363 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) 2364 { 2365 treebuilder_handle_namespace( 2366 (TreeBuilderObject*) self->target, 0, NULL, NULL 2367 ); 2368 } 2369 2370 static void 2371 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) 2372 { 2373 PyObject* comment; 2374 PyObject* res; 2375 2376 if (self->handle_comment) { 2377 comment = makestring(comment_in, strlen(comment_in)); 2378 if (comment) { 2379 res = PyObject_CallFunction(self->handle_comment, "O", comment); 2380 Py_XDECREF(res); 2381 Py_DECREF(comment); 2382 } 2383 } 2384 } 2385 2386 static void 2387 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, 2388 const XML_Char* data_in) 2389 { 2390 PyObject* target; 2391 PyObject* data; 2392 PyObject* res; 2393 2394 if (self->handle_pi) { 2395 target = makestring(target_in, strlen(target_in)); 2396 data = makestring(data_in, strlen(data_in)); 2397 if (target && data) { 2398 res = PyObject_CallFunction(self->handle_pi, "OO", target, data); 2399 Py_XDECREF(res); 2400 Py_DECREF(data); 2401 Py_DECREF(target); 2402 } else { 2403 Py_XDECREF(data); 2404 Py_XDECREF(target); 2405 } 2406 } 2407 } 2408 2409 #if defined(Py_USING_UNICODE) 2410 static int 2411 expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name, 2412 XML_Encoding *info) 2413 { 2414 PyObject* u; 2415 Py_UNICODE* p; 2416 unsigned char s[256]; 2417 int i; 2418 2419 memset(info, 0, sizeof(XML_Encoding)); 2420 2421 for (i = 0; i < 256; i++) 
2422 s[i] = i; 2423 2424 u = PyUnicode_Decode((char*) s, 256, name, "replace"); 2425 if (!u) 2426 return XML_STATUS_ERROR; 2427 2428 if (PyUnicode_GET_SIZE(u) != 256) { 2429 Py_DECREF(u); 2430 return XML_STATUS_ERROR; 2431 } 2432 2433 p = PyUnicode_AS_UNICODE(u); 2434 2435 for (i = 0; i < 256; i++) { 2436 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER) 2437 info->map[i] = p[i]; 2438 else 2439 info->map[i] = -1; 2440 } 2441 2442 Py_DECREF(u); 2443 2444 return XML_STATUS_OK; 2445 } 2446 #endif 2447 2448 /* -------------------------------------------------------------------- */ 2449 /* constructor and destructor */ 2450 2451 static PyObject* 2452 xmlparser(PyObject* self_, PyObject* args, PyObject* kw) 2453 { 2454 XMLParserObject* self; 2455 /* FIXME: does this need to be static? */ 2456 static XML_Memory_Handling_Suite memory_handler; 2457 2458 PyObject* target = NULL; 2459 char* encoding = NULL; 2460 static char* kwlist[] = { "target", "encoding", NULL }; 2461 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist, 2462 &target, &encoding)) 2463 return NULL; 2464 2465 #if defined(USE_PYEXPAT_CAPI) 2466 if (!expat_capi) { 2467 PyErr_SetString( 2468 PyExc_RuntimeError, "cannot load dispatch table from pyexpat" 2469 ); 2470 return NULL; 2471 } 2472 #endif 2473 2474 self = PyObject_New(XMLParserObject, &XMLParser_Type); 2475 if (self == NULL) 2476 return NULL; 2477 2478 self->entity = PyDict_New(); 2479 if (!self->entity) { 2480 PyObject_Del(self); 2481 return NULL; 2482 } 2483 2484 self->names = PyDict_New(); 2485 if (!self->names) { 2486 PyObject_Del(self->entity); 2487 PyObject_Del(self); 2488 return NULL; 2489 } 2490 2491 memory_handler.malloc_fcn = PyObject_Malloc; 2492 memory_handler.realloc_fcn = PyObject_Realloc; 2493 memory_handler.free_fcn = PyObject_Free; 2494 2495 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}"); 2496 if (!self->parser) { 2497 PyObject_Del(self->names); 2498 PyObject_Del(self->entity); 2499 PyObject_Del(self); 
2500 PyErr_NoMemory(); 2501 return NULL; 2502 } 2503 2504 /* setup target handlers */ 2505 if (!target) { 2506 target = treebuilder_new(); 2507 if (!target) { 2508 EXPAT(ParserFree)(self->parser); 2509 PyObject_Del(self->names); 2510 PyObject_Del(self->entity); 2511 PyObject_Del(self); 2512 return NULL; 2513 } 2514 } else 2515 Py_INCREF(target); 2516 self->target = target; 2517 2518 self->handle_xml = PyObject_GetAttrString(target, "xml"); 2519 self->handle_start = PyObject_GetAttrString(target, "start"); 2520 self->handle_data = PyObject_GetAttrString(target, "data"); 2521 self->handle_end = PyObject_GetAttrString(target, "end"); 2522 self->handle_comment = PyObject_GetAttrString(target, "comment"); 2523 self->handle_pi = PyObject_GetAttrString(target, "pi"); 2524 self->handle_close = PyObject_GetAttrString(target, "close"); 2525 2526 PyErr_Clear(); 2527 2528 /* configure parser */ 2529 EXPAT(SetUserData)(self->parser, self); 2530 EXPAT(SetElementHandler)( 2531 self->parser, 2532 (XML_StartElementHandler) expat_start_handler, 2533 (XML_EndElementHandler) expat_end_handler 2534 ); 2535 EXPAT(SetDefaultHandlerExpand)( 2536 self->parser, 2537 (XML_DefaultHandler) expat_default_handler 2538 ); 2539 EXPAT(SetCharacterDataHandler)( 2540 self->parser, 2541 (XML_CharacterDataHandler) expat_data_handler 2542 ); 2543 if (self->handle_comment) 2544 EXPAT(SetCommentHandler)( 2545 self->parser, 2546 (XML_CommentHandler) expat_comment_handler 2547 ); 2548 if (self->handle_pi) 2549 EXPAT(SetProcessingInstructionHandler)( 2550 self->parser, 2551 (XML_ProcessingInstructionHandler) expat_pi_handler 2552 ); 2553 #if defined(Py_USING_UNICODE) 2554 EXPAT(SetUnknownEncodingHandler)( 2555 self->parser, 2556 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL 2557 ); 2558 #endif 2559 2560 ALLOC(sizeof(XMLParserObject), "create expatparser"); 2561 2562 return (PyObject*) self; 2563 } 2564 2565 static void 2566 xmlparser_dealloc(XMLParserObject* self) 2567 { 2568 
EXPAT(ParserFree)(self->parser); 2569 2570 Py_XDECREF(self->handle_close); 2571 Py_XDECREF(self->handle_pi); 2572 Py_XDECREF(self->handle_comment); 2573 Py_XDECREF(self->handle_end); 2574 Py_XDECREF(self->handle_data); 2575 Py_XDECREF(self->handle_start); 2576 Py_XDECREF(self->handle_xml); 2577 2578 Py_DECREF(self->target); 2579 Py_DECREF(self->entity); 2580 Py_DECREF(self->names); 2581 2582 RELEASE(sizeof(XMLParserObject), "destroy expatparser"); 2583 2584 PyObject_Del(self); 2585 } 2586 2587 /* -------------------------------------------------------------------- */ 2588 /* methods (in alphabetical order) */ 2589 2590 LOCAL(PyObject*) 2591 expat_parse(XMLParserObject* self, char* data, int data_len, int final) 2592 { 2593 int ok; 2594 2595 ok = EXPAT(Parse)(self->parser, data, data_len, final); 2596 2597 if (PyErr_Occurred()) 2598 return NULL; 2599 2600 if (!ok) { 2601 expat_set_error( 2602 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)), 2603 EXPAT(GetErrorLineNumber)(self->parser), 2604 EXPAT(GetErrorColumnNumber)(self->parser) 2605 ); 2606 return NULL; 2607 } 2608 2609 Py_RETURN_NONE; 2610 } 2611 2612 static PyObject* 2613 xmlparser_close(XMLParserObject* self, PyObject* args) 2614 { 2615 /* end feeding data to parser */ 2616 2617 PyObject* res; 2618 if (!PyArg_ParseTuple(args, ":close")) 2619 return NULL; 2620 2621 res = expat_parse(self, "", 0, 1); 2622 if (!res) 2623 return NULL; 2624 2625 if (TreeBuilder_CheckExact(self->target)) { 2626 Py_DECREF(res); 2627 return treebuilder_done((TreeBuilderObject*) self->target); 2628 } if (self->handle_close) { 2629 Py_DECREF(res); 2630 return PyObject_CallFunction(self->handle_close, ""); 2631 } else 2632 return res; 2633 } 2634 2635 static PyObject* 2636 xmlparser_feed(XMLParserObject* self, PyObject* args) 2637 { 2638 /* feed data to parser */ 2639 2640 char* data; 2641 int data_len; 2642 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len)) 2643 return NULL; 2644 2645 return expat_parse(self, data, 
data_len, 0); 2646 } 2647 2648 static PyObject* 2649 xmlparser_parse(XMLParserObject* self, PyObject* args) 2650 { 2651 /* (internal) parse until end of input stream */ 2652 2653 PyObject* reader; 2654 PyObject* buffer; 2655 PyObject* res; 2656 2657 PyObject* fileobj; 2658 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj)) 2659 return NULL; 2660 2661 reader = PyObject_GetAttrString(fileobj, "read"); 2662 if (!reader) 2663 return NULL; 2664 2665 /* read from open file object */ 2666 for (;;) { 2667 2668 buffer = PyObject_CallFunction(reader, "i", 64*1024); 2669 2670 if (!buffer) { 2671 /* read failed (e.g. due to KeyboardInterrupt) */ 2672 Py_DECREF(reader); 2673 return NULL; 2674 } 2675 2676 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) { 2677 Py_DECREF(buffer); 2678 break; 2679 } 2680 2681 res = expat_parse( 2682 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0 2683 ); 2684 2685 Py_DECREF(buffer); 2686 2687 if (!res) { 2688 Py_DECREF(reader); 2689 return NULL; 2690 } 2691 Py_DECREF(res); 2692 2693 } 2694 2695 Py_DECREF(reader); 2696 2697 res = expat_parse(self, "", 0, 1); 2698 2699 if (res && TreeBuilder_CheckExact(self->target)) { 2700 Py_DECREF(res); 2701 return treebuilder_done((TreeBuilderObject*) self->target); 2702 } 2703 2704 return res; 2705 } 2706 2707 static PyObject* 2708 xmlparser_setevents(XMLParserObject* self, PyObject* args) 2709 { 2710 /* activate element event reporting */ 2711 2712 Py_ssize_t i; 2713 TreeBuilderObject* target; 2714 2715 PyObject* events; /* event collector */ 2716 PyObject* event_set = Py_None; 2717 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events, 2718 &event_set)) 2719 return NULL; 2720 2721 if (!TreeBuilder_CheckExact(self->target)) { 2722 PyErr_SetString( 2723 PyExc_TypeError, 2724 "event handling only supported for cElementTree.Treebuilder " 2725 "targets" 2726 ); 2727 return NULL; 2728 } 2729 2730 target = (TreeBuilderObject*) self->target; 2731 2732 
Py_INCREF(events); 2733 Py_XDECREF(target->events); 2734 target->events = events; 2735 2736 /* clear out existing events */ 2737 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL; 2738 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL; 2739 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL; 2740 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL; 2741 2742 if (event_set == Py_None) { 2743 /* default is "end" only */ 2744 target->end_event_obj = PyString_FromString("end"); 2745 Py_RETURN_NONE; 2746 } 2747 2748 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */ 2749 goto error; 2750 2751 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) { 2752 PyObject* item = PyTuple_GET_ITEM(event_set, i); 2753 char* event; 2754 if (!PyString_Check(item)) 2755 goto error; 2756 event = PyString_AS_STRING(item); 2757 if (strcmp(event, "start") == 0) { 2758 Py_INCREF(item); 2759 target->start_event_obj = item; 2760 } else if (strcmp(event, "end") == 0) { 2761 Py_INCREF(item); 2762 Py_XDECREF(target->end_event_obj); 2763 target->end_event_obj = item; 2764 } else if (strcmp(event, "start-ns") == 0) { 2765 Py_INCREF(item); 2766 Py_XDECREF(target->start_ns_event_obj); 2767 target->start_ns_event_obj = item; 2768 EXPAT(SetNamespaceDeclHandler)( 2769 self->parser, 2770 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, 2771 (XML_EndNamespaceDeclHandler) expat_end_ns_handler 2772 ); 2773 } else if (strcmp(event, "end-ns") == 0) { 2774 Py_INCREF(item); 2775 Py_XDECREF(target->end_ns_event_obj); 2776 target->end_ns_event_obj = item; 2777 EXPAT(SetNamespaceDeclHandler)( 2778 self->parser, 2779 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, 2780 (XML_EndNamespaceDeclHandler) expat_end_ns_handler 2781 ); 2782 } else { 2783 PyErr_Format( 2784 PyExc_ValueError, 2785 "unknown event '%s'", event 2786 ); 2787 return NULL; 2788 } 2789 } 2790 2791 Py_RETURN_NONE; 2792 2793 error: 2794 
PyErr_SetString( 2795 PyExc_TypeError, 2796 "invalid event tuple" 2797 ); 2798 return NULL; 2799 } 2800 2801 static PyMethodDef xmlparser_methods[] = { 2802 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS}, 2803 {"close", (PyCFunction) xmlparser_close, METH_VARARGS}, 2804 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS}, 2805 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS}, 2806 {NULL, NULL} 2807 }; 2808 2809 static PyObject* 2810 xmlparser_getattr(XMLParserObject* self, char* name) 2811 { 2812 PyObject* res; 2813 2814 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name); 2815 if (res) 2816 return res; 2817 2818 PyErr_Clear(); 2819 2820 if (strcmp(name, "entity") == 0) 2821 res = self->entity; 2822 else if (strcmp(name, "target") == 0) 2823 res = self->target; 2824 else if (strcmp(name, "version") == 0) { 2825 char buffer[100]; 2826 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION, 2827 XML_MINOR_VERSION, XML_MICRO_VERSION); 2828 return PyString_FromString(buffer); 2829 } else { 2830 PyErr_SetString(PyExc_AttributeError, name); 2831 return NULL; 2832 } 2833 2834 Py_INCREF(res); 2835 return res; 2836 } 2837 2838 statichere PyTypeObject XMLParser_Type = { 2839 PyObject_HEAD_INIT(NULL) 2840 0, "XMLParser", sizeof(XMLParserObject), 0, 2841 /* methods */ 2842 (destructor)xmlparser_dealloc, /* tp_dealloc */ 2843 0, /* tp_print */ 2844 (getattrfunc)xmlparser_getattr, /* tp_getattr */ 2845 }; 2846 2847 #endif 2848 2849 /* ==================================================================== */ 2850 /* python module interface */ 2851 2852 static PyMethodDef _functions[] = { 2853 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS}, 2854 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS}, 2855 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS}, 2856 #if defined(USE_EXPAT) 2857 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, 2858 {"XMLTreeBuilder", (PyCFunction) xmlparser, 
METH_VARARGS|METH_KEYWORDS}, 2859 #endif 2860 {NULL, NULL} 2861 }; 2862 2863 DL_EXPORT(void) 2864 init_elementtree(void) 2865 { 2866 PyObject* m; 2867 PyObject* g; 2868 char* bootstrap; 2869 2870 /* Patch object type */ 2871 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type; 2872 #if defined(USE_EXPAT) 2873 Py_TYPE(&XMLParser_Type) = &PyType_Type; 2874 #endif 2875 2876 m = Py_InitModule("_elementtree", _functions); 2877 if (!m) 2878 return; 2879 2880 /* python glue code */ 2881 2882 g = PyDict_New(); 2883 if (!g) 2884 return; 2885 2886 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins()); 2887 2888 bootstrap = ( 2889 2890 "from copy import copy, deepcopy\n" 2891 2892 "try:\n" 2893 " from xml.etree import ElementTree\n" 2894 "except ImportError:\n" 2895 " import ElementTree\n" 2896 "ET = ElementTree\n" 2897 "del ElementTree\n" 2898 2899 "import _elementtree as cElementTree\n" 2900 2901 "try:\n" /* check if copy works as is */ 2902 " copy(cElementTree.Element('x'))\n" 2903 "except:\n" 2904 " def copyelement(elem):\n" 2905 " return elem\n" 2906 2907 "class CommentProxy:\n" 2908 " def __call__(self, text=None):\n" 2909 " element = cElementTree.Element(ET.Comment)\n" 2910 " element.text = text\n" 2911 " return element\n" 2912 " def __cmp__(self, other):\n" 2913 " return cmp(ET.Comment, other)\n" 2914 "cElementTree.Comment = CommentProxy()\n" 2915 2916 "class ElementTree(ET.ElementTree):\n" /* public */ 2917 " def parse(self, source, parser=None):\n" 2918 " if not hasattr(source, 'read'):\n" 2919 " source = open(source, 'rb')\n" 2920 " if parser is not None:\n" 2921 " while 1:\n" 2922 " data = source.read(65536)\n" 2923 " if not data:\n" 2924 " break\n" 2925 " parser.feed(data)\n" 2926 " self._root = parser.close()\n" 2927 " else:\n" 2928 " parser = cElementTree.XMLParser()\n" 2929 " self._root = parser._parse(source)\n" 2930 " return self._root\n" 2931 "cElementTree.ElementTree = ElementTree\n" 2932 2933 "def iter(node, tag=None):\n" /* helper 
*/ 2934 " if tag == '*':\n" 2935 " tag = None\n" 2936 " if tag is None or node.tag == tag:\n" 2937 " yield node\n" 2938 " for node in node:\n" 2939 " for node in iter(node, tag):\n" 2940 " yield node\n" 2941 2942 "def itertext(node):\n" /* helper */ 2943 " if node.text:\n" 2944 " yield node.text\n" 2945 " for e in node:\n" 2946 " for s in e.itertext():\n" 2947 " yield s\n" 2948 " if e.tail:\n" 2949 " yield e.tail\n" 2950 2951 "def parse(source, parser=None):\n" /* public */ 2952 " tree = ElementTree()\n" 2953 " tree.parse(source, parser)\n" 2954 " return tree\n" 2955 "cElementTree.parse = parse\n" 2956 2957 "class iterparse(object):\n" 2958 " root = None\n" 2959 " def __init__(self, file, events=None):\n" 2960 " if not hasattr(file, 'read'):\n" 2961 " file = open(file, 'rb')\n" 2962 " self._file = file\n" 2963 " self._events = []\n" 2964 " self._index = 0\n" 2965 " self.root = self._root = None\n" 2966 " b = cElementTree.TreeBuilder()\n" 2967 " self._parser = cElementTree.XMLParser(b)\n" 2968 " self._parser._setevents(self._events, events)\n" 2969 " def next(self):\n" 2970 " while 1:\n" 2971 " try:\n" 2972 " item = self._events[self._index]\n" 2973 " except IndexError:\n" 2974 " if self._parser is None:\n" 2975 " self.root = self._root\n" 2976 " raise StopIteration\n" 2977 " # load event buffer\n" 2978 " del self._events[:]\n" 2979 " self._index = 0\n" 2980 " data = self._file.read(16384)\n" 2981 " if data:\n" 2982 " self._parser.feed(data)\n" 2983 " else:\n" 2984 " self._root = self._parser.close()\n" 2985 " self._parser = None\n" 2986 " else:\n" 2987 " self._index = self._index + 1\n" 2988 " return item\n" 2989 " def __iter__(self):\n" 2990 " return self\n" 2991 "cElementTree.iterparse = iterparse\n" 2992 2993 "class PIProxy:\n" 2994 " def __call__(self, target, text=None):\n" 2995 " element = cElementTree.Element(ET.PI)\n" 2996 " element.text = target\n" 2997 " if text:\n" 2998 " element.text = element.text + ' ' + text\n" 2999 " return element\n" 3000 " def 
__cmp__(self, other):\n" 3001 " return cmp(ET.PI, other)\n" 3002 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n" 3003 3004 "def XML(text):\n" /* public */ 3005 " parser = cElementTree.XMLParser()\n" 3006 " parser.feed(text)\n" 3007 " return parser.close()\n" 3008 "cElementTree.XML = cElementTree.fromstring = XML\n" 3009 3010 "def XMLID(text):\n" /* public */ 3011 " tree = XML(text)\n" 3012 " ids = {}\n" 3013 " for elem in tree.iter():\n" 3014 " id = elem.get('id')\n" 3015 " if id:\n" 3016 " ids[id] = elem\n" 3017 " return tree, ids\n" 3018 "cElementTree.XMLID = XMLID\n" 3019 3020 "try:\n" 3021 " register_namespace = ET.register_namespace\n" 3022 "except AttributeError:\n" 3023 " def register_namespace(prefix, uri):\n" 3024 " ET._namespace_map[uri] = prefix\n" 3025 "cElementTree.register_namespace = register_namespace\n" 3026 3027 "cElementTree.dump = ET.dump\n" 3028 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n" 3029 "cElementTree.iselement = ET.iselement\n" 3030 "cElementTree.QName = ET.QName\n" 3031 "cElementTree.tostring = ET.tostring\n" 3032 "cElementTree.fromstringlist = ET.fromstringlist\n" 3033 "cElementTree.tostringlist = ET.tostringlist\n" 3034 "cElementTree.VERSION = '" VERSION "'\n" 3035 "cElementTree.__version__ = '" VERSION "'\n" 3036 3037 ); 3038 3039 if (!PyRun_String(bootstrap, Py_file_input, g, NULL)) 3040 return; 3041 3042 elementpath_obj = PyDict_GetItemString(g, "ElementPath"); 3043 3044 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement"); 3045 if (elementtree_copyelement_obj) { 3046 /* reduce hack needed; enable reduce method */ 3047 PyMethodDef* mp; 3048 for (mp = element_methods; mp->ml_name; mp++) 3049 if (mp->ml_meth == (PyCFunction) element_reduce) { 3050 mp->ml_name = "__reduce__"; 3051 break; 3052 } 3053 } else 3054 PyErr_Clear(); 3055 3056 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy"); 3057 elementtree_iter_obj = PyDict_GetItemString(g, "iter"); 3058 
elementtree_itertext_obj = PyDict_GetItemString(g, "itertext"); 3059 3060 #if defined(USE_PYEXPAT_CAPI) 3061 /* link against pyexpat, if possible */ 3062 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0); 3063 if (expat_capi) { 3064 /* check that it's usable */ 3065 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 || 3066 expat_capi->size < sizeof(struct PyExpat_CAPI) || 3067 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION || 3068 expat_capi->MINOR_VERSION != XML_MINOR_VERSION || 3069 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) 3070 expat_capi = NULL; 3071 } 3072 #endif 3073 3074 elementtree_parseerror_obj = PyErr_NewException( 3075 "cElementTree.ParseError", PyExc_SyntaxError, NULL 3076 ); 3077 Py_INCREF(elementtree_parseerror_obj); 3078 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj); 3079 } 3080