Home | History | Annotate | Download | only in Modules
      1 /*
      2  * ElementTree
      3  * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
      4  *
      5  * elementtree accelerator
      6  *
      7  * History:
      8  * 1999-06-20 fl  created (as part of sgmlop)
      9  * 2001-05-29 fl  effdom edition
     10  * 2003-02-27 fl  elementtree edition (alpha)
     11  * 2004-06-03 fl  updates for elementtree 1.2
     12  * 2005-01-05 fl  major optimization effort
     13  * 2005-01-11 fl  first public release (cElementTree 0.8)
     14  * 2005-01-12 fl  split element object into base and extras
     15  * 2005-01-13 fl  use tagged pointers for tail/text (cElementTree 0.9)
     16  * 2005-01-17 fl  added treebuilder close method
     17  * 2005-01-17 fl  fixed crash in getchildren
     18  * 2005-01-18 fl  removed observer api, added iterparse (cElementTree 0.9.3)
     19  * 2005-01-23 fl  revised iterparse api; added namespace event support (0.9.8)
     20  * 2005-01-26 fl  added VERSION module property (cElementTree 1.0)
     21  * 2005-01-28 fl  added remove method (1.0.1)
     22  * 2005-03-01 fl  added iselement function; fixed makeelement aliasing (1.0.2)
     23  * 2005-03-13 fl  export Comment and ProcessingInstruction/PI helpers
     24  * 2005-03-26 fl  added Comment and PI support to XMLParser
     25  * 2005-03-27 fl  event optimizations; complain about bogus events
     26  * 2005-08-08 fl  fixed read error handling in parse
     27  * 2005-08-11 fl  added runtime test for copy workaround (1.0.3)
     28  * 2005-12-13 fl  added expat_capi support (for xml.etree) (1.0.4)
     29  * 2005-12-16 fl  added support for non-standard encodings
     30  * 2006-03-08 fl  fixed a couple of potential null-refs and leaks
     31  * 2006-03-12 fl  merge in 2.5 ssize_t changes
     32  * 2007-08-25 fl  call custom builder's close method from XMLParser
     33  * 2007-08-31 fl  added iter, extend from ET 1.3
     34  * 2007-09-01 fl  fixed ParseError exception, setslice source type, etc
     35  * 2007-09-03 fl  fixed handling of negative insert indexes
     36  * 2007-09-04 fl  added itertext from ET 1.3
     37  * 2007-09-06 fl  added position attribute to ParseError exception
     38  * 2008-06-06 fl  delay error reporting in iterparse (from Hrvoje Niksic)
     39  *
     40  * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
     41  * Copyright (c) 1999-2009 by Fredrik Lundh.
     42  *
     43  * info (at) pythonware.com
     44  * http://www.pythonware.com
     45  */
     46 
     47 /* Licensed to PSF under a Contributor Agreement. */
     48 /* See http://www.python.org/psf/license for licensing details. */
     49 
     50 #include "Python.h"
     51 
     52 #define VERSION "1.0.6"
     53 
     54 /* -------------------------------------------------------------------- */
     55 /* configuration */
     56 
     57 /* Leave defined to include the expat-based XMLParser type */
     58 #define USE_EXPAT
     59 
     60 /* Define to do all expat calls via pyexpat's embedded expat library */
     61 /* #define USE_PYEXPAT_CAPI */
     62 
     63 /* An element can hold this many children without extra memory
     64    allocations. */
     65 #define STATIC_CHILDREN 4
     66 
     67 /* For best performance, choose a value so that 80-90% of all nodes
     68    have no more than the given number of children.  Set this to zero
     69    to minimize the size of the element structure itself (this only
     70    helps if you have lots of leaf nodes with attributes). */
     71 
     72 /* Also note that pymalloc always allocates blocks in multiples of
     73    eight bytes.  For the current version of cElementTree, this means
     74    that the number of children should be an even number, at least on
     75    32-bit platforms. */
     76 
     77 /* -------------------------------------------------------------------- */
     78 
     79 #if 0
     80 static int memory = 0;
     81 #define ALLOC(size, comment)\
     82 do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
     83 #define RELEASE(size, comment)\
     84 do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
     85 #else
     86 #define ALLOC(size, comment)
     87 #define RELEASE(size, comment)
     88 #endif
     89 
     90 /* compiler tweaks */
     91 #if defined(_MSC_VER)
     92 #define LOCAL(type) static __inline type __fastcall
     93 #else
     94 #define LOCAL(type) static type
     95 #endif
     96 
     97 /* compatibility macros */
     98 #if (PY_VERSION_HEX < 0x02060000)
     99 #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
    100 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
    101 #endif
    102 
    103 #if (PY_VERSION_HEX < 0x02050000)
    104 typedef int Py_ssize_t;
    105 #define lenfunc inquiry
    106 #endif
    107 
    108 #if (PY_VERSION_HEX < 0x02040000)
    109 #define PyDict_CheckExact PyDict_Check
    110 
    111 #if !defined(Py_RETURN_NONE)
    112 #define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
    113 #endif
    114 #endif
    115 
    116 /* macros used to store 'join' flags in string object pointers.  note
    117    that all use of text and tail as object pointers must be wrapped in
    118    JOIN_OBJ.  see comments in the ElementObject definition for more
    119    info. */
    120 #define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
    121 #define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
    122 #define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
    123 
    124 /* glue functions (see the init function for details) */
    125 static PyObject* elementtree_parseerror_obj;
    126 static PyObject* elementtree_copyelement_obj;
    127 static PyObject* elementtree_deepcopy_obj;
    128 static PyObject* elementtree_iter_obj;
    129 static PyObject* elementtree_itertext_obj;
    130 static PyObject* elementpath_obj;
    131 
    132 /* helpers */
    133 
    134 LOCAL(PyObject*)
    135 deepcopy(PyObject* object, PyObject* memo)
    136 {
    137     /* do a deep copy of the given object */
    138 
    139     PyObject* args;
    140     PyObject* result;
    141 
    142     if (!elementtree_deepcopy_obj) {
    143         PyErr_SetString(
    144             PyExc_RuntimeError,
    145             "deepcopy helper not found"
    146             );
    147         return NULL;
    148     }
    149 
    150     args = PyTuple_New(2);
    151     if (!args)
    152         return NULL;
    153 
    154     Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
    155     Py_INCREF(memo);   PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
    156 
    157     result = PyObject_CallObject(elementtree_deepcopy_obj, args);
    158 
    159     Py_DECREF(args);
    160 
    161     return result;
    162 }
    163 
    164 LOCAL(PyObject*)
    165 list_join(PyObject* list)
    166 {
    167     /* join list elements (destroying the list in the process) */
    168 
    169     PyObject* joiner;
    170     PyObject* function;
    171     PyObject* args;
    172     PyObject* result;
    173 
    174     switch (PyList_GET_SIZE(list)) {
    175     case 0:
    176         Py_DECREF(list);
    177         return PyString_FromString("");
    178     case 1:
    179         result = PyList_GET_ITEM(list, 0);
    180         Py_INCREF(result);
    181         Py_DECREF(list);
    182         return result;
    183     }
    184 
    185     /* two or more elements: slice out a suitable separator from the
    186        first member, and use that to join the entire list */
    187 
    188     joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
    189     if (!joiner)
    190         return NULL;
    191 
    192     function = PyObject_GetAttrString(joiner, "join");
    193     if (!function) {
    194         Py_DECREF(joiner);
    195         return NULL;
    196     }
    197 
    198     args = PyTuple_New(1);
    199     if (!args)
    200         return NULL;
    201 
    202     PyTuple_SET_ITEM(args, 0, list);
    203 
    204     result = PyObject_CallObject(function, args);
    205 
    206     Py_DECREF(args); /* also removes list */
    207     Py_DECREF(function);
    208     Py_DECREF(joiner);
    209 
    210     return result;
    211 }
    212 
    213 /* -------------------------------------------------------------------- */
    214 /* the element type */
    215 
/* Optional part of an element, holding its attributes and children.
   element_new leaves ElementObject.extra NULL when no attributes are
   given; element_new_extra allocates this structure when it is first
   needed (for example from element_resize). */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    int length; /* actual number of items */
    int allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage used until more than STATIC_CHILDREN slots are
       needed */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
    231 
/* The element object itself: tag, text and tail are always present,
   attributes and children live in the optional `extra` section. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children, or NULL if none have been allocated */
    ElementObjectExtra* extra;

} ElementObject;
    253 
    254 staticforward PyTypeObject Element_Type;
    255 
    256 #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
    257 
    258 /* -------------------------------------------------------------------- */
    259 /* element constructor and destructor */
    260 
    261 LOCAL(int)
    262 element_new_extra(ElementObject* self, PyObject* attrib)
    263 {
    264     self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
    265     if (!self->extra)
    266         return -1;
    267 
    268     if (!attrib)
    269         attrib = Py_None;
    270 
    271     Py_INCREF(attrib);
    272     self->extra->attrib = attrib;
    273 
    274     self->extra->length = 0;
    275     self->extra->allocated = STATIC_CHILDREN;
    276     self->extra->children = self->extra->_children;
    277 
    278     return 0;
    279 }
    280 
    281 LOCAL(void)
    282 element_dealloc_extra(ElementObject* self)
    283 {
    284     int i;
    285 
    286     Py_DECREF(self->extra->attrib);
    287 
    288     for (i = 0; i < self->extra->length; i++)
    289         Py_DECREF(self->extra->children[i]);
    290 
    291     if (self->extra->children != self->extra->_children)
    292         PyObject_Free(self->extra->children);
    293 
    294     PyObject_Free(self->extra);
    295 }
    296 
    297 LOCAL(PyObject*)
    298 element_new(PyObject* tag, PyObject* attrib)
    299 {
    300     ElementObject* self;
    301 
    302     self = PyObject_New(ElementObject, &Element_Type);
    303     if (self == NULL)
    304         return NULL;
    305 
    306     /* use None for empty dictionaries */
    307     if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
    308         attrib = Py_None;
    309 
    310     self->extra = NULL;
    311 
    312     if (attrib != Py_None) {
    313 
    314         if (element_new_extra(self, attrib) < 0) {
    315             PyObject_Del(self);
    316             return NULL;
    317         }
    318 
    319         self->extra->length = 0;
    320         self->extra->allocated = STATIC_CHILDREN;
    321         self->extra->children = self->extra->_children;
    322 
    323     }
    324 
    325     Py_INCREF(tag);
    326     self->tag = tag;
    327 
    328     Py_INCREF(Py_None);
    329     self->text = Py_None;
    330 
    331     Py_INCREF(Py_None);
    332     self->tail = Py_None;
    333 
    334     ALLOC(sizeof(ElementObject), "create element");
    335 
    336     return (PyObject*) self;
    337 }
    338 
    339 LOCAL(int)
    340 element_resize(ElementObject* self, int extra)
    341 {
    342     int size;
    343     PyObject* *children;
    344 
    345     /* make sure self->children can hold the given number of extra
    346        elements.  set an exception and return -1 if allocation failed */
    347 
    348     if (!self->extra)
    349         element_new_extra(self, NULL);
    350 
    351     size = self->extra->length + extra;
    352 
    353     if (size > self->extra->allocated) {
    354         /* use Python 2.4's list growth strategy */
    355         size = (size >> 3) + (size < 9 ? 3 : 6) + size;
    356         /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
    357          * which needs at least 4 bytes.
    358          * Although it's a false alarm always assume at least one child to
    359          * be safe.
    360          */
    361         size = size ? size : 1;
    362         if (self->extra->children != self->extra->_children) {
    363             /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
    364              * "children", which needs at least 4 bytes. Although it's a
    365              * false alarm always assume at least one child to be safe.
    366              */
    367             children = PyObject_Realloc(self->extra->children,
    368                                         size * sizeof(PyObject*));
    369             if (!children)
    370                 goto nomemory;
    371         } else {
    372             children = PyObject_Malloc(size * sizeof(PyObject*));
    373             if (!children)
    374                 goto nomemory;
    375             /* copy existing children from static area to malloc buffer */
    376             memcpy(children, self->extra->children,
    377                    self->extra->length * sizeof(PyObject*));
    378         }
    379         self->extra->children = children;
    380         self->extra->allocated = size;
    381     }
    382 
    383     return 0;
    384 
    385   nomemory:
    386     PyErr_NoMemory();
    387     return -1;
    388 }
    389 
    390 LOCAL(int)
    391 element_add_subelement(ElementObject* self, PyObject* element)
    392 {
    393     /* add a child element to a parent */
    394 
    395     if (element_resize(self, 1) < 0)
    396         return -1;
    397 
    398     Py_INCREF(element);
    399     self->extra->children[self->extra->length] = element;
    400 
    401     self->extra->length++;
    402 
    403     return 0;
    404 }
    405 
    406 LOCAL(PyObject*)
    407 element_get_attrib(ElementObject* self)
    408 {
    409     /* return borrowed reference to attrib dictionary */
    410     /* note: this function assumes that the extra section exists */
    411 
    412     PyObject* res = self->extra->attrib;
    413 
    414     if (res == Py_None) {
    415         Py_DECREF(res);
    416         /* create missing dictionary */
    417         res = PyDict_New();
    418         if (!res)
    419             return NULL;
    420         self->extra->attrib = res;
    421     }
    422 
    423     return res;
    424 }
    425 
    426 LOCAL(PyObject*)
    427 element_get_text(ElementObject* self)
    428 {
    429     /* return borrowed reference to text attribute */
    430 
    431     PyObject* res = self->text;
    432 
    433     if (JOIN_GET(res)) {
    434         res = JOIN_OBJ(res);
    435         if (PyList_CheckExact(res)) {
    436             res = list_join(res);
    437             if (!res)
    438                 return NULL;
    439             self->text = res;
    440         }
    441     }
    442 
    443     return res;
    444 }
    445 
    446 LOCAL(PyObject*)
    447 element_get_tail(ElementObject* self)
    448 {
    449     /* return borrowed reference to text attribute */
    450 
    451     PyObject* res = self->tail;
    452 
    453     if (JOIN_GET(res)) {
    454         res = JOIN_OBJ(res);
    455         if (PyList_CheckExact(res)) {
    456             res = list_join(res);
    457             if (!res)
    458                 return NULL;
    459             self->tail = res;
    460         }
    461     }
    462 
    463     return res;
    464 }
    465 
    466 static PyObject*
    467 element(PyObject* self, PyObject* args, PyObject* kw)
    468 {
    469     PyObject* elem;
    470 
    471     PyObject* tag;
    472     PyObject* attrib = NULL;
    473     if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
    474                           &PyDict_Type, &attrib))
    475         return NULL;
    476 
    477     if (attrib || kw) {
    478         attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
    479         if (!attrib)
    480             return NULL;
    481         if (kw)
    482             PyDict_Update(attrib, kw);
    483     } else {
    484         Py_INCREF(Py_None);
    485         attrib = Py_None;
    486     }
    487 
    488     elem = element_new(tag, attrib);
    489 
    490     Py_DECREF(attrib);
    491 
    492     return elem;
    493 }
    494 
    495 static PyObject*
    496 subelement(PyObject* self, PyObject* args, PyObject* kw)
    497 {
    498     PyObject* elem;
    499 
    500     ElementObject* parent;
    501     PyObject* tag;
    502     PyObject* attrib = NULL;
    503     if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
    504                           &Element_Type, &parent, &tag,
    505                           &PyDict_Type, &attrib))
    506         return NULL;
    507 
    508     if (attrib || kw) {
    509         attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
    510         if (!attrib)
    511             return NULL;
    512         if (kw)
    513             PyDict_Update(attrib, kw);
    514     } else {
    515         Py_INCREF(Py_None);
    516         attrib = Py_None;
    517     }
    518 
    519     elem = element_new(tag, attrib);
    520 
    521     Py_DECREF(attrib);
    522 
    523     if (element_add_subelement(parent, elem) < 0) {
    524         Py_DECREF(elem);
    525         return NULL;
    526     }
    527 
    528     return elem;
    529 }
    530 
    531 static void
    532 element_dealloc(ElementObject* self)
    533 {
    534     if (self->extra)
    535         element_dealloc_extra(self);
    536 
    537     /* discard attributes */
    538     Py_DECREF(self->tag);
    539     Py_DECREF(JOIN_OBJ(self->text));
    540     Py_DECREF(JOIN_OBJ(self->tail));
    541 
    542     RELEASE(sizeof(ElementObject), "destroy element");
    543 
    544     PyObject_Del(self);
    545 }
    546 
    547 /* -------------------------------------------------------------------- */
    548 /* methods (in alphabetical order) */
    549 
    550 static PyObject*
    551 element_append(ElementObject* self, PyObject* args)
    552 {
    553     PyObject* element;
    554     if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
    555         return NULL;
    556 
    557     if (element_add_subelement(self, element) < 0)
    558         return NULL;
    559 
    560     Py_RETURN_NONE;
    561 }
    562 
    563 static PyObject*
    564 element_clear(ElementObject* self, PyObject* args)
    565 {
    566     if (!PyArg_ParseTuple(args, ":clear"))
    567         return NULL;
    568 
    569     if (self->extra) {
    570         element_dealloc_extra(self);
    571         self->extra = NULL;
    572     }
    573 
    574     Py_INCREF(Py_None);
    575     Py_DECREF(JOIN_OBJ(self->text));
    576     self->text = Py_None;
    577 
    578     Py_INCREF(Py_None);
    579     Py_DECREF(JOIN_OBJ(self->tail));
    580     self->tail = Py_None;
    581 
    582     Py_RETURN_NONE;
    583 }
    584 
    585 static PyObject*
    586 element_copy(ElementObject* self, PyObject* args)
    587 {
    588     int i;
    589     ElementObject* element;
    590 
    591     if (!PyArg_ParseTuple(args, ":__copy__"))
    592         return NULL;
    593 
    594     element = (ElementObject*) element_new(
    595         self->tag, (self->extra) ? self->extra->attrib : Py_None
    596         );
    597     if (!element)
    598         return NULL;
    599 
    600     Py_DECREF(JOIN_OBJ(element->text));
    601     element->text = self->text;
    602     Py_INCREF(JOIN_OBJ(element->text));
    603 
    604     Py_DECREF(JOIN_OBJ(element->tail));
    605     element->tail = self->tail;
    606     Py_INCREF(JOIN_OBJ(element->tail));
    607 
    608     if (self->extra) {
    609 
    610         if (element_resize(element, self->extra->length) < 0) {
    611             Py_DECREF(element);
    612             return NULL;
    613         }
    614 
    615         for (i = 0; i < self->extra->length; i++) {
    616             Py_INCREF(self->extra->children[i]);
    617             element->extra->children[i] = self->extra->children[i];
    618         }
    619 
    620         element->extra->length = self->extra->length;
    621 
    622     }
    623 
    624     return (PyObject*) element;
    625 }
    626 
    627 static PyObject*
    628 element_deepcopy(ElementObject* self, PyObject* args)
    629 {
    630     int i;
    631     ElementObject* element;
    632     PyObject* tag;
    633     PyObject* attrib;
    634     PyObject* text;
    635     PyObject* tail;
    636     PyObject* id;
    637 
    638     PyObject* memo;
    639     if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
    640         return NULL;
    641 
    642     tag = deepcopy(self->tag, memo);
    643     if (!tag)
    644         return NULL;
    645 
    646     if (self->extra) {
    647         attrib = deepcopy(self->extra->attrib, memo);
    648         if (!attrib) {
    649             Py_DECREF(tag);
    650             return NULL;
    651         }
    652     } else {
    653         Py_INCREF(Py_None);
    654         attrib = Py_None;
    655     }
    656 
    657     element = (ElementObject*) element_new(tag, attrib);
    658 
    659     Py_DECREF(tag);
    660     Py_DECREF(attrib);
    661 
    662     if (!element)
    663         return NULL;
    664 
    665     text = deepcopy(JOIN_OBJ(self->text), memo);
    666     if (!text)
    667         goto error;
    668     Py_DECREF(element->text);
    669     element->text = JOIN_SET(text, JOIN_GET(self->text));
    670 
    671     tail = deepcopy(JOIN_OBJ(self->tail), memo);
    672     if (!tail)
    673         goto error;
    674     Py_DECREF(element->tail);
    675     element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
    676 
    677     if (self->extra) {
    678 
    679         if (element_resize(element, self->extra->length) < 0)
    680             goto error;
    681 
    682         for (i = 0; i < self->extra->length; i++) {
    683             PyObject* child = deepcopy(self->extra->children[i], memo);
    684             if (!child) {
    685                 element->extra->length = i;
    686                 goto error;
    687             }
    688             element->extra->children[i] = child;
    689         }
    690 
    691         element->extra->length = self->extra->length;
    692 
    693     }
    694 
    695     /* add object to memo dictionary (so deepcopy won't visit it again) */
    696     id = PyInt_FromLong((Py_uintptr_t) self);
    697     if (!id)
    698         goto error;
    699 
    700     i = PyDict_SetItem(memo, id, (PyObject*) element);
    701 
    702     Py_DECREF(id);
    703 
    704     if (i < 0)
    705         goto error;
    706 
    707     return (PyObject*) element;
    708 
    709   error:
    710     Py_DECREF(element);
    711     return NULL;
    712 }
    713 
    714 LOCAL(int)
    715 checkpath(PyObject* tag)
    716 {
    717     Py_ssize_t i;
    718     int check = 1;
    719 
    720     /* check if a tag contains an xpath character */
    721 
    722 #define PATHCHAR(ch) \
    723     (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
    724 
    725 #if defined(Py_USING_UNICODE)
    726     if (PyUnicode_Check(tag)) {
    727         Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
    728         for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
    729             if (p[i] == '{')
    730                 check = 0;
    731             else if (p[i] == '}')
    732                 check = 1;
    733             else if (check && PATHCHAR(p[i]))
    734                 return 1;
    735         }
    736         return 0;
    737     }
    738 #endif
    739     if (PyString_Check(tag)) {
    740         char *p = PyString_AS_STRING(tag);
    741         for (i = 0; i < PyString_GET_SIZE(tag); i++) {
    742             if (p[i] == '{')
    743                 check = 0;
    744             else if (p[i] == '}')
    745                 check = 1;
    746             else if (check && PATHCHAR(p[i]))
    747                 return 1;
    748         }
    749         return 0;
    750     }
    751 
    752     return 1; /* unknown type; might be path expression */
    753 }
    754 
    755 static PyObject*
    756 element_extend(ElementObject* self, PyObject* args)
    757 {
    758     PyObject* seq;
    759     Py_ssize_t i, seqlen = 0;
    760 
    761     PyObject* seq_in;
    762     if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
    763         return NULL;
    764 
    765     seq = PySequence_Fast(seq_in, "");
    766     if (!seq) {
    767         PyErr_Format(
    768             PyExc_TypeError,
    769             "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
    770             );
    771         return NULL;
    772     }
    773 
    774     seqlen = PySequence_Size(seq);
    775     for (i = 0; i < seqlen; i++) {
    776         PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
    777         if (element_add_subelement(self, element) < 0) {
    778             Py_DECREF(seq);
    779             return NULL;
    780         }
    781     }
    782 
    783     Py_DECREF(seq);
    784 
    785     Py_RETURN_NONE;
    786 }
    787 
    788 static PyObject*
    789 element_find(ElementObject* self, PyObject* args)
    790 {
    791     int i;
    792 
    793     PyObject* tag;
    794     PyObject* namespaces = Py_None;
    795     if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
    796         return NULL;
    797 
    798     if (checkpath(tag) || namespaces != Py_None)
    799         return PyObject_CallMethod(
    800             elementpath_obj, "find", "OOO", self, tag, namespaces
    801             );
    802 
    803     if (!self->extra)
    804         Py_RETURN_NONE;
    805 
    806     for (i = 0; i < self->extra->length; i++) {
    807         PyObject* item = self->extra->children[i];
    808         if (Element_CheckExact(item) &&
    809             PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
    810             Py_INCREF(item);
    811             return item;
    812         }
    813     }
    814 
    815     Py_RETURN_NONE;
    816 }
    817 
    818 static PyObject*
    819 element_findtext(ElementObject* self, PyObject* args)
    820 {
    821     int i;
    822 
    823     PyObject* tag;
    824     PyObject* default_value = Py_None;
    825     PyObject* namespaces = Py_None;
    826     if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
    827         return NULL;
    828 
    829     if (checkpath(tag) || namespaces != Py_None)
    830         return PyObject_CallMethod(
    831             elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
    832             );
    833 
    834     if (!self->extra) {
    835         Py_INCREF(default_value);
    836         return default_value;
    837     }
    838 
    839     for (i = 0; i < self->extra->length; i++) {
    840         ElementObject* item = (ElementObject*) self->extra->children[i];
    841         if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
    842             PyObject* text = element_get_text(item);
    843             if (text == Py_None)
    844                 return PyString_FromString("");
    845             Py_XINCREF(text);
    846             return text;
    847         }
    848     }
    849 
    850     Py_INCREF(default_value);
    851     return default_value;
    852 }
    853 
    854 static PyObject*
    855 element_findall(ElementObject* self, PyObject* args)
    856 {
    857     int i;
    858     PyObject* out;
    859 
    860     PyObject* tag;
    861     PyObject* namespaces = Py_None;
    862     if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
    863         return NULL;
    864 
    865     if (checkpath(tag) || namespaces != Py_None)
    866         return PyObject_CallMethod(
    867             elementpath_obj, "findall", "OOO", self, tag, namespaces
    868             );
    869 
    870     out = PyList_New(0);
    871     if (!out)
    872         return NULL;
    873 
    874     if (!self->extra)
    875         return out;
    876 
    877     for (i = 0; i < self->extra->length; i++) {
    878         PyObject* item = self->extra->children[i];
    879         if (Element_CheckExact(item) &&
    880             PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
    881             if (PyList_Append(out, item) < 0) {
    882                 Py_DECREF(out);
    883                 return NULL;
    884             }
    885         }
    886     }
    887 
    888     return out;
    889 }
    890 
    891 static PyObject*
    892 element_iterfind(ElementObject* self, PyObject* args)
    893 {
    894     PyObject* tag;
    895     PyObject* namespaces = Py_None;
    896     if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
    897         return NULL;
    898 
    899     return PyObject_CallMethod(
    900         elementpath_obj, "iterfind", "OOO", self, tag, namespaces
    901         );
    902 }
    903 
    904 static PyObject*
    905 element_get(ElementObject* self, PyObject* args)
    906 {
    907     PyObject* value;
    908 
    909     PyObject* key;
    910     PyObject* default_value = Py_None;
    911     if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
    912         return NULL;
    913 
    914     if (!self->extra || self->extra->attrib == Py_None)
    915         value = default_value;
    916     else {
    917         value = PyDict_GetItem(self->extra->attrib, key);
    918         if (!value)
    919             value = default_value;
    920     }
    921 
    922     Py_INCREF(value);
    923     return value;
    924 }
    925 
    926 static PyObject*
    927 element_getchildren(ElementObject* self, PyObject* args)
    928 {
    929     int i;
    930     PyObject* list;
    931 
    932     /* FIXME: report as deprecated? */
    933 
    934     if (!PyArg_ParseTuple(args, ":getchildren"))
    935         return NULL;
    936 
    937     if (!self->extra)
    938         return PyList_New(0);
    939 
    940     list = PyList_New(self->extra->length);
    941     if (!list)
    942         return NULL;
    943 
    944     for (i = 0; i < self->extra->length; i++) {
    945         PyObject* item = self->extra->children[i];
    946         Py_INCREF(item);
    947         PyList_SET_ITEM(list, i, item);
    948     }
    949 
    950     return list;
    951 }
    952 
    953 static PyObject*
    954 element_iter(ElementObject* self, PyObject* args)
    955 {
    956     PyObject* result;
    957 
    958     PyObject* tag = Py_None;
    959     if (!PyArg_ParseTuple(args, "|O:iter", &tag))
    960         return NULL;
    961 
    962     if (!elementtree_iter_obj) {
    963         PyErr_SetString(
    964             PyExc_RuntimeError,
    965             "iter helper not found"
    966             );
    967         return NULL;
    968     }
    969 
    970     args = PyTuple_New(2);
    971     if (!args)
    972         return NULL;
    973 
    974     Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
    975     Py_INCREF(tag);  PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
    976 
    977     result = PyObject_CallObject(elementtree_iter_obj, args);
    978 
    979     Py_DECREF(args);
    980 
    981     return result;
    982 }
    983 
    984 
    985 static PyObject*
    986 element_itertext(ElementObject* self, PyObject* args)
    987 {
    988     PyObject* result;
    989 
    990     if (!PyArg_ParseTuple(args, ":itertext"))
    991         return NULL;
    992 
    993     if (!elementtree_itertext_obj) {
    994         PyErr_SetString(
    995             PyExc_RuntimeError,
    996             "itertext helper not found"
    997             );
    998         return NULL;
    999     }
   1000 
   1001     args = PyTuple_New(1);
   1002     if (!args)
   1003         return NULL;
   1004 
   1005     Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
   1006 
   1007     result = PyObject_CallObject(elementtree_itertext_obj, args);
   1008 
   1009     Py_DECREF(args);
   1010 
   1011     return result;
   1012 }
   1013 
   1014 static PyObject*
   1015 element_getitem(PyObject* self_, Py_ssize_t index)
   1016 {
   1017     ElementObject* self = (ElementObject*) self_;
   1018 
   1019     if (!self->extra || index < 0 || index >= self->extra->length) {
   1020         PyErr_SetString(
   1021             PyExc_IndexError,
   1022             "child index out of range"
   1023             );
   1024         return NULL;
   1025     }
   1026 
   1027     Py_INCREF(self->extra->children[index]);
   1028     return self->extra->children[index];
   1029 }
   1030 
   1031 static PyObject*
   1032 element_insert(ElementObject* self, PyObject* args)
   1033 {
   1034     int i;
   1035 
   1036     int index;
   1037     PyObject* element;
   1038     if (!PyArg_ParseTuple(args, "iO!:insert", &index,
   1039                           &Element_Type, &element))
   1040         return NULL;
   1041 
   1042     if (!self->extra)
   1043         element_new_extra(self, NULL);
   1044 
   1045     if (index < 0) {
   1046         index += self->extra->length;
   1047         if (index < 0)
   1048             index = 0;
   1049     }
   1050     if (index > self->extra->length)
   1051         index = self->extra->length;
   1052 
   1053     if (element_resize(self, 1) < 0)
   1054         return NULL;
   1055 
   1056     for (i = self->extra->length; i > index; i--)
   1057         self->extra->children[i] = self->extra->children[i-1];
   1058 
   1059     Py_INCREF(element);
   1060     self->extra->children[index] = element;
   1061 
   1062     self->extra->length++;
   1063 
   1064     Py_RETURN_NONE;
   1065 }
   1066 
   1067 static PyObject*
   1068 element_items(ElementObject* self, PyObject* args)
   1069 {
   1070     if (!PyArg_ParseTuple(args, ":items"))
   1071         return NULL;
   1072 
   1073     if (!self->extra || self->extra->attrib == Py_None)
   1074         return PyList_New(0);
   1075 
   1076     return PyDict_Items(self->extra->attrib);
   1077 }
   1078 
   1079 static PyObject*
   1080 element_keys(ElementObject* self, PyObject* args)
   1081 {
   1082     if (!PyArg_ParseTuple(args, ":keys"))
   1083         return NULL;
   1084 
   1085     if (!self->extra || self->extra->attrib == Py_None)
   1086         return PyList_New(0);
   1087 
   1088     return PyDict_Keys(self->extra->attrib);
   1089 }
   1090 
   1091 static Py_ssize_t
   1092 element_length(ElementObject* self)
   1093 {
   1094     if (!self->extra)
   1095         return 0;
   1096 
   1097     return self->extra->length;
   1098 }
   1099 
   1100 static PyObject*
   1101 element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
   1102 {
   1103     PyObject* elem;
   1104 
   1105     PyObject* tag;
   1106     PyObject* attrib;
   1107     if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
   1108         return NULL;
   1109 
   1110     attrib = PyDict_Copy(attrib);
   1111     if (!attrib)
   1112         return NULL;
   1113 
   1114     elem = element_new(tag, attrib);
   1115 
   1116     Py_DECREF(attrib);
   1117 
   1118     return elem;
   1119 }
   1120 
   1121 static PyObject*
   1122 element_reduce(ElementObject* self, PyObject* args)
   1123 {
   1124     if (!PyArg_ParseTuple(args, ":__reduce__"))
   1125         return NULL;
   1126 
   1127     /* Hack alert: This method is used to work around a __copy__
   1128        problem on certain 2.3 and 2.4 versions.  To save time and
   1129        simplify the code, we create the copy in here, and use a dummy
   1130        copyelement helper to trick the copy module into doing the
   1131        right thing. */
   1132 
   1133     if (!elementtree_copyelement_obj) {
   1134         PyErr_SetString(
   1135             PyExc_RuntimeError,
   1136             "copyelement helper not found"
   1137             );
   1138         return NULL;
   1139     }
   1140 
   1141     return Py_BuildValue(
   1142         "O(N)", elementtree_copyelement_obj, element_copy(self, args)
   1143         );
   1144 }
   1145 
   1146 static PyObject*
   1147 element_remove(ElementObject* self, PyObject* args)
   1148 {
   1149     int i;
   1150 
   1151     PyObject* element;
   1152     if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
   1153         return NULL;
   1154 
   1155     if (!self->extra) {
   1156         /* element has no children, so raise exception */
   1157         PyErr_SetString(
   1158             PyExc_ValueError,
   1159             "list.remove(x): x not in list"
   1160             );
   1161         return NULL;
   1162     }
   1163 
   1164     for (i = 0; i < self->extra->length; i++) {
   1165         if (self->extra->children[i] == element)
   1166             break;
   1167         if (PyObject_Compare(self->extra->children[i], element) == 0)
   1168             break;
   1169     }
   1170 
   1171     if (i == self->extra->length) {
   1172         /* element is not in children, so raise exception */
   1173         PyErr_SetString(
   1174             PyExc_ValueError,
   1175             "list.remove(x): x not in list"
   1176             );
   1177         return NULL;
   1178     }
   1179 
   1180     Py_DECREF(self->extra->children[i]);
   1181 
   1182     self->extra->length--;
   1183 
   1184     for (; i < self->extra->length; i++)
   1185         self->extra->children[i] = self->extra->children[i+1];
   1186 
   1187     Py_RETURN_NONE;
   1188 }
   1189 
   1190 static PyObject*
   1191 element_repr(ElementObject* self)
   1192 {
   1193     PyObject *repr, *tag;
   1194 
   1195     tag = PyObject_Repr(self->tag);
   1196     if (!tag)
   1197         return NULL;
   1198 
   1199     repr = PyString_FromFormat("<Element %s at %p>",
   1200                                PyString_AS_STRING(tag), self);
   1201 
   1202     Py_DECREF(tag);
   1203 
   1204     return repr;
   1205 }
   1206 
   1207 static PyObject*
   1208 element_set(ElementObject* self, PyObject* args)
   1209 {
   1210     PyObject* attrib;
   1211 
   1212     PyObject* key;
   1213     PyObject* value;
   1214     if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
   1215         return NULL;
   1216 
   1217     if (!self->extra)
   1218         element_new_extra(self, NULL);
   1219 
   1220     attrib = element_get_attrib(self);
   1221     if (!attrib)
   1222         return NULL;
   1223 
   1224     if (PyDict_SetItem(attrib, key, value) < 0)
   1225         return NULL;
   1226 
   1227     Py_RETURN_NONE;
   1228 }
   1229 
   1230 static int
   1231 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
   1232 {
   1233     ElementObject* self = (ElementObject*) self_;
   1234     int i;
   1235     PyObject* old;
   1236 
   1237     if (!self->extra || index < 0 || index >= self->extra->length) {
   1238         PyErr_SetString(
   1239             PyExc_IndexError,
   1240             "child assignment index out of range");
   1241         return -1;
   1242     }
   1243 
   1244     old = self->extra->children[index];
   1245 
   1246     if (item) {
   1247         Py_INCREF(item);
   1248         self->extra->children[index] = item;
   1249     } else {
   1250         self->extra->length--;
   1251         for (i = index; i < self->extra->length; i++)
   1252             self->extra->children[i] = self->extra->children[i+1];
   1253     }
   1254 
   1255     Py_DECREF(old);
   1256 
   1257     return 0;
   1258 }
   1259 
   1260 static PyObject*
   1261 element_subscr(PyObject* self_, PyObject* item)
   1262 {
   1263     ElementObject* self = (ElementObject*) self_;
   1264 
   1265 #if (PY_VERSION_HEX < 0x02050000)
   1266     if (PyInt_Check(item) || PyLong_Check(item)) {
   1267         long i = PyInt_AsLong(item);
   1268 #else
   1269     if (PyIndex_Check(item)) {
   1270         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
   1271 #endif
   1272 
   1273         if (i == -1 && PyErr_Occurred()) {
   1274             return NULL;
   1275         }
   1276         if (i < 0 && self->extra)
   1277             i += self->extra->length;
   1278         return element_getitem(self_, i);
   1279     }
   1280     else if (PySlice_Check(item)) {
   1281         Py_ssize_t start, stop, step, slicelen, cur, i;
   1282         PyObject* list;
   1283 
   1284         if (!self->extra)
   1285             return PyList_New(0);
   1286 
   1287         if (PySlice_GetIndicesEx((PySliceObject *)item,
   1288                 self->extra->length,
   1289                 &start, &stop, &step, &slicelen) < 0) {
   1290             return NULL;
   1291         }
   1292 
   1293         if (slicelen <= 0)
   1294             return PyList_New(0);
   1295         else {
   1296             list = PyList_New(slicelen);
   1297             if (!list)
   1298                 return NULL;
   1299 
   1300             for (cur = start, i = 0; i < slicelen;
   1301                  cur += step, i++) {
   1302                 PyObject* item = self->extra->children[cur];
   1303                 Py_INCREF(item);
   1304                 PyList_SET_ITEM(list, i, item);
   1305             }
   1306 
   1307             return list;
   1308         }
   1309     }
   1310     else {
   1311         PyErr_SetString(PyExc_TypeError,
   1312                 "element indices must be integers");
   1313         return NULL;
   1314     }
   1315 }
   1316 
   1317 static int
   1318 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
   1319 {
   1320     ElementObject* self = (ElementObject*) self_;
   1321 
   1322 #if (PY_VERSION_HEX < 0x02050000)
   1323     if (PyInt_Check(item) || PyLong_Check(item)) {
   1324         long i = PyInt_AsLong(item);
   1325 #else
   1326     if (PyIndex_Check(item)) {
   1327         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
   1328 #endif
   1329 
   1330         if (i == -1 && PyErr_Occurred()) {
   1331             return -1;
   1332         }
   1333         if (i < 0 && self->extra)
   1334             i += self->extra->length;
   1335         return element_setitem(self_, i, value);
   1336     }
   1337     else if (PySlice_Check(item)) {
   1338         Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
   1339 
   1340         PyObject* recycle = NULL;
   1341         PyObject* seq = NULL;
   1342 
   1343         if (!self->extra)
   1344             element_new_extra(self, NULL);
   1345 
   1346         if (PySlice_GetIndicesEx((PySliceObject *)item,
   1347                 self->extra->length,
   1348                 &start, &stop, &step, &slicelen) < 0) {
   1349             return -1;
   1350         }
   1351 
   1352         if (value == NULL)
   1353             newlen = 0;
   1354         else {
   1355             seq = PySequence_Fast(value, "");
   1356             if (!seq) {
   1357                 PyErr_Format(
   1358                     PyExc_TypeError,
   1359                     "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
   1360                     );
   1361                 return -1;
   1362             }
   1363             newlen = PySequence_Size(seq);
   1364         }
   1365 
   1366         if (step !=  1 && newlen != slicelen)
   1367         {
   1368             PyErr_Format(PyExc_ValueError,
   1369 #if (PY_VERSION_HEX < 0x02050000)
   1370                 "attempt to assign sequence of size %d "
   1371                 "to extended slice of size %d",
   1372 #else
   1373                 "attempt to assign sequence of size %zd "
   1374                 "to extended slice of size %zd",
   1375 #endif
   1376                 newlen, slicelen
   1377                 );
   1378             return -1;
   1379         }
   1380 
   1381 
   1382         /* Resize before creating the recycle bin, to prevent refleaks. */
   1383         if (newlen > slicelen) {
   1384             if (element_resize(self, newlen - slicelen) < 0) {
   1385                 if (seq) {
   1386                     Py_DECREF(seq);
   1387                 }
   1388                 return -1;
   1389             }
   1390         }
   1391 
   1392         if (slicelen > 0) {
   1393             /* to avoid recursive calls to this method (via decref), move
   1394                old items to the recycle bin here, and get rid of them when
   1395                we're done modifying the element */
   1396             recycle = PyList_New(slicelen);
   1397             if (!recycle) {
   1398                 if (seq) {
   1399                     Py_DECREF(seq);
   1400                 }
   1401                 return -1;
   1402             }
   1403             for (cur = start, i = 0; i < slicelen;
   1404                  cur += step, i++)
   1405                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
   1406         }
   1407 
   1408         if (newlen < slicelen) {
   1409             /* delete slice */
   1410             for (i = stop; i < self->extra->length; i++)
   1411                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
   1412         } else if (newlen > slicelen) {
   1413             /* insert slice */
   1414             for (i = self->extra->length-1; i >= stop; i--)
   1415                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
   1416         }
   1417 
   1418         /* replace the slice */
   1419         for (cur = start, i = 0; i < newlen;
   1420              cur += step, i++) {
   1421             PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
   1422             Py_INCREF(element);
   1423             self->extra->children[cur] = element;
   1424         }
   1425 
   1426         self->extra->length += newlen - slicelen;
   1427 
   1428         if (seq) {
   1429             Py_DECREF(seq);
   1430         }
   1431 
   1432         /* discard the recycle bin, and everything in it */
   1433         Py_XDECREF(recycle);
   1434 
   1435         return 0;
   1436     }
   1437     else {
   1438         PyErr_SetString(PyExc_TypeError,
   1439                 "element indices must be integers");
   1440         return -1;
   1441     }
   1442 }
   1443 
/* Method table for Element objects.  element_getattr passes this table
   to Py_FindMethod to resolve method names.  Every entry uses the
   METH_VARARGS calling convention. */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clear, METH_VARARGS},

    /* attribute access */
    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    /* path-based searching */
    {"find", (PyCFunction) element_find, METH_VARARGS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS},

    /* child list manipulation */
    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"extend", (PyCFunction) element_extend, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    /* iteration */
    {"iter", (PyCFunction) element_iter, METH_VARARGS},
    {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
    {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},

    /* "getiterator" shares its implementation with "iter" */
    {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},

    /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
       C objects correctly, so we have to fake it using a __reduce__-
       based hack (see the element_reduce implementation above for
       details). */

    /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
       using a runtime test to figure out if we need to fake things
       or not (see the init code below).  The following entry is
       enabled only if the hack is needed. */

    /* NOTE(review): the leading "!" presumably keeps the entry from
       matching until the init code renames it -- confirm there. */
    {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},

    {NULL, NULL}
};
   1489 
   1490 static PyObject*
   1491 element_getattr(ElementObject* self, char* name)
   1492 {
   1493     PyObject* res;
   1494 
   1495     /* handle common attributes first */
   1496     if (strcmp(name, "tag") == 0) {
   1497         res = self->tag;
   1498         Py_INCREF(res);
   1499         return res;
   1500     } else if (strcmp(name, "text") == 0) {
   1501         res = element_get_text(self);
   1502         Py_INCREF(res);
   1503         return res;
   1504     }
   1505 
   1506     /* methods */
   1507     res = Py_FindMethod(element_methods, (PyObject*) self, name);
   1508     if (res)
   1509         return res;
   1510 
   1511     PyErr_Clear();
   1512 
   1513     /* less common attributes */
   1514     if (strcmp(name, "tail") == 0) {
   1515         res = element_get_tail(self);
   1516     } else if (strcmp(name, "attrib") == 0) {
   1517         if (!self->extra)
   1518             element_new_extra(self, NULL);
   1519         res = element_get_attrib(self);
   1520     } else {
   1521         PyErr_SetString(PyExc_AttributeError, name);
   1522         return NULL;
   1523     }
   1524 
   1525     if (!res)
   1526         return NULL;
   1527 
   1528     Py_INCREF(res);
   1529     return res;
   1530 }
   1531 
   1532 static int
   1533 element_setattr(ElementObject* self, const char* name, PyObject* value)
   1534 {
   1535     if (value == NULL) {
   1536         PyErr_SetString(
   1537             PyExc_AttributeError,
   1538             "can't delete element attributes"
   1539             );
   1540         return -1;
   1541     }
   1542 
   1543     if (strcmp(name, "tag") == 0) {
   1544         Py_DECREF(self->tag);
   1545         self->tag = value;
   1546         Py_INCREF(self->tag);
   1547     } else if (strcmp(name, "text") == 0) {
   1548         Py_DECREF(JOIN_OBJ(self->text));
   1549         self->text = value;
   1550         Py_INCREF(self->text);
   1551     } else if (strcmp(name, "tail") == 0) {
   1552         Py_DECREF(JOIN_OBJ(self->tail));
   1553         self->tail = value;
   1554         Py_INCREF(self->tail);
   1555     } else if (strcmp(name, "attrib") == 0) {
   1556         if (!self->extra)
   1557             element_new_extra(self, NULL);
   1558         Py_DECREF(self->extra->attrib);
   1559         self->extra->attrib = value;
   1560         Py_INCREF(self->extra->attrib);
   1561     } else {
   1562         PyErr_SetString(PyExc_AttributeError, name);
   1563         return -1;
   1564     }
   1565 
   1566     return 0;
   1567 }
   1568 
/* Sequence protocol for Element objects: length plus integer-indexed
   read, assignment and deletion of children.  Slicing is not provided
   here; slice keys are handled by the mapping protocol handlers. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* sq_slice */
    element_setitem, /* sq_ass_item */
    0, /* sq_ass_slice */
};
   1578 
/* Mapping protocol for Element objects; the subscript handlers accept
   both integer-like keys and slice objects. */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length, /* mp_length */
    (binaryfunc) element_subscr, /* mp_subscript */
    (objobjargproc) element_ass_subscr, /* mp_ass_subscript */
};
   1584 
/* Type object for Element.  Attribute access goes through the
   name-string based tp_getattr/tp_setattr slots.  Only the slots up to
   tp_as_mapping are spelled out; the remaining members of this static
   initializer are implicitly zero. */
statichere PyTypeObject Element_Type = {
    PyObject_HEAD_INIT(NULL)
    0, "Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)element_getattr, /* tp_getattr */
    (setattrfunc)element_setattr, /* tp_setattr */
    0, /* tp_compare */
    (reprfunc)element_repr, /* tp_repr */
    0, /* tp_as_number */
    &element_as_sequence, /* tp_as_sequence */
    &element_as_mapping, /* tp_as_mapping */
};
   1599 
   1600 /* ==================================================================== */
   1601 /* the tree builder type */
   1602 
/* State of a tree builder: the tree under construction, the position of
   the builder within it, buffered character data, and the optional
   event log. */
typedef struct {
    PyObject_HEAD

    PyObject* root; /* root node (first created node) */

    /* both are initialised to None (cast to ElementObject*) until the
       first element has been started */
    ElementObject* this; /* current node */
    ElementObject* last; /* most recently created node */

    PyObject* data; /* data collector (string or list), or NULL */

    PyObject* stack; /* element stack */
    Py_ssize_t index; /* current stack size (0=empty) */

    /* element tracing */
    PyObject* events; /* list of events, or NULL if not collecting */
    PyObject* start_event_obj; /* event objects (NULL to ignore) */
    PyObject* end_event_obj;
    PyObject* start_ns_event_obj;
    PyObject* end_ns_event_obj;

} TreeBuilderObject;
   1624 
/* forward declaration of the tree builder type object, needed by the
   constructor and by the check macro below */
staticforward PyTypeObject TreeBuilder_Type;

/* true if op's type is exactly TreeBuilder_Type (pointer comparison on
   the type object; subtypes do not match) */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
   1628 
   1629 /* -------------------------------------------------------------------- */
   1630 /* constructor and destructor */
   1631 
   1632 LOCAL(PyObject*)
   1633 treebuilder_new(void)
   1634 {
   1635     TreeBuilderObject* self;
   1636 
   1637     self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
   1638     if (self == NULL)
   1639         return NULL;
   1640 
   1641     self->root = NULL;
   1642 
   1643     Py_INCREF(Py_None);
   1644     self->this = (ElementObject*) Py_None;
   1645 
   1646     Py_INCREF(Py_None);
   1647     self->last = (ElementObject*) Py_None;
   1648 
   1649     self->data = NULL;
   1650 
   1651     self->stack = PyList_New(20);
   1652     self->index = 0;
   1653 
   1654     self->events = NULL;
   1655     self->start_event_obj = self->end_event_obj = NULL;
   1656     self->start_ns_event_obj = self->end_ns_event_obj = NULL;
   1657 
   1658     ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
   1659 
   1660     return (PyObject*) self;
   1661 }
   1662 
   1663 static PyObject*
   1664 treebuilder(PyObject* self_, PyObject* args)
   1665 {
   1666     if (!PyArg_ParseTuple(args, ":TreeBuilder"))
   1667         return NULL;
   1668 
   1669     return treebuilder_new();
   1670 }
   1671 
   1672 static void
   1673 treebuilder_dealloc(TreeBuilderObject* self)
   1674 {
   1675     Py_XDECREF(self->end_ns_event_obj);
   1676     Py_XDECREF(self->start_ns_event_obj);
   1677     Py_XDECREF(self->end_event_obj);
   1678     Py_XDECREF(self->start_event_obj);
   1679     Py_XDECREF(self->events);
   1680     Py_DECREF(self->stack);
   1681     Py_XDECREF(self->data);
   1682     Py_DECREF(self->last);
   1683     Py_DECREF(self->this);
   1684     Py_XDECREF(self->root);
   1685 
   1686     RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
   1687 
   1688     PyObject_Del(self);
   1689 }
   1690 
   1691 /* -------------------------------------------------------------------- */
   1692 /* handlers */
   1693 
   1694 LOCAL(PyObject*)
   1695 treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
   1696                        PyObject* standalone)
   1697 {
   1698     Py_RETURN_NONE;
   1699 }
   1700 
   1701 LOCAL(PyObject*)
   1702 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
   1703                          PyObject* attrib)
   1704 {
   1705     PyObject* node;
   1706     PyObject* this;
   1707 
   1708     if (self->data) {
   1709         if (self->this == self->last) {
   1710             Py_DECREF(JOIN_OBJ(self->last->text));
   1711             self->last->text = JOIN_SET(
   1712                 self->data, PyList_CheckExact(self->data)
   1713                 );
   1714         } else {
   1715             Py_DECREF(JOIN_OBJ(self->last->tail));
   1716             self->last->tail = JOIN_SET(
   1717                 self->data, PyList_CheckExact(self->data)
   1718                 );
   1719         }
   1720         self->data = NULL;
   1721     }
   1722 
   1723     node = element_new(tag, attrib);
   1724     if (!node)
   1725         return NULL;
   1726 
   1727     this = (PyObject*) self->this;
   1728 
   1729     if (this != Py_None) {
   1730         if (element_add_subelement((ElementObject*) this, node) < 0)
   1731             goto error;
   1732     } else {
   1733         if (self->root) {
   1734             PyErr_SetString(
   1735                 elementtree_parseerror_obj,
   1736                 "multiple elements on top level"
   1737                 );
   1738             goto error;
   1739         }
   1740         Py_INCREF(node);
   1741         self->root = node;
   1742     }
   1743 
   1744     if (self->index < PyList_GET_SIZE(self->stack)) {
   1745         if (PyList_SetItem(self->stack, self->index, this) < 0)
   1746             goto error;
   1747         Py_INCREF(this);
   1748     } else {
   1749         if (PyList_Append(self->stack, this) < 0)
   1750             goto error;
   1751     }
   1752     self->index++;
   1753 
   1754     Py_DECREF(this);
   1755     Py_INCREF(node);
   1756     self->this = (ElementObject*) node;
   1757 
   1758     Py_DECREF(self->last);
   1759     Py_INCREF(node);
   1760     self->last = (ElementObject*) node;
   1761 
   1762     if (self->start_event_obj) {
   1763         PyObject* res;
   1764         PyObject* action = self->start_event_obj;
   1765         res = PyTuple_New(2);
   1766         if (res) {
   1767             Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
   1768             Py_INCREF(node);   PyTuple_SET_ITEM(res, 1, (PyObject*) node);
   1769             PyList_Append(self->events, res);
   1770             Py_DECREF(res);
   1771         } else
   1772             PyErr_Clear(); /* FIXME: propagate error */
   1773     }
   1774 
   1775     return node;
   1776 
   1777   error:
   1778     Py_DECREF(node);
   1779     return NULL;
   1780 }
   1781 
   1782 LOCAL(PyObject*)
   1783 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
   1784 {
   1785     if (!self->data) {
   1786         if (self->last == (ElementObject*) Py_None) {
   1787             /* ignore calls to data before the first call to start */
   1788             Py_RETURN_NONE;
   1789         }
   1790         /* store the first item as is */
   1791         Py_INCREF(data); self->data = data;
   1792     } else {
   1793         /* more than one item; use a list to collect items */
   1794         if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
   1795             PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
   1796             /* expat often generates single character data sections; handle
   1797                the most common case by resizing the existing string... */
   1798             Py_ssize_t size = PyString_GET_SIZE(self->data);
   1799             if (_PyString_Resize(&self->data, size + 1) < 0)
   1800                 return NULL;
   1801             PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
   1802         } else if (PyList_CheckExact(self->data)) {
   1803             if (PyList_Append(self->data, data) < 0)
   1804                 return NULL;
   1805         } else {
   1806             PyObject* list = PyList_New(2);
   1807             if (!list)
   1808                 return NULL;
   1809             PyList_SET_ITEM(list, 0, self->data);
   1810             Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
   1811             self->data = list;
   1812         }
   1813     }
   1814 
   1815     Py_RETURN_NONE;
   1816 }
   1817 
   1818 LOCAL(PyObject*)
   1819 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
   1820 {
   1821     PyObject* item;
   1822 
   1823     if (self->data) {
   1824         if (self->this == self->last) {
   1825             Py_DECREF(JOIN_OBJ(self->last->text));
   1826             self->last->text = JOIN_SET(
   1827                 self->data, PyList_CheckExact(self->data)
   1828                 );
   1829         } else {
   1830             Py_DECREF(JOIN_OBJ(self->last->tail));
   1831             self->last->tail = JOIN_SET(
   1832                 self->data, PyList_CheckExact(self->data)
   1833                 );
   1834         }
   1835         self->data = NULL;
   1836     }
   1837 
   1838     if (self->index == 0) {
   1839         PyErr_SetString(
   1840             PyExc_IndexError,
   1841             "pop from empty stack"
   1842             );
   1843         return NULL;
   1844     }
   1845 
   1846     self->index--;
   1847 
   1848     item = PyList_GET_ITEM(self->stack, self->index);
   1849     Py_INCREF(item);
   1850 
   1851     Py_DECREF(self->last);
   1852 
   1853     self->last = (ElementObject*) self->this;
   1854     self->this = (ElementObject*) item;
   1855 
   1856     if (self->end_event_obj) {
   1857         PyObject* res;
   1858         PyObject* action = self->end_event_obj;
   1859         PyObject* node = (PyObject*) self->last;
   1860         res = PyTuple_New(2);
   1861         if (res) {
   1862             Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
   1863             Py_INCREF(node);   PyTuple_SET_ITEM(res, 1, (PyObject*) node);
   1864             PyList_Append(self->events, res);
   1865             Py_DECREF(res);
   1866         } else
   1867             PyErr_Clear(); /* FIXME: propagate error */
   1868     }
   1869 
   1870     Py_INCREF(self->last);
   1871     return (PyObject*) self->last;
   1872 }
   1873 
   1874 LOCAL(void)
   1875 treebuilder_handle_namespace(TreeBuilderObject* self, int start,
   1876                              PyObject *prefix, PyObject *uri)
   1877 {
   1878     PyObject* res;
   1879     PyObject* action;
   1880     PyObject* parcel;
   1881 
   1882     if (!self->events)
   1883         return;
   1884 
   1885     if (start) {
   1886         if (!self->start_ns_event_obj)
   1887             return;
   1888         action = self->start_ns_event_obj;
   1889         parcel = Py_BuildValue("OO", prefix, uri);
   1890         if (!parcel)
   1891             return;
   1892         Py_INCREF(action);
   1893     } else {
   1894         if (!self->end_ns_event_obj)
   1895             return;
   1896         action = self->end_ns_event_obj;
   1897         Py_INCREF(action);
   1898         parcel = Py_None;
   1899         Py_INCREF(parcel);
   1900     }
   1901 
   1902     res = PyTuple_New(2);
   1903 
   1904     if (res) {
   1905         PyTuple_SET_ITEM(res, 0, action);
   1906         PyTuple_SET_ITEM(res, 1, parcel);
   1907         PyList_Append(self->events, res);
   1908         Py_DECREF(res);
   1909     } else
   1910         PyErr_Clear(); /* FIXME: propagate error */
   1911 }
   1912 
   1913 /* -------------------------------------------------------------------- */
   1914 /* methods (in alphabetical order) */
   1915 
   1916 static PyObject*
   1917 treebuilder_data(TreeBuilderObject* self, PyObject* args)
   1918 {
   1919     PyObject* data;
   1920     if (!PyArg_ParseTuple(args, "O:data", &data))
   1921         return NULL;
   1922 
   1923     return treebuilder_handle_data(self, data);
   1924 }
   1925 
   1926 static PyObject*
   1927 treebuilder_end(TreeBuilderObject* self, PyObject* args)
   1928 {
   1929     PyObject* tag;
   1930     if (!PyArg_ParseTuple(args, "O:end", &tag))
   1931         return NULL;
   1932 
   1933     return treebuilder_handle_end(self, tag);
   1934 }
   1935 
   1936 LOCAL(PyObject*)
   1937 treebuilder_done(TreeBuilderObject* self)
   1938 {
   1939     PyObject* res;
   1940 
   1941     /* FIXME: check stack size? */
   1942 
   1943     if (self->root)
   1944         res = self->root;
   1945     else
   1946         res = Py_None;
   1947 
   1948     Py_INCREF(res);
   1949     return res;
   1950 }
   1951 
   1952 static PyObject*
   1953 treebuilder_close(TreeBuilderObject* self, PyObject* args)
   1954 {
   1955     if (!PyArg_ParseTuple(args, ":close"))
   1956         return NULL;
   1957 
   1958     return treebuilder_done(self);
   1959 }
   1960 
   1961 static PyObject*
   1962 treebuilder_start(TreeBuilderObject* self, PyObject* args)
   1963 {
   1964     PyObject* tag;
   1965     PyObject* attrib = Py_None;
   1966     if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
   1967         return NULL;
   1968 
   1969     return treebuilder_handle_start(self, tag, attrib);
   1970 }
   1971 
   1972 static PyObject*
   1973 treebuilder_xml(TreeBuilderObject* self, PyObject* args)
   1974 {
   1975     PyObject* encoding;
   1976     PyObject* standalone;
   1977     if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
   1978         return NULL;
   1979 
   1980     return treebuilder_handle_xml(self, encoding, standalone);
   1981 }
   1982 
   1983 static PyMethodDef treebuilder_methods[] = {
   1984     {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
   1985     {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
   1986     {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
   1987     {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
   1988     {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
   1989     {NULL, NULL}
   1990 };
   1991 
   1992 static PyObject*
   1993 treebuilder_getattr(TreeBuilderObject* self, char* name)
   1994 {
   1995     return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
   1996 }
   1997 
   1998 statichere PyTypeObject TreeBuilder_Type = {
   1999     PyObject_HEAD_INIT(NULL)
   2000     0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
   2001     /* methods */
   2002     (destructor)treebuilder_dealloc, /* tp_dealloc */
   2003     0, /* tp_print */
   2004     (getattrfunc)treebuilder_getattr, /* tp_getattr */
   2005 };
   2006 
   2007 /* ==================================================================== */
   2008 /* the expat interface */
   2009 
   2010 #if defined(USE_EXPAT)
   2011 
   2012 #include "expat.h"
   2013 
   2014 #if defined(USE_PYEXPAT_CAPI)
   2015 #include "pyexpat.h"
   2016 static struct PyExpat_CAPI* expat_capi;
   2017 #define EXPAT(func) (expat_capi->func)
   2018 #else
   2019 #define EXPAT(func) (XML_##func)
   2020 #endif
   2021 
   2022 typedef struct {
   2023     PyObject_HEAD
   2024 
   2025     XML_Parser parser;
   2026 
   2027     PyObject* target;
   2028     PyObject* entity;
   2029 
   2030     PyObject* names;
   2031 
   2032     PyObject* handle_xml;
   2033 
   2034     PyObject* handle_start;
   2035     PyObject* handle_data;
   2036     PyObject* handle_end;
   2037 
   2038     PyObject* handle_comment;
   2039     PyObject* handle_pi;
   2040 
   2041     PyObject* handle_close;
   2042 
   2043 } XMLParserObject;
   2044 
   2045 staticforward PyTypeObject XMLParser_Type;
   2046 
   2047 /* helpers */
   2048 
   2049 #if defined(Py_USING_UNICODE)
   2050 LOCAL(int)
   2051 checkstring(const char* string, int size)
   2052 {
   2053     int i;
   2054 
   2055     /* check if an 8-bit string contains UTF-8 characters */
   2056     for (i = 0; i < size; i++)
   2057         if (string[i] & 0x80)
   2058             return 1;
   2059 
   2060     return 0;
   2061 }
   2062 #endif
   2063 
   2064 LOCAL(PyObject*)
   2065 makestring(const char* string, int size)
   2066 {
   2067     /* convert a UTF-8 string to either a 7-bit ascii string or a
   2068        Unicode string */
   2069 
   2070 #if defined(Py_USING_UNICODE)
   2071     if (checkstring(string, size))
   2072         return PyUnicode_DecodeUTF8(string, size, "strict");
   2073 #endif
   2074 
   2075     return PyString_FromStringAndSize(string, size);
   2076 }
   2077 
   2078 LOCAL(PyObject*)
   2079 makeuniversal(XMLParserObject* self, const char* string)
   2080 {
   2081     /* convert a UTF-8 tag/attribute name from the expat parser
   2082        to a universal name string */
   2083 
   2084     int size = strlen(string);
   2085     PyObject* key;
   2086     PyObject* value;
   2087 
   2088     /* look the 'raw' name up in the names dictionary */
   2089     key = PyString_FromStringAndSize(string, size);
   2090     if (!key)
   2091         return NULL;
   2092 
   2093     value = PyDict_GetItem(self->names, key);
   2094 
   2095     if (value) {
   2096         Py_INCREF(value);
   2097     } else {
   2098         /* new name.  convert to universal name, and decode as
   2099            necessary */
   2100 
   2101         PyObject* tag;
   2102         char* p;
   2103         int i;
   2104 
   2105         /* look for namespace separator */
   2106         for (i = 0; i < size; i++)
   2107             if (string[i] == '}')
   2108                 break;
   2109         if (i != size) {
   2110             /* convert to universal name */
   2111             tag = PyString_FromStringAndSize(NULL, size+1);
   2112             p = PyString_AS_STRING(tag);
   2113             p[0] = '{';
   2114             memcpy(p+1, string, size);
   2115             size++;
   2116         } else {
   2117             /* plain name; use key as tag */
   2118             Py_INCREF(key);
   2119             tag = key;
   2120         }
   2121 
   2122         /* decode universal name */
   2123 #if defined(Py_USING_UNICODE)
   2124         /* inline makestring, to avoid duplicating the source string if
   2125            it's not an utf-8 string */
   2126         p = PyString_AS_STRING(tag);
   2127         if (checkstring(p, size)) {
   2128             value = PyUnicode_DecodeUTF8(p, size, "strict");
   2129             Py_DECREF(tag);
   2130             if (!value) {
   2131                 Py_DECREF(key);
   2132                 return NULL;
   2133             }
   2134         } else
   2135 #endif
   2136             value = tag; /* use tag as is */
   2137 
   2138         /* add to names dictionary */
   2139         if (PyDict_SetItem(self->names, key, value) < 0) {
   2140             Py_DECREF(key);
   2141             Py_DECREF(value);
   2142             return NULL;
   2143         }
   2144     }
   2145 
   2146     Py_DECREF(key);
   2147     return value;
   2148 }
   2149 
   2150 static void
   2151 expat_set_error(const char* message, int line, int column)
   2152 {
   2153     PyObject *error;
   2154     PyObject *position;
   2155     char buffer[256];
   2156 
   2157     sprintf(buffer, "%s: line %d, column %d", message, line, column);
   2158 
   2159     error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
   2160     if (!error)
   2161         return;
   2162 
   2163     /* add position attribute */
   2164     position = Py_BuildValue("(ii)", line, column);
   2165     if (!position) {
   2166         Py_DECREF(error);
   2167         return;
   2168     }
   2169     if (PyObject_SetAttrString(error, "position", position) == -1) {
   2170         Py_DECREF(error);
   2171         Py_DECREF(position);
   2172         return;
   2173     }
   2174     Py_DECREF(position);
   2175 
   2176     PyErr_SetObject(elementtree_parseerror_obj, error);
   2177     Py_DECREF(error);
   2178 }
   2179 
   2180 /* -------------------------------------------------------------------- */
   2181 /* handlers */
   2182 
   2183 static void
   2184 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
   2185                       int data_len)
   2186 {
   2187     PyObject* key;
   2188     PyObject* value;
   2189     PyObject* res;
   2190 
   2191     if (data_len < 2 || data_in[0] != '&')
   2192         return;
   2193 
   2194     key = makestring(data_in + 1, data_len - 2);
   2195     if (!key)
   2196         return;
   2197 
   2198     value = PyDict_GetItem(self->entity, key);
   2199 
   2200     if (value) {
   2201         if (TreeBuilder_CheckExact(self->target))
   2202             res = treebuilder_handle_data(
   2203                 (TreeBuilderObject*) self->target, value
   2204                 );
   2205         else if (self->handle_data)
   2206             res = PyObject_CallFunction(self->handle_data, "O", value);
   2207         else
   2208             res = NULL;
   2209         Py_XDECREF(res);
   2210     } else if (!PyErr_Occurred()) {
   2211         /* Report the first error, not the last */
   2212         char message[128];
   2213         sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
   2214         expat_set_error(
   2215             message,
   2216             EXPAT(GetErrorLineNumber)(self->parser),
   2217             EXPAT(GetErrorColumnNumber)(self->parser)
   2218             );
   2219     }
   2220 
   2221     Py_DECREF(key);
   2222 }
   2223 
   2224 static void
   2225 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
   2226                     const XML_Char **attrib_in)
   2227 {
   2228     PyObject* res;
   2229     PyObject* tag;
   2230     PyObject* attrib;
   2231     int ok;
   2232 
   2233     /* tag name */
   2234     tag = makeuniversal(self, tag_in);
   2235     if (!tag)
   2236         return; /* parser will look for errors */
   2237 
   2238     /* attributes */
   2239     if (attrib_in[0]) {
   2240         attrib = PyDict_New();
   2241         if (!attrib)
   2242             return;
   2243         while (attrib_in[0] && attrib_in[1]) {
   2244             PyObject* key = makeuniversal(self, attrib_in[0]);
   2245             PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
   2246             if (!key || !value) {
   2247                 Py_XDECREF(value);
   2248                 Py_XDECREF(key);
   2249                 Py_DECREF(attrib);
   2250                 return;
   2251             }
   2252             ok = PyDict_SetItem(attrib, key, value);
   2253             Py_DECREF(value);
   2254             Py_DECREF(key);
   2255             if (ok < 0) {
   2256                 Py_DECREF(attrib);
   2257                 return;
   2258             }
   2259             attrib_in += 2;
   2260         }
   2261     } else {
   2262         Py_INCREF(Py_None);
   2263         attrib = Py_None;
   2264     }
   2265 
   2266     if (TreeBuilder_CheckExact(self->target))
   2267         /* shortcut */
   2268         res = treebuilder_handle_start((TreeBuilderObject*) self->target,
   2269                                        tag, attrib);
   2270     else if (self->handle_start) {
   2271         if (attrib == Py_None) {
   2272             Py_DECREF(attrib);
   2273             attrib = PyDict_New();
   2274             if (!attrib)
   2275                 return;
   2276         }
   2277         res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
   2278     } else
   2279         res = NULL;
   2280 
   2281     Py_DECREF(tag);
   2282     Py_DECREF(attrib);
   2283 
   2284     Py_XDECREF(res);
   2285 }
   2286 
   2287 static void
   2288 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
   2289                    int data_len)
   2290 {
   2291     PyObject* data;
   2292     PyObject* res;
   2293 
   2294     data = makestring(data_in, data_len);
   2295     if (!data)
   2296         return; /* parser will look for errors */
   2297 
   2298     if (TreeBuilder_CheckExact(self->target))
   2299         /* shortcut */
   2300         res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
   2301     else if (self->handle_data)
   2302         res = PyObject_CallFunction(self->handle_data, "O", data);
   2303     else
   2304         res = NULL;
   2305 
   2306     Py_DECREF(data);
   2307 
   2308     Py_XDECREF(res);
   2309 }
   2310 
   2311 static void
   2312 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
   2313 {
   2314     PyObject* tag;
   2315     PyObject* res = NULL;
   2316 
   2317     if (TreeBuilder_CheckExact(self->target))
   2318         /* shortcut */
   2319         /* the standard tree builder doesn't look at the end tag */
   2320         res = treebuilder_handle_end(
   2321             (TreeBuilderObject*) self->target, Py_None
   2322             );
   2323     else if (self->handle_end) {
   2324         tag = makeuniversal(self, tag_in);
   2325         if (tag) {
   2326             res = PyObject_CallFunction(self->handle_end, "O", tag);
   2327             Py_DECREF(tag);
   2328         }
   2329     }
   2330 
   2331     Py_XDECREF(res);
   2332 }
   2333 
   2334 static void
   2335 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
   2336                        const XML_Char *uri)
   2337 {
   2338     PyObject* sprefix = NULL;
   2339     PyObject* suri = NULL;
   2340 
   2341     suri = makestring(uri, strlen(uri));
   2342     if (!suri)
   2343         return;
   2344 
   2345     if (prefix)
   2346         sprefix = makestring(prefix, strlen(prefix));
   2347     else
   2348         sprefix = PyString_FromStringAndSize("", 0);
   2349     if (!sprefix) {
   2350         Py_DECREF(suri);
   2351         return;
   2352     }
   2353 
   2354     treebuilder_handle_namespace(
   2355         (TreeBuilderObject*) self->target, 1, sprefix, suri
   2356         );
   2357 
   2358     Py_DECREF(sprefix);
   2359     Py_DECREF(suri);
   2360 }
   2361 
   2362 static void
   2363 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
   2364 {
   2365     treebuilder_handle_namespace(
   2366         (TreeBuilderObject*) self->target, 0, NULL, NULL
   2367         );
   2368 }
   2369 
   2370 static void
   2371 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
   2372 {
   2373     PyObject* comment;
   2374     PyObject* res;
   2375 
   2376     if (self->handle_comment) {
   2377         comment = makestring(comment_in, strlen(comment_in));
   2378         if (comment) {
   2379             res = PyObject_CallFunction(self->handle_comment, "O", comment);
   2380             Py_XDECREF(res);
   2381             Py_DECREF(comment);
   2382         }
   2383     }
   2384 }
   2385 
   2386 static void
   2387 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
   2388                  const XML_Char* data_in)
   2389 {
   2390     PyObject* target;
   2391     PyObject* data;
   2392     PyObject* res;
   2393 
   2394     if (self->handle_pi) {
   2395         target = makestring(target_in, strlen(target_in));
   2396         data = makestring(data_in, strlen(data_in));
   2397         if (target && data) {
   2398             res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
   2399             Py_XDECREF(res);
   2400             Py_DECREF(data);
   2401             Py_DECREF(target);
   2402         } else {
   2403             Py_XDECREF(data);
   2404             Py_XDECREF(target);
   2405         }
   2406     }
   2407 }
   2408 
   2409 #if defined(Py_USING_UNICODE)
   2410 static int
   2411 expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
   2412                                XML_Encoding *info)
   2413 {
   2414     PyObject* u;
   2415     Py_UNICODE* p;
   2416     unsigned char s[256];
   2417     int i;
   2418 
   2419     memset(info, 0, sizeof(XML_Encoding));
   2420 
   2421     for (i = 0; i < 256; i++)
   2422         s[i] = i;
   2423 
   2424     u = PyUnicode_Decode((char*) s, 256, name, "replace");
   2425     if (!u)
   2426         return XML_STATUS_ERROR;
   2427 
   2428     if (PyUnicode_GET_SIZE(u) != 256) {
   2429         Py_DECREF(u);
   2430         return XML_STATUS_ERROR;
   2431     }
   2432 
   2433     p = PyUnicode_AS_UNICODE(u);
   2434 
   2435     for (i = 0; i < 256; i++) {
   2436         if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
   2437             info->map[i] = p[i];
   2438         else
   2439             info->map[i] = -1;
   2440     }
   2441 
   2442     Py_DECREF(u);
   2443 
   2444     return XML_STATUS_OK;
   2445 }
   2446 #endif
   2447 
   2448 /* -------------------------------------------------------------------- */
   2449 /* constructor and destructor */
   2450 
   2451 static PyObject*
   2452 xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
   2453 {
   2454     XMLParserObject* self;
   2455     /* FIXME: does this need to be static? */
   2456     static XML_Memory_Handling_Suite memory_handler;
   2457 
   2458     PyObject* target = NULL;
   2459     char* encoding = NULL;
   2460     static char* kwlist[] = { "target", "encoding", NULL };
   2461     if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
   2462                                      &target, &encoding))
   2463         return NULL;
   2464 
   2465 #if defined(USE_PYEXPAT_CAPI)
   2466     if (!expat_capi) {
   2467         PyErr_SetString(
   2468             PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
   2469             );
   2470         return NULL;
   2471     }
   2472 #endif
   2473 
   2474     self = PyObject_New(XMLParserObject, &XMLParser_Type);
   2475     if (self == NULL)
   2476         return NULL;
   2477 
   2478     self->entity = PyDict_New();
   2479     if (!self->entity) {
   2480         PyObject_Del(self);
   2481         return NULL;
   2482     }
   2483 
   2484     self->names = PyDict_New();
   2485     if (!self->names) {
   2486         PyObject_Del(self->entity);
   2487         PyObject_Del(self);
   2488         return NULL;
   2489     }
   2490 
   2491     memory_handler.malloc_fcn = PyObject_Malloc;
   2492     memory_handler.realloc_fcn = PyObject_Realloc;
   2493     memory_handler.free_fcn = PyObject_Free;
   2494 
   2495     self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
   2496     if (!self->parser) {
   2497         PyObject_Del(self->names);
   2498         PyObject_Del(self->entity);
   2499         PyObject_Del(self);
   2500         PyErr_NoMemory();
   2501         return NULL;
   2502     }
   2503 
   2504     /* setup target handlers */
   2505     if (!target) {
   2506         target = treebuilder_new();
   2507         if (!target) {
   2508             EXPAT(ParserFree)(self->parser);
   2509             PyObject_Del(self->names);
   2510             PyObject_Del(self->entity);
   2511             PyObject_Del(self);
   2512             return NULL;
   2513         }
   2514     } else
   2515         Py_INCREF(target);
   2516     self->target = target;
   2517 
   2518     self->handle_xml = PyObject_GetAttrString(target, "xml");
   2519     self->handle_start = PyObject_GetAttrString(target, "start");
   2520     self->handle_data = PyObject_GetAttrString(target, "data");
   2521     self->handle_end = PyObject_GetAttrString(target, "end");
   2522     self->handle_comment = PyObject_GetAttrString(target, "comment");
   2523     self->handle_pi = PyObject_GetAttrString(target, "pi");
   2524     self->handle_close = PyObject_GetAttrString(target, "close");
   2525 
   2526     PyErr_Clear();
   2527 
   2528     /* configure parser */
   2529     EXPAT(SetUserData)(self->parser, self);
   2530     EXPAT(SetElementHandler)(
   2531         self->parser,
   2532         (XML_StartElementHandler) expat_start_handler,
   2533         (XML_EndElementHandler) expat_end_handler
   2534         );
   2535     EXPAT(SetDefaultHandlerExpand)(
   2536         self->parser,
   2537         (XML_DefaultHandler) expat_default_handler
   2538         );
   2539     EXPAT(SetCharacterDataHandler)(
   2540         self->parser,
   2541         (XML_CharacterDataHandler) expat_data_handler
   2542         );
   2543     if (self->handle_comment)
   2544         EXPAT(SetCommentHandler)(
   2545             self->parser,
   2546             (XML_CommentHandler) expat_comment_handler
   2547             );
   2548     if (self->handle_pi)
   2549         EXPAT(SetProcessingInstructionHandler)(
   2550             self->parser,
   2551             (XML_ProcessingInstructionHandler) expat_pi_handler
   2552             );
   2553 #if defined(Py_USING_UNICODE)
   2554     EXPAT(SetUnknownEncodingHandler)(
   2555         self->parser,
   2556         (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
   2557         );
   2558 #endif
   2559 
   2560     ALLOC(sizeof(XMLParserObject), "create expatparser");
   2561 
   2562     return (PyObject*) self;
   2563 }
   2564 
   2565 static void
   2566 xmlparser_dealloc(XMLParserObject* self)
   2567 {
   2568     EXPAT(ParserFree)(self->parser);
   2569 
   2570     Py_XDECREF(self->handle_close);
   2571     Py_XDECREF(self->handle_pi);
   2572     Py_XDECREF(self->handle_comment);
   2573     Py_XDECREF(self->handle_end);
   2574     Py_XDECREF(self->handle_data);
   2575     Py_XDECREF(self->handle_start);
   2576     Py_XDECREF(self->handle_xml);
   2577 
   2578     Py_DECREF(self->target);
   2579     Py_DECREF(self->entity);
   2580     Py_DECREF(self->names);
   2581 
   2582     RELEASE(sizeof(XMLParserObject), "destroy expatparser");
   2583 
   2584     PyObject_Del(self);
   2585 }
   2586 
   2587 /* -------------------------------------------------------------------- */
   2588 /* methods (in alphabetical order) */
   2589 
   2590 LOCAL(PyObject*)
   2591 expat_parse(XMLParserObject* self, char* data, int data_len, int final)
   2592 {
   2593     int ok;
   2594 
   2595     ok = EXPAT(Parse)(self->parser, data, data_len, final);
   2596 
   2597     if (PyErr_Occurred())
   2598         return NULL;
   2599 
   2600     if (!ok) {
   2601         expat_set_error(
   2602             EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
   2603             EXPAT(GetErrorLineNumber)(self->parser),
   2604             EXPAT(GetErrorColumnNumber)(self->parser)
   2605             );
   2606         return NULL;
   2607     }
   2608 
   2609     Py_RETURN_NONE;
   2610 }
   2611 
   2612 static PyObject*
   2613 xmlparser_close(XMLParserObject* self, PyObject* args)
   2614 {
   2615     /* end feeding data to parser */
   2616 
   2617     PyObject* res;
   2618     if (!PyArg_ParseTuple(args, ":close"))
   2619         return NULL;
   2620 
   2621     res = expat_parse(self, "", 0, 1);
   2622     if (!res)
   2623         return NULL;
   2624 
   2625     if (TreeBuilder_CheckExact(self->target)) {
   2626         Py_DECREF(res);
   2627         return treebuilder_done((TreeBuilderObject*) self->target);
   2628     } if (self->handle_close) {
   2629         Py_DECREF(res);
   2630         return PyObject_CallFunction(self->handle_close, "");
   2631     } else
   2632         return res;
   2633 }
   2634 
   2635 static PyObject*
   2636 xmlparser_feed(XMLParserObject* self, PyObject* args)
   2637 {
   2638     /* feed data to parser */
   2639 
   2640     char* data;
   2641     int data_len;
   2642     if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
   2643         return NULL;
   2644 
   2645     return expat_parse(self, data, data_len, 0);
   2646 }
   2647 
   2648 static PyObject*
   2649 xmlparser_parse(XMLParserObject* self, PyObject* args)
   2650 {
   2651     /* (internal) parse until end of input stream */
   2652 
   2653     PyObject* reader;
   2654     PyObject* buffer;
   2655     PyObject* res;
   2656 
   2657     PyObject* fileobj;
   2658     if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
   2659         return NULL;
   2660 
   2661     reader = PyObject_GetAttrString(fileobj, "read");
   2662     if (!reader)
   2663         return NULL;
   2664 
   2665     /* read from open file object */
   2666     for (;;) {
   2667 
   2668         buffer = PyObject_CallFunction(reader, "i", 64*1024);
   2669 
   2670         if (!buffer) {
   2671             /* read failed (e.g. due to KeyboardInterrupt) */
   2672             Py_DECREF(reader);
   2673             return NULL;
   2674         }
   2675 
   2676         if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
   2677             Py_DECREF(buffer);
   2678             break;
   2679         }
   2680 
   2681         res = expat_parse(
   2682             self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
   2683             );
   2684 
   2685         Py_DECREF(buffer);
   2686 
   2687         if (!res) {
   2688             Py_DECREF(reader);
   2689             return NULL;
   2690         }
   2691         Py_DECREF(res);
   2692 
   2693     }
   2694 
   2695     Py_DECREF(reader);
   2696 
   2697     res = expat_parse(self, "", 0, 1);
   2698 
   2699     if (res && TreeBuilder_CheckExact(self->target)) {
   2700         Py_DECREF(res);
   2701         return treebuilder_done((TreeBuilderObject*) self->target);
   2702     }
   2703 
   2704     return res;
   2705 }
   2706 
   2707 static PyObject*
   2708 xmlparser_setevents(XMLParserObject* self, PyObject* args)
   2709 {
   2710     /* activate element event reporting */
   2711 
   2712     Py_ssize_t i;
   2713     TreeBuilderObject* target;
   2714 
   2715     PyObject* events; /* event collector */
   2716     PyObject* event_set = Py_None;
   2717     if (!PyArg_ParseTuple(args, "O!|O:_setevents",  &PyList_Type, &events,
   2718                           &event_set))
   2719         return NULL;
   2720 
   2721     if (!TreeBuilder_CheckExact(self->target)) {
   2722         PyErr_SetString(
   2723             PyExc_TypeError,
   2724             "event handling only supported for cElementTree.Treebuilder "
   2725             "targets"
   2726             );
   2727         return NULL;
   2728     }
   2729 
   2730     target = (TreeBuilderObject*) self->target;
   2731 
   2732     Py_INCREF(events);
   2733     Py_XDECREF(target->events);
   2734     target->events = events;
   2735 
   2736     /* clear out existing events */
   2737     Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
   2738     Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
   2739     Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
   2740     Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
   2741 
   2742     if (event_set == Py_None) {
   2743         /* default is "end" only */
   2744         target->end_event_obj = PyString_FromString("end");
   2745         Py_RETURN_NONE;
   2746     }
   2747 
   2748     if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
   2749         goto error;
   2750 
   2751     for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
   2752         PyObject* item = PyTuple_GET_ITEM(event_set, i);
   2753         char* event;
   2754         if (!PyString_Check(item))
   2755             goto error;
   2756         event = PyString_AS_STRING(item);
   2757         if (strcmp(event, "start") == 0) {
   2758             Py_INCREF(item);
   2759             target->start_event_obj = item;
   2760         } else if (strcmp(event, "end") == 0) {
   2761             Py_INCREF(item);
   2762             Py_XDECREF(target->end_event_obj);
   2763             target->end_event_obj = item;
   2764         } else if (strcmp(event, "start-ns") == 0) {
   2765             Py_INCREF(item);
   2766             Py_XDECREF(target->start_ns_event_obj);
   2767             target->start_ns_event_obj = item;
   2768             EXPAT(SetNamespaceDeclHandler)(
   2769                 self->parser,
   2770                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
   2771                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
   2772                 );
   2773         } else if (strcmp(event, "end-ns") == 0) {
   2774             Py_INCREF(item);
   2775             Py_XDECREF(target->end_ns_event_obj);
   2776             target->end_ns_event_obj = item;
   2777             EXPAT(SetNamespaceDeclHandler)(
   2778                 self->parser,
   2779                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
   2780                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
   2781                 );
   2782         } else {
   2783             PyErr_Format(
   2784                 PyExc_ValueError,
   2785                 "unknown event '%s'", event
   2786                 );
   2787             return NULL;
   2788         }
   2789     }
   2790 
   2791     Py_RETURN_NONE;
   2792 
   2793   error:
   2794     PyErr_SetString(
   2795         PyExc_TypeError,
   2796         "invalid event tuple"
   2797         );
   2798     return NULL;
   2799 }
   2800 
   2801 static PyMethodDef xmlparser_methods[] = {
   2802     {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
   2803     {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
   2804     {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
   2805     {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
   2806     {NULL, NULL}
   2807 };
   2808 
   2809 static PyObject*
   2810 xmlparser_getattr(XMLParserObject* self, char* name)
   2811 {
   2812     PyObject* res;
   2813 
   2814     res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
   2815     if (res)
   2816         return res;
   2817 
   2818     PyErr_Clear();
   2819 
   2820     if (strcmp(name, "entity") == 0)
   2821         res = self->entity;
   2822     else if (strcmp(name, "target") == 0)
   2823         res = self->target;
   2824     else if (strcmp(name, "version") == 0) {
   2825         char buffer[100];
   2826         sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
   2827                 XML_MINOR_VERSION, XML_MICRO_VERSION);
   2828         return PyString_FromString(buffer);
   2829     } else {
   2830         PyErr_SetString(PyExc_AttributeError, name);
   2831         return NULL;
   2832     }
   2833 
   2834     Py_INCREF(res);
   2835     return res;
   2836 }
   2837 
   2838 statichere PyTypeObject XMLParser_Type = {
   2839     PyObject_HEAD_INIT(NULL)
   2840     0, "XMLParser", sizeof(XMLParserObject), 0,
   2841     /* methods */
   2842     (destructor)xmlparser_dealloc, /* tp_dealloc */
   2843     0, /* tp_print */
   2844     (getattrfunc)xmlparser_getattr, /* tp_getattr */
   2845 };
   2846 
   2847 #endif
   2848 
   2849 /* ==================================================================== */
   2850 /* python module interface */
   2851 
   2852 static PyMethodDef _functions[] = {
   2853     {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
   2854     {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
   2855     {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
   2856 #if defined(USE_EXPAT)
   2857     {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
   2858     {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
   2859 #endif
   2860     {NULL, NULL}
   2861 };
   2862 
   2863 DL_EXPORT(void)
   2864 init_elementtree(void)
   2865 {
   2866     PyObject* m;
   2867     PyObject* g;
   2868     char* bootstrap;
   2869 
   2870     /* Patch object type */
   2871     Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
   2872 #if defined(USE_EXPAT)
   2873     Py_TYPE(&XMLParser_Type) = &PyType_Type;
   2874 #endif
   2875 
   2876     m = Py_InitModule("_elementtree", _functions);
   2877     if (!m)
   2878         return;
   2879 
   2880     /* python glue code */
   2881 
   2882     g = PyDict_New();
   2883     if (!g)
   2884         return;
   2885 
   2886     PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
   2887 
   2888     bootstrap = (
   2889 
   2890         "from copy import copy, deepcopy\n"
   2891 
   2892         "try:\n"
   2893         "  from xml.etree import ElementTree\n"
   2894         "except ImportError:\n"
   2895         "  import ElementTree\n"
   2896         "ET = ElementTree\n"
   2897         "del ElementTree\n"
   2898 
   2899         "import _elementtree as cElementTree\n"
   2900 
   2901         "try:\n" /* check if copy works as is */
   2902         "  copy(cElementTree.Element('x'))\n"
   2903         "except:\n"
   2904         "  def copyelement(elem):\n"
   2905         "    return elem\n"
   2906 
   2907         "class CommentProxy:\n"
   2908         " def __call__(self, text=None):\n"
   2909         "  element = cElementTree.Element(ET.Comment)\n"
   2910         "  element.text = text\n"
   2911         "  return element\n"
   2912         " def __cmp__(self, other):\n"
   2913         "  return cmp(ET.Comment, other)\n"
   2914         "cElementTree.Comment = CommentProxy()\n"
   2915 
   2916         "class ElementTree(ET.ElementTree):\n" /* public */
   2917         "  def parse(self, source, parser=None):\n"
   2918         "    if not hasattr(source, 'read'):\n"
   2919         "      source = open(source, 'rb')\n"
   2920         "    if parser is not None:\n"
   2921         "      while 1:\n"
   2922         "        data = source.read(65536)\n"
   2923         "        if not data:\n"
   2924         "          break\n"
   2925         "        parser.feed(data)\n"
   2926         "      self._root = parser.close()\n"
   2927         "    else:\n"
   2928         "      parser = cElementTree.XMLParser()\n"
   2929         "      self._root = parser._parse(source)\n"
   2930         "    return self._root\n"
   2931         "cElementTree.ElementTree = ElementTree\n"
   2932 
   2933         "def iter(node, tag=None):\n" /* helper */
   2934         "  if tag == '*':\n"
   2935         "    tag = None\n"
   2936         "  if tag is None or node.tag == tag:\n"
   2937         "    yield node\n"
   2938         "  for node in node:\n"
   2939         "    for node in iter(node, tag):\n"
   2940         "      yield node\n"
   2941 
   2942         "def itertext(node):\n" /* helper */
   2943         "  if node.text:\n"
   2944         "    yield node.text\n"
   2945         "  for e in node:\n"
   2946         "    for s in e.itertext():\n"
   2947         "      yield s\n"
   2948         "    if e.tail:\n"
   2949         "      yield e.tail\n"
   2950 
   2951         "def parse(source, parser=None):\n" /* public */
   2952         "  tree = ElementTree()\n"
   2953         "  tree.parse(source, parser)\n"
   2954         "  return tree\n"
   2955         "cElementTree.parse = parse\n"
   2956 
   2957         "class iterparse(object):\n"
   2958         " root = None\n"
   2959         " def __init__(self, file, events=None):\n"
   2960         "  if not hasattr(file, 'read'):\n"
   2961         "    file = open(file, 'rb')\n"
   2962         "  self._file = file\n"
   2963         "  self._events = []\n"
   2964         "  self._index = 0\n"
   2965         "  self.root = self._root = None\n"
   2966         "  b = cElementTree.TreeBuilder()\n"
   2967         "  self._parser = cElementTree.XMLParser(b)\n"
   2968         "  self._parser._setevents(self._events, events)\n"
   2969         " def next(self):\n"
   2970         "  while 1:\n"
   2971         "    try:\n"
   2972         "      item = self._events[self._index]\n"
   2973         "    except IndexError:\n"
   2974         "      if self._parser is None:\n"
   2975         "        self.root = self._root\n"
   2976         "        raise StopIteration\n"
   2977         "      # load event buffer\n"
   2978         "      del self._events[:]\n"
   2979         "      self._index = 0\n"
   2980         "      data = self._file.read(16384)\n"
   2981         "      if data:\n"
   2982         "        self._parser.feed(data)\n"
   2983         "      else:\n"
   2984         "        self._root = self._parser.close()\n"
   2985         "        self._parser = None\n"
   2986         "    else:\n"
   2987         "      self._index = self._index + 1\n"
   2988         "      return item\n"
   2989         " def __iter__(self):\n"
   2990         "  return self\n"
   2991         "cElementTree.iterparse = iterparse\n"
   2992 
   2993         "class PIProxy:\n"
   2994         " def __call__(self, target, text=None):\n"
   2995         "  element = cElementTree.Element(ET.PI)\n"
   2996         "  element.text = target\n"
   2997         "  if text:\n"
   2998         "    element.text = element.text + ' ' + text\n"
   2999         "  return element\n"
   3000         " def __cmp__(self, other):\n"
   3001         "  return cmp(ET.PI, other)\n"
   3002         "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
   3003 
   3004         "def XML(text):\n" /* public */
   3005         "  parser = cElementTree.XMLParser()\n"
   3006         "  parser.feed(text)\n"
   3007         "  return parser.close()\n"
   3008         "cElementTree.XML = cElementTree.fromstring = XML\n"
   3009 
   3010         "def XMLID(text):\n" /* public */
   3011         "  tree = XML(text)\n"
   3012         "  ids = {}\n"
   3013         "  for elem in tree.iter():\n"
   3014         "    id = elem.get('id')\n"
   3015         "    if id:\n"
   3016         "      ids[id] = elem\n"
   3017         "  return tree, ids\n"
   3018         "cElementTree.XMLID = XMLID\n"
   3019 
   3020         "try:\n"
   3021         " register_namespace = ET.register_namespace\n"
   3022         "except AttributeError:\n"
   3023         " def register_namespace(prefix, uri):\n"
   3024         "  ET._namespace_map[uri] = prefix\n"
   3025         "cElementTree.register_namespace = register_namespace\n"
   3026 
   3027         "cElementTree.dump = ET.dump\n"
   3028         "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
   3029         "cElementTree.iselement = ET.iselement\n"
   3030         "cElementTree.QName = ET.QName\n"
   3031         "cElementTree.tostring = ET.tostring\n"
   3032         "cElementTree.fromstringlist = ET.fromstringlist\n"
   3033         "cElementTree.tostringlist = ET.tostringlist\n"
   3034         "cElementTree.VERSION = '" VERSION "'\n"
   3035         "cElementTree.__version__ = '" VERSION "'\n"
   3036 
   3037        );
   3038 
   3039     if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
   3040         return;
   3041 
   3042     elementpath_obj = PyDict_GetItemString(g, "ElementPath");
   3043 
   3044     elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
   3045     if (elementtree_copyelement_obj) {
   3046         /* reduce hack needed; enable reduce method */
   3047         PyMethodDef* mp;
   3048         for (mp = element_methods; mp->ml_name; mp++)
   3049             if (mp->ml_meth == (PyCFunction) element_reduce) {
   3050                 mp->ml_name = "__reduce__";
   3051                 break;
   3052             }
   3053     } else
   3054         PyErr_Clear();
   3055 
   3056     elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
   3057     elementtree_iter_obj = PyDict_GetItemString(g, "iter");
   3058     elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
   3059 
   3060 #if defined(USE_PYEXPAT_CAPI)
   3061     /* link against pyexpat, if possible */
   3062     expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
   3063     if (expat_capi) {
   3064         /* check that it's usable */
   3065         if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
   3066             expat_capi->size < sizeof(struct PyExpat_CAPI) ||
   3067             expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
   3068             expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
   3069             expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
   3070             expat_capi = NULL;
   3071     }
   3072 #endif
   3073 
   3074     elementtree_parseerror_obj = PyErr_NewException(
   3075         "cElementTree.ParseError", PyExc_SyntaxError, NULL
   3076         );
   3077     Py_INCREF(elementtree_parseerror_obj);
   3078     PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
   3079 }
   3080