Home | History | Annotate | Download | only in Modules
      1 #include "Python.h"
      2 #include <ctype.h>
      3 
      4 #include "frameobject.h"
      5 #include "expat.h"
      6 
      7 #include "pyexpat.h"
      8 
      9 /* Do not emit Clinic output to a file as that wreaks havoc with conditionally
     10    included methods. */
     11 /*[clinic input]
     12 module pyexpat
     13 [clinic start generated code]*/
     14 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
     15 
     16 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
     17 
/* Expat memory-handling suite built from Python's object allocator
   (malloc, realloc and free entries, in that order).
   NOTE(review): presumably passed to Expat when a parser is created; the
   call site is not visible in this section -- confirm. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
     20 
/* Symbolic index for every supported callback.  The values are used to
   index the `handlers` array of an xmlparseobject (see have_handler() and
   the my_*Handler trampolines). */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
    /* Only present when XML_COMBINED_VERSION is at least 19504. */
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    /* Last enumerator; no handler trampoline in this section uses it. */
    _DummyDecl
};
     48 
     49 static PyObject *ErrorObject;
     50 
     51 /* ----------------------------------------------------- */
     52 
/* Declarations for objects of type xmlparser: the per-parser state shared
   by the methods and the Expat callback trampolines below. */

typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* Expat parser handle used for the XML_* calls */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Python callbacks indexed by enum HandlerTypes;
                                   a NULL slot means "no handler set" */
} xmlparseobject;
     70 
     71 #include "clinic/pyexpat.c.h"
     72 
     73 #define CHARACTER_DATA_BUFFER_SIZE 8192
     74 
     75 static PyTypeObject Xmlparsetype;
     76 
     77 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
     78 typedef void* xmlhandler;
     79 
/* Description of one handler slot; instances live in handler_info[]. */
struct HandlerInfo {
    const char *name;           /* handler name as a C string */
    xmlhandlersetter setter;    /* NOTE(review): presumably installs `handler`
                                   on an XML_Parser -- confirm against users */
    xmlhandler handler;         /* opaque pointer; presumably the C trampoline
                                   for this slot -- confirm */
    PyCodeObject *tb_code;      /* not referenced in this section -- TODO confirm use */
    PyObject *nameobj;          /* unicode form of `name`, created on first use
                                   by get_handler_name() */
};
     87 
     88 static struct HandlerInfo handler_info[64];
     89 
     90 /* Set an integer attribute on the error object; return true on success,
     91  * false on an exception.
     92  */
     93 static int
     94 set_error_attr(PyObject *err, const char *name, int value)
     95 {
     96     PyObject *v = PyLong_FromLong(value);
     97 
     98     if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
     99         Py_XDECREF(v);
    100         return 0;
    101     }
    102     Py_DECREF(v);
    103     return 1;
    104 }
    105 
    106 /* Build and set an Expat exception, including positioning
    107  * information.  Always returns NULL.
    108  */
    109 static PyObject *
    110 set_error(xmlparseobject *self, enum XML_Error code)
    111 {
    112     PyObject *err;
    113     PyObject *buffer;
    114     XML_Parser parser = self->itself;
    115     int lineno = XML_GetErrorLineNumber(parser);
    116     int column = XML_GetErrorColumnNumber(parser);
    117 
    118     buffer = PyUnicode_FromFormat("%s: line %i, column %i",
    119                                   XML_ErrorString(code), lineno, column);
    120     if (buffer == NULL)
    121         return NULL;
    122     err = PyObject_CallFunction(ErrorObject, "O", buffer);
    123     Py_DECREF(buffer);
    124     if (  err != NULL
    125           && set_error_attr(err, "code", code)
    126           && set_error_attr(err, "offset", column)
    127           && set_error_attr(err, "lineno", lineno)) {
    128         PyErr_SetObject(ErrorObject, err);
    129     }
    130     Py_XDECREF(err);
    131     return NULL;
    132 }
    133 
    134 static int
    135 have_handler(xmlparseobject *self, int type)
    136 {
    137     PyObject *handler = self->handlers[type];
    138     return handler != NULL;
    139 }
    140 
    141 static PyObject *
    142 get_handler_name(struct HandlerInfo *hinfo)
    143 {
    144     PyObject *name = hinfo->nameobj;
    145     if (name == NULL) {
    146         name = PyUnicode_FromString(hinfo->name);
    147         hinfo->nameobj = name;
    148     }
    149     Py_XINCREF(name);
    150     return name;
    151 }
    152 
    153 
    154 /* Convert a string of XML_Chars into a Unicode string.
    155    Returns None if str is a null pointer. */
    156 
    157 static PyObject *
    158 conv_string_to_unicode(const XML_Char *str)
    159 {
    160     /* XXX currently this code assumes that XML_Char is 8-bit,
    161        and hence in UTF-8.  */
    162     /* UTF-8 from Expat, Unicode desired */
    163     if (str == NULL) {
    164         Py_INCREF(Py_None);
    165         return Py_None;
    166     }
    167     return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
    168 }
    169 
    170 static PyObject *
    171 conv_string_len_to_unicode(const XML_Char *str, int len)
    172 {
    173     /* XXX currently this code assumes that XML_Char is 8-bit,
    174        and hence in UTF-8.  */
    175     /* UTF-8 from Expat, Unicode desired */
    176     if (str == NULL) {
    177         Py_INCREF(Py_None);
    178         return Py_None;
    179     }
    180     return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
    181 }
    182 
    183 /* Callback routines */
    184 
    185 static void clear_handlers(xmlparseobject *self, int initial);
    186 
    187 /* This handler is used when an error has been detected, in the hope
    188    that actual parsing can be terminated early.  This will only help
    189    if an external entity reference is encountered. */
    190 static int
    191 error_external_entity_ref_handler(XML_Parser parser,
    192                                   const XML_Char *context,
    193                                   const XML_Char *base,
    194                                   const XML_Char *systemId,
    195                                   const XML_Char *publicId)
    196 {
    197     return 0;
    198 }
    199 
    200 /* Dummy character data handler used when an error (exception) has
    201    been detected, and the actual parsing can be terminated early.
    202    This is needed since character data handler can't be safely removed
    203    from within the character data handler, but can be replaced.  It is
    204    used only from the character data handler trampoline, and must be
    205    used right after `flag_error()` is called. */
    206 static void
    207 noop_character_data_handler(void *userData, const XML_Char *data, int len)
    208 {
    209     /* Do nothing. */
    210 }
    211 
    212 static void
    213 flag_error(xmlparseobject *self)
    214 {
    215     clear_handlers(self, 0);
    216     XML_SetExternalEntityRefHandler(self->itself,
    217                                     error_external_entity_ref_handler);
    218 }
    219 
    220 static PyObject*
    221 call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
    222                 xmlparseobject *self)
    223 {
    224     PyObject *res;
    225 
    226     res = PyEval_CallObject(func, args);
    227     if (res == NULL) {
    228         _PyTraceback_Add(funcname, __FILE__, lineno);
    229         XML_StopParser(self->itself, XML_FALSE);
    230     }
    231     return res;
    232 }
    233 
    234 static PyObject*
    235 string_intern(xmlparseobject *self, const char* str)
    236 {
    237     PyObject *result = conv_string_to_unicode(str);
    238     PyObject *value;
    239     /* result can be NULL if the unicode conversion failed. */
    240     if (!result)
    241         return result;
    242     if (!self->intern)
    243         return result;
    244     value = PyDict_GetItem(self->intern, result);
    245     if (!value) {
    246         if (PyDict_SetItem(self->intern, result, result) == 0)
    247             return result;
    248         else
    249             return NULL;
    250     }
    251     Py_INCREF(value);
    252     Py_DECREF(result);
    253     return value;
    254 }
    255 
    256 /* Return 0 on success, -1 on exception.
    257  * flag_error() will be called before return if needed.
    258  */
    259 static int
    260 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
    261 {
    262     PyObject *args;
    263     PyObject *temp;
    264 
    265     if (!have_handler(self, CharacterData))
    266         return -1;
    267 
    268     args = PyTuple_New(1);
    269     if (args == NULL)
    270         return -1;
    271     temp = (conv_string_len_to_unicode(buffer, len));
    272     if (temp == NULL) {
    273         Py_DECREF(args);
    274         flag_error(self);
    275         XML_SetCharacterDataHandler(self->itself,
    276                                     noop_character_data_handler);
    277         return -1;
    278     }
    279     PyTuple_SET_ITEM(args, 0, temp);
    280     /* temp is now a borrowed reference; consider it unused. */
    281     self->in_callback = 1;
    282     temp = call_with_frame("CharacterData", __LINE__,
    283                            self->handlers[CharacterData], args, self);
    284     /* temp is an owned reference again, or NULL */
    285     self->in_callback = 0;
    286     Py_DECREF(args);
    287     if (temp == NULL) {
    288         flag_error(self);
    289         XML_SetCharacterDataHandler(self->itself,
    290                                     noop_character_data_handler);
    291         return -1;
    292     }
    293     Py_DECREF(temp);
    294     return 0;
    295 }
    296 
    297 static int
    298 flush_character_buffer(xmlparseobject *self)
    299 {
    300     int rc;
    301     if (self->buffer == NULL || self->buffer_used == 0)
    302         return 0;
    303     rc = call_character_handler(self, self->buffer, self->buffer_used);
    304     self->buffer_used = 0;
    305     return rc;
    306 }
    307 
    308 static void
    309 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
    310 {
    311     xmlparseobject *self = (xmlparseobject *) userData;
    312 
    313     if (PyErr_Occurred())
    314         return;
    315 
    316     if (self->buffer == NULL)
    317         call_character_handler(self, data, len);
    318     else {
    319         if ((self->buffer_used + len) > self->buffer_size) {
    320             if (flush_character_buffer(self) < 0)
    321                 return;
    322             /* handler might have changed; drop the rest on the floor
    323              * if there isn't a handler anymore
    324              */
    325             if (!have_handler(self, CharacterData))
    326                 return;
    327         }
    328         if (len > self->buffer_size) {
    329             call_character_handler(self, data, len);
    330             self->buffer_used = 0;
    331         }
    332         else {
    333             memcpy(self->buffer + self->buffer_used,
    334                    data, len * sizeof(XML_Char));
    335             self->buffer_used += len;
    336         }
    337     }
    338 }
    339 
    340 static void
    341 my_StartElementHandler(void *userData,
    342                        const XML_Char *name, const XML_Char *atts[])
    343 {
    344     xmlparseobject *self = (xmlparseobject *)userData;
    345 
    346     if (have_handler(self, StartElement)) {
    347         PyObject *container, *rv, *args;
    348         int i, max;
    349 
    350         if (PyErr_Occurred())
    351             return;
    352 
    353         if (flush_character_buffer(self) < 0)
    354             return;
    355         /* Set max to the number of slots filled in atts[]; max/2 is
    356          * the number of attributes we need to process.
    357          */
    358         if (self->specified_attributes) {
    359             max = XML_GetSpecifiedAttributeCount(self->itself);
    360         }
    361         else {
    362             max = 0;
    363             while (atts[max] != NULL)
    364                 max += 2;
    365         }
    366         /* Build the container. */
    367         if (self->ordered_attributes)
    368             container = PyList_New(max);
    369         else
    370             container = PyDict_New();
    371         if (container == NULL) {
    372             flag_error(self);
    373             return;
    374         }
    375         for (i = 0; i < max; i += 2) {
    376             PyObject *n = string_intern(self, (XML_Char *) atts[i]);
    377             PyObject *v;
    378             if (n == NULL) {
    379                 flag_error(self);
    380                 Py_DECREF(container);
    381                 return;
    382             }
    383             v = conv_string_to_unicode((XML_Char *) atts[i+1]);
    384             if (v == NULL) {
    385                 flag_error(self);
    386                 Py_DECREF(container);
    387                 Py_DECREF(n);
    388                 return;
    389             }
    390             if (self->ordered_attributes) {
    391                 PyList_SET_ITEM(container, i, n);
    392                 PyList_SET_ITEM(container, i+1, v);
    393             }
    394             else if (PyDict_SetItem(container, n, v)) {
    395                 flag_error(self);
    396                 Py_DECREF(n);
    397                 Py_DECREF(v);
    398                 return;
    399             }
    400             else {
    401                 Py_DECREF(n);
    402                 Py_DECREF(v);
    403             }
    404         }
    405         args = string_intern(self, name);
    406         if (args != NULL)
    407             args = Py_BuildValue("(NN)", args, container);
    408         if (args == NULL) {
    409             Py_DECREF(container);
    410             return;
    411         }
    412         /* Container is now a borrowed reference; ignore it. */
    413         self->in_callback = 1;
    414         rv = call_with_frame("StartElement", __LINE__,
    415                              self->handlers[StartElement], args, self);
    416         self->in_callback = 0;
    417         Py_DECREF(args);
    418         if (rv == NULL) {
    419             flag_error(self);
    420             return;
    421         }
    422         Py_DECREF(rv);
    423     }
    424 }
    425 
/* RC_HANDLER generates the C trampoline my_<NAME>Handler for one Expat
 * callback.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes member; also used as the traceback name
 *   PARAMS        parenthesized parameter list of the generated function
 *   INIT          extra declarations placed at the top of the body
 *   PARAM_FORMAT  parenthesized argument list handed to Py_BuildValue
 *   CONVERSION    statement(s) run on the handler result `rv` before it
 *                 is released
 *   RETURN        expression returned on every exit path (may be empty)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * When a handler is set, the generated function returns early if an
 * exception is pending or flushing buffered character data fails; else it
 * builds the argument tuple and calls the Python handler with in_callback
 * set.  If building the arguments or the call fails, flag_error() is
 * invoked before returning.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}

/* Trampoline returning void whose first parameter is `void *userData`;
   the handler's result is discarded. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Trampoline returning int: the handler's result is converted with
   PyLong_AsLong(); 0 is returned when no handler ran or it failed. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
    465 
/* my_EndElementHandler: passes the interned element name. */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* my_ProcessingInstructionHandler: passes (interned target, data). */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* my_UnparsedEntityDeclHandler: passes five interned strings
   (entityName, base, systemId, publicId, notationName). */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* my_EntityDeclHandler: `value` is converted using its explicit length
   (value_length); the remaining strings are interned. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* my_XmlDeclHandler: passes (version, encoding, standalone); the two
   strings are converted without interning. */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
    513 
    514 static PyObject *
    515 conv_content_model(XML_Content * const model,
    516                    PyObject *(*conv_string)(const XML_Char *))
    517 {
    518     PyObject *result = NULL;
    519     PyObject *children = PyTuple_New(model->numchildren);
    520     int i;
    521 
    522     if (children != NULL) {
    523         assert(model->numchildren < INT_MAX);
    524         for (i = 0; i < (int)model->numchildren; ++i) {
    525             PyObject *child = conv_content_model(&model->children[i],
    526                                                  conv_string);
    527             if (child == NULL) {
    528                 Py_XDECREF(children);
    529                 return NULL;
    530             }
    531             PyTuple_SET_ITEM(children, i, child);
    532         }
    533         result = Py_BuildValue("(iiO&N)",
    534                                model->type, model->quant,
    535                                conv_string,model->name, children);
    536     }
    537     return result;
    538 }
    539 
    540 static void
    541 my_ElementDeclHandler(void *userData,
    542                       const XML_Char *name,
    543                       XML_Content *model)
    544 {
    545     xmlparseobject *self = (xmlparseobject *)userData;
    546     PyObject *args = NULL;
    547 
    548     if (have_handler(self, ElementDecl)) {
    549         PyObject *rv = NULL;
    550         PyObject *modelobj, *nameobj;
    551 
    552         if (PyErr_Occurred())
    553             return;
    554 
    555         if (flush_character_buffer(self) < 0)
    556             goto finally;
    557         modelobj = conv_content_model(model, (conv_string_to_unicode));
    558         if (modelobj == NULL) {
    559             flag_error(self);
    560             goto finally;
    561         }
    562         nameobj = string_intern(self, name);
    563         if (nameobj == NULL) {
    564             Py_DECREF(modelobj);
    565             flag_error(self);
    566             goto finally;
    567         }
    568         args = Py_BuildValue("NN", nameobj, modelobj);
    569         if (args == NULL) {
    570             Py_DECREF(modelobj);
    571             flag_error(self);
    572             goto finally;
    573         }
    574         self->in_callback = 1;
    575         rv = call_with_frame("ElementDecl", __LINE__,
    576                              self->handlers[ElementDecl], args, self);
    577         self->in_callback = 0;
    578         if (rv == NULL) {
    579             flag_error(self);
    580             goto finally;
    581         }
    582         Py_DECREF(rv);
    583     }
    584  finally:
    585     Py_XDECREF(args);
    586     XML_FreeContentModel(self->itself, model);
    587     return;
    588 }
    589 
/* my_AttlistDeclHandler: element and attribute names are interned; the
   attribute type and default value are converted without interning. */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

/* my_SkippedEntityHandler: only generated when XML_COMBINED_VERSION is at
   least 19504, matching the SkippedEntity enumerator. */
#if XML_COMBINED_VERSION >= 19504
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif
    610 
/* my_NotationDeclHandler: passes four interned strings. */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))

/* my_StartNamespaceDeclHandler: passes (interned prefix, interned uri). */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* my_EndNamespaceDeclHandler: passes the interned prefix. */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* my_CommentHandler: passes the comment text (not interned). */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", conv_string_to_unicode ,data))

/* my_StartCdataSectionHandler: handler is called with no arguments. */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

/* my_EndCdataSectionHandler: handler is called with no arguments. */
VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))

/* my_DefaultHandler: passes the `len` units at `s` as one string. */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* my_DefaultHandlerExpandHandler: same arguments as my_DefaultHandler. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* my_NotStandaloneHandler: returns the handler's result converted with
   PyLong_AsLong(), or 0 when no handler ran or it failed. */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))
    656 
/* my_ExternalEntityRefHandler: uses RC_HANDLER directly because this
   callback receives the XML_Parser rather than the user data pointer, so
   the xmlparseobject is fetched with XML_GetUserData(parser).  Returns
   the handler's result converted with PyLong_AsLong(), or 0 when no
   handler ran or it failed. */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 conv_string_to_unicode ,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyLong_AsLong(rv);, rc,
                XML_GetUserData(parser))
    669 
    670 /* XXX UnknownEncodingHandler */
    671 
/* my_StartDoctypeDeclHandler: passes three interned strings followed by
   the has_internal_subset flag. */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* my_EndDoctypeDeclHandler: handler is called with no arguments. */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
    681 
    682 /* ---------------------------------------------------------------- */
    683 /*[clinic input]
    684 class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
    685 [clinic start generated code]*/
    686 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
    687 
    688 
    689 static PyObject *
    690 get_parse_result(xmlparseobject *self, int rv)
    691 {
    692     if (PyErr_Occurred()) {
    693         return NULL;
    694     }
    695     if (rv == 0) {
    696         return set_error(self, XML_GetErrorCode(self->itself));
    697     }
    698     if (flush_character_buffer(self) < 0) {
    699         return NULL;
    700     }
    701     return PyLong_FromLong(rv);
    702 }
    703 
    704 #define MAX_CHUNK_SIZE (1 << 20)
    705 
    706 /*[clinic input]
    707 pyexpat.xmlparser.Parse
    708 
    709     data: object
    710     isfinal: int(c_default="0") = False
    711     /
    712 
    713 Parse XML data.
    714 
    715 `isfinal' should be true at end of input.
    716 [clinic start generated code]*/
    717 
    718 static PyObject *
    719 pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyObject *data,
    720                              int isfinal)
    721 /*[clinic end generated code: output=f4db843dd1f4ed4b input=199d9e8e92ebbb4b]*/
    722 {
    723     const char *s;
    724     Py_ssize_t slen;
    725     Py_buffer view;
    726     int rc;
    727 
    728     if (PyUnicode_Check(data)) {
    729         view.buf = NULL;
    730         s = PyUnicode_AsUTF8AndSize(data, &slen);
    731         if (s == NULL)
    732             return NULL;
    733         /* Explicitly set UTF-8 encoding. Return code ignored. */
    734         (void)XML_SetEncoding(self->itself, "utf-8");
    735     }
    736     else {
    737         if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
    738             return NULL;
    739         s = view.buf;
    740         slen = view.len;
    741     }
    742 
    743     while (slen > MAX_CHUNK_SIZE) {
    744         rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
    745         if (!rc)
    746             goto done;
    747         s += MAX_CHUNK_SIZE;
    748         slen -= MAX_CHUNK_SIZE;
    749     }
    750     Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
    751     assert(slen <= INT_MAX);
    752     rc = XML_Parse(self->itself, s, (int)slen, isfinal);
    753 
    754 done:
    755     if (view.buf != NULL)
    756         PyBuffer_Release(&view);
    757     return get_parse_result(self, rc);
    758 }
    759 
    760 /* File reading copied from cPickle */
    761 
    762 #define BUF_SIZE 2048
    763 
    764 static int
    765 readinst(char *buf, int buf_size, PyObject *meth)
    766 {
    767     PyObject *str;
    768     Py_ssize_t len;
    769     const char *ptr;
    770 
    771     str = PyObject_CallFunction(meth, "n", buf_size);
    772     if (str == NULL)
    773         goto error;
    774 
    775     if (PyBytes_Check(str))
    776         ptr = PyBytes_AS_STRING(str);
    777     else if (PyByteArray_Check(str))
    778         ptr = PyByteArray_AS_STRING(str);
    779     else {
    780         PyErr_Format(PyExc_TypeError,
    781                      "read() did not return a bytes object (type=%.400s)",
    782                      Py_TYPE(str)->tp_name);
    783         goto error;
    784     }
    785     len = Py_SIZE(str);
    786     if (len > buf_size) {
    787         PyErr_Format(PyExc_ValueError,
    788                      "read() returned too much data: "
    789                      "%i bytes requested, %zd returned",
    790                      buf_size, len);
    791         goto error;
    792     }
    793     memcpy(buf, ptr, len);
    794     Py_DECREF(str);
    795     /* len <= buf_size <= INT_MAX */
    796     return (int)len;
    797 
    798 error:
    799     Py_XDECREF(str);
    800     return -1;
    801 }
    802 
    803 /*[clinic input]
    804 pyexpat.xmlparser.ParseFile
    805 
    806     file: object
    807     /
    808 
    809 Parse XML data from file-like object.
    810 [clinic start generated code]*/
    811 
    812 static PyObject *
    813 pyexpat_xmlparser_ParseFile(xmlparseobject *self, PyObject *file)
    814 /*[clinic end generated code: output=2adc6a13100cc42b input=fbb5a12b6038d735]*/
    815 {
    816     int rv = 1;
    817     PyObject *readmethod = NULL;
    818     _Py_IDENTIFIER(read);
    819 
    820     readmethod = _PyObject_GetAttrId(file, &PyId_read);
    821     if (readmethod == NULL) {
    822         PyErr_SetString(PyExc_TypeError,
    823                         "argument must have 'read' attribute");
    824         return NULL;
    825     }
    826     for (;;) {
    827         int bytes_read;
    828         void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
    829         if (buf == NULL) {
    830             Py_XDECREF(readmethod);
    831             return get_parse_result(self, 0);
    832         }
    833 
    834         bytes_read = readinst(buf, BUF_SIZE, readmethod);
    835         if (bytes_read < 0) {
    836             Py_DECREF(readmethod);
    837             return NULL;
    838         }
    839         rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
    840         if (PyErr_Occurred()) {
    841             Py_XDECREF(readmethod);
    842             return NULL;
    843         }
    844 
    845         if (!rv || bytes_read == 0)
    846             break;
    847     }
    848     Py_XDECREF(readmethod);
    849     return get_parse_result(self, rv);
    850 }
    851 
    852 /*[clinic input]
    853 pyexpat.xmlparser.SetBase
    854 
    855     base: str
    856     /
    857 
    858 Set the base URL for the parser.
    859 [clinic start generated code]*/
    860 
    861 static PyObject *
    862 pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
    863 /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
    864 {
    865     if (!XML_SetBase(self->itself, base)) {
    866         return PyErr_NoMemory();
    867     }
    868     Py_RETURN_NONE;
    869 }
    870 
    871 /*[clinic input]
    872 pyexpat.xmlparser.GetBase
    873 
    874 Return base URL string for the parser.
    875 [clinic start generated code]*/
    876 
    877 static PyObject *
    878 pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
    879 /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
    880 {
    881     return Py_BuildValue("z", XML_GetBase(self->itself));
    882 }
    883 
    884 /*[clinic input]
    885 pyexpat.xmlparser.GetInputContext
    886 
    887 Return the untranslated text of the input that caused the current event.
    888 
    889 If the event was generated by a large amount of text (such as a start tag
    890 for an element with many attributes), not all of the text may be available.
    891 [clinic start generated code]*/
    892 
    893 static PyObject *
    894 pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
    895 /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
    896 {
    897     if (self->in_callback) {
    898         int offset, size;
    899         const char *buffer
    900             = XML_GetInputContext(self->itself, &offset, &size);
    901 
    902         if (buffer != NULL)
    903             return PyBytes_FromStringAndSize(buffer + offset,
    904                                               size - offset);
    905         else
    906             Py_RETURN_NONE;
    907     }
    908     else
    909         Py_RETURN_NONE;
    910 }
    911 
    912 /*[clinic input]
    913 pyexpat.xmlparser.ExternalEntityParserCreate
    914 
    915     context: str(accept={str, NoneType})
    916     encoding: str = NULL
    917     /
    918 
    919 Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
    920 [clinic start generated code]*/
    921 
    922 static PyObject *
    923 pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
    924                                                   const char *context,
    925                                                   const char *encoding)
    926 /*[clinic end generated code: output=535cda9d7a0fbcd6 input=b906714cc122c322]*/
    927 {
    928     xmlparseobject *new_parser;
    929     int i;
    930 
    931     new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
    932     if (new_parser == NULL)
    933         return NULL;
    934     new_parser->buffer_size = self->buffer_size;
    935     new_parser->buffer_used = 0;
    936     new_parser->buffer = NULL;
    937     new_parser->ordered_attributes = self->ordered_attributes;
    938     new_parser->specified_attributes = self->specified_attributes;
    939     new_parser->in_callback = 0;
    940     new_parser->ns_prefixes = self->ns_prefixes;
    941     new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
    942                                                         encoding);
    943     new_parser->handlers = 0;
    944     new_parser->intern = self->intern;
    945     Py_XINCREF(new_parser->intern);
    946     PyObject_GC_Track(new_parser);
    947 
    948     if (self->buffer != NULL) {
    949         new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
    950         if (new_parser->buffer == NULL) {
    951             Py_DECREF(new_parser);
    952             return PyErr_NoMemory();
    953         }
    954     }
    955     if (!new_parser->itself) {
    956         Py_DECREF(new_parser);
    957         return PyErr_NoMemory();
    958     }
    959 
    960     XML_SetUserData(new_parser->itself, (void *)new_parser);
    961 
    962     /* allocate and clear handlers first */
    963     for (i = 0; handler_info[i].name != NULL; i++)
    964         /* do nothing */;
    965 
    966     new_parser->handlers = PyMem_New(PyObject *, i);
    967     if (!new_parser->handlers) {
    968         Py_DECREF(new_parser);
    969         return PyErr_NoMemory();
    970     }
    971     clear_handlers(new_parser, 1);
    972 
    973     /* then copy handlers from self */
    974     for (i = 0; handler_info[i].name != NULL; i++) {
    975         PyObject *handler = self->handlers[i];
    976         if (handler != NULL) {
    977             Py_INCREF(handler);
    978             new_parser->handlers[i] = handler;
    979             handler_info[i].setter(new_parser->itself,
    980                                    handler_info[i].handler);
    981         }
    982     }
    983     return (PyObject *)new_parser;
    984 }
    985 
    986 /*[clinic input]
    987 pyexpat.xmlparser.SetParamEntityParsing
    988 
    989     flag: int
    990     /
    991 
    992 Controls parsing of parameter entities (including the external DTD subset).
    993 
    994 Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
    995 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
    996 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
    997 was successful.
    998 [clinic start generated code]*/
    999 
   1000 static PyObject *
   1001 pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
   1002 /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
   1003 {
   1004     flag = XML_SetParamEntityParsing(self->itself, flag);
   1005     return PyLong_FromLong(flag);
   1006 }
   1007 
   1008 
   1009 #if XML_COMBINED_VERSION >= 19505
   1010 /*[clinic input]
   1011 pyexpat.xmlparser.UseForeignDTD
   1012 
   1013     flag: bool = True
   1014     /
   1015 
   1016 Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
   1017 
   1018 This readily allows the use of a 'default' document type controlled by the
   1019 application, while still getting the advantage of providing document type
   1020 information to the parser. 'flag' defaults to True if not provided.
   1021 [clinic start generated code]*/
   1022 
   1023 static PyObject *
   1024 pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, int flag)
   1025 /*[clinic end generated code: output=cfaa9aa50bb0f65c input=78144c519d116a6e]*/
   1026 {
   1027     enum XML_Error rc;
   1028 
   1029     rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
   1030     if (rc != XML_ERROR_NONE) {
   1031         return set_error(self, rc);
   1032     }
   1033     Py_INCREF(Py_None);
   1034     return Py_None;
   1035 }
   1036 #endif
   1037 
   1038 /*[clinic input]
   1039 pyexpat.xmlparser.__dir__
   1040 [clinic start generated code]*/
   1041 
   1042 static PyObject *
   1043 pyexpat_xmlparser___dir___impl(xmlparseobject *self)
   1044 /*[clinic end generated code: output=bc22451efb9e4d17 input=76aa455f2a661384]*/
   1045 {
   1046 #define APPEND(list, str)                               \
   1047         do {                                            \
   1048                 PyObject *o = PyUnicode_FromString(str);        \
   1049                 if (o != NULL)                          \
   1050                         PyList_Append(list, o);         \
   1051                 Py_XDECREF(o);                          \
   1052         } while (0)
   1053 
   1054     int i;
   1055     PyObject *rc = PyList_New(0);
   1056     if (!rc)
   1057         return NULL;
   1058     for (i = 0; handler_info[i].name != NULL; i++) {
   1059         PyObject *o = get_handler_name(&handler_info[i]);
   1060         if (o != NULL)
   1061             PyList_Append(rc, o);
   1062         Py_XDECREF(o);
   1063     }
   1064     APPEND(rc, "ErrorCode");
   1065     APPEND(rc, "ErrorLineNumber");
   1066     APPEND(rc, "ErrorColumnNumber");
   1067     APPEND(rc, "ErrorByteIndex");
   1068     APPEND(rc, "CurrentLineNumber");
   1069     APPEND(rc, "CurrentColumnNumber");
   1070     APPEND(rc, "CurrentByteIndex");
   1071     APPEND(rc, "buffer_size");
   1072     APPEND(rc, "buffer_text");
   1073     APPEND(rc, "buffer_used");
   1074     APPEND(rc, "namespace_prefixes");
   1075     APPEND(rc, "ordered_attributes");
   1076     APPEND(rc, "specified_attributes");
   1077     APPEND(rc, "intern");
   1078 
   1079 #undef APPEND
   1080 
   1081     if (PyErr_Occurred()) {
   1082         Py_DECREF(rc);
   1083         rc = NULL;
   1084     }
   1085 
   1086     return rc;
   1087 }
   1088 
   1089 static struct PyMethodDef xmlparse_methods[] = {
   1090     PYEXPAT_XMLPARSER_PARSE_METHODDEF
   1091     PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
   1092     PYEXPAT_XMLPARSER_SETBASE_METHODDEF
   1093     PYEXPAT_XMLPARSER_GETBASE_METHODDEF
   1094     PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
   1095     PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
   1096     PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
   1097 #if XML_COMBINED_VERSION >= 19505
   1098     PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
   1099 #endif
   1100     PYEXPAT_XMLPARSER___DIR___METHODDEF
   1101     {NULL, NULL}  /* sentinel */
   1102 };
   1103 
   1104 /* ---------- */
   1105 
   1106 
   1107 
   1108 /* pyexpat international encoding support.
   1109    Make it as simple as possible.
   1110 */
   1111 
   1112 static int
   1113 PyUnknownEncodingHandler(void *encodingHandlerData,
   1114                          const XML_Char *name,
   1115                          XML_Encoding *info)
   1116 {
   1117     static unsigned char template_buffer[256] = {0};
   1118     PyObject* u;
   1119     int i;
   1120     void *data;
   1121     unsigned int kind;
   1122 
   1123     if (PyErr_Occurred())
   1124         return XML_STATUS_ERROR;
   1125 
   1126     if (template_buffer[1] == 0) {
   1127         for (i = 0; i < 256; i++)
   1128             template_buffer[i] = i;
   1129     }
   1130 
   1131     u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
   1132     if (u == NULL || PyUnicode_READY(u)) {
   1133         Py_XDECREF(u);
   1134         return XML_STATUS_ERROR;
   1135     }
   1136 
   1137     if (PyUnicode_GET_LENGTH(u) != 256) {
   1138         Py_DECREF(u);
   1139         PyErr_SetString(PyExc_ValueError,
   1140                         "multi-byte encodings are not supported");
   1141         return XML_STATUS_ERROR;
   1142     }
   1143 
   1144     kind = PyUnicode_KIND(u);
   1145     data = PyUnicode_DATA(u);
   1146     for (i = 0; i < 256; i++) {
   1147         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
   1148         if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
   1149             info->map[i] = ch;
   1150         else
   1151             info->map[i] = -1;
   1152     }
   1153 
   1154     info->data = NULL;
   1155     info->convert = NULL;
   1156     info->release = NULL;
   1157     Py_DECREF(u);
   1158 
   1159     return XML_STATUS_OK;
   1160 }
   1161 
   1162 
   1163 static PyObject *
   1164 newxmlparseobject(const char *encoding, const char *namespace_separator, PyObject *intern)
   1165 {
   1166     int i;
   1167     xmlparseobject *self;
   1168 
   1169     self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
   1170     if (self == NULL)
   1171         return NULL;
   1172 
   1173     self->buffer = NULL;
   1174     self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
   1175     self->buffer_used = 0;
   1176     self->ordered_attributes = 0;
   1177     self->specified_attributes = 0;
   1178     self->in_callback = 0;
   1179     self->ns_prefixes = 0;
   1180     self->handlers = NULL;
   1181     self->intern = intern;
   1182     Py_XINCREF(self->intern);
   1183     PyObject_GC_Track(self);
   1184 
   1185     /* namespace_separator is either NULL or contains one char + \0 */
   1186     self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
   1187                                        namespace_separator);
   1188     if (self->itself == NULL) {
   1189         PyErr_SetString(PyExc_RuntimeError,
   1190                         "XML_ParserCreate failed");
   1191         Py_DECREF(self);
   1192         return NULL;
   1193     }
   1194 #if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT)
   1195     /* This feature was added upstream in libexpat 2.1.0.  Our expat copy
   1196      * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
   1197      * to indicate that we can still use it. */
   1198     XML_SetHashSalt(self->itself,
   1199                     (unsigned long)_Py_HashSecret.expat.hashsalt);
   1200 #endif
   1201     XML_SetUserData(self->itself, (void *)self);
   1202     XML_SetUnknownEncodingHandler(self->itself,
   1203                   (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
   1204 
   1205     for (i = 0; handler_info[i].name != NULL; i++)
   1206         /* do nothing */;
   1207 
   1208     self->handlers = PyMem_New(PyObject *, i);
   1209     if (!self->handlers) {
   1210         Py_DECREF(self);
   1211         return PyErr_NoMemory();
   1212     }
   1213     clear_handlers(self, 1);
   1214 
   1215     return (PyObject*)self;
   1216 }
   1217 
   1218 
   1219 static void
   1220 xmlparse_dealloc(xmlparseobject *self)
   1221 {
   1222     int i;
   1223     PyObject_GC_UnTrack(self);
   1224     if (self->itself != NULL)
   1225         XML_ParserFree(self->itself);
   1226     self->itself = NULL;
   1227 
   1228     if (self->handlers != NULL) {
   1229         for (i = 0; handler_info[i].name != NULL; i++)
   1230             Py_CLEAR(self->handlers[i]);
   1231         PyMem_Free(self->handlers);
   1232         self->handlers = NULL;
   1233     }
   1234     if (self->buffer != NULL) {
   1235         PyMem_Free(self->buffer);
   1236         self->buffer = NULL;
   1237     }
   1238     Py_XDECREF(self->intern);
   1239     PyObject_GC_Del(self);
   1240 }
   1241 
   1242 static int
   1243 handlername2int(PyObject *name)
   1244 {
   1245     int i;
   1246     for (i = 0; handler_info[i].name != NULL; i++) {
   1247         if (_PyUnicode_EqualToASCIIString(name, handler_info[i].name)) {
   1248             return i;
   1249         }
   1250     }
   1251     return -1;
   1252 }
   1253 
   1254 static PyObject *
   1255 get_pybool(int istrue)
   1256 {
   1257     PyObject *result = istrue ? Py_True : Py_False;
   1258     Py_INCREF(result);
   1259     return result;
   1260 }
   1261 
   1262 static PyObject *
   1263 xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
   1264 {
   1265     Py_UCS4 first_char;
   1266     int handlernum = -1;
   1267 
   1268     if (!PyUnicode_Check(nameobj))
   1269         goto generic;
   1270     if (PyUnicode_READY(nameobj))
   1271         return NULL;
   1272 
   1273     handlernum = handlername2int(nameobj);
   1274 
   1275     if (handlernum != -1) {
   1276         PyObject *result = self->handlers[handlernum];
   1277         if (result == NULL)
   1278             result = Py_None;
   1279         Py_INCREF(result);
   1280         return result;
   1281     }
   1282 
   1283     first_char = PyUnicode_READ_CHAR(nameobj, 0);
   1284     if (first_char == 'E') {
   1285         if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorCode"))
   1286             return PyLong_FromLong((long)
   1287                                   XML_GetErrorCode(self->itself));
   1288         if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorLineNumber"))
   1289             return PyLong_FromLong((long)
   1290                                   XML_GetErrorLineNumber(self->itself));
   1291         if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorColumnNumber"))
   1292             return PyLong_FromLong((long)
   1293                                   XML_GetErrorColumnNumber(self->itself));
   1294         if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorByteIndex"))
   1295             return PyLong_FromLong((long)
   1296                                   XML_GetErrorByteIndex(self->itself));
   1297     }
   1298     if (first_char == 'C') {
   1299         if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentLineNumber"))
   1300             return PyLong_FromLong((long)
   1301                                   XML_GetCurrentLineNumber(self->itself));
   1302         if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentColumnNumber"))
   1303             return PyLong_FromLong((long)
   1304                                   XML_GetCurrentColumnNumber(self->itself));
   1305         if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentByteIndex"))
   1306             return PyLong_FromLong((long)
   1307                                   XML_GetCurrentByteIndex(self->itself));
   1308     }
   1309     if (first_char == 'b') {
   1310         if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_size"))
   1311             return PyLong_FromLong((long) self->buffer_size);
   1312         if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_text"))
   1313             return get_pybool(self->buffer != NULL);
   1314         if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_used"))
   1315             return PyLong_FromLong((long) self->buffer_used);
   1316     }
   1317     if (_PyUnicode_EqualToASCIIString(nameobj, "namespace_prefixes"))
   1318         return get_pybool(self->ns_prefixes);
   1319     if (_PyUnicode_EqualToASCIIString(nameobj, "ordered_attributes"))
   1320         return get_pybool(self->ordered_attributes);
   1321     if (_PyUnicode_EqualToASCIIString(nameobj, "specified_attributes"))
   1322         return get_pybool((long) self->specified_attributes);
   1323     if (_PyUnicode_EqualToASCIIString(nameobj, "intern")) {
   1324         if (self->intern == NULL) {
   1325             Py_INCREF(Py_None);
   1326             return Py_None;
   1327         }
   1328         else {
   1329             Py_INCREF(self->intern);
   1330             return self->intern;
   1331         }
   1332     }
   1333   generic:
   1334     return PyObject_GenericGetAttr((PyObject*)self, nameobj);
   1335 }
   1336 
   1337 static int
   1338 sethandler(xmlparseobject *self, PyObject *name, PyObject* v)
   1339 {
   1340     int handlernum = handlername2int(name);
   1341     if (handlernum >= 0) {
   1342         xmlhandler c_handler = NULL;
   1343 
   1344         if (v == Py_None) {
   1345             /* If this is the character data handler, and a character
   1346                data handler is already active, we need to be more
   1347                careful.  What we can safely do is replace the existing
   1348                character data handler callback function with a no-op
   1349                function that will refuse to call Python.  The downside
   1350                is that this doesn't completely remove the character
   1351                data handler from the C layer if there's any callback
   1352                active, so Expat does a little more work than it
   1353                otherwise would, but that's really an odd case.  A more
   1354                elaborate system of handlers and state could remove the
   1355                C handler more effectively. */
   1356             if (handlernum == CharacterData && self->in_callback)
   1357                 c_handler = noop_character_data_handler;
   1358             v = NULL;
   1359         }
   1360         else if (v != NULL) {
   1361             Py_INCREF(v);
   1362             c_handler = handler_info[handlernum].handler;
   1363         }
   1364         Py_XSETREF(self->handlers[handlernum], v);
   1365         handler_info[handlernum].setter(self->itself, c_handler);
   1366         return 1;
   1367     }
   1368     return 0;
   1369 }
   1370 
   1371 static int
   1372 xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
   1373 {
   1374     /* Set attribute 'name' to value 'v'. v==NULL means delete */
   1375     if (!PyUnicode_Check(name)) {
   1376         PyErr_Format(PyExc_TypeError,
   1377                      "attribute name must be string, not '%.200s'",
   1378                      name->ob_type->tp_name);
   1379         return -1;
   1380     }
   1381     if (v == NULL) {
   1382         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
   1383         return -1;
   1384     }
   1385     if (_PyUnicode_EqualToASCIIString(name, "buffer_text")) {
   1386         int b = PyObject_IsTrue(v);
   1387         if (b < 0)
   1388             return -1;
   1389         if (b) {
   1390             if (self->buffer == NULL) {
   1391                 self->buffer = PyMem_Malloc(self->buffer_size);
   1392                 if (self->buffer == NULL) {
   1393                     PyErr_NoMemory();
   1394                     return -1;
   1395                 }
   1396                 self->buffer_used = 0;
   1397             }
   1398         }
   1399         else if (self->buffer != NULL) {
   1400             if (flush_character_buffer(self) < 0)
   1401                 return -1;
   1402             PyMem_Free(self->buffer);
   1403             self->buffer = NULL;
   1404         }
   1405         return 0;
   1406     }
   1407     if (_PyUnicode_EqualToASCIIString(name, "namespace_prefixes")) {
   1408         int b = PyObject_IsTrue(v);
   1409         if (b < 0)
   1410             return -1;
   1411         self->ns_prefixes = b;
   1412         XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
   1413         return 0;
   1414     }
   1415     if (_PyUnicode_EqualToASCIIString(name, "ordered_attributes")) {
   1416         int b = PyObject_IsTrue(v);
   1417         if (b < 0)
   1418             return -1;
   1419         self->ordered_attributes = b;
   1420         return 0;
   1421     }
   1422     if (_PyUnicode_EqualToASCIIString(name, "specified_attributes")) {
   1423         int b = PyObject_IsTrue(v);
   1424         if (b < 0)
   1425             return -1;
   1426         self->specified_attributes = b;
   1427         return 0;
   1428     }
   1429 
   1430     if (_PyUnicode_EqualToASCIIString(name, "buffer_size")) {
   1431       long new_buffer_size;
   1432       if (!PyLong_Check(v)) {
   1433         PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
   1434         return -1;
   1435       }
   1436 
   1437       new_buffer_size = PyLong_AsLong(v);
   1438       if (new_buffer_size <= 0) {
   1439         if (!PyErr_Occurred())
   1440           PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
   1441         return -1;
   1442       }
   1443 
   1444       /* trivial case -- no change */
   1445       if (new_buffer_size == self->buffer_size) {
   1446         return 0;
   1447       }
   1448 
   1449       /* check maximum */
   1450       if (new_buffer_size > INT_MAX) {
   1451         char errmsg[100];
   1452         sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
   1453         PyErr_SetString(PyExc_ValueError, errmsg);
   1454         return -1;
   1455       }
   1456 
   1457       if (self->buffer != NULL) {
   1458         /* there is already a buffer */
   1459         if (self->buffer_used != 0) {
   1460             if (flush_character_buffer(self) < 0) {
   1461                 return -1;
   1462             }
   1463         }
   1464         /* free existing buffer */
   1465         PyMem_Free(self->buffer);
   1466       }
   1467       self->buffer = PyMem_Malloc(new_buffer_size);
   1468       if (self->buffer == NULL) {
   1469         PyErr_NoMemory();
   1470         return -1;
   1471       }
   1472       self->buffer_size = new_buffer_size;
   1473       return 0;
   1474     }
   1475 
   1476     if (_PyUnicode_EqualToASCIIString(name, "CharacterDataHandler")) {
   1477         /* If we're changing the character data handler, flush all
   1478          * cached data with the old handler.  Not sure there's a
   1479          * "right" thing to do, though, but this probably won't
   1480          * happen.
   1481          */
   1482         if (flush_character_buffer(self) < 0)
   1483             return -1;
   1484     }
   1485     if (sethandler(self, name, v)) {
   1486         return 0;
   1487     }
   1488     PyErr_SetObject(PyExc_AttributeError, name);
   1489     return -1;
   1490 }
   1491 
   1492 static int
   1493 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
   1494 {
   1495     int i;
   1496     for (i = 0; handler_info[i].name != NULL; i++)
   1497         Py_VISIT(op->handlers[i]);
   1498     return 0;
   1499 }
   1500 
   1501 static int
   1502 xmlparse_clear(xmlparseobject *op)
   1503 {
   1504     clear_handlers(op, 0);
   1505     Py_CLEAR(op->intern);
   1506     return 0;
   1507 }
   1508 
   1509 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
   1510 
   1511 static PyTypeObject Xmlparsetype = {
   1512         PyVarObject_HEAD_INIT(NULL, 0)
   1513         "pyexpat.xmlparser",            /*tp_name*/
   1514         sizeof(xmlparseobject),         /*tp_basicsize*/
   1515         0,                              /*tp_itemsize*/
   1516         /* methods */
   1517         (destructor)xmlparse_dealloc,   /*tp_dealloc*/
   1518         (printfunc)0,           /*tp_print*/
   1519         0,                      /*tp_getattr*/
   1520         0,  /*tp_setattr*/
   1521         0,                      /*tp_reserved*/
   1522         (reprfunc)0,            /*tp_repr*/
   1523         0,                      /*tp_as_number*/
   1524         0,              /*tp_as_sequence*/
   1525         0,              /*tp_as_mapping*/
   1526         (hashfunc)0,            /*tp_hash*/
   1527         (ternaryfunc)0,         /*tp_call*/
   1528         (reprfunc)0,            /*tp_str*/
   1529         (getattrofunc)xmlparse_getattro, /* tp_getattro */
   1530         (setattrofunc)xmlparse_setattro,              /* tp_setattro */
   1531         0,              /* tp_as_buffer */
   1532         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
   1533         Xmlparsetype__doc__, /* tp_doc - Documentation string */
   1534         (traverseproc)xmlparse_traverse,        /* tp_traverse */
   1535         (inquiry)xmlparse_clear,                /* tp_clear */
   1536         0,                              /* tp_richcompare */
   1537         0,                              /* tp_weaklistoffset */
   1538         0,                              /* tp_iter */
   1539         0,                              /* tp_iternext */
   1540         xmlparse_methods,               /* tp_methods */
   1541 };
   1542 
   1543 /* End of code for xmlparser objects */
   1544 /* -------------------------------------------------------- */
   1545 
   1546 /*[clinic input]
   1547 pyexpat.ParserCreate
   1548 
   1549     encoding: str(accept={str, NoneType}) = NULL
   1550     namespace_separator: str(accept={str, NoneType}) = NULL
   1551     intern: object = NULL
   1552 
   1553 Return a new XML parser object.
   1554 [clinic start generated code]*/
   1555 
   1556 static PyObject *
   1557 pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
   1558                           const char *namespace_separator, PyObject *intern)
   1559 /*[clinic end generated code: output=295c0cf01ab1146c input=23d29704acad385d]*/
   1560 {
   1561     PyObject *result;
   1562     int intern_decref = 0;
   1563 
   1564     if (namespace_separator != NULL
   1565         && strlen(namespace_separator) > 1) {
   1566         PyErr_SetString(PyExc_ValueError,
   1567                         "namespace_separator must be at most one"
   1568                         " character, omitted, or None");
   1569         return NULL;
   1570     }
   1571     /* Explicitly passing None means no interning is desired.
   1572        Not passing anything means that a new dictionary is used. */
   1573     if (intern == Py_None)
   1574         intern = NULL;
   1575     else if (intern == NULL) {
   1576         intern = PyDict_New();
   1577         if (!intern)
   1578             return NULL;
   1579         intern_decref = 1;
   1580     }
   1581     else if (!PyDict_Check(intern)) {
   1582         PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
   1583         return NULL;
   1584     }
   1585 
   1586     result = newxmlparseobject(encoding, namespace_separator, intern);
   1587     if (intern_decref) {
   1588         Py_DECREF(intern);
   1589     }
   1590     return result;
   1591 }
   1592 
   1593 /*[clinic input]
   1594 pyexpat.ErrorString
   1595 
   1596     code: long
   1597     /
   1598 
   1599 Returns string error for given number.
   1600 [clinic start generated code]*/
   1601 
   1602 static PyObject *
   1603 pyexpat_ErrorString_impl(PyObject *module, long code)
   1604 /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
   1605 {
   1606     return Py_BuildValue("z", XML_ErrorString((int)code));
   1607 }
   1608 
   1609 /* List of methods defined in the module */
   1610 
   1611 static struct PyMethodDef pyexpat_methods[] = {
   1612     PYEXPAT_PARSERCREATE_METHODDEF
   1613     PYEXPAT_ERRORSTRING_METHODDEF
   1614     {NULL, NULL}  /* sentinel */
   1615 };
   1616 
   1617 /* Module docstring */
   1618 
   1619 PyDoc_STRVAR(pyexpat_module_documentation,
   1620 "Python wrapper for Expat parser.");
   1621 
   1622 /* Initialization function for the module */
   1623 
   1624 #ifndef MODULE_NAME
   1625 #define MODULE_NAME "pyexpat"
   1626 #endif
   1627 
   1628 #ifndef MODULE_INITFUNC
   1629 #define MODULE_INITFUNC PyInit_pyexpat
   1630 #endif
   1631 
   1632 static struct PyModuleDef pyexpatmodule = {
   1633         PyModuleDef_HEAD_INIT,
   1634         MODULE_NAME,
   1635         pyexpat_module_documentation,
   1636         -1,
   1637         pyexpat_methods,
   1638         NULL,
   1639         NULL,
   1640         NULL,
   1641         NULL
   1642 };
   1643 
   1644 PyMODINIT_FUNC
   1645 MODULE_INITFUNC(void)
   1646 {
   1647     PyObject *m, *d;
   1648     PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
   1649     PyObject *errors_module;
   1650     PyObject *modelmod_name;
   1651     PyObject *model_module;
   1652     PyObject *sys_modules;
   1653     PyObject *tmpnum, *tmpstr;
   1654     PyObject *codes_dict;
   1655     PyObject *rev_codes_dict;
   1656     int res;
   1657     static struct PyExpat_CAPI capi;
   1658     PyObject *capi_object;
   1659 
   1660     if (errmod_name == NULL)
   1661         return NULL;
   1662     modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
   1663     if (modelmod_name == NULL)
   1664         return NULL;
   1665 
   1666     if (PyType_Ready(&Xmlparsetype) < 0)
   1667         return NULL;
   1668 
   1669     /* Create the module and add the functions */
   1670     m = PyModule_Create(&pyexpatmodule);
   1671     if (m == NULL)
   1672         return NULL;
   1673 
   1674     /* Add some symbolic constants to the module */
   1675     if (ErrorObject == NULL) {
   1676         ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
   1677                                          NULL, NULL);
   1678         if (ErrorObject == NULL)
   1679             return NULL;
   1680     }
   1681     Py_INCREF(ErrorObject);
   1682     PyModule_AddObject(m, "error", ErrorObject);
   1683     Py_INCREF(ErrorObject);
   1684     PyModule_AddObject(m, "ExpatError", ErrorObject);
   1685     Py_INCREF(&Xmlparsetype);
   1686     PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
   1687 
   1688     PyModule_AddStringConstant(m, "EXPAT_VERSION",
   1689                                XML_ExpatVersion());
   1690     {
   1691         XML_Expat_Version info = XML_ExpatVersionInfo();
   1692         PyModule_AddObject(m, "version_info",
   1693                            Py_BuildValue("(iii)", info.major,
   1694                                          info.minor, info.micro));
   1695     }
   1696     /* XXX When Expat supports some way of figuring out how it was
   1697        compiled, this should check and set native_encoding
   1698        appropriately.
   1699     */
   1700     PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
   1701 
   1702     sys_modules = PySys_GetObject("modules");
   1703     if (sys_modules == NULL) {
   1704         Py_DECREF(m);
   1705         return NULL;
   1706     }
   1707     d = PyModule_GetDict(m);
   1708     if (d == NULL) {
   1709         Py_DECREF(m);
   1710         return NULL;
   1711     }
   1712     errors_module = PyDict_GetItem(d, errmod_name);
   1713     if (errors_module == NULL) {
   1714         errors_module = PyModule_New(MODULE_NAME ".errors");
   1715         if (errors_module != NULL) {
   1716             PyDict_SetItem(sys_modules, errmod_name, errors_module);
   1717             /* gives away the reference to errors_module */
   1718             PyModule_AddObject(m, "errors", errors_module);
   1719         }
   1720     }
   1721     Py_DECREF(errmod_name);
   1722     model_module = PyDict_GetItem(d, modelmod_name);
   1723     if (model_module == NULL) {
   1724         model_module = PyModule_New(MODULE_NAME ".model");
   1725         if (model_module != NULL) {
   1726             PyDict_SetItem(sys_modules, modelmod_name, model_module);
   1727             /* gives away the reference to model_module */
   1728             PyModule_AddObject(m, "model", model_module);
   1729         }
   1730     }
   1731     Py_DECREF(modelmod_name);
   1732     if (errors_module == NULL || model_module == NULL) {
   1733         /* Don't core dump later! */
   1734         Py_DECREF(m);
   1735         return NULL;
   1736     }
   1737 
   1738 #if XML_COMBINED_VERSION > 19505
   1739     {
   1740         const XML_Feature *features = XML_GetFeatureList();
   1741         PyObject *list = PyList_New(0);
   1742         if (list == NULL)
   1743             /* just ignore it */
   1744             PyErr_Clear();
   1745         else {
   1746             int i = 0;
   1747             for (; features[i].feature != XML_FEATURE_END; ++i) {
   1748                 int ok;
   1749                 PyObject *item = Py_BuildValue("si", features[i].name,
   1750                                                features[i].value);
   1751                 if (item == NULL) {
   1752                     Py_DECREF(list);
   1753                     list = NULL;
   1754                     break;
   1755                 }
   1756                 ok = PyList_Append(list, item);
   1757                 Py_DECREF(item);
   1758                 if (ok < 0) {
   1759                     PyErr_Clear();
   1760                     break;
   1761                 }
   1762             }
   1763             if (list != NULL)
   1764                 PyModule_AddObject(m, "features", list);
   1765         }
   1766     }
   1767 #endif
   1768 
   1769     codes_dict = PyDict_New();
   1770     rev_codes_dict = PyDict_New();
   1771     if (codes_dict == NULL || rev_codes_dict == NULL) {
   1772         Py_XDECREF(codes_dict);
   1773         Py_XDECREF(rev_codes_dict);
   1774         return NULL;
   1775     }
   1776 
   1777 #define MYCONST(name) \
   1778     if (PyModule_AddStringConstant(errors_module, #name,               \
   1779                                    XML_ErrorString(name)) < 0)         \
   1780         return NULL;                                                   \
   1781     tmpnum = PyLong_FromLong(name);                                    \
   1782     if (tmpnum == NULL) return NULL;                                   \
   1783     res = PyDict_SetItemString(codes_dict,                             \
   1784                                XML_ErrorString(name), tmpnum);         \
   1785     if (res < 0) return NULL;                                          \
   1786     tmpstr = PyUnicode_FromString(XML_ErrorString(name));              \
   1787     if (tmpstr == NULL) return NULL;                                   \
   1788     res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr);              \
   1789     Py_DECREF(tmpstr);                                                 \
   1790     Py_DECREF(tmpnum);                                                 \
   1791     if (res < 0) return NULL;                                          \
   1792 
   1793     MYCONST(XML_ERROR_NO_MEMORY);
   1794     MYCONST(XML_ERROR_SYNTAX);
   1795     MYCONST(XML_ERROR_NO_ELEMENTS);
   1796     MYCONST(XML_ERROR_INVALID_TOKEN);
   1797     MYCONST(XML_ERROR_UNCLOSED_TOKEN);
   1798     MYCONST(XML_ERROR_PARTIAL_CHAR);
   1799     MYCONST(XML_ERROR_TAG_MISMATCH);
   1800     MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
   1801     MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
   1802     MYCONST(XML_ERROR_PARAM_ENTITY_REF);
   1803     MYCONST(XML_ERROR_UNDEFINED_ENTITY);
   1804     MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
   1805     MYCONST(XML_ERROR_ASYNC_ENTITY);
   1806     MYCONST(XML_ERROR_BAD_CHAR_REF);
   1807     MYCONST(XML_ERROR_BINARY_ENTITY_REF);
   1808     MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
   1809     MYCONST(XML_ERROR_MISPLACED_XML_PI);
   1810     MYCONST(XML_ERROR_UNKNOWN_ENCODING);
   1811     MYCONST(XML_ERROR_INCORRECT_ENCODING);
   1812     MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
   1813     MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
   1814     MYCONST(XML_ERROR_NOT_STANDALONE);
   1815     MYCONST(XML_ERROR_UNEXPECTED_STATE);
   1816     MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
   1817     MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
   1818     MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
   1819     /* Added in Expat 1.95.7. */
   1820     MYCONST(XML_ERROR_UNBOUND_PREFIX);
   1821     /* Added in Expat 1.95.8. */
   1822     MYCONST(XML_ERROR_UNDECLARING_PREFIX);
   1823     MYCONST(XML_ERROR_INCOMPLETE_PE);
   1824     MYCONST(XML_ERROR_XML_DECL);
   1825     MYCONST(XML_ERROR_TEXT_DECL);
   1826     MYCONST(XML_ERROR_PUBLICID);
   1827     MYCONST(XML_ERROR_SUSPENDED);
   1828     MYCONST(XML_ERROR_NOT_SUSPENDED);
   1829     MYCONST(XML_ERROR_ABORTED);
   1830     MYCONST(XML_ERROR_FINISHED);
   1831     MYCONST(XML_ERROR_SUSPEND_PE);
   1832 
   1833     if (PyModule_AddStringConstant(errors_module, "__doc__",
   1834                                    "Constants used to describe "
   1835                                    "error conditions.") < 0)
   1836         return NULL;
   1837 
   1838     if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
   1839         return NULL;
   1840     if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
   1841         return NULL;
   1842 
   1843 #undef MYCONST
   1844 
   1845 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
   1846     MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
   1847     MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
   1848     MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
   1849 #undef MYCONST
   1850 
   1851 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
   1852     PyModule_AddStringConstant(model_module, "__doc__",
   1853                      "Constants used to interpret content model information.");
   1854 
   1855     MYCONST(XML_CTYPE_EMPTY);
   1856     MYCONST(XML_CTYPE_ANY);
   1857     MYCONST(XML_CTYPE_MIXED);
   1858     MYCONST(XML_CTYPE_NAME);
   1859     MYCONST(XML_CTYPE_CHOICE);
   1860     MYCONST(XML_CTYPE_SEQ);
   1861 
   1862     MYCONST(XML_CQUANT_NONE);
   1863     MYCONST(XML_CQUANT_OPT);
   1864     MYCONST(XML_CQUANT_REP);
   1865     MYCONST(XML_CQUANT_PLUS);
   1866 #undef MYCONST
   1867 
   1868     /* initialize pyexpat dispatch table */
   1869     capi.size = sizeof(capi);
   1870     capi.magic = PyExpat_CAPI_MAGIC;
   1871     capi.MAJOR_VERSION = XML_MAJOR_VERSION;
   1872     capi.MINOR_VERSION = XML_MINOR_VERSION;
   1873     capi.MICRO_VERSION = XML_MICRO_VERSION;
   1874     capi.ErrorString = XML_ErrorString;
   1875     capi.GetErrorCode = XML_GetErrorCode;
   1876     capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
   1877     capi.GetErrorLineNumber = XML_GetErrorLineNumber;
   1878     capi.Parse = XML_Parse;
   1879     capi.ParserCreate_MM = XML_ParserCreate_MM;
   1880     capi.ParserFree = XML_ParserFree;
   1881     capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
   1882     capi.SetCommentHandler = XML_SetCommentHandler;
   1883     capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
   1884     capi.SetElementHandler = XML_SetElementHandler;
   1885     capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
   1886     capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
   1887     capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
   1888     capi.SetUserData = XML_SetUserData;
   1889     capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
   1890     capi.SetEncoding = XML_SetEncoding;
   1891     capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
   1892 
   1893     /* export using capsule */
   1894     capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
   1895     if (capi_object)
   1896         PyModule_AddObject(m, "expat_CAPI", capi_object);
   1897     return m;
   1898 }
   1899 
   1900 static void
   1901 clear_handlers(xmlparseobject *self, int initial)
   1902 {
   1903     int i = 0;
   1904 
   1905     for (; handler_info[i].name != NULL; i++) {
   1906         if (initial)
   1907             self->handlers[i] = NULL;
   1908         else {
   1909             Py_CLEAR(self->handlers[i]);
   1910             handler_info[i].setter(self->itself, NULL);
   1911         }
   1912     }
   1913 }
   1914 
   1915 static struct HandlerInfo handler_info[] = {
   1916     {"StartElementHandler",
   1917      (xmlhandlersetter)XML_SetStartElementHandler,
   1918      (xmlhandler)my_StartElementHandler},
   1919     {"EndElementHandler",
   1920      (xmlhandlersetter)XML_SetEndElementHandler,
   1921      (xmlhandler)my_EndElementHandler},
   1922     {"ProcessingInstructionHandler",
   1923      (xmlhandlersetter)XML_SetProcessingInstructionHandler,
   1924      (xmlhandler)my_ProcessingInstructionHandler},
   1925     {"CharacterDataHandler",
   1926      (xmlhandlersetter)XML_SetCharacterDataHandler,
   1927      (xmlhandler)my_CharacterDataHandler},
   1928     {"UnparsedEntityDeclHandler",
   1929      (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
   1930      (xmlhandler)my_UnparsedEntityDeclHandler},
   1931     {"NotationDeclHandler",
   1932      (xmlhandlersetter)XML_SetNotationDeclHandler,
   1933      (xmlhandler)my_NotationDeclHandler},
   1934     {"StartNamespaceDeclHandler",
   1935      (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
   1936      (xmlhandler)my_StartNamespaceDeclHandler},
   1937     {"EndNamespaceDeclHandler",
   1938      (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
   1939      (xmlhandler)my_EndNamespaceDeclHandler},
   1940     {"CommentHandler",
   1941      (xmlhandlersetter)XML_SetCommentHandler,
   1942      (xmlhandler)my_CommentHandler},
   1943     {"StartCdataSectionHandler",
   1944      (xmlhandlersetter)XML_SetStartCdataSectionHandler,
   1945      (xmlhandler)my_StartCdataSectionHandler},
   1946     {"EndCdataSectionHandler",
   1947      (xmlhandlersetter)XML_SetEndCdataSectionHandler,
   1948      (xmlhandler)my_EndCdataSectionHandler},
   1949     {"DefaultHandler",
   1950      (xmlhandlersetter)XML_SetDefaultHandler,
   1951      (xmlhandler)my_DefaultHandler},
   1952     {"DefaultHandlerExpand",
   1953      (xmlhandlersetter)XML_SetDefaultHandlerExpand,
   1954      (xmlhandler)my_DefaultHandlerExpandHandler},
   1955     {"NotStandaloneHandler",
   1956      (xmlhandlersetter)XML_SetNotStandaloneHandler,
   1957      (xmlhandler)my_NotStandaloneHandler},
   1958     {"ExternalEntityRefHandler",
   1959      (xmlhandlersetter)XML_SetExternalEntityRefHandler,
   1960      (xmlhandler)my_ExternalEntityRefHandler},
   1961     {"StartDoctypeDeclHandler",
   1962      (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
   1963      (xmlhandler)my_StartDoctypeDeclHandler},
   1964     {"EndDoctypeDeclHandler",
   1965      (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
   1966      (xmlhandler)my_EndDoctypeDeclHandler},
   1967     {"EntityDeclHandler",
   1968      (xmlhandlersetter)XML_SetEntityDeclHandler,
   1969      (xmlhandler)my_EntityDeclHandler},
   1970     {"XmlDeclHandler",
   1971      (xmlhandlersetter)XML_SetXmlDeclHandler,
   1972      (xmlhandler)my_XmlDeclHandler},
   1973     {"ElementDeclHandler",
   1974      (xmlhandlersetter)XML_SetElementDeclHandler,
   1975      (xmlhandler)my_ElementDeclHandler},
   1976     {"AttlistDeclHandler",
   1977      (xmlhandlersetter)XML_SetAttlistDeclHandler,
   1978      (xmlhandler)my_AttlistDeclHandler},
   1979 #if XML_COMBINED_VERSION >= 19504
   1980     {"SkippedEntityHandler",
   1981      (xmlhandlersetter)XML_SetSkippedEntityHandler,
   1982      (xmlhandler)my_SkippedEntityHandler},
   1983 #endif
   1984 
   1985     {NULL, NULL, NULL} /* sentinel */
   1986 };
   1987 
   1988 /*[clinic input]
   1989 dump buffer
   1990 [clinic start generated code]*/
   1991 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=524ce2e021e4eba6]*/
   1992