1 #include "Python.h" 2 #include <ctype.h> 3 4 #include "frameobject.h" 5 #include "expat.h" 6 7 #include "pyexpat.h" 8 9 /* Do not emit Clinic output to a file as that wreaks havoc with conditionally 10 included methods. */ 11 /*[clinic input] 12 module pyexpat 13 [clinic start generated code]*/ 14 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/ 15 16 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION) 17 18 static XML_Memory_Handling_Suite ExpatMemoryHandler = { 19 PyObject_Malloc, PyObject_Realloc, PyObject_Free}; 20 21 enum HandlerTypes { 22 StartElement, 23 EndElement, 24 ProcessingInstruction, 25 CharacterData, 26 UnparsedEntityDecl, 27 NotationDecl, 28 StartNamespaceDecl, 29 EndNamespaceDecl, 30 Comment, 31 StartCdataSection, 32 EndCdataSection, 33 Default, 34 DefaultHandlerExpand, 35 NotStandalone, 36 ExternalEntityRef, 37 StartDoctypeDecl, 38 EndDoctypeDecl, 39 EntityDecl, 40 XmlDecl, 41 ElementDecl, 42 AttlistDecl, 43 #if XML_COMBINED_VERSION >= 19504 44 SkippedEntity, 45 #endif 46 _DummyDecl 47 }; 48 49 static PyObject *ErrorObject; 50 51 /* ----------------------------------------------------- */ 52 53 /* Declarations for objects of type xmlparser */ 54 55 typedef struct { 56 PyObject_HEAD 57 58 XML_Parser itself; 59 int ordered_attributes; /* Return attributes as a list. */ 60 int specified_attributes; /* Report only specified attributes. */ 61 int in_callback; /* Is a callback active? */ 62 int ns_prefixes; /* Namespace-triplets mode? */ 63 XML_Char *buffer; /* Buffer used when accumulating characters */ 64 /* NULL if not enabled */ 65 int buffer_size; /* Size of buffer, in XML_Char units */ 66 int buffer_used; /* Buffer units in use */ 67 PyObject *intern; /* Dictionary to intern strings */ 68 PyObject **handlers; 69 } xmlparseobject; 70 71 #include "clinic/pyexpat.c.h" 72 73 #define CHARACTER_DATA_BUFFER_SIZE 8192 74 75 static PyTypeObject Xmlparsetype; 76 77 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth); 78 typedef void* xmlhandler; 79 80 struct HandlerInfo { 81 const char *name; 82 xmlhandlersetter setter; 83 xmlhandler handler; 84 PyCodeObject *tb_code; 85 PyObject *nameobj; 86 }; 87 88 static struct HandlerInfo handler_info[64]; 89 90 /* Set an integer attribute on the error object; return true on success, 91 * false on an exception. 92 */ 93 static int 94 set_error_attr(PyObject *err, const char *name, int value) 95 { 96 PyObject *v = PyLong_FromLong(value); 97 98 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) { 99 Py_XDECREF(v); 100 return 0; 101 } 102 Py_DECREF(v); 103 return 1; 104 } 105 106 /* Build and set an Expat exception, including positioning 107 * information. Always returns NULL. 108 */ 109 static PyObject * 110 set_error(xmlparseobject *self, enum XML_Error code) 111 { 112 PyObject *err; 113 PyObject *buffer; 114 XML_Parser parser = self->itself; 115 int lineno = XML_GetErrorLineNumber(parser); 116 int column = XML_GetErrorColumnNumber(parser); 117 118 buffer = PyUnicode_FromFormat("%s: line %i, column %i", 119 XML_ErrorString(code), lineno, column); 120 if (buffer == NULL) 121 return NULL; 122 err = PyObject_CallFunctionObjArgs(ErrorObject, buffer, NULL); 123 Py_DECREF(buffer); 124 if ( err != NULL 125 && set_error_attr(err, "code", code) 126 && set_error_attr(err, "offset", column) 127 && set_error_attr(err, "lineno", lineno)) { 128 PyErr_SetObject(ErrorObject, err); 129 } 130 Py_XDECREF(err); 131 return NULL; 132 } 133 134 static int 135 have_handler(xmlparseobject *self, int type) 136 { 137 PyObject *handler = self->handlers[type]; 138 return handler != NULL; 139 } 140 141 static PyObject * 142 get_handler_name(struct HandlerInfo *hinfo) 143 { 144 PyObject *name = hinfo->nameobj; 145 if (name == NULL) { 146 name = PyUnicode_FromString(hinfo->name); 147 hinfo->nameobj = name; 148 } 149 Py_XINCREF(name); 150 return name; 151 } 152 153 154 /* Convert a string of XML_Chars into a Unicode string. 155 Returns None if str is a null pointer. */ 156 157 static PyObject * 158 conv_string_to_unicode(const XML_Char *str) 159 { 160 /* XXX currently this code assumes that XML_Char is 8-bit, 161 and hence in UTF-8. */ 162 /* UTF-8 from Expat, Unicode desired */ 163 if (str == NULL) { 164 Py_RETURN_NONE; 165 } 166 return PyUnicode_DecodeUTF8(str, strlen(str), "strict"); 167 } 168 169 static PyObject * 170 conv_string_len_to_unicode(const XML_Char *str, int len) 171 { 172 /* XXX currently this code assumes that XML_Char is 8-bit, 173 and hence in UTF-8. */ 174 /* UTF-8 from Expat, Unicode desired */ 175 if (str == NULL) { 176 Py_RETURN_NONE; 177 } 178 return PyUnicode_DecodeUTF8((const char *)str, len, "strict"); 179 } 180 181 /* Callback routines */ 182 183 static void clear_handlers(xmlparseobject *self, int initial); 184 185 /* This handler is used when an error has been detected, in the hope 186 that actual parsing can be terminated early. This will only help 187 if an external entity reference is encountered. */ 188 static int 189 error_external_entity_ref_handler(XML_Parser parser, 190 const XML_Char *context, 191 const XML_Char *base, 192 const XML_Char *systemId, 193 const XML_Char *publicId) 194 { 195 return 0; 196 } 197 198 /* Dummy character data handler used when an error (exception) has 199 been detected, and the actual parsing can be terminated early. 200 This is needed since character data handler can't be safely removed 201 from within the character data handler, but can be replaced. It is 202 used only from the character data handler trampoline, and must be 203 used right after `flag_error()` is called. */ 204 static void 205 noop_character_data_handler(void *userData, const XML_Char *data, int len) 206 { 207 /* Do nothing. */ 208 } 209 210 static void 211 flag_error(xmlparseobject *self) 212 { 213 clear_handlers(self, 0); 214 XML_SetExternalEntityRefHandler(self->itself, 215 error_external_entity_ref_handler); 216 } 217 218 static PyObject* 219 call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args, 220 xmlparseobject *self) 221 { 222 PyObject *res; 223 224 res = PyEval_CallObject(func, args); 225 if (res == NULL) { 226 _PyTraceback_Add(funcname, __FILE__, lineno); 227 XML_StopParser(self->itself, XML_FALSE); 228 } 229 return res; 230 } 231 232 static PyObject* 233 string_intern(xmlparseobject *self, const char* str) 234 { 235 PyObject *result = conv_string_to_unicode(str); 236 PyObject *value; 237 /* result can be NULL if the unicode conversion failed. */ 238 if (!result) 239 return result; 240 if (!self->intern) 241 return result; 242 value = PyDict_GetItem(self->intern, result); 243 if (!value) { 244 if (PyDict_SetItem(self->intern, result, result) == 0) 245 return result; 246 else { 247 Py_DECREF(result); 248 return NULL; 249 } 250 } 251 Py_INCREF(value); 252 Py_DECREF(result); 253 return value; 254 } 255 256 /* Return 0 on success, -1 on exception. 257 * flag_error() will be called before return if needed. 258 */ 259 static int 260 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len) 261 { 262 PyObject *args; 263 PyObject *temp; 264 265 if (!have_handler(self, CharacterData)) 266 return -1; 267 268 args = PyTuple_New(1); 269 if (args == NULL) 270 return -1; 271 temp = (conv_string_len_to_unicode(buffer, len)); 272 if (temp == NULL) { 273 Py_DECREF(args); 274 flag_error(self); 275 XML_SetCharacterDataHandler(self->itself, 276 noop_character_data_handler); 277 return -1; 278 } 279 PyTuple_SET_ITEM(args, 0, temp); 280 /* temp is now a borrowed reference; consider it unused. */ 281 self->in_callback = 1; 282 temp = call_with_frame("CharacterData", __LINE__, 283 self->handlers[CharacterData], args, self); 284 /* temp is an owned reference again, or NULL */ 285 self->in_callback = 0; 286 Py_DECREF(args); 287 if (temp == NULL) { 288 flag_error(self); 289 XML_SetCharacterDataHandler(self->itself, 290 noop_character_data_handler); 291 return -1; 292 } 293 Py_DECREF(temp); 294 return 0; 295 } 296 297 static int 298 flush_character_buffer(xmlparseobject *self) 299 { 300 int rc; 301 if (self->buffer == NULL || self->buffer_used == 0) 302 return 0; 303 rc = call_character_handler(self, self->buffer, self->buffer_used); 304 self->buffer_used = 0; 305 return rc; 306 } 307 308 static void 309 my_CharacterDataHandler(void *userData, const XML_Char *data, int len) 310 { 311 xmlparseobject *self = (xmlparseobject *) userData; 312 313 if (PyErr_Occurred()) 314 return; 315 316 if (self->buffer == NULL) 317 call_character_handler(self, data, len); 318 else { 319 if ((self->buffer_used + len) > self->buffer_size) { 320 if (flush_character_buffer(self) < 0) 321 return; 322 /* handler might have changed; drop the rest on the floor 323 * if there isn't a handler anymore 324 */ 325 if (!have_handler(self, CharacterData)) 326 return; 327 } 328 if (len > self->buffer_size) { 329 call_character_handler(self, data, len); 330 self->buffer_used = 0; 331 } 332 else { 333 memcpy(self->buffer + self->buffer_used, 334 data, len * sizeof(XML_Char)); 335 self->buffer_used += len; 336 } 337 } 338 } 339 340 static void 341 my_StartElementHandler(void *userData, 342 const XML_Char *name, const XML_Char *atts[]) 343 { 344 xmlparseobject *self = (xmlparseobject *)userData; 345 346 if (have_handler(self, StartElement)) { 347 PyObject *container, *rv, *args; 348 int i, max; 349 350 if (PyErr_Occurred()) 351 return; 352 353 if (flush_character_buffer(self) < 0) 354 return; 355 /* Set max to the number of slots filled in atts[]; max/2 is 356 * the number of attributes we need to process. 357 */ 358 if (self->specified_attributes) { 359 max = XML_GetSpecifiedAttributeCount(self->itself); 360 } 361 else { 362 max = 0; 363 while (atts[max] != NULL) 364 max += 2; 365 } 366 /* Build the container. */ 367 if (self->ordered_attributes) 368 container = PyList_New(max); 369 else 370 container = PyDict_New(); 371 if (container == NULL) { 372 flag_error(self); 373 return; 374 } 375 for (i = 0; i < max; i += 2) { 376 PyObject *n = string_intern(self, (XML_Char *) atts[i]); 377 PyObject *v; 378 if (n == NULL) { 379 flag_error(self); 380 Py_DECREF(container); 381 return; 382 } 383 v = conv_string_to_unicode((XML_Char *) atts[i+1]); 384 if (v == NULL) { 385 flag_error(self); 386 Py_DECREF(container); 387 Py_DECREF(n); 388 return; 389 } 390 if (self->ordered_attributes) { 391 PyList_SET_ITEM(container, i, n); 392 PyList_SET_ITEM(container, i+1, v); 393 } 394 else if (PyDict_SetItem(container, n, v)) { 395 flag_error(self); 396 Py_DECREF(n); 397 Py_DECREF(v); 398 Py_DECREF(container); 399 return; 400 } 401 else { 402 Py_DECREF(n); 403 Py_DECREF(v); 404 } 405 } 406 args = string_intern(self, name); 407 if (args == NULL) { 408 Py_DECREF(container); 409 return; 410 } 411 args = Py_BuildValue("(NN)", args, container); 412 if (args == NULL) { 413 return; 414 } 415 /* Container is now a borrowed reference; ignore it. */ 416 self->in_callback = 1; 417 rv = call_with_frame("StartElement", __LINE__, 418 self->handlers[StartElement], args, self); 419 self->in_callback = 0; 420 Py_DECREF(args); 421 if (rv == NULL) { 422 flag_error(self); 423 return; 424 } 425 Py_DECREF(rv); 426 } 427 } 428 429 #define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \ 430 RETURN, GETUSERDATA) \ 431 static RC \ 432 my_##NAME##Handler PARAMS {\ 433 xmlparseobject *self = GETUSERDATA ; \ 434 PyObject *args = NULL; \ 435 PyObject *rv = NULL; \ 436 INIT \ 437 \ 438 if (have_handler(self, NAME)) { \ 439 if (PyErr_Occurred()) \ 440 return RETURN; \ 441 if (flush_character_buffer(self) < 0) \ 442 return RETURN; \ 443 args = Py_BuildValue PARAM_FORMAT ;\ 444 if (!args) { flag_error(self); return RETURN;} \ 445 self->in_callback = 1; \ 446 rv = call_with_frame(#NAME,__LINE__, \ 447 self->handlers[NAME], args, self); \ 448 self->in_callback = 0; \ 449 Py_DECREF(args); \ 450 if (rv == NULL) { \ 451 flag_error(self); \ 452 return RETURN; \ 453 } \ 454 CONVERSION \ 455 Py_DECREF(rv); \ 456 } \ 457 return RETURN; \ 458 } 459 460 #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \ 461 RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\ 462 (xmlparseobject *)userData) 463 464 #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\ 465 RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \ 466 rc = PyLong_AsLong(rv);, rc, \ 467 (xmlparseobject *)userData) 468 469 VOID_HANDLER(EndElement, 470 (void *userData, const XML_Char *name), 471 ("(N)", string_intern(self, name))) 472 473 VOID_HANDLER(ProcessingInstruction, 474 (void *userData, 475 const XML_Char *target, 476 const XML_Char *data), 477 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data)) 478 479 VOID_HANDLER(UnparsedEntityDecl, 480 (void *userData, 481 const XML_Char *entityName, 482 const XML_Char *base, 483 const XML_Char *systemId, 484 const XML_Char *publicId, 485 const XML_Char *notationName), 486 ("(NNNNN)", 487 string_intern(self, entityName), string_intern(self, base), 488 string_intern(self, systemId), string_intern(self, publicId), 489 string_intern(self, notationName))) 490 491 VOID_HANDLER(EntityDecl, 492 (void *userData, 493 const XML_Char *entityName, 494 int is_parameter_entity, 495 const XML_Char *value, 496 int value_length, 497 const XML_Char *base, 498 const XML_Char *systemId, 499 const XML_Char *publicId, 500 const XML_Char *notationName), 501 ("NiNNNNN", 502 string_intern(self, entityName), is_parameter_entity, 503 (conv_string_len_to_unicode(value, value_length)), 504 string_intern(self, base), string_intern(self, systemId), 505 string_intern(self, publicId), 506 string_intern(self, notationName))) 507 508 VOID_HANDLER(XmlDecl, 509 (void *userData, 510 const XML_Char *version, 511 const XML_Char *encoding, 512 int standalone), 513 ("(O&O&i)", 514 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding, 515 standalone)) 516 517 static PyObject * 518 conv_content_model(XML_Content * const model, 519 PyObject *(*conv_string)(const XML_Char *)) 520 { 521 PyObject *result = NULL; 522 PyObject *children = PyTuple_New(model->numchildren); 523 int i; 524 525 if (children != NULL) { 526 assert(model->numchildren < INT_MAX); 527 for (i = 0; i < (int)model->numchildren; ++i) { 528 PyObject *child = conv_content_model(&model->children[i], 529 conv_string); 530 if (child == NULL) { 531 Py_XDECREF(children); 532 return NULL; 533 } 534 PyTuple_SET_ITEM(children, i, child); 535 } 536 result = Py_BuildValue("(iiO&N)", 537 model->type, model->quant, 538 conv_string,model->name, children); 539 } 540 return result; 541 } 542 543 static void 544 my_ElementDeclHandler(void *userData, 545 const XML_Char *name, 546 XML_Content *model) 547 { 548 xmlparseobject *self = (xmlparseobject *)userData; 549 PyObject *args = NULL; 550 551 if (have_handler(self, ElementDecl)) { 552 PyObject *rv = NULL; 553 PyObject *modelobj, *nameobj; 554 555 if (PyErr_Occurred()) 556 return; 557 558 if (flush_character_buffer(self) < 0) 559 goto finally; 560 modelobj = conv_content_model(model, (conv_string_to_unicode)); 561 if (modelobj == NULL) { 562 flag_error(self); 563 goto finally; 564 } 565 nameobj = string_intern(self, name); 566 if (nameobj == NULL) { 567 Py_DECREF(modelobj); 568 flag_error(self); 569 goto finally; 570 } 571 args = Py_BuildValue("NN", nameobj, modelobj); 572 if (args == NULL) { 573 flag_error(self); 574 goto finally; 575 } 576 self->in_callback = 1; 577 rv = call_with_frame("ElementDecl", __LINE__, 578 self->handlers[ElementDecl], args, self); 579 self->in_callback = 0; 580 if (rv == NULL) { 581 flag_error(self); 582 goto finally; 583 } 584 Py_DECREF(rv); 585 } 586 finally: 587 Py_XDECREF(args); 588 XML_FreeContentModel(self->itself, model); 589 return; 590 } 591 592 VOID_HANDLER(AttlistDecl, 593 (void *userData, 594 const XML_Char *elname, 595 const XML_Char *attname, 596 const XML_Char *att_type, 597 const XML_Char *dflt, 598 int isrequired), 599 ("(NNO&O&i)", 600 string_intern(self, elname), string_intern(self, attname), 601 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt, 602 isrequired)) 603 604 #if XML_COMBINED_VERSION >= 19504 605 VOID_HANDLER(SkippedEntity, 606 (void *userData, 607 const XML_Char *entityName, 608 int is_parameter_entity), 609 ("Ni", 610 string_intern(self, entityName), is_parameter_entity)) 611 #endif 612 613 VOID_HANDLER(NotationDecl, 614 (void *userData, 615 const XML_Char *notationName, 616 const XML_Char *base, 617 const XML_Char *systemId, 618 const XML_Char *publicId), 619 ("(NNNN)", 620 string_intern(self, notationName), string_intern(self, base), 621 string_intern(self, systemId), string_intern(self, publicId))) 622 623 VOID_HANDLER(StartNamespaceDecl, 624 (void *userData, 625 const XML_Char *prefix, 626 const XML_Char *uri), 627 ("(NN)", 628 string_intern(self, prefix), string_intern(self, uri))) 629 630 VOID_HANDLER(EndNamespaceDecl, 631 (void *userData, 632 const XML_Char *prefix), 633 ("(N)", string_intern(self, prefix))) 634 635 VOID_HANDLER(Comment, 636 (void *userData, const XML_Char *data), 637 ("(O&)", conv_string_to_unicode ,data)) 638 639 VOID_HANDLER(StartCdataSection, 640 (void *userData), 641 ("()")) 642 643 VOID_HANDLER(EndCdataSection, 644 (void *userData), 645 ("()")) 646 647 VOID_HANDLER(Default, 648 (void *userData, const XML_Char *s, int len), 649 ("(N)", (conv_string_len_to_unicode(s,len)))) 650 651 VOID_HANDLER(DefaultHandlerExpand, 652 (void *userData, const XML_Char *s, int len), 653 ("(N)", (conv_string_len_to_unicode(s,len)))) 654 655 INT_HANDLER(NotStandalone, 656 (void *userData), 657 ("()")) 658 659 RC_HANDLER(int, ExternalEntityRef, 660 (XML_Parser parser, 661 const XML_Char *context, 662 const XML_Char *base, 663 const XML_Char *systemId, 664 const XML_Char *publicId), 665 int rc=0;, 666 ("(O&NNN)", 667 conv_string_to_unicode ,context, string_intern(self, base), 668 string_intern(self, systemId), string_intern(self, publicId)), 669 rc = PyLong_AsLong(rv);, rc, 670 XML_GetUserData(parser)) 671 672 /* XXX UnknownEncodingHandler */ 673 674 VOID_HANDLER(StartDoctypeDecl, 675 (void *userData, const XML_Char *doctypeName, 676 const XML_Char *sysid, const XML_Char *pubid, 677 int has_internal_subset), 678 ("(NNNi)", string_intern(self, doctypeName), 679 string_intern(self, sysid), string_intern(self, pubid), 680 has_internal_subset)) 681 682 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()")) 683 684 /* ---------------------------------------------------------------- */ 685 /*[clinic input] 686 class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype" 687 [clinic start generated code]*/ 688 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/ 689 690 691 static PyObject * 692 get_parse_result(xmlparseobject *self, int rv) 693 { 694 if (PyErr_Occurred()) { 695 return NULL; 696 } 697 if (rv == 0) { 698 return set_error(self, XML_GetErrorCode(self->itself)); 699 } 700 if (flush_character_buffer(self) < 0) { 701 return NULL; 702 } 703 return PyLong_FromLong(rv); 704 } 705 706 #define MAX_CHUNK_SIZE (1 << 20) 707 708 /*[clinic input] 709 pyexpat.xmlparser.Parse 710 711 data: object 712 isfinal: bool(accept={int}) = False 713 / 714 715 Parse XML data. 716 717 `isfinal' should be true at end of input. 718 [clinic start generated code]*/ 719 720 static PyObject * 721 pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyObject *data, 722 int isfinal) 723 /*[clinic end generated code: output=f4db843dd1f4ed4b input=eb616027bfa9847f]*/ 724 { 725 const char *s; 726 Py_ssize_t slen; 727 Py_buffer view; 728 int rc; 729 730 if (PyUnicode_Check(data)) { 731 view.buf = NULL; 732 s = PyUnicode_AsUTF8AndSize(data, &slen); 733 if (s == NULL) 734 return NULL; 735 /* Explicitly set UTF-8 encoding. Return code ignored. */ 736 (void)XML_SetEncoding(self->itself, "utf-8"); 737 } 738 else { 739 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0) 740 return NULL; 741 s = view.buf; 742 slen = view.len; 743 } 744 745 while (slen > MAX_CHUNK_SIZE) { 746 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0); 747 if (!rc) 748 goto done; 749 s += MAX_CHUNK_SIZE; 750 slen -= MAX_CHUNK_SIZE; 751 } 752 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX); 753 assert(slen <= INT_MAX); 754 rc = XML_Parse(self->itself, s, (int)slen, isfinal); 755 756 done: 757 if (view.buf != NULL) 758 PyBuffer_Release(&view); 759 return get_parse_result(self, rc); 760 } 761 762 /* File reading copied from cPickle */ 763 764 #define BUF_SIZE 2048 765 766 static int 767 readinst(char *buf, int buf_size, PyObject *meth) 768 { 769 PyObject *str; 770 Py_ssize_t len; 771 const char *ptr; 772 773 str = PyObject_CallFunction(meth, "n", buf_size); 774 if (str == NULL) 775 goto error; 776 777 if (PyBytes_Check(str)) 778 ptr = PyBytes_AS_STRING(str); 779 else if (PyByteArray_Check(str)) 780 ptr = PyByteArray_AS_STRING(str); 781 else { 782 PyErr_Format(PyExc_TypeError, 783 "read() did not return a bytes object (type=%.400s)", 784 Py_TYPE(str)->tp_name); 785 goto error; 786 } 787 len = Py_SIZE(str); 788 if (len > buf_size) { 789 PyErr_Format(PyExc_ValueError, 790 "read() returned too much data: " 791 "%i bytes requested, %zd returned", 792 buf_size, len); 793 goto error; 794 } 795 memcpy(buf, ptr, len); 796 Py_DECREF(str); 797 /* len <= buf_size <= INT_MAX */ 798 return (int)len; 799 800 error: 801 Py_XDECREF(str); 802 return -1; 803 } 804 805 /*[clinic input] 806 pyexpat.xmlparser.ParseFile 807 808 file: object 809 / 810 811 Parse XML data from file-like object. 812 [clinic start generated code]*/ 813 814 static PyObject * 815 pyexpat_xmlparser_ParseFile(xmlparseobject *self, PyObject *file) 816 /*[clinic end generated code: output=2adc6a13100cc42b input=fbb5a12b6038d735]*/ 817 { 818 int rv = 1; 819 PyObject *readmethod = NULL; 820 _Py_IDENTIFIER(read); 821 822 readmethod = _PyObject_GetAttrId(file, &PyId_read); 823 if (readmethod == NULL) { 824 PyErr_SetString(PyExc_TypeError, 825 "argument must have 'read' attribute"); 826 return NULL; 827 } 828 for (;;) { 829 int bytes_read; 830 void *buf = XML_GetBuffer(self->itself, BUF_SIZE); 831 if (buf == NULL) { 832 Py_XDECREF(readmethod); 833 return get_parse_result(self, 0); 834 } 835 836 bytes_read = readinst(buf, BUF_SIZE, readmethod); 837 if (bytes_read < 0) { 838 Py_DECREF(readmethod); 839 return NULL; 840 } 841 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0); 842 if (PyErr_Occurred()) { 843 Py_XDECREF(readmethod); 844 return NULL; 845 } 846 847 if (!rv || bytes_read == 0) 848 break; 849 } 850 Py_XDECREF(readmethod); 851 return get_parse_result(self, rv); 852 } 853 854 /*[clinic input] 855 pyexpat.xmlparser.SetBase 856 857 base: str 858 / 859 860 Set the base URL for the parser. 861 [clinic start generated code]*/ 862 863 static PyObject * 864 pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base) 865 /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/ 866 { 867 if (!XML_SetBase(self->itself, base)) { 868 return PyErr_NoMemory(); 869 } 870 Py_RETURN_NONE; 871 } 872 873 /*[clinic input] 874 pyexpat.xmlparser.GetBase 875 876 Return base URL string for the parser. 877 [clinic start generated code]*/ 878 879 static PyObject * 880 pyexpat_xmlparser_GetBase_impl(xmlparseobject *self) 881 /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/ 882 { 883 return Py_BuildValue("z", XML_GetBase(self->itself)); 884 } 885 886 /*[clinic input] 887 pyexpat.xmlparser.GetInputContext 888 889 Return the untranslated text of the input that caused the current event. 890 891 If the event was generated by a large amount of text (such as a start tag 892 for an element with many attributes), not all of the text may be available. 893 [clinic start generated code]*/ 894 895 static PyObject * 896 pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self) 897 /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/ 898 { 899 if (self->in_callback) { 900 int offset, size; 901 const char *buffer 902 = XML_GetInputContext(self->itself, &offset, &size); 903 904 if (buffer != NULL) 905 return PyBytes_FromStringAndSize(buffer + offset, 906 size - offset); 907 else 908 Py_RETURN_NONE; 909 } 910 else 911 Py_RETURN_NONE; 912 } 913 914 /*[clinic input] 915 pyexpat.xmlparser.ExternalEntityParserCreate 916 917 context: str(accept={str, NoneType}) 918 encoding: str = NULL 919 / 920 921 Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler. 922 [clinic start generated code]*/ 923 924 static PyObject * 925 pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, 926 const char *context, 927 const char *encoding) 928 /*[clinic end generated code: output=535cda9d7a0fbcd6 input=b906714cc122c322]*/ 929 { 930 xmlparseobject *new_parser; 931 int i; 932 933 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype); 934 if (new_parser == NULL) 935 return NULL; 936 new_parser->buffer_size = self->buffer_size; 937 new_parser->buffer_used = 0; 938 new_parser->buffer = NULL; 939 new_parser->ordered_attributes = self->ordered_attributes; 940 new_parser->specified_attributes = self->specified_attributes; 941 new_parser->in_callback = 0; 942 new_parser->ns_prefixes = self->ns_prefixes; 943 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context, 944 encoding); 945 new_parser->handlers = 0; 946 new_parser->intern = self->intern; 947 Py_XINCREF(new_parser->intern); 948 PyObject_GC_Track(new_parser); 949 950 if (self->buffer != NULL) { 951 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size); 952 if (new_parser->buffer == NULL) { 953 Py_DECREF(new_parser); 954 return PyErr_NoMemory(); 955 } 956 } 957 if (!new_parser->itself) { 958 Py_DECREF(new_parser); 959 return PyErr_NoMemory(); 960 } 961 962 XML_SetUserData(new_parser->itself, (void *)new_parser); 963 964 /* allocate and clear handlers first */ 965 for (i = 0; handler_info[i].name != NULL; i++) 966 /* do nothing */; 967 968 new_parser->handlers = PyMem_New(PyObject *, i); 969 if (!new_parser->handlers) { 970 Py_DECREF(new_parser); 971 return PyErr_NoMemory(); 972 } 973 clear_handlers(new_parser, 1); 974 975 /* then copy handlers from self */ 976 for (i = 0; handler_info[i].name != NULL; i++) { 977 PyObject *handler = self->handlers[i]; 978 if (handler != NULL) { 979 Py_INCREF(handler); 980 new_parser->handlers[i] = handler; 981 handler_info[i].setter(new_parser->itself, 982 handler_info[i].handler); 983 } 984 } 985 return (PyObject *)new_parser; 986 } 987 988 /*[clinic input] 989 pyexpat.xmlparser.SetParamEntityParsing 990 991 flag: int 992 / 993 994 Controls parsing of parameter entities (including the external DTD subset). 995 996 Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER, 997 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and 998 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag 999 was successful. 1000 [clinic start generated code]*/ 1001 1002 static PyObject * 1003 pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag) 1004 /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/ 1005 { 1006 flag = XML_SetParamEntityParsing(self->itself, flag); 1007 return PyLong_FromLong(flag); 1008 } 1009 1010 1011 #if XML_COMBINED_VERSION >= 19505 1012 /*[clinic input] 1013 pyexpat.xmlparser.UseForeignDTD 1014 1015 flag: bool = True 1016 / 1017 1018 Allows the application to provide an artificial external subset if one is not specified as part of the document instance. 1019 1020 This readily allows the use of a 'default' document type controlled by the 1021 application, while still getting the advantage of providing document type 1022 information to the parser. 'flag' defaults to True if not provided. 1023 [clinic start generated code]*/ 1024 1025 static PyObject * 1026 pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, int flag) 1027 /*[clinic end generated code: output=cfaa9aa50bb0f65c input=78144c519d116a6e]*/ 1028 { 1029 enum XML_Error rc; 1030 1031 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE); 1032 if (rc != XML_ERROR_NONE) { 1033 return set_error(self, rc); 1034 } 1035 Py_RETURN_NONE; 1036 } 1037 #endif 1038 1039 /*[clinic input] 1040 pyexpat.xmlparser.__dir__ 1041 [clinic start generated code]*/ 1042 1043 static PyObject * 1044 pyexpat_xmlparser___dir___impl(xmlparseobject *self) 1045 /*[clinic end generated code: output=bc22451efb9e4d17 input=76aa455f2a661384]*/ 1046 { 1047 #define APPEND(list, str) \ 1048 do { \ 1049 PyObject *o = PyUnicode_FromString(str); \ 1050 if (o != NULL) \ 1051 PyList_Append(list, o); \ 1052 Py_XDECREF(o); \ 1053 } while (0) 1054 1055 int i; 1056 PyObject *rc = PyList_New(0); 1057 if (!rc) 1058 return NULL; 1059 for (i = 0; handler_info[i].name != NULL; i++) { 1060 PyObject *o = get_handler_name(&handler_info[i]); 1061 if (o != NULL) 1062 PyList_Append(rc, o); 1063 Py_XDECREF(o); 1064 } 1065 APPEND(rc, "ErrorCode"); 1066 APPEND(rc, "ErrorLineNumber"); 1067 APPEND(rc, "ErrorColumnNumber"); 1068 APPEND(rc, "ErrorByteIndex"); 1069 APPEND(rc, "CurrentLineNumber"); 1070 APPEND(rc, "CurrentColumnNumber"); 1071 APPEND(rc, "CurrentByteIndex"); 1072 APPEND(rc, "buffer_size"); 1073 APPEND(rc, "buffer_text"); 1074 APPEND(rc, "buffer_used"); 1075 APPEND(rc, "namespace_prefixes"); 1076 APPEND(rc, "ordered_attributes"); 1077 APPEND(rc, "specified_attributes"); 1078 APPEND(rc, "intern"); 1079 1080 #undef APPEND 1081 1082 if (PyErr_Occurred()) { 1083 Py_DECREF(rc); 1084 rc = NULL; 1085 } 1086 1087 return rc; 1088 } 1089 1090 static struct PyMethodDef xmlparse_methods[] = { 1091 PYEXPAT_XMLPARSER_PARSE_METHODDEF 1092 PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF 1093 PYEXPAT_XMLPARSER_SETBASE_METHODDEF 1094 PYEXPAT_XMLPARSER_GETBASE_METHODDEF 1095 PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF 1096 PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF 1097 PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF 1098 #if XML_COMBINED_VERSION >= 19505 1099 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF 1100 #endif 1101 PYEXPAT_XMLPARSER___DIR___METHODDEF 1102 {NULL, NULL} /* sentinel */ 1103 }; 1104 1105 /* ---------- */ 1106 1107 1108 1109 /* pyexpat international encoding support. 1110 Make it as simple as possible. 1111 */ 1112 1113 static int 1114 PyUnknownEncodingHandler(void *encodingHandlerData, 1115 const XML_Char *name, 1116 XML_Encoding *info) 1117 { 1118 static unsigned char template_buffer[256] = {0}; 1119 PyObject* u; 1120 int i; 1121 void *data; 1122 unsigned int kind; 1123 1124 if (PyErr_Occurred()) 1125 return XML_STATUS_ERROR; 1126 1127 if (template_buffer[1] == 0) { 1128 for (i = 0; i < 256; i++) 1129 template_buffer[i] = i; 1130 } 1131 1132 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace"); 1133 if (u == NULL || PyUnicode_READY(u)) { 1134 Py_XDECREF(u); 1135 return XML_STATUS_ERROR; 1136 } 1137 1138 if (PyUnicode_GET_LENGTH(u) != 256) { 1139 Py_DECREF(u); 1140 PyErr_SetString(PyExc_ValueError, 1141 "multi-byte encodings are not supported"); 1142 return XML_STATUS_ERROR; 1143 } 1144 1145 kind = PyUnicode_KIND(u); 1146 data = PyUnicode_DATA(u); 1147 for (i = 0; i < 256; i++) { 1148 Py_UCS4 ch = PyUnicode_READ(kind, data, i); 1149 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER) 1150 info->map[i] = ch; 1151 else 1152 info->map[i] = -1; 1153 } 1154 1155 info->data = NULL; 1156 info->convert = NULL; 1157 info->release = NULL; 1158 Py_DECREF(u); 1159 1160 return XML_STATUS_OK; 1161 } 1162 1163 1164 static PyObject * 1165 newxmlparseobject(const char *encoding, const char *namespace_separator, PyObject *intern) 1166 { 1167 int i; 1168 xmlparseobject *self; 1169 1170 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype); 1171 if (self == NULL) 1172 return NULL; 1173 1174 self->buffer = NULL; 1175 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE; 1176 self->buffer_used = 0; 1177 self->ordered_attributes = 0; 1178 self->specified_attributes = 0; 1179 self->in_callback = 0; 1180 self->ns_prefixes = 0; 1181 self->handlers = NULL; 1182 self->intern = intern; 1183 Py_XINCREF(self->intern); 1184 PyObject_GC_Track(self); 1185 1186 /* namespace_separator is either NULL or contains one char + \0 */ 1187 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler, 1188 namespace_separator); 1189 if (self->itself == NULL) { 1190 PyErr_SetString(PyExc_RuntimeError, 1191 "XML_ParserCreate failed"); 1192 Py_DECREF(self); 1193 return NULL; 1194 } 1195 #if XML_COMBINED_VERSION >= 20100 1196 /* This feature was added upstream in libexpat 2.1.0. */ 1197 XML_SetHashSalt(self->itself, 1198 (unsigned long)_Py_HashSecret.expat.hashsalt); 1199 #endif 1200 XML_SetUserData(self->itself, (void *)self); 1201 XML_SetUnknownEncodingHandler(self->itself, 1202 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL); 1203 1204 for (i = 0; handler_info[i].name != NULL; i++) 1205 /* do nothing */; 1206 1207 self->handlers = PyMem_New(PyObject *, i); 1208 if (!self->handlers) { 1209 Py_DECREF(self); 1210 return PyErr_NoMemory(); 1211 } 1212 clear_handlers(self, 1); 1213 1214 return (PyObject*)self; 1215 } 1216 1217 1218 static void 1219 xmlparse_dealloc(xmlparseobject *self) 1220 { 1221 int i; 1222 PyObject_GC_UnTrack(self); 1223 if (self->itself != NULL) 1224 XML_ParserFree(self->itself); 1225 self->itself = NULL; 1226 1227 if (self->handlers != NULL) { 1228 for (i = 0; handler_info[i].name != NULL; i++) 1229 Py_CLEAR(self->handlers[i]); 1230 PyMem_Free(self->handlers); 1231 self->handlers = NULL; 1232 } 1233 if (self->buffer != NULL) { 1234 PyMem_Free(self->buffer); 1235 self->buffer = NULL; 1236 } 1237 Py_XDECREF(self->intern); 1238 PyObject_GC_Del(self); 1239 } 1240 1241 static int 1242 handlername2int(PyObject *name) 1243 { 1244 int i; 1245 for (i = 0; handler_info[i].name != NULL; i++) { 1246 if (_PyUnicode_EqualToASCIIString(name, handler_info[i].name)) { 1247 return i; 1248 } 1249 } 1250 return -1; 1251 } 1252 1253 static PyObject * 1254 get_pybool(int istrue) 1255 { 1256 PyObject *result = istrue ? Py_True : Py_False; 1257 Py_INCREF(result); 1258 return result; 1259 } 1260 1261 static PyObject * 1262 xmlparse_getattro(xmlparseobject *self, PyObject *nameobj) 1263 { 1264 Py_UCS4 first_char; 1265 int handlernum = -1; 1266 1267 if (!PyUnicode_Check(nameobj)) 1268 goto generic; 1269 if (PyUnicode_READY(nameobj)) 1270 return NULL; 1271 1272 handlernum = handlername2int(nameobj); 1273 1274 if (handlernum != -1) { 1275 PyObject *result = self->handlers[handlernum]; 1276 if (result == NULL) 1277 result = Py_None; 1278 Py_INCREF(result); 1279 return result; 1280 } 1281 1282 first_char = PyUnicode_READ_CHAR(nameobj, 0); 1283 if (first_char == 'E') { 1284 if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorCode")) 1285 return PyLong_FromLong((long) 1286 XML_GetErrorCode(self->itself)); 1287 if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorLineNumber")) 1288 return PyLong_FromLong((long) 1289 XML_GetErrorLineNumber(self->itself)); 1290 if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorColumnNumber")) 1291 return PyLong_FromLong((long) 1292 XML_GetErrorColumnNumber(self->itself)); 1293 if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorByteIndex")) 1294 return PyLong_FromLong((long) 1295 XML_GetErrorByteIndex(self->itself)); 1296 } 1297 if (first_char == 'C') { 1298 if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentLineNumber")) 1299 return PyLong_FromLong((long) 1300 XML_GetCurrentLineNumber(self->itself)); 1301 if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentColumnNumber")) 1302 return PyLong_FromLong((long) 1303 XML_GetCurrentColumnNumber(self->itself)); 1304 if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentByteIndex")) 1305 return PyLong_FromLong((long) 1306 XML_GetCurrentByteIndex(self->itself)); 1307 } 1308 if (first_char == 'b') { 1309 if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_size")) 1310 return PyLong_FromLong((long) self->buffer_size); 1311 if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_text")) 1312 return get_pybool(self->buffer != NULL); 1313 if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_used")) 1314 return PyLong_FromLong((long) self->buffer_used); 1315 } 1316 if (_PyUnicode_EqualToASCIIString(nameobj, "namespace_prefixes")) 1317 return get_pybool(self->ns_prefixes); 1318 if (_PyUnicode_EqualToASCIIString(nameobj, "ordered_attributes")) 1319 return get_pybool(self->ordered_attributes); 1320 if (_PyUnicode_EqualToASCIIString(nameobj, "specified_attributes")) 1321 return get_pybool((long) self->specified_attributes); 1322 if (_PyUnicode_EqualToASCIIString(nameobj, "intern")) { 1323 if (self->intern == NULL) { 1324 Py_RETURN_NONE; 1325 } 1326 else { 1327 Py_INCREF(self->intern); 1328 return self->intern; 1329 } 1330 } 1331 generic: 1332 return PyObject_GenericGetAttr((PyObject*)self, nameobj); 1333 } 1334 1335 static int 1336 sethandler(xmlparseobject *self, PyObject *name, PyObject* v) 1337 { 1338 int handlernum = handlername2int(name); 1339 if (handlernum >= 0) { 1340 xmlhandler c_handler = NULL; 1341 1342 if (v == Py_None) { 1343 /* If this is the character data handler, and a character 1344 data handler is already active, we need to be more 1345 careful. What we can safely do is replace the existing 1346 character data handler callback function with a no-op 1347 function that will refuse to call Python. The downside 1348 is that this doesn't completely remove the character 1349 data handler from the C layer if there's any callback 1350 active, so Expat does a little more work than it 1351 otherwise would, but that's really an odd case. A more 1352 elaborate system of handlers and state could remove the 1353 C handler more effectively. */ 1354 if (handlernum == CharacterData && self->in_callback) 1355 c_handler = noop_character_data_handler; 1356 v = NULL; 1357 } 1358 else if (v != NULL) { 1359 Py_INCREF(v); 1360 c_handler = handler_info[handlernum].handler; 1361 } 1362 Py_XSETREF(self->handlers[handlernum], v); 1363 handler_info[handlernum].setter(self->itself, c_handler); 1364 return 1; 1365 } 1366 return 0; 1367 } 1368 1369 static int 1370 xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v) 1371 { 1372 /* Set attribute 'name' to value 'v'. v==NULL means delete */ 1373 if (!PyUnicode_Check(name)) { 1374 PyErr_Format(PyExc_TypeError, 1375 "attribute name must be string, not '%.200s'", 1376 name->ob_type->tp_name); 1377 return -1; 1378 } 1379 if (v == NULL) { 1380 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute"); 1381 return -1; 1382 } 1383 if (_PyUnicode_EqualToASCIIString(name, "buffer_text")) { 1384 int b = PyObject_IsTrue(v); 1385 if (b < 0) 1386 return -1; 1387 if (b) { 1388 if (self->buffer == NULL) { 1389 self->buffer = PyMem_Malloc(self->buffer_size); 1390 if (self->buffer == NULL) { 1391 PyErr_NoMemory(); 1392 return -1; 1393 } 1394 self->buffer_used = 0; 1395 } 1396 } 1397 else if (self->buffer != NULL) { 1398 if (flush_character_buffer(self) < 0) 1399 return -1; 1400 PyMem_Free(self->buffer); 1401 self->buffer = NULL; 1402 } 1403 return 0; 1404 } 1405 if (_PyUnicode_EqualToASCIIString(name, "namespace_prefixes")) { 1406 int b = PyObject_IsTrue(v); 1407 if (b < 0) 1408 return -1; 1409 self->ns_prefixes = b; 1410 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes); 1411 return 0; 1412 } 1413 if (_PyUnicode_EqualToASCIIString(name, "ordered_attributes")) { 1414 int b = PyObject_IsTrue(v); 1415 if (b < 0) 1416 return -1; 1417 self->ordered_attributes = b; 1418 return 0; 1419 } 1420 if (_PyUnicode_EqualToASCIIString(name, "specified_attributes")) { 1421 int b = PyObject_IsTrue(v); 1422 if (b < 0) 1423 return -1; 1424 self->specified_attributes = b; 1425 return 0; 1426 } 1427 1428 if (_PyUnicode_EqualToASCIIString(name, "buffer_size")) { 1429 long new_buffer_size; 1430 if (!PyLong_Check(v)) { 1431 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer"); 1432 return -1; 1433 } 1434 1435 new_buffer_size = PyLong_AsLong(v); 1436 if (new_buffer_size <= 0) { 1437 if (!PyErr_Occurred()) 1438 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero"); 1439 return -1; 1440 } 1441 1442 /* trivial case -- no change */ 1443 if (new_buffer_size == self->buffer_size) { 1444 return 0; 1445 } 1446 1447 /* check maximum */ 1448 if (new_buffer_size > INT_MAX) { 1449 char errmsg[100]; 1450 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX); 1451 PyErr_SetString(PyExc_ValueError, errmsg); 1452 return -1; 1453 } 1454 1455 if (self->buffer != NULL) { 1456 /* there is already a buffer */ 1457 if (self->buffer_used != 0) { 1458 if (flush_character_buffer(self) < 0) { 1459 return -1; 1460 } 1461 } 1462 /* free existing buffer */ 1463 PyMem_Free(self->buffer); 1464 } 1465 self->buffer = PyMem_Malloc(new_buffer_size); 1466 if (self->buffer == NULL) { 1467 PyErr_NoMemory(); 1468 return -1; 1469 } 1470 self->buffer_size = new_buffer_size; 1471 return 0; 1472 } 1473 1474 if (_PyUnicode_EqualToASCIIString(name, "CharacterDataHandler")) { 1475 /* If we're changing the character data handler, flush all 1476 * cached data with the old handler. Not sure there's a 1477 * "right" thing to do, though, but this probably won't 1478 * happen. 1479 */ 1480 if (flush_character_buffer(self) < 0) 1481 return -1; 1482 } 1483 if (sethandler(self, name, v)) { 1484 return 0; 1485 } 1486 PyErr_SetObject(PyExc_AttributeError, name); 1487 return -1; 1488 } 1489 1490 static int 1491 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg) 1492 { 1493 int i; 1494 for (i = 0; handler_info[i].name != NULL; i++) 1495 Py_VISIT(op->handlers[i]); 1496 return 0; 1497 } 1498 1499 static int 1500 xmlparse_clear(xmlparseobject *op) 1501 { 1502 clear_handlers(op, 0); 1503 Py_CLEAR(op->intern); 1504 return 0; 1505 } 1506 1507 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser"); 1508 1509 static PyTypeObject Xmlparsetype = { 1510 PyVarObject_HEAD_INIT(NULL, 0) 1511 "pyexpat.xmlparser", /*tp_name*/ 1512 sizeof(xmlparseobject), /*tp_basicsize*/ 1513 0, /*tp_itemsize*/ 1514 /* methods */ 1515 (destructor)xmlparse_dealloc, /*tp_dealloc*/ 1516 (printfunc)0, /*tp_print*/ 1517 0, /*tp_getattr*/ 1518 0, /*tp_setattr*/ 1519 0, /*tp_reserved*/ 1520 (reprfunc)0, /*tp_repr*/ 1521 0, /*tp_as_number*/ 1522 0, /*tp_as_sequence*/ 1523 0, /*tp_as_mapping*/ 1524 (hashfunc)0, /*tp_hash*/ 1525 (ternaryfunc)0, /*tp_call*/ 1526 (reprfunc)0, /*tp_str*/ 1527 (getattrofunc)xmlparse_getattro, /* tp_getattro */ 1528 (setattrofunc)xmlparse_setattro, /* tp_setattro */ 1529 0, /* tp_as_buffer */ 1530 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 1531 Xmlparsetype__doc__, /* tp_doc - Documentation string */ 1532 (traverseproc)xmlparse_traverse, /* tp_traverse */ 1533 (inquiry)xmlparse_clear, /* tp_clear */ 1534 0, /* tp_richcompare */ 1535 0, /* tp_weaklistoffset */ 1536 0, /* tp_iter */ 1537 0, /* tp_iternext */ 1538 xmlparse_methods, /* tp_methods */ 1539 }; 1540 1541 /* End of code for xmlparser objects */ 1542 /* -------------------------------------------------------- */ 1543 1544 /*[clinic input] 1545 pyexpat.ParserCreate 1546 1547 encoding: str(accept={str, NoneType}) = NULL 1548 namespace_separator: str(accept={str, NoneType}) = NULL 1549 intern: object = NULL 1550 1551 Return a new XML parser object. 1552 [clinic start generated code]*/ 1553 1554 static PyObject * 1555 pyexpat_ParserCreate_impl(PyObject *module, const char *encoding, 1556 const char *namespace_separator, PyObject *intern) 1557 /*[clinic end generated code: output=295c0cf01ab1146c input=23d29704acad385d]*/ 1558 { 1559 PyObject *result; 1560 int intern_decref = 0; 1561 1562 if (namespace_separator != NULL 1563 && strlen(namespace_separator) > 1) { 1564 PyErr_SetString(PyExc_ValueError, 1565 "namespace_separator must be at most one" 1566 " character, omitted, or None"); 1567 return NULL; 1568 } 1569 /* Explicitly passing None means no interning is desired. 1570 Not passing anything means that a new dictionary is used. */ 1571 if (intern == Py_None) 1572 intern = NULL; 1573 else if (intern == NULL) { 1574 intern = PyDict_New(); 1575 if (!intern) 1576 return NULL; 1577 intern_decref = 1; 1578 } 1579 else if (!PyDict_Check(intern)) { 1580 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary"); 1581 return NULL; 1582 } 1583 1584 result = newxmlparseobject(encoding, namespace_separator, intern); 1585 if (intern_decref) { 1586 Py_DECREF(intern); 1587 } 1588 return result; 1589 } 1590 1591 /*[clinic input] 1592 pyexpat.ErrorString 1593 1594 code: long 1595 / 1596 1597 Returns string error for given number. 1598 [clinic start generated code]*/ 1599 1600 static PyObject * 1601 pyexpat_ErrorString_impl(PyObject *module, long code) 1602 /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/ 1603 { 1604 return Py_BuildValue("z", XML_ErrorString((int)code)); 1605 } 1606 1607 /* List of methods defined in the module */ 1608 1609 static struct PyMethodDef pyexpat_methods[] = { 1610 PYEXPAT_PARSERCREATE_METHODDEF 1611 PYEXPAT_ERRORSTRING_METHODDEF 1612 {NULL, NULL} /* sentinel */ 1613 }; 1614 1615 /* Module docstring */ 1616 1617 PyDoc_STRVAR(pyexpat_module_documentation, 1618 "Python wrapper for Expat parser."); 1619 1620 /* Initialization function for the module */ 1621 1622 #ifndef MODULE_NAME 1623 #define MODULE_NAME "pyexpat" 1624 #endif 1625 1626 #ifndef MODULE_INITFUNC 1627 #define MODULE_INITFUNC PyInit_pyexpat 1628 #endif 1629 1630 static struct PyModuleDef pyexpatmodule = { 1631 PyModuleDef_HEAD_INIT, 1632 MODULE_NAME, 1633 pyexpat_module_documentation, 1634 -1, 1635 pyexpat_methods, 1636 NULL, 1637 NULL, 1638 NULL, 1639 NULL 1640 }; 1641 1642 PyMODINIT_FUNC 1643 MODULE_INITFUNC(void) 1644 { 1645 PyObject *m, *d; 1646 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors"); 1647 PyObject *errors_module; 1648 PyObject *modelmod_name; 1649 PyObject *model_module; 1650 PyObject *tmpnum, *tmpstr; 1651 PyObject *codes_dict; 1652 PyObject *rev_codes_dict; 1653 int res; 1654 static struct PyExpat_CAPI capi; 1655 PyObject *capi_object; 1656 1657 if (errmod_name == NULL) 1658 return NULL; 1659 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model"); 1660 if (modelmod_name == NULL) 1661 return NULL; 1662 1663 if (PyType_Ready(&Xmlparsetype) < 0) 1664 return NULL; 1665 1666 /* Create the module and add the functions */ 1667 m = PyModule_Create(&pyexpatmodule); 1668 if (m == NULL) 1669 return NULL; 1670 1671 /* Add some symbolic constants to the module */ 1672 if (ErrorObject == NULL) { 1673 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError", 1674 NULL, NULL); 1675 if (ErrorObject == NULL) 1676 return NULL; 1677 } 1678 Py_INCREF(ErrorObject); 1679 PyModule_AddObject(m, "error", ErrorObject); 1680 Py_INCREF(ErrorObject); 1681 PyModule_AddObject(m, "ExpatError", ErrorObject); 1682 Py_INCREF(&Xmlparsetype); 1683 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype); 1684 1685 PyModule_AddStringConstant(m, "EXPAT_VERSION", 1686 XML_ExpatVersion()); 1687 { 1688 XML_Expat_Version info = XML_ExpatVersionInfo(); 1689 PyModule_AddObject(m, "version_info", 1690 Py_BuildValue("(iii)", info.major, 1691 info.minor, info.micro)); 1692 } 1693 /* XXX When Expat supports some way of figuring out how it was 1694 compiled, this should check and set native_encoding 1695 appropriately. 1696 */ 1697 PyModule_AddStringConstant(m, "native_encoding", "UTF-8"); 1698 1699 d = PyModule_GetDict(m); 1700 if (d == NULL) { 1701 Py_DECREF(m); 1702 return NULL; 1703 } 1704 errors_module = PyDict_GetItem(d, errmod_name); 1705 if (errors_module == NULL) { 1706 errors_module = PyModule_New(MODULE_NAME ".errors"); 1707 if (errors_module != NULL) { 1708 _PyImport_SetModule(errmod_name, errors_module); 1709 /* gives away the reference to errors_module */ 1710 PyModule_AddObject(m, "errors", errors_module); 1711 } 1712 } 1713 Py_DECREF(errmod_name); 1714 model_module = PyDict_GetItem(d, modelmod_name); 1715 if (model_module == NULL) { 1716 model_module = PyModule_New(MODULE_NAME ".model"); 1717 if (model_module != NULL) { 1718 _PyImport_SetModule(modelmod_name, model_module); 1719 /* gives away the reference to model_module */ 1720 PyModule_AddObject(m, "model", model_module); 1721 } 1722 } 1723 Py_DECREF(modelmod_name); 1724 if (errors_module == NULL || model_module == NULL) { 1725 /* Don't core dump later! */ 1726 Py_DECREF(m); 1727 return NULL; 1728 } 1729 1730 #if XML_COMBINED_VERSION > 19505 1731 { 1732 const XML_Feature *features = XML_GetFeatureList(); 1733 PyObject *list = PyList_New(0); 1734 if (list == NULL) 1735 /* just ignore it */ 1736 PyErr_Clear(); 1737 else { 1738 int i = 0; 1739 for (; features[i].feature != XML_FEATURE_END; ++i) { 1740 int ok; 1741 PyObject *item = Py_BuildValue("si", features[i].name, 1742 features[i].value); 1743 if (item == NULL) { 1744 Py_DECREF(list); 1745 list = NULL; 1746 break; 1747 } 1748 ok = PyList_Append(list, item); 1749 Py_DECREF(item); 1750 if (ok < 0) { 1751 PyErr_Clear(); 1752 break; 1753 } 1754 } 1755 if (list != NULL) 1756 PyModule_AddObject(m, "features", list); 1757 } 1758 } 1759 #endif 1760 1761 codes_dict = PyDict_New(); 1762 rev_codes_dict = PyDict_New(); 1763 if (codes_dict == NULL || rev_codes_dict == NULL) { 1764 Py_XDECREF(codes_dict); 1765 Py_XDECREF(rev_codes_dict); 1766 return NULL; 1767 } 1768 1769 #define MYCONST(name) \ 1770 if (PyModule_AddStringConstant(errors_module, #name, \ 1771 XML_ErrorString(name)) < 0) \ 1772 return NULL; \ 1773 tmpnum = PyLong_FromLong(name); \ 1774 if (tmpnum == NULL) return NULL; \ 1775 res = PyDict_SetItemString(codes_dict, \ 1776 XML_ErrorString(name), tmpnum); \ 1777 if (res < 0) return NULL; \ 1778 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \ 1779 if (tmpstr == NULL) return NULL; \ 1780 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \ 1781 Py_DECREF(tmpstr); \ 1782 Py_DECREF(tmpnum); \ 1783 if (res < 0) return NULL; \ 1784 1785 MYCONST(XML_ERROR_NO_MEMORY); 1786 MYCONST(XML_ERROR_SYNTAX); 1787 MYCONST(XML_ERROR_NO_ELEMENTS); 1788 MYCONST(XML_ERROR_INVALID_TOKEN); 1789 MYCONST(XML_ERROR_UNCLOSED_TOKEN); 1790 MYCONST(XML_ERROR_PARTIAL_CHAR); 1791 MYCONST(XML_ERROR_TAG_MISMATCH); 1792 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE); 1793 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT); 1794 MYCONST(XML_ERROR_PARAM_ENTITY_REF); 1795 MYCONST(XML_ERROR_UNDEFINED_ENTITY); 1796 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF); 1797 MYCONST(XML_ERROR_ASYNC_ENTITY); 1798 MYCONST(XML_ERROR_BAD_CHAR_REF); 1799 MYCONST(XML_ERROR_BINARY_ENTITY_REF); 1800 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF); 1801 MYCONST(XML_ERROR_MISPLACED_XML_PI); 1802 MYCONST(XML_ERROR_UNKNOWN_ENCODING); 1803 MYCONST(XML_ERROR_INCORRECT_ENCODING); 1804 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION); 1805 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING); 1806 MYCONST(XML_ERROR_NOT_STANDALONE); 1807 MYCONST(XML_ERROR_UNEXPECTED_STATE); 1808 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE); 1809 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD); 1810 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING); 1811 /* Added in Expat 1.95.7. */ 1812 MYCONST(XML_ERROR_UNBOUND_PREFIX); 1813 /* Added in Expat 1.95.8. */ 1814 MYCONST(XML_ERROR_UNDECLARING_PREFIX); 1815 MYCONST(XML_ERROR_INCOMPLETE_PE); 1816 MYCONST(XML_ERROR_XML_DECL); 1817 MYCONST(XML_ERROR_TEXT_DECL); 1818 MYCONST(XML_ERROR_PUBLICID); 1819 MYCONST(XML_ERROR_SUSPENDED); 1820 MYCONST(XML_ERROR_NOT_SUSPENDED); 1821 MYCONST(XML_ERROR_ABORTED); 1822 MYCONST(XML_ERROR_FINISHED); 1823 MYCONST(XML_ERROR_SUSPEND_PE); 1824 1825 if (PyModule_AddStringConstant(errors_module, "__doc__", 1826 "Constants used to describe " 1827 "error conditions.") < 0) 1828 return NULL; 1829 1830 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0) 1831 return NULL; 1832 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0) 1833 return NULL; 1834 1835 #undef MYCONST 1836 1837 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c) 1838 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER); 1839 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); 1840 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS); 1841 #undef MYCONST 1842 1843 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c) 1844 PyModule_AddStringConstant(model_module, "__doc__", 1845 "Constants used to interpret content model information."); 1846 1847 MYCONST(XML_CTYPE_EMPTY); 1848 MYCONST(XML_CTYPE_ANY); 1849 MYCONST(XML_CTYPE_MIXED); 1850 MYCONST(XML_CTYPE_NAME); 1851 MYCONST(XML_CTYPE_CHOICE); 1852 MYCONST(XML_CTYPE_SEQ); 1853 1854 MYCONST(XML_CQUANT_NONE); 1855 MYCONST(XML_CQUANT_OPT); 1856 MYCONST(XML_CQUANT_REP); 1857 MYCONST(XML_CQUANT_PLUS); 1858 #undef MYCONST 1859 1860 /* initialize pyexpat dispatch table */ 1861 capi.size = sizeof(capi); 1862 capi.magic = PyExpat_CAPI_MAGIC; 1863 capi.MAJOR_VERSION = XML_MAJOR_VERSION; 1864 capi.MINOR_VERSION = XML_MINOR_VERSION; 1865 capi.MICRO_VERSION = XML_MICRO_VERSION; 1866 capi.ErrorString = XML_ErrorString; 1867 capi.GetErrorCode = XML_GetErrorCode; 1868 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber; 1869 capi.GetErrorLineNumber = XML_GetErrorLineNumber; 1870 capi.Parse = XML_Parse; 1871 capi.ParserCreate_MM = XML_ParserCreate_MM; 1872 capi.ParserFree = XML_ParserFree; 1873 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler; 1874 capi.SetCommentHandler = XML_SetCommentHandler; 1875 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand; 1876 capi.SetElementHandler = XML_SetElementHandler; 1877 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler; 1878 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler; 1879 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler; 1880 capi.SetUserData = XML_SetUserData; 1881 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler; 1882 capi.SetEncoding = XML_SetEncoding; 1883 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler; 1884 #if XML_COMBINED_VERSION >= 20100 1885 capi.SetHashSalt = XML_SetHashSalt; 1886 #else 1887 capi.SetHashSalt = NULL; 1888 #endif 1889 1890 /* export using capsule */ 1891 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL); 1892 if (capi_object) 1893 PyModule_AddObject(m, "expat_CAPI", capi_object); 1894 return m; 1895 } 1896 1897 static void 1898 clear_handlers(xmlparseobject *self, int initial) 1899 { 1900 int i = 0; 1901 1902 for (; handler_info[i].name != NULL; i++) { 1903 if (initial) 1904 self->handlers[i] = NULL; 1905 else { 1906 Py_CLEAR(self->handlers[i]); 1907 handler_info[i].setter(self->itself, NULL); 1908 } 1909 } 1910 } 1911 1912 static struct HandlerInfo handler_info[] = { 1913 {"StartElementHandler", 1914 (xmlhandlersetter)XML_SetStartElementHandler, 1915 (xmlhandler)my_StartElementHandler}, 1916 {"EndElementHandler", 1917 (xmlhandlersetter)XML_SetEndElementHandler, 1918 (xmlhandler)my_EndElementHandler}, 1919 {"ProcessingInstructionHandler", 1920 (xmlhandlersetter)XML_SetProcessingInstructionHandler, 1921 (xmlhandler)my_ProcessingInstructionHandler}, 1922 {"CharacterDataHandler", 1923 (xmlhandlersetter)XML_SetCharacterDataHandler, 1924 (xmlhandler)my_CharacterDataHandler}, 1925 {"UnparsedEntityDeclHandler", 1926 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler, 1927 (xmlhandler)my_UnparsedEntityDeclHandler}, 1928 {"NotationDeclHandler", 1929 (xmlhandlersetter)XML_SetNotationDeclHandler, 1930 (xmlhandler)my_NotationDeclHandler}, 1931 {"StartNamespaceDeclHandler", 1932 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler, 1933 (xmlhandler)my_StartNamespaceDeclHandler}, 1934 {"EndNamespaceDeclHandler", 1935 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler, 1936 (xmlhandler)my_EndNamespaceDeclHandler}, 1937 {"CommentHandler", 1938 (xmlhandlersetter)XML_SetCommentHandler, 1939 (xmlhandler)my_CommentHandler}, 1940 {"StartCdataSectionHandler", 1941 (xmlhandlersetter)XML_SetStartCdataSectionHandler, 1942 (xmlhandler)my_StartCdataSectionHandler}, 1943 {"EndCdataSectionHandler", 1944 (xmlhandlersetter)XML_SetEndCdataSectionHandler, 1945 (xmlhandler)my_EndCdataSectionHandler}, 1946 {"DefaultHandler", 1947 (xmlhandlersetter)XML_SetDefaultHandler, 1948 (xmlhandler)my_DefaultHandler}, 1949 {"DefaultHandlerExpand", 1950 (xmlhandlersetter)XML_SetDefaultHandlerExpand, 1951 (xmlhandler)my_DefaultHandlerExpandHandler}, 1952 {"NotStandaloneHandler", 1953 (xmlhandlersetter)XML_SetNotStandaloneHandler, 1954 (xmlhandler)my_NotStandaloneHandler}, 1955 {"ExternalEntityRefHandler", 1956 (xmlhandlersetter)XML_SetExternalEntityRefHandler, 1957 (xmlhandler)my_ExternalEntityRefHandler}, 1958 {"StartDoctypeDeclHandler", 1959 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler, 1960 (xmlhandler)my_StartDoctypeDeclHandler}, 1961 {"EndDoctypeDeclHandler", 1962 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler, 1963 (xmlhandler)my_EndDoctypeDeclHandler}, 1964 {"EntityDeclHandler", 1965 (xmlhandlersetter)XML_SetEntityDeclHandler, 1966 (xmlhandler)my_EntityDeclHandler}, 1967 {"XmlDeclHandler", 1968 (xmlhandlersetter)XML_SetXmlDeclHandler, 1969 (xmlhandler)my_XmlDeclHandler}, 1970 {"ElementDeclHandler", 1971 (xmlhandlersetter)XML_SetElementDeclHandler, 1972 (xmlhandler)my_ElementDeclHandler}, 1973 {"AttlistDeclHandler", 1974 (xmlhandlersetter)XML_SetAttlistDeclHandler, 1975 (xmlhandler)my_AttlistDeclHandler}, 1976 #if XML_COMBINED_VERSION >= 19504 1977 {"SkippedEntityHandler", 1978 (xmlhandlersetter)XML_SetSkippedEntityHandler, 1979 (xmlhandler)my_SkippedEntityHandler}, 1980 #endif 1981 1982 {NULL, NULL, NULL} /* sentinel */ 1983 }; 1984