1 #include "Python.h" 2 #include <ctype.h> 3 4 #include "frameobject.h" 5 #include "expat.h" 6 7 #include "pyexpat.h" 8 9 /* Do not emit Clinic output to a file as that wreaks havoc with conditionally 10 included methods. */ 11 /*[clinic input] 12 module pyexpat 13 [clinic start generated code]*/ 14 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/ 15 16 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION) 17 18 static XML_Memory_Handling_Suite ExpatMemoryHandler = { 19 PyObject_Malloc, PyObject_Realloc, PyObject_Free}; 20 21 enum HandlerTypes { 22 StartElement, 23 EndElement, 24 ProcessingInstruction, 25 CharacterData, 26 UnparsedEntityDecl, 27 NotationDecl, 28 StartNamespaceDecl, 29 EndNamespaceDecl, 30 Comment, 31 StartCdataSection, 32 EndCdataSection, 33 Default, 34 DefaultHandlerExpand, 35 NotStandalone, 36 ExternalEntityRef, 37 StartDoctypeDecl, 38 EndDoctypeDecl, 39 EntityDecl, 40 XmlDecl, 41 ElementDecl, 42 AttlistDecl, 43 #if XML_COMBINED_VERSION >= 19504 44 SkippedEntity, 45 #endif 46 _DummyDecl 47 }; 48 49 static PyObject *ErrorObject; 50 51 /* ----------------------------------------------------- */ 52 53 /* Declarations for objects of type xmlparser */ 54 55 typedef struct { 56 PyObject_HEAD 57 58 XML_Parser itself; 59 int ordered_attributes; /* Return attributes as a list. */ 60 int specified_attributes; /* Report only specified attributes. */ 61 int in_callback; /* Is a callback active? */ 62 int ns_prefixes; /* Namespace-triplets mode? 
*/ 63 XML_Char *buffer; /* Buffer used when accumulating characters */ 64 /* NULL if not enabled */ 65 int buffer_size; /* Size of buffer, in XML_Char units */ 66 int buffer_used; /* Buffer units in use */ 67 PyObject *intern; /* Dictionary to intern strings */ 68 PyObject **handlers; 69 } xmlparseobject; 70 71 #include "clinic/pyexpat.c.h" 72 73 #define CHARACTER_DATA_BUFFER_SIZE 8192 74 75 static PyTypeObject Xmlparsetype; 76 77 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth); 78 typedef void* xmlhandler; 79 80 struct HandlerInfo { 81 const char *name; 82 xmlhandlersetter setter; 83 xmlhandler handler; 84 PyCodeObject *tb_code; 85 PyObject *nameobj; 86 }; 87 88 static struct HandlerInfo handler_info[64]; 89 90 /* Set an integer attribute on the error object; return true on success, 91 * false on an exception. 92 */ 93 static int 94 set_error_attr(PyObject *err, const char *name, int value) 95 { 96 PyObject *v = PyLong_FromLong(value); 97 98 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) { 99 Py_XDECREF(v); 100 return 0; 101 } 102 Py_DECREF(v); 103 return 1; 104 } 105 106 /* Build and set an Expat exception, including positioning 107 * information. Always returns NULL. 
108 */ 109 static PyObject * 110 set_error(xmlparseobject *self, enum XML_Error code) 111 { 112 PyObject *err; 113 PyObject *buffer; 114 XML_Parser parser = self->itself; 115 int lineno = XML_GetErrorLineNumber(parser); 116 int column = XML_GetErrorColumnNumber(parser); 117 118 buffer = PyUnicode_FromFormat("%s: line %i, column %i", 119 XML_ErrorString(code), lineno, column); 120 if (buffer == NULL) 121 return NULL; 122 err = PyObject_CallFunction(ErrorObject, "O", buffer); 123 Py_DECREF(buffer); 124 if ( err != NULL 125 && set_error_attr(err, "code", code) 126 && set_error_attr(err, "offset", column) 127 && set_error_attr(err, "lineno", lineno)) { 128 PyErr_SetObject(ErrorObject, err); 129 } 130 Py_XDECREF(err); 131 return NULL; 132 } 133 134 static int 135 have_handler(xmlparseobject *self, int type) 136 { 137 PyObject *handler = self->handlers[type]; 138 return handler != NULL; 139 } 140 141 static PyObject * 142 get_handler_name(struct HandlerInfo *hinfo) 143 { 144 PyObject *name = hinfo->nameobj; 145 if (name == NULL) { 146 name = PyUnicode_FromString(hinfo->name); 147 hinfo->nameobj = name; 148 } 149 Py_XINCREF(name); 150 return name; 151 } 152 153 154 /* Convert a string of XML_Chars into a Unicode string. 155 Returns None if str is a null pointer. */ 156 157 static PyObject * 158 conv_string_to_unicode(const XML_Char *str) 159 { 160 /* XXX currently this code assumes that XML_Char is 8-bit, 161 and hence in UTF-8. */ 162 /* UTF-8 from Expat, Unicode desired */ 163 if (str == NULL) { 164 Py_INCREF(Py_None); 165 return Py_None; 166 } 167 return PyUnicode_DecodeUTF8(str, strlen(str), "strict"); 168 } 169 170 static PyObject * 171 conv_string_len_to_unicode(const XML_Char *str, int len) 172 { 173 /* XXX currently this code assumes that XML_Char is 8-bit, 174 and hence in UTF-8. 
*/ 175 /* UTF-8 from Expat, Unicode desired */ 176 if (str == NULL) { 177 Py_INCREF(Py_None); 178 return Py_None; 179 } 180 return PyUnicode_DecodeUTF8((const char *)str, len, "strict"); 181 } 182 183 /* Callback routines */ 184 185 static void clear_handlers(xmlparseobject *self, int initial); 186 187 /* This handler is used when an error has been detected, in the hope 188 that actual parsing can be terminated early. This will only help 189 if an external entity reference is encountered. */ 190 static int 191 error_external_entity_ref_handler(XML_Parser parser, 192 const XML_Char *context, 193 const XML_Char *base, 194 const XML_Char *systemId, 195 const XML_Char *publicId) 196 { 197 return 0; 198 } 199 200 /* Dummy character data handler used when an error (exception) has 201 been detected, and the actual parsing can be terminated early. 202 This is needed since character data handler can't be safely removed 203 from within the character data handler, but can be replaced. It is 204 used only from the character data handler trampoline, and must be 205 used right after `flag_error()` is called. */ 206 static void 207 noop_character_data_handler(void *userData, const XML_Char *data, int len) 208 { 209 /* Do nothing. 
*/ 210 } 211 212 static void 213 flag_error(xmlparseobject *self) 214 { 215 clear_handlers(self, 0); 216 XML_SetExternalEntityRefHandler(self->itself, 217 error_external_entity_ref_handler); 218 } 219 220 static PyObject* 221 call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args, 222 xmlparseobject *self) 223 { 224 PyObject *res; 225 226 res = PyEval_CallObject(func, args); 227 if (res == NULL) { 228 _PyTraceback_Add(funcname, __FILE__, lineno); 229 XML_StopParser(self->itself, XML_FALSE); 230 } 231 return res; 232 } 233 234 static PyObject* 235 string_intern(xmlparseobject *self, const char* str) 236 { 237 PyObject *result = conv_string_to_unicode(str); 238 PyObject *value; 239 /* result can be NULL if the unicode conversion failed. */ 240 if (!result) 241 return result; 242 if (!self->intern) 243 return result; 244 value = PyDict_GetItem(self->intern, result); 245 if (!value) { 246 if (PyDict_SetItem(self->intern, result, result) == 0) 247 return result; 248 else 249 return NULL; 250 } 251 Py_INCREF(value); 252 Py_DECREF(result); 253 return value; 254 } 255 256 /* Return 0 on success, -1 on exception. 257 * flag_error() will be called before return if needed. 258 */ 259 static int 260 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len) 261 { 262 PyObject *args; 263 PyObject *temp; 264 265 if (!have_handler(self, CharacterData)) 266 return -1; 267 268 args = PyTuple_New(1); 269 if (args == NULL) 270 return -1; 271 temp = (conv_string_len_to_unicode(buffer, len)); 272 if (temp == NULL) { 273 Py_DECREF(args); 274 flag_error(self); 275 XML_SetCharacterDataHandler(self->itself, 276 noop_character_data_handler); 277 return -1; 278 } 279 PyTuple_SET_ITEM(args, 0, temp); 280 /* temp is now a borrowed reference; consider it unused. 
*/ 281 self->in_callback = 1; 282 temp = call_with_frame("CharacterData", __LINE__, 283 self->handlers[CharacterData], args, self); 284 /* temp is an owned reference again, or NULL */ 285 self->in_callback = 0; 286 Py_DECREF(args); 287 if (temp == NULL) { 288 flag_error(self); 289 XML_SetCharacterDataHandler(self->itself, 290 noop_character_data_handler); 291 return -1; 292 } 293 Py_DECREF(temp); 294 return 0; 295 } 296 297 static int 298 flush_character_buffer(xmlparseobject *self) 299 { 300 int rc; 301 if (self->buffer == NULL || self->buffer_used == 0) 302 return 0; 303 rc = call_character_handler(self, self->buffer, self->buffer_used); 304 self->buffer_used = 0; 305 return rc; 306 } 307 308 static void 309 my_CharacterDataHandler(void *userData, const XML_Char *data, int len) 310 { 311 xmlparseobject *self = (xmlparseobject *) userData; 312 313 if (PyErr_Occurred()) 314 return; 315 316 if (self->buffer == NULL) 317 call_character_handler(self, data, len); 318 else { 319 if ((self->buffer_used + len) > self->buffer_size) { 320 if (flush_character_buffer(self) < 0) 321 return; 322 /* handler might have changed; drop the rest on the floor 323 * if there isn't a handler anymore 324 */ 325 if (!have_handler(self, CharacterData)) 326 return; 327 } 328 if (len > self->buffer_size) { 329 call_character_handler(self, data, len); 330 self->buffer_used = 0; 331 } 332 else { 333 memcpy(self->buffer + self->buffer_used, 334 data, len * sizeof(XML_Char)); 335 self->buffer_used += len; 336 } 337 } 338 } 339 340 static void 341 my_StartElementHandler(void *userData, 342 const XML_Char *name, const XML_Char *atts[]) 343 { 344 xmlparseobject *self = (xmlparseobject *)userData; 345 346 if (have_handler(self, StartElement)) { 347 PyObject *container, *rv, *args; 348 int i, max; 349 350 if (PyErr_Occurred()) 351 return; 352 353 if (flush_character_buffer(self) < 0) 354 return; 355 /* Set max to the number of slots filled in atts[]; max/2 is 356 * the number of attributes we need to 
process. 357 */ 358 if (self->specified_attributes) { 359 max = XML_GetSpecifiedAttributeCount(self->itself); 360 } 361 else { 362 max = 0; 363 while (atts[max] != NULL) 364 max += 2; 365 } 366 /* Build the container. */ 367 if (self->ordered_attributes) 368 container = PyList_New(max); 369 else 370 container = PyDict_New(); 371 if (container == NULL) { 372 flag_error(self); 373 return; 374 } 375 for (i = 0; i < max; i += 2) { 376 PyObject *n = string_intern(self, (XML_Char *) atts[i]); 377 PyObject *v; 378 if (n == NULL) { 379 flag_error(self); 380 Py_DECREF(container); 381 return; 382 } 383 v = conv_string_to_unicode((XML_Char *) atts[i+1]); 384 if (v == NULL) { 385 flag_error(self); 386 Py_DECREF(container); 387 Py_DECREF(n); 388 return; 389 } 390 if (self->ordered_attributes) { 391 PyList_SET_ITEM(container, i, n); 392 PyList_SET_ITEM(container, i+1, v); 393 } 394 else if (PyDict_SetItem(container, n, v)) { 395 flag_error(self); 396 Py_DECREF(n); 397 Py_DECREF(v); 398 return; 399 } 400 else { 401 Py_DECREF(n); 402 Py_DECREF(v); 403 } 404 } 405 args = string_intern(self, name); 406 if (args != NULL) 407 args = Py_BuildValue("(NN)", args, container); 408 if (args == NULL) { 409 Py_DECREF(container); 410 return; 411 } 412 /* Container is now a borrowed reference; ignore it. 
*/ 413 self->in_callback = 1; 414 rv = call_with_frame("StartElement", __LINE__, 415 self->handlers[StartElement], args, self); 416 self->in_callback = 0; 417 Py_DECREF(args); 418 if (rv == NULL) { 419 flag_error(self); 420 return; 421 } 422 Py_DECREF(rv); 423 } 424 } 425 426 #define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \ 427 RETURN, GETUSERDATA) \ 428 static RC \ 429 my_##NAME##Handler PARAMS {\ 430 xmlparseobject *self = GETUSERDATA ; \ 431 PyObject *args = NULL; \ 432 PyObject *rv = NULL; \ 433 INIT \ 434 \ 435 if (have_handler(self, NAME)) { \ 436 if (PyErr_Occurred()) \ 437 return RETURN; \ 438 if (flush_character_buffer(self) < 0) \ 439 return RETURN; \ 440 args = Py_BuildValue PARAM_FORMAT ;\ 441 if (!args) { flag_error(self); return RETURN;} \ 442 self->in_callback = 1; \ 443 rv = call_with_frame(#NAME,__LINE__, \ 444 self->handlers[NAME], args, self); \ 445 self->in_callback = 0; \ 446 Py_DECREF(args); \ 447 if (rv == NULL) { \ 448 flag_error(self); \ 449 return RETURN; \ 450 } \ 451 CONVERSION \ 452 Py_DECREF(rv); \ 453 } \ 454 return RETURN; \ 455 } 456 457 #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \ 458 RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\ 459 (xmlparseobject *)userData) 460 461 #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\ 462 RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \ 463 rc = PyLong_AsLong(rv);, rc, \ 464 (xmlparseobject *)userData) 465 466 VOID_HANDLER(EndElement, 467 (void *userData, const XML_Char *name), 468 ("(N)", string_intern(self, name))) 469 470 VOID_HANDLER(ProcessingInstruction, 471 (void *userData, 472 const XML_Char *target, 473 const XML_Char *data), 474 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data)) 475 476 VOID_HANDLER(UnparsedEntityDecl, 477 (void *userData, 478 const XML_Char *entityName, 479 const XML_Char *base, 480 const XML_Char *systemId, 481 const XML_Char *publicId, 482 const XML_Char *notationName), 483 ("(NNNNN)", 484 
string_intern(self, entityName), string_intern(self, base), 485 string_intern(self, systemId), string_intern(self, publicId), 486 string_intern(self, notationName))) 487 488 VOID_HANDLER(EntityDecl, 489 (void *userData, 490 const XML_Char *entityName, 491 int is_parameter_entity, 492 const XML_Char *value, 493 int value_length, 494 const XML_Char *base, 495 const XML_Char *systemId, 496 const XML_Char *publicId, 497 const XML_Char *notationName), 498 ("NiNNNNN", 499 string_intern(self, entityName), is_parameter_entity, 500 (conv_string_len_to_unicode(value, value_length)), 501 string_intern(self, base), string_intern(self, systemId), 502 string_intern(self, publicId), 503 string_intern(self, notationName))) 504 505 VOID_HANDLER(XmlDecl, 506 (void *userData, 507 const XML_Char *version, 508 const XML_Char *encoding, 509 int standalone), 510 ("(O&O&i)", 511 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding, 512 standalone)) 513 514 static PyObject * 515 conv_content_model(XML_Content * const model, 516 PyObject *(*conv_string)(const XML_Char *)) 517 { 518 PyObject *result = NULL; 519 PyObject *children = PyTuple_New(model->numchildren); 520 int i; 521 522 if (children != NULL) { 523 assert(model->numchildren < INT_MAX); 524 for (i = 0; i < (int)model->numchildren; ++i) { 525 PyObject *child = conv_content_model(&model->children[i], 526 conv_string); 527 if (child == NULL) { 528 Py_XDECREF(children); 529 return NULL; 530 } 531 PyTuple_SET_ITEM(children, i, child); 532 } 533 result = Py_BuildValue("(iiO&N)", 534 model->type, model->quant, 535 conv_string,model->name, children); 536 } 537 return result; 538 } 539 540 static void 541 my_ElementDeclHandler(void *userData, 542 const XML_Char *name, 543 XML_Content *model) 544 { 545 xmlparseobject *self = (xmlparseobject *)userData; 546 PyObject *args = NULL; 547 548 if (have_handler(self, ElementDecl)) { 549 PyObject *rv = NULL; 550 PyObject *modelobj, *nameobj; 551 552 if (PyErr_Occurred()) 553 return; 554 
555 if (flush_character_buffer(self) < 0) 556 goto finally; 557 modelobj = conv_content_model(model, (conv_string_to_unicode)); 558 if (modelobj == NULL) { 559 flag_error(self); 560 goto finally; 561 } 562 nameobj = string_intern(self, name); 563 if (nameobj == NULL) { 564 Py_DECREF(modelobj); 565 flag_error(self); 566 goto finally; 567 } 568 args = Py_BuildValue("NN", nameobj, modelobj); 569 if (args == NULL) { 570 Py_DECREF(modelobj); 571 flag_error(self); 572 goto finally; 573 } 574 self->in_callback = 1; 575 rv = call_with_frame("ElementDecl", __LINE__, 576 self->handlers[ElementDecl], args, self); 577 self->in_callback = 0; 578 if (rv == NULL) { 579 flag_error(self); 580 goto finally; 581 } 582 Py_DECREF(rv); 583 } 584 finally: 585 Py_XDECREF(args); 586 XML_FreeContentModel(self->itself, model); 587 return; 588 } 589 590 VOID_HANDLER(AttlistDecl, 591 (void *userData, 592 const XML_Char *elname, 593 const XML_Char *attname, 594 const XML_Char *att_type, 595 const XML_Char *dflt, 596 int isrequired), 597 ("(NNO&O&i)", 598 string_intern(self, elname), string_intern(self, attname), 599 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt, 600 isrequired)) 601 602 #if XML_COMBINED_VERSION >= 19504 603 VOID_HANDLER(SkippedEntity, 604 (void *userData, 605 const XML_Char *entityName, 606 int is_parameter_entity), 607 ("Ni", 608 string_intern(self, entityName), is_parameter_entity)) 609 #endif 610 611 VOID_HANDLER(NotationDecl, 612 (void *userData, 613 const XML_Char *notationName, 614 const XML_Char *base, 615 const XML_Char *systemId, 616 const XML_Char *publicId), 617 ("(NNNN)", 618 string_intern(self, notationName), string_intern(self, base), 619 string_intern(self, systemId), string_intern(self, publicId))) 620 621 VOID_HANDLER(StartNamespaceDecl, 622 (void *userData, 623 const XML_Char *prefix, 624 const XML_Char *uri), 625 ("(NN)", 626 string_intern(self, prefix), string_intern(self, uri))) 627 628 VOID_HANDLER(EndNamespaceDecl, 629 (void *userData, 
630 const XML_Char *prefix), 631 ("(N)", string_intern(self, prefix))) 632 633 VOID_HANDLER(Comment, 634 (void *userData, const XML_Char *data), 635 ("(O&)", conv_string_to_unicode ,data)) 636 637 VOID_HANDLER(StartCdataSection, 638 (void *userData), 639 ("()")) 640 641 VOID_HANDLER(EndCdataSection, 642 (void *userData), 643 ("()")) 644 645 VOID_HANDLER(Default, 646 (void *userData, const XML_Char *s, int len), 647 ("(N)", (conv_string_len_to_unicode(s,len)))) 648 649 VOID_HANDLER(DefaultHandlerExpand, 650 (void *userData, const XML_Char *s, int len), 651 ("(N)", (conv_string_len_to_unicode(s,len)))) 652 653 INT_HANDLER(NotStandalone, 654 (void *userData), 655 ("()")) 656 657 RC_HANDLER(int, ExternalEntityRef, 658 (XML_Parser parser, 659 const XML_Char *context, 660 const XML_Char *base, 661 const XML_Char *systemId, 662 const XML_Char *publicId), 663 int rc=0;, 664 ("(O&NNN)", 665 conv_string_to_unicode ,context, string_intern(self, base), 666 string_intern(self, systemId), string_intern(self, publicId)), 667 rc = PyLong_AsLong(rv);, rc, 668 XML_GetUserData(parser)) 669 670 /* XXX UnknownEncodingHandler */ 671 672 VOID_HANDLER(StartDoctypeDecl, 673 (void *userData, const XML_Char *doctypeName, 674 const XML_Char *sysid, const XML_Char *pubid, 675 int has_internal_subset), 676 ("(NNNi)", string_intern(self, doctypeName), 677 string_intern(self, sysid), string_intern(self, pubid), 678 has_internal_subset)) 679 680 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()")) 681 682 /* ---------------------------------------------------------------- */ 683 /*[clinic input] 684 class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype" 685 [clinic start generated code]*/ 686 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/ 687 688 689 static PyObject * 690 get_parse_result(xmlparseobject *self, int rv) 691 { 692 if (PyErr_Occurred()) { 693 return NULL; 694 } 695 if (rv == 0) { 696 return set_error(self, XML_GetErrorCode(self->itself)); 697 } 
698 if (flush_character_buffer(self) < 0) { 699 return NULL; 700 } 701 return PyLong_FromLong(rv); 702 } 703 704 #define MAX_CHUNK_SIZE (1 << 20) 705 706 /*[clinic input] 707 pyexpat.xmlparser.Parse 708 709 data: object 710 isfinal: int(c_default="0") = False 711 / 712 713 Parse XML data. 714 715 `isfinal' should be true at end of input. 716 [clinic start generated code]*/ 717 718 static PyObject * 719 pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyObject *data, 720 int isfinal) 721 /*[clinic end generated code: output=f4db843dd1f4ed4b input=199d9e8e92ebbb4b]*/ 722 { 723 const char *s; 724 Py_ssize_t slen; 725 Py_buffer view; 726 int rc; 727 728 if (PyUnicode_Check(data)) { 729 view.buf = NULL; 730 s = PyUnicode_AsUTF8AndSize(data, &slen); 731 if (s == NULL) 732 return NULL; 733 /* Explicitly set UTF-8 encoding. Return code ignored. */ 734 (void)XML_SetEncoding(self->itself, "utf-8"); 735 } 736 else { 737 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0) 738 return NULL; 739 s = view.buf; 740 slen = view.len; 741 } 742 743 while (slen > MAX_CHUNK_SIZE) { 744 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0); 745 if (!rc) 746 goto done; 747 s += MAX_CHUNK_SIZE; 748 slen -= MAX_CHUNK_SIZE; 749 } 750 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX); 751 assert(slen <= INT_MAX); 752 rc = XML_Parse(self->itself, s, (int)slen, isfinal); 753 754 done: 755 if (view.buf != NULL) 756 PyBuffer_Release(&view); 757 return get_parse_result(self, rc); 758 } 759 760 /* File reading copied from cPickle */ 761 762 #define BUF_SIZE 2048 763 764 static int 765 readinst(char *buf, int buf_size, PyObject *meth) 766 { 767 PyObject *str; 768 Py_ssize_t len; 769 const char *ptr; 770 771 str = PyObject_CallFunction(meth, "n", buf_size); 772 if (str == NULL) 773 goto error; 774 775 if (PyBytes_Check(str)) 776 ptr = PyBytes_AS_STRING(str); 777 else if (PyByteArray_Check(str)) 778 ptr = PyByteArray_AS_STRING(str); 779 else { 780 PyErr_Format(PyExc_TypeError, 781 "read() did not return a 
bytes object (type=%.400s)", 782 Py_TYPE(str)->tp_name); 783 goto error; 784 } 785 len = Py_SIZE(str); 786 if (len > buf_size) { 787 PyErr_Format(PyExc_ValueError, 788 "read() returned too much data: " 789 "%i bytes requested, %zd returned", 790 buf_size, len); 791 goto error; 792 } 793 memcpy(buf, ptr, len); 794 Py_DECREF(str); 795 /* len <= buf_size <= INT_MAX */ 796 return (int)len; 797 798 error: 799 Py_XDECREF(str); 800 return -1; 801 } 802 803 /*[clinic input] 804 pyexpat.xmlparser.ParseFile 805 806 file: object 807 / 808 809 Parse XML data from file-like object. 810 [clinic start generated code]*/ 811 812 static PyObject * 813 pyexpat_xmlparser_ParseFile(xmlparseobject *self, PyObject *file) 814 /*[clinic end generated code: output=2adc6a13100cc42b input=fbb5a12b6038d735]*/ 815 { 816 int rv = 1; 817 PyObject *readmethod = NULL; 818 _Py_IDENTIFIER(read); 819 820 readmethod = _PyObject_GetAttrId(file, &PyId_read); 821 if (readmethod == NULL) { 822 PyErr_SetString(PyExc_TypeError, 823 "argument must have 'read' attribute"); 824 return NULL; 825 } 826 for (;;) { 827 int bytes_read; 828 void *buf = XML_GetBuffer(self->itself, BUF_SIZE); 829 if (buf == NULL) { 830 Py_XDECREF(readmethod); 831 return get_parse_result(self, 0); 832 } 833 834 bytes_read = readinst(buf, BUF_SIZE, readmethod); 835 if (bytes_read < 0) { 836 Py_DECREF(readmethod); 837 return NULL; 838 } 839 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0); 840 if (PyErr_Occurred()) { 841 Py_XDECREF(readmethod); 842 return NULL; 843 } 844 845 if (!rv || bytes_read == 0) 846 break; 847 } 848 Py_XDECREF(readmethod); 849 return get_parse_result(self, rv); 850 } 851 852 /*[clinic input] 853 pyexpat.xmlparser.SetBase 854 855 base: str 856 / 857 858 Set the base URL for the parser. 
859 [clinic start generated code]*/ 860 861 static PyObject * 862 pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base) 863 /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/ 864 { 865 if (!XML_SetBase(self->itself, base)) { 866 return PyErr_NoMemory(); 867 } 868 Py_RETURN_NONE; 869 } 870 871 /*[clinic input] 872 pyexpat.xmlparser.GetBase 873 874 Return base URL string for the parser. 875 [clinic start generated code]*/ 876 877 static PyObject * 878 pyexpat_xmlparser_GetBase_impl(xmlparseobject *self) 879 /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/ 880 { 881 return Py_BuildValue("z", XML_GetBase(self->itself)); 882 } 883 884 /*[clinic input] 885 pyexpat.xmlparser.GetInputContext 886 887 Return the untranslated text of the input that caused the current event. 888 889 If the event was generated by a large amount of text (such as a start tag 890 for an element with many attributes), not all of the text may be available. 891 [clinic start generated code]*/ 892 893 static PyObject * 894 pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self) 895 /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/ 896 { 897 if (self->in_callback) { 898 int offset, size; 899 const char *buffer 900 = XML_GetInputContext(self->itself, &offset, &size); 901 902 if (buffer != NULL) 903 return PyBytes_FromStringAndSize(buffer + offset, 904 size - offset); 905 else 906 Py_RETURN_NONE; 907 } 908 else 909 Py_RETURN_NONE; 910 } 911 912 /*[clinic input] 913 pyexpat.xmlparser.ExternalEntityParserCreate 914 915 context: str(accept={str, NoneType}) 916 encoding: str = NULL 917 / 918 919 Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler. 
920 [clinic start generated code]*/ 921 922 static PyObject * 923 pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, 924 const char *context, 925 const char *encoding) 926 /*[clinic end generated code: output=535cda9d7a0fbcd6 input=b906714cc122c322]*/ 927 { 928 xmlparseobject *new_parser; 929 int i; 930 931 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype); 932 if (new_parser == NULL) 933 return NULL; 934 new_parser->buffer_size = self->buffer_size; 935 new_parser->buffer_used = 0; 936 new_parser->buffer = NULL; 937 new_parser->ordered_attributes = self->ordered_attributes; 938 new_parser->specified_attributes = self->specified_attributes; 939 new_parser->in_callback = 0; 940 new_parser->ns_prefixes = self->ns_prefixes; 941 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context, 942 encoding); 943 new_parser->handlers = 0; 944 new_parser->intern = self->intern; 945 Py_XINCREF(new_parser->intern); 946 PyObject_GC_Track(new_parser); 947 948 if (self->buffer != NULL) { 949 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size); 950 if (new_parser->buffer == NULL) { 951 Py_DECREF(new_parser); 952 return PyErr_NoMemory(); 953 } 954 } 955 if (!new_parser->itself) { 956 Py_DECREF(new_parser); 957 return PyErr_NoMemory(); 958 } 959 960 XML_SetUserData(new_parser->itself, (void *)new_parser); 961 962 /* allocate and clear handlers first */ 963 for (i = 0; handler_info[i].name != NULL; i++) 964 /* do nothing */; 965 966 new_parser->handlers = PyMem_New(PyObject *, i); 967 if (!new_parser->handlers) { 968 Py_DECREF(new_parser); 969 return PyErr_NoMemory(); 970 } 971 clear_handlers(new_parser, 1); 972 973 /* then copy handlers from self */ 974 for (i = 0; handler_info[i].name != NULL; i++) { 975 PyObject *handler = self->handlers[i]; 976 if (handler != NULL) { 977 Py_INCREF(handler); 978 new_parser->handlers[i] = handler; 979 handler_info[i].setter(new_parser->itself, 980 handler_info[i].handler); 981 } 982 } 983 return 
(PyObject *)new_parser; 984 } 985 986 /*[clinic input] 987 pyexpat.xmlparser.SetParamEntityParsing 988 989 flag: int 990 / 991 992 Controls parsing of parameter entities (including the external DTD subset). 993 994 Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER, 995 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and 996 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag 997 was successful. 998 [clinic start generated code]*/ 999 1000 static PyObject * 1001 pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag) 1002 /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/ 1003 { 1004 flag = XML_SetParamEntityParsing(self->itself, flag); 1005 return PyLong_FromLong(flag); 1006 } 1007 1008 1009 #if XML_COMBINED_VERSION >= 19505 1010 /*[clinic input] 1011 pyexpat.xmlparser.UseForeignDTD 1012 1013 flag: bool = True 1014 / 1015 1016 Allows the application to provide an artificial external subset if one is not specified as part of the document instance. 1017 1018 This readily allows the use of a 'default' document type controlled by the 1019 application, while still getting the advantage of providing document type 1020 information to the parser. 'flag' defaults to True if not provided. 1021 [clinic start generated code]*/ 1022 1023 static PyObject * 1024 pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, int flag) 1025 /*[clinic end generated code: output=cfaa9aa50bb0f65c input=78144c519d116a6e]*/ 1026 { 1027 enum XML_Error rc; 1028 1029 rc = XML_UseForeignDTD(self->itself, flag ? 
XML_TRUE : XML_FALSE); 1030 if (rc != XML_ERROR_NONE) { 1031 return set_error(self, rc); 1032 } 1033 Py_INCREF(Py_None); 1034 return Py_None; 1035 } 1036 #endif 1037 1038 /*[clinic input] 1039 pyexpat.xmlparser.__dir__ 1040 [clinic start generated code]*/ 1041 1042 static PyObject * 1043 pyexpat_xmlparser___dir___impl(xmlparseobject *self) 1044 /*[clinic end generated code: output=bc22451efb9e4d17 input=76aa455f2a661384]*/ 1045 { 1046 #define APPEND(list, str) \ 1047 do { \ 1048 PyObject *o = PyUnicode_FromString(str); \ 1049 if (o != NULL) \ 1050 PyList_Append(list, o); \ 1051 Py_XDECREF(o); \ 1052 } while (0) 1053 1054 int i; 1055 PyObject *rc = PyList_New(0); 1056 if (!rc) 1057 return NULL; 1058 for (i = 0; handler_info[i].name != NULL; i++) { 1059 PyObject *o = get_handler_name(&handler_info[i]); 1060 if (o != NULL) 1061 PyList_Append(rc, o); 1062 Py_XDECREF(o); 1063 } 1064 APPEND(rc, "ErrorCode"); 1065 APPEND(rc, "ErrorLineNumber"); 1066 APPEND(rc, "ErrorColumnNumber"); 1067 APPEND(rc, "ErrorByteIndex"); 1068 APPEND(rc, "CurrentLineNumber"); 1069 APPEND(rc, "CurrentColumnNumber"); 1070 APPEND(rc, "CurrentByteIndex"); 1071 APPEND(rc, "buffer_size"); 1072 APPEND(rc, "buffer_text"); 1073 APPEND(rc, "buffer_used"); 1074 APPEND(rc, "namespace_prefixes"); 1075 APPEND(rc, "ordered_attributes"); 1076 APPEND(rc, "specified_attributes"); 1077 APPEND(rc, "intern"); 1078 1079 #undef APPEND 1080 1081 if (PyErr_Occurred()) { 1082 Py_DECREF(rc); 1083 rc = NULL; 1084 } 1085 1086 return rc; 1087 } 1088 1089 static struct PyMethodDef xmlparse_methods[] = { 1090 PYEXPAT_XMLPARSER_PARSE_METHODDEF 1091 PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF 1092 PYEXPAT_XMLPARSER_SETBASE_METHODDEF 1093 PYEXPAT_XMLPARSER_GETBASE_METHODDEF 1094 PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF 1095 PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF 1096 PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF 1097 #if XML_COMBINED_VERSION >= 19505 1098 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF 1099 #endif 
1100 PYEXPAT_XMLPARSER___DIR___METHODDEF 1101 {NULL, NULL} /* sentinel */ 1102 }; 1103 1104 /* ---------- */ 1105 1106 1107 1108 /* pyexpat international encoding support. 1109 Make it as simple as possible. 1110 */ 1111 1112 static int 1113 PyUnknownEncodingHandler(void *encodingHandlerData, 1114 const XML_Char *name, 1115 XML_Encoding *info) 1116 { 1117 static unsigned char template_buffer[256] = {0}; 1118 PyObject* u; 1119 int i; 1120 void *data; 1121 unsigned int kind; 1122 1123 if (PyErr_Occurred()) 1124 return XML_STATUS_ERROR; 1125 1126 if (template_buffer[1] == 0) { 1127 for (i = 0; i < 256; i++) 1128 template_buffer[i] = i; 1129 } 1130 1131 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace"); 1132 if (u == NULL || PyUnicode_READY(u)) { 1133 Py_XDECREF(u); 1134 return XML_STATUS_ERROR; 1135 } 1136 1137 if (PyUnicode_GET_LENGTH(u) != 256) { 1138 Py_DECREF(u); 1139 PyErr_SetString(PyExc_ValueError, 1140 "multi-byte encodings are not supported"); 1141 return XML_STATUS_ERROR; 1142 } 1143 1144 kind = PyUnicode_KIND(u); 1145 data = PyUnicode_DATA(u); 1146 for (i = 0; i < 256; i++) { 1147 Py_UCS4 ch = PyUnicode_READ(kind, data, i); 1148 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER) 1149 info->map[i] = ch; 1150 else 1151 info->map[i] = -1; 1152 } 1153 1154 info->data = NULL; 1155 info->convert = NULL; 1156 info->release = NULL; 1157 Py_DECREF(u); 1158 1159 return XML_STATUS_OK; 1160 } 1161 1162 1163 static PyObject * 1164 newxmlparseobject(const char *encoding, const char *namespace_separator, PyObject *intern) 1165 { 1166 int i; 1167 xmlparseobject *self; 1168 1169 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype); 1170 if (self == NULL) 1171 return NULL; 1172 1173 self->buffer = NULL; 1174 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE; 1175 self->buffer_used = 0; 1176 self->ordered_attributes = 0; 1177 self->specified_attributes = 0; 1178 self->in_callback = 0; 1179 self->ns_prefixes = 0; 1180 self->handlers = NULL; 1181 self->intern = intern; 
1182 Py_XINCREF(self->intern); 1183 PyObject_GC_Track(self); 1184 1185 /* namespace_separator is either NULL or contains one char + \0 */ 1186 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler, 1187 namespace_separator); 1188 if (self->itself == NULL) { 1189 PyErr_SetString(PyExc_RuntimeError, 1190 "XML_ParserCreate failed"); 1191 Py_DECREF(self); 1192 return NULL; 1193 } 1194 #if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT) 1195 /* This feature was added upstream in libexpat 2.1.0. Our expat copy 1196 * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT 1197 * to indicate that we can still use it. */ 1198 XML_SetHashSalt(self->itself, 1199 (unsigned long)_Py_HashSecret.expat.hashsalt); 1200 #endif 1201 XML_SetUserData(self->itself, (void *)self); 1202 XML_SetUnknownEncodingHandler(self->itself, 1203 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL); 1204 1205 for (i = 0; handler_info[i].name != NULL; i++) 1206 /* do nothing */; 1207 1208 self->handlers = PyMem_New(PyObject *, i); 1209 if (!self->handlers) { 1210 Py_DECREF(self); 1211 return PyErr_NoMemory(); 1212 } 1213 clear_handlers(self, 1); 1214 1215 return (PyObject*)self; 1216 } 1217 1218 1219 static void 1220 xmlparse_dealloc(xmlparseobject *self) 1221 { 1222 int i; 1223 PyObject_GC_UnTrack(self); 1224 if (self->itself != NULL) 1225 XML_ParserFree(self->itself); 1226 self->itself = NULL; 1227 1228 if (self->handlers != NULL) { 1229 for (i = 0; handler_info[i].name != NULL; i++) 1230 Py_CLEAR(self->handlers[i]); 1231 PyMem_Free(self->handlers); 1232 self->handlers = NULL; 1233 } 1234 if (self->buffer != NULL) { 1235 PyMem_Free(self->buffer); 1236 self->buffer = NULL; 1237 } 1238 Py_XDECREF(self->intern); 1239 PyObject_GC_Del(self); 1240 } 1241 1242 static int 1243 handlername2int(PyObject *name) 1244 { 1245 int i; 1246 for (i = 0; handler_info[i].name != NULL; i++) { 1247 if (_PyUnicode_EqualToASCIIString(name, 
handler_info[i].name)) { 1248 return i; 1249 } 1250 } 1251 return -1; 1252 } 1253 1254 static PyObject * 1255 get_pybool(int istrue) 1256 { 1257 PyObject *result = istrue ? Py_True : Py_False; 1258 Py_INCREF(result); 1259 return result; 1260 } 1261 1262 static PyObject * 1263 xmlparse_getattro(xmlparseobject *self, PyObject *nameobj) 1264 { 1265 Py_UCS4 first_char; 1266 int handlernum = -1; 1267 1268 if (!PyUnicode_Check(nameobj)) 1269 goto generic; 1270 if (PyUnicode_READY(nameobj)) 1271 return NULL; 1272 1273 handlernum = handlername2int(nameobj); 1274 1275 if (handlernum != -1) { 1276 PyObject *result = self->handlers[handlernum]; 1277 if (result == NULL) 1278 result = Py_None; 1279 Py_INCREF(result); 1280 return result; 1281 } 1282 1283 first_char = PyUnicode_READ_CHAR(nameobj, 0); 1284 if (first_char == 'E') { 1285 if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorCode")) 1286 return PyLong_FromLong((long) 1287 XML_GetErrorCode(self->itself)); 1288 if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorLineNumber")) 1289 return PyLong_FromLong((long) 1290 XML_GetErrorLineNumber(self->itself)); 1291 if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorColumnNumber")) 1292 return PyLong_FromLong((long) 1293 XML_GetErrorColumnNumber(self->itself)); 1294 if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorByteIndex")) 1295 return PyLong_FromLong((long) 1296 XML_GetErrorByteIndex(self->itself)); 1297 } 1298 if (first_char == 'C') { 1299 if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentLineNumber")) 1300 return PyLong_FromLong((long) 1301 XML_GetCurrentLineNumber(self->itself)); 1302 if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentColumnNumber")) 1303 return PyLong_FromLong((long) 1304 XML_GetCurrentColumnNumber(self->itself)); 1305 if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentByteIndex")) 1306 return PyLong_FromLong((long) 1307 XML_GetCurrentByteIndex(self->itself)); 1308 } 1309 if (first_char == 'b') { 1310 if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_size")) 
1311 return PyLong_FromLong((long) self->buffer_size); 1312 if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_text")) 1313 return get_pybool(self->buffer != NULL); 1314 if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_used")) 1315 return PyLong_FromLong((long) self->buffer_used); 1316 } 1317 if (_PyUnicode_EqualToASCIIString(nameobj, "namespace_prefixes")) 1318 return get_pybool(self->ns_prefixes); 1319 if (_PyUnicode_EqualToASCIIString(nameobj, "ordered_attributes")) 1320 return get_pybool(self->ordered_attributes); 1321 if (_PyUnicode_EqualToASCIIString(nameobj, "specified_attributes")) 1322 return get_pybool((long) self->specified_attributes); 1323 if (_PyUnicode_EqualToASCIIString(nameobj, "intern")) { 1324 if (self->intern == NULL) { 1325 Py_INCREF(Py_None); 1326 return Py_None; 1327 } 1328 else { 1329 Py_INCREF(self->intern); 1330 return self->intern; 1331 } 1332 } 1333 generic: 1334 return PyObject_GenericGetAttr((PyObject*)self, nameobj); 1335 } 1336 1337 static int 1338 sethandler(xmlparseobject *self, PyObject *name, PyObject* v) 1339 { 1340 int handlernum = handlername2int(name); 1341 if (handlernum >= 0) { 1342 xmlhandler c_handler = NULL; 1343 1344 if (v == Py_None) { 1345 /* If this is the character data handler, and a character 1346 data handler is already active, we need to be more 1347 careful. What we can safely do is replace the existing 1348 character data handler callback function with a no-op 1349 function that will refuse to call Python. The downside 1350 is that this doesn't completely remove the character 1351 data handler from the C layer if there's any callback 1352 active, so Expat does a little more work than it 1353 otherwise would, but that's really an odd case. A more 1354 elaborate system of handlers and state could remove the 1355 C handler more effectively. 
*/ 1356 if (handlernum == CharacterData && self->in_callback) 1357 c_handler = noop_character_data_handler; 1358 v = NULL; 1359 } 1360 else if (v != NULL) { 1361 Py_INCREF(v); 1362 c_handler = handler_info[handlernum].handler; 1363 } 1364 Py_XSETREF(self->handlers[handlernum], v); 1365 handler_info[handlernum].setter(self->itself, c_handler); 1366 return 1; 1367 } 1368 return 0; 1369 } 1370 1371 static int 1372 xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v) 1373 { 1374 /* Set attribute 'name' to value 'v'. v==NULL means delete */ 1375 if (!PyUnicode_Check(name)) { 1376 PyErr_Format(PyExc_TypeError, 1377 "attribute name must be string, not '%.200s'", 1378 name->ob_type->tp_name); 1379 return -1; 1380 } 1381 if (v == NULL) { 1382 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute"); 1383 return -1; 1384 } 1385 if (_PyUnicode_EqualToASCIIString(name, "buffer_text")) { 1386 int b = PyObject_IsTrue(v); 1387 if (b < 0) 1388 return -1; 1389 if (b) { 1390 if (self->buffer == NULL) { 1391 self->buffer = PyMem_Malloc(self->buffer_size); 1392 if (self->buffer == NULL) { 1393 PyErr_NoMemory(); 1394 return -1; 1395 } 1396 self->buffer_used = 0; 1397 } 1398 } 1399 else if (self->buffer != NULL) { 1400 if (flush_character_buffer(self) < 0) 1401 return -1; 1402 PyMem_Free(self->buffer); 1403 self->buffer = NULL; 1404 } 1405 return 0; 1406 } 1407 if (_PyUnicode_EqualToASCIIString(name, "namespace_prefixes")) { 1408 int b = PyObject_IsTrue(v); 1409 if (b < 0) 1410 return -1; 1411 self->ns_prefixes = b; 1412 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes); 1413 return 0; 1414 } 1415 if (_PyUnicode_EqualToASCIIString(name, "ordered_attributes")) { 1416 int b = PyObject_IsTrue(v); 1417 if (b < 0) 1418 return -1; 1419 self->ordered_attributes = b; 1420 return 0; 1421 } 1422 if (_PyUnicode_EqualToASCIIString(name, "specified_attributes")) { 1423 int b = PyObject_IsTrue(v); 1424 if (b < 0) 1425 return -1; 1426 self->specified_attributes = b; 1427 
return 0; 1428 } 1429 1430 if (_PyUnicode_EqualToASCIIString(name, "buffer_size")) { 1431 long new_buffer_size; 1432 if (!PyLong_Check(v)) { 1433 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer"); 1434 return -1; 1435 } 1436 1437 new_buffer_size = PyLong_AsLong(v); 1438 if (new_buffer_size <= 0) { 1439 if (!PyErr_Occurred()) 1440 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero"); 1441 return -1; 1442 } 1443 1444 /* trivial case -- no change */ 1445 if (new_buffer_size == self->buffer_size) { 1446 return 0; 1447 } 1448 1449 /* check maximum */ 1450 if (new_buffer_size > INT_MAX) { 1451 char errmsg[100]; 1452 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX); 1453 PyErr_SetString(PyExc_ValueError, errmsg); 1454 return -1; 1455 } 1456 1457 if (self->buffer != NULL) { 1458 /* there is already a buffer */ 1459 if (self->buffer_used != 0) { 1460 if (flush_character_buffer(self) < 0) { 1461 return -1; 1462 } 1463 } 1464 /* free existing buffer */ 1465 PyMem_Free(self->buffer); 1466 } 1467 self->buffer = PyMem_Malloc(new_buffer_size); 1468 if (self->buffer == NULL) { 1469 PyErr_NoMemory(); 1470 return -1; 1471 } 1472 self->buffer_size = new_buffer_size; 1473 return 0; 1474 } 1475 1476 if (_PyUnicode_EqualToASCIIString(name, "CharacterDataHandler")) { 1477 /* If we're changing the character data handler, flush all 1478 * cached data with the old handler. Not sure there's a 1479 * "right" thing to do, though, but this probably won't 1480 * happen. 
1481 */ 1482 if (flush_character_buffer(self) < 0) 1483 return -1; 1484 } 1485 if (sethandler(self, name, v)) { 1486 return 0; 1487 } 1488 PyErr_SetObject(PyExc_AttributeError, name); 1489 return -1; 1490 } 1491 1492 static int 1493 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg) 1494 { 1495 int i; 1496 for (i = 0; handler_info[i].name != NULL; i++) 1497 Py_VISIT(op->handlers[i]); 1498 return 0; 1499 } 1500 1501 static int 1502 xmlparse_clear(xmlparseobject *op) 1503 { 1504 clear_handlers(op, 0); 1505 Py_CLEAR(op->intern); 1506 return 0; 1507 } 1508 1509 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser"); 1510 1511 static PyTypeObject Xmlparsetype = { 1512 PyVarObject_HEAD_INIT(NULL, 0) 1513 "pyexpat.xmlparser", /*tp_name*/ 1514 sizeof(xmlparseobject), /*tp_basicsize*/ 1515 0, /*tp_itemsize*/ 1516 /* methods */ 1517 (destructor)xmlparse_dealloc, /*tp_dealloc*/ 1518 (printfunc)0, /*tp_print*/ 1519 0, /*tp_getattr*/ 1520 0, /*tp_setattr*/ 1521 0, /*tp_reserved*/ 1522 (reprfunc)0, /*tp_repr*/ 1523 0, /*tp_as_number*/ 1524 0, /*tp_as_sequence*/ 1525 0, /*tp_as_mapping*/ 1526 (hashfunc)0, /*tp_hash*/ 1527 (ternaryfunc)0, /*tp_call*/ 1528 (reprfunc)0, /*tp_str*/ 1529 (getattrofunc)xmlparse_getattro, /* tp_getattro */ 1530 (setattrofunc)xmlparse_setattro, /* tp_setattro */ 1531 0, /* tp_as_buffer */ 1532 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 1533 Xmlparsetype__doc__, /* tp_doc - Documentation string */ 1534 (traverseproc)xmlparse_traverse, /* tp_traverse */ 1535 (inquiry)xmlparse_clear, /* tp_clear */ 1536 0, /* tp_richcompare */ 1537 0, /* tp_weaklistoffset */ 1538 0, /* tp_iter */ 1539 0, /* tp_iternext */ 1540 xmlparse_methods, /* tp_methods */ 1541 }; 1542 1543 /* End of code for xmlparser objects */ 1544 /* -------------------------------------------------------- */ 1545 1546 /*[clinic input] 1547 pyexpat.ParserCreate 1548 1549 encoding: str(accept={str, NoneType}) = NULL 1550 namespace_separator: str(accept={str, NoneType}) = NULL 1551 
intern: object = NULL

Return a new XML parser object.
[clinic start generated code]*/

/* Implementation of pyexpat.ParserCreate().
 *
 * Validates namespace_separator and intern, then delegates to
 * newxmlparseobject().  Returns a new reference to the parser, or NULL
 * with ValueError / TypeError (or whatever newxmlparseobject() raised) set.
 */
static PyObject *
pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
                          const char *namespace_separator, PyObject *intern)
/*[clinic end generated code: output=295c0cf01ab1146c input=23d29704acad385d]*/
{
    PyObject *fresh_dict = NULL;    /* dictionary created here, if any */
    PyObject *parser;

    /* Reject any separator of two or more characters. */
    if (namespace_separator != NULL
        && namespace_separator[0] != '\0'
        && namespace_separator[1] != '\0') {
        PyErr_SetString(PyExc_ValueError,
                        "namespace_separator must be at most one"
                        " character, omitted, or None");
        return NULL;
    }

    /* Not passing anything means that a new dictionary is used.
       Explicitly passing None means no interning is desired. */
    if (intern == NULL) {
        fresh_dict = PyDict_New();
        if (fresh_dict == NULL)
            return NULL;
        intern = fresh_dict;
    }
    else if (intern == Py_None) {
        intern = NULL;
    }
    else if (!PyDict_Check(intern)) {
        PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
        return NULL;
    }

    parser = newxmlparseobject(encoding, namespace_separator, intern);
    /* Release the reference held on a dictionary created above. */
    Py_XDECREF(fresh_dict);
    return parser;
}

/*[clinic input]
pyexpat.ErrorString

    code: long
    /

Returns string error for given number.
1600 [clinic start generated code]*/ 1601 1602 static PyObject * 1603 pyexpat_ErrorString_impl(PyObject *module, long code) 1604 /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/ 1605 { 1606 return Py_BuildValue("z", XML_ErrorString((int)code)); 1607 } 1608 1609 /* List of methods defined in the module */ 1610 1611 static struct PyMethodDef pyexpat_methods[] = { 1612 PYEXPAT_PARSERCREATE_METHODDEF 1613 PYEXPAT_ERRORSTRING_METHODDEF 1614 {NULL, NULL} /* sentinel */ 1615 }; 1616 1617 /* Module docstring */ 1618 1619 PyDoc_STRVAR(pyexpat_module_documentation, 1620 "Python wrapper for Expat parser."); 1621 1622 /* Initialization function for the module */ 1623 1624 #ifndef MODULE_NAME 1625 #define MODULE_NAME "pyexpat" 1626 #endif 1627 1628 #ifndef MODULE_INITFUNC 1629 #define MODULE_INITFUNC PyInit_pyexpat 1630 #endif 1631 1632 static struct PyModuleDef pyexpatmodule = { 1633 PyModuleDef_HEAD_INIT, 1634 MODULE_NAME, 1635 pyexpat_module_documentation, 1636 -1, 1637 pyexpat_methods, 1638 NULL, 1639 NULL, 1640 NULL, 1641 NULL 1642 }; 1643 1644 PyMODINIT_FUNC 1645 MODULE_INITFUNC(void) 1646 { 1647 PyObject *m, *d; 1648 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors"); 1649 PyObject *errors_module; 1650 PyObject *modelmod_name; 1651 PyObject *model_module; 1652 PyObject *sys_modules; 1653 PyObject *tmpnum, *tmpstr; 1654 PyObject *codes_dict; 1655 PyObject *rev_codes_dict; 1656 int res; 1657 static struct PyExpat_CAPI capi; 1658 PyObject *capi_object; 1659 1660 if (errmod_name == NULL) 1661 return NULL; 1662 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model"); 1663 if (modelmod_name == NULL) 1664 return NULL; 1665 1666 if (PyType_Ready(&Xmlparsetype) < 0) 1667 return NULL; 1668 1669 /* Create the module and add the functions */ 1670 m = PyModule_Create(&pyexpatmodule); 1671 if (m == NULL) 1672 return NULL; 1673 1674 /* Add some symbolic constants to the module */ 1675 if (ErrorObject == NULL) { 1676 ErrorObject = 
PyErr_NewException("xml.parsers.expat.ExpatError", 1677 NULL, NULL); 1678 if (ErrorObject == NULL) 1679 return NULL; 1680 } 1681 Py_INCREF(ErrorObject); 1682 PyModule_AddObject(m, "error", ErrorObject); 1683 Py_INCREF(ErrorObject); 1684 PyModule_AddObject(m, "ExpatError", ErrorObject); 1685 Py_INCREF(&Xmlparsetype); 1686 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype); 1687 1688 PyModule_AddStringConstant(m, "EXPAT_VERSION", 1689 XML_ExpatVersion()); 1690 { 1691 XML_Expat_Version info = XML_ExpatVersionInfo(); 1692 PyModule_AddObject(m, "version_info", 1693 Py_BuildValue("(iii)", info.major, 1694 info.minor, info.micro)); 1695 } 1696 /* XXX When Expat supports some way of figuring out how it was 1697 compiled, this should check and set native_encoding 1698 appropriately. 1699 */ 1700 PyModule_AddStringConstant(m, "native_encoding", "UTF-8"); 1701 1702 sys_modules = PySys_GetObject("modules"); 1703 if (sys_modules == NULL) { 1704 Py_DECREF(m); 1705 return NULL; 1706 } 1707 d = PyModule_GetDict(m); 1708 if (d == NULL) { 1709 Py_DECREF(m); 1710 return NULL; 1711 } 1712 errors_module = PyDict_GetItem(d, errmod_name); 1713 if (errors_module == NULL) { 1714 errors_module = PyModule_New(MODULE_NAME ".errors"); 1715 if (errors_module != NULL) { 1716 PyDict_SetItem(sys_modules, errmod_name, errors_module); 1717 /* gives away the reference to errors_module */ 1718 PyModule_AddObject(m, "errors", errors_module); 1719 } 1720 } 1721 Py_DECREF(errmod_name); 1722 model_module = PyDict_GetItem(d, modelmod_name); 1723 if (model_module == NULL) { 1724 model_module = PyModule_New(MODULE_NAME ".model"); 1725 if (model_module != NULL) { 1726 PyDict_SetItem(sys_modules, modelmod_name, model_module); 1727 /* gives away the reference to model_module */ 1728 PyModule_AddObject(m, "model", model_module); 1729 } 1730 } 1731 Py_DECREF(modelmod_name); 1732 if (errors_module == NULL || model_module == NULL) { 1733 /* Don't core dump later! 
*/ 1734 Py_DECREF(m); 1735 return NULL; 1736 } 1737 1738 #if XML_COMBINED_VERSION > 19505 1739 { 1740 const XML_Feature *features = XML_GetFeatureList(); 1741 PyObject *list = PyList_New(0); 1742 if (list == NULL) 1743 /* just ignore it */ 1744 PyErr_Clear(); 1745 else { 1746 int i = 0; 1747 for (; features[i].feature != XML_FEATURE_END; ++i) { 1748 int ok; 1749 PyObject *item = Py_BuildValue("si", features[i].name, 1750 features[i].value); 1751 if (item == NULL) { 1752 Py_DECREF(list); 1753 list = NULL; 1754 break; 1755 } 1756 ok = PyList_Append(list, item); 1757 Py_DECREF(item); 1758 if (ok < 0) { 1759 PyErr_Clear(); 1760 break; 1761 } 1762 } 1763 if (list != NULL) 1764 PyModule_AddObject(m, "features", list); 1765 } 1766 } 1767 #endif 1768 1769 codes_dict = PyDict_New(); 1770 rev_codes_dict = PyDict_New(); 1771 if (codes_dict == NULL || rev_codes_dict == NULL) { 1772 Py_XDECREF(codes_dict); 1773 Py_XDECREF(rev_codes_dict); 1774 return NULL; 1775 } 1776 1777 #define MYCONST(name) \ 1778 if (PyModule_AddStringConstant(errors_module, #name, \ 1779 XML_ErrorString(name)) < 0) \ 1780 return NULL; \ 1781 tmpnum = PyLong_FromLong(name); \ 1782 if (tmpnum == NULL) return NULL; \ 1783 res = PyDict_SetItemString(codes_dict, \ 1784 XML_ErrorString(name), tmpnum); \ 1785 if (res < 0) return NULL; \ 1786 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \ 1787 if (tmpstr == NULL) return NULL; \ 1788 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \ 1789 Py_DECREF(tmpstr); \ 1790 Py_DECREF(tmpnum); \ 1791 if (res < 0) return NULL; \ 1792 1793 MYCONST(XML_ERROR_NO_MEMORY); 1794 MYCONST(XML_ERROR_SYNTAX); 1795 MYCONST(XML_ERROR_NO_ELEMENTS); 1796 MYCONST(XML_ERROR_INVALID_TOKEN); 1797 MYCONST(XML_ERROR_UNCLOSED_TOKEN); 1798 MYCONST(XML_ERROR_PARTIAL_CHAR); 1799 MYCONST(XML_ERROR_TAG_MISMATCH); 1800 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE); 1801 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT); 1802 MYCONST(XML_ERROR_PARAM_ENTITY_REF); 1803 MYCONST(XML_ERROR_UNDEFINED_ENTITY); 
1804 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF); 1805 MYCONST(XML_ERROR_ASYNC_ENTITY); 1806 MYCONST(XML_ERROR_BAD_CHAR_REF); 1807 MYCONST(XML_ERROR_BINARY_ENTITY_REF); 1808 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF); 1809 MYCONST(XML_ERROR_MISPLACED_XML_PI); 1810 MYCONST(XML_ERROR_UNKNOWN_ENCODING); 1811 MYCONST(XML_ERROR_INCORRECT_ENCODING); 1812 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION); 1813 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING); 1814 MYCONST(XML_ERROR_NOT_STANDALONE); 1815 MYCONST(XML_ERROR_UNEXPECTED_STATE); 1816 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE); 1817 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD); 1818 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING); 1819 /* Added in Expat 1.95.7. */ 1820 MYCONST(XML_ERROR_UNBOUND_PREFIX); 1821 /* Added in Expat 1.95.8. */ 1822 MYCONST(XML_ERROR_UNDECLARING_PREFIX); 1823 MYCONST(XML_ERROR_INCOMPLETE_PE); 1824 MYCONST(XML_ERROR_XML_DECL); 1825 MYCONST(XML_ERROR_TEXT_DECL); 1826 MYCONST(XML_ERROR_PUBLICID); 1827 MYCONST(XML_ERROR_SUSPENDED); 1828 MYCONST(XML_ERROR_NOT_SUSPENDED); 1829 MYCONST(XML_ERROR_ABORTED); 1830 MYCONST(XML_ERROR_FINISHED); 1831 MYCONST(XML_ERROR_SUSPEND_PE); 1832 1833 if (PyModule_AddStringConstant(errors_module, "__doc__", 1834 "Constants used to describe " 1835 "error conditions.") < 0) 1836 return NULL; 1837 1838 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0) 1839 return NULL; 1840 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0) 1841 return NULL; 1842 1843 #undef MYCONST 1844 1845 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c) 1846 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER); 1847 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); 1848 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS); 1849 #undef MYCONST 1850 1851 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c) 1852 PyModule_AddStringConstant(model_module, "__doc__", 1853 "Constants used to interpret content model information."); 1854 1855 MYCONST(XML_CTYPE_EMPTY); 1856 
MYCONST(XML_CTYPE_ANY); 1857 MYCONST(XML_CTYPE_MIXED); 1858 MYCONST(XML_CTYPE_NAME); 1859 MYCONST(XML_CTYPE_CHOICE); 1860 MYCONST(XML_CTYPE_SEQ); 1861 1862 MYCONST(XML_CQUANT_NONE); 1863 MYCONST(XML_CQUANT_OPT); 1864 MYCONST(XML_CQUANT_REP); 1865 MYCONST(XML_CQUANT_PLUS); 1866 #undef MYCONST 1867 1868 /* initialize pyexpat dispatch table */ 1869 capi.size = sizeof(capi); 1870 capi.magic = PyExpat_CAPI_MAGIC; 1871 capi.MAJOR_VERSION = XML_MAJOR_VERSION; 1872 capi.MINOR_VERSION = XML_MINOR_VERSION; 1873 capi.MICRO_VERSION = XML_MICRO_VERSION; 1874 capi.ErrorString = XML_ErrorString; 1875 capi.GetErrorCode = XML_GetErrorCode; 1876 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber; 1877 capi.GetErrorLineNumber = XML_GetErrorLineNumber; 1878 capi.Parse = XML_Parse; 1879 capi.ParserCreate_MM = XML_ParserCreate_MM; 1880 capi.ParserFree = XML_ParserFree; 1881 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler; 1882 capi.SetCommentHandler = XML_SetCommentHandler; 1883 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand; 1884 capi.SetElementHandler = XML_SetElementHandler; 1885 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler; 1886 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler; 1887 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler; 1888 capi.SetUserData = XML_SetUserData; 1889 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler; 1890 capi.SetEncoding = XML_SetEncoding; 1891 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler; 1892 1893 /* export using capsule */ 1894 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL); 1895 if (capi_object) 1896 PyModule_AddObject(m, "expat_CAPI", capi_object); 1897 return m; 1898 } 1899 1900 static void 1901 clear_handlers(xmlparseobject *self, int initial) 1902 { 1903 int i = 0; 1904 1905 for (; handler_info[i].name != NULL; i++) { 1906 if (initial) 1907 self->handlers[i] = NULL; 1908 else { 1909 Py_CLEAR(self->handlers[i]); 1910 
handler_info[i].setter(self->itself, NULL); 1911 } 1912 } 1913 } 1914 1915 static struct HandlerInfo handler_info[] = { 1916 {"StartElementHandler", 1917 (xmlhandlersetter)XML_SetStartElementHandler, 1918 (xmlhandler)my_StartElementHandler}, 1919 {"EndElementHandler", 1920 (xmlhandlersetter)XML_SetEndElementHandler, 1921 (xmlhandler)my_EndElementHandler}, 1922 {"ProcessingInstructionHandler", 1923 (xmlhandlersetter)XML_SetProcessingInstructionHandler, 1924 (xmlhandler)my_ProcessingInstructionHandler}, 1925 {"CharacterDataHandler", 1926 (xmlhandlersetter)XML_SetCharacterDataHandler, 1927 (xmlhandler)my_CharacterDataHandler}, 1928 {"UnparsedEntityDeclHandler", 1929 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler, 1930 (xmlhandler)my_UnparsedEntityDeclHandler}, 1931 {"NotationDeclHandler", 1932 (xmlhandlersetter)XML_SetNotationDeclHandler, 1933 (xmlhandler)my_NotationDeclHandler}, 1934 {"StartNamespaceDeclHandler", 1935 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler, 1936 (xmlhandler)my_StartNamespaceDeclHandler}, 1937 {"EndNamespaceDeclHandler", 1938 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler, 1939 (xmlhandler)my_EndNamespaceDeclHandler}, 1940 {"CommentHandler", 1941 (xmlhandlersetter)XML_SetCommentHandler, 1942 (xmlhandler)my_CommentHandler}, 1943 {"StartCdataSectionHandler", 1944 (xmlhandlersetter)XML_SetStartCdataSectionHandler, 1945 (xmlhandler)my_StartCdataSectionHandler}, 1946 {"EndCdataSectionHandler", 1947 (xmlhandlersetter)XML_SetEndCdataSectionHandler, 1948 (xmlhandler)my_EndCdataSectionHandler}, 1949 {"DefaultHandler", 1950 (xmlhandlersetter)XML_SetDefaultHandler, 1951 (xmlhandler)my_DefaultHandler}, 1952 {"DefaultHandlerExpand", 1953 (xmlhandlersetter)XML_SetDefaultHandlerExpand, 1954 (xmlhandler)my_DefaultHandlerExpandHandler}, 1955 {"NotStandaloneHandler", 1956 (xmlhandlersetter)XML_SetNotStandaloneHandler, 1957 (xmlhandler)my_NotStandaloneHandler}, 1958 {"ExternalEntityRefHandler", 1959 (xmlhandlersetter)XML_SetExternalEntityRefHandler, 
1960 (xmlhandler)my_ExternalEntityRefHandler}, 1961 {"StartDoctypeDeclHandler", 1962 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler, 1963 (xmlhandler)my_StartDoctypeDeclHandler}, 1964 {"EndDoctypeDeclHandler", 1965 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler, 1966 (xmlhandler)my_EndDoctypeDeclHandler}, 1967 {"EntityDeclHandler", 1968 (xmlhandlersetter)XML_SetEntityDeclHandler, 1969 (xmlhandler)my_EntityDeclHandler}, 1970 {"XmlDeclHandler", 1971 (xmlhandlersetter)XML_SetXmlDeclHandler, 1972 (xmlhandler)my_XmlDeclHandler}, 1973 {"ElementDeclHandler", 1974 (xmlhandlersetter)XML_SetElementDeclHandler, 1975 (xmlhandler)my_ElementDeclHandler}, 1976 {"AttlistDeclHandler", 1977 (xmlhandlersetter)XML_SetAttlistDeclHandler, 1978 (xmlhandler)my_AttlistDeclHandler}, 1979 #if XML_COMBINED_VERSION >= 19504 1980 {"SkippedEntityHandler", 1981 (xmlhandlersetter)XML_SetSkippedEntityHandler, 1982 (xmlhandler)my_SkippedEntityHandler}, 1983 #endif 1984 1985 {NULL, NULL, NULL} /* sentinel */ 1986 }; 1987 1988 /*[clinic input] 1989 dump buffer 1990 [clinic start generated code]*/ 1991 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=524ce2e021e4eba6]*/ 1992