1 /* 2 * Summary: the core parser module 3 * Description: Interfaces, constants and types related to the XML parser 4 * 5 * Copy: See Copyright for the status of this software. 6 * 7 * Author: Daniel Veillard 8 */ 9 10 #ifndef __XML_PARSER_H__ 11 #define __XML_PARSER_H__ 12 13 #include <stdarg.h> 14 15 #include <libxml/xmlversion.h> 16 #include <libxml/tree.h> 17 #include <libxml/dict.h> 18 #include <libxml/hash.h> 19 #include <libxml/valid.h> 20 #include <libxml/entities.h> 21 #include <libxml/xmlerror.h> 22 #include <libxml/xmlstring.h> 23 24 #ifdef __cplusplus 25 extern "C" { 26 #endif 27 28 /** 29 * XML_DEFAULT_VERSION: 30 * 31 * The default version of XML used: 1.0 32 */ 33 #define XML_DEFAULT_VERSION "1.0" 34 35 /** 36 * xmlParserInput: 37 * 38 * An xmlParserInput is an input flow for the XML processor. 39 * Each entity parsed is associated an xmlParserInput (except the 40 * few predefined ones). This is the case both for internal entities 41 * - in which case the flow is already completely in memory - or 42 * external entities - in which case we use the buf structure for 43 * progressive reading and I18N conversions to the internal UTF-8 format. 44 */ 45 46 /** 47 * xmlParserInputDeallocate: 48 * @str: the string to deallocate 49 * 50 * Callback for freeing some parser input allocations. 51 */ 52 typedef void (* xmlParserInputDeallocate)(xmlChar *str); 53 54 struct _xmlParserInput { 55 /* Input buffer */ 56 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ 57 58 const char *filename; /* The file analyzed, if any */ 59 const char *directory; /* the directory/base of the file */ 60 const xmlChar *base; /* Base of the array to parse */ 61 const xmlChar *cur; /* Current char being parsed */ 62 const xmlChar *end; /* end of the array to parse */ 63 int length; /* length if known */ 64 int line; /* Current line */ 65 int col; /* Current column */ 66 /* 67 * NOTE: consumed is only tested for equality in the parser code, 68 * so even if there is an overflow this should not give troubles 69 * for parsing very large instances. 70 */ 71 unsigned long consumed; /* How many xmlChars already consumed */ 72 xmlParserInputDeallocate free; /* function to deallocate the base */ 73 const xmlChar *encoding; /* the encoding string for entity */ 74 const xmlChar *version; /* the version string for entity */ 75 int standalone; /* Was that entity marked standalone */ 76 int id; /* an unique identifier for the entity */ 77 }; 78 79 /** 80 * xmlParserNodeInfo: 81 * 82 * The parser can be asked to collect Node informations, i.e. at what 83 * place in the file they were detected. 84 * NOTE: This is off by default and not very well tested. 85 */ 86 typedef struct _xmlParserNodeInfo xmlParserNodeInfo; 87 typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; 88 89 struct _xmlParserNodeInfo { 90 const struct _xmlNode* node; 91 /* Position & line # that text that created the node begins & ends on */ 92 unsigned long begin_pos; 93 unsigned long begin_line; 94 unsigned long end_pos; 95 unsigned long end_line; 96 }; 97 98 typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; 99 typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; 100 struct _xmlParserNodeInfoSeq { 101 unsigned long maximum; 102 unsigned long length; 103 xmlParserNodeInfo* buffer; 104 }; 105 106 /** 107 * xmlParserInputState: 108 * 109 * The parser is now working also as a state based parser. 110 * The recursive one use the state info for entities processing. 111 */ 112 typedef enum { 113 XML_PARSER_EOF = -1, /* nothing is to be parsed */ 114 XML_PARSER_START = 0, /* nothing has been parsed */ 115 XML_PARSER_MISC, /* Misc* before int subset */ 116 XML_PARSER_PI, /* Within a processing instruction */ 117 XML_PARSER_DTD, /* within some DTD content */ 118 XML_PARSER_PROLOG, /* Misc* after internal subset */ 119 XML_PARSER_COMMENT, /* within a comment */ 120 XML_PARSER_START_TAG, /* within a start tag */ 121 XML_PARSER_CONTENT, /* within the content */ 122 XML_PARSER_CDATA_SECTION, /* within a CDATA section */ 123 XML_PARSER_END_TAG, /* within a closing tag */ 124 XML_PARSER_ENTITY_DECL, /* within an entity declaration */ 125 XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ 126 XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ 127 XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ 128 XML_PARSER_EPILOG, /* the Misc* after the last end tag */ 129 XML_PARSER_IGNORE, /* within an IGNORED section */ 130 XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */ 131 } xmlParserInputState; 132 133 /** 134 * XML_DETECT_IDS: 135 * 136 * Bit in the loadsubset context field to tell to do ID/REFs lookups. 137 * Use it to initialize xmlLoadExtDtdDefaultValue. 138 */ 139 #define XML_DETECT_IDS 2 140 141 /** 142 * XML_COMPLETE_ATTRS: 143 * 144 * Bit in the loadsubset context field to tell to do complete the 145 * elements attributes lists with the ones defaulted from the DTDs. 146 * Use it to initialize xmlLoadExtDtdDefaultValue. 147 */ 148 #define XML_COMPLETE_ATTRS 4 149 150 /** 151 * XML_SKIP_IDS: 152 * 153 * Bit in the loadsubset context field to tell to not do ID/REFs registration. 154 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases. 155 */ 156 #define XML_SKIP_IDS 8 157 158 /** 159 * xmlParserMode: 160 * 161 * A parser can operate in various modes 162 */ 163 typedef enum { 164 XML_PARSE_UNKNOWN = 0, 165 XML_PARSE_DOM = 1, 166 XML_PARSE_SAX = 2, 167 XML_PARSE_PUSH_DOM = 3, 168 XML_PARSE_PUSH_SAX = 4, 169 XML_PARSE_READER = 5 170 } xmlParserMode; 171 172 /** 173 * xmlParserCtxt: 174 * 175 * The parser context. 176 * NOTE This doesn't completely define the parser state, the (current ?) 177 * design of the parser uses recursive function calls since this allow 178 * and easy mapping from the production rules of the specification 179 * to the actual code. The drawback is that the actual function call 180 * also reflect the parser state. However most of the parsing routines 181 * takes as the only argument the parser context pointer, so migrating 182 * to a state based parser for progressive parsing shouldn't be too hard. 183 */ 184 struct _xmlParserCtxt { 185 struct _xmlSAXHandler *sax; /* The SAX handler */ 186 void *userData; /* For SAX interface only, used by DOM build */ 187 xmlDocPtr myDoc; /* the document being built */ 188 int wellFormed; /* is the document well formed */ 189 int replaceEntities; /* shall we replace entities ? */ 190 const xmlChar *version; /* the XML version string */ 191 const xmlChar *encoding; /* the declared encoding, if any */ 192 int standalone; /* standalone document */ 193 int html; /* an HTML(1)/Docbook(2) document */ 194 195 /* Input stream stack */ 196 xmlParserInputPtr input; /* Current input stream */ 197 int inputNr; /* Number of current input streams */ 198 int inputMax; /* Max number of input streams */ 199 xmlParserInputPtr *inputTab; /* stack of inputs */ 200 201 /* Node analysis stack only used for DOM building */ 202 xmlNodePtr node; /* Current parsed Node */ 203 int nodeNr; /* Depth of the parsing stack */ 204 int nodeMax; /* Max depth of the parsing stack */ 205 xmlNodePtr *nodeTab; /* array of nodes */ 206 207 int record_info; /* Whether node info should be kept */ 208 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ 209 210 int errNo; /* error code */ 211 212 int hasExternalSubset; /* reference and external subset */ 213 int hasPErefs; /* the internal subset has PE refs */ 214 int external; /* are we parsing an external entity */ 215 216 int valid; /* is the document valid */ 217 int validate; /* shall we try to validate ? */ 218 xmlValidCtxt vctxt; /* The validity context */ 219 220 xmlParserInputState instate; /* current type of input */ 221 int token; /* next char look-ahead */ 222 223 char *directory; /* the data directory */ 224 225 /* Node name stack */ 226 const xmlChar *name; /* Current parsed Node */ 227 int nameNr; /* Depth of the parsing stack */ 228 int nameMax; /* Max depth of the parsing stack */ 229 const xmlChar * *nameTab; /* array of nodes */ 230 231 long nbChars; /* number of xmlChar processed */ 232 long checkIndex; /* used by progressive parsing lookup */ 233 int keepBlanks; /* ugly but ... */ 234 int disableSAX; /* SAX callbacks are disabled */ 235 int inSubset; /* Parsing is in int 1/ext 2 subset */ 236 const xmlChar * intSubName; /* name of subset */ 237 xmlChar * extSubURI; /* URI of external subset */ 238 xmlChar * extSubSystem; /* SYSTEM ID of external subset */ 239 240 /* xml:space values */ 241 int * space; /* Should the parser preserve spaces */ 242 int spaceNr; /* Depth of the parsing stack */ 243 int spaceMax; /* Max depth of the parsing stack */ 244 int * spaceTab; /* array of space infos */ 245 246 int depth; /* to prevent entity substitution loops */ 247 xmlParserInputPtr entity; /* used to check entities boundaries */ 248 int charset; /* encoding of the in-memory content 249 actually an xmlCharEncoding */ 250 int nodelen; /* Those two fields are there to */ 251 int nodemem; /* Speed up large node parsing */ 252 int pedantic; /* signal pedantic warnings */ 253 void *_private; /* For user data, libxml won't touch it */ 254 255 int loadsubset; /* should the external subset be loaded */ 256 int linenumbers; /* set line number in element content */ 257 void *catalogs; /* document's own catalog */ 258 int recovery; /* run in recovery mode */ 259 int progressive; /* is this a progressive parsing */ 260 xmlDictPtr dict; /* dictionnary for the parser */ 261 const xmlChar * *atts; /* array for the attributes callbacks */ 262 int maxatts; /* the size of the array */ 263 int docdict; /* use strings from dict to build tree */ 264 265 /* 266 * pre-interned strings 267 */ 268 const xmlChar *str_xml; 269 const xmlChar *str_xmlns; 270 const xmlChar *str_xml_ns; 271 272 /* 273 * Everything below is used only by the new SAX mode 274 */ 275 int sax2; /* operating in the new SAX mode */ 276 int nsNr; /* the number of inherited namespaces */ 277 int nsMax; /* the size of the arrays */ 278 const xmlChar * *nsTab; /* the array of prefix/namespace name */ 279 struct _xmlParserCtxt *nsParent; /* parent context to inherit namespaces from * */ 280 int *attallocs; /* which attribute were allocated */ 281 void * *pushTab; /* array of data for push */ 282 xmlHashTablePtr attsDefault; /* defaulted attributes if any */ 283 xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */ 284 int nsWellFormed; /* is the document XML Nanespace okay */ 285 int options; /* Extra options */ 286 287 /* 288 * Those fields are needed only for treaming parsing so far 289 */ 290 int dictNames; /* Use dictionary names for the tree */ 291 int freeElemsNr; /* number of freed element nodes */ 292 xmlNodePtr freeElems; /* List of freed element nodes */ 293 int freeAttrsNr; /* number of freed attributes nodes */ 294 xmlAttrPtr freeAttrs; /* List of freed attributes nodes */ 295 296 /* 297 * the complete error informations for the last error. 298 */ 299 xmlError lastError; 300 xmlParserMode parseMode; /* the parser mode */ 301 unsigned long nbentities; /* number of entities references */ 302 unsigned long sizeentities; /* size of parsed entities */ 303 }; 304 305 /** 306 * xmlSAXLocator: 307 * 308 * A SAX Locator. 309 */ 310 struct _xmlSAXLocator { 311 const xmlChar *(*getPublicId)(void *ctx); 312 const xmlChar *(*getSystemId)(void *ctx); 313 int (*getLineNumber)(void *ctx); 314 int (*getColumnNumber)(void *ctx); 315 }; 316 317 /** 318 * xmlSAXHandler: 319 * 320 * A SAX handler is bunch of callbacks called by the parser when processing 321 * of the input generate data or structure informations. 322 */ 323 324 /** 325 * resolveEntitySAXFunc: 326 * @ctx: the user data (XML parser context) 327 * @publicId: The public ID of the entity 328 * @systemId: The system ID of the entity 329 * 330 * Callback: 331 * The entity loader, to control the loading of external entities, 332 * the application can either: 333 * - override this resolveEntity() callback in the SAX block 334 * - or better use the xmlSetExternalEntityLoader() function to 335 * set up it's own entity resolution routine 336 * 337 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. 338 */ 339 typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, 340 const xmlChar *publicId, 341 const xmlChar *systemId); 342 /** 343 * internalSubsetSAXFunc: 344 * @ctx: the user data (XML parser context) 345 * @name: the root element name 346 * @ExternalID: the external ID 347 * @SystemID: the SYSTEM ID (e.g. filename or URL) 348 * 349 * Callback on internal subset declaration. 350 */ 351 typedef void (*internalSubsetSAXFunc) (void *ctx, 352 const xmlChar *name, 353 const xmlChar *ExternalID, 354 const xmlChar *SystemID); 355 /** 356 * externalSubsetSAXFunc: 357 * @ctx: the user data (XML parser context) 358 * @name: the root element name 359 * @ExternalID: the external ID 360 * @SystemID: the SYSTEM ID (e.g. filename or URL) 361 * 362 * Callback on external subset declaration. 363 */ 364 typedef void (*externalSubsetSAXFunc) (void *ctx, 365 const xmlChar *name, 366 const xmlChar *ExternalID, 367 const xmlChar *SystemID); 368 /** 369 * getEntitySAXFunc: 370 * @ctx: the user data (XML parser context) 371 * @name: The entity name 372 * 373 * Get an entity by name. 374 * 375 * Returns the xmlEntityPtr if found. 376 */ 377 typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, 378 const xmlChar *name); 379 /** 380 * getParameterEntitySAXFunc: 381 * @ctx: the user data (XML parser context) 382 * @name: The entity name 383 * 384 * Get a parameter entity by name. 385 * 386 * Returns the xmlEntityPtr if found. 387 */ 388 typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, 389 const xmlChar *name); 390 /** 391 * entityDeclSAXFunc: 392 * @ctx: the user data (XML parser context) 393 * @name: the entity name 394 * @type: the entity type 395 * @publicId: The public ID of the entity 396 * @systemId: The system ID of the entity 397 * @content: the entity value (without processing). 398 * 399 * An entity definition has been parsed. 400 */ 401 typedef void (*entityDeclSAXFunc) (void *ctx, 402 const xmlChar *name, 403 int type, 404 const xmlChar *publicId, 405 const xmlChar *systemId, 406 xmlChar *content); 407 /** 408 * notationDeclSAXFunc: 409 * @ctx: the user data (XML parser context) 410 * @name: The name of the notation 411 * @publicId: The public ID of the entity 412 * @systemId: The system ID of the entity 413 * 414 * What to do when a notation declaration has been parsed. 415 */ 416 typedef void (*notationDeclSAXFunc)(void *ctx, 417 const xmlChar *name, 418 const xmlChar *publicId, 419 const xmlChar *systemId); 420 /** 421 * attributeDeclSAXFunc: 422 * @ctx: the user data (XML parser context) 423 * @elem: the name of the element 424 * @fullname: the attribute name 425 * @type: the attribute type 426 * @def: the type of default value 427 * @defaultValue: the attribute default value 428 * @tree: the tree of enumerated value set 429 * 430 * An attribute definition has been parsed. 431 */ 432 typedef void (*attributeDeclSAXFunc)(void *ctx, 433 const xmlChar *elem, 434 const xmlChar *fullname, 435 int type, 436 int def, 437 const xmlChar *defaultValue, 438 xmlEnumerationPtr tree); 439 /** 440 * elementDeclSAXFunc: 441 * @ctx: the user data (XML parser context) 442 * @name: the element name 443 * @type: the element type 444 * @content: the element value tree 445 * 446 * An element definition has been parsed. 447 */ 448 typedef void (*elementDeclSAXFunc)(void *ctx, 449 const xmlChar *name, 450 int type, 451 xmlElementContentPtr content); 452 /** 453 * unparsedEntityDeclSAXFunc: 454 * @ctx: the user data (XML parser context) 455 * @name: The name of the entity 456 * @publicId: The public ID of the entity 457 * @systemId: The system ID of the entity 458 * @notationName: the name of the notation 459 * 460 * What to do when an unparsed entity declaration is parsed. 461 */ 462 typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, 463 const xmlChar *name, 464 const xmlChar *publicId, 465 const xmlChar *systemId, 466 const xmlChar *notationName); 467 /** 468 * setDocumentLocatorSAXFunc: 469 * @ctx: the user data (XML parser context) 470 * @loc: A SAX Locator 471 * 472 * Receive the document locator at startup, actually xmlDefaultSAXLocator. 473 * Everything is available on the context, so this is useless in our case. 474 */ 475 typedef void (*setDocumentLocatorSAXFunc) (void *ctx, 476 xmlSAXLocatorPtr loc); 477 /** 478 * startDocumentSAXFunc: 479 * @ctx: the user data (XML parser context) 480 * 481 * Called when the document start being processed. 482 */ 483 typedef void (*startDocumentSAXFunc) (void *ctx); 484 /** 485 * endDocumentSAXFunc: 486 * @ctx: the user data (XML parser context) 487 * 488 * Called when the document end has been detected. 489 */ 490 typedef void (*endDocumentSAXFunc) (void *ctx); 491 /** 492 * startElementSAXFunc: 493 * @ctx: the user data (XML parser context) 494 * @name: The element name, including namespace prefix 495 * @atts: An array of name/value attributes pairs, NULL terminated 496 * 497 * Called when an opening tag has been processed. 498 */ 499 typedef void (*startElementSAXFunc) (void *ctx, 500 const xmlChar *name, 501 const xmlChar **atts); 502 /** 503 * endElementSAXFunc: 504 * @ctx: the user data (XML parser context) 505 * @name: The element name 506 * 507 * Called when the end of an element has been detected. 508 */ 509 typedef void (*endElementSAXFunc) (void *ctx, 510 const xmlChar *name); 511 /** 512 * attributeSAXFunc: 513 * @ctx: the user data (XML parser context) 514 * @name: The attribute name, including namespace prefix 515 * @value: The attribute value 516 * 517 * Handle an attribute that has been read by the parser. 518 * The default handling is to convert the attribute into an 519 * DOM subtree and past it in a new xmlAttr element added to 520 * the element. 521 */ 522 typedef void (*attributeSAXFunc) (void *ctx, 523 const xmlChar *name, 524 const xmlChar *value); 525 /** 526 * referenceSAXFunc: 527 * @ctx: the user data (XML parser context) 528 * @name: The entity name 529 * 530 * Called when an entity reference is detected. 531 */ 532 typedef void (*referenceSAXFunc) (void *ctx, 533 const xmlChar *name); 534 /** 535 * charactersSAXFunc: 536 * @ctx: the user data (XML parser context) 537 * @ch: a xmlChar string 538 * @len: the number of xmlChar 539 * 540 * Receiving some chars from the parser. 541 */ 542 typedef void (*charactersSAXFunc) (void *ctx, 543 const xmlChar *ch, 544 int len); 545 /** 546 * ignorableWhitespaceSAXFunc: 547 * @ctx: the user data (XML parser context) 548 * @ch: a xmlChar string 549 * @len: the number of xmlChar 550 * 551 * Receiving some ignorable whitespaces from the parser. 552 * UNUSED: by default the DOM building will use characters. 553 */ 554 typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, 555 const xmlChar *ch, 556 int len); 557 /** 558 * processingInstructionSAXFunc: 559 * @ctx: the user data (XML parser context) 560 * @target: the target name 561 * @data: the PI data's 562 * 563 * A processing instruction has been parsed. 564 */ 565 typedef void (*processingInstructionSAXFunc) (void *ctx, 566 const xmlChar *target, 567 const xmlChar *data); 568 /** 569 * commentSAXFunc: 570 * @ctx: the user data (XML parser context) 571 * @value: the comment content 572 * 573 * A comment has been parsed. 574 */ 575 typedef void (*commentSAXFunc) (void *ctx, 576 const xmlChar *value); 577 /** 578 * cdataBlockSAXFunc: 579 * @ctx: the user data (XML parser context) 580 * @value: The pcdata content 581 * @len: the block length 582 * 583 * Called when a pcdata block has been parsed. 584 */ 585 typedef void (*cdataBlockSAXFunc) ( 586 void *ctx, 587 const xmlChar *value, 588 int len); 589 /** 590 * warningSAXFunc: 591 * @ctx: an XML parser context 592 * @msg: the message to display/transmit 593 * @...: extra parameters for the message display 594 * 595 * Display and format a warning messages, callback. 596 */ 597 typedef void (XMLCDECL *warningSAXFunc) (void *ctx, 598 const char *msg, ...) ATTRIBUTE_PRINTF(2,3); 599 /** 600 * errorSAXFunc: 601 * @ctx: an XML parser context 602 * @msg: the message to display/transmit 603 * @...: extra parameters for the message display 604 * 605 * Display and format an error messages, callback. 606 */ 607 typedef void (XMLCDECL *errorSAXFunc) (void *ctx, 608 const char *msg, ...) ATTRIBUTE_PRINTF(2,3); 609 /** 610 * fatalErrorSAXFunc: 611 * @ctx: an XML parser context 612 * @msg: the message to display/transmit 613 * @...: extra parameters for the message display 614 * 615 * Display and format fatal error messages, callback. 616 * Note: so far fatalError() SAX callbacks are not used, error() 617 * get all the callbacks for errors. 618 */ 619 typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx, 620 const char *msg, ...) ATTRIBUTE_PRINTF(2,3); 621 /** 622 * isStandaloneSAXFunc: 623 * @ctx: the user data (XML parser context) 624 * 625 * Is this document tagged standalone? 626 * 627 * Returns 1 if true 628 */ 629 typedef int (*isStandaloneSAXFunc) (void *ctx); 630 /** 631 * hasInternalSubsetSAXFunc: 632 * @ctx: the user data (XML parser context) 633 * 634 * Does this document has an internal subset. 635 * 636 * Returns 1 if true 637 */ 638 typedef int (*hasInternalSubsetSAXFunc) (void *ctx); 639 640 /** 641 * hasExternalSubsetSAXFunc: 642 * @ctx: the user data (XML parser context) 643 * 644 * Does this document has an external subset? 645 * 646 * Returns 1 if true 647 */ 648 typedef int (*hasExternalSubsetSAXFunc) (void *ctx); 649 650 /************************************************************************ 651 * * 652 * The SAX version 2 API extensions * 653 * * 654 ************************************************************************/ 655 /** 656 * XML_SAX2_MAGIC: 657 * 658 * Special constant found in SAX2 blocks initialized fields 659 */ 660 #define XML_SAX2_MAGIC 0xDEEDBEAF 661 662 /** 663 * startElementNsSAX2Func: 664 * @ctx: the user data (XML parser context) 665 * @localname: the local name of the element 666 * @prefix: the element namespace prefix if available 667 * @URI: the element namespace name if available 668 * @nb_namespaces: number of namespace definitions on that node 669 * @namespaces: pointer to the array of prefix/URI pairs namespace definitions 670 * @nb_attributes: the number of attributes on that node 671 * @nb_defaulted: the number of defaulted attributes. The defaulted 672 * ones are at the end of the array 673 * @attributes: pointer to the array of (localname/prefix/URI/value/end) 674 * attribute values. 675 * 676 * SAX2 callback when an element start has been detected by the parser. 677 * It provides the namespace informations for the element, as well as 678 * the new namespace declarations on the element. 679 */ 680 681 typedef void (*startElementNsSAX2Func) (void *ctx, 682 const xmlChar *localname, 683 const xmlChar *prefix, 684 const xmlChar *URI, 685 int nb_namespaces, 686 const xmlChar **namespaces, 687 int nb_attributes, 688 int nb_defaulted, 689 const xmlChar **attributes); 690 691 /** 692 * endElementNsSAX2Func: 693 * @ctx: the user data (XML parser context) 694 * @localname: the local name of the element 695 * @prefix: the element namespace prefix if available 696 * @URI: the element namespace name if available 697 * 698 * SAX2 callback when an element end has been detected by the parser. 699 * It provides the namespace informations for the element. 700 */ 701 702 typedef void (*endElementNsSAX2Func) (void *ctx, 703 const xmlChar *localname, 704 const xmlChar *prefix, 705 const xmlChar *URI); 706 707 708 struct _xmlSAXHandler { 709 internalSubsetSAXFunc internalSubset; 710 isStandaloneSAXFunc isStandalone; 711 hasInternalSubsetSAXFunc hasInternalSubset; 712 hasExternalSubsetSAXFunc hasExternalSubset; 713 resolveEntitySAXFunc resolveEntity; 714 getEntitySAXFunc getEntity; 715 entityDeclSAXFunc entityDecl; 716 notationDeclSAXFunc notationDecl; 717 attributeDeclSAXFunc attributeDecl; 718 elementDeclSAXFunc elementDecl; 719 unparsedEntityDeclSAXFunc unparsedEntityDecl; 720 setDocumentLocatorSAXFunc setDocumentLocator; 721 startDocumentSAXFunc startDocument; 722 endDocumentSAXFunc endDocument; 723 startElementSAXFunc startElement; 724 endElementSAXFunc endElement; 725 referenceSAXFunc reference; 726 charactersSAXFunc characters; 727 ignorableWhitespaceSAXFunc ignorableWhitespace; 728 processingInstructionSAXFunc processingInstruction; 729 commentSAXFunc comment; 730 warningSAXFunc warning; 731 errorSAXFunc error; 732 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 733 getParameterEntitySAXFunc getParameterEntity; 734 cdataBlockSAXFunc cdataBlock; 735 externalSubsetSAXFunc externalSubset; 736 unsigned int initialized; 737 /* The following fields are extensions available only on version 2 */ 738 void *_private; 739 startElementNsSAX2Func startElementNs; 740 endElementNsSAX2Func endElementNs; 741 xmlStructuredErrorFunc serror; 742 }; 743 744 /* 745 * SAX Version 1 746 */ 747 typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1; 748 typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr; 749 struct _xmlSAXHandlerV1 { 750 internalSubsetSAXFunc internalSubset; 751 isStandaloneSAXFunc isStandalone; 752 hasInternalSubsetSAXFunc hasInternalSubset; 753 hasExternalSubsetSAXFunc hasExternalSubset; 754 resolveEntitySAXFunc resolveEntity; 755 getEntitySAXFunc getEntity; 756 entityDeclSAXFunc entityDecl; 757 notationDeclSAXFunc notationDecl; 758 attributeDeclSAXFunc attributeDecl; 759 elementDeclSAXFunc elementDecl; 760 unparsedEntityDeclSAXFunc unparsedEntityDecl; 761 setDocumentLocatorSAXFunc setDocumentLocator; 762 startDocumentSAXFunc startDocument; 763 endDocumentSAXFunc endDocument; 764 startElementSAXFunc startElement; 765 endElementSAXFunc endElement; 766 referenceSAXFunc reference; 767 charactersSAXFunc characters; 768 ignorableWhitespaceSAXFunc ignorableWhitespace; 769 processingInstructionSAXFunc processingInstruction; 770 commentSAXFunc comment; 771 warningSAXFunc warning; 772 errorSAXFunc error; 773 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 774 getParameterEntitySAXFunc getParameterEntity; 775 cdataBlockSAXFunc cdataBlock; 776 externalSubsetSAXFunc externalSubset; 777 unsigned int initialized; 778 }; 779 780 781 /** 782 * xmlExternalEntityLoader: 783 * @URL: The System ID of the resource requested 784 * @ID: The Public ID of the resource requested 785 * @context: the XML parser context 786 * 787 * External entity loaders types. 788 * 789 * Returns the entity input parser. 790 */ 791 typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL, 792 const char *ID, 793 xmlParserCtxtPtr context); 794 795 #ifdef __cplusplus 796 } 797 #endif 798 799 #include <libxml/encoding.h> 800 #include <libxml/xmlIO.h> 801 #include <libxml/globals.h> 802 803 #ifdef __cplusplus 804 extern "C" { 805 #endif 806 807 808 /* 809 * Init/Cleanup 810 */ 811 XMLPUBFUN void XMLCALL 812 xmlInitParser (void); 813 XMLPUBFUN void XMLCALL 814 xmlCleanupParser (void); 815 816 /* 817 * Input functions 818 */ 819 XMLPUBFUN int XMLCALL 820 xmlParserInputRead (xmlParserInputPtr in, 821 int len); 822 XMLPUBFUN int XMLCALL 823 xmlParserInputGrow (xmlParserInputPtr in, 824 int len); 825 826 /* 827 * Basic parsing Interfaces 828 */ 829 #ifdef LIBXML_SAX1_ENABLED 830 XMLPUBFUN xmlDocPtr XMLCALL 831 xmlParseDoc (const xmlChar *cur); 832 XMLPUBFUN xmlDocPtr XMLCALL 833 xmlParseFile (const char *filename); 834 XMLPUBFUN xmlDocPtr XMLCALL 835 xmlParseMemory (const char *buffer, 836 int size); 837 #endif /* LIBXML_SAX1_ENABLED */ 838 XMLPUBFUN int XMLCALL 839 xmlSubstituteEntitiesDefault(int val); 840 XMLPUBFUN int XMLCALL 841 xmlKeepBlanksDefault (int val); 842 XMLPUBFUN void XMLCALL 843 xmlStopParser (xmlParserCtxtPtr ctxt); 844 XMLPUBFUN int XMLCALL 845 xmlPedanticParserDefault(int val); 846 XMLPUBFUN int XMLCALL 847 xmlLineNumbersDefault (int val); 848 849 #ifdef LIBXML_SAX1_ENABLED 850 /* 851 * Recovery mode 852 */ 853 XMLPUBFUN xmlDocPtr XMLCALL 854 xmlRecoverDoc (xmlChar *cur); 855 XMLPUBFUN xmlDocPtr XMLCALL 856 xmlRecoverMemory (const char *buffer, 857 int size); 858 XMLPUBFUN xmlDocPtr XMLCALL 859 xmlRecoverFile (const char *filename); 860 #endif /* LIBXML_SAX1_ENABLED */ 861 862 /* 863 * Less common routines and SAX interfaces 864 */ 865 XMLPUBFUN int XMLCALL 866 xmlParseDocument (xmlParserCtxtPtr ctxt); 867 XMLPUBFUN int XMLCALL 868 xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); 869 #ifdef LIBXML_SAX1_ENABLED 870 XMLPUBFUN int XMLCALL 871 xmlSAXUserParseFile (xmlSAXHandlerPtr sax, 872 void *user_data, 873 const char *filename); 874 XMLPUBFUN int XMLCALL 875 xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, 876 void *user_data, 877 const char *buffer, 878 int size); 879 XMLPUBFUN xmlDocPtr XMLCALL 880 xmlSAXParseDoc (xmlSAXHandlerPtr sax, 881 const xmlChar *cur, 882 int recovery); 883 XMLPUBFUN xmlDocPtr XMLCALL 884 xmlSAXParseMemory (xmlSAXHandlerPtr sax, 885 const char *buffer, 886 int size, 887 int recovery); 888 XMLPUBFUN xmlDocPtr XMLCALL 889 xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax, 890 const char *buffer, 891 int size, 892 int recovery, 893 void *data); 894 XMLPUBFUN xmlDocPtr XMLCALL 895 xmlSAXParseFile (xmlSAXHandlerPtr sax, 896 const char *filename, 897 int recovery); 898 XMLPUBFUN xmlDocPtr XMLCALL 899 xmlSAXParseFileWithData (xmlSAXHandlerPtr sax, 900 const char *filename, 901 int recovery, 902 void *data); 903 XMLPUBFUN xmlDocPtr XMLCALL 904 xmlSAXParseEntity (xmlSAXHandlerPtr sax, 905 const char *filename); 906 XMLPUBFUN xmlDocPtr XMLCALL 907 xmlParseEntity (const char *filename); 908 #endif /* LIBXML_SAX1_ENABLED */ 909 910 #ifdef LIBXML_VALID_ENABLED 911 XMLPUBFUN xmlDtdPtr XMLCALL 912 xmlSAXParseDTD (xmlSAXHandlerPtr sax, 913 const xmlChar *ExternalID, 914 const xmlChar *SystemID); 915 XMLPUBFUN xmlDtdPtr XMLCALL 916 xmlParseDTD (const xmlChar *ExternalID, 917 const xmlChar *SystemID); 918 XMLPUBFUN xmlDtdPtr XMLCALL 919 xmlIOParseDTD (xmlSAXHandlerPtr sax, 920 xmlParserInputBufferPtr input, 921 xmlCharEncoding enc); 922 #endif /* LIBXML_VALID_ENABLE */ 923 #ifdef LIBXML_SAX1_ENABLED 924 XMLPUBFUN int XMLCALL 925 xmlParseBalancedChunkMemory(xmlDocPtr doc, 926 xmlSAXHandlerPtr sax, 927 void *user_data, 928 int depth, 929 const xmlChar *string, 930 xmlNodePtr *lst); 931 #endif /* LIBXML_SAX1_ENABLED */ 932 XMLPUBFUN xmlParserErrors XMLCALL 933 xmlParseInNodeContext (xmlNodePtr node, 934 const char *data, 935 int datalen, 936 int options, 937 xmlNodePtr *lst); 938 #ifdef LIBXML_SAX1_ENABLED 939 XMLPUBFUN int XMLCALL 940 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, 941 xmlSAXHandlerPtr sax, 942 void *user_data, 943 int depth, 944 const xmlChar *string, 945 xmlNodePtr *lst, 946 int recover); 947 XMLPUBFUN int XMLCALL 948 xmlParseExternalEntity (xmlDocPtr doc, 949 xmlSAXHandlerPtr sax, 950 void *user_data, 951 int depth, 952 const xmlChar *URL, 953 const xmlChar *ID, 954 xmlNodePtr *lst); 955 #endif /* LIBXML_SAX1_ENABLED */ 956 XMLPUBFUN int XMLCALL 957 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, 958 const xmlChar *URL, 959 const xmlChar *ID, 960 xmlNodePtr *lst); 961 962 /* 963 * Parser contexts handling. 964 */ 965 XMLPUBFUN xmlParserCtxtPtr XMLCALL 966 xmlNewParserCtxt (void); 967 XMLPUBFUN int XMLCALL 968 xmlInitParserCtxt (xmlParserCtxtPtr ctxt); 969 XMLPUBFUN void XMLCALL 970 xmlClearParserCtxt (xmlParserCtxtPtr ctxt); 971 XMLPUBFUN void XMLCALL 972 xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); 973 #ifdef LIBXML_SAX1_ENABLED 974 XMLPUBFUN void XMLCALL 975 xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, 976 const xmlChar* buffer, 977 const char *filename); 978 #endif /* LIBXML_SAX1_ENABLED */ 979 XMLPUBFUN xmlParserCtxtPtr XMLCALL 980 xmlCreateDocParserCtxt (const xmlChar *cur); 981 982 #ifdef LIBXML_LEGACY_ENABLED 983 /* 984 * Reading/setting optional parsing features. 985 */ 986 XMLPUBFUN int XMLCALL 987 xmlGetFeaturesList (int *len, 988 const char **result); 989 XMLPUBFUN int XMLCALL 990 xmlGetFeature (xmlParserCtxtPtr ctxt, 991 const char *name, 992 void *result); 993 XMLPUBFUN int XMLCALL 994 xmlSetFeature (xmlParserCtxtPtr ctxt, 995 const char *name, 996 void *value); 997 #endif /* LIBXML_LEGACY_ENABLED */ 998 999 #ifdef LIBXML_PUSH_ENABLED 1000 /* 1001 * Interfaces for the Push mode. 1002 */ 1003 XMLPUBFUN xmlParserCtxtPtr XMLCALL 1004 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, 1005 void *user_data, 1006 const char *chunk, 1007 int size, 1008 const char *filename); 1009 XMLPUBFUN int XMLCALL 1010 xmlParseChunk (xmlParserCtxtPtr ctxt, 1011 const char *chunk, 1012 int size, 1013 int terminate); 1014 #endif /* LIBXML_PUSH_ENABLED */ 1015 1016 /* 1017 * Special I/O mode. 1018 */ 1019 1020 XMLPUBFUN xmlParserCtxtPtr XMLCALL 1021 xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, 1022 void *user_data, 1023 xmlInputReadCallback ioread, 1024 xmlInputCloseCallback ioclose, 1025 void *ioctx, 1026 xmlCharEncoding enc); 1027 1028 XMLPUBFUN xmlParserInputPtr XMLCALL 1029 xmlNewIOInputStream (xmlParserCtxtPtr ctxt, 1030 xmlParserInputBufferPtr input, 1031 xmlCharEncoding enc); 1032 1033 /* 1034 * Node infos. 1035 */ 1036 XMLPUBFUN const xmlParserNodeInfo* XMLCALL 1037 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt, 1038 const xmlNodePtr node); 1039 XMLPUBFUN void XMLCALL 1040 xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1041 XMLPUBFUN void XMLCALL 1042 xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1043 XMLPUBFUN unsigned long XMLCALL 1044 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1045 const xmlNodePtr node); 1046 XMLPUBFUN void XMLCALL 1047 xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, 1048 const xmlParserNodeInfoPtr info); 1049 1050 /* 1051 * External entities handling actually implemented in xmlIO. 1052 */ 1053 1054 XMLPUBFUN void XMLCALL 1055 xmlSetExternalEntityLoader(xmlExternalEntityLoader f); 1056 XMLPUBFUN xmlExternalEntityLoader XMLCALL 1057 xmlGetExternalEntityLoader(void); 1058 XMLPUBFUN xmlParserInputPtr XMLCALL 1059 xmlLoadExternalEntity (const char *URL, 1060 const char *ID, 1061 xmlParserCtxtPtr ctxt); 1062 1063 /* 1064 * Index lookup, actually implemented in the encoding module 1065 */ 1066 XMLPUBFUN long XMLCALL 1067 xmlByteConsumed (xmlParserCtxtPtr ctxt); 1068 1069 /* 1070 * New set of simpler/more flexible APIs 1071 */ 1072 /** 1073 * xmlParserOption: 1074 * 1075 * This is the set of XML parser options that can be passed down 1076 * to the xmlReadDoc() and similar calls. 1077 */ 1078 typedef enum { 1079 XML_PARSE_RECOVER = 1<<0, /* recover on errors */ 1080 XML_PARSE_NOENT = 1<<1, /* substitute entities */ 1081 XML_PARSE_DTDLOAD = 1<<2, /* load the external subset */ 1082 XML_PARSE_DTDATTR = 1<<3, /* default DTD attributes */ 1083 XML_PARSE_DTDVALID = 1<<4, /* validate with the DTD */ 1084 XML_PARSE_NOERROR = 1<<5, /* suppress error reports */ 1085 XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */ 1086 XML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ 1087 XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ 1088 XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */ 1089 XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitition */ 1090 XML_PARSE_NONET = 1<<11,/* Forbid network access */ 1091 XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionnary */ 1092 XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */ 1093 XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */ 1094 XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */ 1095 XML_PARSE_COMPACT = 1<<16,/* compact small text nodes; no modification of 1096 the tree allowed afterwards (will possibly 1097 crash if you try to modify the tree) */ 1098 XML_PARSE_OLD10 = 1<<17,/* parse using XML-1.0 before update 5 */ 1099 XML_PARSE_NOBASEFIX = 1<<18,/* do not fixup XINCLUDE xml:base uris */ 1100 XML_PARSE_HUGE = 1<<19, /* relax any hardcoded limit from the parser */ 1101 XML_PARSE_OLDSAX = 1<<20 /* parse using SAX2 interface from before 2.7.0 */ 1102 } xmlParserOption; 1103 1104 XMLPUBFUN void XMLCALL 1105 xmlCtxtReset (xmlParserCtxtPtr ctxt); 1106 XMLPUBFUN int XMLCALL 1107 xmlCtxtResetPush (xmlParserCtxtPtr ctxt, 1108 const char *chunk, 1109 int size, 1110 const char *filename, 1111 const char *encoding); 1112 XMLPUBFUN int XMLCALL 1113 xmlCtxtUseOptions (xmlParserCtxtPtr ctxt, 1114 int options); 1115 XMLPUBFUN xmlDocPtr XMLCALL 1116 xmlReadDoc (const xmlChar *cur, 1117 const char *URL, 1118 const char *encoding, 1119 int options); 1120 XMLPUBFUN xmlDocPtr XMLCALL 1121 xmlReadFile (const char *URL, 1122 const char *encoding, 1123 int options); 1124 XMLPUBFUN xmlDocPtr XMLCALL 1125 xmlReadMemory (const char *buffer, 1126 int size, 1127 const char *URL, 1128 const char *encoding, 1129 int options); 1130 XMLPUBFUN xmlDocPtr XMLCALL 1131 xmlReadFd (int fd, 1132 const char *URL, 1133 const char *encoding, 1134 int options); 1135 XMLPUBFUN xmlDocPtr XMLCALL 1136 xmlReadIO (xmlInputReadCallback ioread, 1137 xmlInputCloseCallback ioclose, 1138 void *ioctx, 1139 const char *URL, 1140 const char *encoding, 1141 int options); 1142 XMLPUBFUN xmlDocPtr XMLCALL 1143 xmlCtxtReadDoc (xmlParserCtxtPtr ctxt, 1144 const xmlChar *cur, 1145 const char *URL, 1146 const char *encoding, 1147 int options); 1148 XMLPUBFUN xmlDocPtr XMLCALL 1149 xmlCtxtReadFile (xmlParserCtxtPtr ctxt, 1150 const char *filename, 1151 const char *encoding, 1152 int options); 1153 XMLPUBFUN xmlDocPtr XMLCALL 1154 xmlCtxtReadMemory (xmlParserCtxtPtr ctxt, 1155 const char *buffer, 1156 int size, 1157 const char *URL, 1158 const char *encoding, 1159 int options); 1160 XMLPUBFUN xmlDocPtr XMLCALL 1161 xmlCtxtReadFd (xmlParserCtxtPtr ctxt, 1162 int fd, 1163 const char *URL, 1164 const char *encoding, 1165 int options); 1166 XMLPUBFUN xmlDocPtr XMLCALL 1167 xmlCtxtReadIO (xmlParserCtxtPtr ctxt, 1168 xmlInputReadCallback ioread, 1169 xmlInputCloseCallback ioclose, 1170 void *ioctx, 1171 const char *URL, 1172 const char *encoding, 1173 int options); 1174 1175 /* 1176 * Library wide options 1177 */ 1178 /** 1179 * xmlFeature: 1180 * 1181 * Used to examine the existance of features that can be enabled 1182 * or disabled at compile-time. 1183 * They used to be called XML_FEATURE_xxx but this clashed with Expat 1184 */ 1185 typedef enum { 1186 XML_WITH_THREAD = 1, 1187 XML_WITH_TREE = 2, 1188 XML_WITH_OUTPUT = 3, 1189 XML_WITH_PUSH = 4, 1190 XML_WITH_READER = 5, 1191 XML_WITH_PATTERN = 6, 1192 XML_WITH_WRITER = 7, 1193 XML_WITH_SAX1 = 8, 1194 XML_WITH_FTP = 9, 1195 XML_WITH_HTTP = 10, 1196 XML_WITH_VALID = 11, 1197 XML_WITH_HTML = 12, 1198 XML_WITH_LEGACY = 13, 1199 XML_WITH_C14N = 14, 1200 XML_WITH_CATALOG = 15, 1201 XML_WITH_XPATH = 16, 1202 XML_WITH_XPTR = 17, 1203 XML_WITH_XINCLUDE = 18, 1204 XML_WITH_ICONV = 19, 1205 XML_WITH_ISO8859X = 20, 1206 XML_WITH_UNICODE = 21, 1207 XML_WITH_REGEXP = 22, 1208 XML_WITH_AUTOMATA = 23, 1209 XML_WITH_EXPR = 24, 1210 XML_WITH_SCHEMAS = 25, 1211 XML_WITH_SCHEMATRON = 26, 1212 XML_WITH_MODULES = 27, 1213 XML_WITH_DEBUG = 28, 1214 XML_WITH_DEBUG_MEM = 29, 1215 XML_WITH_DEBUG_RUN = 30, 1216 XML_WITH_ZLIB = 31, 1217 XML_WITH_ICU = 32, 1218 XML_WITH_NONE = 99999 /* just to be sure of allocation size */ 1219 } xmlFeature; 1220 1221 XMLPUBFUN int XMLCALL 1222 xmlHasFeature (xmlFeature feature); 1223 1224 #ifdef __cplusplus 1225 } 1226 #endif 1227 #endif /* __XML_PARSER_H__ */ 1228