1 /* 2 * Summary: the core parser module 3 * Description: Interfaces, constants and types related to the XML parser 4 * 5 * Copy: See Copyright for the status of this software. 6 * 7 * Author: Daniel Veillard 8 */ 9 10 #ifndef __XML_PARSER_H__ 11 #define __XML_PARSER_H__ 12 13 #include <stdarg.h> 14 15 #include <libxml/xmlversion.h> 16 #include <libxml/tree.h> 17 #include <libxml/dict.h> 18 #include <libxml/hash.h> 19 #include <libxml/valid.h> 20 #include <libxml/entities.h> 21 #include <libxml/xmlerror.h> 22 #include <libxml/xmlstring.h> 23 24 #ifdef __cplusplus 25 extern "C" { 26 #endif 27 28 /** 29 * XML_DEFAULT_VERSION: 30 * 31 * The default version of XML used: 1.0 32 */ 33 #define XML_DEFAULT_VERSION "1.0" 34 35 /** 36 * xmlParserInput: 37 * 38 * An xmlParserInput is an input flow for the XML processor. 39 * Each entity parsed is associated an xmlParserInput (except the 40 * few predefined ones). This is the case both for internal entities 41 * - in which case the flow is already completely in memory - or 42 * external entities - in which case we use the buf structure for 43 * progressive reading and I18N conversions to the internal UTF-8 format. 44 */ 45 46 /** 47 * xmlParserInputDeallocate: 48 * @str: the string to deallocate 49 * 50 * Callback for freeing some parser input allocations. 
51 */ 52 typedef void (* xmlParserInputDeallocate)(xmlChar *str); 53 54 struct _xmlParserInput { 55 /* Input buffer */ 56 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ 57 58 const char *filename; /* The file analyzed, if any */ 59 const char *directory; /* the directory/base of the file */ 60 const xmlChar *base; /* Base of the array to parse */ 61 const xmlChar *cur; /* Current char being parsed */ 62 const xmlChar *end; /* end of the array to parse */ 63 int length; /* length if known */ 64 int line; /* Current line */ 65 int col; /* Current column */ 66 /* 67 * NOTE: consumed is only tested for equality in the parser code, 68 * so even if there is an overflow this should not give troubles 69 * for parsing very large instances. 70 */ 71 unsigned long consumed; /* How many xmlChars already consumed */ 72 xmlParserInputDeallocate free; /* function to deallocate the base */ 73 const xmlChar *encoding; /* the encoding string for entity */ 74 const xmlChar *version; /* the version string for entity */ 75 int standalone; /* Was that entity marked standalone */ 76 int id; /* an unique identifier for the entity */ 77 }; 78 79 /** 80 * xmlParserNodeInfo: 81 * 82 * The parser can be asked to collect Node informations, i.e. at what 83 * place in the file they were detected. 84 * NOTE: This is off by default and not very well tested. 
85 */ 86 typedef struct _xmlParserNodeInfo xmlParserNodeInfo; 87 typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; 88 89 struct _xmlParserNodeInfo { 90 const struct _xmlNode* node; 91 /* Position & line # that text that created the node begins & ends on */ 92 unsigned long begin_pos; 93 unsigned long begin_line; 94 unsigned long end_pos; 95 unsigned long end_line; 96 }; 97 98 typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; 99 typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; 100 struct _xmlParserNodeInfoSeq { 101 unsigned long maximum; 102 unsigned long length; 103 xmlParserNodeInfo* buffer; 104 }; 105 106 /** 107 * xmlParserInputState: 108 * 109 * The parser is now working also as a state based parser. 110 * The recursive one use the state info for entities processing. 111 */ 112 typedef enum { 113 XML_PARSER_EOF = -1, /* nothing is to be parsed */ 114 XML_PARSER_START = 0, /* nothing has been parsed */ 115 XML_PARSER_MISC, /* Misc* before int subset */ 116 XML_PARSER_PI, /* Within a processing instruction */ 117 XML_PARSER_DTD, /* within some DTD content */ 118 XML_PARSER_PROLOG, /* Misc* after internal subset */ 119 XML_PARSER_COMMENT, /* within a comment */ 120 XML_PARSER_START_TAG, /* within a start tag */ 121 XML_PARSER_CONTENT, /* within the content */ 122 XML_PARSER_CDATA_SECTION, /* within a CDATA section */ 123 XML_PARSER_END_TAG, /* within a closing tag */ 124 XML_PARSER_ENTITY_DECL, /* within an entity declaration */ 125 XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ 126 XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ 127 XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ 128 XML_PARSER_EPILOG, /* the Misc* after the last end tag */ 129 XML_PARSER_IGNORE, /* within an IGNORED section */ 130 XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */ 131 } xmlParserInputState; 132 133 /** 134 * XML_DETECT_IDS: 135 * 136 * Bit in the loadsubset context field to tell to do ID/REFs lookups. 
137 * Use it to initialize xmlLoadExtDtdDefaultValue. 138 */ 139 #define XML_DETECT_IDS 2 140 141 /** 142 * XML_COMPLETE_ATTRS: 143 * 144 * Bit in the loadsubset context field to tell to do complete the 145 * elements attributes lists with the ones defaulted from the DTDs. 146 * Use it to initialize xmlLoadExtDtdDefaultValue. 147 */ 148 #define XML_COMPLETE_ATTRS 4 149 150 /** 151 * XML_SKIP_IDS: 152 * 153 * Bit in the loadsubset context field to tell to not do ID/REFs registration. 154 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases. 155 */ 156 #define XML_SKIP_IDS 8 157 158 /** 159 * xmlParserMode: 160 * 161 * A parser can operate in various modes 162 */ 163 typedef enum { 164 XML_PARSE_UNKNOWN = 0, 165 XML_PARSE_DOM = 1, 166 XML_PARSE_SAX = 2, 167 XML_PARSE_PUSH_DOM = 3, 168 XML_PARSE_PUSH_SAX = 4, 169 XML_PARSE_READER = 5 170 } xmlParserMode; 171 172 /** 173 * xmlParserCtxt: 174 * 175 * The parser context. 176 * NOTE This doesn't completely define the parser state, the (current ?) 177 * design of the parser uses recursive function calls since this allow 178 * and easy mapping from the production rules of the specification 179 * to the actual code. The drawback is that the actual function call 180 * also reflect the parser state. However most of the parsing routines 181 * takes as the only argument the parser context pointer, so migrating 182 * to a state based parser for progressive parsing shouldn't be too hard. 183 */ 184 struct _xmlParserCtxt { 185 struct _xmlSAXHandler *sax; /* The SAX handler */ 186 void *userData; /* For SAX interface only, used by DOM build */ 187 xmlDocPtr myDoc; /* the document being built */ 188 int wellFormed; /* is the document well formed */ 189 int replaceEntities; /* shall we replace entities ? 
*/ 190 const xmlChar *version; /* the XML version string */ 191 const xmlChar *encoding; /* the declared encoding, if any */ 192 int standalone; /* standalone document */ 193 int html; /* an HTML(1)/Docbook(2) document 194 * 3 is HTML after <head> 195 * 10 is HTML after <body> 196 */ 197 198 /* Input stream stack */ 199 xmlParserInputPtr input; /* Current input stream */ 200 int inputNr; /* Number of current input streams */ 201 int inputMax; /* Max number of input streams */ 202 xmlParserInputPtr *inputTab; /* stack of inputs */ 203 204 /* Node analysis stack only used for DOM building */ 205 xmlNodePtr node; /* Current parsed Node */ 206 int nodeNr; /* Depth of the parsing stack */ 207 int nodeMax; /* Max depth of the parsing stack */ 208 xmlNodePtr *nodeTab; /* array of nodes */ 209 210 int record_info; /* Whether node info should be kept */ 211 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ 212 213 int errNo; /* error code */ 214 215 int hasExternalSubset; /* reference and external subset */ 216 int hasPErefs; /* the internal subset has PE refs */ 217 int external; /* are we parsing an external entity */ 218 219 int valid; /* is the document valid */ 220 int validate; /* shall we try to validate ? */ 221 xmlValidCtxt vctxt; /* The validity context */ 222 223 xmlParserInputState instate; /* current type of input */ 224 int token; /* next char look-ahead */ 225 226 char *directory; /* the data directory */ 227 228 /* Node name stack */ 229 const xmlChar *name; /* Current parsed Node */ 230 int nameNr; /* Depth of the parsing stack */ 231 int nameMax; /* Max depth of the parsing stack */ 232 const xmlChar * *nameTab; /* array of nodes */ 233 234 long nbChars; /* number of xmlChar processed */ 235 long checkIndex; /* used by progressive parsing lookup */ 236 int keepBlanks; /* ugly but ... 
*/ 237 int disableSAX; /* SAX callbacks are disabled */ 238 int inSubset; /* Parsing is in int 1/ext 2 subset */ 239 const xmlChar * intSubName; /* name of subset */ 240 xmlChar * extSubURI; /* URI of external subset */ 241 xmlChar * extSubSystem; /* SYSTEM ID of external subset */ 242 243 /* xml:space values */ 244 int * space; /* Should the parser preserve spaces */ 245 int spaceNr; /* Depth of the parsing stack */ 246 int spaceMax; /* Max depth of the parsing stack */ 247 int * spaceTab; /* array of space infos */ 248 249 int depth; /* to prevent entity substitution loops */ 250 xmlParserInputPtr entity; /* used to check entities boundaries */ 251 int charset; /* encoding of the in-memory content 252 actually an xmlCharEncoding */ 253 int nodelen; /* Those two fields are there to */ 254 int nodemem; /* Speed up large node parsing */ 255 int pedantic; /* signal pedantic warnings */ 256 void *_private; /* For user data, libxml won't touch it */ 257 258 int loadsubset; /* should the external subset be loaded */ 259 int linenumbers; /* set line number in element content */ 260 void *catalogs; /* document's own catalog */ 261 int recovery; /* run in recovery mode */ 262 int progressive; /* is this a progressive parsing */ 263 xmlDictPtr dict; /* dictionnary for the parser */ 264 const xmlChar * *atts; /* array for the attributes callbacks */ 265 int maxatts; /* the size of the array */ 266 int docdict; /* use strings from dict to build tree */ 267 268 /* 269 * pre-interned strings 270 */ 271 const xmlChar *str_xml; 272 const xmlChar *str_xmlns; 273 const xmlChar *str_xml_ns; 274 275 /* 276 * Everything below is used only by the new SAX mode 277 */ 278 int sax2; /* operating in the new SAX mode */ 279 int nsNr; /* the number of inherited namespaces */ 280 int nsMax; /* the size of the arrays */ 281 const xmlChar * *nsTab; /* the array of prefix/namespace name */ 282 int *attallocs; /* which attribute were allocated */ 283 void * *pushTab; /* array of data for push */ 
284 xmlHashTablePtr attsDefault; /* defaulted attributes if any */ 285 xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */ 286 int nsWellFormed; /* is the document XML Nanespace okay */ 287 int options; /* Extra options */ 288 289 /* 290 * Those fields are needed only for treaming parsing so far 291 */ 292 int dictNames; /* Use dictionary names for the tree */ 293 int freeElemsNr; /* number of freed element nodes */ 294 xmlNodePtr freeElems; /* List of freed element nodes */ 295 int freeAttrsNr; /* number of freed attributes nodes */ 296 xmlAttrPtr freeAttrs; /* List of freed attributes nodes */ 297 298 /* 299 * the complete error informations for the last error. 300 */ 301 xmlError lastError; 302 xmlParserMode parseMode; /* the parser mode */ 303 unsigned long nbentities; /* number of entities references */ 304 unsigned long sizeentities; /* size of parsed entities */ 305 306 /* for use by HTML non-recursive parser */ 307 xmlParserNodeInfo *nodeInfo; /* Current NodeInfo */ 308 int nodeInfoNr; /* Depth of the parsing stack */ 309 int nodeInfoMax; /* Max depth of the parsing stack */ 310 xmlParserNodeInfo *nodeInfoTab; /* array of nodeInfos */ 311 }; 312 313 /** 314 * xmlSAXLocator: 315 * 316 * A SAX Locator. 317 */ 318 struct _xmlSAXLocator { 319 const xmlChar *(*getPublicId)(void *ctx); 320 const xmlChar *(*getSystemId)(void *ctx); 321 int (*getLineNumber)(void *ctx); 322 int (*getColumnNumber)(void *ctx); 323 }; 324 325 /** 326 * xmlSAXHandler: 327 * 328 * A SAX handler is bunch of callbacks called by the parser when processing 329 * of the input generate data or structure informations. 
330 */ 331 332 /** 333 * resolveEntitySAXFunc: 334 * @ctx: the user data (XML parser context) 335 * @publicId: The public ID of the entity 336 * @systemId: The system ID of the entity 337 * 338 * Callback: 339 * The entity loader, to control the loading of external entities, 340 * the application can either: 341 * - override this resolveEntity() callback in the SAX block 342 * - or better use the xmlSetExternalEntityLoader() function to 343 * set up it's own entity resolution routine 344 * 345 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. 346 */ 347 typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, 348 const xmlChar *publicId, 349 const xmlChar *systemId); 350 /** 351 * internalSubsetSAXFunc: 352 * @ctx: the user data (XML parser context) 353 * @name: the root element name 354 * @ExternalID: the external ID 355 * @SystemID: the SYSTEM ID (e.g. filename or URL) 356 * 357 * Callback on internal subset declaration. 358 */ 359 typedef void (*internalSubsetSAXFunc) (void *ctx, 360 const xmlChar *name, 361 const xmlChar *ExternalID, 362 const xmlChar *SystemID); 363 /** 364 * externalSubsetSAXFunc: 365 * @ctx: the user data (XML parser context) 366 * @name: the root element name 367 * @ExternalID: the external ID 368 * @SystemID: the SYSTEM ID (e.g. filename or URL) 369 * 370 * Callback on external subset declaration. 371 */ 372 typedef void (*externalSubsetSAXFunc) (void *ctx, 373 const xmlChar *name, 374 const xmlChar *ExternalID, 375 const xmlChar *SystemID); 376 /** 377 * getEntitySAXFunc: 378 * @ctx: the user data (XML parser context) 379 * @name: The entity name 380 * 381 * Get an entity by name. 382 * 383 * Returns the xmlEntityPtr if found. 384 */ 385 typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, 386 const xmlChar *name); 387 /** 388 * getParameterEntitySAXFunc: 389 * @ctx: the user data (XML parser context) 390 * @name: The entity name 391 * 392 * Get a parameter entity by name. 
393 * 394 * Returns the xmlEntityPtr if found. 395 */ 396 typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, 397 const xmlChar *name); 398 /** 399 * entityDeclSAXFunc: 400 * @ctx: the user data (XML parser context) 401 * @name: the entity name 402 * @type: the entity type 403 * @publicId: The public ID of the entity 404 * @systemId: The system ID of the entity 405 * @content: the entity value (without processing). 406 * 407 * An entity definition has been parsed. 408 */ 409 typedef void (*entityDeclSAXFunc) (void *ctx, 410 const xmlChar *name, 411 int type, 412 const xmlChar *publicId, 413 const xmlChar *systemId, 414 xmlChar *content); 415 /** 416 * notationDeclSAXFunc: 417 * @ctx: the user data (XML parser context) 418 * @name: The name of the notation 419 * @publicId: The public ID of the entity 420 * @systemId: The system ID of the entity 421 * 422 * What to do when a notation declaration has been parsed. 423 */ 424 typedef void (*notationDeclSAXFunc)(void *ctx, 425 const xmlChar *name, 426 const xmlChar *publicId, 427 const xmlChar *systemId); 428 /** 429 * attributeDeclSAXFunc: 430 * @ctx: the user data (XML parser context) 431 * @elem: the name of the element 432 * @fullname: the attribute name 433 * @type: the attribute type 434 * @def: the type of default value 435 * @defaultValue: the attribute default value 436 * @tree: the tree of enumerated value set 437 * 438 * An attribute definition has been parsed. 439 */ 440 typedef void (*attributeDeclSAXFunc)(void *ctx, 441 const xmlChar *elem, 442 const xmlChar *fullname, 443 int type, 444 int def, 445 const xmlChar *defaultValue, 446 xmlEnumerationPtr tree); 447 /** 448 * elementDeclSAXFunc: 449 * @ctx: the user data (XML parser context) 450 * @name: the element name 451 * @type: the element type 452 * @content: the element value tree 453 * 454 * An element definition has been parsed. 
455 */ 456 typedef void (*elementDeclSAXFunc)(void *ctx, 457 const xmlChar *name, 458 int type, 459 xmlElementContentPtr content); 460 /** 461 * unparsedEntityDeclSAXFunc: 462 * @ctx: the user data (XML parser context) 463 * @name: The name of the entity 464 * @publicId: The public ID of the entity 465 * @systemId: The system ID of the entity 466 * @notationName: the name of the notation 467 * 468 * What to do when an unparsed entity declaration is parsed. 469 */ 470 typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, 471 const xmlChar *name, 472 const xmlChar *publicId, 473 const xmlChar *systemId, 474 const xmlChar *notationName); 475 /** 476 * setDocumentLocatorSAXFunc: 477 * @ctx: the user data (XML parser context) 478 * @loc: A SAX Locator 479 * 480 * Receive the document locator at startup, actually xmlDefaultSAXLocator. 481 * Everything is available on the context, so this is useless in our case. 482 */ 483 typedef void (*setDocumentLocatorSAXFunc) (void *ctx, 484 xmlSAXLocatorPtr loc); 485 /** 486 * startDocumentSAXFunc: 487 * @ctx: the user data (XML parser context) 488 * 489 * Called when the document start being processed. 490 */ 491 typedef void (*startDocumentSAXFunc) (void *ctx); 492 /** 493 * endDocumentSAXFunc: 494 * @ctx: the user data (XML parser context) 495 * 496 * Called when the document end has been detected. 497 */ 498 typedef void (*endDocumentSAXFunc) (void *ctx); 499 /** 500 * startElementSAXFunc: 501 * @ctx: the user data (XML parser context) 502 * @name: The element name, including namespace prefix 503 * @atts: An array of name/value attributes pairs, NULL terminated 504 * 505 * Called when an opening tag has been processed. 506 */ 507 typedef void (*startElementSAXFunc) (void *ctx, 508 const xmlChar *name, 509 const xmlChar **atts); 510 /** 511 * endElementSAXFunc: 512 * @ctx: the user data (XML parser context) 513 * @name: The element name 514 * 515 * Called when the end of an element has been detected. 
516 */ 517 typedef void (*endElementSAXFunc) (void *ctx, 518 const xmlChar *name); 519 /** 520 * attributeSAXFunc: 521 * @ctx: the user data (XML parser context) 522 * @name: The attribute name, including namespace prefix 523 * @value: The attribute value 524 * 525 * Handle an attribute that has been read by the parser. 526 * The default handling is to convert the attribute into an 527 * DOM subtree and past it in a new xmlAttr element added to 528 * the element. 529 */ 530 typedef void (*attributeSAXFunc) (void *ctx, 531 const xmlChar *name, 532 const xmlChar *value); 533 /** 534 * referenceSAXFunc: 535 * @ctx: the user data (XML parser context) 536 * @name: The entity name 537 * 538 * Called when an entity reference is detected. 539 */ 540 typedef void (*referenceSAXFunc) (void *ctx, 541 const xmlChar *name); 542 /** 543 * charactersSAXFunc: 544 * @ctx: the user data (XML parser context) 545 * @ch: a xmlChar string 546 * @len: the number of xmlChar 547 * 548 * Receiving some chars from the parser. 549 */ 550 typedef void (*charactersSAXFunc) (void *ctx, 551 const xmlChar *ch, 552 int len); 553 /** 554 * ignorableWhitespaceSAXFunc: 555 * @ctx: the user data (XML parser context) 556 * @ch: a xmlChar string 557 * @len: the number of xmlChar 558 * 559 * Receiving some ignorable whitespaces from the parser. 560 * UNUSED: by default the DOM building will use characters. 561 */ 562 typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, 563 const xmlChar *ch, 564 int len); 565 /** 566 * processingInstructionSAXFunc: 567 * @ctx: the user data (XML parser context) 568 * @target: the target name 569 * @data: the PI data's 570 * 571 * A processing instruction has been parsed. 572 */ 573 typedef void (*processingInstructionSAXFunc) (void *ctx, 574 const xmlChar *target, 575 const xmlChar *data); 576 /** 577 * commentSAXFunc: 578 * @ctx: the user data (XML parser context) 579 * @value: the comment content 580 * 581 * A comment has been parsed. 
582 */ 583 typedef void (*commentSAXFunc) (void *ctx, 584 const xmlChar *value); 585 /** 586 * cdataBlockSAXFunc: 587 * @ctx: the user data (XML parser context) 588 * @value: The pcdata content 589 * @len: the block length 590 * 591 * Called when a pcdata block has been parsed. 592 */ 593 typedef void (*cdataBlockSAXFunc) ( 594 void *ctx, 595 const xmlChar *value, 596 int len); 597 /** 598 * warningSAXFunc: 599 * @ctx: an XML parser context 600 * @msg: the message to display/transmit 601 * @...: extra parameters for the message display 602 * 603 * Display and format a warning messages, callback. 604 */ 605 typedef void (XMLCDECL *warningSAXFunc) (void *ctx, 606 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); 607 /** 608 * errorSAXFunc: 609 * @ctx: an XML parser context 610 * @msg: the message to display/transmit 611 * @...: extra parameters for the message display 612 * 613 * Display and format an error messages, callback. 614 */ 615 typedef void (XMLCDECL *errorSAXFunc) (void *ctx, 616 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); 617 /** 618 * fatalErrorSAXFunc: 619 * @ctx: an XML parser context 620 * @msg: the message to display/transmit 621 * @...: extra parameters for the message display 622 * 623 * Display and format fatal error messages, callback. 624 * Note: so far fatalError() SAX callbacks are not used, error() 625 * get all the callbacks for errors. 626 */ 627 typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx, 628 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); 629 /** 630 * isStandaloneSAXFunc: 631 * @ctx: the user data (XML parser context) 632 * 633 * Is this document tagged standalone? 634 * 635 * Returns 1 if true 636 */ 637 typedef int (*isStandaloneSAXFunc) (void *ctx); 638 /** 639 * hasInternalSubsetSAXFunc: 640 * @ctx: the user data (XML parser context) 641 * 642 * Does this document has an internal subset. 
643 * 644 * Returns 1 if true 645 */ 646 typedef int (*hasInternalSubsetSAXFunc) (void *ctx); 647 648 /** 649 * hasExternalSubsetSAXFunc: 650 * @ctx: the user data (XML parser context) 651 * 652 * Does this document has an external subset? 653 * 654 * Returns 1 if true 655 */ 656 typedef int (*hasExternalSubsetSAXFunc) (void *ctx); 657 658 /************************************************************************ 659 * * 660 * The SAX version 2 API extensions * 661 * * 662 ************************************************************************/ 663 /** 664 * XML_SAX2_MAGIC: 665 * 666 * Special constant found in SAX2 blocks initialized fields 667 */ 668 #define XML_SAX2_MAGIC 0xDEEDBEAF 669 670 /** 671 * startElementNsSAX2Func: 672 * @ctx: the user data (XML parser context) 673 * @localname: the local name of the element 674 * @prefix: the element namespace prefix if available 675 * @URI: the element namespace name if available 676 * @nb_namespaces: number of namespace definitions on that node 677 * @namespaces: pointer to the array of prefix/URI pairs namespace definitions 678 * @nb_attributes: the number of attributes on that node 679 * @nb_defaulted: the number of defaulted attributes. The defaulted 680 * ones are at the end of the array 681 * @attributes: pointer to the array of (localname/prefix/URI/value/end) 682 * attribute values. 683 * 684 * SAX2 callback when an element start has been detected by the parser. 685 * It provides the namespace informations for the element, as well as 686 * the new namespace declarations on the element. 
687 */ 688 689 typedef void (*startElementNsSAX2Func) (void *ctx, 690 const xmlChar *localname, 691 const xmlChar *prefix, 692 const xmlChar *URI, 693 int nb_namespaces, 694 const xmlChar **namespaces, 695 int nb_attributes, 696 int nb_defaulted, 697 const xmlChar **attributes); 698 699 /** 700 * endElementNsSAX2Func: 701 * @ctx: the user data (XML parser context) 702 * @localname: the local name of the element 703 * @prefix: the element namespace prefix if available 704 * @URI: the element namespace name if available 705 * 706 * SAX2 callback when an element end has been detected by the parser. 707 * It provides the namespace informations for the element. 708 */ 709 710 typedef void (*endElementNsSAX2Func) (void *ctx, 711 const xmlChar *localname, 712 const xmlChar *prefix, 713 const xmlChar *URI); 714 715 716 struct _xmlSAXHandler { 717 internalSubsetSAXFunc internalSubset; 718 isStandaloneSAXFunc isStandalone; 719 hasInternalSubsetSAXFunc hasInternalSubset; 720 hasExternalSubsetSAXFunc hasExternalSubset; 721 resolveEntitySAXFunc resolveEntity; 722 getEntitySAXFunc getEntity; 723 entityDeclSAXFunc entityDecl; 724 notationDeclSAXFunc notationDecl; 725 attributeDeclSAXFunc attributeDecl; 726 elementDeclSAXFunc elementDecl; 727 unparsedEntityDeclSAXFunc unparsedEntityDecl; 728 setDocumentLocatorSAXFunc setDocumentLocator; 729 startDocumentSAXFunc startDocument; 730 endDocumentSAXFunc endDocument; 731 startElementSAXFunc startElement; 732 endElementSAXFunc endElement; 733 referenceSAXFunc reference; 734 charactersSAXFunc characters; 735 ignorableWhitespaceSAXFunc ignorableWhitespace; 736 processingInstructionSAXFunc processingInstruction; 737 commentSAXFunc comment; 738 warningSAXFunc warning; 739 errorSAXFunc error; 740 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 741 getParameterEntitySAXFunc getParameterEntity; 742 cdataBlockSAXFunc cdataBlock; 743 externalSubsetSAXFunc externalSubset; 744 unsigned int initialized; 745 /* The following 
fields are extensions available only on version 2 */ 746 void *_private; 747 startElementNsSAX2Func startElementNs; 748 endElementNsSAX2Func endElementNs; 749 xmlStructuredErrorFunc serror; 750 }; 751 752 /* 753 * SAX Version 1 754 */ 755 typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1; 756 typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr; 757 struct _xmlSAXHandlerV1 { 758 internalSubsetSAXFunc internalSubset; 759 isStandaloneSAXFunc isStandalone; 760 hasInternalSubsetSAXFunc hasInternalSubset; 761 hasExternalSubsetSAXFunc hasExternalSubset; 762 resolveEntitySAXFunc resolveEntity; 763 getEntitySAXFunc getEntity; 764 entityDeclSAXFunc entityDecl; 765 notationDeclSAXFunc notationDecl; 766 attributeDeclSAXFunc attributeDecl; 767 elementDeclSAXFunc elementDecl; 768 unparsedEntityDeclSAXFunc unparsedEntityDecl; 769 setDocumentLocatorSAXFunc setDocumentLocator; 770 startDocumentSAXFunc startDocument; 771 endDocumentSAXFunc endDocument; 772 startElementSAXFunc startElement; 773 endElementSAXFunc endElement; 774 referenceSAXFunc reference; 775 charactersSAXFunc characters; 776 ignorableWhitespaceSAXFunc ignorableWhitespace; 777 processingInstructionSAXFunc processingInstruction; 778 commentSAXFunc comment; 779 warningSAXFunc warning; 780 errorSAXFunc error; 781 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 782 getParameterEntitySAXFunc getParameterEntity; 783 cdataBlockSAXFunc cdataBlock; 784 externalSubsetSAXFunc externalSubset; 785 unsigned int initialized; 786 }; 787 788 789 /** 790 * xmlExternalEntityLoader: 791 * @URL: The System ID of the resource requested 792 * @ID: The Public ID of the resource requested 793 * @context: the XML parser context 794 * 795 * External entity loaders types. 796 * 797 * Returns the entity input parser. 
798 */ 799 typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL, 800 const char *ID, 801 xmlParserCtxtPtr context); 802 803 #ifdef __cplusplus 804 } 805 #endif 806 807 #include <libxml/encoding.h> 808 #include <libxml/xmlIO.h> 809 #include <libxml/globals.h> 810 811 #ifdef __cplusplus 812 extern "C" { 813 #endif 814 815 816 /* 817 * Init/Cleanup 818 */ 819 XMLPUBFUN void XMLCALL 820 xmlInitParser (void); 821 XMLPUBFUN void XMLCALL 822 xmlCleanupParser (void); 823 824 /* 825 * Input functions 826 */ 827 XMLPUBFUN int XMLCALL 828 xmlParserInputRead (xmlParserInputPtr in, 829 int len); 830 XMLPUBFUN int XMLCALL 831 xmlParserInputGrow (xmlParserInputPtr in, 832 int len); 833 834 /* 835 * Basic parsing Interfaces 836 */ 837 #ifdef LIBXML_SAX1_ENABLED 838 XMLPUBFUN xmlDocPtr XMLCALL 839 xmlParseDoc (const xmlChar *cur); 840 XMLPUBFUN xmlDocPtr XMLCALL 841 xmlParseFile (const char *filename); 842 XMLPUBFUN xmlDocPtr XMLCALL 843 xmlParseMemory (const char *buffer, 844 int size); 845 #endif /* LIBXML_SAX1_ENABLED */ 846 XMLPUBFUN int XMLCALL 847 xmlSubstituteEntitiesDefault(int val); 848 XMLPUBFUN int XMLCALL 849 xmlKeepBlanksDefault (int val); 850 XMLPUBFUN void XMLCALL 851 xmlStopParser (xmlParserCtxtPtr ctxt); 852 XMLPUBFUN int XMLCALL 853 xmlPedanticParserDefault(int val); 854 XMLPUBFUN int XMLCALL 855 xmlLineNumbersDefault (int val); 856 857 #ifdef LIBXML_SAX1_ENABLED 858 /* 859 * Recovery mode 860 */ 861 XMLPUBFUN xmlDocPtr XMLCALL 862 xmlRecoverDoc (const xmlChar *cur); 863 XMLPUBFUN xmlDocPtr XMLCALL 864 xmlRecoverMemory (const char *buffer, 865 int size); 866 XMLPUBFUN xmlDocPtr XMLCALL 867 xmlRecoverFile (const char *filename); 868 #endif /* LIBXML_SAX1_ENABLED */ 869 870 /* 871 * Less common routines and SAX interfaces 872 */ 873 XMLPUBFUN int XMLCALL 874 xmlParseDocument (xmlParserCtxtPtr ctxt); 875 XMLPUBFUN int XMLCALL 876 xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); 877 #ifdef LIBXML_SAX1_ENABLED 878 XMLPUBFUN int XMLCALL 879 
xmlSAXUserParseFile (xmlSAXHandlerPtr sax, 880 void *user_data, 881 const char *filename); 882 XMLPUBFUN int XMLCALL 883 xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, 884 void *user_data, 885 const char *buffer, 886 int size); 887 XMLPUBFUN xmlDocPtr XMLCALL 888 xmlSAXParseDoc (xmlSAXHandlerPtr sax, 889 const xmlChar *cur, 890 int recovery); 891 XMLPUBFUN xmlDocPtr XMLCALL 892 xmlSAXParseMemory (xmlSAXHandlerPtr sax, 893 const char *buffer, 894 int size, 895 int recovery); 896 XMLPUBFUN xmlDocPtr XMLCALL 897 xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax, 898 const char *buffer, 899 int size, 900 int recovery, 901 void *data); 902 XMLPUBFUN xmlDocPtr XMLCALL 903 xmlSAXParseFile (xmlSAXHandlerPtr sax, 904 const char *filename, 905 int recovery); 906 XMLPUBFUN xmlDocPtr XMLCALL 907 xmlSAXParseFileWithData (xmlSAXHandlerPtr sax, 908 const char *filename, 909 int recovery, 910 void *data); 911 XMLPUBFUN xmlDocPtr XMLCALL 912 xmlSAXParseEntity (xmlSAXHandlerPtr sax, 913 const char *filename); 914 XMLPUBFUN xmlDocPtr XMLCALL 915 xmlParseEntity (const char *filename); 916 #endif /* LIBXML_SAX1_ENABLED */ 917 918 #ifdef LIBXML_VALID_ENABLED 919 XMLPUBFUN xmlDtdPtr XMLCALL 920 xmlSAXParseDTD (xmlSAXHandlerPtr sax, 921 const xmlChar *ExternalID, 922 const xmlChar *SystemID); 923 XMLPUBFUN xmlDtdPtr XMLCALL 924 xmlParseDTD (const xmlChar *ExternalID, 925 const xmlChar *SystemID); 926 XMLPUBFUN xmlDtdPtr XMLCALL 927 xmlIOParseDTD (xmlSAXHandlerPtr sax, 928 xmlParserInputBufferPtr input, 929 xmlCharEncoding enc); 930 #endif /* LIBXML_VALID_ENABLE */ 931 #ifdef LIBXML_SAX1_ENABLED 932 XMLPUBFUN int XMLCALL 933 xmlParseBalancedChunkMemory(xmlDocPtr doc, 934 xmlSAXHandlerPtr sax, 935 void *user_data, 936 int depth, 937 const xmlChar *string, 938 xmlNodePtr *lst); 939 #endif /* LIBXML_SAX1_ENABLED */ 940 XMLPUBFUN xmlParserErrors XMLCALL 941 xmlParseInNodeContext (xmlNodePtr node, 942 const char *data, 943 int datalen, 944 int options, 945 xmlNodePtr *lst); 946 #ifdef 
LIBXML_SAX1_ENABLED 947 XMLPUBFUN int XMLCALL 948 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, 949 xmlSAXHandlerPtr sax, 950 void *user_data, 951 int depth, 952 const xmlChar *string, 953 xmlNodePtr *lst, 954 int recover); 955 XMLPUBFUN int XMLCALL 956 xmlParseExternalEntity (xmlDocPtr doc, 957 xmlSAXHandlerPtr sax, 958 void *user_data, 959 int depth, 960 const xmlChar *URL, 961 const xmlChar *ID, 962 xmlNodePtr *lst); 963 #endif /* LIBXML_SAX1_ENABLED */ 964 XMLPUBFUN int XMLCALL 965 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, 966 const xmlChar *URL, 967 const xmlChar *ID, 968 xmlNodePtr *lst); 969 970 /* 971 * Parser contexts handling. 972 */ 973 XMLPUBFUN xmlParserCtxtPtr XMLCALL 974 xmlNewParserCtxt (void); 975 XMLPUBFUN int XMLCALL 976 xmlInitParserCtxt (xmlParserCtxtPtr ctxt); 977 XMLPUBFUN void XMLCALL 978 xmlClearParserCtxt (xmlParserCtxtPtr ctxt); 979 XMLPUBFUN void XMLCALL 980 xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); 981 #ifdef LIBXML_SAX1_ENABLED 982 XMLPUBFUN void XMLCALL 983 xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, 984 const xmlChar* buffer, 985 const char *filename); 986 #endif /* LIBXML_SAX1_ENABLED */ 987 XMLPUBFUN xmlParserCtxtPtr XMLCALL 988 xmlCreateDocParserCtxt (const xmlChar *cur); 989 990 #ifdef LIBXML_LEGACY_ENABLED 991 /* 992 * Reading/setting optional parsing features. 993 */ 994 XMLPUBFUN int XMLCALL 995 xmlGetFeaturesList (int *len, 996 const char **result); 997 XMLPUBFUN int XMLCALL 998 xmlGetFeature (xmlParserCtxtPtr ctxt, 999 const char *name, 1000 void *result); 1001 XMLPUBFUN int XMLCALL 1002 xmlSetFeature (xmlParserCtxtPtr ctxt, 1003 const char *name, 1004 void *value); 1005 #endif /* LIBXML_LEGACY_ENABLED */ 1006 1007 #ifdef LIBXML_PUSH_ENABLED 1008 /* 1009 * Interfaces for the Push mode. 
1010 */ 1011 XMLPUBFUN xmlParserCtxtPtr XMLCALL 1012 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, 1013 void *user_data, 1014 const char *chunk, 1015 int size, 1016 const char *filename); 1017 XMLPUBFUN int XMLCALL 1018 xmlParseChunk (xmlParserCtxtPtr ctxt, 1019 const char *chunk, 1020 int size, 1021 int terminate); 1022 #endif /* LIBXML_PUSH_ENABLED */ 1023 1024 /* 1025 * Special I/O mode: parser contexts and inputs built from I/O callbacks or an input buffer. 1026 */ 1027 1028 XMLPUBFUN xmlParserCtxtPtr XMLCALL 1029 xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, 1030 void *user_data, 1031 xmlInputReadCallback ioread, 1032 xmlInputCloseCallback ioclose, 1033 void *ioctx, 1034 xmlCharEncoding enc); 1035 1036 XMLPUBFUN xmlParserInputPtr XMLCALL 1037 xmlNewIOInputStream (xmlParserCtxtPtr ctxt, 1038 xmlParserInputBufferPtr input, 1039 xmlCharEncoding enc); 1040 1041 /* 1042 * Node information records (xmlParserNodeInfo). NOTE(review): if the xxxPtr parameter types below are pointer typedefs, the `const` in front of them qualifies the pointer itself, not the pointed-to object - confirm that this is the intent. 1043 */ 1044 XMLPUBFUN const xmlParserNodeInfo* XMLCALL 1045 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt, 1046 const xmlNodePtr node); 1047 XMLPUBFUN void XMLCALL 1048 xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1049 XMLPUBFUN void XMLCALL 1050 xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1051 XMLPUBFUN unsigned long XMLCALL 1052 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1053 const xmlNodePtr node); 1054 XMLPUBFUN void XMLCALL 1055 xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, 1056 const xmlParserNodeInfoPtr info); 1057 1058 /* 1059 * External entity handling, actually implemented in xmlIO.
1060 */ 1061 1062 XMLPUBFUN void XMLCALL 1063 xmlSetExternalEntityLoader(xmlExternalEntityLoader f); 1064 XMLPUBFUN xmlExternalEntityLoader XMLCALL 1065 xmlGetExternalEntityLoader(void); 1066 XMLPUBFUN xmlParserInputPtr XMLCALL 1067 xmlLoadExternalEntity (const char *URL, 1068 const char *ID, 1069 xmlParserCtxtPtr ctxt); 1070 1071 /* 1072 * Index lookup, actually implemented in the encoding module. 1073 */ 1074 XMLPUBFUN long XMLCALL 1075 xmlByteConsumed (xmlParserCtxtPtr ctxt); 1076 1077 /* 1078 * New set of simpler/more flexible APIs. 1079 */ 1080 /** 1081 * xmlParserOption: 1082 * 1083 * This is the set of XML parser options that can be passed down 1084 * to the xmlReadDoc() and similar calls. Each option is a distinct bit (1<<k), so several options can be combined with a bitwise OR. 1085 */ 1086 typedef enum { 1087 XML_PARSE_RECOVER = 1<<0, /* recover on errors */ 1088 XML_PARSE_NOENT = 1<<1, /* substitute entities */ 1089 XML_PARSE_DTDLOAD = 1<<2, /* load the external subset */ 1090 XML_PARSE_DTDATTR = 1<<3, /* default DTD attributes */ 1091 XML_PARSE_DTDVALID = 1<<4, /* validate with the DTD */ 1092 XML_PARSE_NOERROR = 1<<5, /* suppress error reports */ 1093 XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */ 1094 XML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ 1095 XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ 1096 XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */ 1097 XML_PARSE_XINCLUDE = 1<<10,/* implement XInclude substitution */ 1098 XML_PARSE_NONET = 1<<11,/* forbid network access */ 1099 XML_PARSE_NODICT = 1<<12,/* do not reuse the context dictionary */ 1100 XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespace declarations */ 1101 XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */ 1102 XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */ 1103 XML_PARSE_COMPACT = 1<<16,/* compact small text nodes; no modification of 1104 the tree allowed afterwards (will possibly 1105 crash if you try to modify the tree) */ 1106 XML_PARSE_OLD10 = 1<<17,/* parse using XML-1.0 before update 5 */ 
1107 XML_PARSE_NOBASEFIX = 1<<18,/* do not fix up XINCLUDE xml:base URIs */ 1108 XML_PARSE_HUGE = 1<<19,/* relax any hardcoded limit from the parser */ 1109 XML_PARSE_OLDSAX = 1<<20,/* parse using SAX2 interface before 2.7.0 */ 1110 XML_PARSE_IGNORE_ENC= 1<<21 /* ignore internal document encoding hint */ 1111 } xmlParserOption; 1112 /* xmlParserOption values are what the xmlReadDoc() and similar calls accept (see the enum description); presumably they go through the `options` argument of the declarations below - confirm against the implementation. */ 1113 XMLPUBFUN void XMLCALL 1114 xmlCtxtReset (xmlParserCtxtPtr ctxt); 1115 XMLPUBFUN int XMLCALL 1116 xmlCtxtResetPush (xmlParserCtxtPtr ctxt, 1117 const char *chunk, 1118 int size, 1119 const char *filename, 1120 const char *encoding); 1121 XMLPUBFUN int XMLCALL 1122 xmlCtxtUseOptions (xmlParserCtxtPtr ctxt, 1123 int options); 1124 XMLPUBFUN xmlDocPtr XMLCALL 1125 xmlReadDoc (const xmlChar *cur, 1126 const char *URL, 1127 const char *encoding, 1128 int options); 1129 XMLPUBFUN xmlDocPtr XMLCALL 1130 xmlReadFile (const char *URL, 1131 const char *encoding, 1132 int options); 1133 XMLPUBFUN xmlDocPtr XMLCALL 1134 xmlReadMemory (const char *buffer, 1135 int size, 1136 const char *URL, 1137 const char *encoding, 1138 int options); 1139 XMLPUBFUN xmlDocPtr XMLCALL 1140 xmlReadFd (int fd, 1141 const char *URL, 1142 const char *encoding, 1143 int options); 1144 XMLPUBFUN xmlDocPtr XMLCALL 1145 xmlReadIO (xmlInputReadCallback ioread, 1146 xmlInputCloseCallback ioclose, 1147 void *ioctx, 1148 const char *URL, 1149 const char *encoding, 1150 int options); 1151 XMLPUBFUN xmlDocPtr XMLCALL 1152 xmlCtxtReadDoc (xmlParserCtxtPtr ctxt, 1153 const xmlChar *cur, 1154 const char *URL, 1155 const char *encoding, 1156 int options); 1157 XMLPUBFUN xmlDocPtr XMLCALL 1158 xmlCtxtReadFile (xmlParserCtxtPtr ctxt, 1159 const char *filename, 1160 const char *encoding, 1161 int options); 1162 XMLPUBFUN xmlDocPtr XMLCALL 1163 xmlCtxtReadMemory (xmlParserCtxtPtr ctxt, 1164 const char *buffer, 1165 int size, 1166 const char *URL, 1167 const char *encoding, 1168 int options); 1169 XMLPUBFUN xmlDocPtr XMLCALL 1170 xmlCtxtReadFd (xmlParserCtxtPtr ctxt, 1171 int fd, 
1172 const char *URL, 1173 const char *encoding, 1174 int options); 1175 XMLPUBFUN xmlDocPtr XMLCALL 1176 xmlCtxtReadIO (xmlParserCtxtPtr ctxt, 1177 xmlInputReadCallback ioread, 1178 xmlInputCloseCallback ioclose, 1179 void *ioctx, 1180 const char *URL, 1181 const char *encoding, 1182 int options); 1183 1184 /* 1185 * Library-wide options 1186 */ 1187 /** 1188 * xmlFeature: 1189 * 1190 * Used to examine the existence of features that can be enabled 1191 * or disabled at compile time. 1192 * They used to be called XML_FEATURE_xxx but this clashed with Expat. The values are sequential identifiers rather than bit flags; xmlHasFeature() takes a single one. 1193 */ 1194 typedef enum { 1195 XML_WITH_THREAD = 1, 1196 XML_WITH_TREE = 2, 1197 XML_WITH_OUTPUT = 3, 1198 XML_WITH_PUSH = 4, 1199 XML_WITH_READER = 5, 1200 XML_WITH_PATTERN = 6, 1201 XML_WITH_WRITER = 7, 1202 XML_WITH_SAX1 = 8, 1203 XML_WITH_FTP = 9, 1204 XML_WITH_HTTP = 10, 1205 XML_WITH_VALID = 11, 1206 XML_WITH_HTML = 12, 1207 XML_WITH_LEGACY = 13, 1208 XML_WITH_C14N = 14, 1209 XML_WITH_CATALOG = 15, 1210 XML_WITH_XPATH = 16, 1211 XML_WITH_XPTR = 17, 1212 XML_WITH_XINCLUDE = 18, 1213 XML_WITH_ICONV = 19, 1214 XML_WITH_ISO8859X = 20, 1215 XML_WITH_UNICODE = 21, 1216 XML_WITH_REGEXP = 22, 1217 XML_WITH_AUTOMATA = 23, 1218 XML_WITH_EXPR = 24, 1219 XML_WITH_SCHEMAS = 25, 1220 XML_WITH_SCHEMATRON = 26, 1221 XML_WITH_MODULES = 27, 1222 XML_WITH_DEBUG = 28, 1223 XML_WITH_DEBUG_MEM = 29, 1224 XML_WITH_DEBUG_RUN = 30, 1225 XML_WITH_ZLIB = 31, 1226 XML_WITH_ICU = 32, 1227 XML_WITH_LZMA = 33, 1228 XML_WITH_NONE = 99999 /* just to be sure of allocation size */ 1229 } xmlFeature; 1230 1231 XMLPUBFUN int XMLCALL 1232 xmlHasFeature (xmlFeature feature); 1233 1234 #ifdef __cplusplus 1235 } 1236 #endif 1237 #endif /* __XML_PARSER_H__ */ 1238