/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different character ranges are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * daniel (at) veillard.com 31 */ 32 33 #define IN_LIBXML 34 #include "libxml.h" 35 36 #if defined(WIN32) && !defined (__CYGWIN__) 37 #define XML_DIR_SEP '\\' 38 #else 39 #define XML_DIR_SEP '/' 40 #endif 41 42 #include <stdlib.h> 43 #include <limits.h> 44 #include <string.h> 45 #include <stdarg.h> 46 #include <libxml/xmlmemory.h> 47 #include <libxml/threads.h> 48 #include <libxml/globals.h> 49 #include <libxml/tree.h> 50 #include <libxml/parser.h> 51 #include <libxml/parserInternals.h> 52 #include <libxml/valid.h> 53 #include <libxml/entities.h> 54 #include <libxml/xmlerror.h> 55 #include <libxml/encoding.h> 56 #include <libxml/xmlIO.h> 57 #include <libxml/uri.h> 58 #ifdef LIBXML_CATALOG_ENABLED 59 #include <libxml/catalog.h> 60 #endif 61 #ifdef LIBXML_SCHEMAS_ENABLED 62 #include <libxml/xmlschemastypes.h> 63 #include <libxml/relaxng.h> 64 #endif 65 #ifdef HAVE_CTYPE_H 66 #include <ctype.h> 67 #endif 68 #ifdef HAVE_STDLIB_H 69 #include <stdlib.h> 70 #endif 71 #ifdef HAVE_SYS_STAT_H 72 #include <sys/stat.h> 73 #endif 74 #ifdef HAVE_FCNTL_H 75 #include <fcntl.h> 76 #endif 77 #ifdef HAVE_UNISTD_H 78 #include <unistd.h> 79 #endif 80 #ifdef HAVE_ZLIB_H 81 #include <zlib.h> 82 #endif 83 #ifdef HAVE_LZMA_H 84 #include <lzma.h> 85 #endif 86 87 #include "buf.h" 88 #include "enc.h" 89 90 static void 91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 92 93 static xmlParserCtxtPtr 94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 95 const xmlChar *base, xmlParserCtxtPtr pctx); 96 97 static void xmlHaltParser(xmlParserCtxtPtr ctxt); 98 99 /************************************************************************ 100 * * 101 * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 102 * * 103 ************************************************************************/ 104 105 #define XML_PARSER_BIG_ENTITY 1000 106 #define XML_PARSER_LOT_ENTITY 5000 107 108 /* 109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 110 * replacement over the size in byte of the input indicates that you have 111 * and eponential behaviour. A value of 10 correspond to at least 3 entity 112 * replacement per byte of input. 113 */ 114 #define XML_PARSER_NON_LINEAR 10 115 116 /* 117 * xmlParserEntityCheck 118 * 119 * Function to check non-linear entity expansion behaviour 120 * This is here to detect and stop exponential linear entity expansion 121 * This is not a limitation of the parser but a safety 122 * boundary feature. It can be disabled with the XML_PARSE_HUGE 123 * parser option. 124 */ 125 static int 126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 127 xmlEntityPtr ent, size_t replacement) 128 { 129 size_t consumed = 0; 130 131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 132 return (0); 133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 134 return (1); 135 136 /* 137 * This may look absurd but is needed to detect 138 * entities problems 139 */ 140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 141 (ent->content != NULL) && (ent->checked == 0) && 142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) { 143 unsigned long oldnbent = ctxt->nbentities; 144 xmlChar *rep; 145 146 ent->checked = 1; 147 148 ++ctxt->depth; 149 rep = xmlStringDecodeEntities(ctxt, ent->content, 150 XML_SUBSTITUTE_REF, 0, 0, 0); 151 --ctxt->depth; 152 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) { 153 ent->content[0] = 0; 154 } 155 156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 157 if (rep != NULL) { 158 if (xmlStrchr(rep, '<')) 159 ent->checked |= 1; 160 xmlFree(rep); 161 rep = NULL; 162 } 163 } 164 if (replacement != 0) { 165 if (replacement < XML_MAX_TEXT_LENGTH) 166 return(0); 167 168 /* 169 * If 
the volume of entity copy reaches 10 times the 170 * amount of parsed data and over the large text threshold 171 * then that's very likely to be an abuse. 172 */ 173 if (ctxt->input != NULL) { 174 consumed = ctxt->input->consumed + 175 (ctxt->input->cur - ctxt->input->base); 176 } 177 consumed += ctxt->sizeentities; 178 179 if (replacement < XML_PARSER_NON_LINEAR * consumed) 180 return(0); 181 } else if (size != 0) { 182 /* 183 * Do the check based on the replacement size of the entity 184 */ 185 if (size < XML_PARSER_BIG_ENTITY) 186 return(0); 187 188 /* 189 * A limit on the amount of text data reasonably used 190 */ 191 if (ctxt->input != NULL) { 192 consumed = ctxt->input->consumed + 193 (ctxt->input->cur - ctxt->input->base); 194 } 195 consumed += ctxt->sizeentities; 196 197 if ((size < XML_PARSER_NON_LINEAR * consumed) && 198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 199 return (0); 200 } else if (ent != NULL) { 201 /* 202 * use the number of parsed entities in the replacement 203 */ 204 size = ent->checked / 2; 205 206 /* 207 * The amount of data parsed counting entities size only once 208 */ 209 if (ctxt->input != NULL) { 210 consumed = ctxt->input->consumed + 211 (ctxt->input->cur - ctxt->input->base); 212 } 213 consumed += ctxt->sizeentities; 214 215 /* 216 * Check the density of entities for the amount of data 217 * knowing an entity reference will take at least 3 bytes 218 */ 219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 220 return (0); 221 } else { 222 /* 223 * strange we got no data for checking 224 */ 225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && 226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || 227 (ctxt->nbentities <= 10000)) 228 return (0); 229 } 230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 231 return (1); 232 } 233 234 /** 235 * xmlParserMaxDepth: 236 * 237 * arbitrary depth limit for the XML documents that we allow to 238 * process. 
* This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;



/* NOTE(review): SAX2 and the two buffer sizes below are not used in the
 * part of the file visible here; confirm their users before changing. */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expected to have received. It is not a hard
 * limit but an optimization when reading strings like Names
 * It is not strictly needed as long as inputs available characters
 * are followed by 0, which should be provided by the I/O level
 */
#define XML_PARSER_CHUNK_SIZE 100

/*
 * List of XML prefixed PI allowed by W3C specs
 * (the array is terminated by a NULL entry)
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};


/*
 * Forward declarations of static helpers defined later in this file.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);

/************************************************************************
 *                                                                      *
 *              Some
factorized error routines * 302 * * 303 ************************************************************************/ 304 305 /** 306 * xmlErrAttributeDup: 307 * @ctxt: an XML parser context 308 * @prefix: the attribute prefix 309 * @localname: the attribute localname 310 * 311 * Handle a redefinition of attribute error 312 */ 313 static void 314 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 315 const xmlChar * localname) 316 { 317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 318 (ctxt->instate == XML_PARSER_EOF)) 319 return; 320 if (ctxt != NULL) 321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 322 323 if (prefix == NULL) 324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 326 (const char *) localname, NULL, NULL, 0, 0, 327 "Attribute %s redefined\n", localname); 328 else 329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 331 (const char *) prefix, (const char *) localname, 332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 333 localname); 334 if (ctxt != NULL) { 335 ctxt->wellFormed = 0; 336 if (ctxt->recovery == 0) 337 ctxt->disableSAX = 1; 338 } 339 } 340 341 /** 342 * xmlFatalErr: 343 * @ctxt: an XML parser context 344 * @error: the error number 345 * @extra: extra information string 346 * 347 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 348 */ 349 static void 350 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 351 { 352 const char *errmsg; 353 354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 355 (ctxt->instate == XML_PARSER_EOF)) 356 return; 357 switch (error) { 358 case XML_ERR_INVALID_HEX_CHARREF: 359 errmsg = "CharRef: invalid hexadecimal value"; 360 break; 361 case XML_ERR_INVALID_DEC_CHARREF: 362 errmsg = "CharRef: invalid decimal value"; 363 break; 364 case XML_ERR_INVALID_CHARREF: 365 errmsg = "CharRef: invalid value"; 366 break; 367 case XML_ERR_INTERNAL_ERROR: 368 errmsg = "internal error"; 369 break; 370 case XML_ERR_PEREF_AT_EOF: 371 errmsg = "PEReference at end of document"; 372 break; 373 case XML_ERR_PEREF_IN_PROLOG: 374 errmsg = "PEReference in prolog"; 375 break; 376 case XML_ERR_PEREF_IN_EPILOG: 377 errmsg = "PEReference in epilog"; 378 break; 379 case XML_ERR_PEREF_NO_NAME: 380 errmsg = "PEReference: no name"; 381 break; 382 case XML_ERR_PEREF_SEMICOL_MISSING: 383 errmsg = "PEReference: expecting ';'"; 384 break; 385 case XML_ERR_ENTITY_LOOP: 386 errmsg = "Detected an entity reference loop"; 387 break; 388 case XML_ERR_ENTITY_NOT_STARTED: 389 errmsg = "EntityValue: \" or ' expected"; 390 break; 391 case XML_ERR_ENTITY_PE_INTERNAL: 392 errmsg = "PEReferences forbidden in internal subset"; 393 break; 394 case XML_ERR_ENTITY_NOT_FINISHED: 395 errmsg = "EntityValue: \" or ' expected"; 396 break; 397 case XML_ERR_ATTRIBUTE_NOT_STARTED: 398 errmsg = "AttValue: \" or ' expected"; 399 break; 400 case XML_ERR_LT_IN_ATTRIBUTE: 401 errmsg = "Unescaped '<' not allowed in attributes values"; 402 break; 403 case XML_ERR_LITERAL_NOT_STARTED: 404 errmsg = "SystemLiteral \" or ' expected"; 405 break; 406 case XML_ERR_LITERAL_NOT_FINISHED: 407 errmsg = "Unfinished System or Public ID \" or ' expected"; 408 break; 409 case XML_ERR_MISPLACED_CDATA_END: 410 errmsg = "Sequence ']]>' not allowed in content"; 411 break; 412 case 
XML_ERR_URI_REQUIRED: 413 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 414 break; 415 case XML_ERR_PUBID_REQUIRED: 416 errmsg = "PUBLIC, the Public Identifier is missing"; 417 break; 418 case XML_ERR_HYPHEN_IN_COMMENT: 419 errmsg = "Comment must not contain '--' (double-hyphen)"; 420 break; 421 case XML_ERR_PI_NOT_STARTED: 422 errmsg = "xmlParsePI : no target name"; 423 break; 424 case XML_ERR_RESERVED_XML_NAME: 425 errmsg = "Invalid PI name"; 426 break; 427 case XML_ERR_NOTATION_NOT_STARTED: 428 errmsg = "NOTATION: Name expected here"; 429 break; 430 case XML_ERR_NOTATION_NOT_FINISHED: 431 errmsg = "'>' required to close NOTATION declaration"; 432 break; 433 case XML_ERR_VALUE_REQUIRED: 434 errmsg = "Entity value required"; 435 break; 436 case XML_ERR_URI_FRAGMENT: 437 errmsg = "Fragment not allowed"; 438 break; 439 case XML_ERR_ATTLIST_NOT_STARTED: 440 errmsg = "'(' required to start ATTLIST enumeration"; 441 break; 442 case XML_ERR_NMTOKEN_REQUIRED: 443 errmsg = "NmToken expected in ATTLIST enumeration"; 444 break; 445 case XML_ERR_ATTLIST_NOT_FINISHED: 446 errmsg = "')' required to finish ATTLIST enumeration"; 447 break; 448 case XML_ERR_MIXED_NOT_STARTED: 449 errmsg = "MixedContentDecl : '|' or ')*' expected"; 450 break; 451 case XML_ERR_PCDATA_REQUIRED: 452 errmsg = "MixedContentDecl : '#PCDATA' expected"; 453 break; 454 case XML_ERR_ELEMCONTENT_NOT_STARTED: 455 errmsg = "ContentDecl : Name or '(' expected"; 456 break; 457 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 458 errmsg = "ContentDecl : ',' '|' or ')' expected"; 459 break; 460 case XML_ERR_PEREF_IN_INT_SUBSET: 461 errmsg = 462 "PEReference: forbidden within markup decl in internal subset"; 463 break; 464 case XML_ERR_GT_REQUIRED: 465 errmsg = "expected '>'"; 466 break; 467 case XML_ERR_CONDSEC_INVALID: 468 errmsg = "XML conditional section '[' expected"; 469 break; 470 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 471 errmsg = "Content error in the external subset"; 472 break; 473 case 
XML_ERR_CONDSEC_INVALID_KEYWORD: 474 errmsg = 475 "conditional section INCLUDE or IGNORE keyword expected"; 476 break; 477 case XML_ERR_CONDSEC_NOT_FINISHED: 478 errmsg = "XML conditional section not closed"; 479 break; 480 case XML_ERR_XMLDECL_NOT_STARTED: 481 errmsg = "Text declaration '<?xml' required"; 482 break; 483 case XML_ERR_XMLDECL_NOT_FINISHED: 484 errmsg = "parsing XML declaration: '?>' expected"; 485 break; 486 case XML_ERR_EXT_ENTITY_STANDALONE: 487 errmsg = "external parsed entities cannot be standalone"; 488 break; 489 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 490 errmsg = "EntityRef: expecting ';'"; 491 break; 492 case XML_ERR_DOCTYPE_NOT_FINISHED: 493 errmsg = "DOCTYPE improperly terminated"; 494 break; 495 case XML_ERR_LTSLASH_REQUIRED: 496 errmsg = "EndTag: '</' not found"; 497 break; 498 case XML_ERR_EQUAL_REQUIRED: 499 errmsg = "expected '='"; 500 break; 501 case XML_ERR_STRING_NOT_CLOSED: 502 errmsg = "String not closed expecting \" or '"; 503 break; 504 case XML_ERR_STRING_NOT_STARTED: 505 errmsg = "String not started expecting ' or \""; 506 break; 507 case XML_ERR_ENCODING_NAME: 508 errmsg = "Invalid XML encoding name"; 509 break; 510 case XML_ERR_STANDALONE_VALUE: 511 errmsg = "standalone accepts only 'yes' or 'no'"; 512 break; 513 case XML_ERR_DOCUMENT_EMPTY: 514 errmsg = "Document is empty"; 515 break; 516 case XML_ERR_DOCUMENT_END: 517 errmsg = "Extra content at the end of the document"; 518 break; 519 case XML_ERR_NOT_WELL_BALANCED: 520 errmsg = "chunk is not well balanced"; 521 break; 522 case XML_ERR_EXTRA_CONTENT: 523 errmsg = "extra content at the end of well balanced chunk"; 524 break; 525 case XML_ERR_VERSION_MISSING: 526 errmsg = "Malformed declaration expecting version"; 527 break; 528 case XML_ERR_NAME_TOO_LONG: 529 errmsg = "Name too long use XML_PARSE_HUGE option"; 530 break; 531 #if 0 532 case: 533 errmsg = ""; 534 break; 535 #endif 536 default: 537 errmsg = "Unregistered error message"; 538 } 539 if (ctxt != NULL) 540 
ctxt->errNo = error; 541 if (info == NULL) { 542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", 544 errmsg); 545 } else { 546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", 548 errmsg, info); 549 } 550 if (ctxt != NULL) { 551 ctxt->wellFormed = 0; 552 if (ctxt->recovery == 0) 553 ctxt->disableSAX = 1; 554 } 555 } 556 557 /** 558 * xmlFatalErrMsg: 559 * @ctxt: an XML parser context 560 * @error: the error number 561 * @msg: the error message 562 * 563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 564 */ 565 static void LIBXML_ATTR_FORMAT(3,0) 566 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 567 const char *msg) 568 { 569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 570 (ctxt->instate == XML_PARSER_EOF)) 571 return; 572 if (ctxt != NULL) 573 ctxt->errNo = error; 574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 576 if (ctxt != NULL) { 577 ctxt->wellFormed = 0; 578 if (ctxt->recovery == 0) 579 ctxt->disableSAX = 1; 580 } 581 } 582 583 /** 584 * xmlWarningMsg: 585 * @ctxt: an XML parser context 586 * @error: the error number 587 * @msg: the error message 588 * @str1: extra data 589 * @str2: extra data 590 * 591 * Handle a warning. 592 */ 593 static void LIBXML_ATTR_FORMAT(3,0) 594 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 595 const char *msg, const xmlChar *str1, const xmlChar *str2) 596 { 597 xmlStructuredErrorFunc schannel = NULL; 598 599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 600 (ctxt->instate == XML_PARSER_EOF)) 601 return; 602 if ((ctxt != NULL) && (ctxt->sax != NULL) && 603 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 604 schannel = ctxt->sax->serror; 605 if (ctxt != NULL) { 606 __xmlRaiseError(schannel, 607 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 608 ctxt->userData, 609 ctxt, NULL, XML_FROM_PARSER, error, 610 XML_ERR_WARNING, NULL, 0, 611 (const char *) str1, (const char *) str2, NULL, 0, 0, 612 msg, (const char *) str1, (const char *) str2); 613 } else { 614 __xmlRaiseError(schannel, NULL, NULL, 615 ctxt, NULL, XML_FROM_PARSER, error, 616 XML_ERR_WARNING, NULL, 0, 617 (const char *) str1, (const char *) str2, NULL, 0, 0, 618 msg, (const char *) str1, (const char *) str2); 619 } 620 } 621 622 /** 623 * xmlValidityError: 624 * @ctxt: an XML parser context 625 * @error: the error number 626 * @msg: the error message 627 * @str1: extra data 628 * 629 * Handle a validity error. 630 */ 631 static void LIBXML_ATTR_FORMAT(3,0) 632 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 633 const char *msg, const xmlChar *str1, const xmlChar *str2) 634 { 635 xmlStructuredErrorFunc schannel = NULL; 636 637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 638 (ctxt->instate == XML_PARSER_EOF)) 639 return; 640 if (ctxt != NULL) { 641 ctxt->errNo = error; 642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 643 schannel = ctxt->sax->serror; 644 } 645 if (ctxt != NULL) { 646 __xmlRaiseError(schannel, 647 ctxt->vctxt.error, ctxt->vctxt.userData, 648 ctxt, NULL, XML_FROM_DTD, error, 649 XML_ERR_ERROR, NULL, 0, (const char *) str1, 650 (const char *) str2, NULL, 0, 0, 651 msg, (const char *) str1, (const char *) str2); 652 ctxt->valid = 0; 653 } else { 654 __xmlRaiseError(schannel, NULL, NULL, 655 ctxt, NULL, XML_FROM_DTD, error, 656 XML_ERR_ERROR, NULL, 0, (const char *) str1, 657 (const char *) str2, NULL, 0, 0, 658 msg, (const char *) str1, (const char *) str2); 659 } 660 } 661 662 /** 663 * xmlFatalErrMsgInt: 664 * @ctxt: an XML parser context 665 * @error: the error number 666 * @msg: the error message 667 * @val: an integer value 668 * 669 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 670 */ 671 static void LIBXML_ATTR_FORMAT(3,0) 672 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 673 const char *msg, int val) 674 { 675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 676 (ctxt->instate == XML_PARSER_EOF)) 677 return; 678 if (ctxt != NULL) 679 ctxt->errNo = error; 680 __xmlRaiseError(NULL, NULL, NULL, 681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 683 if (ctxt != NULL) { 684 ctxt->wellFormed = 0; 685 if (ctxt->recovery == 0) 686 ctxt->disableSAX = 1; 687 } 688 } 689 690 /** 691 * xmlFatalErrMsgStrIntStr: 692 * @ctxt: an XML parser context 693 * @error: the error number 694 * @msg: the error message 695 * @str1: an string info 696 * @val: an integer value 697 * @str2: an string info 698 * 699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 700 */ 701 static void LIBXML_ATTR_FORMAT(3,0) 702 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 703 const char *msg, const xmlChar *str1, int val, 704 const xmlChar *str2) 705 { 706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 707 (ctxt->instate == XML_PARSER_EOF)) 708 return; 709 if (ctxt != NULL) 710 ctxt->errNo = error; 711 __xmlRaiseError(NULL, NULL, NULL, 712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 713 NULL, 0, (const char *) str1, (const char *) str2, 714 NULL, val, 0, msg, str1, val, str2); 715 if (ctxt != NULL) { 716 ctxt->wellFormed = 0; 717 if (ctxt->recovery == 0) 718 ctxt->disableSAX = 1; 719 } 720 } 721 722 /** 723 * xmlFatalErrMsgStr: 724 * @ctxt: an XML parser context 725 * @error: the error number 726 * @msg: the error message 727 * @val: a string value 728 * 729 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 730 */ 731 static void LIBXML_ATTR_FORMAT(3,0) 732 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 733 const char *msg, const xmlChar * val) 734 { 735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 736 (ctxt->instate == XML_PARSER_EOF)) 737 return; 738 if (ctxt != NULL) 739 ctxt->errNo = error; 740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 741 XML_FROM_PARSER, error, XML_ERR_FATAL, 742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 743 val); 744 if (ctxt != NULL) { 745 ctxt->wellFormed = 0; 746 if (ctxt->recovery == 0) 747 ctxt->disableSAX = 1; 748 } 749 } 750 751 /** 752 * xmlErrMsgStr: 753 * @ctxt: an XML parser context 754 * @error: the error number 755 * @msg: the error message 756 * @val: a string value 757 * 758 * Handle a non fatal parser error 759 */ 760 static void LIBXML_ATTR_FORMAT(3,0) 761 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 762 const char *msg, const xmlChar * val) 763 { 764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 765 (ctxt->instate == XML_PARSER_EOF)) 766 return; 767 if (ctxt != NULL) 768 ctxt->errNo = error; 769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 770 XML_FROM_PARSER, error, XML_ERR_ERROR, 771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 772 val); 773 } 774 775 /** 776 * xmlNsErr: 777 * @ctxt: an XML parser context 778 * @error: the error number 779 * @msg: the message 780 * @info1: extra information string 781 * @info2: extra information string 782 * 783 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 784 */ 785 static void LIBXML_ATTR_FORMAT(3,0) 786 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 787 const char *msg, 788 const xmlChar * info1, const xmlChar * info2, 789 const xmlChar * info3) 790 { 791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 792 (ctxt->instate == XML_PARSER_EOF)) 793 return; 794 if (ctxt != NULL) 795 ctxt->errNo = error; 796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 797 XML_ERR_ERROR, NULL, 0, (const char *) info1, 798 (const char *) info2, (const char *) info3, 0, 0, msg, 799 info1, info2, info3); 800 if (ctxt != NULL) 801 ctxt->nsWellFormed = 0; 802 } 803 804 /** 805 * xmlNsWarn 806 * @ctxt: an XML parser context 807 * @error: the error number 808 * @msg: the message 809 * @info1: extra information string 810 * @info2: extra information string 811 * 812 * Handle a namespace warning error 813 */ 814 static void LIBXML_ATTR_FORMAT(3,0) 815 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 816 const char *msg, 817 const xmlChar * info1, const xmlChar * info2, 818 const xmlChar * info3) 819 { 820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 821 (ctxt->instate == XML_PARSER_EOF)) 822 return; 823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 824 XML_ERR_WARNING, NULL, 0, (const char *) info1, 825 (const char *) info2, (const char *) info3, 0, 0, msg, 826 info1, info2, info3); 827 } 828 829 /************************************************************************ 830 * * 831 * Library wide options * 832 * * 833 ************************************************************************/ 834 835 /** 836 * xmlHasFeature: 837 * @feature: the feature to be examined 838 * 839 * Examines if the library has been compiled with a given feature. 840 * 841 * Returns a non-zero value if the feature exist, otherwise zero. 842 * Returns zero (0) if the feature does not exist or an unknown 843 * unknown feature is requested, non-zero otherwise. 
844 */ 845 int 846 xmlHasFeature(xmlFeature feature) 847 { 848 switch (feature) { 849 case XML_WITH_THREAD: 850 #ifdef LIBXML_THREAD_ENABLED 851 return(1); 852 #else 853 return(0); 854 #endif 855 case XML_WITH_TREE: 856 #ifdef LIBXML_TREE_ENABLED 857 return(1); 858 #else 859 return(0); 860 #endif 861 case XML_WITH_OUTPUT: 862 #ifdef LIBXML_OUTPUT_ENABLED 863 return(1); 864 #else 865 return(0); 866 #endif 867 case XML_WITH_PUSH: 868 #ifdef LIBXML_PUSH_ENABLED 869 return(1); 870 #else 871 return(0); 872 #endif 873 case XML_WITH_READER: 874 #ifdef LIBXML_READER_ENABLED 875 return(1); 876 #else 877 return(0); 878 #endif 879 case XML_WITH_PATTERN: 880 #ifdef LIBXML_PATTERN_ENABLED 881 return(1); 882 #else 883 return(0); 884 #endif 885 case XML_WITH_WRITER: 886 #ifdef LIBXML_WRITER_ENABLED 887 return(1); 888 #else 889 return(0); 890 #endif 891 case XML_WITH_SAX1: 892 #ifdef LIBXML_SAX1_ENABLED 893 return(1); 894 #else 895 return(0); 896 #endif 897 case XML_WITH_FTP: 898 #ifdef LIBXML_FTP_ENABLED 899 return(1); 900 #else 901 return(0); 902 #endif 903 case XML_WITH_HTTP: 904 #ifdef LIBXML_HTTP_ENABLED 905 return(1); 906 #else 907 return(0); 908 #endif 909 case XML_WITH_VALID: 910 #ifdef LIBXML_VALID_ENABLED 911 return(1); 912 #else 913 return(0); 914 #endif 915 case XML_WITH_HTML: 916 #ifdef LIBXML_HTML_ENABLED 917 return(1); 918 #else 919 return(0); 920 #endif 921 case XML_WITH_LEGACY: 922 #ifdef LIBXML_LEGACY_ENABLED 923 return(1); 924 #else 925 return(0); 926 #endif 927 case XML_WITH_C14N: 928 #ifdef LIBXML_C14N_ENABLED 929 return(1); 930 #else 931 return(0); 932 #endif 933 case XML_WITH_CATALOG: 934 #ifdef LIBXML_CATALOG_ENABLED 935 return(1); 936 #else 937 return(0); 938 #endif 939 case XML_WITH_XPATH: 940 #ifdef LIBXML_XPATH_ENABLED 941 return(1); 942 #else 943 return(0); 944 #endif 945 case XML_WITH_XPTR: 946 #ifdef LIBXML_XPTR_ENABLED 947 return(1); 948 #else 949 return(0); 950 #endif 951 case XML_WITH_XINCLUDE: 952 #ifdef LIBXML_XINCLUDE_ENABLED 953 return(1); 954 
#else 955 return(0); 956 #endif 957 case XML_WITH_ICONV: 958 #ifdef LIBXML_ICONV_ENABLED 959 return(1); 960 #else 961 return(0); 962 #endif 963 case XML_WITH_ISO8859X: 964 #ifdef LIBXML_ISO8859X_ENABLED 965 return(1); 966 #else 967 return(0); 968 #endif 969 case XML_WITH_UNICODE: 970 #ifdef LIBXML_UNICODE_ENABLED 971 return(1); 972 #else 973 return(0); 974 #endif 975 case XML_WITH_REGEXP: 976 #ifdef LIBXML_REGEXP_ENABLED 977 return(1); 978 #else 979 return(0); 980 #endif 981 case XML_WITH_AUTOMATA: 982 #ifdef LIBXML_AUTOMATA_ENABLED 983 return(1); 984 #else 985 return(0); 986 #endif 987 case XML_WITH_EXPR: 988 #ifdef LIBXML_EXPR_ENABLED 989 return(1); 990 #else 991 return(0); 992 #endif 993 case XML_WITH_SCHEMAS: 994 #ifdef LIBXML_SCHEMAS_ENABLED 995 return(1); 996 #else 997 return(0); 998 #endif 999 case XML_WITH_SCHEMATRON: 1000 #ifdef LIBXML_SCHEMATRON_ENABLED 1001 return(1); 1002 #else 1003 return(0); 1004 #endif 1005 case XML_WITH_MODULES: 1006 #ifdef LIBXML_MODULES_ENABLED 1007 return(1); 1008 #else 1009 return(0); 1010 #endif 1011 case XML_WITH_DEBUG: 1012 #ifdef LIBXML_DEBUG_ENABLED 1013 return(1); 1014 #else 1015 return(0); 1016 #endif 1017 case XML_WITH_DEBUG_MEM: 1018 #ifdef DEBUG_MEMORY_LOCATION 1019 return(1); 1020 #else 1021 return(0); 1022 #endif 1023 case XML_WITH_DEBUG_RUN: 1024 #ifdef LIBXML_DEBUG_RUNTIME 1025 return(1); 1026 #else 1027 return(0); 1028 #endif 1029 case XML_WITH_ZLIB: 1030 #ifdef LIBXML_ZLIB_ENABLED 1031 return(1); 1032 #else 1033 return(0); 1034 #endif 1035 case XML_WITH_LZMA: 1036 #ifdef LIBXML_LZMA_ENABLED 1037 return(1); 1038 #else 1039 return(0); 1040 #endif 1041 case XML_WITH_ICU: 1042 #ifdef LIBXML_ICU_ENABLED 1043 return(1); 1044 #else 1045 return(0); 1046 #endif 1047 default: 1048 break; 1049 } 1050 return(0); 1051 } 1052 1053 /************************************************************************ 1054 * * 1055 * SAX2 defaulted attributes handling * 1056 * * 1057 
************************************************************************/ 1058 1059 /** 1060 * xmlDetectSAX2: 1061 * @ctxt: an XML parser context 1062 * 1063 * Do the SAX2 detection and specific intialization 1064 */ 1065 static void 1066 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1067 if (ctxt == NULL) return; 1068 #ifdef LIBXML_SAX1_ENABLED 1069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1070 ((ctxt->sax->startElementNs != NULL) || 1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1072 #else 1073 ctxt->sax2 = 1; 1074 #endif /* LIBXML_SAX1_ENABLED */ 1075 1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1080 (ctxt->str_xml_ns == NULL)) { 1081 xmlErrMemory(ctxt, NULL); 1082 } 1083 } 1084 1085 typedef struct _xmlDefAttrs xmlDefAttrs; 1086 typedef xmlDefAttrs *xmlDefAttrsPtr; 1087 struct _xmlDefAttrs { 1088 int nbAttrs; /* number of defaulted attributes on that element */ 1089 int maxAttrs; /* the size of the array */ 1090 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1091 }; 1092 1093 /** 1094 * xmlAttrNormalizeSpace: 1095 * @src: the source string 1096 * @dst: the target string 1097 * 1098 * Normalize the space in non CDATA attribute values: 1099 * If the attribute type is not CDATA, then the XML processor MUST further 1100 * process the normalized attribute value by discarding any leading and 1101 * trailing space (#x20) characters, and by replacing sequences of space 1102 * (#x20) characters by a single space (#x20) character. 1103 * Note that the size of dst need to be at least src, and if one doesn't need 1104 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1105 * passing src as dst is just fine. 
1106 * 1107 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1108 * is needed. 1109 */ 1110 static xmlChar * 1111 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1112 { 1113 if ((src == NULL) || (dst == NULL)) 1114 return(NULL); 1115 1116 while (*src == 0x20) src++; 1117 while (*src != 0) { 1118 if (*src == 0x20) { 1119 while (*src == 0x20) src++; 1120 if (*src != 0) 1121 *dst++ = 0x20; 1122 } else { 1123 *dst++ = *src++; 1124 } 1125 } 1126 *dst = 0; 1127 if (dst == src) 1128 return(NULL); 1129 return(dst); 1130 } 1131 1132 /** 1133 * xmlAttrNormalizeSpace2: 1134 * @src: the source string 1135 * 1136 * Normalize the space in non CDATA attribute values, a slightly more complex 1137 * front end to avoid allocation problems when running on attribute values 1138 * coming from the input. 1139 * 1140 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1141 * is needed. 1142 */ 1143 static const xmlChar * 1144 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1145 { 1146 int i; 1147 int remove_head = 0; 1148 int need_realloc = 0; 1149 const xmlChar *cur; 1150 1151 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1152 return(NULL); 1153 i = *len; 1154 if (i <= 0) 1155 return(NULL); 1156 1157 cur = src; 1158 while (*cur == 0x20) { 1159 cur++; 1160 remove_head++; 1161 } 1162 while (*cur != 0) { 1163 if (*cur == 0x20) { 1164 cur++; 1165 if ((*cur == 0x20) || (*cur == 0)) { 1166 need_realloc = 1; 1167 break; 1168 } 1169 } else 1170 cur++; 1171 } 1172 if (need_realloc) { 1173 xmlChar *ret; 1174 1175 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1176 if (ret == NULL) { 1177 xmlErrMemory(ctxt, NULL); 1178 return(NULL); 1179 } 1180 xmlAttrNormalizeSpace(ret, ret); 1181 *len = (int) strlen((const char *)ret); 1182 return(ret); 1183 } else if (remove_head) { 1184 *len -= remove_head; 1185 memmove(src, src + remove_head, 1 + *len); 1186 return(src); 1187 } 1188 return(NULL); 1189 } 1190 1191 
/** 1192 * xmlAddDefAttrs: 1193 * @ctxt: an XML parser context 1194 * @fullname: the element fullname 1195 * @fullattr: the attribute fullname 1196 * @value: the attribute value 1197 * 1198 * Add a defaulted attribute for an element 1199 */ 1200 static void 1201 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1202 const xmlChar *fullname, 1203 const xmlChar *fullattr, 1204 const xmlChar *value) { 1205 xmlDefAttrsPtr defaults; 1206 int len; 1207 const xmlChar *name; 1208 const xmlChar *prefix; 1209 1210 /* 1211 * Allows to detect attribute redefinitions 1212 */ 1213 if (ctxt->attsSpecial != NULL) { 1214 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1215 return; 1216 } 1217 1218 if (ctxt->attsDefault == NULL) { 1219 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1220 if (ctxt->attsDefault == NULL) 1221 goto mem_error; 1222 } 1223 1224 /* 1225 * split the element name into prefix:localname , the string found 1226 * are within the DTD and then not associated to namespace names. 
1227 */ 1228 name = xmlSplitQName3(fullname, &len); 1229 if (name == NULL) { 1230 name = xmlDictLookup(ctxt->dict, fullname, -1); 1231 prefix = NULL; 1232 } else { 1233 name = xmlDictLookup(ctxt->dict, name, -1); 1234 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1235 } 1236 1237 /* 1238 * make sure there is some storage 1239 */ 1240 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1241 if (defaults == NULL) { 1242 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1243 (4 * 5) * sizeof(const xmlChar *)); 1244 if (defaults == NULL) 1245 goto mem_error; 1246 defaults->nbAttrs = 0; 1247 defaults->maxAttrs = 4; 1248 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1249 defaults, NULL) < 0) { 1250 xmlFree(defaults); 1251 goto mem_error; 1252 } 1253 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1254 xmlDefAttrsPtr temp; 1255 1256 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1257 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1258 if (temp == NULL) 1259 goto mem_error; 1260 defaults = temp; 1261 defaults->maxAttrs *= 2; 1262 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1263 defaults, NULL) < 0) { 1264 xmlFree(defaults); 1265 goto mem_error; 1266 } 1267 } 1268 1269 /* 1270 * Split the element name into prefix:localname , the string found 1271 * are within the DTD and hen not associated to namespace names. 
1272 */ 1273 name = xmlSplitQName3(fullattr, &len); 1274 if (name == NULL) { 1275 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1276 prefix = NULL; 1277 } else { 1278 name = xmlDictLookup(ctxt->dict, name, -1); 1279 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1280 } 1281 1282 defaults->values[5 * defaults->nbAttrs] = name; 1283 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1284 /* intern the string and precompute the end */ 1285 len = xmlStrlen(value); 1286 value = xmlDictLookup(ctxt->dict, value, len); 1287 defaults->values[5 * defaults->nbAttrs + 2] = value; 1288 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1289 if (ctxt->external) 1290 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1291 else 1292 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1293 defaults->nbAttrs++; 1294 1295 return; 1296 1297 mem_error: 1298 xmlErrMemory(ctxt, NULL); 1299 return; 1300 } 1301 1302 /** 1303 * xmlAddSpecialAttr: 1304 * @ctxt: an XML parser context 1305 * @fullname: the element fullname 1306 * @fullattr: the attribute fullname 1307 * @type: the attribute type 1308 * 1309 * Register this attribute type 1310 */ 1311 static void 1312 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1313 const xmlChar *fullname, 1314 const xmlChar *fullattr, 1315 int type) 1316 { 1317 if (ctxt->attsSpecial == NULL) { 1318 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1319 if (ctxt->attsSpecial == NULL) 1320 goto mem_error; 1321 } 1322 1323 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1324 return; 1325 1326 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1327 (void *) (long) type); 1328 return; 1329 1330 mem_error: 1331 xmlErrMemory(ctxt, NULL); 1332 return; 1333 } 1334 1335 /** 1336 * xmlCleanSpecialAttrCallback: 1337 * 1338 * Removes CDATA attributes from the special attribute table 1339 */ 1340 static void 1341 xmlCleanSpecialAttrCallback(void *payload, void *data, 1342 const xmlChar *fullname, const xmlChar 
*fullattr, 1343 const xmlChar *unused ATTRIBUTE_UNUSED) { 1344 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1345 1346 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1347 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1348 } 1349 } 1350 1351 /** 1352 * xmlCleanSpecialAttr: 1353 * @ctxt: an XML parser context 1354 * 1355 * Trim the list of attributes defined to remove all those of type 1356 * CDATA as they are not special. This call should be done when finishing 1357 * to parse the DTD and before starting to parse the document root. 1358 */ 1359 static void 1360 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1361 { 1362 if (ctxt->attsSpecial == NULL) 1363 return; 1364 1365 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1366 1367 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1368 xmlHashFree(ctxt->attsSpecial, NULL); 1369 ctxt->attsSpecial = NULL; 1370 } 1371 return; 1372 } 1373 1374 /** 1375 * xmlCheckLanguageID: 1376 * @lang: pointer to the string value 1377 * 1378 * Checks that the value conforms to the LanguageID production: 1379 * 1380 * NOTE: this is somewhat deprecated, those productions were removed from 1381 * the XML Second edition. 
1382 * 1383 * [33] LanguageID ::= Langcode ('-' Subcode)* 1384 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1385 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1386 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1387 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1388 * [38] Subcode ::= ([a-z] | [A-Z])+ 1389 * 1390 * The current REC reference the sucessors of RFC 1766, currently 5646 1391 * 1392 * http://www.rfc-editor.org/rfc/rfc5646.txt 1393 * langtag = language 1394 * ["-" script] 1395 * ["-" region] 1396 * *("-" variant) 1397 * *("-" extension) 1398 * ["-" privateuse] 1399 * language = 2*3ALPHA ; shortest ISO 639 code 1400 * ["-" extlang] ; sometimes followed by 1401 * ; extended language subtags 1402 * / 4ALPHA ; or reserved for future use 1403 * / 5*8ALPHA ; or registered language subtag 1404 * 1405 * extlang = 3ALPHA ; selected ISO 639 codes 1406 * *2("-" 3ALPHA) ; permanently reserved 1407 * 1408 * script = 4ALPHA ; ISO 15924 code 1409 * 1410 * region = 2ALPHA ; ISO 3166-1 code 1411 * / 3DIGIT ; UN M.49 code 1412 * 1413 * variant = 5*8alphanum ; registered variants 1414 * / (DIGIT 3alphanum) 1415 * 1416 * extension = singleton 1*("-" (2*8alphanum)) 1417 * 1418 * ; Single alphanumerics 1419 * ; "x" reserved for private use 1420 * singleton = DIGIT ; 0 - 9 1421 * / %x41-57 ; A - W 1422 * / %x59-5A ; Y - Z 1423 * / %x61-77 ; a - w 1424 * / %x79-7A ; y - z 1425 * 1426 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1427 * The parser below doesn't try to cope with extension or privateuse 1428 * that could be added but that's not interoperable anyway 1429 * 1430 * Returns 1 if correct 0 otherwise 1431 **/ 1432 int 1433 xmlCheckLanguageID(const xmlChar * lang) 1434 { 1435 const xmlChar *cur = lang, *nxt; 1436 1437 if (cur == NULL) 1438 return (0); 1439 if (((cur[0] == 'i') && (cur[1] == '-')) || 1440 ((cur[0] == 'I') && (cur[1] == '-')) || 1441 ((cur[0] == 'x') && (cur[1] == '-')) || 1442 ((cur[0] == 'X') && 
(cur[1] == '-'))) { 1443 /* 1444 * Still allow IANA code and user code which were coming 1445 * from the previous version of the XML-1.0 specification 1446 * it's deprecated but we should not fail 1447 */ 1448 cur += 2; 1449 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1450 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1451 cur++; 1452 return(cur[0] == 0); 1453 } 1454 nxt = cur; 1455 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1456 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1457 nxt++; 1458 if (nxt - cur >= 4) { 1459 /* 1460 * Reserved 1461 */ 1462 if ((nxt - cur > 8) || (nxt[0] != 0)) 1463 return(0); 1464 return(1); 1465 } 1466 if (nxt - cur < 2) 1467 return(0); 1468 /* we got an ISO 639 code */ 1469 if (nxt[0] == 0) 1470 return(1); 1471 if (nxt[0] != '-') 1472 return(0); 1473 1474 nxt++; 1475 cur = nxt; 1476 /* now we can have extlang or script or region or variant */ 1477 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1478 goto region_m49; 1479 1480 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1481 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1482 nxt++; 1483 if (nxt - cur == 4) 1484 goto script; 1485 if (nxt - cur == 2) 1486 goto region; 1487 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1488 goto variant; 1489 if (nxt - cur != 3) 1490 return(0); 1491 /* we parsed an extlang */ 1492 if (nxt[0] == 0) 1493 return(1); 1494 if (nxt[0] != '-') 1495 return(0); 1496 1497 nxt++; 1498 cur = nxt; 1499 /* now we can have script or region or variant */ 1500 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1501 goto region_m49; 1502 1503 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1504 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1505 nxt++; 1506 if (nxt - cur == 2) 1507 goto region; 1508 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1509 goto variant; 1510 if (nxt - cur != 4) 1511 return(0); 1512 /* we parsed a script */ 1513 script: 1514 if (nxt[0] == 0) 1515 return(1); 1516 if (nxt[0] != '-') 1517 return(0); 1518 1519 nxt++; 1520 cur = nxt; 1521 /* now we can have region or variant */ 1522 if ((nxt[0] >= '0') 
&& (nxt[0] <= '9')) 1523 goto region_m49; 1524 1525 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1526 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1527 nxt++; 1528 1529 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1530 goto variant; 1531 if (nxt - cur != 2) 1532 return(0); 1533 /* we parsed a region */ 1534 region: 1535 if (nxt[0] == 0) 1536 return(1); 1537 if (nxt[0] != '-') 1538 return(0); 1539 1540 nxt++; 1541 cur = nxt; 1542 /* now we can just have a variant */ 1543 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1544 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1545 nxt++; 1546 1547 if ((nxt - cur < 5) || (nxt - cur > 8)) 1548 return(0); 1549 1550 /* we parsed a variant */ 1551 variant: 1552 if (nxt[0] == 0) 1553 return(1); 1554 if (nxt[0] != '-') 1555 return(0); 1556 /* extensions and private use subtags not checked */ 1557 return (1); 1558 1559 region_m49: 1560 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1561 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1562 nxt += 3; 1563 goto region; 1564 } 1565 return(0); 1566 } 1567 1568 /************************************************************************ 1569 * * 1570 * Parser stacks related functions and macros * 1571 * * 1572 ************************************************************************/ 1573 1574 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1575 const xmlChar ** str); 1576 1577 #ifdef SAX2 1578 /** 1579 * nsPush: 1580 * @ctxt: an XML parser context 1581 * @prefix: the namespace prefix or NULL 1582 * @URL: the namespace name 1583 * 1584 * Pushes a new parser namespace on top of the ns stack 1585 * 1586 * Returns -1 in case of error, -2 if the namespace should be discarded 1587 * and the index in the stack otherwise. 
1588 */ 1589 static int 1590 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1591 { 1592 if (ctxt->options & XML_PARSE_NSCLEAN) { 1593 int i; 1594 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1595 if (ctxt->nsTab[i] == prefix) { 1596 /* in scope */ 1597 if (ctxt->nsTab[i + 1] == URL) 1598 return(-2); 1599 /* out of scope keep it */ 1600 break; 1601 } 1602 } 1603 } 1604 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1605 ctxt->nsMax = 10; 1606 ctxt->nsNr = 0; 1607 ctxt->nsTab = (const xmlChar **) 1608 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1609 if (ctxt->nsTab == NULL) { 1610 xmlErrMemory(ctxt, NULL); 1611 ctxt->nsMax = 0; 1612 return (-1); 1613 } 1614 } else if (ctxt->nsNr >= ctxt->nsMax) { 1615 const xmlChar ** tmp; 1616 ctxt->nsMax *= 2; 1617 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1618 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1619 if (tmp == NULL) { 1620 xmlErrMemory(ctxt, NULL); 1621 ctxt->nsMax /= 2; 1622 return (-1); 1623 } 1624 ctxt->nsTab = tmp; 1625 } 1626 ctxt->nsTab[ctxt->nsNr++] = prefix; 1627 ctxt->nsTab[ctxt->nsNr++] = URL; 1628 return (ctxt->nsNr); 1629 } 1630 /** 1631 * nsPop: 1632 * @ctxt: an XML parser context 1633 * @nr: the number to pop 1634 * 1635 * Pops the top @nr parser prefix/namespace from the ns stack 1636 * 1637 * Returns the number of namespaces removed 1638 */ 1639 static int 1640 nsPop(xmlParserCtxtPtr ctxt, int nr) 1641 { 1642 int i; 1643 1644 if (ctxt->nsTab == NULL) return(0); 1645 if (ctxt->nsNr < nr) { 1646 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1647 nr = ctxt->nsNr; 1648 } 1649 if (ctxt->nsNr <= 0) 1650 return (0); 1651 1652 for (i = 0;i < nr;i++) { 1653 ctxt->nsNr--; 1654 ctxt->nsTab[ctxt->nsNr] = NULL; 1655 } 1656 return(nr); 1657 } 1658 #endif 1659 1660 static int 1661 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1662 const xmlChar **atts; 1663 int *attallocs; 1664 int maxatts; 1665 1666 if (ctxt->atts == NULL) { 1667 maxatts = 55; /* allow for 
10 attrs by default */ 1668 atts = (const xmlChar **) 1669 xmlMalloc(maxatts * sizeof(xmlChar *)); 1670 if (atts == NULL) goto mem_error; 1671 ctxt->atts = atts; 1672 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1673 if (attallocs == NULL) goto mem_error; 1674 ctxt->attallocs = attallocs; 1675 ctxt->maxatts = maxatts; 1676 } else if (nr + 5 > ctxt->maxatts) { 1677 maxatts = (nr + 5) * 2; 1678 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1679 maxatts * sizeof(const xmlChar *)); 1680 if (atts == NULL) goto mem_error; 1681 ctxt->atts = atts; 1682 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1683 (maxatts / 5) * sizeof(int)); 1684 if (attallocs == NULL) goto mem_error; 1685 ctxt->attallocs = attallocs; 1686 ctxt->maxatts = maxatts; 1687 } 1688 return(ctxt->maxatts); 1689 mem_error: 1690 xmlErrMemory(ctxt, NULL); 1691 return(-1); 1692 } 1693 1694 /** 1695 * inputPush: 1696 * @ctxt: an XML parser context 1697 * @value: the parser input 1698 * 1699 * Pushes a new parser input on top of the input stack 1700 * 1701 * Returns -1 in case of error, the index in the stack otherwise 1702 */ 1703 int 1704 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1705 { 1706 if ((ctxt == NULL) || (value == NULL)) 1707 return(-1); 1708 if (ctxt->inputNr >= ctxt->inputMax) { 1709 ctxt->inputMax *= 2; 1710 ctxt->inputTab = 1711 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1712 ctxt->inputMax * 1713 sizeof(ctxt->inputTab[0])); 1714 if (ctxt->inputTab == NULL) { 1715 xmlErrMemory(ctxt, NULL); 1716 xmlFreeInputStream(value); 1717 ctxt->inputMax /= 2; 1718 value = NULL; 1719 return (-1); 1720 } 1721 } 1722 ctxt->inputTab[ctxt->inputNr] = value; 1723 ctxt->input = value; 1724 return (ctxt->inputNr++); 1725 } 1726 /** 1727 * inputPop: 1728 * @ctxt: an XML parser context 1729 * 1730 * Pops the top parser input from the input stack 1731 * 1732 * Returns the input just removed 1733 */ 1734 xmlParserInputPtr 1735 inputPop(xmlParserCtxtPtr ctxt) 1736 
{ 1737 xmlParserInputPtr ret; 1738 1739 if (ctxt == NULL) 1740 return(NULL); 1741 if (ctxt->inputNr <= 0) 1742 return (NULL); 1743 ctxt->inputNr--; 1744 if (ctxt->inputNr > 0) 1745 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1746 else 1747 ctxt->input = NULL; 1748 ret = ctxt->inputTab[ctxt->inputNr]; 1749 ctxt->inputTab[ctxt->inputNr] = NULL; 1750 return (ret); 1751 } 1752 /** 1753 * nodePush: 1754 * @ctxt: an XML parser context 1755 * @value: the element node 1756 * 1757 * Pushes a new element node on top of the node stack 1758 * 1759 * Returns -1 in case of error, the index in the stack otherwise 1760 */ 1761 int 1762 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1763 { 1764 if (ctxt == NULL) return(0); 1765 if (ctxt->nodeNr >= ctxt->nodeMax) { 1766 xmlNodePtr *tmp; 1767 1768 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1769 ctxt->nodeMax * 2 * 1770 sizeof(ctxt->nodeTab[0])); 1771 if (tmp == NULL) { 1772 xmlErrMemory(ctxt, NULL); 1773 return (-1); 1774 } 1775 ctxt->nodeTab = tmp; 1776 ctxt->nodeMax *= 2; 1777 } 1778 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1779 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1780 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1781 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1782 xmlParserMaxDepth); 1783 xmlHaltParser(ctxt); 1784 return(-1); 1785 } 1786 ctxt->nodeTab[ctxt->nodeNr] = value; 1787 ctxt->node = value; 1788 return (ctxt->nodeNr++); 1789 } 1790 1791 /** 1792 * nodePop: 1793 * @ctxt: an XML parser context 1794 * 1795 * Pops the top element node from the node stack 1796 * 1797 * Returns the node just removed 1798 */ 1799 xmlNodePtr 1800 nodePop(xmlParserCtxtPtr ctxt) 1801 { 1802 xmlNodePtr ret; 1803 1804 if (ctxt == NULL) return(NULL); 1805 if (ctxt->nodeNr <= 0) 1806 return (NULL); 1807 ctxt->nodeNr--; 1808 if (ctxt->nodeNr > 0) 1809 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1810 else 1811 ctxt->node = NULL; 1812 ret = ctxt->nodeTab[ctxt->nodeNr]; 1813 
ctxt->nodeTab[ctxt->nodeNr] = NULL; 1814 return (ret); 1815 } 1816 1817 #ifdef LIBXML_PUSH_ENABLED 1818 /** 1819 * nameNsPush: 1820 * @ctxt: an XML parser context 1821 * @value: the element name 1822 * @prefix: the element prefix 1823 * @URI: the element namespace name 1824 * 1825 * Pushes a new element name/prefix/URL on top of the name stack 1826 * 1827 * Returns -1 in case of error, the index in the stack otherwise 1828 */ 1829 static int 1830 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1831 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1832 { 1833 if (ctxt->nameNr >= ctxt->nameMax) { 1834 const xmlChar * *tmp; 1835 void **tmp2; 1836 ctxt->nameMax *= 2; 1837 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1838 ctxt->nameMax * 1839 sizeof(ctxt->nameTab[0])); 1840 if (tmp == NULL) { 1841 ctxt->nameMax /= 2; 1842 goto mem_error; 1843 } 1844 ctxt->nameTab = tmp; 1845 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1846 ctxt->nameMax * 3 * 1847 sizeof(ctxt->pushTab[0])); 1848 if (tmp2 == NULL) { 1849 ctxt->nameMax /= 2; 1850 goto mem_error; 1851 } 1852 ctxt->pushTab = tmp2; 1853 } 1854 ctxt->nameTab[ctxt->nameNr] = value; 1855 ctxt->name = value; 1856 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1857 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1858 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1859 return (ctxt->nameNr++); 1860 mem_error: 1861 xmlErrMemory(ctxt, NULL); 1862 return (-1); 1863 } 1864 /** 1865 * nameNsPop: 1866 * @ctxt: an XML parser context 1867 * 1868 * Pops the top element/prefix/URI name from the name stack 1869 * 1870 * Returns the name just removed 1871 */ 1872 static const xmlChar * 1873 nameNsPop(xmlParserCtxtPtr ctxt) 1874 { 1875 const xmlChar *ret; 1876 1877 if (ctxt->nameNr <= 0) 1878 return (NULL); 1879 ctxt->nameNr--; 1880 if (ctxt->nameNr > 0) 1881 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1882 else 1883 ctxt->name = NULL; 1884 ret = ctxt->nameTab[ctxt->nameNr]; 1885 
ctxt->nameTab[ctxt->nameNr] = NULL; 1886 return (ret); 1887 } 1888 #endif /* LIBXML_PUSH_ENABLED */ 1889 1890 /** 1891 * namePush: 1892 * @ctxt: an XML parser context 1893 * @value: the element name 1894 * 1895 * Pushes a new element name on top of the name stack 1896 * 1897 * Returns -1 in case of error, the index in the stack otherwise 1898 */ 1899 int 1900 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1901 { 1902 if (ctxt == NULL) return (-1); 1903 1904 if (ctxt->nameNr >= ctxt->nameMax) { 1905 const xmlChar * *tmp; 1906 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1907 ctxt->nameMax * 2 * 1908 sizeof(ctxt->nameTab[0])); 1909 if (tmp == NULL) { 1910 goto mem_error; 1911 } 1912 ctxt->nameTab = tmp; 1913 ctxt->nameMax *= 2; 1914 } 1915 ctxt->nameTab[ctxt->nameNr] = value; 1916 ctxt->name = value; 1917 return (ctxt->nameNr++); 1918 mem_error: 1919 xmlErrMemory(ctxt, NULL); 1920 return (-1); 1921 } 1922 /** 1923 * namePop: 1924 * @ctxt: an XML parser context 1925 * 1926 * Pops the top element name from the name stack 1927 * 1928 * Returns the name just removed 1929 */ 1930 const xmlChar * 1931 namePop(xmlParserCtxtPtr ctxt) 1932 { 1933 const xmlChar *ret; 1934 1935 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1936 return (NULL); 1937 ctxt->nameNr--; 1938 if (ctxt->nameNr > 0) 1939 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1940 else 1941 ctxt->name = NULL; 1942 ret = ctxt->nameTab[ctxt->nameNr]; 1943 ctxt->nameTab[ctxt->nameNr] = NULL; 1944 return (ret); 1945 } 1946 1947 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1948 if (ctxt->spaceNr >= ctxt->spaceMax) { 1949 int *tmp; 1950 1951 ctxt->spaceMax *= 2; 1952 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1953 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1954 if (tmp == NULL) { 1955 xmlErrMemory(ctxt, NULL); 1956 ctxt->spaceMax /=2; 1957 return(-1); 1958 } 1959 ctxt->spaceTab = tmp; 1960 } 1961 ctxt->spaceTab[ctxt->spaceNr] = val; 1962 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 
1963 return(ctxt->spaceNr++); 1964 } 1965 1966 static int spacePop(xmlParserCtxtPtr ctxt) { 1967 int ret; 1968 if (ctxt->spaceNr <= 0) return(0); 1969 ctxt->spaceNr--; 1970 if (ctxt->spaceNr > 0) 1971 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1972 else 1973 ctxt->space = &ctxt->spaceTab[0]; 1974 ret = ctxt->spaceTab[ctxt->spaceNr]; 1975 ctxt->spaceTab[ctxt->spaceNr] = -1; 1976 return(ret); 1977 } 1978 1979 /* 1980 * Macros for accessing the content. Those should be used only by the parser, 1981 * and not exported. 1982 * 1983 * Dirty macros, i.e. one often need to make assumption on the context to 1984 * use them 1985 * 1986 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1987 * To be used with extreme caution since operations consuming 1988 * characters may move the input buffer to a different location ! 1989 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1990 * This should be used internally by the parser 1991 * only to compare to ASCII values otherwise it would break when 1992 * running with UTF-8 encoding. 1993 * RAW same as CUR but in the input buffer, bypass any token 1994 * extraction that may have been done 1995 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1996 * to compare on ASCII based substring. 1997 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1998 * strings without newlines within the parser. 1999 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 2000 * defined char within the parser. 2001 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 2002 * 2003 * NEXT Skip to the next character, this does the proper decoding 2004 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 2005 * NEXTL(l) Skip the current unicode character of l xmlChars long. 2006 * CUR_CHAR(l) returns the current unicode character (int), set l 2007 * to the number of xmlChars used for the encoding [0-5]. 
*   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

/* Current byte of the active input; RAW and CUR expand to the same text. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte at offset val from the current position (no bounds check here). */
#define NXT(val) ctxt->input->cur[(val)]
/* Current position and start of the active input buffer. */
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1 .. cn): true when the first n bytes at s equal c1 .. cn.
 * Bytes are compared through an unsigned char pointer; each macro is built
 * on top of the previous one.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * SKIP(val): advance the input by val bytes, adding val to the column and
 * to nbChars (the line counter is not touched). Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference(), and if the position
 * holds a 0 byte and xmlParserInputGrow() returns <= 0 the input is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): like SKIP but walks byte by byte so that a '\n' increments
 * the line counter and resets the column to 1.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK: when not in progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie behind the current position and fewer than
 * 2 * INPUT_CHUNK bytes remain ahead of it.
 * NOTE: the expansion is a bare "if (...) call;" that already ends with a
 * semicolon, so it must not be used as the body of an if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);

/*
 * xmlSHRINK:
 * @ctxt:  the XML parser context
 *
 * Out-of-line part of SHRINK: shrinks the current input with
 * xmlParserInputShrink(), then, if the current byte is 0 and
 * xmlParserInputGrow() returns <= 0, pops the input.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
  }

/*
 * GROW: when not in progressive mode, call xmlGROW() as soon as fewer than
 * INPUT_CHUNK bytes remain between the current position and the end.
 * NOTE: same bare-if expansion (with trailing semicolon) as SHRINK.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);

/*
 * xmlGROW:
 * @ctxt:  the XML parser context
 *
 * Out-of-line part of GROW. Raises XML_ERR_INTERNAL_ERROR and halts the
 * parser instead of growing when the lookup window is too large (see the
 * first test below) or when, after growing, the current pointer lies outside
 * [base, end].
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    /* bytes still available ahead of, and already consumed behind, cur */
    unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
    unsigned long curBase = ctxt->input->cur - ctxt->input->base;

    /*
     * Refuse to go on when either distance exceeds XML_MAX_LOOKUP_LIMIT,
     * the input has a buf whose readcallback is not xmlNop, and the
     * XML_PARSE_HUGE option is not set.
     */
    if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
         (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
         ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
	return;
    }
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    /* sanity check on the pointers once the buffer has been grown */
    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
	return;
    }
    /* still at a 0 byte and nothing more could be read: pop this input */
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
}

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, bumping col and nbChars; the line
 * counter is not updated and no PE reference handling is done. Grows the
 * input when the new current byte is 0.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance by l bytes, updating line/col according to whether the
 * byte at the current position is '\n'; nbChars is not updated. A '%' at the
 * new position is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Thin wrappers; l receives the length reported by the callee. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): when l == 1 store v as a single xmlChar at b[i] and
 * increment i; otherwise call xmlCopyCharMultiByte() and add its return
 * value to i. Expands to an unbraced if/else without trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)

/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
	const xmlChar *cur;
	/*
	 * if we are in the document content, go really fast
	 */
	/*
	 * Fast path: a single input on the stack and not inside the DTD.
	 * Works on a local pointer; line and col are maintained by hand and
	 * nbChars is not updated on this path.
	 */
	cur = ctxt->input->cur;
	while (IS_BLANK_CH(*cur)) {
	    if (*cur == '\n') {
		ctxt->input->line++; ctxt->input->col = 1;
	    } else {
		ctxt->input->col++;
	    }
	    cur++;
	    res++;
	    if (*cur == 0) {
		/*
		 * Publish the position before growing and reload it
		 * afterwards.
		 */
		ctxt->input->cur = cur;
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
		cur = ctxt->input->cur;
	    }
	}
	ctxt->input->cur = cur;
    } else {
	int cur;
	/*
	 * Slow path: skip blanks with NEXT, pop exhausted nested inputs
	 * (except in the XML_PARSER_COMMENT state) and let PE references be
	 * handled; repeat while the byte sampled before the PE handling is a
	 * blank.
	 */
	do {
	    cur = CUR;
	    while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
	           (ctxt->instate != XML_PARSER_EOF))) {
		NEXT;
		cur = CUR;
		res++;
	    }
	    while ((cur == 0) && (ctxt->inputNr > 1) &&
		   (ctxt->instate != XML_PARSER_COMMENT)) {
		xmlPopInput(ctxt);
		cur = CUR;
	    }
	    /*
	     * Need to handle support of entities branching here
	     */
	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
	} while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
	         (ctxt->instate != XML_PARSER_EOF));
    }
    return(res);
}

/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/

/**
 * xmlPopInput:
 * @ctxt:  an XML parser context
 *
 * xmlPopInput: the current input pointed by ctxt->input came to an end
 *          pop it and return the next char.
2204 * 2205 * Returns the current xmlChar in the parser context 2206 */ 2207 xmlChar 2208 xmlPopInput(xmlParserCtxtPtr ctxt) { 2209 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2210 if (xmlParserDebugEntities) 2211 xmlGenericError(xmlGenericErrorContext, 2212 "Popping input %d\n", ctxt->inputNr); 2213 xmlFreeInputStream(inputPop(ctxt)); 2214 if ((*ctxt->input->cur == 0) && 2215 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2216 return(xmlPopInput(ctxt)); 2217 return(CUR); 2218 } 2219 2220 /** 2221 * xmlPushInput: 2222 * @ctxt: an XML parser context 2223 * @input: an XML parser input fragment (entity, XML fragment ...). 2224 * 2225 * xmlPushInput: switch to a new input stream which is stacked on top 2226 * of the previous one(s). 2227 * Returns -1 in case of error or the index in the input stack 2228 */ 2229 int 2230 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2231 int ret; 2232 if (input == NULL) return(-1); 2233 2234 if (xmlParserDebugEntities) { 2235 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2236 xmlGenericError(xmlGenericErrorContext, 2237 "%s(%d): ", ctxt->input->filename, 2238 ctxt->input->line); 2239 xmlGenericError(xmlGenericErrorContext, 2240 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2241 } 2242 ret = inputPush(ctxt, input); 2243 if (ctxt->instate == XML_PARSER_EOF) 2244 return(-1); 2245 GROW; 2246 return(ret); 2247 } 2248 2249 /** 2250 * xmlParseCharRef: 2251 * @ctxt: an XML parser context 2252 * 2253 * parse Reference declarations 2254 * 2255 * [66] CharRef ::= '&#' [0-9]+ ';' | 2256 * '&#x' [0-9a-fA-F]+ ';' 2257 * 2258 * [ WFC: Legal Character ] 2259 * Characters referred to using character references must match the 2260 * production for Char. 
2261 * 2262 * Returns the value parsed (as an int), 0 in case of error 2263 */ 2264 int 2265 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2266 unsigned int val = 0; 2267 int count = 0; 2268 unsigned int outofrange = 0; 2269 2270 /* 2271 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2272 */ 2273 if ((RAW == '&') && (NXT(1) == '#') && 2274 (NXT(2) == 'x')) { 2275 SKIP(3); 2276 GROW; 2277 while (RAW != ';') { /* loop blocked by count */ 2278 if (count++ > 20) { 2279 count = 0; 2280 GROW; 2281 if (ctxt->instate == XML_PARSER_EOF) 2282 return(0); 2283 } 2284 if ((RAW >= '0') && (RAW <= '9')) 2285 val = val * 16 + (CUR - '0'); 2286 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2287 val = val * 16 + (CUR - 'a') + 10; 2288 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2289 val = val * 16 + (CUR - 'A') + 10; 2290 else { 2291 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2292 val = 0; 2293 break; 2294 } 2295 if (val > 0x10FFFF) 2296 outofrange = val; 2297 2298 NEXT; 2299 count++; 2300 } 2301 if (RAW == ';') { 2302 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2303 ctxt->input->col++; 2304 ctxt->nbChars ++; 2305 ctxt->input->cur++; 2306 } 2307 } else if ((RAW == '&') && (NXT(1) == '#')) { 2308 SKIP(2); 2309 GROW; 2310 while (RAW != ';') { /* loop blocked by count */ 2311 if (count++ > 20) { 2312 count = 0; 2313 GROW; 2314 if (ctxt->instate == XML_PARSER_EOF) 2315 return(0); 2316 } 2317 if ((RAW >= '0') && (RAW <= '9')) 2318 val = val * 10 + (CUR - '0'); 2319 else { 2320 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2321 val = 0; 2322 break; 2323 } 2324 if (val > 0x10FFFF) 2325 outofrange = val; 2326 2327 NEXT; 2328 count++; 2329 } 2330 if (RAW == ';') { 2331 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2332 ctxt->input->col++; 2333 ctxt->nbChars ++; 2334 ctxt->input->cur++; 2335 } 2336 } else { 2337 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2338 } 2339 2340 /* 2341 * [ WFC: 
Legal Character ] 2342 * Characters referred to using character references must match the 2343 * production for Char. 2344 */ 2345 if ((IS_CHAR(val) && (outofrange == 0))) { 2346 return(val); 2347 } else { 2348 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2349 "xmlParseCharRef: invalid xmlChar value %d\n", 2350 val); 2351 } 2352 return(0); 2353 } 2354 2355 /** 2356 * xmlParseStringCharRef: 2357 * @ctxt: an XML parser context 2358 * @str: a pointer to an index in the string 2359 * 2360 * parse Reference declarations, variant parsing from a string rather 2361 * than an an input flow. 2362 * 2363 * [66] CharRef ::= '&#' [0-9]+ ';' | 2364 * '&#x' [0-9a-fA-F]+ ';' 2365 * 2366 * [ WFC: Legal Character ] 2367 * Characters referred to using character references must match the 2368 * production for Char. 2369 * 2370 * Returns the value parsed (as an int), 0 in case of error, str will be 2371 * updated to the current value of the index 2372 */ 2373 static int 2374 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2375 const xmlChar *ptr; 2376 xmlChar cur; 2377 unsigned int val = 0; 2378 unsigned int outofrange = 0; 2379 2380 if ((str == NULL) || (*str == NULL)) return(0); 2381 ptr = *str; 2382 cur = *ptr; 2383 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2384 ptr += 3; 2385 cur = *ptr; 2386 while (cur != ';') { /* Non input consuming loop */ 2387 if ((cur >= '0') && (cur <= '9')) 2388 val = val * 16 + (cur - '0'); 2389 else if ((cur >= 'a') && (cur <= 'f')) 2390 val = val * 16 + (cur - 'a') + 10; 2391 else if ((cur >= 'A') && (cur <= 'F')) 2392 val = val * 16 + (cur - 'A') + 10; 2393 else { 2394 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2395 val = 0; 2396 break; 2397 } 2398 if (val > 0x10FFFF) 2399 outofrange = val; 2400 2401 ptr++; 2402 cur = *ptr; 2403 } 2404 if (cur == ';') 2405 ptr++; 2406 } else if ((cur == '&') && (ptr[1] == '#')){ 2407 ptr += 2; 2408 cur = *ptr; 2409 while (cur != ';') { /* Non input consuming loops */ 2410 
if ((cur >= '0') && (cur <= '9')) 2411 val = val * 10 + (cur - '0'); 2412 else { 2413 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2414 val = 0; 2415 break; 2416 } 2417 if (val > 0x10FFFF) 2418 outofrange = val; 2419 2420 ptr++; 2421 cur = *ptr; 2422 } 2423 if (cur == ';') 2424 ptr++; 2425 } else { 2426 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2427 return(0); 2428 } 2429 *str = ptr; 2430 2431 /* 2432 * [ WFC: Legal Character ] 2433 * Characters referred to using character references must match the 2434 * production for Char. 2435 */ 2436 if ((IS_CHAR(val) && (outofrange == 0))) { 2437 return(val); 2438 } else { 2439 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2440 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2441 val); 2442 } 2443 return(0); 2444 } 2445 2446 /** 2447 * xmlNewBlanksWrapperInputStream: 2448 * @ctxt: an XML parser context 2449 * @entity: an Entity pointer 2450 * 2451 * Create a new input stream for wrapping 2452 * blanks around a PEReference 2453 * 2454 * Returns the new input stream or NULL 2455 */ 2456 2457 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2458 2459 static xmlParserInputPtr 2460 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2461 xmlParserInputPtr input; 2462 xmlChar *buffer; 2463 size_t length; 2464 if (entity == NULL) { 2465 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2466 "xmlNewBlanksWrapperInputStream entity\n"); 2467 return(NULL); 2468 } 2469 if (xmlParserDebugEntities) 2470 xmlGenericError(xmlGenericErrorContext, 2471 "new blanks wrapper for entity: %s\n", entity->name); 2472 input = xmlNewInputStream(ctxt); 2473 if (input == NULL) { 2474 return(NULL); 2475 } 2476 length = xmlStrlen(entity->name) + 5; 2477 buffer = xmlMallocAtomic(length); 2478 if (buffer == NULL) { 2479 xmlErrMemory(ctxt, NULL); 2480 xmlFree(input); 2481 return(NULL); 2482 } 2483 buffer [0] = ' '; 2484 buffer [1] = '%'; 2485 buffer [length-3] = ';'; 2486 buffer [length-2] = ' '; 2487 
buffer [length-1] = 0; 2488 memcpy(buffer + 2, entity->name, length - 5); 2489 input->free = deallocblankswrapper; 2490 input->base = buffer; 2491 input->cur = buffer; 2492 input->length = length; 2493 input->end = &buffer[length]; 2494 return(input); 2495 } 2496 2497 /** 2498 * xmlParserHandlePEReference: 2499 * @ctxt: the parser context 2500 * 2501 * [69] PEReference ::= '%' Name ';' 2502 * 2503 * [ WFC: No Recursion ] 2504 * A parsed entity must not contain a recursive 2505 * reference to itself, either directly or indirectly. 2506 * 2507 * [ WFC: Entity Declared ] 2508 * In a document without any DTD, a document with only an internal DTD 2509 * subset which contains no parameter entity references, or a document 2510 * with "standalone='yes'", ... ... The declaration of a parameter 2511 * entity must precede any reference to it... 2512 * 2513 * [ VC: Entity Declared ] 2514 * In a document with an external subset or external parameter entities 2515 * with "standalone='no'", ... ... The declaration of a parameter entity 2516 * must precede any reference to it... 2517 * 2518 * [ WFC: In DTD ] 2519 * Parameter-entity references may only appear in the DTD. 2520 * NOTE: misleading but this is handled. 2521 * 2522 * A PEReference may have been detected in the current input stream 2523 * the handling is done accordingly to 2524 * http://www.w3.org/TR/REC-xml#entproc 2525 * i.e. 
*   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlEntityPtr entity = NULL;
    xmlParserInputPtr input;

    if (RAW != '%') return;
    /*
     * Only the XML_PARSER_DTD state (under the conditions tested there)
     * falls out of this switch; every other listed state returns, some of
     * them after raising an error.
     */
    switch(ctxt->instate) {
	case XML_PARSER_CDATA_SECTION:
	    return;
        case XML_PARSER_COMMENT:
	    return;
	case XML_PARSER_START_TAG:
	    return;
	case XML_PARSER_END_TAG:
	    return;
        case XML_PARSER_EOF:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
	    return;
        case XML_PARSER_PROLOG:
	case XML_PARSER_START:
	case XML_PARSER_MISC:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
	    return;
	case XML_PARSER_ENTITY_DECL:
        case XML_PARSER_CONTENT:
        case XML_PARSER_ATTRIBUTE_VALUE:
        case XML_PARSER_PI:
	case XML_PARSER_SYSTEM_LITERAL:
	case XML_PARSER_PUBLIC_LITERAL:
	    /* we just ignore it there */
	    return;
        case XML_PARSER_EPILOG:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
	    return;
	case XML_PARSER_ENTITY_VALUE:
	    /*
	     * NOTE: in the case of entity values, we don't do the
	     *       substitution here since we need the literal
	     *       entity value to be able to save the internal
	     *       subset of the document.
	     *       This will be handled by xmlStringDecodeEntities
	     */
	    return;
        case XML_PARSER_DTD:
	    /*
	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
	     * In the internal DTD subset, parameter-entity references
	     * can occur only where markup declarations can occur, not
	     * within markup declarations.
	     * In that case this is handled in xmlParseMarkupDecl
	     */
	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
		return;
	    /* a '%' followed by a blank or a 0 byte is not treated as a reference */
	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
		return;
            break;
        case XML_PARSER_IGNORE:
            return;
    }

    /* consume the '%' and read the entity name */
    NEXT;
    name = xmlParseName(ctxt);
    if (xmlParserDebugEntities)
	xmlGenericError(xmlGenericErrorContext,
		"PEReference: %s\n", name);
    if (name == NULL) {
	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
    } else {
	if (RAW == ';') {
	    NEXT;
	    /* the entity is looked up through the SAX getParameterEntity callback */
	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
	    if (ctxt->instate == XML_PARSER_EOF)
	        return;
	    if (entity == NULL) {

		/*
		 * [ WFC: Entity Declared ]
		 * In a document without any DTD, a document with only an
		 * internal DTD subset which contains no parameter entity
		 * references, or a document with "standalone='yes'", ...
		 * ... The declaration of a parameter entity must precede
		 * any reference to it...
		 */
		if ((ctxt->standalone == 1) ||
		    ((ctxt->hasExternalSubset == 0) &&
		     (ctxt->hasPErefs == 0))) {
		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
			 "PEReference: %%%s; not found\n", name);
	        } else {
		    /*
		     * [ VC: Entity Declared ]
		     * In a document with an external subset or external
		     * parameter entities with "standalone='no'", ...
		     * ... The declaration of a parameter entity must precede
		     * any reference to it...
		     */
		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
			                 "PEReference: %%%s; not found\n",
				         name, NULL);
		    } else
		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
			              "PEReference: %%%s; not found\n",
				      name, NULL);
		    ctxt->valid = 0;
		}
		xmlParserEntityCheck(ctxt, 0, NULL, 0);
	    } else if (ctxt->input->free != deallocblankswrapper) {
		    /*
		     * The current input is not a blanks wrapper: push one
		     * built by xmlNewBlanksWrapperInputStream() for this
		     * entity instead of the entity content itself.
		     */
		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
		    if (xmlPushInput(ctxt, input) < 0)
		        return;
	    } else {
	        /*
	         * The current input is a blanks wrapper (its free callback
	         * is deallocblankswrapper): push the entity input itself.
	         */
	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
		    xmlChar start[4];
		    xmlCharEncoding enc;

		    /*
		     * Note: external parameter entities will not be loaded, it
		     * is not required for a non-validating parser, unless the
		     * option of validating, or substituting entities were
		     * given. Doing so is far more secure as the parser will
		     * only process data coming from the document entity by
		     * default.
		     */
		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
		        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
			((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
			((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
			((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
			(ctxt->replaceEntities == 0) &&
			(ctxt->validate == 0))
			return;

		    /*
		     * handle the extra spaces added before and after
		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
		     * this is done independently.
		     */
		    input = xmlNewEntityInputStream(ctxt, entity);
		    if (xmlPushInput(ctxt, input) < 0)
			return;

		    /*
		     * Get the 4 first bytes and decode the charset
		     * if enc != XML_CHAR_ENCODING_NONE
		     * plug some encoding conversion routines.
		     * Note that, since we may have some non-UTF8
		     * encoding (like UTF16, bug 135229), the 'length'
		     * is not known, but we can calculate based upon
		     * the amount of data in the buffer.
		     */
		    /*
		     * NOTE: GROW is written without a semicolon here; the
		     * macro expansion already ends with one.
		     */
		    GROW
                    if (ctxt->instate == XML_PARSER_EOF)
                        return;
		    if ((ctxt->input->end - ctxt->input->cur)>=4) {
			start[0] = RAW;
			start[1] = NXT(1);
			start[2] = NXT(2);
			start[3] = NXT(3);
			enc = xmlDetectCharEncoding(start, 4);
			if (enc != XML_CHAR_ENCODING_NONE) {
			    xmlSwitchEncoding(ctxt, enc);
			}
		    }

		    /* an external PE may start with a text declaration "<?xml " */
		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
			(IS_BLANK_CH(NXT(5)))) {
			xmlParseTextDecl(ctxt);
		    }
		} else {
		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
			     "PEReference: %s is not a parameter entity\n",
				      name);
		}
	    }
	} else {
	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
	}
    }
}

/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 */
/*
 * The new size is buffer##_size * 2 + n; a result smaller than the old size
 * is treated as an overflow and jumps to mem_error, as does a failed
 * xmlRealloc(). buffer is only reassigned once the reallocation succeeded.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + n;                            \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}

/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
*
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it !
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
	return(NULL);
    last = str + len;

    /*
     * Recursion guard: this function calls itself (through
     * xmlStringDecodeEntities) with ctxt->depth incremented. More than 40
     * levels is an entity loop error unless XML_PARSE_HUGE is set; more
     * than 1024 always is.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
	(ctxt->depth > 1024)) {
	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
	c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
	   (c != end2) && (c != end3)) {

	if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
	    /* character reference; str is advanced by the callee */
	    int val = xmlParseStringCharRef(ctxt, &str);
	    if (val != 0) {
		/*
		 * l is passed as 0, so COPY_BUF always takes its
		 * xmlCopyCharMultiByte() branch here.
		 */
		COPY_BUF(0,buffer,nbchars,val);
	    }
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding Entity Reference: %.30s\n",
			str);
	    ent = xmlParseStringEntityRef(ctxt, &str);
	    if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
	        (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
	        goto int_error;
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if ((ent != NULL) &&
		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		/* predefined entity: only the first byte of its content is copied */
		if (ent->content != NULL) {
		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
		    }
		} else {
		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
			    "predefined entity has no content\n");
		}
	    } else if ((ent != NULL) && (ent->content != NULL)) {
		/* expand the entity content recursively, then append the result */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;

		if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
		    (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
		    goto int_error;

		if (rep != NULL) {
		    current = rep;
		    while (*current != 0) { /* non input consuming loop */
			buffer[nbchars++] = *current++;
			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			    /* a non-zero entity check result aborts the expansion */
			    if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
				goto int_error;
			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
			}
		    }
		    xmlFree(rep);
		    rep = NULL;
		}
	    } else if (ent != NULL) {
		/* entity without content: copy the reference back as "&name;" */
		int i = xmlStrlen(ent->name);
		const xmlChar *cur = ent->name;

		buffer[nbchars++] = '&';
		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
		}
		for (;i > 0;i--)
		    buffer[nbchars++] = *cur++;
		buffer[nbchars++] = ';';
	    }
	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding PE Reference: %.30s\n", str);
	    ent = xmlParseStringPEReference(ctxt, &str);
	    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
	        goto int_error;
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if (ent != NULL) {
                if (ent->content == NULL) {
		    /*
		     * Note: external parsed entities will not be loaded,
		     * it is not required for a non-validating parser to
		     * complete external PEreferences coming from the
		     * internal subset
		     */
		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
			(ctxt->validate != 0)) {
			xmlLoadEntityContent(ctxt, ent);
		    } else {
			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
		  "not validating will not read content for PE entity %s\n",
		                      ent->name, NULL);
		    }
		}
		/*
		 * ent->content may still be NULL here; in that case
		 * xmlStringDecodeEntities() returns NULL and nothing is
		 * appended.
		 */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep != NULL) {
		    current = rep;
		    while (*current != 0) { /* non input consuming loop */
			buffer[nbchars++] = *current++;
			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			    if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
			        goto int_error;
			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
			}
		    }
		    xmlFree(rep);
		    rep = NULL;
		}
	    }
	} else {
	    /* ordinary character: copy it and step over its l bytes */
	    COPY_BUF(l,buffer,nbchars,c);
	    str += l;
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	}
	/* fetch the next character, or stop once the len bytes are consumed */
	if (str < last)
	    c = CUR_SCHAR(str, l);
	else
	    c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

/* mem_error reports the allocation failure, then shares the cleanup below */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}

/**
 * xmlStringDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it !
 */
xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
		        xmlChar end, xmlChar  end2, xmlChar end3) {
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
    /* same processing as the Len variant, over the whole 0 terminated string */
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
           end, end2, end3));
}

/************************************************************************
 *									*
 *		Commodity functions, cleanup needed ?			*
 *									*
 ************************************************************************/

/**
 * areBlanks:
 * @ctxt:  an XML parser context
 * @str:  a xmlChar *
 * @len:  the size of @str
 * @blank_chars: we know the chars are blanks
 *
 * Is this a sequence of blank chars that one can ignore ?
 *
 * Returns 1 if ignorable 0 otherwise.
2970 */ 2971 2972 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2973 int blank_chars) { 2974 int i, ret; 2975 xmlNodePtr lastChild; 2976 2977 /* 2978 * Don't spend time trying to differentiate them, the same callback is 2979 * used ! 2980 */ 2981 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2982 return(0); 2983 2984 /* 2985 * Check for xml:space value. 2986 */ 2987 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2988 (*(ctxt->space) == -2)) 2989 return(0); 2990 2991 /* 2992 * Check that the string is made of blanks 2993 */ 2994 if (blank_chars == 0) { 2995 for (i = 0;i < len;i++) 2996 if (!(IS_BLANK_CH(str[i]))) return(0); 2997 } 2998 2999 /* 3000 * Look if the element is mixed content in the DTD if available 3001 */ 3002 if (ctxt->node == NULL) return(0); 3003 if (ctxt->myDoc != NULL) { 3004 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 3005 if (ret == 0) return(1); 3006 if (ret == 1) return(0); 3007 } 3008 3009 /* 3010 * Otherwise, heuristic :-\ 3011 */ 3012 if ((RAW != '<') && (RAW != 0xD)) return(0); 3013 if ((ctxt->node->children == NULL) && 3014 (RAW == '<') && (NXT(1) == '/')) return(0); 3015 3016 lastChild = xmlGetLastChild(ctxt->node); 3017 if (lastChild == NULL) { 3018 if ((ctxt->node->type != XML_ELEMENT_NODE) && 3019 (ctxt->node->content != NULL)) return(0); 3020 } else if (xmlNodeIsText(lastChild)) 3021 return(0); 3022 else if ((ctxt->node->children != NULL) && 3023 (xmlNodeIsText(ctxt->node->children))) 3024 return(0); 3025 return(1); 3026 } 3027 3028 /************************************************************************ 3029 * * 3030 * Extra stuff for namespace support * 3031 * Relates to http://www.w3.org/TR/WD-xml-names * 3032 * * 3033 ************************************************************************/ 3034 3035 /** 3036 * xmlSplitQName: 3037 * @ctxt: an XML parser context 3038 * @name: an XML parser context 3039 * @prefix: a xmlChar ** 3040 * 3041 * parse an UTF8 encoded XML 
qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    /* stack buffer for short names; buffer is the heap fallback */
    xmlChar buf[XML_MAX_NAMELEN + 5];
    xmlChar *buffer = NULL;
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
	return(xmlStrdup(name));
#endif

    /* nasty but well=formed */
    if (cur[0] == ':')
	return(xmlStrdup(name));

    /*
     * First pass: copy bytes up to the first ':' or the end of the string.
     * On exit c is the byte that stopped the loop and cur points just
     * after it.
     */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
	buf[len++] = c;
	c = *cur++;
    }
    if (len >= max) {
	/*
	 * Okay someone managed to make a huge name, so he's ready to pay
	 * for the processing speed.
	 */
	max = len * 2;

	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	if (buffer == NULL) {
	    xmlErrMemory(ctxt, NULL);
	    return(NULL);
	}
	memcpy(buffer, buf, len);
	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
	    /* keep some headroom, which also covers the final 0 byte */
	    if (len + 10 > max) {
	        xmlChar *tmp;

		max *= 2;
		tmp = (xmlChar *) xmlRealloc(buffer,
						max * sizeof(xmlChar));
		if (tmp == NULL) {
		    xmlFree(buffer);
		    xmlErrMemory(ctxt, NULL);
		    return(NULL);
		}
		buffer = tmp;
	    }
	    buffer[len++] = c;
	    c = *cur++;
	}
	buffer[len] = 0;
    }

    /* a trailing ':' with nothing after it: the whole name is the local part */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
	    xmlFree(buffer);
	*prefix = NULL;
	return(xmlStrdup(name));
    }

    if (buffer == NULL)
	ret = xmlStrndup(buf, len);
    else {
	/* hand the heap buffer over as the result and reset for the second pass */
	ret = buffer;
	buffer = NULL;
	max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
	/* what was parsed so far is the prefix; now parse the local part */
	c = *cur;
        *prefix = ret;
	/*
	 * NOTE: *cur == 0 was already handled by the "trailing ':'" test
	 * above, so this branch looks unreachable from here.
	 */
	if (c == 0) {
	    return(xmlStrndup(BAD_CAST "", 0));
	}
	len = 0;

	/*
	 * Check that the first character is proper to start
	 * a new name
	 */
	if (!(((c >= 0x61) && (c <= 0x7A)) ||
	      ((c >= 0x41) && (c <= 0x5A)) ||
	      (c == '_') || (c == ':'))) {
	    int l;
	    int first = CUR_SCHAR(cur, l);

	    /* the error is reported but parsing of the local part continues */
	    if (!IS_LETTER(first) && (first != '_')) {
		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
			    "Name %s is not XML Namespace compliant\n",
				  name);
	    }
	}
	cur++;

	/* second pass: copy the rest of the string, further ':' included */
	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
	    buf[len++] = c;
	    c = *cur++;
	}
	if (len >= max) {
	    /*
	     * Okay someone managed to make a huge name, so he's ready to pay
	     * for the processing speed.
	     */
	    max = len * 2;

	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	    if (buffer == NULL) {
	        /* *prefix keeps pointing at the already allocated prefix here */
	        xmlErrMemory(ctxt, NULL);
		return(NULL);
	    }
	    memcpy(buffer, buf, len);
	    while (c != 0) { /* tested bigname2.xml */
		if (len + 10 > max) {
		    xmlChar *tmp;

		    max *= 2;
		    tmp = (xmlChar *) xmlRealloc(buffer,
						    max * sizeof(xmlChar));
		    if (tmp == NULL) {
			xmlErrMemory(ctxt, NULL);
			xmlFree(buffer);
			return(NULL);
		    }
		    buffer = tmp;
		}
		buffer[len++] = c;
		c = *cur++;
	    }
	    buffer[len] = 0;
	}

	if (buffer == NULL)
	    ret = xmlStrndup(buf, len);
	else {
	    ret = buffer;
	}
    }

    return(ret);
}

/************************************************************************
 *									*
 *			The parser itself				*
 *	Relates to http://www.w3.org/TR/REC-xml				*
 *									*
 ************************************************************************/

/************************************************************************
 *									*
 *	Routines to parse Name, NCName and NmToken			*
 *									*
 ************************************************************************/
/* Call counters, only compiled in when DEBUG is defined. */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif

/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in the Revision 5 of XML-1.0
 * They correspond to the modified production [4] and the new production [4a]
 * changes in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
3234 * We still keep compatibility to pre-revision5 parsing semantic if the 3235 * new XML_PARSE_OLD10 option is given to the parser. 3236 */ 3237 static int 3238 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3239 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3240 /* 3241 * Use the new checks of production [4] [4a] amd [5] of the 3242 * Update 5 of XML-1.0 3243 */ 3244 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3245 (((c >= 'a') && (c <= 'z')) || 3246 ((c >= 'A') && (c <= 'Z')) || 3247 (c == '_') || (c == ':') || 3248 ((c >= 0xC0) && (c <= 0xD6)) || 3249 ((c >= 0xD8) && (c <= 0xF6)) || 3250 ((c >= 0xF8) && (c <= 0x2FF)) || 3251 ((c >= 0x370) && (c <= 0x37D)) || 3252 ((c >= 0x37F) && (c <= 0x1FFF)) || 3253 ((c >= 0x200C) && (c <= 0x200D)) || 3254 ((c >= 0x2070) && (c <= 0x218F)) || 3255 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3256 ((c >= 0x3001) && (c <= 0xD7FF)) || 3257 ((c >= 0xF900) && (c <= 0xFDCF)) || 3258 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3259 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3260 return(1); 3261 } else { 3262 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3263 return(1); 3264 } 3265 return(0); 3266 } 3267 3268 static int 3269 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3270 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3271 /* 3272 * Use the new checks of production [4] [4a] amd [5] of the 3273 * Update 5 of XML-1.0 3274 */ 3275 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3276 (((c >= 'a') && (c <= 'z')) || 3277 ((c >= 'A') && (c <= 'Z')) || 3278 ((c >= '0') && (c <= '9')) || /* !start */ 3279 (c == '_') || (c == ':') || 3280 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3281 ((c >= 0xC0) && (c <= 0xD6)) || 3282 ((c >= 0xD8) && (c <= 0xF6)) || 3283 ((c >= 0xF8) && (c <= 0x2FF)) || 3284 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3285 ((c >= 0x370) && (c <= 0x37D)) || 3286 ((c >= 0x37F) && (c <= 0x1FFF)) || 3287 ((c >= 0x200C) && (c <= 0x200D)) || 3288 ((c >= 0x203F) && (c <= 0x2040)) || /* !start 
*/ 3289 ((c >= 0x2070) && (c <= 0x218F)) || 3290 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3291 ((c >= 0x3001) && (c <= 0xD7FF)) || 3292 ((c >= 0xF900) && (c <= 0xFDCF)) || 3293 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3294 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3295 return(1); 3296 } else { 3297 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3298 (c == '.') || (c == '-') || 3299 (c == '_') || (c == ':') || 3300 (IS_COMBINING(c)) || 3301 (IS_EXTENDER(c))) 3302 return(1); 3303 } 3304 return(0); 3305 } 3306 3307 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3308 int *len, int *alloc, int normalize); 3309 3310 static const xmlChar * 3311 xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3312 int len = 0, l; 3313 int c; 3314 int count = 0; 3315 3316 #ifdef DEBUG 3317 nbParseNameComplex++; 3318 #endif 3319 3320 /* 3321 * Handler for more complex cases 3322 */ 3323 GROW; 3324 if (ctxt->instate == XML_PARSER_EOF) 3325 return(NULL); 3326 c = CUR_CHAR(l); 3327 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3328 /* 3329 * Use the new checks of production [4] [4a] amd [5] of the 3330 * Update 5 of XML-1.0 3331 */ 3332 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3333 (!(((c >= 'a') && (c <= 'z')) || 3334 ((c >= 'A') && (c <= 'Z')) || 3335 (c == '_') || (c == ':') || 3336 ((c >= 0xC0) && (c <= 0xD6)) || 3337 ((c >= 0xD8) && (c <= 0xF6)) || 3338 ((c >= 0xF8) && (c <= 0x2FF)) || 3339 ((c >= 0x370) && (c <= 0x37D)) || 3340 ((c >= 0x37F) && (c <= 0x1FFF)) || 3341 ((c >= 0x200C) && (c <= 0x200D)) || 3342 ((c >= 0x2070) && (c <= 0x218F)) || 3343 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3344 ((c >= 0x3001) && (c <= 0xD7FF)) || 3345 ((c >= 0xF900) && (c <= 0xFDCF)) || 3346 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3347 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3348 return(NULL); 3349 } 3350 len += l; 3351 NEXTL(l); 3352 c = CUR_CHAR(l); 3353 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3354 (((c >= 'a') && (c <= 'z')) || 3355 ((c >= 'A') && (c <= 'Z')) || 3356 
((c >= '0') && (c <= '9')) || /* !start */ 3357 (c == '_') || (c == ':') || 3358 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3359 ((c >= 0xC0) && (c <= 0xD6)) || 3360 ((c >= 0xD8) && (c <= 0xF6)) || 3361 ((c >= 0xF8) && (c <= 0x2FF)) || 3362 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3363 ((c >= 0x370) && (c <= 0x37D)) || 3364 ((c >= 0x37F) && (c <= 0x1FFF)) || 3365 ((c >= 0x200C) && (c <= 0x200D)) || 3366 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3367 ((c >= 0x2070) && (c <= 0x218F)) || 3368 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3369 ((c >= 0x3001) && (c <= 0xD7FF)) || 3370 ((c >= 0xF900) && (c <= 0xFDCF)) || 3371 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3372 ((c >= 0x10000) && (c <= 0xEFFFF)) 3373 )) { 3374 if (count++ > XML_PARSER_CHUNK_SIZE) { 3375 count = 0; 3376 GROW; 3377 if (ctxt->instate == XML_PARSER_EOF) 3378 return(NULL); 3379 } 3380 len += l; 3381 NEXTL(l); 3382 c = CUR_CHAR(l); 3383 } 3384 } else { 3385 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3386 (!IS_LETTER(c) && (c != '_') && 3387 (c != ':'))) { 3388 return(NULL); 3389 } 3390 len += l; 3391 NEXTL(l); 3392 c = CUR_CHAR(l); 3393 3394 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3395 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3396 (c == '.') || (c == '-') || 3397 (c == '_') || (c == ':') || 3398 (IS_COMBINING(c)) || 3399 (IS_EXTENDER(c)))) { 3400 if (count++ > XML_PARSER_CHUNK_SIZE) { 3401 count = 0; 3402 GROW; 3403 if (ctxt->instate == XML_PARSER_EOF) 3404 return(NULL); 3405 } 3406 len += l; 3407 NEXTL(l); 3408 c = CUR_CHAR(l); 3409 if (c == 0) { 3410 count = 0; 3411 GROW; 3412 if (ctxt->instate == XML_PARSER_EOF) 3413 return(NULL); 3414 c = CUR_CHAR(l); 3415 } 3416 } 3417 } 3418 if ((len > XML_MAX_NAME_LENGTH) && 3419 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3420 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3421 return(NULL); 3422 } 3423 if (ctxt->input->cur > ctxt->input->base && (*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] 
== '\r')) { 3424 if (ctxt->input->base > ctxt->input->cur - (len + 1)) { 3425 return(NULL); 3426 } 3427 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3428 } 3429 if (ctxt->input->base > ctxt->input->cur - len) { 3430 return(NULL); 3431 } 3432 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3433 } 3434 3435 /** 3436 * xmlParseName: 3437 * @ctxt: an XML parser context 3438 * 3439 * parse an XML name. 3440 * 3441 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3442 * CombiningChar | Extender 3443 * 3444 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3445 * 3446 * [6] Names ::= Name (#x20 Name)* 3447 * 3448 * Returns the Name parsed or NULL 3449 */ 3450 3451 const xmlChar * 3452 xmlParseName(xmlParserCtxtPtr ctxt) { 3453 const xmlChar *in; 3454 const xmlChar *ret; 3455 int count = 0; 3456 3457 GROW; 3458 3459 #ifdef DEBUG 3460 nbParseName++; 3461 #endif 3462 3463 /* 3464 * Accelerator for simple ASCII names 3465 */ 3466 in = ctxt->input->cur; 3467 if (((*in >= 0x61) && (*in <= 0x7A)) || 3468 ((*in >= 0x41) && (*in <= 0x5A)) || 3469 (*in == '_') || (*in == ':')) { 3470 in++; 3471 while (((*in >= 0x61) && (*in <= 0x7A)) || 3472 ((*in >= 0x41) && (*in <= 0x5A)) || 3473 ((*in >= 0x30) && (*in <= 0x39)) || 3474 (*in == '_') || (*in == '-') || 3475 (*in == ':') || (*in == '.')) 3476 in++; 3477 if ((*in > 0) && (*in < 0x80)) { 3478 count = in - ctxt->input->cur; 3479 if ((count > XML_MAX_NAME_LENGTH) && 3480 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3481 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3482 return(NULL); 3483 } 3484 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3485 ctxt->input->cur = in; 3486 ctxt->nbChars += count; 3487 ctxt->input->col += count; 3488 if (ret == NULL) 3489 xmlErrMemory(ctxt, NULL); 3490 return(ret); 3491 } 3492 } 3493 /* accelerator for special cases */ 3494 return(xmlParseNameComplex(ctxt)); 3495 } 3496 3497 static const xmlChar * 3498 xmlParseNCNameComplex(xmlParserCtxtPtr 
ctxt) { 3499 int len = 0, l; 3500 int c; 3501 int count = 0; 3502 size_t startPosition = 0; 3503 3504 #ifdef DEBUG 3505 nbParseNCNameComplex++; 3506 #endif 3507 3508 /* 3509 * Handler for more complex cases 3510 */ 3511 GROW; 3512 startPosition = CUR_PTR - BASE_PTR; 3513 c = CUR_CHAR(l); 3514 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3515 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3516 return(NULL); 3517 } 3518 3519 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3520 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3521 if (count++ > XML_PARSER_CHUNK_SIZE) { 3522 if ((len > XML_MAX_NAME_LENGTH) && 3523 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3524 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3525 return(NULL); 3526 } 3527 count = 0; 3528 GROW; 3529 if (ctxt->instate == XML_PARSER_EOF) 3530 return(NULL); 3531 } 3532 len += l; 3533 NEXTL(l); 3534 c = CUR_CHAR(l); 3535 if (c == 0) { 3536 count = 0; 3537 /* 3538 * when shrinking to extend the buffer we really need to preserve 3539 * the part of the name we already parsed. Hence rolling back 3540 * by current lenght. 3541 */ 3542 ctxt->input->cur -= l; 3543 GROW; 3544 ctxt->input->cur += l; 3545 if (ctxt->instate == XML_PARSER_EOF) 3546 return(NULL); 3547 c = CUR_CHAR(l); 3548 } 3549 } 3550 if ((len > XML_MAX_NAME_LENGTH) && 3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3553 return(NULL); 3554 } 3555 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len)); 3556 } 3557 3558 /** 3559 * xmlParseNCName: 3560 * @ctxt: an XML parser context 3561 * @len: length of the string parsed 3562 * 3563 * parse an XML name. 3564 * 3565 * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | 3566 * CombiningChar | Extender 3567 * 3568 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3569 * 3570 * Returns the Name parsed or NULL 3571 */ 3572 3573 static const xmlChar * 3574 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3575 const xmlChar *in, *e; 3576 const xmlChar *ret; 3577 int count = 0; 3578 3579 #ifdef DEBUG 3580 nbParseNCName++; 3581 #endif 3582 3583 /* 3584 * Accelerator for simple ASCII names 3585 */ 3586 in = ctxt->input->cur; 3587 e = ctxt->input->end; 3588 if ((((*in >= 0x61) && (*in <= 0x7A)) || 3589 ((*in >= 0x41) && (*in <= 0x5A)) || 3590 (*in == '_')) && (in < e)) { 3591 in++; 3592 while ((((*in >= 0x61) && (*in <= 0x7A)) || 3593 ((*in >= 0x41) && (*in <= 0x5A)) || 3594 ((*in >= 0x30) && (*in <= 0x39)) || 3595 (*in == '_') || (*in == '-') || 3596 (*in == '.')) && (in < e)) 3597 in++; 3598 if (in >= e) 3599 goto complex; 3600 if ((*in > 0) && (*in < 0x80)) { 3601 count = in - ctxt->input->cur; 3602 if ((count > XML_MAX_NAME_LENGTH) && 3603 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3604 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3605 return(NULL); 3606 } 3607 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3608 ctxt->input->cur = in; 3609 ctxt->nbChars += count; 3610 ctxt->input->col += count; 3611 if (ret == NULL) { 3612 xmlErrMemory(ctxt, NULL); 3613 } 3614 return(ret); 3615 } 3616 } 3617 complex: 3618 return(xmlParseNCNameComplex(ctxt)); 3619 } 3620 3621 /** 3622 * xmlParseNameAndCompare: 3623 * @ctxt: an XML parser context 3624 * 3625 * parse an XML name and compares for match 3626 * (specialized for endtag parsing) 3627 * 3628 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3629 * and the name for mismatch 3630 */ 3631 3632 static const xmlChar * 3633 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3634 register const xmlChar *cmp = other; 3635 register const xmlChar *in; 3636 const xmlChar *ret; 3637 3638 GROW; 3639 if (ctxt->instate == XML_PARSER_EOF) 3640 return(NULL); 
3641 3642 in = ctxt->input->cur; 3643 while (*in != 0 && *in == *cmp) { 3644 ++in; 3645 ++cmp; 3646 ctxt->input->col++; 3647 } 3648 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3649 /* success */ 3650 ctxt->input->cur = in; 3651 return (const xmlChar*) 1; 3652 } 3653 /* failure (or end of input buffer), check with full function */ 3654 ret = xmlParseName (ctxt); 3655 /* strings coming from the dictionary direct compare possible */ 3656 if (ret == other) { 3657 return (const xmlChar*) 1; 3658 } 3659 return ret; 3660 } 3661 3662 /** 3663 * xmlParseStringName: 3664 * @ctxt: an XML parser context 3665 * @str: a pointer to the string pointer (IN/OUT) 3666 * 3667 * parse an XML name. 3668 * 3669 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3670 * CombiningChar | Extender 3671 * 3672 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3673 * 3674 * [6] Names ::= Name (#x20 Name)* 3675 * 3676 * Returns the Name parsed or NULL. The @str pointer 3677 * is updated to the current location in the string. 3678 */ 3679 3680 static xmlChar * 3681 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3682 xmlChar buf[XML_MAX_NAMELEN + 5]; 3683 const xmlChar *cur = *str; 3684 int len = 0, l; 3685 int c; 3686 3687 #ifdef DEBUG 3688 nbParseStringName++; 3689 #endif 3690 3691 c = CUR_SCHAR(cur, l); 3692 if (!xmlIsNameStartChar(ctxt, c)) { 3693 return(NULL); 3694 } 3695 3696 COPY_BUF(l,buf,len,c); 3697 cur += l; 3698 c = CUR_SCHAR(cur, l); 3699 while (xmlIsNameChar(ctxt, c)) { 3700 COPY_BUF(l,buf,len,c); 3701 cur += l; 3702 c = CUR_SCHAR(cur, l); 3703 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3704 /* 3705 * Okay someone managed to make a huge name, so he's ready to pay 3706 * for the processing speed. 
3707 */ 3708 xmlChar *buffer; 3709 int max = len * 2; 3710 3711 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3712 if (buffer == NULL) { 3713 xmlErrMemory(ctxt, NULL); 3714 return(NULL); 3715 } 3716 memcpy(buffer, buf, len); 3717 while (xmlIsNameChar(ctxt, c)) { 3718 if (len + 10 > max) { 3719 xmlChar *tmp; 3720 3721 if ((len > XML_MAX_NAME_LENGTH) && 3722 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3723 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3724 xmlFree(buffer); 3725 return(NULL); 3726 } 3727 max *= 2; 3728 tmp = (xmlChar *) xmlRealloc(buffer, 3729 max * sizeof(xmlChar)); 3730 if (tmp == NULL) { 3731 xmlErrMemory(ctxt, NULL); 3732 xmlFree(buffer); 3733 return(NULL); 3734 } 3735 buffer = tmp; 3736 } 3737 COPY_BUF(l,buffer,len,c); 3738 cur += l; 3739 c = CUR_SCHAR(cur, l); 3740 } 3741 buffer[len] = 0; 3742 *str = cur; 3743 return(buffer); 3744 } 3745 } 3746 if ((len > XML_MAX_NAME_LENGTH) && 3747 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3748 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3749 return(NULL); 3750 } 3751 *str = cur; 3752 return(xmlStrndup(buf, len)); 3753 } 3754 3755 /** 3756 * xmlParseNmtoken: 3757 * @ctxt: an XML parser context 3758 * 3759 * parse an XML Nmtoken. 
3760 * 3761 * [7] Nmtoken ::= (NameChar)+ 3762 * 3763 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3764 * 3765 * Returns the Nmtoken parsed or NULL 3766 */ 3767 3768 xmlChar * 3769 xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3770 xmlChar buf[XML_MAX_NAMELEN + 5]; 3771 int len = 0, l; 3772 int c; 3773 int count = 0; 3774 3775 #ifdef DEBUG 3776 nbParseNmToken++; 3777 #endif 3778 3779 GROW; 3780 if (ctxt->instate == XML_PARSER_EOF) 3781 return(NULL); 3782 c = CUR_CHAR(l); 3783 3784 while (xmlIsNameChar(ctxt, c)) { 3785 if (count++ > XML_PARSER_CHUNK_SIZE) { 3786 count = 0; 3787 GROW; 3788 } 3789 COPY_BUF(l,buf,len,c); 3790 NEXTL(l); 3791 c = CUR_CHAR(l); 3792 if (c == 0) { 3793 count = 0; 3794 GROW; 3795 if (ctxt->instate == XML_PARSER_EOF) 3796 return(NULL); 3797 c = CUR_CHAR(l); 3798 } 3799 if (len >= XML_MAX_NAMELEN) { 3800 /* 3801 * Okay someone managed to make a huge token, so he's ready to pay 3802 * for the processing speed. 3803 */ 3804 xmlChar *buffer; 3805 int max = len * 2; 3806 3807 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3808 if (buffer == NULL) { 3809 xmlErrMemory(ctxt, NULL); 3810 return(NULL); 3811 } 3812 memcpy(buffer, buf, len); 3813 while (xmlIsNameChar(ctxt, c)) { 3814 if (count++ > XML_PARSER_CHUNK_SIZE) { 3815 count = 0; 3816 GROW; 3817 if (ctxt->instate == XML_PARSER_EOF) { 3818 xmlFree(buffer); 3819 return(NULL); 3820 } 3821 } 3822 if (len + 10 > max) { 3823 xmlChar *tmp; 3824 3825 if ((max > XML_MAX_NAME_LENGTH) && 3826 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3827 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3828 xmlFree(buffer); 3829 return(NULL); 3830 } 3831 max *= 2; 3832 tmp = (xmlChar *) xmlRealloc(buffer, 3833 max * sizeof(xmlChar)); 3834 if (tmp == NULL) { 3835 xmlErrMemory(ctxt, NULL); 3836 xmlFree(buffer); 3837 return(NULL); 3838 } 3839 buffer = tmp; 3840 } 3841 COPY_BUF(l,buffer,len,c); 3842 NEXTL(l); 3843 c = CUR_CHAR(l); 3844 } 3845 buffer[len] = 0; 3846 return(buffer); 3847 } 3848 } 3849 if (len == 0) 
3850 return(NULL); 3851 if ((len > XML_MAX_NAME_LENGTH) && 3852 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3853 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3854 return(NULL); 3855 } 3856 return(xmlStrndup(buf, len)); 3857 } 3858 3859 /** 3860 * xmlParseEntityValue: 3861 * @ctxt: an XML parser context 3862 * @orig: if non-NULL store a copy of the original entity value 3863 * 3864 * parse a value for ENTITY declarations 3865 * 3866 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3867 * "'" ([^%&'] | PEReference | Reference)* "'" 3868 * 3869 * Returns the EntityValue parsed with reference substituted or NULL 3870 */ 3871 3872 xmlChar * 3873 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3874 xmlChar *buf = NULL; 3875 int len = 0; 3876 int size = XML_PARSER_BUFFER_SIZE; 3877 int c, l; 3878 xmlChar stop; 3879 xmlChar *ret = NULL; 3880 const xmlChar *cur = NULL; 3881 xmlParserInputPtr input; 3882 3883 if (RAW == '"') stop = '"'; 3884 else if (RAW == '\'') stop = '\''; 3885 else { 3886 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3887 return(NULL); 3888 } 3889 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3890 if (buf == NULL) { 3891 xmlErrMemory(ctxt, NULL); 3892 return(NULL); 3893 } 3894 3895 /* 3896 * The content of the entity definition is copied in a buffer. 3897 */ 3898 3899 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3900 input = ctxt->input; 3901 GROW; 3902 if (ctxt->instate == XML_PARSER_EOF) { 3903 xmlFree(buf); 3904 return(NULL); 3905 } 3906 NEXT; 3907 c = CUR_CHAR(l); 3908 /* 3909 * NOTE: 4.4.5 Included in Literal 3910 * When a parameter entity reference appears in a literal entity 3911 * value, ... a single or double quote character in the replacement 3912 * text is always treated as a normal data character and will not 3913 * terminate the literal. 
3914 * In practice it means we stop the loop only when back at parsing 3915 * the initial entity and the quote is found 3916 */ 3917 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3918 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3919 if (len + 5 >= size) { 3920 xmlChar *tmp; 3921 3922 size *= 2; 3923 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3924 if (tmp == NULL) { 3925 xmlErrMemory(ctxt, NULL); 3926 xmlFree(buf); 3927 return(NULL); 3928 } 3929 buf = tmp; 3930 } 3931 COPY_BUF(l,buf,len,c); 3932 NEXTL(l); 3933 /* 3934 * Pop-up of finished entities. 3935 */ 3936 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3937 xmlPopInput(ctxt); 3938 3939 GROW; 3940 c = CUR_CHAR(l); 3941 if (c == 0) { 3942 GROW; 3943 c = CUR_CHAR(l); 3944 } 3945 } 3946 buf[len] = 0; 3947 if (ctxt->instate == XML_PARSER_EOF) { 3948 xmlFree(buf); 3949 return(NULL); 3950 } 3951 3952 /* 3953 * Raise problem w.r.t. '&' and '%' being used in non-entities 3954 * reference constructs. Note Charref will be handled in 3955 * xmlStringDecodeEntities() 3956 */ 3957 cur = buf; 3958 while (*cur != 0) { /* non input consuming */ 3959 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3960 xmlChar *name; 3961 xmlChar tmp = *cur; 3962 3963 cur++; 3964 name = xmlParseStringName(ctxt, &cur); 3965 if ((name == NULL) || (*cur != ';')) { 3966 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3967 "EntityValue: '%c' forbidden except for entities references\n", 3968 tmp); 3969 } 3970 if ((tmp == '%') && (ctxt->inSubset == 1) && 3971 (ctxt->inputNr == 1)) { 3972 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3973 } 3974 if (name != NULL) 3975 xmlFree(name); 3976 if (*cur == 0) 3977 break; 3978 } 3979 cur++; 3980 } 3981 3982 /* 3983 * Then PEReference entities are substituted. 
3984 */ 3985 if (c != stop) { 3986 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3987 xmlFree(buf); 3988 } else { 3989 NEXT; 3990 /* 3991 * NOTE: 4.4.7 Bypassed 3992 * When a general entity reference appears in the EntityValue in 3993 * an entity declaration, it is bypassed and left as is. 3994 * so XML_SUBSTITUTE_REF is not set here. 3995 */ 3996 ++ctxt->depth; 3997 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3998 0, 0, 0); 3999 --ctxt->depth; 4000 if (orig != NULL) 4001 *orig = buf; 4002 else 4003 xmlFree(buf); 4004 } 4005 4006 return(ret); 4007 } 4008 4009 /** 4010 * xmlParseAttValueComplex: 4011 * @ctxt: an XML parser context 4012 * @len: the resulting attribute len 4013 * @normalize: wether to apply the inner normalization 4014 * 4015 * parse a value for an attribute, this is the fallback function 4016 * of xmlParseAttValue() when the attribute parsing requires handling 4017 * of non-ASCII characters, or normalization compaction. 4018 * 4019 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4020 */ 4021 static xmlChar * 4022 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 4023 xmlChar limit = 0; 4024 xmlChar *buf = NULL; 4025 xmlChar *rep = NULL; 4026 size_t len = 0; 4027 size_t buf_size = 0; 4028 int c, l, in_space = 0; 4029 xmlChar *current = NULL; 4030 xmlEntityPtr ent; 4031 4032 if (NXT(0) == '"') { 4033 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 4034 limit = '"'; 4035 NEXT; 4036 } else if (NXT(0) == '\'') { 4037 limit = '\''; 4038 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 4039 NEXT; 4040 } else { 4041 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 4042 return(NULL); 4043 } 4044 4045 /* 4046 * allocate a translation buffer. 4047 */ 4048 buf_size = XML_PARSER_BUFFER_SIZE; 4049 buf = (xmlChar *) xmlMallocAtomic(buf_size); 4050 if (buf == NULL) goto mem_error; 4051 4052 /* 4053 * OK loop until we reach one of the ending char or a size limit. 
4054 */ 4055 c = CUR_CHAR(l); 4056 while (((NXT(0) != limit) && /* checked */ 4057 (IS_CHAR(c)) && (c != '<')) && 4058 (ctxt->instate != XML_PARSER_EOF)) { 4059 /* 4060 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE 4061 * special option is given 4062 */ 4063 if ((len > XML_MAX_TEXT_LENGTH) && 4064 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4065 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4066 "AttValue length too long\n"); 4067 goto mem_error; 4068 } 4069 if (c == 0) break; 4070 if (c == '&') { 4071 in_space = 0; 4072 if (NXT(1) == '#') { 4073 int val = xmlParseCharRef(ctxt); 4074 4075 if (val == '&') { 4076 if (ctxt->replaceEntities) { 4077 if (len + 10 > buf_size) { 4078 growBuffer(buf, 10); 4079 } 4080 buf[len++] = '&'; 4081 } else { 4082 /* 4083 * The reparsing will be done in xmlStringGetNodeList() 4084 * called by the attribute() function in SAX.c 4085 */ 4086 if (len + 10 > buf_size) { 4087 growBuffer(buf, 10); 4088 } 4089 buf[len++] = '&'; 4090 buf[len++] = '#'; 4091 buf[len++] = '3'; 4092 buf[len++] = '8'; 4093 buf[len++] = ';'; 4094 } 4095 } else if (val != 0) { 4096 if (len + 10 > buf_size) { 4097 growBuffer(buf, 10); 4098 } 4099 len += xmlCopyChar(0, &buf[len], val); 4100 } 4101 } else { 4102 ent = xmlParseEntityRef(ctxt); 4103 ctxt->nbentities++; 4104 if (ent != NULL) 4105 ctxt->nbentities += ent->owner; 4106 if ((ent != NULL) && 4107 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 4108 if (len + 10 > buf_size) { 4109 growBuffer(buf, 10); 4110 } 4111 if ((ctxt->replaceEntities == 0) && 4112 (ent->content[0] == '&')) { 4113 buf[len++] = '&'; 4114 buf[len++] = '#'; 4115 buf[len++] = '3'; 4116 buf[len++] = '8'; 4117 buf[len++] = ';'; 4118 } else { 4119 buf[len++] = ent->content[0]; 4120 } 4121 } else if ((ent != NULL) && 4122 (ctxt->replaceEntities != 0)) { 4123 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 4124 ++ctxt->depth; 4125 rep = xmlStringDecodeEntities(ctxt, ent->content, 4126 XML_SUBSTITUTE_REF, 4127 0, 0, 
0); 4128 --ctxt->depth; 4129 if (rep != NULL) { 4130 current = rep; 4131 while (*current != 0) { /* non input consuming */ 4132 if ((*current == 0xD) || (*current == 0xA) || 4133 (*current == 0x9)) { 4134 buf[len++] = 0x20; 4135 current++; 4136 } else 4137 buf[len++] = *current++; 4138 if (len + 10 > buf_size) { 4139 growBuffer(buf, 10); 4140 } 4141 } 4142 xmlFree(rep); 4143 rep = NULL; 4144 } 4145 } else { 4146 if (len + 10 > buf_size) { 4147 growBuffer(buf, 10); 4148 } 4149 if (ent->content != NULL) 4150 buf[len++] = ent->content[0]; 4151 } 4152 } else if (ent != NULL) { 4153 int i = xmlStrlen(ent->name); 4154 const xmlChar *cur = ent->name; 4155 4156 /* 4157 * This may look absurd but is needed to detect 4158 * entities problems 4159 */ 4160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 4161 (ent->content != NULL) && (ent->checked == 0)) { 4162 unsigned long oldnbent = ctxt->nbentities; 4163 4164 ++ctxt->depth; 4165 rep = xmlStringDecodeEntities(ctxt, ent->content, 4166 XML_SUBSTITUTE_REF, 0, 0, 0); 4167 --ctxt->depth; 4168 4169 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 4170 if (rep != NULL) { 4171 if (xmlStrchr(rep, '<')) 4172 ent->checked |= 1; 4173 xmlFree(rep); 4174 rep = NULL; 4175 } 4176 } 4177 4178 /* 4179 * Just output the reference 4180 */ 4181 buf[len++] = '&'; 4182 while (len + i + 10 > buf_size) { 4183 growBuffer(buf, i + 10); 4184 } 4185 for (;i > 0;i--) 4186 buf[len++] = *cur++; 4187 buf[len++] = ';'; 4188 } 4189 } 4190 } else { 4191 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4192 if ((len != 0) || (!normalize)) { 4193 if ((!normalize) || (!in_space)) { 4194 COPY_BUF(l,buf,len,0x20); 4195 while (len + 10 > buf_size) { 4196 growBuffer(buf, 10); 4197 } 4198 } 4199 in_space = 1; 4200 } 4201 } else { 4202 in_space = 0; 4203 COPY_BUF(l,buf,len,c); 4204 if (len + 10 > buf_size) { 4205 growBuffer(buf, 10); 4206 } 4207 } 4208 NEXTL(l); 4209 } 4210 GROW; 4211 c = CUR_CHAR(l); 4212 } 4213 if (ctxt->instate == 
XML_PARSER_EOF) 4214 goto error; 4215 4216 if ((in_space) && (normalize)) { 4217 while ((len > 0) && (buf[len - 1] == 0x20)) len--; 4218 } 4219 buf[len] = 0; 4220 if (RAW == '<') { 4221 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4222 } else if (RAW != limit) { 4223 if ((c != 0) && (!IS_CHAR(c))) { 4224 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4225 "invalid character in attribute value\n"); 4226 } else { 4227 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4228 "AttValue: ' expected\n"); 4229 } 4230 } else 4231 NEXT; 4232 4233 /* 4234 * There we potentially risk an overflow, don't allow attribute value of 4235 * length more than INT_MAX it is a very reasonnable assumption ! 4236 */ 4237 if (len >= INT_MAX) { 4238 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4239 "AttValue length too long\n"); 4240 goto mem_error; 4241 } 4242 4243 if (attlen != NULL) *attlen = (int) len; 4244 return(buf); 4245 4246 mem_error: 4247 xmlErrMemory(ctxt, NULL); 4248 error: 4249 if (buf != NULL) 4250 xmlFree(buf); 4251 if (rep != NULL) 4252 xmlFree(rep); 4253 return(NULL); 4254 } 4255 4256 /** 4257 * xmlParseAttValue: 4258 * @ctxt: an XML parser context 4259 * 4260 * parse a value for an attribute 4261 * Note: the parser won't do substitution of entities here, this 4262 * will be handled later in xmlStringGetNodeList 4263 * 4264 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4265 * "'" ([^<&'] | Reference)* "'" 4266 * 4267 * 3.3.3 Attribute-Value Normalization: 4268 * Before the value of an attribute is passed to the application or 4269 * checked for validity, the XML processor must normalize it as follows: 4270 * - a character reference is processed by appending the referenced 4271 * character to the attribute value 4272 * - an entity reference is processed by recursively processing the 4273 * replacement text of the entity 4274 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4275 * appending #x20 to the normalized value, except that only a 
single 4276 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4277 * parsed entity or the literal entity value of an internal parsed entity 4278 * - other characters are processed by appending them to the normalized value 4279 * If the declared value is not CDATA, then the XML processor must further 4280 * process the normalized attribute value by discarding any leading and 4281 * trailing space (#x20) characters, and by replacing sequences of space 4282 * (#x20) characters by a single space (#x20) character. 4283 * All attributes for which no declaration has been read should be treated 4284 * by a non-validating parser as if declared CDATA. 4285 * 4286 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4287 */ 4288 4289 4290 xmlChar * 4291 xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4292 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4293 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4294 } 4295 4296 /** 4297 * xmlParseSystemLiteral: 4298 * @ctxt: an XML parser context 4299 * 4300 * parse an XML Literal 4301 * 4302 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4303 * 4304 * Returns the SystemLiteral parsed or NULL 4305 */ 4306 4307 xmlChar * 4308 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4309 xmlChar *buf = NULL; 4310 int len = 0; 4311 int size = XML_PARSER_BUFFER_SIZE; 4312 int cur, l; 4313 xmlChar stop; 4314 int state = ctxt->instate; 4315 int count = 0; 4316 4317 SHRINK; 4318 if (RAW == '"') { 4319 NEXT; 4320 stop = '"'; 4321 } else if (RAW == '\'') { 4322 NEXT; 4323 stop = '\''; 4324 } else { 4325 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4326 return(NULL); 4327 } 4328 4329 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4330 if (buf == NULL) { 4331 xmlErrMemory(ctxt, NULL); 4332 return(NULL); 4333 } 4334 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4335 cur = CUR_CHAR(l); 4336 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4337 if (len + 5 >= 
size) { 4338 xmlChar *tmp; 4339 4340 if ((size > XML_MAX_NAME_LENGTH) && 4341 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4342 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4343 xmlFree(buf); 4344 ctxt->instate = (xmlParserInputState) state; 4345 return(NULL); 4346 } 4347 size *= 2; 4348 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4349 if (tmp == NULL) { 4350 xmlFree(buf); 4351 xmlErrMemory(ctxt, NULL); 4352 ctxt->instate = (xmlParserInputState) state; 4353 return(NULL); 4354 } 4355 buf = tmp; 4356 } 4357 count++; 4358 if (count > 50) { 4359 GROW; 4360 count = 0; 4361 if (ctxt->instate == XML_PARSER_EOF) { 4362 xmlFree(buf); 4363 return(NULL); 4364 } 4365 } 4366 COPY_BUF(l,buf,len,cur); 4367 NEXTL(l); 4368 cur = CUR_CHAR(l); 4369 if (cur == 0) { 4370 GROW; 4371 SHRINK; 4372 cur = CUR_CHAR(l); 4373 } 4374 } 4375 buf[len] = 0; 4376 ctxt->instate = (xmlParserInputState) state; 4377 if (!IS_CHAR(cur)) { 4378 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4379 } else { 4380 NEXT; 4381 } 4382 return(buf); 4383 } 4384 4385 /** 4386 * xmlParsePubidLiteral: 4387 * @ctxt: an XML parser context 4388 * 4389 * parse an XML public literal 4390 * 4391 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4392 * 4393 * Returns the PubidLiteral parsed or NULL. 
4394 */ 4395 4396 xmlChar * 4397 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4398 xmlChar *buf = NULL; 4399 int len = 0; 4400 int size = XML_PARSER_BUFFER_SIZE; 4401 xmlChar cur; 4402 xmlChar stop; 4403 int count = 0; 4404 xmlParserInputState oldstate = ctxt->instate; 4405 4406 SHRINK; 4407 if (RAW == '"') { 4408 NEXT; 4409 stop = '"'; 4410 } else if (RAW == '\'') { 4411 NEXT; 4412 stop = '\''; 4413 } else { 4414 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4415 return(NULL); 4416 } 4417 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4418 if (buf == NULL) { 4419 xmlErrMemory(ctxt, NULL); 4420 return(NULL); 4421 } 4422 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4423 cur = CUR; 4424 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4425 if (len + 1 >= size) { 4426 xmlChar *tmp; 4427 4428 if ((size > XML_MAX_NAME_LENGTH) && 4429 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4430 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4431 xmlFree(buf); 4432 return(NULL); 4433 } 4434 size *= 2; 4435 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4436 if (tmp == NULL) { 4437 xmlErrMemory(ctxt, NULL); 4438 xmlFree(buf); 4439 return(NULL); 4440 } 4441 buf = tmp; 4442 } 4443 buf[len++] = cur; 4444 count++; 4445 if (count > 50) { 4446 GROW; 4447 count = 0; 4448 if (ctxt->instate == XML_PARSER_EOF) { 4449 xmlFree(buf); 4450 return(NULL); 4451 } 4452 } 4453 NEXT; 4454 cur = CUR; 4455 if (cur == 0) { 4456 GROW; 4457 SHRINK; 4458 cur = CUR; 4459 } 4460 } 4461 buf[len] = 0; 4462 if (cur != stop) { 4463 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4464 } else { 4465 NEXT; 4466 } 4467 ctxt->instate = oldstate; 4468 return(buf); 4469 } 4470 4471 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4472 4473 /* 4474 * used for the test in the inner loop of the char data testing 4475 */ 4476 static const unsigned char test_char_data[256] = { 4477 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4478 0x00, 0x09, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4479 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4480 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4481 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4482 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4483 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4484 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4485 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4486 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4487 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4488 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4489 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4490 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4491 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4492 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4493 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4494 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4495 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4496 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4497 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4498 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4499 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4500 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4501 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4502 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4503 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4504 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4505 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4506 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4507 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4508 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4509 }; 4510 4511 /** 4512 * xmlParseCharData: 4513 * @ctxt: an XML parser context 4514 * @cdata: int indicating whether we are within a CDATA section 4515 * 4516 * parse a CharData section. 4517 * if we are within a CDATA section ']]>' marks an end of section. 
*
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 an ASCII fast path scans ctxt->input->cur in place
 * and hands the scanned bytes straight to the SAX callbacks without
 * copying them. Anything the fast path cannot handle, and every call
 * with @cdata != 0, is delegated to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;      /* scan pointer, runs ahead of input->cur */
    int nbchar = 0;         /* number of bytes scanned but not yet delivered */
    /*
     * Position snapshot. It is refreshed after each delivery below and
     * written back to the input just before falling through to the
     * complex parser.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;               /* local column counter for the inner loop */

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Skip a leading run made only of spaces and line feeds. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces and line feeds were seen before the next
                 * markup: deliver them and stop.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    /*
                     * areBlanks() is only consulted when the handler has
                     * distinct ignorableWhitespace and characters
                     * callbacks.
                     * NOTE(review): unlike xmlParseCharDataComplex(),
                     * this path does not test ctxt->disableSAX before
                     * calling back - confirm this is intended.
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* Skip every byte that test_char_data[] marks as plain. */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver what has been scanned so far. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * CR LF: move input->cur onto the LF, so the CR is not
                 * part of the next chunk, and keep scanning after it.
                 * A lone CR is left for the code below.
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* Re-read the cursor after SHRINK/GROW. */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* Write the position snapshot back before the slow path takes over. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 * xmlParseCharDataComplex:
4671 * @ctxt: an XML parser context 4672 * @cdata: int indicating whether we are within a CDATA section 4673 * 4674 * parse a CharData section.this is the fallback function 4675 * of xmlParseCharData() when the parsing requires handling 4676 * of non-ASCII characters. 4677 */ 4678 static void 4679 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4680 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4681 int nbchar = 0; 4682 int cur, l; 4683 int count = 0; 4684 4685 SHRINK; 4686 GROW; 4687 cur = CUR_CHAR(l); 4688 while ((cur != '<') && /* checked */ 4689 (cur != '&') && 4690 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4691 if ((cur == ']') && (NXT(1) == ']') && 4692 (NXT(2) == '>')) { 4693 if (cdata) break; 4694 else { 4695 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4696 } 4697 } 4698 COPY_BUF(l,buf,nbchar,cur); 4699 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4700 buf[nbchar] = 0; 4701 4702 /* 4703 * OK the segment is to be consumed as chars. 4704 */ 4705 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4706 if (areBlanks(ctxt, buf, nbchar, 0)) { 4707 if (ctxt->sax->ignorableWhitespace != NULL) 4708 ctxt->sax->ignorableWhitespace(ctxt->userData, 4709 buf, nbchar); 4710 } else { 4711 if (ctxt->sax->characters != NULL) 4712 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4713 if ((ctxt->sax->characters != 4714 ctxt->sax->ignorableWhitespace) && 4715 (*ctxt->space == -1)) 4716 *ctxt->space = -2; 4717 } 4718 } 4719 nbchar = 0; 4720 /* something really bad happened in the SAX callback */ 4721 if (ctxt->instate != XML_PARSER_CONTENT) 4722 return; 4723 } 4724 count++; 4725 if (count > 50) { 4726 GROW; 4727 count = 0; 4728 if (ctxt->instate == XML_PARSER_EOF) 4729 return; 4730 } 4731 NEXTL(l); 4732 cur = CUR_CHAR(l); 4733 } 4734 if (nbchar != 0) { 4735 buf[nbchar] = 0; 4736 /* 4737 * OK the segment is to be consumed as chars. 
4738 */ 4739 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4740 if (areBlanks(ctxt, buf, nbchar, 0)) { 4741 if (ctxt->sax->ignorableWhitespace != NULL) 4742 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4743 } else { 4744 if (ctxt->sax->characters != NULL) 4745 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4746 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4747 (*ctxt->space == -1)) 4748 *ctxt->space = -2; 4749 } 4750 } 4751 } 4752 if ((cur != 0) && (!IS_CHAR(cur))) { 4753 /* Generate the error and skip the offending character */ 4754 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4755 "PCDATA invalid Char value %d\n", 4756 cur); 4757 NEXTL(l); 4758 } 4759 } 4760 4761 /** 4762 * xmlParseExternalID: 4763 * @ctxt: an XML parser context 4764 * @publicID: a xmlChar** receiving PubidLiteral 4765 * @strict: indicate whether we should restrict parsing to only 4766 * production [75], see NOTE below 4767 * 4768 * Parse an External ID or a Public ID 4769 * 4770 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4771 * 'PUBLIC' S PubidLiteral S SystemLiteral 4772 * 4773 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4774 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4775 * 4776 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4777 * 4778 * Returns the function returns SystemLiteral and in the second 4779 * case publicID receives PubidLiteral, is strict is off 4780 * it is possible to return NULL and have publicID set. 
4781 */ 4782 4783 xmlChar * 4784 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4785 xmlChar *URI = NULL; 4786 4787 SHRINK; 4788 4789 *publicID = NULL; 4790 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4791 SKIP(6); 4792 if (!IS_BLANK_CH(CUR)) { 4793 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4794 "Space required after 'SYSTEM'\n"); 4795 } 4796 SKIP_BLANKS; 4797 URI = xmlParseSystemLiteral(ctxt); 4798 if (URI == NULL) { 4799 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4800 } 4801 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4802 SKIP(6); 4803 if (!IS_BLANK_CH(CUR)) { 4804 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4805 "Space required after 'PUBLIC'\n"); 4806 } 4807 SKIP_BLANKS; 4808 *publicID = xmlParsePubidLiteral(ctxt); 4809 if (*publicID == NULL) { 4810 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4811 } 4812 if (strict) { 4813 /* 4814 * We don't handle [83] so "S SystemLiteral" is required. 4815 */ 4816 if (!IS_BLANK_CH(CUR)) { 4817 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4818 "Space required after the Public Identifier\n"); 4819 } 4820 } else { 4821 /* 4822 * We handle [83] so we return immediately, if 4823 * "S SystemLiteral" is not detected. From a purely parsing 4824 * point of view that's a nice mess. 4825 */ 4826 const xmlChar *ptr; 4827 GROW; 4828 4829 ptr = CUR_PTR; 4830 if (!IS_BLANK_CH(*ptr)) return(NULL); 4831 4832 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4833 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4834 } 4835 SKIP_BLANKS; 4836 URI = xmlParseSystemLiteral(ctxt); 4837 if (URI == NULL) { 4838 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4839 } 4840 } 4841 return(URI); 4842 } 4843 4844 /** 4845 * xmlParseCommentComplex: 4846 * @ctxt: an XML parser context 4847 * @buf: the already parsed part of the buffer 4848 * @len: number of bytes filles in the buffer 4849 * @size: allocated size of the buffer 4850 * 4851 * Skip an XML (SGML) comment <!-- .... 
--> 4852 * The spec says that "For compatibility, the string "--" (double-hyphen) 4853 * must not occur within comments. " 4854 * This is the slow routine in case the accelerator for ascii didn't work 4855 * 4856 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4857 */ 4858 static void 4859 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4860 size_t len, size_t size) { 4861 int q, ql; 4862 int r, rl; 4863 int cur, l; 4864 size_t count = 0; 4865 int inputid; 4866 4867 inputid = ctxt->input->id; 4868 4869 if (buf == NULL) { 4870 len = 0; 4871 size = XML_PARSER_BUFFER_SIZE; 4872 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4873 if (buf == NULL) { 4874 xmlErrMemory(ctxt, NULL); 4875 return; 4876 } 4877 } 4878 GROW; /* Assure there's enough input data */ 4879 q = CUR_CHAR(ql); 4880 if (q == 0) 4881 goto not_terminated; 4882 if (!IS_CHAR(q)) { 4883 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4884 "xmlParseComment: invalid xmlChar value %d\n", 4885 q); 4886 xmlFree (buf); 4887 return; 4888 } 4889 NEXTL(ql); 4890 r = CUR_CHAR(rl); 4891 if (r == 0) 4892 goto not_terminated; 4893 if (!IS_CHAR(r)) { 4894 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4895 "xmlParseComment: invalid xmlChar value %d\n", 4896 q); 4897 xmlFree (buf); 4898 return; 4899 } 4900 NEXTL(rl); 4901 cur = CUR_CHAR(l); 4902 if (cur == 0) 4903 goto not_terminated; 4904 while (IS_CHAR(cur) && /* checked */ 4905 ((cur != '>') || 4906 (r != '-') || (q != '-'))) { 4907 if ((r == '-') && (q == '-')) { 4908 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4909 } 4910 if ((len > XML_MAX_TEXT_LENGTH) && 4911 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4912 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4913 "Comment too big found", NULL); 4914 xmlFree (buf); 4915 return; 4916 } 4917 if (len + 5 >= size) { 4918 xmlChar *new_buf; 4919 size_t new_size; 4920 4921 new_size = size * 2; 4922 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4923 if (new_buf == NULL) { 
4924 xmlFree (buf); 4925 xmlErrMemory(ctxt, NULL); 4926 return; 4927 } 4928 buf = new_buf; 4929 size = new_size; 4930 } 4931 COPY_BUF(ql,buf,len,q); 4932 q = r; 4933 ql = rl; 4934 r = cur; 4935 rl = l; 4936 4937 count++; 4938 if (count > 50) { 4939 GROW; 4940 count = 0; 4941 if (ctxt->instate == XML_PARSER_EOF) { 4942 xmlFree(buf); 4943 return; 4944 } 4945 } 4946 NEXTL(l); 4947 cur = CUR_CHAR(l); 4948 if (cur == 0) { 4949 SHRINK; 4950 GROW; 4951 cur = CUR_CHAR(l); 4952 } 4953 } 4954 buf[len] = 0; 4955 if (cur == 0) { 4956 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4957 "Comment not terminated \n<!--%.50s\n", buf); 4958 } else if (!IS_CHAR(cur)) { 4959 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4960 "xmlParseComment: invalid xmlChar value %d\n", 4961 cur); 4962 } else { 4963 if (inputid != ctxt->input->id) { 4964 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4965 "Comment doesn't start and stop in the same entity\n"); 4966 } 4967 NEXT; 4968 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4969 (!ctxt->disableSAX)) 4970 ctxt->sax->comment(ctxt->userData, buf); 4971 } 4972 xmlFree(buf); 4973 return; 4974 not_terminated: 4975 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4976 "Comment not terminated\n", NULL); 4977 xmlFree(buf); 4978 return; 4979 } 4980 4981 /** 4982 * xmlParseComment: 4983 * @ctxt: an XML parser context 4984 * 4985 * Skip an XML (SGML) comment <!-- .... --> 4986 * The spec says that "For compatibility, the string "--" (double-hyphen) 4987 * must not occur within comments. " 4988 * 4989 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4990 */ 4991 void 4992 xmlParseComment(xmlParserCtxtPtr ctxt) { 4993 xmlChar *buf = NULL; 4994 size_t size = XML_PARSER_BUFFER_SIZE; 4995 size_t len = 0; 4996 xmlParserInputState state; 4997 const xmlChar *in; 4998 size_t nbchar = 0; 4999 int ccol; 5000 int inputid; 5001 5002 /* 5003 * Check that there is a comment right here. 
5004 */ 5005 if ((RAW != '<') || (NXT(1) != '!') || 5006 (NXT(2) != '-') || (NXT(3) != '-')) return; 5007 state = ctxt->instate; 5008 ctxt->instate = XML_PARSER_COMMENT; 5009 inputid = ctxt->input->id; 5010 SKIP(4); 5011 SHRINK; 5012 GROW; 5013 5014 /* 5015 * Accelerated common case where input don't need to be 5016 * modified before passing it to the handler. 5017 */ 5018 in = ctxt->input->cur; 5019 do { 5020 if (*in == 0xA) { 5021 do { 5022 ctxt->input->line++; ctxt->input->col = 1; 5023 in++; 5024 } while (*in == 0xA); 5025 } 5026 get_more: 5027 ccol = ctxt->input->col; 5028 while (((*in > '-') && (*in <= 0x7F)) || 5029 ((*in >= 0x20) && (*in < '-')) || 5030 (*in == 0x09)) { 5031 in++; 5032 ccol++; 5033 } 5034 ctxt->input->col = ccol; 5035 if (*in == 0xA) { 5036 do { 5037 ctxt->input->line++; ctxt->input->col = 1; 5038 in++; 5039 } while (*in == 0xA); 5040 goto get_more; 5041 } 5042 nbchar = in - ctxt->input->cur; 5043 /* 5044 * save current set of data 5045 */ 5046 if (nbchar > 0) { 5047 if ((ctxt->sax != NULL) && 5048 (ctxt->sax->comment != NULL)) { 5049 if (buf == NULL) { 5050 if ((*in == '-') && (in[1] == '-')) 5051 size = nbchar + 1; 5052 else 5053 size = XML_PARSER_BUFFER_SIZE + nbchar; 5054 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5055 if (buf == NULL) { 5056 xmlErrMemory(ctxt, NULL); 5057 ctxt->instate = state; 5058 return; 5059 } 5060 len = 0; 5061 } else if (len + nbchar + 1 >= size) { 5062 xmlChar *new_buf; 5063 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 5064 new_buf = (xmlChar *) xmlRealloc(buf, 5065 size * sizeof(xmlChar)); 5066 if (new_buf == NULL) { 5067 xmlFree (buf); 5068 xmlErrMemory(ctxt, NULL); 5069 ctxt->instate = state; 5070 return; 5071 } 5072 buf = new_buf; 5073 } 5074 memcpy(&buf[len], ctxt->input->cur, nbchar); 5075 len += nbchar; 5076 buf[len] = 0; 5077 } 5078 } 5079 if ((len > XML_MAX_TEXT_LENGTH) && 5080 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5081 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 5082 
"Comment too big found", NULL); 5083 xmlFree (buf); 5084 return; 5085 } 5086 ctxt->input->cur = in; 5087 if (*in == 0xA) { 5088 in++; 5089 ctxt->input->line++; ctxt->input->col = 1; 5090 } 5091 if (*in == 0xD) { 5092 in++; 5093 if (*in == 0xA) { 5094 ctxt->input->cur = in; 5095 in++; 5096 ctxt->input->line++; ctxt->input->col = 1; 5097 continue; /* while */ 5098 } 5099 in--; 5100 } 5101 SHRINK; 5102 GROW; 5103 if (ctxt->instate == XML_PARSER_EOF) { 5104 xmlFree(buf); 5105 return; 5106 } 5107 in = ctxt->input->cur; 5108 if (*in == '-') { 5109 if (in[1] == '-') { 5110 if (in[2] == '>') { 5111 if (ctxt->input->id != inputid) { 5112 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5113 "comment doesn't start and stop in the same entity\n"); 5114 } 5115 SKIP(3); 5116 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5117 (!ctxt->disableSAX)) { 5118 if (buf != NULL) 5119 ctxt->sax->comment(ctxt->userData, buf); 5120 else 5121 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 5122 } 5123 if (buf != NULL) 5124 xmlFree(buf); 5125 if (ctxt->instate != XML_PARSER_EOF) 5126 ctxt->instate = state; 5127 return; 5128 } 5129 if (buf != NULL) { 5130 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5131 "Double hyphen within comment: " 5132 "<!--%.50s\n", 5133 buf); 5134 } else 5135 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5136 "Double hyphen within comment\n", NULL); 5137 in++; 5138 ctxt->input->col++; 5139 } 5140 in++; 5141 ctxt->input->col++; 5142 goto get_more; 5143 } 5144 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 5145 xmlParseCommentComplex(ctxt, buf, len, size); 5146 ctxt->instate = state; 5147 return; 5148 } 5149 5150 5151 /** 5152 * xmlParsePITarget: 5153 * @ctxt: an XML parser context 5154 * 5155 * parse the name of a PI 5156 * 5157 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 5158 * 5159 * Returns the PITarget name or NULL 5160 */ 5161 5162 const xmlChar * 5163 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5164 const 
xmlChar *name; 5165 5166 name = xmlParseName(ctxt); 5167 if ((name != NULL) && 5168 ((name[0] == 'x') || (name[0] == 'X')) && 5169 ((name[1] == 'm') || (name[1] == 'M')) && 5170 ((name[2] == 'l') || (name[2] == 'L'))) { 5171 int i; 5172 if ((name[0] == 'x') && (name[1] == 'm') && 5173 (name[2] == 'l') && (name[3] == 0)) { 5174 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5175 "XML declaration allowed only at the start of the document\n"); 5176 return(name); 5177 } else if (name[3] == 0) { 5178 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5179 return(name); 5180 } 5181 for (i = 0;;i++) { 5182 if (xmlW3CPIs[i] == NULL) break; 5183 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5184 return(name); 5185 } 5186 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5187 "xmlParsePITarget: invalid name prefix 'xml'\n", 5188 NULL, NULL); 5189 } 5190 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5191 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5192 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5193 } 5194 return(name); 5195 } 5196 5197 #ifdef LIBXML_CATALOG_ENABLED 5198 /** 5199 * xmlParseCatalogPI: 5200 * @ctxt: an XML parser context 5201 * @catalog: the PI value string 5202 * 5203 * parse an XML Catalog Processing Instruction. 
5204 * 5205 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5206 * 5207 * Occurs only if allowed by the user and if happening in the Misc 5208 * part of the document before any doctype informations 5209 * This will add the given catalog to the parsing context in order 5210 * to be used if there is a resolution need further down in the document 5211 */ 5212 5213 static void 5214 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5215 xmlChar *URL = NULL; 5216 const xmlChar *tmp, *base; 5217 xmlChar marker; 5218 5219 tmp = catalog; 5220 while (IS_BLANK_CH(*tmp)) tmp++; 5221 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5222 goto error; 5223 tmp += 7; 5224 while (IS_BLANK_CH(*tmp)) tmp++; 5225 if (*tmp != '=') { 5226 return; 5227 } 5228 tmp++; 5229 while (IS_BLANK_CH(*tmp)) tmp++; 5230 marker = *tmp; 5231 if ((marker != '\'') && (marker != '"')) 5232 goto error; 5233 tmp++; 5234 base = tmp; 5235 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5236 if (*tmp == 0) 5237 goto error; 5238 URL = xmlStrndup(base, tmp - base); 5239 tmp++; 5240 while (IS_BLANK_CH(*tmp)) tmp++; 5241 if (*tmp != 0) 5242 goto error; 5243 5244 if (URL != NULL) { 5245 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5246 xmlFree(URL); 5247 } 5248 return; 5249 5250 error: 5251 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5252 "Catalog PI syntax error: %s\n", 5253 catalog, NULL); 5254 if (URL != NULL) 5255 xmlFree(URL); 5256 } 5257 #endif 5258 5259 /** 5260 * xmlParsePI: 5261 * @ctxt: an XML parser context 5262 * 5263 * parse an XML Processing Instruction. 5264 * 5265 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5266 * 5267 * The processing is transfered to SAX once parsed. 
5268 */ 5269 5270 void 5271 xmlParsePI(xmlParserCtxtPtr ctxt) { 5272 xmlChar *buf = NULL; 5273 size_t len = 0; 5274 size_t size = XML_PARSER_BUFFER_SIZE; 5275 int cur, l; 5276 const xmlChar *target; 5277 xmlParserInputState state; 5278 int count = 0; 5279 5280 if ((RAW == '<') && (NXT(1) == '?')) { 5281 xmlParserInputPtr input = ctxt->input; 5282 state = ctxt->instate; 5283 ctxt->instate = XML_PARSER_PI; 5284 /* 5285 * this is a Processing Instruction. 5286 */ 5287 SKIP(2); 5288 SHRINK; 5289 5290 /* 5291 * Parse the target name and check for special support like 5292 * namespace. 5293 */ 5294 target = xmlParsePITarget(ctxt); 5295 if (target != NULL) { 5296 if ((RAW == '?') && (NXT(1) == '>')) { 5297 if (input != ctxt->input) { 5298 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5299 "PI declaration doesn't start and stop in the same entity\n"); 5300 } 5301 SKIP(2); 5302 5303 /* 5304 * SAX: PI detected. 5305 */ 5306 if ((ctxt->sax) && (!ctxt->disableSAX) && 5307 (ctxt->sax->processingInstruction != NULL)) 5308 ctxt->sax->processingInstruction(ctxt->userData, 5309 target, NULL); 5310 if (ctxt->instate != XML_PARSER_EOF) 5311 ctxt->instate = state; 5312 return; 5313 } 5314 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5315 if (buf == NULL) { 5316 xmlErrMemory(ctxt, NULL); 5317 ctxt->instate = state; 5318 return; 5319 } 5320 cur = CUR; 5321 if (!IS_BLANK(cur)) { 5322 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5323 "ParsePI: PI %s space expected\n", target); 5324 } 5325 SKIP_BLANKS; 5326 cur = CUR_CHAR(l); 5327 while (IS_CHAR(cur) && /* checked */ 5328 ((cur != '?') || (NXT(1) != '>'))) { 5329 if (len + 5 >= size) { 5330 xmlChar *tmp; 5331 size_t new_size = size * 2; 5332 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5333 if (tmp == NULL) { 5334 xmlErrMemory(ctxt, NULL); 5335 xmlFree(buf); 5336 ctxt->instate = state; 5337 return; 5338 } 5339 buf = tmp; 5340 size = new_size; 5341 } 5342 count++; 5343 if (count > 50) { 5344 GROW; 5345 if (ctxt->instate == 
XML_PARSER_EOF) { 5346 xmlFree(buf); 5347 return; 5348 } 5349 count = 0; 5350 if ((len > XML_MAX_TEXT_LENGTH) && 5351 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5352 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5353 "PI %s too big found", target); 5354 xmlFree(buf); 5355 ctxt->instate = state; 5356 return; 5357 } 5358 } 5359 COPY_BUF(l,buf,len,cur); 5360 NEXTL(l); 5361 cur = CUR_CHAR(l); 5362 if (cur == 0) { 5363 SHRINK; 5364 GROW; 5365 cur = CUR_CHAR(l); 5366 } 5367 } 5368 if ((len > XML_MAX_TEXT_LENGTH) && 5369 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5370 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5371 "PI %s too big found", target); 5372 xmlFree(buf); 5373 ctxt->instate = state; 5374 return; 5375 } 5376 buf[len] = 0; 5377 if (cur != '?') { 5378 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5379 "ParsePI: PI %s never end ...\n", target); 5380 } else { 5381 if (input != ctxt->input) { 5382 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5383 "PI declaration doesn't start and stop in the same entity\n"); 5384 } 5385 SKIP(2); 5386 5387 #ifdef LIBXML_CATALOG_ENABLED 5388 if (((state == XML_PARSER_MISC) || 5389 (state == XML_PARSER_START)) && 5390 (xmlStrEqual(target, XML_CATALOG_PI))) { 5391 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5392 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5393 (allow == XML_CATA_ALLOW_ALL)) 5394 xmlParseCatalogPI(ctxt, buf); 5395 } 5396 #endif 5397 5398 5399 /* 5400 * SAX: PI detected. 
5401 */ 5402 if ((ctxt->sax) && (!ctxt->disableSAX) && 5403 (ctxt->sax->processingInstruction != NULL)) 5404 ctxt->sax->processingInstruction(ctxt->userData, 5405 target, buf); 5406 } 5407 xmlFree(buf); 5408 } else { 5409 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5410 } 5411 if (ctxt->instate != XML_PARSER_EOF) 5412 ctxt->instate = state; 5413 } 5414 } 5415 5416 /** 5417 * xmlParseNotationDecl: 5418 * @ctxt: an XML parser context 5419 * 5420 * parse a notation declaration 5421 * 5422 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5423 * 5424 * Hence there is actually 3 choices: 5425 * 'PUBLIC' S PubidLiteral 5426 * 'PUBLIC' S PubidLiteral S SystemLiteral 5427 * and 'SYSTEM' S SystemLiteral 5428 * 5429 * See the NOTE on xmlParseExternalID(). 5430 */ 5431 5432 void 5433 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5434 const xmlChar *name; 5435 xmlChar *Pubid; 5436 xmlChar *Systemid; 5437 5438 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5439 xmlParserInputPtr input = ctxt->input; 5440 SHRINK; 5441 SKIP(10); 5442 if (!IS_BLANK_CH(CUR)) { 5443 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5444 "Space required after '<!NOTATION'\n"); 5445 return; 5446 } 5447 SKIP_BLANKS; 5448 5449 name = xmlParseName(ctxt); 5450 if (name == NULL) { 5451 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5452 return; 5453 } 5454 if (!IS_BLANK_CH(CUR)) { 5455 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5456 "Space required after the NOTATION name'\n"); 5457 return; 5458 } 5459 if (xmlStrchr(name, ':') != NULL) { 5460 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5461 "colons are forbidden from notation names '%s'\n", 5462 name, NULL, NULL); 5463 } 5464 SKIP_BLANKS; 5465 5466 /* 5467 * Parse the IDs. 
5468 */ 5469 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5470 SKIP_BLANKS; 5471 5472 if (RAW == '>') { 5473 if (input != ctxt->input) { 5474 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5475 "Notation declaration doesn't start and stop in the same entity\n"); 5476 } 5477 NEXT; 5478 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5479 (ctxt->sax->notationDecl != NULL)) 5480 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5481 } else { 5482 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5483 } 5484 if (Systemid != NULL) xmlFree(Systemid); 5485 if (Pubid != NULL) xmlFree(Pubid); 5486 } 5487 } 5488 5489 /** 5490 * xmlParseEntityDecl: 5491 * @ctxt: an XML parser context 5492 * 5493 * parse <!ENTITY declarations 5494 * 5495 * [70] EntityDecl ::= GEDecl | PEDecl 5496 * 5497 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5498 * 5499 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5500 * 5501 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5502 * 5503 * [74] PEDef ::= EntityValue | ExternalID 5504 * 5505 * [76] NDataDecl ::= S 'NDATA' S Name 5506 * 5507 * [ VC: Notation Declared ] 5508 * The Name must match the declared name of a notation. 
5509 */ 5510 5511 void 5512 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5513 const xmlChar *name = NULL; 5514 xmlChar *value = NULL; 5515 xmlChar *URI = NULL, *literal = NULL; 5516 const xmlChar *ndata = NULL; 5517 int isParameter = 0; 5518 xmlChar *orig = NULL; 5519 int skipped; 5520 5521 /* GROW; done in the caller */ 5522 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5523 xmlParserInputPtr input = ctxt->input; 5524 SHRINK; 5525 SKIP(8); 5526 skipped = SKIP_BLANKS; 5527 if (skipped == 0) { 5528 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5529 "Space required after '<!ENTITY'\n"); 5530 } 5531 5532 if (RAW == '%') { 5533 NEXT; 5534 skipped = SKIP_BLANKS; 5535 if (skipped == 0) { 5536 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5537 "Space required after '%%'\n"); 5538 } 5539 isParameter = 1; 5540 } 5541 5542 name = xmlParseName(ctxt); 5543 if (name == NULL) { 5544 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5545 "xmlParseEntityDecl: no name\n"); 5546 return; 5547 } 5548 if (xmlStrchr(name, ':') != NULL) { 5549 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5550 "colons are forbidden from entities names '%s'\n", 5551 name, NULL, NULL); 5552 } 5553 skipped = SKIP_BLANKS; 5554 if (skipped == 0) { 5555 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5556 "Space required after the entity name\n"); 5557 } 5558 5559 ctxt->instate = XML_PARSER_ENTITY_DECL; 5560 /* 5561 * handle the various case of definitions... 
5562 */ 5563 if (isParameter) { 5564 if ((RAW == '"') || (RAW == '\'')) { 5565 value = xmlParseEntityValue(ctxt, &orig); 5566 if (value) { 5567 if ((ctxt->sax != NULL) && 5568 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5569 ctxt->sax->entityDecl(ctxt->userData, name, 5570 XML_INTERNAL_PARAMETER_ENTITY, 5571 NULL, NULL, value); 5572 } 5573 } else { 5574 URI = xmlParseExternalID(ctxt, &literal, 1); 5575 if ((URI == NULL) && (literal == NULL)) { 5576 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5577 } 5578 if (URI) { 5579 xmlURIPtr uri; 5580 5581 uri = xmlParseURI((const char *) URI); 5582 if (uri == NULL) { 5583 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5584 "Invalid URI: %s\n", URI); 5585 /* 5586 * This really ought to be a well formedness error 5587 * but the XML Core WG decided otherwise c.f. issue 5588 * E26 of the XML erratas. 5589 */ 5590 } else { 5591 if (uri->fragment != NULL) { 5592 /* 5593 * Okay this is foolish to block those but not 5594 * invalid URIs. 5595 */ 5596 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5597 } else { 5598 if ((ctxt->sax != NULL) && 5599 (!ctxt->disableSAX) && 5600 (ctxt->sax->entityDecl != NULL)) 5601 ctxt->sax->entityDecl(ctxt->userData, name, 5602 XML_EXTERNAL_PARAMETER_ENTITY, 5603 literal, URI, NULL); 5604 } 5605 xmlFreeURI(uri); 5606 } 5607 } 5608 } 5609 } else { 5610 if ((RAW == '"') || (RAW == '\'')) { 5611 value = xmlParseEntityValue(ctxt, &orig); 5612 if ((ctxt->sax != NULL) && 5613 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5614 ctxt->sax->entityDecl(ctxt->userData, name, 5615 XML_INTERNAL_GENERAL_ENTITY, 5616 NULL, NULL, value); 5617 /* 5618 * For expat compatibility in SAX mode. 
5619 */ 5620 if ((ctxt->myDoc == NULL) || 5621 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5622 if (ctxt->myDoc == NULL) { 5623 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5624 if (ctxt->myDoc == NULL) { 5625 xmlErrMemory(ctxt, "New Doc failed"); 5626 return; 5627 } 5628 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5629 } 5630 if (ctxt->myDoc->intSubset == NULL) 5631 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5632 BAD_CAST "fake", NULL, NULL); 5633 5634 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5635 NULL, NULL, value); 5636 } 5637 } else { 5638 URI = xmlParseExternalID(ctxt, &literal, 1); 5639 if ((URI == NULL) && (literal == NULL)) { 5640 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5641 } 5642 if (URI) { 5643 xmlURIPtr uri; 5644 5645 uri = xmlParseURI((const char *)URI); 5646 if (uri == NULL) { 5647 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5648 "Invalid URI: %s\n", URI); 5649 /* 5650 * This really ought to be a well formedness error 5651 * but the XML Core WG decided otherwise c.f. issue 5652 * E26 of the XML erratas. 5653 */ 5654 } else { 5655 if (uri->fragment != NULL) { 5656 /* 5657 * Okay this is foolish to block those but not 5658 * invalid URIs. 
5659 */ 5660 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5661 } 5662 xmlFreeURI(uri); 5663 } 5664 } 5665 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5666 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5667 "Space required before 'NDATA'\n"); 5668 } 5669 SKIP_BLANKS; 5670 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5671 SKIP(5); 5672 if (!IS_BLANK_CH(CUR)) { 5673 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5674 "Space required after 'NDATA'\n"); 5675 } 5676 SKIP_BLANKS; 5677 ndata = xmlParseName(ctxt); 5678 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5679 (ctxt->sax->unparsedEntityDecl != NULL)) 5680 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5681 literal, URI, ndata); 5682 } else { 5683 if ((ctxt->sax != NULL) && 5684 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5685 ctxt->sax->entityDecl(ctxt->userData, name, 5686 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5687 literal, URI, NULL); 5688 /* 5689 * For expat compatibility in SAX mode. 5690 * assuming the entity repalcement was asked for 5691 */ 5692 if ((ctxt->replaceEntities != 0) && 5693 ((ctxt->myDoc == NULL) || 5694 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5695 if (ctxt->myDoc == NULL) { 5696 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5697 if (ctxt->myDoc == NULL) { 5698 xmlErrMemory(ctxt, "New Doc failed"); 5699 return; 5700 } 5701 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5702 } 5703 5704 if (ctxt->myDoc->intSubset == NULL) 5705 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5706 BAD_CAST "fake", NULL, NULL); 5707 xmlSAX2EntityDecl(ctxt, name, 5708 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5709 literal, URI, NULL); 5710 } 5711 } 5712 } 5713 } 5714 if (ctxt->instate == XML_PARSER_EOF) 5715 return; 5716 SKIP_BLANKS; 5717 if (RAW != '>') { 5718 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5719 "xmlParseEntityDecl: entity %s not terminated\n", name); 5720 xmlHaltParser(ctxt); 5721 } else { 5722 if (input != ctxt->input) { 5723 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 
5724 "Entity declaration doesn't start and stop in the same entity\n"); 5725 } 5726 NEXT; 5727 } 5728 if (orig != NULL) { 5729 /* 5730 * Ugly mechanism to save the raw entity value. 5731 */ 5732 xmlEntityPtr cur = NULL; 5733 5734 if (isParameter) { 5735 if ((ctxt->sax != NULL) && 5736 (ctxt->sax->getParameterEntity != NULL)) 5737 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5738 } else { 5739 if ((ctxt->sax != NULL) && 5740 (ctxt->sax->getEntity != NULL)) 5741 cur = ctxt->sax->getEntity(ctxt->userData, name); 5742 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5743 cur = xmlSAX2GetEntity(ctxt, name); 5744 } 5745 } 5746 if (cur != NULL) { 5747 if (cur->orig != NULL) 5748 xmlFree(orig); 5749 else 5750 cur->orig = orig; 5751 } else 5752 xmlFree(orig); 5753 } 5754 if (value != NULL) xmlFree(value); 5755 if (URI != NULL) xmlFree(URI); 5756 if (literal != NULL) xmlFree(literal); 5757 } 5758 } 5759 5760 /** 5761 * xmlParseDefaultDecl: 5762 * @ctxt: an XML parser context 5763 * @value: Receive a possible fixed default value for the attribute 5764 * 5765 * Parse an attribute default declaration 5766 * 5767 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5768 * 5769 * [ VC: Required Attribute ] 5770 * if the default declaration is the keyword #REQUIRED, then the 5771 * attribute must be specified for all elements of the type in the 5772 * attribute-list declaration. 5773 * 5774 * [ VC: Attribute Default Legal ] 5775 * The declared default value must meet the lexical constraints of 5776 * the declared attribute type c.f. xmlValidateAttributeDecl() 5777 * 5778 * [ VC: Fixed Attribute Default ] 5779 * if an attribute has a default value declared with the #FIXED 5780 * keyword, instances of that attribute must match the default value. 5781 * 5782 * [ WFC: No < in Attribute Values ] 5783 * handled in xmlParseAttValue() 5784 * 5785 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5786 * or XML_ATTRIBUTE_FIXED. 
5787 */ 5788 5789 int 5790 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5791 int val; 5792 xmlChar *ret; 5793 5794 *value = NULL; 5795 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5796 SKIP(9); 5797 return(XML_ATTRIBUTE_REQUIRED); 5798 } 5799 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5800 SKIP(8); 5801 return(XML_ATTRIBUTE_IMPLIED); 5802 } 5803 val = XML_ATTRIBUTE_NONE; 5804 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5805 SKIP(6); 5806 val = XML_ATTRIBUTE_FIXED; 5807 if (!IS_BLANK_CH(CUR)) { 5808 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5809 "Space required after '#FIXED'\n"); 5810 } 5811 SKIP_BLANKS; 5812 } 5813 ret = xmlParseAttValue(ctxt); 5814 ctxt->instate = XML_PARSER_DTD; 5815 if (ret == NULL) { 5816 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5817 "Attribute default value declaration error\n"); 5818 } else 5819 *value = ret; 5820 return(val); 5821 } 5822 5823 /** 5824 * xmlParseNotationType: 5825 * @ctxt: an XML parser context 5826 * 5827 * parse an Notation attribute type. 5828 * 5829 * Note: the leading 'NOTATION' S part has already being parsed... 5830 * 5831 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5832 * 5833 * [ VC: Notation Attributes ] 5834 * Values of this type must match one of the notation names included 5835 * in the declaration; all notation names in the declaration must be declared. 
5836 * 5837 * Returns: the notation attribute tree built while parsing 5838 */ 5839 5840 xmlEnumerationPtr 5841 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5842 const xmlChar *name; 5843 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5844 5845 if (RAW != '(') { 5846 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5847 return(NULL); 5848 } 5849 SHRINK; 5850 do { 5851 NEXT; 5852 SKIP_BLANKS; 5853 name = xmlParseName(ctxt); 5854 if (name == NULL) { 5855 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5856 "Name expected in NOTATION declaration\n"); 5857 xmlFreeEnumeration(ret); 5858 return(NULL); 5859 } 5860 tmp = ret; 5861 while (tmp != NULL) { 5862 if (xmlStrEqual(name, tmp->name)) { 5863 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5864 "standalone: attribute notation value token %s duplicated\n", 5865 name, NULL); 5866 if (!xmlDictOwns(ctxt->dict, name)) 5867 xmlFree((xmlChar *) name); 5868 break; 5869 } 5870 tmp = tmp->next; 5871 } 5872 if (tmp == NULL) { 5873 cur = xmlCreateEnumeration(name); 5874 if (cur == NULL) { 5875 xmlFreeEnumeration(ret); 5876 return(NULL); 5877 } 5878 if (last == NULL) ret = last = cur; 5879 else { 5880 last->next = cur; 5881 last = cur; 5882 } 5883 } 5884 SKIP_BLANKS; 5885 } while (RAW == '|'); 5886 if (RAW != ')') { 5887 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5888 xmlFreeEnumeration(ret); 5889 return(NULL); 5890 } 5891 NEXT; 5892 return(ret); 5893 } 5894 5895 /** 5896 * xmlParseEnumerationType: 5897 * @ctxt: an XML parser context 5898 * 5899 * parse an Enumeration attribute type. 5900 * 5901 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5902 * 5903 * [ VC: Enumeration ] 5904 * Values of this type must match one of the Nmtoken tokens in 5905 * the declaration 5906 * 5907 * Returns: the enumeration attribute tree built while parsing 5908 */ 5909 5910 xmlEnumerationPtr 5911 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5912 xmlChar *name; 5913 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5914 5915 if (RAW != '(') { 5916 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5917 return(NULL); 5918 } 5919 SHRINK; 5920 do { 5921 NEXT; 5922 SKIP_BLANKS; 5923 name = xmlParseNmtoken(ctxt); 5924 if (name == NULL) { 5925 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5926 return(ret); 5927 } 5928 tmp = ret; 5929 while (tmp != NULL) { 5930 if (xmlStrEqual(name, tmp->name)) { 5931 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5932 "standalone: attribute enumeration value token %s duplicated\n", 5933 name, NULL); 5934 if (!xmlDictOwns(ctxt->dict, name)) 5935 xmlFree(name); 5936 break; 5937 } 5938 tmp = tmp->next; 5939 } 5940 if (tmp == NULL) { 5941 cur = xmlCreateEnumeration(name); 5942 if (!xmlDictOwns(ctxt->dict, name)) 5943 xmlFree(name); 5944 if (cur == NULL) { 5945 xmlFreeEnumeration(ret); 5946 return(NULL); 5947 } 5948 if (last == NULL) ret = last = cur; 5949 else { 5950 last->next = cur; 5951 last = cur; 5952 } 5953 } 5954 SKIP_BLANKS; 5955 } while (RAW == '|'); 5956 if (RAW != ')') { 5957 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5958 return(ret); 5959 } 5960 NEXT; 5961 return(ret); 5962 } 5963 5964 /** 5965 * xmlParseEnumeratedType: 5966 * @ctxt: an XML parser context 5967 * @tree: the enumeration tree built while parsing 5968 * 5969 * parse an Enumerated attribute type. 5970 * 5971 * [57] EnumeratedType ::= NotationType | Enumeration 5972 * 5973 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5974 * 5975 * 5976 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5977 */ 5978 5979 int 5980 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5981 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5982 SKIP(8); 5983 if (!IS_BLANK_CH(CUR)) { 5984 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5985 "Space required after 'NOTATION'\n"); 5986 return(0); 5987 } 5988 SKIP_BLANKS; 5989 *tree = xmlParseNotationType(ctxt); 5990 if (*tree == NULL) return(0); 5991 return(XML_ATTRIBUTE_NOTATION); 5992 } 5993 *tree = xmlParseEnumerationType(ctxt); 5994 if (*tree == NULL) return(0); 5995 return(XML_ATTRIBUTE_ENUMERATION); 5996 } 5997 5998 /** 5999 * xmlParseAttributeType: 6000 * @ctxt: an XML parser context 6001 * @tree: the enumeration tree built while parsing 6002 * 6003 * parse the Attribute list def for an element 6004 * 6005 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 6006 * 6007 * [55] StringType ::= 'CDATA' 6008 * 6009 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 6010 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 6011 * 6012 * Validity constraints for attribute values syntax are checked in 6013 * xmlValidateAttributeValue() 6014 * 6015 * [ VC: ID ] 6016 * Values of type ID must match the Name production. A name must not 6017 * appear more than once in an XML document as a value of this type; 6018 * i.e., ID values must uniquely identify the elements which bear them. 6019 * 6020 * [ VC: One ID per Element Type ] 6021 * No element type may have more than one ID attribute specified. 6022 * 6023 * [ VC: ID Attribute Default ] 6024 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 6025 * 6026 * [ VC: IDREF ] 6027 * Values of type IDREF must match the Name production, and values 6028 * of type IDREFS must match Names; each IDREF Name must match the value 6029 * of an ID attribute on some element in the XML document; i.e. 
IDREF 6030 * values must match the value of some ID attribute. 6031 * 6032 * [ VC: Entity Name ] 6033 * Values of type ENTITY must match the Name production, values 6034 * of type ENTITIES must match Names; each Entity Name must match the 6035 * name of an unparsed entity declared in the DTD. 6036 * 6037 * [ VC: Name Token ] 6038 * Values of type NMTOKEN must match the Nmtoken production; values 6039 * of type NMTOKENS must match Nmtokens. 6040 * 6041 * Returns the attribute type 6042 */ 6043 int 6044 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 6045 SHRINK; 6046 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 6047 SKIP(5); 6048 return(XML_ATTRIBUTE_CDATA); 6049 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 6050 SKIP(6); 6051 return(XML_ATTRIBUTE_IDREFS); 6052 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 6053 SKIP(5); 6054 return(XML_ATTRIBUTE_IDREF); 6055 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 6056 SKIP(2); 6057 return(XML_ATTRIBUTE_ID); 6058 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 6059 SKIP(6); 6060 return(XML_ATTRIBUTE_ENTITY); 6061 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 6062 SKIP(8); 6063 return(XML_ATTRIBUTE_ENTITIES); 6064 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 6065 SKIP(8); 6066 return(XML_ATTRIBUTE_NMTOKENS); 6067 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 6068 SKIP(7); 6069 return(XML_ATTRIBUTE_NMTOKEN); 6070 } 6071 return(xmlParseEnumeratedType(ctxt, tree)); 6072 } 6073 6074 /** 6075 * xmlParseAttributeListDecl: 6076 * @ctxt: an XML parser context 6077 * 6078 * : parse the Attribute list def for an element 6079 * 6080 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 6081 * 6082 * [53] AttDef ::= S Name S AttType S DefaultDecl 6083 * 6084 */ 6085 void 6086 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 6087 const xmlChar *elemName; 6088 const xmlChar *attrName; 6089 xmlEnumerationPtr tree; 6090 6091 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 6092 xmlParserInputPtr input = ctxt->input; 6093 6094 SKIP(9); 6095 if (!IS_BLANK_CH(CUR)) { 6096 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6097 "Space required after '<!ATTLIST'\n"); 6098 } 6099 SKIP_BLANKS; 6100 elemName = xmlParseName(ctxt); 6101 if (elemName == NULL) { 6102 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6103 "ATTLIST: no name for Element\n"); 6104 return; 6105 } 6106 SKIP_BLANKS; 6107 GROW; 6108 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 6109 const xmlChar *check = CUR_PTR; 6110 int type; 6111 int def; 6112 xmlChar *defaultValue = NULL; 6113 6114 GROW; 6115 tree = NULL; 6116 attrName = xmlParseName(ctxt); 6117 if (attrName == NULL) { 6118 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6119 "ATTLIST: no name for Attribute\n"); 6120 break; 6121 } 6122 GROW; 6123 if (!IS_BLANK_CH(CUR)) { 6124 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6125 "Space required after the attribute name\n"); 6126 break; 6127 } 6128 SKIP_BLANKS; 6129 6130 type = xmlParseAttributeType(ctxt, &tree); 6131 if (type <= 0) { 6132 break; 6133 } 6134 6135 GROW; 6136 if (!IS_BLANK_CH(CUR)) { 6137 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6138 "Space required after the attribute type\n"); 6139 if (tree != NULL) 6140 xmlFreeEnumeration(tree); 6141 break; 6142 } 6143 SKIP_BLANKS; 6144 6145 def = xmlParseDefaultDecl(ctxt, &defaultValue); 6146 if (def <= 0) { 6147 if (defaultValue != NULL) 6148 xmlFree(defaultValue); 6149 if (tree != NULL) 6150 xmlFreeEnumeration(tree); 6151 break; 6152 } 6153 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 6154 xmlAttrNormalizeSpace(defaultValue, defaultValue); 6155 6156 GROW; 6157 if (RAW != '>') { 6158 if 
(!IS_BLANK_CH(CUR)) { 6159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6160 "Space required after the attribute default value\n"); 6161 if (defaultValue != NULL) 6162 xmlFree(defaultValue); 6163 if (tree != NULL) 6164 xmlFreeEnumeration(tree); 6165 break; 6166 } 6167 SKIP_BLANKS; 6168 } 6169 if (check == CUR_PTR) { 6170 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6171 "in xmlParseAttributeListDecl\n"); 6172 if (defaultValue != NULL) 6173 xmlFree(defaultValue); 6174 if (tree != NULL) 6175 xmlFreeEnumeration(tree); 6176 break; 6177 } 6178 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6179 (ctxt->sax->attributeDecl != NULL)) 6180 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6181 type, def, defaultValue, tree); 6182 else if (tree != NULL) 6183 xmlFreeEnumeration(tree); 6184 6185 if ((ctxt->sax2) && (defaultValue != NULL) && 6186 (def != XML_ATTRIBUTE_IMPLIED) && 6187 (def != XML_ATTRIBUTE_REQUIRED)) { 6188 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6189 } 6190 if (ctxt->sax2) { 6191 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6192 } 6193 if (defaultValue != NULL) 6194 xmlFree(defaultValue); 6195 GROW; 6196 } 6197 if (RAW == '>') { 6198 if (input != ctxt->input) { 6199 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6200 "Attribute list declaration doesn't start and stop in the same entity\n", 6201 NULL, NULL); 6202 } 6203 NEXT; 6204 } 6205 } 6206 } 6207 6208 /** 6209 * xmlParseElementMixedContentDecl: 6210 * @ctxt: an XML parser context 6211 * @inputchk: the input used for the current entity, needed for boundary checks 6212 * 6213 * parse the declaration for a Mixed Element content 6214 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6215 * 6216 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6217 * '(' S? '#PCDATA' S? 
')' 6218 * 6219 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6220 * 6221 * [ VC: No Duplicate Types ] 6222 * The same name must not appear more than once in a single 6223 * mixed-content declaration. 6224 * 6225 * returns: the list of the xmlElementContentPtr describing the element choices 6226 */ 6227 xmlElementContentPtr 6228 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6229 xmlElementContentPtr ret = NULL, cur = NULL, n; 6230 const xmlChar *elem = NULL; 6231 6232 GROW; 6233 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6234 SKIP(7); 6235 SKIP_BLANKS; 6236 SHRINK; 6237 if (RAW == ')') { 6238 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6239 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6240 "Element content declaration doesn't start and stop in the same entity\n", 6241 NULL, NULL); 6242 } 6243 NEXT; 6244 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6245 if (ret == NULL) 6246 return(NULL); 6247 if (RAW == '*') { 6248 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6249 NEXT; 6250 } 6251 return(ret); 6252 } 6253 if ((RAW == '(') || (RAW == '|')) { 6254 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6255 if (ret == NULL) return(NULL); 6256 } 6257 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6258 NEXT; 6259 if (elem == NULL) { 6260 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6261 if (ret == NULL) return(NULL); 6262 ret->c1 = cur; 6263 if (cur != NULL) 6264 cur->parent = ret; 6265 cur = ret; 6266 } else { 6267 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6268 if (n == NULL) return(NULL); 6269 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6270 if (n->c1 != NULL) 6271 n->c1->parent = n; 6272 cur->c2 = n; 6273 if (n != NULL) 6274 n->parent = cur; 6275 cur = n; 6276 } 6277 SKIP_BLANKS; 6278 elem = xmlParseName(ctxt); 6279 if (elem == NULL) 
{ 6280 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6281 "xmlParseElementMixedContentDecl : Name expected\n"); 6282 xmlFreeDocElementContent(ctxt->myDoc, cur); 6283 return(NULL); 6284 } 6285 SKIP_BLANKS; 6286 GROW; 6287 } 6288 if ((RAW == ')') && (NXT(1) == '*')) { 6289 if (elem != NULL) { 6290 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6291 XML_ELEMENT_CONTENT_ELEMENT); 6292 if (cur->c2 != NULL) 6293 cur->c2->parent = cur; 6294 } 6295 if (ret != NULL) 6296 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6297 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6298 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6299 "Element content declaration doesn't start and stop in the same entity\n", 6300 NULL, NULL); 6301 } 6302 SKIP(2); 6303 } else { 6304 xmlFreeDocElementContent(ctxt->myDoc, ret); 6305 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6306 return(NULL); 6307 } 6308 6309 } else { 6310 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6311 } 6312 return(ret); 6313 } 6314 6315 /** 6316 * xmlParseElementChildrenContentDeclPriv: 6317 * @ctxt: an XML parser context 6318 * @inputchk: the input used for the current entity, needed for boundary checks 6319 * @depth: the level of recursion 6320 * 6321 * parse the declaration for a Mixed Element content 6322 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6323 * 6324 * 6325 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6326 * 6327 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6328 * 6329 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6330 * 6331 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6332 * 6333 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6334 * TODO Parameter-entity replacement text must be properly nested 6335 * with parenthesized groups. 
That is to say, if either of the 6336 * opening or closing parentheses in a choice, seq, or Mixed 6337 * construct is contained in the replacement text for a parameter 6338 * entity, both must be contained in the same replacement text. For 6339 * interoperability, if a parameter-entity reference appears in a 6340 * choice, seq, or Mixed construct, its replacement text should not 6341 * be empty, and neither the first nor last non-blank character of 6342 * the replacement text should be a connector (| or ,). 6343 * 6344 * Returns the tree of xmlElementContentPtr describing the element 6345 * hierarchy. 6346 */ 6347 static xmlElementContentPtr 6348 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6349 int depth) { 6350 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6351 const xmlChar *elem; 6352 xmlChar type = 0; 6353 6354 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6355 (depth > 2048)) { 6356 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6357 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6358 depth); 6359 return(NULL); 6360 } 6361 SKIP_BLANKS; 6362 GROW; 6363 if (RAW == '(') { 6364 int inputid = ctxt->input->id; 6365 6366 /* Recurse on first child */ 6367 NEXT; 6368 SKIP_BLANKS; 6369 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6370 depth + 1); 6371 SKIP_BLANKS; 6372 GROW; 6373 } else { 6374 elem = xmlParseName(ctxt); 6375 if (elem == NULL) { 6376 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6377 return(NULL); 6378 } 6379 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6380 if (cur == NULL) { 6381 xmlErrMemory(ctxt, NULL); 6382 return(NULL); 6383 } 6384 GROW; 6385 if (RAW == '?') { 6386 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6387 NEXT; 6388 } else if (RAW == '*') { 6389 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6390 NEXT; 6391 } else if (RAW == '+') { 6392 cur->ocur = 
XML_ELEMENT_CONTENT_PLUS; 6393 NEXT; 6394 } else { 6395 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6396 } 6397 GROW; 6398 } 6399 SKIP_BLANKS; 6400 SHRINK; 6401 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6402 /* 6403 * Each loop we parse one separator and one element. 6404 */ 6405 if (RAW == ',') { 6406 if (type == 0) type = CUR; 6407 6408 /* 6409 * Detect "Name | Name , Name" error 6410 */ 6411 else if (type != CUR) { 6412 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6413 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6414 type); 6415 if ((last != NULL) && (last != ret)) 6416 xmlFreeDocElementContent(ctxt->myDoc, last); 6417 if (ret != NULL) 6418 xmlFreeDocElementContent(ctxt->myDoc, ret); 6419 return(NULL); 6420 } 6421 NEXT; 6422 6423 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6424 if (op == NULL) { 6425 if ((last != NULL) && (last != ret)) 6426 xmlFreeDocElementContent(ctxt->myDoc, last); 6427 xmlFreeDocElementContent(ctxt->myDoc, ret); 6428 return(NULL); 6429 } 6430 if (last == NULL) { 6431 op->c1 = ret; 6432 if (ret != NULL) 6433 ret->parent = op; 6434 ret = cur = op; 6435 } else { 6436 cur->c2 = op; 6437 if (op != NULL) 6438 op->parent = cur; 6439 op->c1 = last; 6440 if (last != NULL) 6441 last->parent = op; 6442 cur =op; 6443 last = NULL; 6444 } 6445 } else if (RAW == '|') { 6446 if (type == 0) type = CUR; 6447 6448 /* 6449 * Detect "Name , Name | Name" error 6450 */ 6451 else if (type != CUR) { 6452 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6453 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6454 type); 6455 if ((last != NULL) && (last != ret)) 6456 xmlFreeDocElementContent(ctxt->myDoc, last); 6457 if (ret != NULL) 6458 xmlFreeDocElementContent(ctxt->myDoc, ret); 6459 return(NULL); 6460 } 6461 NEXT; 6462 6463 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6464 if (op == NULL) { 6465 if ((last != NULL) && (last != ret)) 6466 
xmlFreeDocElementContent(ctxt->myDoc, last); 6467 if (ret != NULL) 6468 xmlFreeDocElementContent(ctxt->myDoc, ret); 6469 return(NULL); 6470 } 6471 if (last == NULL) { 6472 op->c1 = ret; 6473 if (ret != NULL) 6474 ret->parent = op; 6475 ret = cur = op; 6476 } else { 6477 cur->c2 = op; 6478 if (op != NULL) 6479 op->parent = cur; 6480 op->c1 = last; 6481 if (last != NULL) 6482 last->parent = op; 6483 cur =op; 6484 last = NULL; 6485 } 6486 } else { 6487 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6488 if ((last != NULL) && (last != ret)) 6489 xmlFreeDocElementContent(ctxt->myDoc, last); 6490 if (ret != NULL) 6491 xmlFreeDocElementContent(ctxt->myDoc, ret); 6492 return(NULL); 6493 } 6494 GROW; 6495 SKIP_BLANKS; 6496 GROW; 6497 if (RAW == '(') { 6498 int inputid = ctxt->input->id; 6499 /* Recurse on second child */ 6500 NEXT; 6501 SKIP_BLANKS; 6502 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6503 depth + 1); 6504 SKIP_BLANKS; 6505 } else { 6506 elem = xmlParseName(ctxt); 6507 if (elem == NULL) { 6508 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6509 if (ret != NULL) 6510 xmlFreeDocElementContent(ctxt->myDoc, ret); 6511 return(NULL); 6512 } 6513 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6514 if (last == NULL) { 6515 if (ret != NULL) 6516 xmlFreeDocElementContent(ctxt->myDoc, ret); 6517 return(NULL); 6518 } 6519 if (RAW == '?') { 6520 last->ocur = XML_ELEMENT_CONTENT_OPT; 6521 NEXT; 6522 } else if (RAW == '*') { 6523 last->ocur = XML_ELEMENT_CONTENT_MULT; 6524 NEXT; 6525 } else if (RAW == '+') { 6526 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6527 NEXT; 6528 } else { 6529 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6530 } 6531 } 6532 SKIP_BLANKS; 6533 GROW; 6534 } 6535 if ((cur != NULL) && (last != NULL)) { 6536 cur->c2 = last; 6537 if (last != NULL) 6538 last->parent = cur; 6539 } 6540 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6541 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6542 
"Element content declaration doesn't start and stop in the same entity\n", 6543 NULL, NULL); 6544 } 6545 NEXT; 6546 if (RAW == '?') { 6547 if (ret != NULL) { 6548 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6549 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6550 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6551 else 6552 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6553 } 6554 NEXT; 6555 } else if (RAW == '*') { 6556 if (ret != NULL) { 6557 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6558 cur = ret; 6559 /* 6560 * Some normalization: 6561 * (a | b* | c?)* == (a | b | c)* 6562 */ 6563 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6564 if ((cur->c1 != NULL) && 6565 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6566 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6567 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6568 if ((cur->c2 != NULL) && 6569 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6570 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6571 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6572 cur = cur->c2; 6573 } 6574 } 6575 NEXT; 6576 } else if (RAW == '+') { 6577 if (ret != NULL) { 6578 int found = 0; 6579 6580 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6581 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6582 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6583 else 6584 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6585 /* 6586 * Some normalization: 6587 * (a | b*)+ == (a | b)* 6588 * (a | b?)+ == (a | b)* 6589 */ 6590 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6591 if ((cur->c1 != NULL) && 6592 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6593 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6594 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6595 found = 1; 6596 } 6597 if ((cur->c2 != NULL) && 6598 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6599 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6600 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6601 found = 1; 6602 } 6603 cur = cur->c2; 6604 } 6605 if (found) 6606 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6607 } 6608 NEXT; 6609 
} 6610 return(ret); 6611 } 6612 6613 /** 6614 * xmlParseElementChildrenContentDecl: 6615 * @ctxt: an XML parser context 6616 * @inputchk: the input used for the current entity, needed for boundary checks 6617 * 6618 * parse the declaration for a Mixed Element content 6619 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6620 * 6621 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6622 * 6623 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6624 * 6625 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6626 * 6627 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6628 * 6629 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6630 * TODO Parameter-entity replacement text must be properly nested 6631 * with parenthesized groups. That is to say, if either of the 6632 * opening or closing parentheses in a choice, seq, or Mixed 6633 * construct is contained in the replacement text for a parameter 6634 * entity, both must be contained in the same replacement text. For 6635 * interoperability, if a parameter-entity reference appears in a 6636 * choice, seq, or Mixed construct, its replacement text should not 6637 * be empty, and neither the first nor last non-blank character of 6638 * the replacement text should be a connector (| or ,). 6639 * 6640 * Returns the tree of xmlElementContentPtr describing the element 6641 * hierarchy. 6642 */ 6643 xmlElementContentPtr 6644 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6645 /* stub left for API/ABI compat */ 6646 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6647 } 6648 6649 /** 6650 * xmlParseElementContentDecl: 6651 * @ctxt: an XML parser context 6652 * @name: the name of the element being defined. 
6653 * @result: the Element Content pointer will be stored here if any 6654 * 6655 * parse the declaration for an Element content either Mixed or Children, 6656 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6657 * 6658 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6659 * 6660 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6661 */ 6662 6663 int 6664 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6665 xmlElementContentPtr *result) { 6666 6667 xmlElementContentPtr tree = NULL; 6668 int inputid = ctxt->input->id; 6669 int res; 6670 6671 *result = NULL; 6672 6673 if (RAW != '(') { 6674 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6675 "xmlParseElementContentDecl : %s '(' expected\n", name); 6676 return(-1); 6677 } 6678 NEXT; 6679 GROW; 6680 if (ctxt->instate == XML_PARSER_EOF) 6681 return(-1); 6682 SKIP_BLANKS; 6683 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6684 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6685 res = XML_ELEMENT_TYPE_MIXED; 6686 } else { 6687 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6688 res = XML_ELEMENT_TYPE_ELEMENT; 6689 } 6690 SKIP_BLANKS; 6691 *result = tree; 6692 return(res); 6693 } 6694 6695 /** 6696 * xmlParseElementDecl: 6697 * @ctxt: an XML parser context 6698 * 6699 * parse an Element declaration. 6700 * 6701 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 6702 * 6703 * [ VC: Unique Element Type Declaration ] 6704 * No element type may be declared more than once 6705 * 6706 * Returns the type of the element, or -1 in case of error 6707 */ 6708 int 6709 xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 6710 const xmlChar *name; 6711 int ret = -1; 6712 xmlElementContentPtr content = NULL; 6713 6714 /* GROW; done in the caller */ 6715 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 6716 xmlParserInputPtr input = ctxt->input; 6717 6718 SKIP(9); 6719 if (!IS_BLANK_CH(CUR)) { 6720 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6721 "Space required after 'ELEMENT'\n"); 6722 return(-1); 6723 } 6724 SKIP_BLANKS; 6725 name = xmlParseName(ctxt); 6726 if (name == NULL) { 6727 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6728 "xmlParseElementDecl: no name for Element\n"); 6729 return(-1); 6730 } 6731 while ((RAW == 0) && (ctxt->inputNr > 1)) 6732 xmlPopInput(ctxt); 6733 if (!IS_BLANK_CH(CUR)) { 6734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6735 "Space required after the element name\n"); 6736 } 6737 SKIP_BLANKS; 6738 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 6739 SKIP(5); 6740 /* 6741 * Element must always be empty. 6742 */ 6743 ret = XML_ELEMENT_TYPE_EMPTY; 6744 } else if ((RAW == 'A') && (NXT(1) == 'N') && 6745 (NXT(2) == 'Y')) { 6746 SKIP(3); 6747 /* 6748 * Element is a generic container. 6749 */ 6750 ret = XML_ELEMENT_TYPE_ANY; 6751 } else if (RAW == '(') { 6752 ret = xmlParseElementContentDecl(ctxt, name, &content); 6753 } else { 6754 /* 6755 * [ WFC: PEs in Internal Subset ] error handling. 
6756 */ 6757 if ((RAW == '%') && (ctxt->external == 0) && 6758 (ctxt->inputNr == 1)) { 6759 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 6760 "PEReference: forbidden within markup decl in internal subset\n"); 6761 } else { 6762 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6763 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 6764 } 6765 return(-1); 6766 } 6767 6768 SKIP_BLANKS; 6769 /* 6770 * Pop-up of finished entities. 6771 */ 6772 while ((RAW == 0) && (ctxt->inputNr > 1)) 6773 xmlPopInput(ctxt); 6774 SKIP_BLANKS; 6775 6776 if (RAW != '>') { 6777 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6778 if (content != NULL) { 6779 xmlFreeDocElementContent(ctxt->myDoc, content); 6780 } 6781 } else { 6782 if (input != ctxt->input) { 6783 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6784 "Element declaration doesn't start and stop in the same entity\n"); 6785 } 6786 6787 NEXT; 6788 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6789 (ctxt->sax->elementDecl != NULL)) { 6790 if (content != NULL) 6791 content->parent = NULL; 6792 ctxt->sax->elementDecl(ctxt->userData, name, ret, 6793 content); 6794 if ((content != NULL) && (content->parent == NULL)) { 6795 /* 6796 * this is a trick: if xmlAddElementDecl is called, 6797 * instead of copying the full tree it is plugged directly 6798 * if called from the parser. Avoid duplicating the 6799 * interfaces or change the API/ABI 6800 */ 6801 xmlFreeDocElementContent(ctxt->myDoc, content); 6802 } 6803 } else if (content != NULL) { 6804 xmlFreeDocElementContent(ctxt->myDoc, content); 6805 } 6806 } 6807 } 6808 return(ret); 6809 } 6810 6811 /** 6812 * xmlParseConditionalSections 6813 * @ctxt: an XML parser context 6814 * 6815 * [61] conditionalSect ::= includeSect | ignoreSect 6816 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 6817 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
*                    '[' ignoreSectContents* ']]>'
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
 *
 * The function starts by skipping 3 characters without looking at them;
 * the callers visible in this file only invoke it once they have seen
 * '<![' at the current position.
 *
 * An INCLUDE section is parsed like external subset content, nested
 * conditional sections being handled by recursion. An IGNORE section is
 * skipped while tracking the nesting of '<![' and ']]>'.
 * An invalid keyword or a missing '[' halts the parser.
 */

static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
    /* id of the input where the section starts, for the boundary checks */
    int id = ctxt->input->id;

    SKIP(3);
    SKIP_BLANKS;
    if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
        SKIP(7);
        SKIP_BLANKS;
        if (RAW != '[') {
            xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
            xmlHaltParser(ctxt);
            return;
        } else {
            /* the keyword and the '[' have to come from the same entity */
            if (ctxt->input->id != id) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
            "All markup of the conditional section is not in the same entity\n",
                                     NULL, NULL);
            }
            NEXT;
        }
        if (xmlParserDebugEntities) {
            if ((ctxt->input != NULL) && (ctxt->input->filename))
                xmlGenericError(xmlGenericErrorContext,
                        "%s(%d): ", ctxt->input->filename,
                        ctxt->input->line);
            xmlGenericError(xmlGenericErrorContext,
                    "Entering INCLUDE Conditional Section\n");
        }

        /*
         * Parse declarations until ']]>', the end of the input or a stop
         * of the parser.
         */
        while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
                (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
            /* position before this iteration, used to detect a stall */
            const xmlChar *check = CUR_PTR;
            unsigned int cons = ctxt->input->consumed;

            if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
                /* nested conditional section */
                xmlParseConditionalSections(ctxt);
            } else if (IS_BLANK_CH(CUR)) {
                NEXT;
            } else if (RAW == '%') {
                xmlParsePEReference(ctxt);
            } else
                xmlParseMarkupDecl(ctxt);

            /*
             * Pop-up of finished entities.
             */
            while ((RAW == 0) && (ctxt->inputNr > 1))
                xmlPopInput(ctxt);

            /*
             * Nothing was consumed by this iteration: report the error
             * and halt instead of looping forever.
             */
            if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
                xmlHaltParser(ctxt);
                break;
            }
        }
        if (xmlParserDebugEntities) {
            if ((ctxt->input != NULL) && (ctxt->input->filename))
                xmlGenericError(xmlGenericErrorContext,
                        "%s(%d): ", ctxt->input->filename,
                        ctxt->input->line);
            xmlGenericError(xmlGenericErrorContext,
                    "Leaving INCLUDE Conditional Section\n");
        }

    } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
        int state;                      /* saved ctxt->disableSAX */
        xmlParserInputState instate;    /* saved ctxt->instate */
        int depth = 0;                  /* nesting level of '<![' */

        SKIP(6);
        SKIP_BLANKS;
        if (RAW != '[') {
            xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
            xmlHaltParser(ctxt);
            return;
        } else {
            /* the keyword and the '[' have to come from the same entity */
            if (ctxt->input->id != id) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
            "All markup of the conditional section is not in the same entity\n",
                                     NULL, NULL);
            }
            NEXT;
        }
        if (xmlParserDebugEntities) {
            if ((ctxt->input != NULL) && (ctxt->input->filename))
                xmlGenericError(xmlGenericErrorContext,
                        "%s(%d): ", ctxt->input->filename,
                        ctxt->input->line);
            xmlGenericError(xmlGenericErrorContext,
                    "Entering IGNORE Conditional Section\n");
        }

        /*
         * Parse up to the end of the conditional section
         * But disable SAX event generating DTD building in the meantime
         */
        state = ctxt->disableSAX;
        instate = ctxt->instate;
        /* SAX stays enabled when the parser runs in recovery mode */
        if (ctxt->recovery == 0) ctxt->disableSAX = 1;
        ctxt->instate = XML_PARSER_IGNORE;

        /*
         * depth drops to -1 on the ']]>' closing this section; that
         * terminator is left in the input and skipped at the end of the
         * function.
         */
        while (((depth >= 0) && (RAW != 0)) &&
               (ctxt->instate != XML_PARSER_EOF)) {
          if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
            depth++;
            SKIP(3);
            continue;
          }
          if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
            /* only the terminators of nested sections are consumed here */
            if (--depth >= 0) SKIP(3);
            continue;
          }
          NEXT;
          continue;
        }

        /* both saved values are restored unconditionally */
        ctxt->disableSAX = state;
        ctxt->instate = instate;

        if (xmlParserDebugEntities) {
            if ((ctxt->input != NULL) && (ctxt->input->filename))
                xmlGenericError(xmlGenericErrorContext,
                        "%s(%d): ", ctxt->input->filename,
                        ctxt->input->line);
            xmlGenericError(xmlGenericErrorContext,
                    "Leaving IGNORE Conditional Section\n");
        }

    } else {
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
        xmlHaltParser(ctxt);
        return;
    }

    /*
     * NOTE(review): SHRINK is defined outside this chunk; presumably it
     * refreshes the input buffer so that the test below sees fresh data
     * - confirm.
     */
    if (RAW == 0)
        SHRINK;

    if (RAW == 0) {
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
    } else {
        /* the closing ']]>' has to be in the entity of the opening '<![' */
        if (ctxt->input->id != id) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
        "All markup of the conditional section is not in the same entity\n",
                                 NULL, NULL);
        }
        /* skip ']]>' only if 3 characters are left in the buffer */
        if ((ctxt-> instate != XML_PARSER_EOF) &&
            ((ctxt->input->cur + 3) <= ctxt->input->end))
            SKIP(3);
    }
}

/**
 * xmlParseMarkupDecl:
 * @ctxt:  an XML parser context
 *
 * parse Markup declarations
 *
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
 *                     NotationDecl | PI | Comment
 *
 * [ VC: Proper Declaration/PE Nesting ]
 * Parameter-entity replacement text must be properly nested with
 * markup declarations. That is to say, if either the first character
 * or the last character of a markup declaration (markupdecl above) is
 * contained in the replacement text for a parameter-entity reference,
 * both must be contained in the same replacement text.
 *
 * [ WFC: PEs in Internal Subset ]
 * In the internal DTD subset, parameter-entity references can occur
 * only where markup declarations can occur, not within markup declarations.
 * (This does not apply to references that occur in external parameter
 * entities or to the external subset.)
6995 */ 6996 void 6997 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6998 GROW; 6999 if (CUR == '<') { 7000 if (NXT(1) == '!') { 7001 switch (NXT(2)) { 7002 case 'E': 7003 if (NXT(3) == 'L') 7004 xmlParseElementDecl(ctxt); 7005 else if (NXT(3) == 'N') 7006 xmlParseEntityDecl(ctxt); 7007 break; 7008 case 'A': 7009 xmlParseAttributeListDecl(ctxt); 7010 break; 7011 case 'N': 7012 xmlParseNotationDecl(ctxt); 7013 break; 7014 case '-': 7015 xmlParseComment(ctxt); 7016 break; 7017 default: 7018 /* there is an error but it will be detected later */ 7019 break; 7020 } 7021 } else if (NXT(1) == '?') { 7022 xmlParsePI(ctxt); 7023 } 7024 } 7025 7026 /* 7027 * detect requirement to exit there and act accordingly 7028 * and avoid having instate overriden later on 7029 */ 7030 if (ctxt->instate == XML_PARSER_EOF) 7031 return; 7032 7033 /* 7034 * This is only for internal subset. On external entities, 7035 * the replacement is done before parsing stage 7036 */ 7037 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 7038 xmlParsePEReference(ctxt); 7039 7040 /* 7041 * Conditional sections are allowed from entities included 7042 * by PE References in the internal subset. 7043 */ 7044 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 7045 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7046 xmlParseConditionalSections(ctxt); 7047 } 7048 } 7049 7050 ctxt->instate = XML_PARSER_DTD; 7051 } 7052 7053 /** 7054 * xmlParseTextDecl: 7055 * @ctxt: an XML parser context 7056 * 7057 * parse an XML declaration header for external entities 7058 * 7059 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 7060 */ 7061 7062 void 7063 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 7064 xmlChar *version; 7065 const xmlChar *encoding; 7066 7067 /* 7068 * We know that '<?xml' is here. 
7069 */ 7070 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 7071 SKIP(5); 7072 } else { 7073 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 7074 return; 7075 } 7076 7077 if (!IS_BLANK_CH(CUR)) { 7078 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7079 "Space needed after '<?xml'\n"); 7080 } 7081 SKIP_BLANKS; 7082 7083 /* 7084 * We may have the VersionInfo here. 7085 */ 7086 version = xmlParseVersionInfo(ctxt); 7087 if (version == NULL) 7088 version = xmlCharStrdup(XML_DEFAULT_VERSION); 7089 else { 7090 if (!IS_BLANK_CH(CUR)) { 7091 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7092 "Space needed here\n"); 7093 } 7094 } 7095 ctxt->input->version = version; 7096 7097 /* 7098 * We must have the encoding declaration 7099 */ 7100 encoding = xmlParseEncodingDecl(ctxt); 7101 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7102 /* 7103 * The XML REC instructs us to stop parsing right here 7104 */ 7105 return; 7106 } 7107 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 7108 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 7109 "Missing encoding in text declaration\n"); 7110 } 7111 7112 SKIP_BLANKS; 7113 if ((RAW == '?') && (NXT(1) == '>')) { 7114 SKIP(2); 7115 } else if (RAW == '>') { 7116 /* Deprecated old WD ... */ 7117 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7118 NEXT; 7119 } else { 7120 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7121 MOVETO_ENDTAG(CUR_PTR); 7122 NEXT; 7123 } 7124 } 7125 7126 /** 7127 * xmlParseExternalSubset: 7128 * @ctxt: an XML parser context 7129 * @ExternalID: the external identifier 7130 * @SystemID: the system identifier (or URL) 7131 * 7132 * parse Markup declarations from an external subset 7133 * 7134 * [30] extSubset ::= textDecl? 
*                            extSubsetDecl
 *
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
 *
 * When no encoding is known yet and at least 4 bytes are available, the
 * encoding is guessed from the first 4 bytes. A document is created if
 * the context has none, and an internal subset carrying @ExternalID and
 * @SystemID is created if the document has none.
 * The function sets ctxt->external to 1 and does not reset it.
 */
void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
                       const xmlChar *SystemID) {
    xmlDetectSAX2(ctxt);
    GROW;

    if ((ctxt->encoding == NULL) &&
        (ctxt->input->end - ctxt->input->cur >= 4)) {
        xmlChar start[4];
        xmlCharEncoding enc;

        /* guess the encoding from the first 4 bytes of the input */
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(start, 4);
        if (enc != XML_CHAR_ENCODING_NONE)
            xmlSwitchEncoding(ctxt, enc);
    }

    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
        xmlParseTextDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            xmlHaltParser(ctxt);
            return;
        }
    }
    if (ctxt->myDoc == NULL) {
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
        if (ctxt->myDoc == NULL) {
            xmlErrMemory(ctxt, "New Doc failed");
            return;
        }
        ctxt->myDoc->properties = XML_DOC_INTERNAL;
    }
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);

    ctxt->instate = XML_PARSER_DTD;
    ctxt->external = 1;
    /* loop as long as the input looks like external subset content */
    while (((RAW == '<') && (NXT(1) == '?')) ||
           ((RAW == '<') && (NXT(1) == '!')) ||
           (RAW == '%') || IS_BLANK_CH(CUR)) {
        /* position before this iteration, used to detect a stall */
        const xmlChar *check = CUR_PTR;
        unsigned int cons = ctxt->input->consumed;

        GROW;
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
            xmlParseConditionalSections(ctxt);
        } else if (IS_BLANK_CH(CUR)) {
            NEXT;
        } else if (RAW == '%') {
            xmlParsePEReference(ctxt);
        } else
            xmlParseMarkupDecl(ctxt);

        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);

        /* nothing was consumed: report it and stop looping */
        if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
            break;
        }
    }

    /* anything left in the input is not external subset content */
    if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
    }

}

/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * parse and handle entity references in content, depending on the SAX
 * interface, this may end-up in a call to character() if this is a
 * CharRef, a predefined entity, if there is no reference() callback.
 * or if the parser was asked to switch to that mode.
 *
 * The function does nothing unless the current character is '&'.
 * For a general entity the steps are:
 *   - parse the entity content if this was not done yet (or if
 *     substitution is requested and the entity has no children), keeping
 *     the resulting node list on the entity when appropriate;
 *   - if the entity has no children, replay its content for the SAX
 *     callbacks when it had been checked before, and emit the reference()
 *     callback when entities are not substituted;
 *   - otherwise either emit the reference() callback or, when entities
 *     are substituted, add the entity children or a copy of them under
 *     ctxt->node.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    int was_checked;      /* value of ent->checked before this reference */
    xmlNodePtr list = NULL; /* nodes built while parsing the entity content */
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
        int i = 0;
        xmlChar out[10];
        /* read before xmlParseCharRef() moves the input forward */
        int hex = NXT(2);
        int value = xmlParseCharRef(ctxt);

        /* 0 is not processed any further */
        if (value == 0)
            return;
        if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
            /*
             * So we are using non-UTF-8 buffers
             * Check that the char fit on 8bits, if not
             * generate a CharRef.
             */
            if (value <= 0xFF) {
                out[0] = value;
                out[1] = 0;
                if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->characters(ctxt->userData, out, 1);
            } else {
                /* keep the notation, hexadecimal or decimal, of the input */
                if ((hex == 'x') || (hex == 'X'))
                    snprintf((char *)out, sizeof(out), "#x%X", value);
                else
                    snprintf((char *)out, sizeof(out), "#%d", value);
                if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->reference(ctxt->userData, out);
            }
        } else {
            /*
             * Just encode the value in UTF-8
             */
            COPY_BUF(0 ,out, i, value);
            out[i] = 0;
            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->characters(ctxt->userData, out, i);
        }
        return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    /* no entity processing once the document is not well formed */
    if (!ctxt->wellFormed)
        return;
    was_checked = ent->checked;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
        val = ent->content;
        if (val == NULL) return;
        /*
         * inline the entity.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
        return;
    }

    /*
     * The first reference to the entity trigger a parsing phase
     * where the ent->children is filled with the result from
     * the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     */
    if (((ent->checked == 0) ||
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
        /* entity counter before the content is parsed */
        unsigned long oldnbent = ctxt->nbentities;

        /*
         * This is a bit hackish but this seems the best
         * way to make sure both SAX and DOM entity support
         * behaves okay.
         */
        void *user_data;
        if (ctxt->userData == ctxt)
            user_data = NULL;
        else
            user_data = ctxt->userData;

        /*
         * Check that this entity is well formed
         * 4.3.2: An internal general parsed entity is well-formed
         * if its replacement text matches the production labeled
         * content.
         */
        if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
            ctxt->depth++;
            ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
                                                      user_data, &list);
            ctxt->depth--;

        } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
            ctxt->depth++;
            ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
                                           user_data, ctxt->depth, ent->URI,
                                           ent->ExternalID, &list);
            ctxt->depth--;
        } else {
            ret = XML_ERR_ENTITY_PE_INTERNAL;
            xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                         "invalid entity type found\n", NULL);
        }

        /*
         * Store the number of entities needing parsing for this entity
         * content and do checkings.
         * ent->checked is twice (the growth of ctxt->nbentities while
         * parsing the content, plus one); its low bit is set when the
         * content holds a '<'.
         */
        ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
        if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
            ent->checked |= 1;
        if (ret == XML_ERR_ENTITY_LOOP) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
            xmlFreeNodeList(list);
            return;
        }
        if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
            xmlFreeNodeList(list);
            return;
        }

        if ((ret == XML_ERR_OK) && (list != NULL)) {
            if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
             (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
                (ent->children == NULL)) {
                /* the entity keeps the list built by the parsing phase */
                ent->children = list;
                if (ctxt->replaceEntities) {
                    /*
                     * Prune it directly in the generated document
                     * except for single text nodes.
                     */
                    if (((list->type == XML_TEXT_NODE) &&
                         (list->next == NULL)) ||
                        (ctxt->parseMode == XML_PARSE_READER)) {
                        list->parent = (xmlNodePtr) ent;
                        /* NULL list: the copy path is taken further down */
                        list = NULL;
                        ent->owner = 1;
                    } else {
                        ent->owner = 0;
                        /* reparent the nodes to the current node */
                        while (list != NULL) {
                            list->parent = (xmlNodePtr) ctxt->node;
                            list->doc = ctxt->myDoc;
                            if (list->next == NULL)
                                ent->last = list;
                            list = list->next;
                        }
                        list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
                        if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                          xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
                    }
                } else {
                    ent->owner = 1;
                    /* the nodes stay attached to the entity */
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ent;
                        xmlSetTreeDoc(list, ent->doc);
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                }
            } else {
                xmlFreeNodeList(list);
                list = NULL;
            }
        } else if ((ret != XML_ERR_OK) &&
                   (ret != XML_WAR_UNDECLARED_ENTITY)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                     "Entity '%s' failed to parse\n", ent->name);
            xmlParserEntityCheck(ctxt, 0, ent, 0);
        } else if (list != NULL) {
            xmlFreeNodeList(list);
            list = NULL;
        }
        /* make sure the entity is flagged as checked */
        if (ent->checked == 0)
            ent->checked = 2;
    } else if (ent->checked != 1) {
        /* account for the references found when the entity was checked */
        ctxt->nbentities += ent->checked / 2;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
        /*
         * Probably running in SAX mode and the callbacks don't
         * build the entity content. So unless we already went
         * though parsing for first checking go though the entity
         * content to generate callbacks associated to the entity
         */
        if (was_checked != 0) {
            void *user_data;
            /*
             * This is a bit hackish but this seems the best
             * way to make sure both SAX and DOM entity support
             * behaves okay.
             */
            if (ctxt->userData == ctxt)
                user_data = NULL;
            else
                user_data = ctxt->userData;

            /* no node list is requested here, only the callbacks matter */
            if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
                ctxt->depth++;
                ret = xmlParseBalancedChunkMemoryInternal(ctxt,
                                   ent->content, user_data, NULL);
                ctxt->depth--;
            } else if (ent->etype ==
                       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
                ctxt->depth++;
                ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
                           ctxt->sax, user_data, ctxt->depth,
                           ent->URI, ent->ExternalID, NULL);
                ctxt->depth--;
            } else {
                ret = XML_ERR_ENTITY_PE_INTERNAL;
                xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                             "invalid entity type found\n", NULL);
            }
            if (ret == XML_ERR_ENTITY_LOOP) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                return;
            }
        }
        if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
            (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
            /*
             * Entity reference callback comes second, it's somewhat
             * superfluous but a compatibility to historical behaviour
             */
            ctxt->sax->reference(ctxt->userData, ent->name);
        }
        return;
    }

    /*
     * From here on the entity has children. When entities are not
     * substituted the reference is simply reported to the application.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
        (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
        /*
         * Create a node.
         */
        ctxt->sax->reference(ctxt->userData, ent->name);
        return;
    }

    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
        /*
         * There is a problem on the handling of _private for entities
         * (bug 155816): Should we copy the content of the field from
         * the entity (possibly overwriting some value set by the user
         * when a copy is created), should we leave it alone, or should
         * we try to take care of different situations?  The problem
         * is exacerbated by the usage of this field by the xmlReader.
         * To fix this bug, we look at _private on the created node
         * and, if it's NULL, we copy in whatever was in the entity.
         * If it's not NULL we leave it alone.  This is somewhat of a
         * hack - maybe we should have further tests to determine
         * what to do.
         */
        if ((ctxt->node != NULL) && (ent->children != NULL)) {
            /*
             * Seems we are generating the DOM content, do
             * a simple tree copy for all references except the first
             * In the first occurrence list contains the replacement.
             */
            if (((list == NULL) && (ent->owner == 0)) ||
                (ctxt->parseMode == XML_PARSE_READER)) {
                xmlNodePtr nw = NULL, cur, firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * when operating on a reader, the entities definitions
                 * are always owning the entities subtree.
                if (ctxt->parseMode == XML_PARSE_READER)
                    ent->owner = 1;
                 */

                /* add a copy of each entity child under the current node */
                cur = ent->children;
                while (cur != NULL) {
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = nw;
                        }
                        nw = xmlAddChild(ctxt->node, nw);
                    }
                    if (cur == ent->last) {
                        /*
                         * needed to detect some strange empty
                         * node cases in the reader tests
                         */
                        if ((ctxt->parseMode == XML_PARSE_READER) &&
                            (nw != NULL) &&
                            (nw->type == XML_ELEMENT_NODE) &&
                            (nw->children == NULL))
                            nw->extra = 1;

                        break;
                    }
                    cur = cur->next;
                }
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else if ((list == NULL) || (ctxt->inputNr > 0)) {
                xmlNodePtr nw = NULL, cur, next, last,
                           firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * Copy the entity child list and make it the new
                 * entity child list. The goal is to make sure any
                 * ID or REF referenced will be the one from the
                 * document content and not the entity copy.
                 */
                cur = ent->children;
                ent->children = NULL;
                last = ent->last;
                ent->last = NULL;
                while (cur != NULL) {
                    /* detach the original node before moving it */
                    next = cur->next;
                    cur->next = NULL;
                    cur->parent = NULL;
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = cur;
                        }
                        /* the copy goes to the entity, the original node
                         * to the document */
                        xmlAddChild((xmlNodePtr) ent, nw);
                        xmlAddChild(ctxt->node, cur);
                    }
                    if (cur == last)
                        break;
                    cur = next;
                }
                if (ent->owner == 0)
                    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else {
                const xmlChar *nbktext;

                /*
                 * the name change is to avoid coalescing of the
                 * node with a possible previous text one which
                 * would make ent->children a dangling pointer
                 */
                nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
                                        -1);
                if (ent->children->type == XML_TEXT_NODE)
                    ent->children->name = nbktext;
                if ((ent->last != ent->children) &&
                    (ent->last->type == XML_TEXT_NODE))
                    ent->last->name = nbktext;
                xmlAddChildList(ctxt->node, ent->children);
            }

            /*
             * This is to avoid a nasty side effect, see
             * characters() in SAX.c
             */
            ctxt->nodemem = 0;
            ctxt->nodelen = 0;
            return;
        }
    }
}

/**
 * xmlParseEntityRef:
 * @ctxt:  an XML parser context
 *
 * parse ENTITY references declarations
 *
 * [68] EntityRef ::= '&' Name ';'
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", the Name given in the entity reference
 * must match that in an entity declaration, except that
well-formed 7658 * documents need not declare any of the following entities: amp, lt, 7659 * gt, apos, quot. The declaration of a parameter entity must precede 7660 * any reference to it. Similarly, the declaration of a general entity 7661 * must precede any reference to it which appears in a default value in an 7662 * attribute-list declaration. Note that if entities are declared in the 7663 * external subset or in external parameter entities, a non-validating 7664 * processor is not obligated to read and process their declarations; 7665 * for such documents, the rule that an entity must be declared is a 7666 * well-formedness constraint only if standalone='yes'. 7667 * 7668 * [ WFC: Parsed Entity ] 7669 * An entity reference must not contain the name of an unparsed entity 7670 * 7671 * Returns the xmlEntityPtr if found, or NULL otherwise. 7672 */ 7673 xmlEntityPtr 7674 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7675 const xmlChar *name; 7676 xmlEntityPtr ent = NULL; 7677 7678 GROW; 7679 if (ctxt->instate == XML_PARSER_EOF) 7680 return(NULL); 7681 7682 if (RAW != '&') 7683 return(NULL); 7684 NEXT; 7685 name = xmlParseName(ctxt); 7686 if (name == NULL) { 7687 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7688 "xmlParseEntityRef: no name\n"); 7689 return(NULL); 7690 } 7691 if (RAW != ';') { 7692 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7693 return(NULL); 7694 } 7695 NEXT; 7696 7697 /* 7698 * Predefined entities override any extra definition 7699 */ 7700 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7701 ent = xmlGetPredefinedEntity(name); 7702 if (ent != NULL) 7703 return(ent); 7704 } 7705 7706 /* 7707 * Increase the number of entity references parsed 7708 */ 7709 ctxt->nbentities++; 7710 7711 /* 7712 * Ask first SAX for entity resolution, otherwise try the 7713 * entities which may have stored in the parser context. 
7714 */ 7715 if (ctxt->sax != NULL) { 7716 if (ctxt->sax->getEntity != NULL) 7717 ent = ctxt->sax->getEntity(ctxt->userData, name); 7718 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7719 (ctxt->options & XML_PARSE_OLDSAX)) 7720 ent = xmlGetPredefinedEntity(name); 7721 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7722 (ctxt->userData==ctxt)) { 7723 ent = xmlSAX2GetEntity(ctxt, name); 7724 } 7725 } 7726 if (ctxt->instate == XML_PARSER_EOF) 7727 return(NULL); 7728 /* 7729 * [ WFC: Entity Declared ] 7730 * In a document without any DTD, a document with only an 7731 * internal DTD subset which contains no parameter entity 7732 * references, or a document with "standalone='yes'", the 7733 * Name given in the entity reference must match that in an 7734 * entity declaration, except that well-formed documents 7735 * need not declare any of the following entities: amp, lt, 7736 * gt, apos, quot. 7737 * The declaration of a parameter entity must precede any 7738 * reference to it. 7739 * Similarly, the declaration of a general entity must 7740 * precede any reference to it which appears in a default 7741 * value in an attribute-list declaration. Note that if 7742 * entities are declared in the external subset or in 7743 * external parameter entities, a non-validating processor 7744 * is not obligated to read and process their declarations; 7745 * for such documents, the rule that an entity must be 7746 * declared is a well-formedness constraint only if 7747 * standalone='yes'. 
7748 */ 7749 if (ent == NULL) { 7750 if ((ctxt->standalone == 1) || 7751 ((ctxt->hasExternalSubset == 0) && 7752 (ctxt->hasPErefs == 0))) { 7753 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7754 "Entity '%s' not defined\n", name); 7755 } else { 7756 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7757 "Entity '%s' not defined\n", name); 7758 if ((ctxt->inSubset == 0) && 7759 (ctxt->sax != NULL) && 7760 (ctxt->sax->reference != NULL)) { 7761 ctxt->sax->reference(ctxt->userData, name); 7762 } 7763 } 7764 xmlParserEntityCheck(ctxt, 0, ent, 0); 7765 ctxt->valid = 0; 7766 } 7767 7768 /* 7769 * [ WFC: Parsed Entity ] 7770 * An entity reference must not contain the name of an 7771 * unparsed entity 7772 */ 7773 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7774 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7775 "Entity reference to unparsed entity %s\n", name); 7776 } 7777 7778 /* 7779 * [ WFC: No External Entity References ] 7780 * Attribute values cannot contain direct or indirect 7781 * entity references to external entities. 7782 */ 7783 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7784 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7785 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7786 "Attribute references external entity '%s'\n", name); 7787 } 7788 /* 7789 * [ WFC: No < in Attribute Values ] 7790 * The replacement text of any entity referred to directly or 7791 * indirectly in an attribute value (other than "<") must 7792 * not contain a <. 7793 */ 7794 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7795 (ent != NULL) && 7796 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7797 if (((ent->checked & 1) || (ent->checked == 0)) && 7798 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { 7799 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7800 "'<' in entity '%s' is not allowed in attributes values\n", name); 7801 } 7802 } 7803 7804 /* 7805 * Internal check, no parameter entities here ... 
7806 */ 7807 else { 7808 switch (ent->etype) { 7809 case XML_INTERNAL_PARAMETER_ENTITY: 7810 case XML_EXTERNAL_PARAMETER_ENTITY: 7811 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7812 "Attempt to reference the parameter entity '%s'\n", 7813 name); 7814 break; 7815 default: 7816 break; 7817 } 7818 } 7819 7820 /* 7821 * [ WFC: No Recursion ] 7822 * A parsed entity must not contain a recursive reference 7823 * to itself, either directly or indirectly. 7824 * Done somewhere else 7825 */ 7826 return(ent); 7827 } 7828 7829 /** 7830 * xmlParseStringEntityRef: 7831 * @ctxt: an XML parser context 7832 * @str: a pointer to an index in the string 7833 * 7834 * parse ENTITY references declarations, but this version parses it from 7835 * a string value. 7836 * 7837 * [68] EntityRef ::= '&' Name ';' 7838 * 7839 * [ WFC: Entity Declared ] 7840 * In a document without any DTD, a document with only an internal DTD 7841 * subset which contains no parameter entity references, or a document 7842 * with "standalone='yes'", the Name given in the entity reference 7843 * must match that in an entity declaration, except that well-formed 7844 * documents need not declare any of the following entities: amp, lt, 7845 * gt, apos, quot. The declaration of a parameter entity must precede 7846 * any reference to it. Similarly, the declaration of a general entity 7847 * must precede any reference to it which appears in a default value in an 7848 * attribute-list declaration. Note that if entities are declared in the 7849 * external subset or in external parameter entities, a non-validating 7850 * processor is not obligated to read and process their declarations; 7851 * for such documents, the rule that an entity must be declared is a 7852 * well-formedness constraint only if standalone='yes'. 7853 * 7854 * [ WFC: Parsed Entity ] 7855 * An entity reference must not contain the name of an unparsed entity 7856 * 7857 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7858 * is updated to the current location in the string. 7859 */ 7860 static xmlEntityPtr 7861 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7862 xmlChar *name; 7863 const xmlChar *ptr; 7864 xmlChar cur; 7865 xmlEntityPtr ent = NULL; 7866 7867 if ((str == NULL) || (*str == NULL)) 7868 return(NULL); 7869 ptr = *str; 7870 cur = *ptr; 7871 if (cur != '&') 7872 return(NULL); 7873 7874 ptr++; 7875 name = xmlParseStringName(ctxt, &ptr); 7876 if (name == NULL) { 7877 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7878 "xmlParseStringEntityRef: no name\n"); 7879 *str = ptr; 7880 return(NULL); 7881 } 7882 if (*ptr != ';') { 7883 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7884 xmlFree(name); 7885 *str = ptr; 7886 return(NULL); 7887 } 7888 ptr++; 7889 7890 7891 /* 7892 * Predefined entities override any extra definition 7893 */ 7894 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7895 ent = xmlGetPredefinedEntity(name); 7896 if (ent != NULL) { 7897 xmlFree(name); 7898 *str = ptr; 7899 return(ent); 7900 } 7901 } 7902 7903 /* 7904 * Increate the number of entity references parsed 7905 */ 7906 ctxt->nbentities++; 7907 7908 /* 7909 * Ask first SAX for entity resolution, otherwise try the 7910 * entities which may have stored in the parser context. 
7911 */ 7912 if (ctxt->sax != NULL) { 7913 if (ctxt->sax->getEntity != NULL) 7914 ent = ctxt->sax->getEntity(ctxt->userData, name); 7915 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7916 ent = xmlGetPredefinedEntity(name); 7917 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7918 ent = xmlSAX2GetEntity(ctxt, name); 7919 } 7920 } 7921 if (ctxt->instate == XML_PARSER_EOF) { 7922 xmlFree(name); 7923 return(NULL); 7924 } 7925 7926 /* 7927 * [ WFC: Entity Declared ] 7928 * In a document without any DTD, a document with only an 7929 * internal DTD subset which contains no parameter entity 7930 * references, or a document with "standalone='yes'", the 7931 * Name given in the entity reference must match that in an 7932 * entity declaration, except that well-formed documents 7933 * need not declare any of the following entities: amp, lt, 7934 * gt, apos, quot. 7935 * The declaration of a parameter entity must precede any 7936 * reference to it. 7937 * Similarly, the declaration of a general entity must 7938 * precede any reference to it which appears in a default 7939 * value in an attribute-list declaration. Note that if 7940 * entities are declared in the external subset or in 7941 * external parameter entities, a non-validating processor 7942 * is not obligated to read and process their declarations; 7943 * for such documents, the rule that an entity must be 7944 * declared is a well-formedness constraint only if 7945 * standalone='yes'. 7946 */ 7947 if (ent == NULL) { 7948 if ((ctxt->standalone == 1) || 7949 ((ctxt->hasExternalSubset == 0) && 7950 (ctxt->hasPErefs == 0))) { 7951 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7952 "Entity '%s' not defined\n", name); 7953 } else { 7954 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7955 "Entity '%s' not defined\n", 7956 name); 7957 } 7958 xmlParserEntityCheck(ctxt, 0, ent, 0); 7959 /* TODO ? 
check regressions ctxt->valid = 0; */ 7960 } 7961 7962 /* 7963 * [ WFC: Parsed Entity ] 7964 * An entity reference must not contain the name of an 7965 * unparsed entity 7966 */ 7967 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7968 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7969 "Entity reference to unparsed entity %s\n", name); 7970 } 7971 7972 /* 7973 * [ WFC: No External Entity References ] 7974 * Attribute values cannot contain direct or indirect 7975 * entity references to external entities. 7976 */ 7977 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7978 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7979 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7980 "Attribute references external entity '%s'\n", name); 7981 } 7982 /* 7983 * [ WFC: No < in Attribute Values ] 7984 * The replacement text of any entity referred to directly or 7985 * indirectly in an attribute value (other than "<") must 7986 * not contain a <. 7987 */ 7988 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7989 (ent != NULL) && (ent->content != NULL) && 7990 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7991 (xmlStrchr(ent->content, '<'))) { 7992 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7993 "'<' in entity '%s' is not allowed in attributes values\n", 7994 name); 7995 } 7996 7997 /* 7998 * Internal check, no parameter entities here ... 7999 */ 8000 else { 8001 switch (ent->etype) { 8002 case XML_INTERNAL_PARAMETER_ENTITY: 8003 case XML_EXTERNAL_PARAMETER_ENTITY: 8004 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 8005 "Attempt to reference the parameter entity '%s'\n", 8006 name); 8007 break; 8008 default: 8009 break; 8010 } 8011 } 8012 8013 /* 8014 * [ WFC: No Recursion ] 8015 * A parsed entity must not contain a recursive reference 8016 * to itself, either directly or indirectly. 
8017 * Done somewhere else 8018 */ 8019 8020 xmlFree(name); 8021 *str = ptr; 8022 return(ent); 8023 } 8024 8025 /** 8026 * xmlParsePEReference: 8027 * @ctxt: an XML parser context 8028 * 8029 * parse PEReference declarations 8030 * The entity content is handled directly by pushing it's content as 8031 * a new input stream. 8032 * 8033 * [69] PEReference ::= '%' Name ';' 8034 * 8035 * [ WFC: No Recursion ] 8036 * A parsed entity must not contain a recursive 8037 * reference to itself, either directly or indirectly. 8038 * 8039 * [ WFC: Entity Declared ] 8040 * In a document without any DTD, a document with only an internal DTD 8041 * subset which contains no parameter entity references, or a document 8042 * with "standalone='yes'", ... ... The declaration of a parameter 8043 * entity must precede any reference to it... 8044 * 8045 * [ VC: Entity Declared ] 8046 * In a document with an external subset or external parameter entities 8047 * with "standalone='no'", ... ... The declaration of a parameter entity 8048 * must precede any reference to it... 8049 * 8050 * [ WFC: In DTD ] 8051 * Parameter-entity references may only appear in the DTD. 8052 * NOTE: misleading but this is handled. 
8053 */ 8054 void 8055 xmlParsePEReference(xmlParserCtxtPtr ctxt) 8056 { 8057 const xmlChar *name; 8058 xmlEntityPtr entity = NULL; 8059 xmlParserInputPtr input; 8060 8061 if (RAW != '%') 8062 return; 8063 NEXT; 8064 name = xmlParseName(ctxt); 8065 if (name == NULL) { 8066 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8067 "xmlParsePEReference: no name\n"); 8068 return; 8069 } 8070 if (RAW != ';') { 8071 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8072 return; 8073 } 8074 8075 NEXT; 8076 8077 /* 8078 * Increate the number of entity references parsed 8079 */ 8080 ctxt->nbentities++; 8081 8082 /* 8083 * Request the entity from SAX 8084 */ 8085 if ((ctxt->sax != NULL) && 8086 (ctxt->sax->getParameterEntity != NULL)) 8087 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8088 if (ctxt->instate == XML_PARSER_EOF) 8089 return; 8090 if (entity == NULL) { 8091 /* 8092 * [ WFC: Entity Declared ] 8093 * In a document without any DTD, a document with only an 8094 * internal DTD subset which contains no parameter entity 8095 * references, or a document with "standalone='yes'", ... 8096 * ... The declaration of a parameter entity must precede 8097 * any reference to it... 8098 */ 8099 if ((ctxt->standalone == 1) || 8100 ((ctxt->hasExternalSubset == 0) && 8101 (ctxt->hasPErefs == 0))) { 8102 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8103 "PEReference: %%%s; not found\n", 8104 name); 8105 } else { 8106 /* 8107 * [ VC: Entity Declared ] 8108 * In a document with an external subset or external 8109 * parameter entities with "standalone='no'", ... 8110 * ... The declaration of a parameter entity must 8111 * precede any reference to it... 
8112 */ 8113 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8114 "PEReference: %%%s; not found\n", 8115 name, NULL); 8116 ctxt->valid = 0; 8117 } 8118 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8119 } else { 8120 /* 8121 * Internal checking in case the entity quest barfed 8122 */ 8123 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8124 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8125 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8126 "Internal: %%%s; is not a parameter entity\n", 8127 name, NULL); 8128 } else if (ctxt->input->free != deallocblankswrapper) { 8129 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 8130 if (xmlPushInput(ctxt, input) < 0) 8131 return; 8132 } else { 8133 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8134 ((ctxt->options & XML_PARSE_NOENT) == 0) && 8135 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 8136 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 8137 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 8138 (ctxt->replaceEntities == 0) && 8139 (ctxt->validate == 0)) 8140 return; 8141 /* 8142 * TODO !!! 8143 * handle the extra spaces added before and after 8144 * c.f. http://www.w3.org/TR/REC-xml#as-PE 8145 */ 8146 input = xmlNewEntityInputStream(ctxt, entity); 8147 if (xmlPushInput(ctxt, input) < 0) 8148 return; 8149 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8150 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8151 (IS_BLANK_CH(NXT(5)))) { 8152 xmlParseTextDecl(ctxt); 8153 if (ctxt->errNo == 8154 XML_ERR_UNSUPPORTED_ENCODING) { 8155 /* 8156 * The XML REC instructs us to stop parsing 8157 * right here 8158 */ 8159 xmlHaltParser(ctxt); 8160 return; 8161 } 8162 } 8163 } 8164 } 8165 ctxt->hasPErefs = 1; 8166 } 8167 8168 /** 8169 * xmlLoadEntityContent: 8170 * @ctxt: an XML parser context 8171 * @entity: an unloaded system entity 8172 * 8173 * Load the original content of the given system entity from the 8174 * ExternalID/SystemID given. 
This is to be used for Included in Literal 8175 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8176 * 8177 * Returns 0 in case of success and -1 in case of failure 8178 */ 8179 static int 8180 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8181 xmlParserInputPtr input; 8182 xmlBufferPtr buf; 8183 int l, c; 8184 int count = 0; 8185 8186 if ((ctxt == NULL) || (entity == NULL) || 8187 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8188 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8189 (entity->content != NULL)) { 8190 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8191 "xmlLoadEntityContent parameter error"); 8192 return(-1); 8193 } 8194 8195 if (xmlParserDebugEntities) 8196 xmlGenericError(xmlGenericErrorContext, 8197 "Reading %s entity content input\n", entity->name); 8198 8199 buf = xmlBufferCreate(); 8200 if (buf == NULL) { 8201 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8202 "xmlLoadEntityContent parameter error"); 8203 return(-1); 8204 } 8205 8206 input = xmlNewEntityInputStream(ctxt, entity); 8207 if (input == NULL) { 8208 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8209 "xmlLoadEntityContent input error"); 8210 xmlBufferFree(buf); 8211 return(-1); 8212 } 8213 8214 /* 8215 * Push the entity as the current input, read char by char 8216 * saving to the buffer until the end of the entity or an error 8217 */ 8218 if (xmlPushInput(ctxt, input) < 0) { 8219 xmlBufferFree(buf); 8220 return(-1); 8221 } 8222 8223 GROW; 8224 c = CUR_CHAR(l); 8225 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8226 (IS_CHAR(c))) { 8227 xmlBufferAdd(buf, ctxt->input->cur, l); 8228 if (count++ > XML_PARSER_CHUNK_SIZE) { 8229 count = 0; 8230 GROW; 8231 if (ctxt->instate == XML_PARSER_EOF) { 8232 xmlBufferFree(buf); 8233 return(-1); 8234 } 8235 } 8236 NEXTL(l); 8237 c = CUR_CHAR(l); 8238 if (c == 0) { 8239 count = 0; 8240 GROW; 8241 if (ctxt->instate == XML_PARSER_EOF) { 8242 xmlBufferFree(buf); 8243 
return(-1); 8244 } 8245 c = CUR_CHAR(l); 8246 } 8247 } 8248 8249 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8250 xmlPopInput(ctxt); 8251 } else if (!IS_CHAR(c)) { 8252 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8253 "xmlLoadEntityContent: invalid char value %d\n", 8254 c); 8255 xmlBufferFree(buf); 8256 return(-1); 8257 } 8258 entity->content = buf->content; 8259 buf->content = NULL; 8260 xmlBufferFree(buf); 8261 8262 return(0); 8263 } 8264 8265 /** 8266 * xmlParseStringPEReference: 8267 * @ctxt: an XML parser context 8268 * @str: a pointer to an index in the string 8269 * 8270 * parse PEReference declarations 8271 * 8272 * [69] PEReference ::= '%' Name ';' 8273 * 8274 * [ WFC: No Recursion ] 8275 * A parsed entity must not contain a recursive 8276 * reference to itself, either directly or indirectly. 8277 * 8278 * [ WFC: Entity Declared ] 8279 * In a document without any DTD, a document with only an internal DTD 8280 * subset which contains no parameter entity references, or a document 8281 * with "standalone='yes'", ... ... The declaration of a parameter 8282 * entity must precede any reference to it... 8283 * 8284 * [ VC: Entity Declared ] 8285 * In a document with an external subset or external parameter entities 8286 * with "standalone='no'", ... ... The declaration of a parameter entity 8287 * must precede any reference to it... 8288 * 8289 * [ WFC: In DTD ] 8290 * Parameter-entity references may only appear in the DTD. 8291 * NOTE: misleading but this is handled. 8292 * 8293 * Returns the string of the entity content. 
8294 * str is updated to the current value of the index 8295 */ 8296 static xmlEntityPtr 8297 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8298 const xmlChar *ptr; 8299 xmlChar cur; 8300 xmlChar *name; 8301 xmlEntityPtr entity = NULL; 8302 8303 if ((str == NULL) || (*str == NULL)) return(NULL); 8304 ptr = *str; 8305 cur = *ptr; 8306 if (cur != '%') 8307 return(NULL); 8308 ptr++; 8309 name = xmlParseStringName(ctxt, &ptr); 8310 if (name == NULL) { 8311 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8312 "xmlParseStringPEReference: no name\n"); 8313 *str = ptr; 8314 return(NULL); 8315 } 8316 cur = *ptr; 8317 if (cur != ';') { 8318 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8319 xmlFree(name); 8320 *str = ptr; 8321 return(NULL); 8322 } 8323 ptr++; 8324 8325 /* 8326 * Increate the number of entity references parsed 8327 */ 8328 ctxt->nbentities++; 8329 8330 /* 8331 * Request the entity from SAX 8332 */ 8333 if ((ctxt->sax != NULL) && 8334 (ctxt->sax->getParameterEntity != NULL)) 8335 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8336 if (ctxt->instate == XML_PARSER_EOF) { 8337 xmlFree(name); 8338 return(NULL); 8339 } 8340 if (entity == NULL) { 8341 /* 8342 * [ WFC: Entity Declared ] 8343 * In a document without any DTD, a document with only an 8344 * internal DTD subset which contains no parameter entity 8345 * references, or a document with "standalone='yes'", ... 8346 * ... The declaration of a parameter entity must precede 8347 * any reference to it... 8348 */ 8349 if ((ctxt->standalone == 1) || 8350 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8351 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8352 "PEReference: %%%s; not found\n", name); 8353 } else { 8354 /* 8355 * [ VC: Entity Declared ] 8356 * In a document with an external subset or external 8357 * parameter entities with "standalone='no'", ... 8358 * ... 
The declaration of a parameter entity must 8359 * precede any reference to it... 8360 */ 8361 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8362 "PEReference: %%%s; not found\n", 8363 name, NULL); 8364 ctxt->valid = 0; 8365 } 8366 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8367 } else { 8368 /* 8369 * Internal checking in case the entity quest barfed 8370 */ 8371 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8372 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8373 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8374 "%%%s; is not a parameter entity\n", 8375 name, NULL); 8376 } 8377 } 8378 ctxt->hasPErefs = 1; 8379 xmlFree(name); 8380 *str = ptr; 8381 return(entity); 8382 } 8383 8384 /** 8385 * xmlParseDocTypeDecl: 8386 * @ctxt: an XML parser context 8387 * 8388 * parse a DOCTYPE declaration 8389 * 8390 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8391 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8392 * 8393 * [ VC: Root Element Type ] 8394 * The Name in the document type declaration must match the element 8395 * type of the root element. 8396 */ 8397 8398 void 8399 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8400 const xmlChar *name = NULL; 8401 xmlChar *ExternalID = NULL; 8402 xmlChar *URI = NULL; 8403 8404 /* 8405 * We know that '<!DOCTYPE' has been detected. 8406 */ 8407 SKIP(9); 8408 8409 SKIP_BLANKS; 8410 8411 /* 8412 * Parse the DOCTYPE name. 8413 */ 8414 name = xmlParseName(ctxt); 8415 if (name == NULL) { 8416 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8417 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8418 } 8419 ctxt->intSubName = name; 8420 8421 SKIP_BLANKS; 8422 8423 /* 8424 * Check for SystemID and ExternalID 8425 */ 8426 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8427 8428 if ((URI != NULL) || (ExternalID != NULL)) { 8429 ctxt->hasExternalSubset = 1; 8430 } 8431 ctxt->extSubURI = URI; 8432 ctxt->extSubSystem = ExternalID; 8433 8434 SKIP_BLANKS; 8435 8436 /* 8437 * Create and update the internal subset. 
8438 */ 8439 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8440 (!ctxt->disableSAX)) 8441 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8442 if (ctxt->instate == XML_PARSER_EOF) 8443 return; 8444 8445 /* 8446 * Is there any internal subset declarations ? 8447 * they are handled separately in xmlParseInternalSubset() 8448 */ 8449 if (RAW == '[') 8450 return; 8451 8452 /* 8453 * We should be at the end of the DOCTYPE declaration. 8454 */ 8455 if (RAW != '>') { 8456 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8457 } 8458 NEXT; 8459 } 8460 8461 /** 8462 * xmlParseInternalSubset: 8463 * @ctxt: an XML parser context 8464 * 8465 * parse the internal subset declaration 8466 * 8467 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8468 */ 8469 8470 static void 8471 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8472 /* 8473 * Is there any DTD definition ? 8474 */ 8475 if (RAW == '[') { 8476 ctxt->instate = XML_PARSER_DTD; 8477 NEXT; 8478 /* 8479 * Parse the succession of Markup declarations and 8480 * PEReferences. 8481 * Subsequence (markupdecl | PEReference | S)* 8482 */ 8483 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { 8484 const xmlChar *check = CUR_PTR; 8485 unsigned int cons = ctxt->input->consumed; 8486 8487 SKIP_BLANKS; 8488 xmlParseMarkupDecl(ctxt); 8489 xmlParsePEReference(ctxt); 8490 8491 /* 8492 * Pop-up of finished entities. 8493 */ 8494 while ((RAW == 0) && (ctxt->inputNr > 1)) 8495 xmlPopInput(ctxt); 8496 8497 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8498 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8499 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8500 break; 8501 } 8502 } 8503 if (RAW == ']') { 8504 NEXT; 8505 SKIP_BLANKS; 8506 } 8507 } 8508 8509 /* 8510 * We should be at the end of the DOCTYPE declaration. 
8511 */ 8512 if (RAW != '>') { 8513 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8514 return; 8515 } 8516 NEXT; 8517 } 8518 8519 #ifdef LIBXML_SAX1_ENABLED 8520 /** 8521 * xmlParseAttribute: 8522 * @ctxt: an XML parser context 8523 * @value: a xmlChar ** used to store the value of the attribute 8524 * 8525 * parse an attribute 8526 * 8527 * [41] Attribute ::= Name Eq AttValue 8528 * 8529 * [ WFC: No External Entity References ] 8530 * Attribute values cannot contain direct or indirect entity references 8531 * to external entities. 8532 * 8533 * [ WFC: No < in Attribute Values ] 8534 * The replacement text of any entity referred to directly or indirectly in 8535 * an attribute value (other than "<") must not contain a <. 8536 * 8537 * [ VC: Attribute Value Type ] 8538 * The attribute must have been declared; the value must be of the type 8539 * declared for it. 8540 * 8541 * [25] Eq ::= S? '=' S? 8542 * 8543 * With namespace: 8544 * 8545 * [NS 11] Attribute ::= QName Eq AttValue 8546 * 8547 * Also the case QName == xmlns:??? is handled independently as a namespace 8548 * definition. 8549 * 8550 * Returns the attribute name, and the value in *value. 
8551 */ 8552 8553 const xmlChar * 8554 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8555 const xmlChar *name; 8556 xmlChar *val; 8557 8558 *value = NULL; 8559 GROW; 8560 name = xmlParseName(ctxt); 8561 if (name == NULL) { 8562 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8563 "error parsing attribute name\n"); 8564 return(NULL); 8565 } 8566 8567 /* 8568 * read the value 8569 */ 8570 SKIP_BLANKS; 8571 if (RAW == '=') { 8572 NEXT; 8573 SKIP_BLANKS; 8574 val = xmlParseAttValue(ctxt); 8575 ctxt->instate = XML_PARSER_CONTENT; 8576 } else { 8577 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8578 "Specification mandate value for attribute %s\n", name); 8579 return(NULL); 8580 } 8581 8582 /* 8583 * Check that xml:lang conforms to the specification 8584 * No more registered as an error, just generate a warning now 8585 * since this was deprecated in XML second edition 8586 */ 8587 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8588 if (!xmlCheckLanguageID(val)) { 8589 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8590 "Malformed value for xml:lang : %s\n", 8591 val, NULL); 8592 } 8593 } 8594 8595 /* 8596 * Check that xml:space conforms to the specification 8597 */ 8598 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8599 if (xmlStrEqual(val, BAD_CAST "default")) 8600 *(ctxt->space) = 0; 8601 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8602 *(ctxt->space) = 1; 8603 else { 8604 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8605 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8606 val, NULL); 8607 } 8608 } 8609 8610 *value = val; 8611 return(name); 8612 } 8613 8614 /** 8615 * xmlParseStartTag: 8616 * @ctxt: an XML parser context 8617 * 8618 * parse a start of tag either for rule element or 8619 * EmptyElement. In both case we don't parse the tag closing chars. 8620 * 8621 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8622 * 8623 * [ WFC: Unique Att Spec ] 8624 * No attribute name may appear more than once in the same start-tag or 8625 * empty-element tag. 8626 * 8627 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8628 * 8629 * [ WFC: Unique Att Spec ] 8630 * No attribute name may appear more than once in the same start-tag or 8631 * empty-element tag. 8632 * 8633 * With namespace: 8634 * 8635 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8636 * 8637 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8638 * 8639 * Returns the element name parsed 8640 */ 8641 8642 const xmlChar * 8643 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8644 const xmlChar *name; 8645 const xmlChar *attname; 8646 xmlChar *attvalue; 8647 const xmlChar **atts = ctxt->atts; 8648 int nbatts = 0; 8649 int maxatts = ctxt->maxatts; 8650 int i; 8651 8652 if (RAW != '<') return(NULL); 8653 NEXT1; 8654 8655 name = xmlParseName(ctxt); 8656 if (name == NULL) { 8657 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8658 "xmlParseStartTag: invalid element name\n"); 8659 return(NULL); 8660 } 8661 8662 /* 8663 * Now parse the attributes, it ends up with the ending 8664 * 8665 * (S Attribute)* S? 8666 */ 8667 SKIP_BLANKS; 8668 GROW; 8669 8670 while (((RAW != '>') && 8671 ((RAW != '/') || (NXT(1) != '>')) && 8672 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8673 const xmlChar *q = CUR_PTR; 8674 unsigned int cons = ctxt->input->consumed; 8675 8676 attname = xmlParseAttribute(ctxt, &attvalue); 8677 if ((attname != NULL) && (attvalue != NULL)) { 8678 /* 8679 * [ WFC: Unique Att Spec ] 8680 * No attribute name may appear more than once in the same 8681 * start-tag or empty-element tag. 
8682 */ 8683 for (i = 0; i < nbatts;i += 2) { 8684 if (xmlStrEqual(atts[i], attname)) { 8685 xmlErrAttributeDup(ctxt, NULL, attname); 8686 xmlFree(attvalue); 8687 goto failed; 8688 } 8689 } 8690 /* 8691 * Add the pair to atts 8692 */ 8693 if (atts == NULL) { 8694 maxatts = 22; /* allow for 10 attrs by default */ 8695 atts = (const xmlChar **) 8696 xmlMalloc(maxatts * sizeof(xmlChar *)); 8697 if (atts == NULL) { 8698 xmlErrMemory(ctxt, NULL); 8699 if (attvalue != NULL) 8700 xmlFree(attvalue); 8701 goto failed; 8702 } 8703 ctxt->atts = atts; 8704 ctxt->maxatts = maxatts; 8705 } else if (nbatts + 4 > maxatts) { 8706 const xmlChar **n; 8707 8708 maxatts *= 2; 8709 n = (const xmlChar **) xmlRealloc((void *) atts, 8710 maxatts * sizeof(const xmlChar *)); 8711 if (n == NULL) { 8712 xmlErrMemory(ctxt, NULL); 8713 if (attvalue != NULL) 8714 xmlFree(attvalue); 8715 goto failed; 8716 } 8717 atts = n; 8718 ctxt->atts = atts; 8719 ctxt->maxatts = maxatts; 8720 } 8721 atts[nbatts++] = attname; 8722 atts[nbatts++] = attvalue; 8723 atts[nbatts] = NULL; 8724 atts[nbatts + 1] = NULL; 8725 } else { 8726 if (attvalue != NULL) 8727 xmlFree(attvalue); 8728 } 8729 8730 failed: 8731 8732 GROW 8733 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8734 break; 8735 if (!IS_BLANK_CH(RAW)) { 8736 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8737 "attributes construct error\n"); 8738 } 8739 SKIP_BLANKS; 8740 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8741 (attname == NULL) && (attvalue == NULL)) { 8742 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8743 "xmlParseStartTag: problem parsing attributes\n"); 8744 break; 8745 } 8746 SHRINK; 8747 GROW; 8748 } 8749 8750 /* 8751 * SAX: Start of Element ! 
8752 */ 8753 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8754 (!ctxt->disableSAX)) { 8755 if (nbatts > 0) 8756 ctxt->sax->startElement(ctxt->userData, name, atts); 8757 else 8758 ctxt->sax->startElement(ctxt->userData, name, NULL); 8759 } 8760 8761 if (atts != NULL) { 8762 /* Free only the content strings */ 8763 for (i = 1;i < nbatts;i+=2) 8764 if (atts[i] != NULL) 8765 xmlFree((xmlChar *) atts[i]); 8766 } 8767 return(name); 8768 } 8769 8770 /** 8771 * xmlParseEndTag1: 8772 * @ctxt: an XML parser context 8773 * @line: line of the start tag 8774 * @nsNr: number of namespaces on the start tag 8775 * 8776 * parse an end of tag 8777 * 8778 * [42] ETag ::= '</' Name S? '>' 8779 * 8780 * With namespace 8781 * 8782 * [NS 9] ETag ::= '</' QName S? '>' 8783 */ 8784 8785 static void 8786 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8787 const xmlChar *name; 8788 8789 GROW; 8790 if ((RAW != '<') || (NXT(1) != '/')) { 8791 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8792 "xmlParseEndTag: '</' not found\n"); 8793 return; 8794 } 8795 SKIP(2); 8796 8797 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8798 8799 /* 8800 * We should definitely be at the ending "S? '>'" part 8801 */ 8802 GROW; 8803 SKIP_BLANKS; 8804 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8805 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8806 } else 8807 NEXT1; 8808 8809 /* 8810 * [ WFC: Element Type Match ] 8811 * The Name in an element's end-tag must match the element type in the 8812 * start-tag. 
8813 * 8814 */ 8815 if (name != (xmlChar*)1) { 8816 if (name == NULL) name = BAD_CAST "unparseable"; 8817 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8818 "Opening and ending tag mismatch: %s line %d and %s\n", 8819 ctxt->name, line, name); 8820 } 8821 8822 /* 8823 * SAX: End of Tag 8824 */ 8825 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8826 (!ctxt->disableSAX)) 8827 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8828 8829 namePop(ctxt); 8830 spacePop(ctxt); 8831 return; 8832 } 8833 8834 /** 8835 * xmlParseEndTag: 8836 * @ctxt: an XML parser context 8837 * 8838 * parse an end of tag 8839 * 8840 * [42] ETag ::= '</' Name S? '>' 8841 * 8842 * With namespace 8843 * 8844 * [NS 9] ETag ::= '</' QName S? '>' 8845 */ 8846 8847 void 8848 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8849 xmlParseEndTag1(ctxt, 0); 8850 } 8851 #endif /* LIBXML_SAX1_ENABLED */ 8852 8853 /************************************************************************ 8854 * * 8855 * SAX 2 specific operations * 8856 * * 8857 ************************************************************************/ 8858 8859 /* 8860 * xmlGetNamespace: 8861 * @ctxt: an XML parser context 8862 * @prefix: the prefix to lookup 8863 * 8864 * Lookup the namespace name for the @prefix (which ca be NULL) 8865 * The prefix must come from the @ctxt->dict dictionary 8866 * 8867 * Returns the namespace name or NULL if not bound 8868 */ 8869 static const xmlChar * 8870 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8871 int i; 8872 8873 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8874 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8875 if (ctxt->nsTab[i] == prefix) { 8876 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8877 return(NULL); 8878 return(ctxt->nsTab[i + 1]); 8879 } 8880 return(NULL); 8881 } 8882 8883 /** 8884 * xmlParseQName: 8885 * @ctxt: an XML parser context 8886 * @prefix: pointer to store the prefix part 8887 * 8888 * parse an XML Namespace QName 8889 * 8890 * 
[6] QName ::= (Prefix ':')? LocalPart 8891 * [7] Prefix ::= NCName 8892 * [8] LocalPart ::= NCName 8893 * 8894 * Returns the Name parsed or NULL 8895 */ 8896 8897 static const xmlChar * 8898 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8899 const xmlChar *l, *p; 8900 8901 GROW; 8902 8903 l = xmlParseNCName(ctxt); 8904 if (l == NULL) { 8905 if (CUR == ':') { 8906 l = xmlParseName(ctxt); 8907 if (l != NULL) { 8908 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8909 "Failed to parse QName '%s'\n", l, NULL, NULL); 8910 *prefix = NULL; 8911 return(l); 8912 } 8913 } 8914 return(NULL); 8915 } 8916 if (CUR == ':') { 8917 NEXT; 8918 p = l; 8919 l = xmlParseNCName(ctxt); 8920 if (l == NULL) { 8921 xmlChar *tmp; 8922 8923 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8924 "Failed to parse QName '%s:'\n", p, NULL, NULL); 8925 l = xmlParseNmtoken(ctxt); 8926 if (l == NULL) 8927 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8928 else { 8929 tmp = xmlBuildQName(l, p, NULL, 0); 8930 xmlFree((char *)l); 8931 } 8932 p = xmlDictLookup(ctxt->dict, tmp, -1); 8933 if (tmp != NULL) xmlFree(tmp); 8934 *prefix = NULL; 8935 return(p); 8936 } 8937 if (CUR == ':') { 8938 xmlChar *tmp; 8939 8940 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8941 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8942 NEXT; 8943 tmp = (xmlChar *) xmlParseName(ctxt); 8944 if (tmp != NULL) { 8945 tmp = xmlBuildQName(tmp, l, NULL, 0); 8946 l = xmlDictLookup(ctxt->dict, tmp, -1); 8947 if (tmp != NULL) xmlFree(tmp); 8948 *prefix = p; 8949 return(l); 8950 } 8951 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8952 l = xmlDictLookup(ctxt->dict, tmp, -1); 8953 if (tmp != NULL) xmlFree(tmp); 8954 *prefix = p; 8955 return(l); 8956 } 8957 *prefix = p; 8958 } else 8959 *prefix = NULL; 8960 return(l); 8961 } 8962 8963 /** 8964 * xmlParseQNameAndCompare: 8965 * @ctxt: an XML parser context 8966 * @name: the localname 8967 * @prefix: the prefix, if any. 
8968 * 8969 * parse an XML name and compares for match 8970 * (specialized for endtag parsing) 8971 * 8972 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8973 * and the name for mismatch 8974 */ 8975 8976 static const xmlChar * 8977 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8978 xmlChar const *prefix) { 8979 const xmlChar *cmp; 8980 const xmlChar *in; 8981 const xmlChar *ret; 8982 const xmlChar *prefix2; 8983 8984 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8985 8986 GROW; 8987 in = ctxt->input->cur; 8988 8989 cmp = prefix; 8990 while (*in != 0 && *in == *cmp) { 8991 ++in; 8992 ++cmp; 8993 } 8994 if ((*cmp == 0) && (*in == ':')) { 8995 in++; 8996 cmp = name; 8997 while (*in != 0 && *in == *cmp) { 8998 ++in; 8999 ++cmp; 9000 } 9001 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 9002 /* success */ 9003 ctxt->input->cur = in; 9004 return((const xmlChar*) 1); 9005 } 9006 } 9007 /* 9008 * all strings coms from the dictionary, equality can be done directly 9009 */ 9010 ret = xmlParseQName (ctxt, &prefix2); 9011 if ((ret == name) && (prefix == prefix2)) 9012 return((const xmlChar*) 1); 9013 return ret; 9014 } 9015 9016 /** 9017 * xmlParseAttValueInternal: 9018 * @ctxt: an XML parser context 9019 * @len: attribute len result 9020 * @alloc: whether the attribute was reallocated as a new string 9021 * @normalize: if 1 then further non-CDATA normalization must be done 9022 * 9023 * parse a value for an attribute. 9024 * NOTE: if no normalization is needed, the routine will return pointers 9025 * directly from the data buffer. 
9026 * 9027 * 3.3.3 Attribute-Value Normalization: 9028 * Before the value of an attribute is passed to the application or 9029 * checked for validity, the XML processor must normalize it as follows: 9030 * - a character reference is processed by appending the referenced 9031 * character to the attribute value 9032 * - an entity reference is processed by recursively processing the 9033 * replacement text of the entity 9034 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 9035 * appending #x20 to the normalized value, except that only a single 9036 * #x20 is appended for a "#xD#xA" sequence that is part of an external 9037 * parsed entity or the literal entity value of an internal parsed entity 9038 * - other characters are processed by appending them to the normalized value 9039 * If the declared value is not CDATA, then the XML processor must further 9040 * process the normalized attribute value by discarding any leading and 9041 * trailing space (#x20) characters, and by replacing sequences of space 9042 * (#x20) characters by a single space (#x20) character. 9043 * All attributes for which no declaration has been read should be treated 9044 * by a non-validating parser as if declared CDATA. 9045 * 9046 * Returns the AttValue parsed or NULL. The value has to be freed by the 9047 * caller if it was copied, this can be detected by val[*len] == 0. 
9048 */ 9049 9050 static xmlChar * 9051 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 9052 int normalize) 9053 { 9054 xmlChar limit = 0; 9055 const xmlChar *in = NULL, *start, *end, *last; 9056 xmlChar *ret = NULL; 9057 int line, col; 9058 9059 GROW; 9060 in = (xmlChar *) CUR_PTR; 9061 line = ctxt->input->line; 9062 col = ctxt->input->col; 9063 if (*in != '"' && *in != '\'') { 9064 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 9065 return (NULL); 9066 } 9067 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 9068 9069 /* 9070 * try to handle in this routine the most common case where no 9071 * allocation of a new string is required and where content is 9072 * pure ASCII. 9073 */ 9074 limit = *in++; 9075 col++; 9076 end = ctxt->input->end; 9077 start = in; 9078 if (in >= end) { 9079 const xmlChar *oldbase = ctxt->input->base; 9080 GROW; 9081 if (oldbase != ctxt->input->base) { 9082 long delta = ctxt->input->base - oldbase; 9083 start = start + delta; 9084 in = in + delta; 9085 } 9086 end = ctxt->input->end; 9087 } 9088 if (normalize) { 9089 /* 9090 * Skip any leading spaces 9091 */ 9092 while ((in < end) && (*in != limit) && 9093 ((*in == 0x20) || (*in == 0x9) || 9094 (*in == 0xA) || (*in == 0xD))) { 9095 if (*in == 0xA) { 9096 line++; col = 1; 9097 } else { 9098 col++; 9099 } 9100 in++; 9101 start = in; 9102 if (in >= end) { 9103 const xmlChar *oldbase = ctxt->input->base; 9104 GROW; 9105 if (ctxt->instate == XML_PARSER_EOF) 9106 return(NULL); 9107 if (oldbase != ctxt->input->base) { 9108 long delta = ctxt->input->base - oldbase; 9109 start = start + delta; 9110 in = in + delta; 9111 } 9112 end = ctxt->input->end; 9113 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9114 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9115 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9116 "AttValue length too long\n"); 9117 return(NULL); 9118 } 9119 } 9120 } 9121 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9122 (*in <= 0x7f) && (*in != '&') && 
(*in != '<')) { 9123 col++; 9124 if ((*in++ == 0x20) && (*in == 0x20)) break; 9125 if (in >= end) { 9126 const xmlChar *oldbase = ctxt->input->base; 9127 GROW; 9128 if (ctxt->instate == XML_PARSER_EOF) 9129 return(NULL); 9130 if (oldbase != ctxt->input->base) { 9131 long delta = ctxt->input->base - oldbase; 9132 start = start + delta; 9133 in = in + delta; 9134 } 9135 end = ctxt->input->end; 9136 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9137 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9138 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9139 "AttValue length too long\n"); 9140 return(NULL); 9141 } 9142 } 9143 } 9144 last = in; 9145 /* 9146 * skip the trailing blanks 9147 */ 9148 while ((last[-1] == 0x20) && (last > start)) last--; 9149 while ((in < end) && (*in != limit) && 9150 ((*in == 0x20) || (*in == 0x9) || 9151 (*in == 0xA) || (*in == 0xD))) { 9152 if (*in == 0xA) { 9153 line++, col = 1; 9154 } else { 9155 col++; 9156 } 9157 in++; 9158 if (in >= end) { 9159 const xmlChar *oldbase = ctxt->input->base; 9160 GROW; 9161 if (ctxt->instate == XML_PARSER_EOF) 9162 return(NULL); 9163 if (oldbase != ctxt->input->base) { 9164 long delta = ctxt->input->base - oldbase; 9165 start = start + delta; 9166 in = in + delta; 9167 last = last + delta; 9168 } 9169 end = ctxt->input->end; 9170 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9171 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9172 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9173 "AttValue length too long\n"); 9174 return(NULL); 9175 } 9176 } 9177 } 9178 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9179 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9180 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9181 "AttValue length too long\n"); 9182 return(NULL); 9183 } 9184 if (*in != limit) goto need_complex; 9185 } else { 9186 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9187 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9188 in++; 9189 col++; 9190 if (in >= end) { 9191 const xmlChar *oldbase = 
ctxt->input->base; 9192 GROW; 9193 if (ctxt->instate == XML_PARSER_EOF) 9194 return(NULL); 9195 if (oldbase != ctxt->input->base) { 9196 long delta = ctxt->input->base - oldbase; 9197 start = start + delta; 9198 in = in + delta; 9199 } 9200 end = ctxt->input->end; 9201 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9202 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9203 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9204 "AttValue length too long\n"); 9205 return(NULL); 9206 } 9207 } 9208 } 9209 last = in; 9210 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9211 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9212 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9213 "AttValue length too long\n"); 9214 return(NULL); 9215 } 9216 if (*in != limit) goto need_complex; 9217 } 9218 in++; 9219 col++; 9220 if (len != NULL) { 9221 *len = last - start; 9222 ret = (xmlChar *) start; 9223 } else { 9224 if (alloc) *alloc = 1; 9225 ret = xmlStrndup(start, last - start); 9226 } 9227 CUR_PTR = in; 9228 ctxt->input->line = line; 9229 ctxt->input->col = col; 9230 if (alloc) *alloc = 0; 9231 return ret; 9232 need_complex: 9233 if (alloc) *alloc = 1; 9234 return xmlParseAttValueComplex(ctxt, len, normalize); 9235 } 9236 9237 /** 9238 * xmlParseAttribute2: 9239 * @ctxt: an XML parser context 9240 * @pref: the element prefix 9241 * @elem: the element name 9242 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9243 * @value: a xmlChar ** used to store the value of the attribute 9244 * @len: an int * to save the length of the attribute 9245 * @alloc: an int * to indicate if the attribute was allocated 9246 * 9247 * parse an attribute in the new SAX2 framework. 9248 * 9249 * Returns the attribute name, and the value in *value, . 
9250 */ 9251 9252 static const xmlChar * 9253 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9254 const xmlChar * pref, const xmlChar * elem, 9255 const xmlChar ** prefix, xmlChar ** value, 9256 int *len, int *alloc) 9257 { 9258 const xmlChar *name; 9259 xmlChar *val, *internal_val = NULL; 9260 int normalize = 0; 9261 9262 *value = NULL; 9263 GROW; 9264 name = xmlParseQName(ctxt, prefix); 9265 if (name == NULL) { 9266 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9267 "error parsing attribute name\n"); 9268 return (NULL); 9269 } 9270 9271 /* 9272 * get the type if needed 9273 */ 9274 if (ctxt->attsSpecial != NULL) { 9275 int type; 9276 9277 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 9278 pref, elem, *prefix, name); 9279 if (type != 0) 9280 normalize = 1; 9281 } 9282 9283 /* 9284 * read the value 9285 */ 9286 SKIP_BLANKS; 9287 if (RAW == '=') { 9288 NEXT; 9289 SKIP_BLANKS; 9290 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9291 if (normalize) { 9292 /* 9293 * Sometimes a second normalisation pass for spaces is needed 9294 * but that only happens if charrefs or entities refernces 9295 * have been used in the attribute value, i.e. the attribute 9296 * value have been extracted in an allocated string already. 
9297 */ 9298 if (*alloc) { 9299 const xmlChar *val2; 9300 9301 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9302 if ((val2 != NULL) && (val2 != val)) { 9303 xmlFree(val); 9304 val = (xmlChar *) val2; 9305 } 9306 } 9307 } 9308 ctxt->instate = XML_PARSER_CONTENT; 9309 } else { 9310 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9311 "Specification mandate value for attribute %s\n", 9312 name); 9313 return (NULL); 9314 } 9315 9316 if (*prefix == ctxt->str_xml) { 9317 /* 9318 * Check that xml:lang conforms to the specification 9319 * No more registered as an error, just generate a warning now 9320 * since this was deprecated in XML second edition 9321 */ 9322 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9323 internal_val = xmlStrndup(val, *len); 9324 if (!xmlCheckLanguageID(internal_val)) { 9325 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9326 "Malformed value for xml:lang : %s\n", 9327 internal_val, NULL); 9328 } 9329 } 9330 9331 /* 9332 * Check that xml:space conforms to the specification 9333 */ 9334 if (xmlStrEqual(name, BAD_CAST "space")) { 9335 internal_val = xmlStrndup(val, *len); 9336 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9337 *(ctxt->space) = 0; 9338 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9339 *(ctxt->space) = 1; 9340 else { 9341 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9342 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9343 internal_val, NULL); 9344 } 9345 } 9346 if (internal_val) { 9347 xmlFree(internal_val); 9348 } 9349 } 9350 9351 *value = val; 9352 return (name); 9353 } 9354 /** 9355 * xmlParseStartTag2: 9356 * @ctxt: an XML parser context 9357 * 9358 * parse a start of tag either for rule element or 9359 * EmptyElement. In both case we don't parse the tag closing chars. 9360 * This routine is called when running SAX2 parsing 9361 * 9362 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9363 * 9364 * [ WFC: Unique Att Spec ] 9365 * No attribute name may appear more than once in the same start-tag or 9366 * empty-element tag. 9367 * 9368 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9369 * 9370 * [ WFC: Unique Att Spec ] 9371 * No attribute name may appear more than once in the same start-tag or 9372 * empty-element tag. 9373 * 9374 * With namespace: 9375 * 9376 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9377 * 9378 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9379 * 9380 * Returns the element name parsed 9381 */ 9382 9383 static const xmlChar * 9384 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9385 const xmlChar **URI, int *tlen) { 9386 const xmlChar *localname; 9387 const xmlChar *prefix; 9388 const xmlChar *attname; 9389 const xmlChar *aprefix; 9390 const xmlChar *nsname; 9391 xmlChar *attvalue; 9392 const xmlChar **atts = ctxt->atts; 9393 int maxatts = ctxt->maxatts; 9394 int nratts, nbatts, nbdef; 9395 int i, j, nbNs, attval, oldline, oldcol, inputNr; 9396 const xmlChar *base; 9397 unsigned long cur; 9398 int nsNr = ctxt->nsNr; 9399 9400 if (RAW != '<') return(NULL); 9401 NEXT1; 9402 9403 /* 9404 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9405 * point since the attribute values may be stored as pointers to 9406 * the buffer and calling SHRINK would destroy them ! 9407 * The Shrinking is only possible once the full set of attribute 9408 * callbacks have been done. 9409 */ 9410 reparse: 9411 SHRINK; 9412 base = ctxt->input->base; 9413 cur = ctxt->input->cur - ctxt->input->base; 9414 inputNr = ctxt->inputNr; 9415 oldline = ctxt->input->line; 9416 oldcol = ctxt->input->col; 9417 nbatts = 0; 9418 nratts = 0; 9419 nbdef = 0; 9420 nbNs = 0; 9421 attval = 0; 9422 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9423 ctxt->nsNr = nsNr; 9424 9425 localname = xmlParseQName(ctxt, &prefix); 9426 if (localname == NULL) { 9427 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9428 "StartTag: invalid element name\n"); 9429 return(NULL); 9430 } 9431 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9432 9433 /* 9434 * Now parse the attributes, it ends up with the ending 9435 * 9436 * (S Attribute)* S? 9437 */ 9438 SKIP_BLANKS; 9439 GROW; 9440 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9441 goto base_changed; 9442 9443 while (((RAW != '>') && 9444 ((RAW != '/') || (NXT(1) != '>')) && 9445 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9446 const xmlChar *q = CUR_PTR; 9447 unsigned int cons = ctxt->input->consumed; 9448 int len = -1, alloc = 0; 9449 9450 attname = xmlParseAttribute2(ctxt, prefix, localname, 9451 &aprefix, &attvalue, &len, &alloc); 9452 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) { 9453 if ((attvalue != NULL) && (alloc != 0)) 9454 xmlFree(attvalue); 9455 attvalue = NULL; 9456 goto base_changed; 9457 } 9458 if ((attname != NULL) && (attvalue != NULL)) { 9459 if (len < 0) len = xmlStrlen(attvalue); 9460 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9461 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9462 xmlURIPtr uri; 9463 9464 if (URL == NULL) { 9465 xmlErrMemory(ctxt, "dictionary allocation failure"); 9466 if ((attvalue != NULL) && (alloc != 0)) 9467 xmlFree(attvalue); 9468 return(NULL); 9469 } 9470 if (*URL != 0) { 9471 uri = xmlParseURI((const char *) URL); 9472 if (uri == NULL) { 9473 xmlNsErr(ctxt, XML_WAR_NS_URI, 9474 "xmlns: '%s' is not a valid URI\n", 9475 URL, NULL, NULL); 9476 } else { 9477 if (uri->scheme == NULL) { 9478 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9479 "xmlns: URI %s is not absolute\n", 9480 URL, NULL, NULL); 9481 } 9482 xmlFreeURI(uri); 9483 } 9484 if (URL == ctxt->str_xml_ns) { 9485 if (attname != ctxt->str_xml) { 9486 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 
9487 "xml namespace URI cannot be the default namespace\n", 9488 NULL, NULL, NULL); 9489 } 9490 goto skip_default_ns; 9491 } 9492 if ((len == 29) && 9493 (xmlStrEqual(URL, 9494 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9495 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9496 "reuse of the xmlns namespace name is forbidden\n", 9497 NULL, NULL, NULL); 9498 goto skip_default_ns; 9499 } 9500 } 9501 /* 9502 * check that it's not a defined namespace 9503 */ 9504 for (j = 1;j <= nbNs;j++) 9505 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9506 break; 9507 if (j <= nbNs) 9508 xmlErrAttributeDup(ctxt, NULL, attname); 9509 else 9510 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9511 skip_default_ns: 9512 if ((attvalue != NULL) && (alloc != 0)) { 9513 xmlFree(attvalue); 9514 attvalue = NULL; 9515 } 9516 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9517 break; 9518 if (!IS_BLANK_CH(RAW)) { 9519 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9520 "attributes construct error\n"); 9521 break; 9522 } 9523 SKIP_BLANKS; 9524 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9525 goto base_changed; 9526 continue; 9527 } 9528 if (aprefix == ctxt->str_xmlns) { 9529 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9530 xmlURIPtr uri; 9531 9532 if (attname == ctxt->str_xml) { 9533 if (URL != ctxt->str_xml_ns) { 9534 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9535 "xml namespace prefix mapped to wrong URI\n", 9536 NULL, NULL, NULL); 9537 } 9538 /* 9539 * Do not keep a namespace definition node 9540 */ 9541 goto skip_ns; 9542 } 9543 if (URL == ctxt->str_xml_ns) { 9544 if (attname != ctxt->str_xml) { 9545 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9546 "xml namespace URI mapped to wrong prefix\n", 9547 NULL, NULL, NULL); 9548 } 9549 goto skip_ns; 9550 } 9551 if (attname == ctxt->str_xmlns) { 9552 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9553 "redefinition of the xmlns prefix is forbidden\n", 9554 NULL, NULL, NULL); 9555 goto skip_ns; 9556 } 9557 if ((len == 29) && 
9558 (xmlStrEqual(URL, 9559 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9560 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9561 "reuse of the xmlns namespace name is forbidden\n", 9562 NULL, NULL, NULL); 9563 goto skip_ns; 9564 } 9565 if ((URL == NULL) || (URL[0] == 0)) { 9566 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9567 "xmlns:%s: Empty XML namespace is not allowed\n", 9568 attname, NULL, NULL); 9569 goto skip_ns; 9570 } else { 9571 uri = xmlParseURI((const char *) URL); 9572 if (uri == NULL) { 9573 xmlNsErr(ctxt, XML_WAR_NS_URI, 9574 "xmlns:%s: '%s' is not a valid URI\n", 9575 attname, URL, NULL); 9576 } else { 9577 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9578 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9579 "xmlns:%s: URI %s is not absolute\n", 9580 attname, URL, NULL); 9581 } 9582 xmlFreeURI(uri); 9583 } 9584 } 9585 9586 /* 9587 * check that it's not a defined namespace 9588 */ 9589 for (j = 1;j <= nbNs;j++) 9590 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9591 break; 9592 if (j <= nbNs) 9593 xmlErrAttributeDup(ctxt, aprefix, attname); 9594 else 9595 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9596 skip_ns: 9597 if ((attvalue != NULL) && (alloc != 0)) { 9598 xmlFree(attvalue); 9599 attvalue = NULL; 9600 } 9601 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9602 break; 9603 if (!IS_BLANK_CH(RAW)) { 9604 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9605 "attributes construct error\n"); 9606 break; 9607 } 9608 SKIP_BLANKS; 9609 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9610 goto base_changed; 9611 continue; 9612 } 9613 9614 /* 9615 * Add the pair to atts 9616 */ 9617 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9618 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9619 if (attvalue[len] == 0) 9620 xmlFree(attvalue); 9621 goto failed; 9622 } 9623 maxatts = ctxt->maxatts; 9624 atts = ctxt->atts; 9625 } 9626 ctxt->attallocs[nratts++] = alloc; 9627 atts[nbatts++] = attname; 9628 atts[nbatts++] = aprefix; 9629 atts[nbatts++] = 
NULL; /* the URI will be fetched later */ 9630 atts[nbatts++] = attvalue; 9631 attvalue += len; 9632 atts[nbatts++] = attvalue; 9633 /* 9634 * tag if some deallocation is needed 9635 */ 9636 if (alloc != 0) attval = 1; 9637 } else { 9638 if ((attvalue != NULL) && (attvalue[len] == 0)) 9639 xmlFree(attvalue); 9640 } 9641 9642 failed: 9643 9644 GROW 9645 if (ctxt->instate == XML_PARSER_EOF) 9646 break; 9647 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9648 goto base_changed; 9649 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9650 break; 9651 if (!IS_BLANK_CH(RAW)) { 9652 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9653 "attributes construct error\n"); 9654 break; 9655 } 9656 SKIP_BLANKS; 9657 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9658 (attname == NULL) && (attvalue == NULL)) { 9659 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9660 "xmlParseStartTag: problem parsing attributes\n"); 9661 break; 9662 } 9663 GROW; 9664 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9665 goto base_changed; 9666 } 9667 9668 /* 9669 * The attributes defaulting 9670 */ 9671 if (ctxt->attsDefault != NULL) { 9672 xmlDefAttrsPtr defaults; 9673 9674 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9675 if (defaults != NULL) { 9676 for (i = 0;i < defaults->nbAttrs;i++) { 9677 attname = defaults->values[5 * i]; 9678 aprefix = defaults->values[5 * i + 1]; 9679 9680 /* 9681 * special work for namespaces defaulted defs 9682 */ 9683 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9684 /* 9685 * check that it's not a defined namespace 9686 */ 9687 for (j = 1;j <= nbNs;j++) 9688 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9689 break; 9690 if (j <= nbNs) continue; 9691 9692 nsname = xmlGetNamespace(ctxt, NULL); 9693 if (nsname != defaults->values[5 * i + 2]) { 9694 if (nsPush(ctxt, NULL, 9695 defaults->values[5 * i + 2]) > 0) 9696 nbNs++; 9697 } 9698 } else if (aprefix == ctxt->str_xmlns) { 9699 /* 9700 * check that 
it's not a defined namespace 9701 */ 9702 for (j = 1;j <= nbNs;j++) 9703 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9704 break; 9705 if (j <= nbNs) continue; 9706 9707 nsname = xmlGetNamespace(ctxt, attname); 9708 if (nsname != defaults->values[2]) { 9709 if (nsPush(ctxt, attname, 9710 defaults->values[5 * i + 2]) > 0) 9711 nbNs++; 9712 } 9713 } else { 9714 /* 9715 * check that it's not a defined attribute 9716 */ 9717 for (j = 0;j < nbatts;j+=5) { 9718 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9719 break; 9720 } 9721 if (j < nbatts) continue; 9722 9723 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9724 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9725 return(NULL); 9726 } 9727 maxatts = ctxt->maxatts; 9728 atts = ctxt->atts; 9729 } 9730 atts[nbatts++] = attname; 9731 atts[nbatts++] = aprefix; 9732 if (aprefix == NULL) 9733 atts[nbatts++] = NULL; 9734 else 9735 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9736 atts[nbatts++] = defaults->values[5 * i + 2]; 9737 atts[nbatts++] = defaults->values[5 * i + 3]; 9738 if ((ctxt->standalone == 1) && 9739 (defaults->values[5 * i + 4] != NULL)) { 9740 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9741 "standalone: attribute %s on %s defaulted from external subset\n", 9742 attname, localname); 9743 } 9744 nbdef++; 9745 } 9746 } 9747 } 9748 } 9749 9750 /* 9751 * The attributes checkings 9752 */ 9753 for (i = 0; i < nbatts;i += 5) { 9754 /* 9755 * The default namespace does not apply to attribute names. 9756 */ 9757 if (atts[i + 1] != NULL) { 9758 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9759 if (nsname == NULL) { 9760 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9761 "Namespace prefix %s for %s on %s is not defined\n", 9762 atts[i + 1], atts[i], localname); 9763 } 9764 atts[i + 2] = nsname; 9765 } else 9766 nsname = NULL; 9767 /* 9768 * [ WFC: Unique Att Spec ] 9769 * No attribute name may appear more than once in the same 9770 * start-tag or empty-element tag. 
9771 * As extended by the Namespace in XML REC. 9772 */ 9773 for (j = 0; j < i;j += 5) { 9774 if (atts[i] == atts[j]) { 9775 if (atts[i+1] == atts[j+1]) { 9776 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9777 break; 9778 } 9779 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9780 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9781 "Namespaced Attribute %s in '%s' redefined\n", 9782 atts[i], nsname, NULL); 9783 break; 9784 } 9785 } 9786 } 9787 } 9788 9789 nsname = xmlGetNamespace(ctxt, prefix); 9790 if ((prefix != NULL) && (nsname == NULL)) { 9791 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9792 "Namespace prefix %s on %s is not defined\n", 9793 prefix, localname, NULL); 9794 } 9795 *pref = prefix; 9796 *URI = nsname; 9797 9798 /* 9799 * SAX: Start of Element ! 9800 */ 9801 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9802 (!ctxt->disableSAX)) { 9803 if (nbNs > 0) 9804 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9805 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9806 nbatts / 5, nbdef, atts); 9807 else 9808 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9809 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9810 } 9811 9812 /* 9813 * Free up attribute allocated strings if needed 9814 */ 9815 if (attval != 0) { 9816 for (i = 3,j = 0; j < nratts;i += 5,j++) 9817 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9818 xmlFree((xmlChar *) atts[i]); 9819 } 9820 9821 return(localname); 9822 9823 base_changed: 9824 /* 9825 * the attribute strings are valid iif the base didn't changed 9826 */ 9827 if (attval != 0) { 9828 for (i = 3,j = 0; j < nratts;i += 5,j++) 9829 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9830 xmlFree((xmlChar *) atts[i]); 9831 } 9832 9833 /* 9834 * We can't switch from one entity to another in the middle 9835 * of a start tag 9836 */ 9837 if (inputNr != ctxt->inputNr) { 9838 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 9839 "Start tag doesn't start and stop in the same entity\n"); 9840 
return(NULL); 9841 } 9842 9843 ctxt->input->cur = ctxt->input->base + cur; 9844 ctxt->input->line = oldline; 9845 ctxt->input->col = oldcol; 9846 if (ctxt->wellFormed == 1) { 9847 goto reparse; 9848 } 9849 return(NULL); 9850 } 9851 9852 /** 9853 * xmlParseEndTag2: 9854 * @ctxt: an XML parser context 9855 * @line: line of the start tag 9856 * @nsNr: number of namespaces on the start tag 9857 * 9858 * parse an end of tag 9859 * 9860 * [42] ETag ::= '</' Name S? '>' 9861 * 9862 * With namespace 9863 * 9864 * [NS 9] ETag ::= '</' QName S? '>' 9865 */ 9866 9867 static void 9868 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9869 const xmlChar *URI, int line, int nsNr, int tlen) { 9870 const xmlChar *name; 9871 size_t curLength; 9872 9873 GROW; 9874 if ((RAW != '<') || (NXT(1) != '/')) { 9875 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9876 return; 9877 } 9878 SKIP(2); 9879 9880 curLength = ctxt->input->end - ctxt->input->cur; 9881 if ((tlen > 0) && (curLength >= (size_t)tlen) && 9882 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9883 if ((curLength >= (size_t)(tlen + 1)) && 9884 (ctxt->input->cur[tlen] == '>')) { 9885 ctxt->input->cur += tlen + 1; 9886 ctxt->input->col += tlen + 1; 9887 goto done; 9888 } 9889 ctxt->input->cur += tlen; 9890 ctxt->input->col += tlen; 9891 name = (xmlChar*)1; 9892 } else { 9893 if (prefix == NULL) 9894 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9895 else 9896 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9897 } 9898 9899 /* 9900 * We should definitely be at the ending "S? '>'" part 9901 */ 9902 GROW; 9903 if (ctxt->instate == XML_PARSER_EOF) 9904 return; 9905 SKIP_BLANKS; 9906 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9907 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9908 } else 9909 NEXT1; 9910 9911 /* 9912 * [ WFC: Element Type Match ] 9913 * The Name in an element's end-tag must match the element type in the 9914 * start-tag. 
9915 * 9916 */ 9917 if (name != (xmlChar*)1) { 9918 if (name == NULL) name = BAD_CAST "unparseable"; 9919 if ((line == 0) && (ctxt->node != NULL)) 9920 line = ctxt->node->line; 9921 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9922 "Opening and ending tag mismatch: %s line %d and %s\n", 9923 ctxt->name, line, name); 9924 } 9925 9926 /* 9927 * SAX: End of Tag 9928 */ 9929 done: 9930 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9931 (!ctxt->disableSAX)) 9932 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9933 9934 spacePop(ctxt); 9935 if (nsNr != 0) 9936 nsPop(ctxt, nsNr); 9937 return; 9938 } 9939 9940 /** 9941 * xmlParseCDSect: 9942 * @ctxt: an XML parser context 9943 * 9944 * Parse escaped pure raw content. 9945 * 9946 * [18] CDSect ::= CDStart CData CDEnd 9947 * 9948 * [19] CDStart ::= '<![CDATA[' 9949 * 9950 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9951 * 9952 * [21] CDEnd ::= ']]>' 9953 */ 9954 void 9955 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9956 xmlChar *buf = NULL; 9957 int len = 0; 9958 int size = XML_PARSER_BUFFER_SIZE; 9959 int r, rl; 9960 int s, sl; 9961 int cur, l; 9962 int count = 0; 9963 9964 /* Check 2.6.0 was NXT(0) not RAW */ 9965 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9966 SKIP(9); 9967 } else 9968 return; 9969 9970 ctxt->instate = XML_PARSER_CDATA_SECTION; 9971 r = CUR_CHAR(rl); 9972 if (!IS_CHAR(r)) { 9973 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9974 ctxt->instate = XML_PARSER_CONTENT; 9975 return; 9976 } 9977 NEXTL(rl); 9978 s = CUR_CHAR(sl); 9979 if (!IS_CHAR(s)) { 9980 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9981 ctxt->instate = XML_PARSER_CONTENT; 9982 return; 9983 } 9984 NEXTL(sl); 9985 cur = CUR_CHAR(l); 9986 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9987 if (buf == NULL) { 9988 xmlErrMemory(ctxt, NULL); 9989 return; 9990 } 9991 while (IS_CHAR(cur) && 9992 ((r != ']') || (s != ']') || (cur != '>'))) { 9993 if (len + 
5 >= size) { 9994 xmlChar *tmp; 9995 9996 if ((size > XML_MAX_TEXT_LENGTH) && 9997 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9998 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9999 "CData section too big found", NULL); 10000 xmlFree (buf); 10001 return; 10002 } 10003 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 10004 if (tmp == NULL) { 10005 xmlFree(buf); 10006 xmlErrMemory(ctxt, NULL); 10007 return; 10008 } 10009 buf = tmp; 10010 size *= 2; 10011 } 10012 COPY_BUF(rl,buf,len,r); 10013 r = s; 10014 rl = sl; 10015 s = cur; 10016 sl = l; 10017 count++; 10018 if (count > 50) { 10019 GROW; 10020 if (ctxt->instate == XML_PARSER_EOF) { 10021 xmlFree(buf); 10022 return; 10023 } 10024 count = 0; 10025 } 10026 NEXTL(l); 10027 cur = CUR_CHAR(l); 10028 } 10029 buf[len] = 0; 10030 ctxt->instate = XML_PARSER_CONTENT; 10031 if (cur != '>') { 10032 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 10033 "CData section not finished\n%.50s\n", buf); 10034 xmlFree(buf); 10035 return; 10036 } 10037 NEXTL(l); 10038 10039 /* 10040 * OK the buffer is to be consumed as cdata. 10041 */ 10042 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 10043 if (ctxt->sax->cdataBlock != NULL) 10044 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 10045 else if (ctxt->sax->characters != NULL) 10046 ctxt->sax->characters(ctxt->userData, buf, len); 10047 } 10048 xmlFree(buf); 10049 } 10050 10051 /** 10052 * xmlParseContent: 10053 * @ctxt: an XML parser context 10054 * 10055 * Parse a content: 10056 * 10057 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10058 */ 10059 10060 void 10061 xmlParseContent(xmlParserCtxtPtr ctxt) { 10062 GROW; 10063 while ((RAW != 0) && 10064 ((RAW != '<') || (NXT(1) != '/')) && 10065 (ctxt->instate != XML_PARSER_EOF)) { 10066 const xmlChar *test = CUR_PTR; 10067 unsigned int cons = ctxt->input->consumed; 10068 const xmlChar *cur = ctxt->input->cur; 10069 10070 /* 10071 * First case : a Processing Instruction. 
10072 */ 10073 if ((*cur == '<') && (cur[1] == '?')) { 10074 xmlParsePI(ctxt); 10075 } 10076 10077 /* 10078 * Second case : a CDSection 10079 */ 10080 /* 2.6.0 test was *cur not RAW */ 10081 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 10082 xmlParseCDSect(ctxt); 10083 } 10084 10085 /* 10086 * Third case : a comment 10087 */ 10088 else if ((*cur == '<') && (NXT(1) == '!') && 10089 (NXT(2) == '-') && (NXT(3) == '-')) { 10090 xmlParseComment(ctxt); 10091 ctxt->instate = XML_PARSER_CONTENT; 10092 } 10093 10094 /* 10095 * Fourth case : a sub-element. 10096 */ 10097 else if (*cur == '<') { 10098 xmlParseElement(ctxt); 10099 } 10100 10101 /* 10102 * Fifth case : a reference. If if has not been resolved, 10103 * parsing returns it's Name, create the node 10104 */ 10105 10106 else if (*cur == '&') { 10107 xmlParseReference(ctxt); 10108 } 10109 10110 /* 10111 * Last case, text. Note that References are handled directly. 10112 */ 10113 else { 10114 xmlParseCharData(ctxt, 0); 10115 } 10116 10117 GROW; 10118 /* 10119 * Pop-up of finished entities. 10120 */ 10121 while ((RAW == 0) && (ctxt->inputNr > 1)) 10122 xmlPopInput(ctxt); 10123 SHRINK; 10124 10125 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 10126 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 10127 "detected an error in element content\n"); 10128 xmlHaltParser(ctxt); 10129 break; 10130 } 10131 } 10132 } 10133 10134 /** 10135 * xmlParseElement: 10136 * @ctxt: an XML parser context 10137 * 10138 * parse an XML element, this is highly recursive 10139 * 10140 * [39] element ::= EmptyElemTag | STag content ETag 10141 * 10142 * [ WFC: Element Type Match ] 10143 * The Name in an element's end-tag must match the element type in the 10144 * start-tag. 
10145 * 10146 */ 10147 10148 void 10149 xmlParseElement(xmlParserCtxtPtr ctxt) { 10150 const xmlChar *name; 10151 const xmlChar *prefix = NULL; 10152 const xmlChar *URI = NULL; 10153 xmlParserNodeInfo node_info; 10154 int line, tlen = 0; 10155 xmlNodePtr ret; 10156 int nsNr = ctxt->nsNr; 10157 10158 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 10159 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 10160 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 10161 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 10162 xmlParserMaxDepth); 10163 xmlHaltParser(ctxt); 10164 return; 10165 } 10166 10167 /* Capture start position */ 10168 if (ctxt->record_info) { 10169 node_info.begin_pos = ctxt->input->consumed + 10170 (CUR_PTR - ctxt->input->base); 10171 node_info.begin_line = ctxt->input->line; 10172 } 10173 10174 if (ctxt->spaceNr == 0) 10175 spacePush(ctxt, -1); 10176 else if (*ctxt->space == -2) 10177 spacePush(ctxt, -1); 10178 else 10179 spacePush(ctxt, *ctxt->space); 10180 10181 line = ctxt->input->line; 10182 #ifdef LIBXML_SAX1_ENABLED 10183 if (ctxt->sax2) 10184 #endif /* LIBXML_SAX1_ENABLED */ 10185 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10186 #ifdef LIBXML_SAX1_ENABLED 10187 else 10188 name = xmlParseStartTag(ctxt); 10189 #endif /* LIBXML_SAX1_ENABLED */ 10190 if (ctxt->instate == XML_PARSER_EOF) 10191 return; 10192 if (name == NULL) { 10193 spacePop(ctxt); 10194 return; 10195 } 10196 namePush(ctxt, name); 10197 ret = ctxt->node; 10198 10199 #ifdef LIBXML_VALID_ENABLED 10200 /* 10201 * [ VC: Root Element Type ] 10202 * The Name in the document type declaration must match the element 10203 * type of the root element. 10204 */ 10205 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10206 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10207 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10208 #endif /* LIBXML_VALID_ENABLED */ 10209 10210 /* 10211 * Check for an Empty Element. 
10212 */ 10213 if ((RAW == '/') && (NXT(1) == '>')) { 10214 SKIP(2); 10215 if (ctxt->sax2) { 10216 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10217 (!ctxt->disableSAX)) 10218 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10219 #ifdef LIBXML_SAX1_ENABLED 10220 } else { 10221 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10222 (!ctxt->disableSAX)) 10223 ctxt->sax->endElement(ctxt->userData, name); 10224 #endif /* LIBXML_SAX1_ENABLED */ 10225 } 10226 namePop(ctxt); 10227 spacePop(ctxt); 10228 if (nsNr != ctxt->nsNr) 10229 nsPop(ctxt, ctxt->nsNr - nsNr); 10230 if ( ret != NULL && ctxt->record_info ) { 10231 node_info.end_pos = ctxt->input->consumed + 10232 (CUR_PTR - ctxt->input->base); 10233 node_info.end_line = ctxt->input->line; 10234 node_info.node = ret; 10235 xmlParserAddNodeInfo(ctxt, &node_info); 10236 } 10237 return; 10238 } 10239 if (RAW == '>') { 10240 NEXT1; 10241 } else { 10242 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10243 "Couldn't find end of Start Tag %s line %d\n", 10244 name, line, NULL); 10245 10246 /* 10247 * end of parsing of this node. 10248 */ 10249 nodePop(ctxt); 10250 namePop(ctxt); 10251 spacePop(ctxt); 10252 if (nsNr != ctxt->nsNr) 10253 nsPop(ctxt, ctxt->nsNr - nsNr); 10254 10255 /* 10256 * Capture end position and add node 10257 */ 10258 if ( ret != NULL && ctxt->record_info ) { 10259 node_info.end_pos = ctxt->input->consumed + 10260 (CUR_PTR - ctxt->input->base); 10261 node_info.end_line = ctxt->input->line; 10262 node_info.node = ret; 10263 xmlParserAddNodeInfo(ctxt, &node_info); 10264 } 10265 return; 10266 } 10267 10268 /* 10269 * Parse the content of the element: 10270 */ 10271 xmlParseContent(ctxt); 10272 if (ctxt->instate == XML_PARSER_EOF) 10273 return; 10274 if (!IS_BYTE_CHAR(RAW)) { 10275 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 10276 "Premature end of data in tag %s line %d\n", 10277 name, line, NULL); 10278 10279 /* 10280 * end of parsing of this node. 
10281 */ 10282 nodePop(ctxt); 10283 namePop(ctxt); 10284 spacePop(ctxt); 10285 if (nsNr != ctxt->nsNr) 10286 nsPop(ctxt, ctxt->nsNr - nsNr); 10287 return; 10288 } 10289 10290 /* 10291 * parse the end of tag: '</' should be here. 10292 */ 10293 if (ctxt->sax2) { 10294 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 10295 namePop(ctxt); 10296 } 10297 #ifdef LIBXML_SAX1_ENABLED 10298 else 10299 xmlParseEndTag1(ctxt, line); 10300 #endif /* LIBXML_SAX1_ENABLED */ 10301 10302 /* 10303 * Capture end position and add node 10304 */ 10305 if ( ret != NULL && ctxt->record_info ) { 10306 node_info.end_pos = ctxt->input->consumed + 10307 (CUR_PTR - ctxt->input->base); 10308 node_info.end_line = ctxt->input->line; 10309 node_info.node = ret; 10310 xmlParserAddNodeInfo(ctxt, &node_info); 10311 } 10312 } 10313 10314 /** 10315 * xmlParseVersionNum: 10316 * @ctxt: an XML parser context 10317 * 10318 * parse the XML version value. 10319 * 10320 * [26] VersionNum ::= '1.' [0-9]+ 10321 * 10322 * In practice allow [0-9].[0-9]+ at that level 10323 * 10324 * Returns the string giving the XML version number, or NULL 10325 */ 10326 xmlChar * 10327 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10328 xmlChar *buf = NULL; 10329 int len = 0; 10330 int size = 10; 10331 xmlChar cur; 10332 10333 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10334 if (buf == NULL) { 10335 xmlErrMemory(ctxt, NULL); 10336 return(NULL); 10337 } 10338 cur = CUR; 10339 if (!((cur >= '0') && (cur <= '9'))) { 10340 xmlFree(buf); 10341 return(NULL); 10342 } 10343 buf[len++] = cur; 10344 NEXT; 10345 cur=CUR; 10346 if (cur != '.') { 10347 xmlFree(buf); 10348 return(NULL); 10349 } 10350 buf[len++] = cur; 10351 NEXT; 10352 cur=CUR; 10353 while ((cur >= '0') && (cur <= '9')) { 10354 if (len + 1 >= size) { 10355 xmlChar *tmp; 10356 10357 size *= 2; 10358 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10359 if (tmp == NULL) { 10360 xmlFree(buf); 10361 xmlErrMemory(ctxt, NULL); 10362 
return(NULL); 10363 } 10364 buf = tmp; 10365 } 10366 buf[len++] = cur; 10367 NEXT; 10368 cur=CUR; 10369 } 10370 buf[len] = 0; 10371 return(buf); 10372 } 10373 10374 /** 10375 * xmlParseVersionInfo: 10376 * @ctxt: an XML parser context 10377 * 10378 * parse the XML version. 10379 * 10380 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10381 * 10382 * [25] Eq ::= S? '=' S? 10383 * 10384 * Returns the version string, e.g. "1.0" 10385 */ 10386 10387 xmlChar * 10388 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10389 xmlChar *version = NULL; 10390 10391 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10392 SKIP(7); 10393 SKIP_BLANKS; 10394 if (RAW != '=') { 10395 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10396 return(NULL); 10397 } 10398 NEXT; 10399 SKIP_BLANKS; 10400 if (RAW == '"') { 10401 NEXT; 10402 version = xmlParseVersionNum(ctxt); 10403 if (RAW != '"') { 10404 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10405 } else 10406 NEXT; 10407 } else if (RAW == '\''){ 10408 NEXT; 10409 version = xmlParseVersionNum(ctxt); 10410 if (RAW != '\'') { 10411 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10412 } else 10413 NEXT; 10414 } else { 10415 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10416 } 10417 } 10418 return(version); 10419 } 10420 10421 /** 10422 * xmlParseEncName: 10423 * @ctxt: an XML parser context 10424 * 10425 * parse the XML encoding name 10426 * 10427 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10428 * 10429 * Returns the encoding name value or NULL 10430 */ 10431 xmlChar * 10432 xmlParseEncName(xmlParserCtxtPtr ctxt) { 10433 xmlChar *buf = NULL; 10434 int len = 0; 10435 int size = 10; 10436 xmlChar cur; 10437 10438 cur = CUR; 10439 if (((cur >= 'a') && (cur <= 'z')) || 10440 ((cur >= 'A') && (cur <= 'Z'))) { 10441 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10442 if (buf == NULL) { 10443 xmlErrMemory(ctxt, NULL); 10444 return(NULL); 10445 } 10446 10447 buf[len++] = cur; 10448 
NEXT; 10449 cur = CUR; 10450 while (((cur >= 'a') && (cur <= 'z')) || 10451 ((cur >= 'A') && (cur <= 'Z')) || 10452 ((cur >= '0') && (cur <= '9')) || 10453 (cur == '.') || (cur == '_') || 10454 (cur == '-')) { 10455 if (len + 1 >= size) { 10456 xmlChar *tmp; 10457 10458 size *= 2; 10459 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10460 if (tmp == NULL) { 10461 xmlErrMemory(ctxt, NULL); 10462 xmlFree(buf); 10463 return(NULL); 10464 } 10465 buf = tmp; 10466 } 10467 buf[len++] = cur; 10468 NEXT; 10469 cur = CUR; 10470 if (cur == 0) { 10471 SHRINK; 10472 GROW; 10473 cur = CUR; 10474 } 10475 } 10476 buf[len] = 0; 10477 } else { 10478 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10479 } 10480 return(buf); 10481 } 10482 10483 /** 10484 * xmlParseEncodingDecl: 10485 * @ctxt: an XML parser context 10486 * 10487 * parse the XML encoding declaration 10488 * 10489 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10490 * 10491 * this setups the conversion filters. 
10492 * 10493 * Returns the encoding value or NULL 10494 */ 10495 10496 const xmlChar * 10497 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10498 xmlChar *encoding = NULL; 10499 10500 SKIP_BLANKS; 10501 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10502 SKIP(8); 10503 SKIP_BLANKS; 10504 if (RAW != '=') { 10505 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10506 return(NULL); 10507 } 10508 NEXT; 10509 SKIP_BLANKS; 10510 if (RAW == '"') { 10511 NEXT; 10512 encoding = xmlParseEncName(ctxt); 10513 if (RAW != '"') { 10514 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10515 xmlFree((xmlChar *) encoding); 10516 return(NULL); 10517 } else 10518 NEXT; 10519 } else if (RAW == '\''){ 10520 NEXT; 10521 encoding = xmlParseEncName(ctxt); 10522 if (RAW != '\'') { 10523 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10524 xmlFree((xmlChar *) encoding); 10525 return(NULL); 10526 } else 10527 NEXT; 10528 } else { 10529 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10530 } 10531 10532 /* 10533 * Non standard parsing, allowing the user to ignore encoding 10534 */ 10535 if (ctxt->options & XML_PARSE_IGNORE_ENC) { 10536 xmlFree((xmlChar *) encoding); 10537 return(NULL); 10538 } 10539 10540 /* 10541 * UTF-16 encoding stwich has already taken place at this stage, 10542 * more over the little-endian/big-endian selection is already done 10543 */ 10544 if ((encoding != NULL) && 10545 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10546 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10547 /* 10548 * If no encoding was passed to the parser, that we are 10549 * using UTF-16 and no decoder is present i.e. 
the 10550 * document is apparently UTF-8 compatible, then raise an 10551 * encoding mismatch fatal error 10552 */ 10553 if ((ctxt->encoding == NULL) && 10554 (ctxt->input->buf != NULL) && 10555 (ctxt->input->buf->encoder == NULL)) { 10556 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10557 "Document labelled UTF-16 but has UTF-8 content\n"); 10558 } 10559 if (ctxt->encoding != NULL) 10560 xmlFree((xmlChar *) ctxt->encoding); 10561 ctxt->encoding = encoding; 10562 } 10563 /* 10564 * UTF-8 encoding is handled natively 10565 */ 10566 else if ((encoding != NULL) && 10567 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10568 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10569 if (ctxt->encoding != NULL) 10570 xmlFree((xmlChar *) ctxt->encoding); 10571 ctxt->encoding = encoding; 10572 } 10573 else if (encoding != NULL) { 10574 xmlCharEncodingHandlerPtr handler; 10575 10576 if (ctxt->input->encoding != NULL) 10577 xmlFree((xmlChar *) ctxt->input->encoding); 10578 ctxt->input->encoding = encoding; 10579 10580 handler = xmlFindCharEncodingHandler((const char *) encoding); 10581 if (handler != NULL) { 10582 if (xmlSwitchToEncoding(ctxt, handler) < 0) { 10583 /* failed to convert */ 10584 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 10585 return(NULL); 10586 } 10587 } else { 10588 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10589 "Unsupported encoding %s\n", encoding); 10590 return(NULL); 10591 } 10592 } 10593 } 10594 return(encoding); 10595 } 10596 10597 /** 10598 * xmlParseSDDecl: 10599 * @ctxt: an XML parser context 10600 * 10601 * parse the XML standalone declaration 10602 * 10603 * [32] SDDecl ::= S 'standalone' Eq 10604 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10605 * 10606 * [ VC: Standalone Document Declaration ] 10607 * TODO The standalone document declaration must have the value "no" 10608 * if any external markup declarations contain declarations of: 10609 * - attributes with default values, if elements to which these 10610 * attributes 
apply appear in the document without specifications 10611 * of values for these attributes, or 10612 * - entities (other than amp, lt, gt, apos, quot), if references 10613 * to those entities appear in the document, or 10614 * - attributes with values subject to normalization, where the 10615 * attribute appears in the document with a value which will change 10616 * as a result of normalization, or 10617 * - element types with element content, if white space occurs directly 10618 * within any instance of those types. 10619 * 10620 * Returns: 10621 * 1 if standalone="yes" 10622 * 0 if standalone="no" 10623 * -2 if standalone attribute is missing or invalid 10624 * (A standalone value of -2 means that the XML declaration was found, 10625 * but no value was specified for the standalone attribute). 10626 */ 10627 10628 int 10629 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10630 int standalone = -2; 10631 10632 SKIP_BLANKS; 10633 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10634 SKIP(10); 10635 SKIP_BLANKS; 10636 if (RAW != '=') { 10637 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10638 return(standalone); 10639 } 10640 NEXT; 10641 SKIP_BLANKS; 10642 if (RAW == '\''){ 10643 NEXT; 10644 if ((RAW == 'n') && (NXT(1) == 'o')) { 10645 standalone = 0; 10646 SKIP(2); 10647 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10648 (NXT(2) == 's')) { 10649 standalone = 1; 10650 SKIP(3); 10651 } else { 10652 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10653 } 10654 if (RAW != '\'') { 10655 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10656 } else 10657 NEXT; 10658 } else if (RAW == '"'){ 10659 NEXT; 10660 if ((RAW == 'n') && (NXT(1) == 'o')) { 10661 standalone = 0; 10662 SKIP(2); 10663 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10664 (NXT(2) == 's')) { 10665 standalone = 1; 10666 SKIP(3); 10667 } else { 10668 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10669 } 10670 if (RAW != '"') { 10671 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, 
NULL); 10672 } else 10673 NEXT; 10674 } else { 10675 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10676 } 10677 } 10678 return(standalone); 10679 } 10680 10681 /** 10682 * xmlParseXMLDecl: 10683 * @ctxt: an XML parser context 10684 * 10685 * parse an XML declaration header 10686 * 10687 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10688 */ 10689 10690 void 10691 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10692 xmlChar *version; 10693 10694 /* 10695 * This value for standalone indicates that the document has an 10696 * XML declaration but it does not have a standalone attribute. 10697 * It will be overwritten later if a standalone attribute is found. 10698 */ 10699 ctxt->input->standalone = -2; 10700 10701 /* 10702 * We know that '<?xml' is here. 10703 */ 10704 SKIP(5); 10705 10706 if (!IS_BLANK_CH(RAW)) { 10707 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10708 "Blank needed after '<?xml'\n"); 10709 } 10710 SKIP_BLANKS; 10711 10712 /* 10713 * We must have the VersionInfo here. 
10714 */ 10715 version = xmlParseVersionInfo(ctxt); 10716 if (version == NULL) { 10717 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10718 } else { 10719 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10720 /* 10721 * Changed here for XML-1.0 5th edition 10722 */ 10723 if (ctxt->options & XML_PARSE_OLD10) { 10724 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10725 "Unsupported version '%s'\n", 10726 version); 10727 } else { 10728 if ((version[0] == '1') && ((version[1] == '.'))) { 10729 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10730 "Unsupported version '%s'\n", 10731 version, NULL); 10732 } else { 10733 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10734 "Unsupported version '%s'\n", 10735 version); 10736 } 10737 } 10738 } 10739 if (ctxt->version != NULL) 10740 xmlFree((void *) ctxt->version); 10741 ctxt->version = version; 10742 } 10743 10744 /* 10745 * We may have the encoding declaration 10746 */ 10747 if (!IS_BLANK_CH(RAW)) { 10748 if ((RAW == '?') && (NXT(1) == '>')) { 10749 SKIP(2); 10750 return; 10751 } 10752 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10753 } 10754 xmlParseEncodingDecl(ctxt); 10755 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10756 (ctxt->instate == XML_PARSER_EOF)) { 10757 /* 10758 * The XML REC instructs us to stop parsing right here 10759 */ 10760 return; 10761 } 10762 10763 /* 10764 * We may have the standalone status. 
10765 */ 10766 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10767 if ((RAW == '?') && (NXT(1) == '>')) { 10768 SKIP(2); 10769 return; 10770 } 10771 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10772 } 10773 10774 /* 10775 * We can grow the input buffer freely at that point 10776 */ 10777 GROW; 10778 10779 SKIP_BLANKS; 10780 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10781 10782 SKIP_BLANKS; 10783 if ((RAW == '?') && (NXT(1) == '>')) { 10784 SKIP(2); 10785 } else if (RAW == '>') { 10786 /* Deprecated old WD ... */ 10787 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10788 NEXT; 10789 } else { 10790 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10791 MOVETO_ENDTAG(CUR_PTR); 10792 NEXT; 10793 } 10794 } 10795 10796 /** 10797 * xmlParseMisc: 10798 * @ctxt: an XML parser context 10799 * 10800 * parse an XML Misc* optional field. 10801 * 10802 * [27] Misc ::= Comment | PI | S 10803 */ 10804 10805 void 10806 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10807 while ((ctxt->instate != XML_PARSER_EOF) && 10808 (((RAW == '<') && (NXT(1) == '?')) || 10809 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10810 IS_BLANK_CH(CUR))) { 10811 if ((RAW == '<') && (NXT(1) == '?')) { 10812 xmlParsePI(ctxt); 10813 } else if (IS_BLANK_CH(CUR)) { 10814 NEXT; 10815 } else 10816 xmlParseComment(ctxt); 10817 } 10818 } 10819 10820 /** 10821 * xmlParseDocument: 10822 * @ctxt: an XML parser context 10823 * 10824 * parse an XML document (and build a tree if using the standard SAX 10825 * interface). 10826 * 10827 * [1] document ::= prolog element Misc* 10828 * 10829 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10830 * 10831 * Returns 0, -1 in case of error. the parser context is augmented 10832 * as a result of the parsing. 
10833 */ 10834 10835 int 10836 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10837 xmlChar start[4]; 10838 xmlCharEncoding enc; 10839 10840 xmlInitParser(); 10841 10842 if ((ctxt == NULL) || (ctxt->input == NULL)) 10843 return(-1); 10844 10845 GROW; 10846 10847 /* 10848 * SAX: detecting the level. 10849 */ 10850 xmlDetectSAX2(ctxt); 10851 10852 /* 10853 * SAX: beginning of the document processing. 10854 */ 10855 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10856 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10857 if (ctxt->instate == XML_PARSER_EOF) 10858 return(-1); 10859 10860 if ((ctxt->encoding == NULL) && 10861 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10862 /* 10863 * Get the 4 first bytes and decode the charset 10864 * if enc != XML_CHAR_ENCODING_NONE 10865 * plug some encoding conversion routines. 10866 */ 10867 start[0] = RAW; 10868 start[1] = NXT(1); 10869 start[2] = NXT(2); 10870 start[3] = NXT(3); 10871 enc = xmlDetectCharEncoding(&start[0], 4); 10872 if (enc != XML_CHAR_ENCODING_NONE) { 10873 xmlSwitchEncoding(ctxt, enc); 10874 } 10875 } 10876 10877 10878 if (CUR == 0) { 10879 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10880 return(-1); 10881 } 10882 10883 /* 10884 * Check for the XMLDecl in the Prolog. 10885 * do not GROW here to avoid the detected encoder to decode more 10886 * than just the first line, unless the amount of data is really 10887 * too small to hold "<?xml version="1.0" encoding="foo" 10888 */ 10889 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10890 GROW; 10891 } 10892 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10893 10894 /* 10895 * Note that we will switch encoding on the fly. 
10896 */ 10897 xmlParseXMLDecl(ctxt); 10898 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10899 (ctxt->instate == XML_PARSER_EOF)) { 10900 /* 10901 * The XML REC instructs us to stop parsing right here 10902 */ 10903 return(-1); 10904 } 10905 ctxt->standalone = ctxt->input->standalone; 10906 SKIP_BLANKS; 10907 } else { 10908 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10909 } 10910 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10911 ctxt->sax->startDocument(ctxt->userData); 10912 if (ctxt->instate == XML_PARSER_EOF) 10913 return(-1); 10914 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 10915 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 10916 ctxt->myDoc->compression = ctxt->input->buf->compressed; 10917 } 10918 10919 /* 10920 * The Misc part of the Prolog 10921 */ 10922 GROW; 10923 xmlParseMisc(ctxt); 10924 10925 /* 10926 * Then possibly doc type declaration(s) and more Misc 10927 * (doctypedecl Misc*)? 10928 */ 10929 GROW; 10930 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10931 10932 ctxt->inSubset = 1; 10933 xmlParseDocTypeDecl(ctxt); 10934 if (RAW == '[') { 10935 ctxt->instate = XML_PARSER_DTD; 10936 xmlParseInternalSubset(ctxt); 10937 if (ctxt->instate == XML_PARSER_EOF) 10938 return(-1); 10939 } 10940 10941 /* 10942 * Create and update the external subset. 
10943 */ 10944 ctxt->inSubset = 2; 10945 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10946 (!ctxt->disableSAX)) 10947 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10948 ctxt->extSubSystem, ctxt->extSubURI); 10949 if (ctxt->instate == XML_PARSER_EOF) 10950 return(-1); 10951 ctxt->inSubset = 0; 10952 10953 xmlCleanSpecialAttr(ctxt); 10954 10955 ctxt->instate = XML_PARSER_PROLOG; 10956 xmlParseMisc(ctxt); 10957 } 10958 10959 /* 10960 * Time to start parsing the tree itself 10961 */ 10962 GROW; 10963 if (RAW != '<') { 10964 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10965 "Start tag expected, '<' not found\n"); 10966 } else { 10967 ctxt->instate = XML_PARSER_CONTENT; 10968 xmlParseElement(ctxt); 10969 ctxt->instate = XML_PARSER_EPILOG; 10970 10971 10972 /* 10973 * The Misc part at the end 10974 */ 10975 xmlParseMisc(ctxt); 10976 10977 if (RAW != 0) { 10978 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10979 } 10980 ctxt->instate = XML_PARSER_EOF; 10981 } 10982 10983 /* 10984 * SAX: end of the document processing. 10985 */ 10986 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10987 ctxt->sax->endDocument(ctxt->userData); 10988 10989 /* 10990 * Remove locally kept entity definitions if the tree was not built 10991 */ 10992 if ((ctxt->myDoc != NULL) && 10993 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10994 xmlFreeDoc(ctxt->myDoc); 10995 ctxt->myDoc = NULL; 10996 } 10997 10998 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10999 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 11000 if (ctxt->valid) 11001 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 11002 if (ctxt->nsWellFormed) 11003 ctxt->myDoc->properties |= XML_DOC_NSVALID; 11004 if (ctxt->options & XML_PARSE_OLD10) 11005 ctxt->myDoc->properties |= XML_DOC_OLD10; 11006 } 11007 if (! 
ctxt->wellFormed) { 11008 ctxt->valid = 0; 11009 return(-1); 11010 } 11011 return(0); 11012 } 11013 11014 /** 11015 * xmlParseExtParsedEnt: 11016 * @ctxt: an XML parser context 11017 * 11018 * parse a general parsed entity 11019 * An external general parsed entity is well-formed if it matches the 11020 * production labeled extParsedEnt. 11021 * 11022 * [78] extParsedEnt ::= TextDecl? content 11023 * 11024 * Returns 0, -1 in case of error. the parser context is augmented 11025 * as a result of the parsing. 11026 */ 11027 11028 int 11029 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 11030 xmlChar start[4]; 11031 xmlCharEncoding enc; 11032 11033 if ((ctxt == NULL) || (ctxt->input == NULL)) 11034 return(-1); 11035 11036 xmlDefaultSAXHandlerInit(); 11037 11038 xmlDetectSAX2(ctxt); 11039 11040 GROW; 11041 11042 /* 11043 * SAX: beginning of the document processing. 11044 */ 11045 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11046 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 11047 11048 /* 11049 * Get the 4 first bytes and decode the charset 11050 * if enc != XML_CHAR_ENCODING_NONE 11051 * plug some encoding conversion routines. 11052 */ 11053 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11054 start[0] = RAW; 11055 start[1] = NXT(1); 11056 start[2] = NXT(2); 11057 start[3] = NXT(3); 11058 enc = xmlDetectCharEncoding(start, 4); 11059 if (enc != XML_CHAR_ENCODING_NONE) { 11060 xmlSwitchEncoding(ctxt, enc); 11061 } 11062 } 11063 11064 11065 if (CUR == 0) { 11066 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11067 } 11068 11069 /* 11070 * Check for the XMLDecl in the Prolog. 11071 */ 11072 GROW; 11073 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11074 11075 /* 11076 * Note that we will switch encoding on the fly. 
11077 */ 11078 xmlParseXMLDecl(ctxt); 11079 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11080 /* 11081 * The XML REC instructs us to stop parsing right here 11082 */ 11083 return(-1); 11084 } 11085 SKIP_BLANKS; 11086 } else { 11087 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11088 } 11089 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 11090 ctxt->sax->startDocument(ctxt->userData); 11091 if (ctxt->instate == XML_PARSER_EOF) 11092 return(-1); 11093 11094 /* 11095 * Doing validity checking on chunk doesn't make sense 11096 */ 11097 ctxt->instate = XML_PARSER_CONTENT; 11098 ctxt->validate = 0; 11099 ctxt->loadsubset = 0; 11100 ctxt->depth = 0; 11101 11102 xmlParseContent(ctxt); 11103 if (ctxt->instate == XML_PARSER_EOF) 11104 return(-1); 11105 11106 if ((RAW == '<') && (NXT(1) == '/')) { 11107 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11108 } else if (RAW != 0) { 11109 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11110 } 11111 11112 /* 11113 * SAX: end of the document processing. 11114 */ 11115 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11116 ctxt->sax->endDocument(ctxt->userData); 11117 11118 if (! ctxt->wellFormed) return(-1); 11119 return(0); 11120 } 11121 11122 #ifdef LIBXML_PUSH_ENABLED 11123 /************************************************************************ 11124 * * 11125 * Progressive parsing interfaces * 11126 * * 11127 ************************************************************************/ 11128 11129 /** 11130 * xmlParseLookupSequence: 11131 * @ctxt: an XML parser context 11132 * @first: the first char to lookup 11133 * @next: the next char to lookup or zero 11134 * @third: the next char to lookup or zero 11135 * 11136 * Try to find if a sequence (first, next, third) or just (first next) or 11137 * (first) is available in the input stream. 
11138 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 11139 * to avoid rescanning sequences of bytes, it DOES change the state of the 11140 * parser, do not use liberally. 11141 * 11142 * Returns the index to the current parsing point if the full sequence 11143 * is available, -1 otherwise. 11144 */ 11145 static int 11146 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 11147 xmlChar next, xmlChar third) { 11148 int base, len; 11149 xmlParserInputPtr in; 11150 const xmlChar *buf; 11151 11152 in = ctxt->input; 11153 if (in == NULL) return(-1); 11154 base = in->cur - in->base; 11155 if (base < 0) return(-1); 11156 if (ctxt->checkIndex > base) 11157 base = ctxt->checkIndex; 11158 if (in->buf == NULL) { 11159 buf = in->base; 11160 len = in->length; 11161 } else { 11162 buf = xmlBufContent(in->buf->buffer); 11163 len = xmlBufUse(in->buf->buffer); 11164 } 11165 /* take into account the sequence length */ 11166 if (third) len -= 2; 11167 else if (next) len --; 11168 for (;base < len;base++) { 11169 if (buf[base] == first) { 11170 if (third != 0) { 11171 if ((buf[base + 1] != next) || 11172 (buf[base + 2] != third)) continue; 11173 } else if (next != 0) { 11174 if (buf[base + 1] != next) continue; 11175 } 11176 ctxt->checkIndex = 0; 11177 #ifdef DEBUG_PUSH 11178 if (next == 0) 11179 xmlGenericError(xmlGenericErrorContext, 11180 "PP: lookup '%c' found at %d\n", 11181 first, base); 11182 else if (third == 0) 11183 xmlGenericError(xmlGenericErrorContext, 11184 "PP: lookup '%c%c' found at %d\n", 11185 first, next, base); 11186 else 11187 xmlGenericError(xmlGenericErrorContext, 11188 "PP: lookup '%c%c%c' found at %d\n", 11189 first, next, third, base); 11190 #endif 11191 return(base - (in->cur - in->base)); 11192 } 11193 } 11194 ctxt->checkIndex = base; 11195 #ifdef DEBUG_PUSH 11196 if (next == 0) 11197 xmlGenericError(xmlGenericErrorContext, 11198 "PP: lookup '%c' failed\n", first); 11199 else if (third == 0) 11200 
xmlGenericError(xmlGenericErrorContext, 11201 "PP: lookup '%c%c' failed\n", first, next); 11202 else 11203 xmlGenericError(xmlGenericErrorContext, 11204 "PP: lookup '%c%c%c' failed\n", first, next, third); 11205 #endif 11206 return(-1); 11207 } 11208 11209 /** 11210 * xmlParseGetLasts: 11211 * @ctxt: an XML parser context 11212 * @lastlt: pointer to store the last '<' from the input 11213 * @lastgt: pointer to store the last '>' from the input 11214 * 11215 * Lookup the last < and > in the current chunk 11216 */ 11217 static void 11218 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 11219 const xmlChar **lastgt) { 11220 const xmlChar *tmp; 11221 11222 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 11223 xmlGenericError(xmlGenericErrorContext, 11224 "Internal error: xmlParseGetLasts\n"); 11225 return; 11226 } 11227 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 11228 tmp = ctxt->input->end; 11229 tmp--; 11230 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 11231 if (tmp < ctxt->input->base) { 11232 *lastlt = NULL; 11233 *lastgt = NULL; 11234 } else { 11235 *lastlt = tmp; 11236 tmp++; 11237 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 11238 if (*tmp == '\'') { 11239 tmp++; 11240 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 11241 if (tmp < ctxt->input->end) tmp++; 11242 } else if (*tmp == '"') { 11243 tmp++; 11244 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 11245 if (tmp < ctxt->input->end) tmp++; 11246 } else 11247 tmp++; 11248 } 11249 if (tmp < ctxt->input->end) 11250 *lastgt = tmp; 11251 else { 11252 tmp = *lastlt; 11253 tmp--; 11254 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11255 if (tmp >= ctxt->input->base) 11256 *lastgt = tmp; 11257 else 11258 *lastgt = NULL; 11259 } 11260 } 11261 } else { 11262 *lastlt = NULL; 11263 *lastgt = NULL; 11264 } 11265 } 11266 /** 11267 * xmlCheckCdataPush: 11268 * @cur: pointer to the block of characters 11269 * @len: length of the 
block in bytes 11270 * @complete: 1 if complete CDATA block is passed in, 0 if partial block 11271 * 11272 * Check that the block of characters is okay as SCdata content [20] 11273 * 11274 * Returns the number of bytes to pass if okay, a negative index where an 11275 * UTF-8 error occured otherwise 11276 */ 11277 static int 11278 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { 11279 int ix; 11280 unsigned char c; 11281 int codepoint; 11282 11283 if ((utf == NULL) || (len <= 0)) 11284 return(0); 11285 11286 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11287 c = utf[ix]; 11288 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11289 if (c >= 0x20) 11290 ix++; 11291 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11292 ix++; 11293 else 11294 return(-ix); 11295 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11296 if (ix + 2 > len) return(complete ? -ix : ix); 11297 if ((utf[ix+1] & 0xc0 ) != 0x80) 11298 return(-ix); 11299 codepoint = (utf[ix] & 0x1f) << 6; 11300 codepoint |= utf[ix+1] & 0x3f; 11301 if (!xmlIsCharQ(codepoint)) 11302 return(-ix); 11303 ix += 2; 11304 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11305 if (ix + 3 > len) return(complete ? -ix : ix); 11306 if (((utf[ix+1] & 0xc0) != 0x80) || 11307 ((utf[ix+2] & 0xc0) != 0x80)) 11308 return(-ix); 11309 codepoint = (utf[ix] & 0xf) << 12; 11310 codepoint |= (utf[ix+1] & 0x3f) << 6; 11311 codepoint |= utf[ix+2] & 0x3f; 11312 if (!xmlIsCharQ(codepoint)) 11313 return(-ix); 11314 ix += 3; 11315 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11316 if (ix + 4 > len) return(complete ? 
-ix : ix); 11317 if (((utf[ix+1] & 0xc0) != 0x80) || 11318 ((utf[ix+2] & 0xc0) != 0x80) || 11319 ((utf[ix+3] & 0xc0) != 0x80)) 11320 return(-ix); 11321 codepoint = (utf[ix] & 0x7) << 18; 11322 codepoint |= (utf[ix+1] & 0x3f) << 12; 11323 codepoint |= (utf[ix+2] & 0x3f) << 6; 11324 codepoint |= utf[ix+3] & 0x3f; 11325 if (!xmlIsCharQ(codepoint)) 11326 return(-ix); 11327 ix += 4; 11328 } else /* unknown encoding */ 11329 return(-ix); 11330 } 11331 return(ix); 11332 } 11333 11334 /** 11335 * xmlParseTryOrFinish: 11336 * @ctxt: an XML parser context 11337 * @terminate: last chunk indicator 11338 * 11339 * Try to progress on parsing 11340 * 11341 * Returns zero if no parsing was possible 11342 */ 11343 static int 11344 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11345 int ret = 0; 11346 int avail, tlen; 11347 xmlChar cur, next; 11348 const xmlChar *lastlt, *lastgt; 11349 11350 if (ctxt->input == NULL) 11351 return(0); 11352 11353 #ifdef DEBUG_PUSH 11354 switch (ctxt->instate) { 11355 case XML_PARSER_EOF: 11356 xmlGenericError(xmlGenericErrorContext, 11357 "PP: try EOF\n"); break; 11358 case XML_PARSER_START: 11359 xmlGenericError(xmlGenericErrorContext, 11360 "PP: try START\n"); break; 11361 case XML_PARSER_MISC: 11362 xmlGenericError(xmlGenericErrorContext, 11363 "PP: try MISC\n");break; 11364 case XML_PARSER_COMMENT: 11365 xmlGenericError(xmlGenericErrorContext, 11366 "PP: try COMMENT\n");break; 11367 case XML_PARSER_PROLOG: 11368 xmlGenericError(xmlGenericErrorContext, 11369 "PP: try PROLOG\n");break; 11370 case XML_PARSER_START_TAG: 11371 xmlGenericError(xmlGenericErrorContext, 11372 "PP: try START_TAG\n");break; 11373 case XML_PARSER_CONTENT: 11374 xmlGenericError(xmlGenericErrorContext, 11375 "PP: try CONTENT\n");break; 11376 case XML_PARSER_CDATA_SECTION: 11377 xmlGenericError(xmlGenericErrorContext, 11378 "PP: try CDATA_SECTION\n");break; 11379 case XML_PARSER_END_TAG: 11380 xmlGenericError(xmlGenericErrorContext, 11381 "PP: try 
END_TAG\n");break; 11382 case XML_PARSER_ENTITY_DECL: 11383 xmlGenericError(xmlGenericErrorContext, 11384 "PP: try ENTITY_DECL\n");break; 11385 case XML_PARSER_ENTITY_VALUE: 11386 xmlGenericError(xmlGenericErrorContext, 11387 "PP: try ENTITY_VALUE\n");break; 11388 case XML_PARSER_ATTRIBUTE_VALUE: 11389 xmlGenericError(xmlGenericErrorContext, 11390 "PP: try ATTRIBUTE_VALUE\n");break; 11391 case XML_PARSER_DTD: 11392 xmlGenericError(xmlGenericErrorContext, 11393 "PP: try DTD\n");break; 11394 case XML_PARSER_EPILOG: 11395 xmlGenericError(xmlGenericErrorContext, 11396 "PP: try EPILOG\n");break; 11397 case XML_PARSER_PI: 11398 xmlGenericError(xmlGenericErrorContext, 11399 "PP: try PI\n");break; 11400 case XML_PARSER_IGNORE: 11401 xmlGenericError(xmlGenericErrorContext, 11402 "PP: try IGNORE\n");break; 11403 } 11404 #endif 11405 11406 if ((ctxt->input != NULL) && 11407 (ctxt->input->cur - ctxt->input->base > 4096)) { 11408 xmlSHRINK(ctxt); 11409 ctxt->checkIndex = 0; 11410 } 11411 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11412 11413 while (ctxt->instate != XML_PARSER_EOF) { 11414 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11415 return(0); 11416 11417 11418 /* 11419 * Pop-up of finished entities. 11420 */ 11421 while ((RAW == 0) && (ctxt->inputNr > 1)) 11422 xmlPopInput(ctxt); 11423 11424 if (ctxt->input == NULL) break; 11425 if (ctxt->input->buf == NULL) 11426 avail = ctxt->input->length - 11427 (ctxt->input->cur - ctxt->input->base); 11428 else { 11429 /* 11430 * If we are operating on converted input, try to flush 11431 * remainng chars to avoid them stalling in the non-converted 11432 * buffer. But do not do this in document start where 11433 * encoding="..." may not have been read and we work on a 11434 * guessed encoding. 
11435 */ 11436 if ((ctxt->instate != XML_PARSER_START) && 11437 (ctxt->input->buf->raw != NULL) && 11438 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11439 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11440 ctxt->input); 11441 size_t current = ctxt->input->cur - ctxt->input->base; 11442 11443 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11444 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11445 base, current); 11446 } 11447 avail = xmlBufUse(ctxt->input->buf->buffer) - 11448 (ctxt->input->cur - ctxt->input->base); 11449 } 11450 if (avail < 1) 11451 goto done; 11452 switch (ctxt->instate) { 11453 case XML_PARSER_EOF: 11454 /* 11455 * Document parsing is done ! 11456 */ 11457 goto done; 11458 case XML_PARSER_START: 11459 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11460 xmlChar start[4]; 11461 xmlCharEncoding enc; 11462 11463 /* 11464 * Very first chars read from the document flow. 11465 */ 11466 if (avail < 4) 11467 goto done; 11468 11469 /* 11470 * Get the 4 first bytes and decode the charset 11471 * if enc != XML_CHAR_ENCODING_NONE 11472 * plug some encoding conversion routines, 11473 * else xmlSwitchEncoding will set to (default) 11474 * UTF8. 
11475 */ 11476 start[0] = RAW; 11477 start[1] = NXT(1); 11478 start[2] = NXT(2); 11479 start[3] = NXT(3); 11480 enc = xmlDetectCharEncoding(start, 4); 11481 xmlSwitchEncoding(ctxt, enc); 11482 break; 11483 } 11484 11485 if (avail < 2) 11486 goto done; 11487 cur = ctxt->input->cur[0]; 11488 next = ctxt->input->cur[1]; 11489 if (cur == 0) { 11490 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11491 ctxt->sax->setDocumentLocator(ctxt->userData, 11492 &xmlDefaultSAXLocator); 11493 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11494 xmlHaltParser(ctxt); 11495 #ifdef DEBUG_PUSH 11496 xmlGenericError(xmlGenericErrorContext, 11497 "PP: entering EOF\n"); 11498 #endif 11499 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11500 ctxt->sax->endDocument(ctxt->userData); 11501 goto done; 11502 } 11503 if ((cur == '<') && (next == '?')) { 11504 /* PI or XML decl */ 11505 if (avail < 5) return(ret); 11506 if ((!terminate) && 11507 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11508 return(ret); 11509 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11510 ctxt->sax->setDocumentLocator(ctxt->userData, 11511 &xmlDefaultSAXLocator); 11512 if ((ctxt->input->cur[2] == 'x') && 11513 (ctxt->input->cur[3] == 'm') && 11514 (ctxt->input->cur[4] == 'l') && 11515 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11516 ret += 5; 11517 #ifdef DEBUG_PUSH 11518 xmlGenericError(xmlGenericErrorContext, 11519 "PP: Parsing XML Decl\n"); 11520 #endif 11521 xmlParseXMLDecl(ctxt); 11522 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11523 /* 11524 * The XML REC instructs us to stop parsing right 11525 * here 11526 */ 11527 xmlHaltParser(ctxt); 11528 return(0); 11529 } 11530 ctxt->standalone = ctxt->input->standalone; 11531 if ((ctxt->encoding == NULL) && 11532 (ctxt->input->encoding != NULL)) 11533 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11534 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11535 (!ctxt->disableSAX)) 11536 ctxt->sax->startDocument(ctxt->userData); 11537 
ctxt->instate = XML_PARSER_MISC; 11538 #ifdef DEBUG_PUSH 11539 xmlGenericError(xmlGenericErrorContext, 11540 "PP: entering MISC\n"); 11541 #endif 11542 } else { 11543 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11544 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11545 (!ctxt->disableSAX)) 11546 ctxt->sax->startDocument(ctxt->userData); 11547 ctxt->instate = XML_PARSER_MISC; 11548 #ifdef DEBUG_PUSH 11549 xmlGenericError(xmlGenericErrorContext, 11550 "PP: entering MISC\n"); 11551 #endif 11552 } 11553 } else { 11554 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11555 ctxt->sax->setDocumentLocator(ctxt->userData, 11556 &xmlDefaultSAXLocator); 11557 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11558 if (ctxt->version == NULL) { 11559 xmlErrMemory(ctxt, NULL); 11560 break; 11561 } 11562 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11563 (!ctxt->disableSAX)) 11564 ctxt->sax->startDocument(ctxt->userData); 11565 ctxt->instate = XML_PARSER_MISC; 11566 #ifdef DEBUG_PUSH 11567 xmlGenericError(xmlGenericErrorContext, 11568 "PP: entering MISC\n"); 11569 #endif 11570 } 11571 break; 11572 case XML_PARSER_START_TAG: { 11573 const xmlChar *name; 11574 const xmlChar *prefix = NULL; 11575 const xmlChar *URI = NULL; 11576 int nsNr = ctxt->nsNr; 11577 11578 if ((avail < 2) && (ctxt->inputNr == 1)) 11579 goto done; 11580 cur = ctxt->input->cur[0]; 11581 if (cur != '<') { 11582 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11583 xmlHaltParser(ctxt); 11584 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11585 ctxt->sax->endDocument(ctxt->userData); 11586 goto done; 11587 } 11588 if (!terminate) { 11589 if (ctxt->progressive) { 11590 /* > can be found unescaped in attribute values */ 11591 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11592 goto done; 11593 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11594 goto done; 11595 } 11596 } 11597 if (ctxt->spaceNr == 0) 11598 spacePush(ctxt, -1); 11599 else if (*ctxt->space == -2) 11600 
spacePush(ctxt, -1); 11601 else 11602 spacePush(ctxt, *ctxt->space); 11603 #ifdef LIBXML_SAX1_ENABLED 11604 if (ctxt->sax2) 11605 #endif /* LIBXML_SAX1_ENABLED */ 11606 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11607 #ifdef LIBXML_SAX1_ENABLED 11608 else 11609 name = xmlParseStartTag(ctxt); 11610 #endif /* LIBXML_SAX1_ENABLED */ 11611 if (ctxt->instate == XML_PARSER_EOF) 11612 goto done; 11613 if (name == NULL) { 11614 spacePop(ctxt); 11615 xmlHaltParser(ctxt); 11616 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11617 ctxt->sax->endDocument(ctxt->userData); 11618 goto done; 11619 } 11620 #ifdef LIBXML_VALID_ENABLED 11621 /* 11622 * [ VC: Root Element Type ] 11623 * The Name in the document type declaration must match 11624 * the element type of the root element. 11625 */ 11626 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11627 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11628 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11629 #endif /* LIBXML_VALID_ENABLED */ 11630 11631 /* 11632 * Check for an Empty Element. 
11633 */ 11634 if ((RAW == '/') && (NXT(1) == '>')) { 11635 SKIP(2); 11636 11637 if (ctxt->sax2) { 11638 if ((ctxt->sax != NULL) && 11639 (ctxt->sax->endElementNs != NULL) && 11640 (!ctxt->disableSAX)) 11641 ctxt->sax->endElementNs(ctxt->userData, name, 11642 prefix, URI); 11643 if (ctxt->nsNr - nsNr > 0) 11644 nsPop(ctxt, ctxt->nsNr - nsNr); 11645 #ifdef LIBXML_SAX1_ENABLED 11646 } else { 11647 if ((ctxt->sax != NULL) && 11648 (ctxt->sax->endElement != NULL) && 11649 (!ctxt->disableSAX)) 11650 ctxt->sax->endElement(ctxt->userData, name); 11651 #endif /* LIBXML_SAX1_ENABLED */ 11652 } 11653 if (ctxt->instate == XML_PARSER_EOF) 11654 goto done; 11655 spacePop(ctxt); 11656 if (ctxt->nameNr == 0) { 11657 ctxt->instate = XML_PARSER_EPILOG; 11658 } else { 11659 ctxt->instate = XML_PARSER_CONTENT; 11660 } 11661 ctxt->progressive = 1; 11662 break; 11663 } 11664 if (RAW == '>') { 11665 NEXT; 11666 } else { 11667 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11668 "Couldn't find end of Start Tag %s\n", 11669 name); 11670 nodePop(ctxt); 11671 spacePop(ctxt); 11672 } 11673 if (ctxt->sax2) 11674 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11675 #ifdef LIBXML_SAX1_ENABLED 11676 else 11677 namePush(ctxt, name); 11678 #endif /* LIBXML_SAX1_ENABLED */ 11679 11680 ctxt->instate = XML_PARSER_CONTENT; 11681 ctxt->progressive = 1; 11682 break; 11683 } 11684 case XML_PARSER_CONTENT: { 11685 const xmlChar *test; 11686 unsigned int cons; 11687 if ((avail < 2) && (ctxt->inputNr == 1)) 11688 goto done; 11689 cur = ctxt->input->cur[0]; 11690 next = ctxt->input->cur[1]; 11691 11692 test = CUR_PTR; 11693 cons = ctxt->input->consumed; 11694 if ((cur == '<') && (next == '/')) { 11695 ctxt->instate = XML_PARSER_END_TAG; 11696 break; 11697 } else if ((cur == '<') && (next == '?')) { 11698 if ((!terminate) && 11699 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11700 ctxt->progressive = XML_PARSER_PI; 11701 goto done; 11702 } 11703 xmlParsePI(ctxt); 11704 ctxt->instate = 
XML_PARSER_CONTENT; 11705 ctxt->progressive = 1; 11706 } else if ((cur == '<') && (next != '!')) { 11707 ctxt->instate = XML_PARSER_START_TAG; 11708 break; 11709 } else if ((cur == '<') && (next == '!') && 11710 (ctxt->input->cur[2] == '-') && 11711 (ctxt->input->cur[3] == '-')) { 11712 int term; 11713 11714 if (avail < 4) 11715 goto done; 11716 ctxt->input->cur += 4; 11717 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11718 ctxt->input->cur -= 4; 11719 if ((!terminate) && (term < 0)) { 11720 ctxt->progressive = XML_PARSER_COMMENT; 11721 goto done; 11722 } 11723 xmlParseComment(ctxt); 11724 ctxt->instate = XML_PARSER_CONTENT; 11725 ctxt->progressive = 1; 11726 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11727 (ctxt->input->cur[2] == '[') && 11728 (ctxt->input->cur[3] == 'C') && 11729 (ctxt->input->cur[4] == 'D') && 11730 (ctxt->input->cur[5] == 'A') && 11731 (ctxt->input->cur[6] == 'T') && 11732 (ctxt->input->cur[7] == 'A') && 11733 (ctxt->input->cur[8] == '[')) { 11734 SKIP(9); 11735 ctxt->instate = XML_PARSER_CDATA_SECTION; 11736 break; 11737 } else if ((cur == '<') && (next == '!') && 11738 (avail < 9)) { 11739 goto done; 11740 } else if (cur == '&') { 11741 if ((!terminate) && 11742 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11743 goto done; 11744 xmlParseReference(ctxt); 11745 } else { 11746 /* TODO Avoid the extra copy, handle directly !!! */ 11747 /* 11748 * Goal of the following test is: 11749 * - minimize calls to the SAX 'character' callback 11750 * when they are mergeable 11751 * - handle an problem for isBlank when we only parse 11752 * a sequence of blank chars and the next one is 11753 * not available to check against '<' presence. 11754 * - tries to homogenize the differences in SAX 11755 * callbacks between the push and pull versions 11756 * of the parser. 
11757 */ 11758 if ((ctxt->inputNr == 1) && 11759 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11760 if (!terminate) { 11761 if (ctxt->progressive) { 11762 if ((lastlt == NULL) || 11763 (ctxt->input->cur > lastlt)) 11764 goto done; 11765 } else if (xmlParseLookupSequence(ctxt, 11766 '<', 0, 0) < 0) { 11767 goto done; 11768 } 11769 } 11770 } 11771 ctxt->checkIndex = 0; 11772 xmlParseCharData(ctxt, 0); 11773 } 11774 /* 11775 * Pop-up of finished entities. 11776 */ 11777 while ((RAW == 0) && (ctxt->inputNr > 1)) 11778 xmlPopInput(ctxt); 11779 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11780 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11781 "detected an error in element content\n"); 11782 xmlHaltParser(ctxt); 11783 break; 11784 } 11785 break; 11786 } 11787 case XML_PARSER_END_TAG: 11788 if (avail < 2) 11789 goto done; 11790 if (!terminate) { 11791 if (ctxt->progressive) { 11792 /* > can be found unescaped in attribute values */ 11793 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11794 goto done; 11795 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11796 goto done; 11797 } 11798 } 11799 if (ctxt->sax2) { 11800 xmlParseEndTag2(ctxt, 11801 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11802 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11803 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11804 nameNsPop(ctxt); 11805 } 11806 #ifdef LIBXML_SAX1_ENABLED 11807 else 11808 xmlParseEndTag1(ctxt, 0); 11809 #endif /* LIBXML_SAX1_ENABLED */ 11810 if (ctxt->instate == XML_PARSER_EOF) { 11811 /* Nothing */ 11812 } else if (ctxt->nameNr == 0) { 11813 ctxt->instate = XML_PARSER_EPILOG; 11814 } else { 11815 ctxt->instate = XML_PARSER_CONTENT; 11816 } 11817 break; 11818 case XML_PARSER_CDATA_SECTION: { 11819 /* 11820 * The Push mode need to have the SAX callback for 11821 * cdataBlock merge back contiguous callbacks. 
11822 */ 11823 int base; 11824 11825 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11826 if (base < 0) { 11827 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11828 int tmp; 11829 11830 tmp = xmlCheckCdataPush(ctxt->input->cur, 11831 XML_PARSER_BIG_BUFFER_SIZE, 0); 11832 if (tmp < 0) { 11833 tmp = -tmp; 11834 ctxt->input->cur += tmp; 11835 goto encoding_error; 11836 } 11837 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11838 if (ctxt->sax->cdataBlock != NULL) 11839 ctxt->sax->cdataBlock(ctxt->userData, 11840 ctxt->input->cur, tmp); 11841 else if (ctxt->sax->characters != NULL) 11842 ctxt->sax->characters(ctxt->userData, 11843 ctxt->input->cur, tmp); 11844 } 11845 if (ctxt->instate == XML_PARSER_EOF) 11846 goto done; 11847 SKIPL(tmp); 11848 ctxt->checkIndex = 0; 11849 } 11850 goto done; 11851 } else { 11852 int tmp; 11853 11854 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); 11855 if ((tmp < 0) || (tmp != base)) { 11856 tmp = -tmp; 11857 ctxt->input->cur += tmp; 11858 goto encoding_error; 11859 } 11860 if ((ctxt->sax != NULL) && (base == 0) && 11861 (ctxt->sax->cdataBlock != NULL) && 11862 (!ctxt->disableSAX)) { 11863 /* 11864 * Special case to provide identical behaviour 11865 * between pull and push parsers on enpty CDATA 11866 * sections 11867 */ 11868 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11869 (!strncmp((const char *)&ctxt->input->cur[-9], 11870 "<![CDATA[", 9))) 11871 ctxt->sax->cdataBlock(ctxt->userData, 11872 BAD_CAST "", 0); 11873 } else if ((ctxt->sax != NULL) && (base > 0) && 11874 (!ctxt->disableSAX)) { 11875 if (ctxt->sax->cdataBlock != NULL) 11876 ctxt->sax->cdataBlock(ctxt->userData, 11877 ctxt->input->cur, base); 11878 else if (ctxt->sax->characters != NULL) 11879 ctxt->sax->characters(ctxt->userData, 11880 ctxt->input->cur, base); 11881 } 11882 if (ctxt->instate == XML_PARSER_EOF) 11883 goto done; 11884 SKIPL(base + 3); 11885 ctxt->checkIndex = 0; 11886 ctxt->instate = XML_PARSER_CONTENT; 11887 #ifdef DEBUG_PUSH 11888 
xmlGenericError(xmlGenericErrorContext, 11889 "PP: entering CONTENT\n"); 11890 #endif 11891 } 11892 break; 11893 } 11894 case XML_PARSER_MISC: 11895 SKIP_BLANKS; 11896 if (ctxt->input->buf == NULL) 11897 avail = ctxt->input->length - 11898 (ctxt->input->cur - ctxt->input->base); 11899 else 11900 avail = xmlBufUse(ctxt->input->buf->buffer) - 11901 (ctxt->input->cur - ctxt->input->base); 11902 if (avail < 2) 11903 goto done; 11904 cur = ctxt->input->cur[0]; 11905 next = ctxt->input->cur[1]; 11906 if ((cur == '<') && (next == '?')) { 11907 if ((!terminate) && 11908 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11909 ctxt->progressive = XML_PARSER_PI; 11910 goto done; 11911 } 11912 #ifdef DEBUG_PUSH 11913 xmlGenericError(xmlGenericErrorContext, 11914 "PP: Parsing PI\n"); 11915 #endif 11916 xmlParsePI(ctxt); 11917 if (ctxt->instate == XML_PARSER_EOF) 11918 goto done; 11919 ctxt->instate = XML_PARSER_MISC; 11920 ctxt->progressive = 1; 11921 ctxt->checkIndex = 0; 11922 } else if ((cur == '<') && (next == '!') && 11923 (ctxt->input->cur[2] == '-') && 11924 (ctxt->input->cur[3] == '-')) { 11925 if ((!terminate) && 11926 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11927 ctxt->progressive = XML_PARSER_COMMENT; 11928 goto done; 11929 } 11930 #ifdef DEBUG_PUSH 11931 xmlGenericError(xmlGenericErrorContext, 11932 "PP: Parsing Comment\n"); 11933 #endif 11934 xmlParseComment(ctxt); 11935 if (ctxt->instate == XML_PARSER_EOF) 11936 goto done; 11937 ctxt->instate = XML_PARSER_MISC; 11938 ctxt->progressive = 1; 11939 ctxt->checkIndex = 0; 11940 } else if ((cur == '<') && (next == '!') && 11941 (ctxt->input->cur[2] == 'D') && 11942 (ctxt->input->cur[3] == 'O') && 11943 (ctxt->input->cur[4] == 'C') && 11944 (ctxt->input->cur[5] == 'T') && 11945 (ctxt->input->cur[6] == 'Y') && 11946 (ctxt->input->cur[7] == 'P') && 11947 (ctxt->input->cur[8] == 'E')) { 11948 if ((!terminate) && 11949 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11950 ctxt->progressive = XML_PARSER_DTD; 
11951 goto done; 11952 } 11953 #ifdef DEBUG_PUSH 11954 xmlGenericError(xmlGenericErrorContext, 11955 "PP: Parsing internal subset\n"); 11956 #endif 11957 ctxt->inSubset = 1; 11958 ctxt->progressive = 0; 11959 ctxt->checkIndex = 0; 11960 xmlParseDocTypeDecl(ctxt); 11961 if (ctxt->instate == XML_PARSER_EOF) 11962 goto done; 11963 if (RAW == '[') { 11964 ctxt->instate = XML_PARSER_DTD; 11965 #ifdef DEBUG_PUSH 11966 xmlGenericError(xmlGenericErrorContext, 11967 "PP: entering DTD\n"); 11968 #endif 11969 } else { 11970 /* 11971 * Create and update the external subset. 11972 */ 11973 ctxt->inSubset = 2; 11974 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11975 (ctxt->sax->externalSubset != NULL)) 11976 ctxt->sax->externalSubset(ctxt->userData, 11977 ctxt->intSubName, ctxt->extSubSystem, 11978 ctxt->extSubURI); 11979 ctxt->inSubset = 0; 11980 xmlCleanSpecialAttr(ctxt); 11981 ctxt->instate = XML_PARSER_PROLOG; 11982 #ifdef DEBUG_PUSH 11983 xmlGenericError(xmlGenericErrorContext, 11984 "PP: entering PROLOG\n"); 11985 #endif 11986 } 11987 } else if ((cur == '<') && (next == '!') && 11988 (avail < 9)) { 11989 goto done; 11990 } else { 11991 ctxt->instate = XML_PARSER_START_TAG; 11992 ctxt->progressive = XML_PARSER_START_TAG; 11993 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11994 #ifdef DEBUG_PUSH 11995 xmlGenericError(xmlGenericErrorContext, 11996 "PP: entering START_TAG\n"); 11997 #endif 11998 } 11999 break; 12000 case XML_PARSER_PROLOG: 12001 SKIP_BLANKS; 12002 if (ctxt->input->buf == NULL) 12003 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 12004 else 12005 avail = xmlBufUse(ctxt->input->buf->buffer) - 12006 (ctxt->input->cur - ctxt->input->base); 12007 if (avail < 2) 12008 goto done; 12009 cur = ctxt->input->cur[0]; 12010 next = ctxt->input->cur[1]; 12011 if ((cur == '<') && (next == '?')) { 12012 if ((!terminate) && 12013 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 12014 ctxt->progressive = XML_PARSER_PI; 12015 goto done; 12016 } 12017 
#ifdef DEBUG_PUSH 12018 xmlGenericError(xmlGenericErrorContext, 12019 "PP: Parsing PI\n"); 12020 #endif 12021 xmlParsePI(ctxt); 12022 if (ctxt->instate == XML_PARSER_EOF) 12023 goto done; 12024 ctxt->instate = XML_PARSER_PROLOG; 12025 ctxt->progressive = 1; 12026 } else if ((cur == '<') && (next == '!') && 12027 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 12028 if ((!terminate) && 12029 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 12030 ctxt->progressive = XML_PARSER_COMMENT; 12031 goto done; 12032 } 12033 #ifdef DEBUG_PUSH 12034 xmlGenericError(xmlGenericErrorContext, 12035 "PP: Parsing Comment\n"); 12036 #endif 12037 xmlParseComment(ctxt); 12038 if (ctxt->instate == XML_PARSER_EOF) 12039 goto done; 12040 ctxt->instate = XML_PARSER_PROLOG; 12041 ctxt->progressive = 1; 12042 } else if ((cur == '<') && (next == '!') && 12043 (avail < 4)) { 12044 goto done; 12045 } else { 12046 ctxt->instate = XML_PARSER_START_TAG; 12047 if (ctxt->progressive == 0) 12048 ctxt->progressive = XML_PARSER_START_TAG; 12049 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 12050 #ifdef DEBUG_PUSH 12051 xmlGenericError(xmlGenericErrorContext, 12052 "PP: entering START_TAG\n"); 12053 #endif 12054 } 12055 break; 12056 case XML_PARSER_EPILOG: 12057 SKIP_BLANKS; 12058 if (ctxt->input->buf == NULL) 12059 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 12060 else 12061 avail = xmlBufUse(ctxt->input->buf->buffer) - 12062 (ctxt->input->cur - ctxt->input->base); 12063 if (avail < 2) 12064 goto done; 12065 cur = ctxt->input->cur[0]; 12066 next = ctxt->input->cur[1]; 12067 if ((cur == '<') && (next == '?')) { 12068 if ((!terminate) && 12069 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 12070 ctxt->progressive = XML_PARSER_PI; 12071 goto done; 12072 } 12073 #ifdef DEBUG_PUSH 12074 xmlGenericError(xmlGenericErrorContext, 12075 "PP: Parsing PI\n"); 12076 #endif 12077 xmlParsePI(ctxt); 12078 if (ctxt->instate == XML_PARSER_EOF) 12079 goto done; 12080 
ctxt->instate = XML_PARSER_EPILOG; 12081 ctxt->progressive = 1; 12082 } else if ((cur == '<') && (next == '!') && 12083 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 12084 if ((!terminate) && 12085 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 12086 ctxt->progressive = XML_PARSER_COMMENT; 12087 goto done; 12088 } 12089 #ifdef DEBUG_PUSH 12090 xmlGenericError(xmlGenericErrorContext, 12091 "PP: Parsing Comment\n"); 12092 #endif 12093 xmlParseComment(ctxt); 12094 if (ctxt->instate == XML_PARSER_EOF) 12095 goto done; 12096 ctxt->instate = XML_PARSER_EPILOG; 12097 ctxt->progressive = 1; 12098 } else if ((cur == '<') && (next == '!') && 12099 (avail < 4)) { 12100 goto done; 12101 } else { 12102 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12103 xmlHaltParser(ctxt); 12104 #ifdef DEBUG_PUSH 12105 xmlGenericError(xmlGenericErrorContext, 12106 "PP: entering EOF\n"); 12107 #endif 12108 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12109 ctxt->sax->endDocument(ctxt->userData); 12110 goto done; 12111 } 12112 break; 12113 case XML_PARSER_DTD: { 12114 /* 12115 * Sorry but progressive parsing of the internal subset 12116 * is not expected to be supported. We first check that 12117 * the full content of the internal subset is available and 12118 * the parsing is launched only at that point. 12119 * Internal subset ends up with "']' S? '>'" in an unescaped 12120 * section and not in a ']]>' sequence which are conditional 12121 * sections (whoever argued to keep that crap in XML deserve 12122 * a place in hell !). 
12123 */ 12124 int base, i; 12125 xmlChar *buf; 12126 xmlChar quote = 0; 12127 size_t use; 12128 12129 base = ctxt->input->cur - ctxt->input->base; 12130 if (base < 0) return(0); 12131 if (ctxt->checkIndex > base) 12132 base = ctxt->checkIndex; 12133 buf = xmlBufContent(ctxt->input->buf->buffer); 12134 use = xmlBufUse(ctxt->input->buf->buffer); 12135 for (;(unsigned int) base < use; base++) { 12136 if (quote != 0) { 12137 if (buf[base] == quote) 12138 quote = 0; 12139 continue; 12140 } 12141 if ((quote == 0) && (buf[base] == '<')) { 12142 int found = 0; 12143 /* special handling of comments */ 12144 if (((unsigned int) base + 4 < use) && 12145 (buf[base + 1] == '!') && 12146 (buf[base + 2] == '-') && 12147 (buf[base + 3] == '-')) { 12148 for (;(unsigned int) base + 3 < use; base++) { 12149 if ((buf[base] == '-') && 12150 (buf[base + 1] == '-') && 12151 (buf[base + 2] == '>')) { 12152 found = 1; 12153 base += 2; 12154 break; 12155 } 12156 } 12157 if (!found) { 12158 #if 0 12159 fprintf(stderr, "unfinished comment\n"); 12160 #endif 12161 break; /* for */ 12162 } 12163 continue; 12164 } 12165 } 12166 if (buf[base] == '"') { 12167 quote = '"'; 12168 continue; 12169 } 12170 if (buf[base] == '\'') { 12171 quote = '\''; 12172 continue; 12173 } 12174 if (buf[base] == ']') { 12175 #if 0 12176 fprintf(stderr, "%c%c%c%c: ", buf[base], 12177 buf[base + 1], buf[base + 2], buf[base + 3]); 12178 #endif 12179 if ((unsigned int) base +1 >= use) 12180 break; 12181 if (buf[base + 1] == ']') { 12182 /* conditional crap, skip both ']' ! 
*/ 12183 base++; 12184 continue; 12185 } 12186 for (i = 1; (unsigned int) base + i < use; i++) { 12187 if (buf[base + i] == '>') { 12188 #if 0 12189 fprintf(stderr, "found\n"); 12190 #endif 12191 goto found_end_int_subset; 12192 } 12193 if (!IS_BLANK_CH(buf[base + i])) { 12194 #if 0 12195 fprintf(stderr, "not found\n"); 12196 #endif 12197 goto not_end_of_int_subset; 12198 } 12199 } 12200 #if 0 12201 fprintf(stderr, "end of stream\n"); 12202 #endif 12203 break; 12204 12205 } 12206 not_end_of_int_subset: 12207 continue; /* for */ 12208 } 12209 /* 12210 * We didn't found the end of the Internal subset 12211 */ 12212 if (quote == 0) 12213 ctxt->checkIndex = base; 12214 else 12215 ctxt->checkIndex = 0; 12216 #ifdef DEBUG_PUSH 12217 if (next == 0) 12218 xmlGenericError(xmlGenericErrorContext, 12219 "PP: lookup of int subset end filed\n"); 12220 #endif 12221 goto done; 12222 12223 found_end_int_subset: 12224 ctxt->checkIndex = 0; 12225 xmlParseInternalSubset(ctxt); 12226 if (ctxt->instate == XML_PARSER_EOF) 12227 goto done; 12228 ctxt->inSubset = 2; 12229 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 12230 (ctxt->sax->externalSubset != NULL)) 12231 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 12232 ctxt->extSubSystem, ctxt->extSubURI); 12233 ctxt->inSubset = 0; 12234 xmlCleanSpecialAttr(ctxt); 12235 if (ctxt->instate == XML_PARSER_EOF) 12236 goto done; 12237 ctxt->instate = XML_PARSER_PROLOG; 12238 ctxt->checkIndex = 0; 12239 #ifdef DEBUG_PUSH 12240 xmlGenericError(xmlGenericErrorContext, 12241 "PP: entering PROLOG\n"); 12242 #endif 12243 break; 12244 } 12245 case XML_PARSER_COMMENT: 12246 xmlGenericError(xmlGenericErrorContext, 12247 "PP: internal error, state == COMMENT\n"); 12248 ctxt->instate = XML_PARSER_CONTENT; 12249 #ifdef DEBUG_PUSH 12250 xmlGenericError(xmlGenericErrorContext, 12251 "PP: entering CONTENT\n"); 12252 #endif 12253 break; 12254 case XML_PARSER_IGNORE: 12255 xmlGenericError(xmlGenericErrorContext, 12256 "PP: internal error, state 
== IGNORE"); 12257 ctxt->instate = XML_PARSER_DTD; 12258 #ifdef DEBUG_PUSH 12259 xmlGenericError(xmlGenericErrorContext, 12260 "PP: entering DTD\n"); 12261 #endif 12262 break; 12263 case XML_PARSER_PI: 12264 xmlGenericError(xmlGenericErrorContext, 12265 "PP: internal error, state == PI\n"); 12266 ctxt->instate = XML_PARSER_CONTENT; 12267 #ifdef DEBUG_PUSH 12268 xmlGenericError(xmlGenericErrorContext, 12269 "PP: entering CONTENT\n"); 12270 #endif 12271 break; 12272 case XML_PARSER_ENTITY_DECL: 12273 xmlGenericError(xmlGenericErrorContext, 12274 "PP: internal error, state == ENTITY_DECL\n"); 12275 ctxt->instate = XML_PARSER_DTD; 12276 #ifdef DEBUG_PUSH 12277 xmlGenericError(xmlGenericErrorContext, 12278 "PP: entering DTD\n"); 12279 #endif 12280 break; 12281 case XML_PARSER_ENTITY_VALUE: 12282 xmlGenericError(xmlGenericErrorContext, 12283 "PP: internal error, state == ENTITY_VALUE\n"); 12284 ctxt->instate = XML_PARSER_CONTENT; 12285 #ifdef DEBUG_PUSH 12286 xmlGenericError(xmlGenericErrorContext, 12287 "PP: entering DTD\n"); 12288 #endif 12289 break; 12290 case XML_PARSER_ATTRIBUTE_VALUE: 12291 xmlGenericError(xmlGenericErrorContext, 12292 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12293 ctxt->instate = XML_PARSER_START_TAG; 12294 #ifdef DEBUG_PUSH 12295 xmlGenericError(xmlGenericErrorContext, 12296 "PP: entering START_TAG\n"); 12297 #endif 12298 break; 12299 case XML_PARSER_SYSTEM_LITERAL: 12300 xmlGenericError(xmlGenericErrorContext, 12301 "PP: internal error, state == SYSTEM_LITERAL\n"); 12302 ctxt->instate = XML_PARSER_START_TAG; 12303 #ifdef DEBUG_PUSH 12304 xmlGenericError(xmlGenericErrorContext, 12305 "PP: entering START_TAG\n"); 12306 #endif 12307 break; 12308 case XML_PARSER_PUBLIC_LITERAL: 12309 xmlGenericError(xmlGenericErrorContext, 12310 "PP: internal error, state == PUBLIC_LITERAL\n"); 12311 ctxt->instate = XML_PARSER_START_TAG; 12312 #ifdef DEBUG_PUSH 12313 xmlGenericError(xmlGenericErrorContext, 12314 "PP: entering START_TAG\n"); 12315 #endif 
12316 break; 12317 } 12318 } 12319 done: 12320 #ifdef DEBUG_PUSH 12321 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12322 #endif 12323 return(ret); 12324 encoding_error: 12325 { 12326 char buffer[150]; 12327 12328 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12329 ctxt->input->cur[0], ctxt->input->cur[1], 12330 ctxt->input->cur[2], ctxt->input->cur[3]); 12331 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12332 "Input is not proper UTF-8, indicate encoding !\n%s", 12333 BAD_CAST buffer, NULL); 12334 } 12335 return(0); 12336 } 12337 12338 /** 12339 * xmlParseCheckTransition: 12340 * @ctxt: an XML parser context 12341 * @chunk: a char array 12342 * @size: the size in byte of the chunk 12343 * 12344 * Check depending on the current parser state if the chunk given must be 12345 * processed immediately or one need more data to advance on parsing. 12346 * 12347 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12348 */ 12349 static int 12350 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12351 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12352 return(-1); 12353 if (ctxt->instate == XML_PARSER_START_TAG) { 12354 if (memchr(chunk, '>', size) != NULL) 12355 return(1); 12356 return(0); 12357 } 12358 if (ctxt->progressive == XML_PARSER_COMMENT) { 12359 if (memchr(chunk, '>', size) != NULL) 12360 return(1); 12361 return(0); 12362 } 12363 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12364 if (memchr(chunk, '>', size) != NULL) 12365 return(1); 12366 return(0); 12367 } 12368 if (ctxt->progressive == XML_PARSER_PI) { 12369 if (memchr(chunk, '>', size) != NULL) 12370 return(1); 12371 return(0); 12372 } 12373 if (ctxt->instate == XML_PARSER_END_TAG) { 12374 if (memchr(chunk, '>', size) != NULL) 12375 return(1); 12376 return(0); 12377 } 12378 if ((ctxt->progressive == XML_PARSER_DTD) || 12379 (ctxt->instate == XML_PARSER_DTD)) { 12380 if (memchr(chunk, '>', size) != NULL) 12381 
return(1); 12382 return(0); 12383 } 12384 return(1); 12385 } 12386 12387 /** 12388 * xmlParseChunk: 12389 * @ctxt: an XML parser context 12390 * @chunk: an char array 12391 * @size: the size in byte of the chunk 12392 * @terminate: last chunk indicator 12393 * 12394 * Parse a Chunk of memory 12395 * 12396 * Returns zero if no error, the xmlParserErrors otherwise. 12397 */ 12398 int 12399 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12400 int terminate) { 12401 int end_in_lf = 0; 12402 int remain = 0; 12403 size_t old_avail = 0; 12404 size_t avail = 0; 12405 12406 if (ctxt == NULL) 12407 return(XML_ERR_INTERNAL_ERROR); 12408 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12409 return(ctxt->errNo); 12410 if (ctxt->instate == XML_PARSER_EOF) 12411 return(-1); 12412 if (ctxt->instate == XML_PARSER_START) 12413 xmlDetectSAX2(ctxt); 12414 if ((size > 0) && (chunk != NULL) && (!terminate) && 12415 (chunk[size - 1] == '\r')) { 12416 end_in_lf = 1; 12417 size--; 12418 } 12419 12420 xmldecl_done: 12421 12422 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12423 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12424 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12425 size_t cur = ctxt->input->cur - ctxt->input->base; 12426 int res; 12427 12428 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12429 /* 12430 * Specific handling if we autodetected an encoding, we should not 12431 * push more than the first line ... 
which depend on the encoding 12432 * And only push the rest once the final encoding was detected 12433 */ 12434 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12435 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12436 unsigned int len = 45; 12437 12438 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12439 BAD_CAST "UTF-16")) || 12440 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12441 BAD_CAST "UTF16"))) 12442 len = 90; 12443 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12444 BAD_CAST "UCS-4")) || 12445 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12446 BAD_CAST "UCS4"))) 12447 len = 180; 12448 12449 if (ctxt->input->buf->rawconsumed < len) 12450 len -= ctxt->input->buf->rawconsumed; 12451 12452 /* 12453 * Change size for reading the initial declaration only 12454 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12455 * will blindly copy extra bytes from memory. 12456 */ 12457 if ((unsigned int) size > len) { 12458 remain = size - len; 12459 size = len; 12460 } else { 12461 remain = 0; 12462 } 12463 } 12464 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12465 if (res < 0) { 12466 ctxt->errNo = XML_PARSER_EOF; 12467 xmlHaltParser(ctxt); 12468 return (XML_PARSER_EOF); 12469 } 12470 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12471 #ifdef DEBUG_PUSH 12472 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12473 #endif 12474 12475 } else if (ctxt->instate != XML_PARSER_EOF) { 12476 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12477 xmlParserInputBufferPtr in = ctxt->input->buf; 12478 if ((in->encoder != NULL) && (in->buffer != NULL) && 12479 (in->raw != NULL)) { 12480 int nbchars; 12481 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12482 size_t current = ctxt->input->cur - ctxt->input->base; 12483 12484 nbchars = xmlCharEncInput(in, terminate); 12485 if (nbchars < 0) { 
12486 /* TODO 2.6.0 */ 12487 xmlGenericError(xmlGenericErrorContext, 12488 "xmlParseChunk: encoder error\n"); 12489 return(XML_ERR_INVALID_ENCODING); 12490 } 12491 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12492 } 12493 } 12494 } 12495 if (remain != 0) { 12496 xmlParseTryOrFinish(ctxt, 0); 12497 } else { 12498 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12499 avail = xmlBufUse(ctxt->input->buf->buffer); 12500 /* 12501 * Depending on the current state it may not be such 12502 * a good idea to try parsing if there is nothing in the chunk 12503 * which would be worth doing a parser state transition and we 12504 * need to wait for more data 12505 */ 12506 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12507 (old_avail == 0) || (avail == 0) || 12508 (xmlParseCheckTransition(ctxt, 12509 (const char *)&ctxt->input->base[old_avail], 12510 avail - old_avail))) 12511 xmlParseTryOrFinish(ctxt, terminate); 12512 } 12513 if (ctxt->instate == XML_PARSER_EOF) 12514 return(ctxt->errNo); 12515 12516 if ((ctxt->input != NULL) && 12517 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12518 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12519 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12520 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12521 xmlHaltParser(ctxt); 12522 } 12523 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12524 return(ctxt->errNo); 12525 12526 if (remain != 0) { 12527 chunk += size; 12528 size = remain; 12529 remain = 0; 12530 goto xmldecl_done; 12531 } 12532 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12533 (ctxt->input->buf != NULL)) { 12534 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12535 ctxt->input); 12536 size_t current = ctxt->input->cur - ctxt->input->base; 12537 12538 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12539 12540 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 12541 base, current); 12542 } 12543 if 
(terminate) { 12544 /* 12545 * Check for termination 12546 */ 12547 int cur_avail = 0; 12548 12549 if (ctxt->input != NULL) { 12550 if (ctxt->input->buf == NULL) 12551 cur_avail = ctxt->input->length - 12552 (ctxt->input->cur - ctxt->input->base); 12553 else 12554 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12555 (ctxt->input->cur - ctxt->input->base); 12556 } 12557 12558 if ((ctxt->instate != XML_PARSER_EOF) && 12559 (ctxt->instate != XML_PARSER_EPILOG)) { 12560 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12561 } 12562 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12563 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12564 } 12565 if (ctxt->instate != XML_PARSER_EOF) { 12566 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12567 ctxt->sax->endDocument(ctxt->userData); 12568 } 12569 ctxt->instate = XML_PARSER_EOF; 12570 } 12571 if (ctxt->wellFormed == 0) 12572 return((xmlParserErrors) ctxt->errNo); 12573 else 12574 return(0); 12575 } 12576 12577 /************************************************************************ 12578 * * 12579 * I/O front end functions to the parser * 12580 * * 12581 ************************************************************************/ 12582 12583 /** 12584 * xmlCreatePushParserCtxt: 12585 * @sax: a SAX handler 12586 * @user_data: The user data returned on SAX callbacks 12587 * @chunk: a pointer to an array of chars 12588 * @size: number of chars in the array 12589 * @filename: an optional file name or URI 12590 * 12591 * Create a parser context for using the XML parser in push mode. 12592 * If @buffer and @size are non-NULL, the data is used to detect 12593 * the encoding. The remaining characters will be parsed so they 12594 * don't need to be fed in again through xmlParseChunk. 12595 * To allow content encoding detection, @size should be >= 4 12596 * The value of @filename is used for fetching external entities 12597 * and error/warning reports. 
12598 * 12599 * Returns the new parser context or NULL 12600 */ 12601 12602 xmlParserCtxtPtr 12603 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12604 const char *chunk, int size, const char *filename) { 12605 xmlParserCtxtPtr ctxt; 12606 xmlParserInputPtr inputStream; 12607 xmlParserInputBufferPtr buf; 12608 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12609 12610 /* 12611 * plug some encoding conversion routines 12612 */ 12613 if ((chunk != NULL) && (size >= 4)) 12614 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12615 12616 buf = xmlAllocParserInputBuffer(enc); 12617 if (buf == NULL) return(NULL); 12618 12619 ctxt = xmlNewParserCtxt(); 12620 if (ctxt == NULL) { 12621 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12622 xmlFreeParserInputBuffer(buf); 12623 return(NULL); 12624 } 12625 ctxt->dictNames = 1; 12626 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 12627 if (ctxt->pushTab == NULL) { 12628 xmlErrMemory(ctxt, NULL); 12629 xmlFreeParserInputBuffer(buf); 12630 xmlFreeParserCtxt(ctxt); 12631 return(NULL); 12632 } 12633 if (sax != NULL) { 12634 #ifdef LIBXML_SAX1_ENABLED 12635 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12636 #endif /* LIBXML_SAX1_ENABLED */ 12637 xmlFree(ctxt->sax); 12638 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12639 if (ctxt->sax == NULL) { 12640 xmlErrMemory(ctxt, NULL); 12641 xmlFreeParserInputBuffer(buf); 12642 xmlFreeParserCtxt(ctxt); 12643 return(NULL); 12644 } 12645 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12646 if (sax->initialized == XML_SAX2_MAGIC) 12647 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12648 else 12649 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12650 if (user_data != NULL) 12651 ctxt->userData = user_data; 12652 } 12653 if (filename == NULL) { 12654 ctxt->directory = NULL; 12655 } else { 12656 ctxt->directory = xmlParserGetDirectory(filename); 12657 } 12658 12659 inputStream = xmlNewInputStream(ctxt); 
12660 if (inputStream == NULL) { 12661 xmlFreeParserCtxt(ctxt); 12662 xmlFreeParserInputBuffer(buf); 12663 return(NULL); 12664 } 12665 12666 if (filename == NULL) 12667 inputStream->filename = NULL; 12668 else { 12669 inputStream->filename = (char *) 12670 xmlCanonicPath((const xmlChar *) filename); 12671 if (inputStream->filename == NULL) { 12672 xmlFreeParserCtxt(ctxt); 12673 xmlFreeParserInputBuffer(buf); 12674 return(NULL); 12675 } 12676 } 12677 inputStream->buf = buf; 12678 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12679 inputPush(ctxt, inputStream); 12680 12681 /* 12682 * If the caller didn't provide an initial 'chunk' for determining 12683 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12684 * that it can be automatically determined later 12685 */ 12686 if ((size == 0) || (chunk == NULL)) { 12687 ctxt->charset = XML_CHAR_ENCODING_NONE; 12688 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12689 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12690 size_t cur = ctxt->input->cur - ctxt->input->base; 12691 12692 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12693 12694 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12695 #ifdef DEBUG_PUSH 12696 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12697 #endif 12698 } 12699 12700 if (enc != XML_CHAR_ENCODING_NONE) { 12701 xmlSwitchEncoding(ctxt, enc); 12702 } 12703 12704 return(ctxt); 12705 } 12706 #endif /* LIBXML_PUSH_ENABLED */ 12707 12708 /** 12709 * xmlHaltParser: 12710 * @ctxt: an XML parser context 12711 * 12712 * Blocks further parser processing don't override error 12713 * for internal use 12714 */ 12715 static void 12716 xmlHaltParser(xmlParserCtxtPtr ctxt) { 12717 if (ctxt == NULL) 12718 return; 12719 ctxt->instate = XML_PARSER_EOF; 12720 ctxt->disableSAX = 1; 12721 if (ctxt->input != NULL) { 12722 /* 12723 * in case there was a specific allocation deallocate before 12724 * 
overriding base 12725 */ 12726 if (ctxt->input->free != NULL) { 12727 ctxt->input->free((xmlChar *) ctxt->input->base); 12728 ctxt->input->free = NULL; 12729 } 12730 ctxt->input->cur = BAD_CAST""; 12731 ctxt->input->base = ctxt->input->cur; 12732 } 12733 } 12734 12735 /** 12736 * xmlStopParser: 12737 * @ctxt: an XML parser context 12738 * 12739 * Blocks further parser processing 12740 */ 12741 void 12742 xmlStopParser(xmlParserCtxtPtr ctxt) { 12743 if (ctxt == NULL) 12744 return; 12745 xmlHaltParser(ctxt); 12746 ctxt->errNo = XML_ERR_USER_STOP; 12747 } 12748 12749 /** 12750 * xmlCreateIOParserCtxt: 12751 * @sax: a SAX handler 12752 * @user_data: The user data returned on SAX callbacks 12753 * @ioread: an I/O read function 12754 * @ioclose: an I/O close function 12755 * @ioctx: an I/O handler 12756 * @enc: the charset encoding if known 12757 * 12758 * Create a parser context for using the XML parser with an existing 12759 * I/O stream 12760 * 12761 * Returns the new parser context or NULL 12762 */ 12763 xmlParserCtxtPtr 12764 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12765 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12766 void *ioctx, xmlCharEncoding enc) { 12767 xmlParserCtxtPtr ctxt; 12768 xmlParserInputPtr inputStream; 12769 xmlParserInputBufferPtr buf; 12770 12771 if (ioread == NULL) return(NULL); 12772 12773 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12774 if (buf == NULL) { 12775 if (ioclose != NULL) 12776 ioclose(ioctx); 12777 return (NULL); 12778 } 12779 12780 ctxt = xmlNewParserCtxt(); 12781 if (ctxt == NULL) { 12782 xmlFreeParserInputBuffer(buf); 12783 return(NULL); 12784 } 12785 if (sax != NULL) { 12786 #ifdef LIBXML_SAX1_ENABLED 12787 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12788 #endif /* LIBXML_SAX1_ENABLED */ 12789 xmlFree(ctxt->sax); 12790 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12791 if (ctxt->sax == NULL) { 12792 xmlErrMemory(ctxt, NULL); 12793 
xmlFreeParserCtxt(ctxt); 12794 return(NULL); 12795 } 12796 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12797 if (sax->initialized == XML_SAX2_MAGIC) 12798 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12799 else 12800 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12801 if (user_data != NULL) 12802 ctxt->userData = user_data; 12803 } 12804 12805 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12806 if (inputStream == NULL) { 12807 xmlFreeParserCtxt(ctxt); 12808 return(NULL); 12809 } 12810 inputPush(ctxt, inputStream); 12811 12812 return(ctxt); 12813 } 12814 12815 #ifdef LIBXML_VALID_ENABLED 12816 /************************************************************************ 12817 * * 12818 * Front ends when parsing a DTD * 12819 * * 12820 ************************************************************************/ 12821 12822 /** 12823 * xmlIOParseDTD: 12824 * @sax: the SAX handler block or NULL 12825 * @input: an Input Buffer 12826 * @enc: the charset encoding if known 12827 * 12828 * Load and parse a DTD 12829 * 12830 * Returns the resulting xmlDtdPtr or NULL in case of error. 12831 * @input will be freed by the function in any case. 
12832 */ 12833 12834 xmlDtdPtr 12835 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12836 xmlCharEncoding enc) { 12837 xmlDtdPtr ret = NULL; 12838 xmlParserCtxtPtr ctxt; 12839 xmlParserInputPtr pinput = NULL; 12840 xmlChar start[4]; 12841 12842 if (input == NULL) 12843 return(NULL); 12844 12845 ctxt = xmlNewParserCtxt(); 12846 if (ctxt == NULL) { 12847 xmlFreeParserInputBuffer(input); 12848 return(NULL); 12849 } 12850 12851 /* We are loading a DTD */ 12852 ctxt->options |= XML_PARSE_DTDLOAD; 12853 12854 /* 12855 * Set-up the SAX context 12856 */ 12857 if (sax != NULL) { 12858 if (ctxt->sax != NULL) 12859 xmlFree(ctxt->sax); 12860 ctxt->sax = sax; 12861 ctxt->userData = ctxt; 12862 } 12863 xmlDetectSAX2(ctxt); 12864 12865 /* 12866 * generate a parser input from the I/O handler 12867 */ 12868 12869 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12870 if (pinput == NULL) { 12871 if (sax != NULL) ctxt->sax = NULL; 12872 xmlFreeParserInputBuffer(input); 12873 xmlFreeParserCtxt(ctxt); 12874 return(NULL); 12875 } 12876 12877 /* 12878 * plug some encoding conversion routines here. 12879 */ 12880 if (xmlPushInput(ctxt, pinput) < 0) { 12881 if (sax != NULL) ctxt->sax = NULL; 12882 xmlFreeParserCtxt(ctxt); 12883 return(NULL); 12884 } 12885 if (enc != XML_CHAR_ENCODING_NONE) { 12886 xmlSwitchEncoding(ctxt, enc); 12887 } 12888 12889 pinput->filename = NULL; 12890 pinput->line = 1; 12891 pinput->col = 1; 12892 pinput->base = ctxt->input->cur; 12893 pinput->cur = ctxt->input->cur; 12894 pinput->free = NULL; 12895 12896 /* 12897 * let's parse that entity knowing it's an external subset. 
12898 */ 12899 ctxt->inSubset = 2; 12900 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12901 if (ctxt->myDoc == NULL) { 12902 xmlErrMemory(ctxt, "New Doc failed"); 12903 return(NULL); 12904 } 12905 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12906 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12907 BAD_CAST "none", BAD_CAST "none"); 12908 12909 if ((enc == XML_CHAR_ENCODING_NONE) && 12910 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12911 /* 12912 * Get the 4 first bytes and decode the charset 12913 * if enc != XML_CHAR_ENCODING_NONE 12914 * plug some encoding conversion routines. 12915 */ 12916 start[0] = RAW; 12917 start[1] = NXT(1); 12918 start[2] = NXT(2); 12919 start[3] = NXT(3); 12920 enc = xmlDetectCharEncoding(start, 4); 12921 if (enc != XML_CHAR_ENCODING_NONE) { 12922 xmlSwitchEncoding(ctxt, enc); 12923 } 12924 } 12925 12926 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12927 12928 if (ctxt->myDoc != NULL) { 12929 if (ctxt->wellFormed) { 12930 ret = ctxt->myDoc->extSubset; 12931 ctxt->myDoc->extSubset = NULL; 12932 if (ret != NULL) { 12933 xmlNodePtr tmp; 12934 12935 ret->doc = NULL; 12936 tmp = ret->children; 12937 while (tmp != NULL) { 12938 tmp->doc = NULL; 12939 tmp = tmp->next; 12940 } 12941 } 12942 } else { 12943 ret = NULL; 12944 } 12945 xmlFreeDoc(ctxt->myDoc); 12946 ctxt->myDoc = NULL; 12947 } 12948 if (sax != NULL) ctxt->sax = NULL; 12949 xmlFreeParserCtxt(ctxt); 12950 12951 return(ret); 12952 } 12953 12954 /** 12955 * xmlSAXParseDTD: 12956 * @sax: the SAX handler block 12957 * @ExternalID: a NAME* containing the External ID of the DTD 12958 * @SystemID: a NAME* containing the URL to the DTD 12959 * 12960 * Load and parse an external subset. 12961 * 12962 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12963 */ 12964 12965 xmlDtdPtr 12966 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12967 const xmlChar *SystemID) { 12968 xmlDtdPtr ret = NULL; 12969 xmlParserCtxtPtr ctxt; 12970 xmlParserInputPtr input = NULL; 12971 xmlCharEncoding enc; 12972 xmlChar* systemIdCanonic; 12973 12974 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12975 12976 ctxt = xmlNewParserCtxt(); 12977 if (ctxt == NULL) { 12978 return(NULL); 12979 } 12980 12981 /* We are loading a DTD */ 12982 ctxt->options |= XML_PARSE_DTDLOAD; 12983 12984 /* 12985 * Set-up the SAX context 12986 */ 12987 if (sax != NULL) { 12988 if (ctxt->sax != NULL) 12989 xmlFree(ctxt->sax); 12990 ctxt->sax = sax; 12991 ctxt->userData = ctxt; 12992 } 12993 12994 /* 12995 * Canonicalise the system ID 12996 */ 12997 systemIdCanonic = xmlCanonicPath(SystemID); 12998 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12999 xmlFreeParserCtxt(ctxt); 13000 return(NULL); 13001 } 13002 13003 /* 13004 * Ask the Entity resolver to load the damn thing 13005 */ 13006 13007 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 13008 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 13009 systemIdCanonic); 13010 if (input == NULL) { 13011 if (sax != NULL) ctxt->sax = NULL; 13012 xmlFreeParserCtxt(ctxt); 13013 if (systemIdCanonic != NULL) 13014 xmlFree(systemIdCanonic); 13015 return(NULL); 13016 } 13017 13018 /* 13019 * plug some encoding conversion routines here. 
13020 */ 13021 if (xmlPushInput(ctxt, input) < 0) { 13022 if (sax != NULL) ctxt->sax = NULL; 13023 xmlFreeParserCtxt(ctxt); 13024 if (systemIdCanonic != NULL) 13025 xmlFree(systemIdCanonic); 13026 return(NULL); 13027 } 13028 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13029 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 13030 xmlSwitchEncoding(ctxt, enc); 13031 } 13032 13033 if (input->filename == NULL) 13034 input->filename = (char *) systemIdCanonic; 13035 else 13036 xmlFree(systemIdCanonic); 13037 input->line = 1; 13038 input->col = 1; 13039 input->base = ctxt->input->cur; 13040 input->cur = ctxt->input->cur; 13041 input->free = NULL; 13042 13043 /* 13044 * let's parse that entity knowing it's an external subset. 13045 */ 13046 ctxt->inSubset = 2; 13047 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 13048 if (ctxt->myDoc == NULL) { 13049 xmlErrMemory(ctxt, "New Doc failed"); 13050 if (sax != NULL) ctxt->sax = NULL; 13051 xmlFreeParserCtxt(ctxt); 13052 return(NULL); 13053 } 13054 ctxt->myDoc->properties = XML_DOC_INTERNAL; 13055 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 13056 ExternalID, SystemID); 13057 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 13058 13059 if (ctxt->myDoc != NULL) { 13060 if (ctxt->wellFormed) { 13061 ret = ctxt->myDoc->extSubset; 13062 ctxt->myDoc->extSubset = NULL; 13063 if (ret != NULL) { 13064 xmlNodePtr tmp; 13065 13066 ret->doc = NULL; 13067 tmp = ret->children; 13068 while (tmp != NULL) { 13069 tmp->doc = NULL; 13070 tmp = tmp->next; 13071 } 13072 } 13073 } else { 13074 ret = NULL; 13075 } 13076 xmlFreeDoc(ctxt->myDoc); 13077 ctxt->myDoc = NULL; 13078 } 13079 if (sax != NULL) ctxt->sax = NULL; 13080 xmlFreeParserCtxt(ctxt); 13081 13082 return(ret); 13083 } 13084 13085 13086 /** 13087 * xmlParseDTD: 13088 * @ExternalID: a NAME* containing the External ID of the DTD 13089 * @SystemID: a NAME* containing the URL to the DTD 13090 * 13091 * Load and parse an external subset. 
13092 * 13093 * Returns the resulting xmlDtdPtr or NULL in case of error. 13094 */ 13095 13096 xmlDtdPtr 13097 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 13098 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 13099 } 13100 #endif /* LIBXML_VALID_ENABLED */ 13101 13102 /************************************************************************ 13103 * * 13104 * Front ends when parsing an Entity * 13105 * * 13106 ************************************************************************/ 13107 13108 /** 13109 * xmlParseCtxtExternalEntity: 13110 * @ctx: the existing parsing context 13111 * @URL: the URL for the entity to load 13112 * @ID: the System ID for the entity to load 13113 * @lst: the return value for the set of parsed nodes 13114 * 13115 * Parse an external general entity within an existing parsing context 13116 * An external general parsed entity is well-formed if it matches the 13117 * production labeled extParsedEnt. 13118 * 13119 * [78] extParsedEnt ::= TextDecl? 
content 13120 * 13121 * Returns 0 if the entity is well formed, -1 in case of args problem and 13122 * the parser error code otherwise 13123 */ 13124 13125 int 13126 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 13127 const xmlChar *ID, xmlNodePtr *lst) { 13128 xmlParserCtxtPtr ctxt; 13129 xmlDocPtr newDoc; 13130 xmlNodePtr newRoot; 13131 xmlSAXHandlerPtr oldsax = NULL; 13132 int ret = 0; 13133 xmlChar start[4]; 13134 xmlCharEncoding enc; 13135 13136 if (ctx == NULL) return(-1); 13137 13138 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 13139 (ctx->depth > 1024)) { 13140 return(XML_ERR_ENTITY_LOOP); 13141 } 13142 13143 if (lst != NULL) 13144 *lst = NULL; 13145 if ((URL == NULL) && (ID == NULL)) 13146 return(-1); 13147 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 13148 return(-1); 13149 13150 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 13151 if (ctxt == NULL) { 13152 return(-1); 13153 } 13154 13155 oldsax = ctxt->sax; 13156 ctxt->sax = ctx->sax; 13157 xmlDetectSAX2(ctxt); 13158 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13159 if (newDoc == NULL) { 13160 xmlFreeParserCtxt(ctxt); 13161 return(-1); 13162 } 13163 newDoc->properties = XML_DOC_INTERNAL; 13164 if (ctx->myDoc->dict) { 13165 newDoc->dict = ctx->myDoc->dict; 13166 xmlDictReference(newDoc->dict); 13167 } 13168 if (ctx->myDoc != NULL) { 13169 newDoc->intSubset = ctx->myDoc->intSubset; 13170 newDoc->extSubset = ctx->myDoc->extSubset; 13171 } 13172 if (ctx->myDoc->URL != NULL) { 13173 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 13174 } 13175 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13176 if (newRoot == NULL) { 13177 ctxt->sax = oldsax; 13178 xmlFreeParserCtxt(ctxt); 13179 newDoc->intSubset = NULL; 13180 newDoc->extSubset = NULL; 13181 xmlFreeDoc(newDoc); 13182 return(-1); 13183 } 13184 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13185 nodePush(ctxt, newDoc->children); 13186 if (ctx->myDoc == NULL) { 13187 
ctxt->myDoc = newDoc; 13188 } else { 13189 ctxt->myDoc = ctx->myDoc; 13190 newDoc->children->doc = ctx->myDoc; 13191 } 13192 13193 /* 13194 * Get the 4 first bytes and decode the charset 13195 * if enc != XML_CHAR_ENCODING_NONE 13196 * plug some encoding conversion routines. 13197 */ 13198 GROW 13199 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13200 start[0] = RAW; 13201 start[1] = NXT(1); 13202 start[2] = NXT(2); 13203 start[3] = NXT(3); 13204 enc = xmlDetectCharEncoding(start, 4); 13205 if (enc != XML_CHAR_ENCODING_NONE) { 13206 xmlSwitchEncoding(ctxt, enc); 13207 } 13208 } 13209 13210 /* 13211 * Parse a possible text declaration first 13212 */ 13213 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13214 xmlParseTextDecl(ctxt); 13215 /* 13216 * An XML-1.0 document can't reference an entity not XML-1.0 13217 */ 13218 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 13219 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 13220 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 13221 "Version mismatch between document and entity\n"); 13222 } 13223 } 13224 13225 /* 13226 * If the user provided its own SAX callbacks then reuse the 13227 * useData callback field, otherwise the expected setup in a 13228 * DOM builder is to have userData == ctxt 13229 */ 13230 if (ctx->userData == ctx) 13231 ctxt->userData = ctxt; 13232 else 13233 ctxt->userData = ctx->userData; 13234 13235 /* 13236 * Doing validity checking on chunk doesn't make sense 13237 */ 13238 ctxt->instate = XML_PARSER_CONTENT; 13239 ctxt->validate = ctx->validate; 13240 ctxt->valid = ctx->valid; 13241 ctxt->loadsubset = ctx->loadsubset; 13242 ctxt->depth = ctx->depth + 1; 13243 ctxt->replaceEntities = ctx->replaceEntities; 13244 if (ctxt->validate) { 13245 ctxt->vctxt.error = ctx->vctxt.error; 13246 ctxt->vctxt.warning = ctx->vctxt.warning; 13247 } else { 13248 ctxt->vctxt.error = NULL; 13249 ctxt->vctxt.warning = NULL; 13250 } 13251 ctxt->vctxt.nodeTab = NULL; 13252 
ctxt->vctxt.nodeNr = 0; 13253 ctxt->vctxt.nodeMax = 0; 13254 ctxt->vctxt.node = NULL; 13255 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13256 ctxt->dict = ctx->dict; 13257 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13258 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13259 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13260 ctxt->dictNames = ctx->dictNames; 13261 ctxt->attsDefault = ctx->attsDefault; 13262 ctxt->attsSpecial = ctx->attsSpecial; 13263 ctxt->linenumbers = ctx->linenumbers; 13264 13265 xmlParseContent(ctxt); 13266 13267 ctx->validate = ctxt->validate; 13268 ctx->valid = ctxt->valid; 13269 if ((RAW == '<') && (NXT(1) == '/')) { 13270 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13271 } else if (RAW != 0) { 13272 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13273 } 13274 if (ctxt->node != newDoc->children) { 13275 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13276 } 13277 13278 if (!ctxt->wellFormed) { 13279 if (ctxt->errNo == 0) 13280 ret = 1; 13281 else 13282 ret = ctxt->errNo; 13283 } else { 13284 if (lst != NULL) { 13285 xmlNodePtr cur; 13286 13287 /* 13288 * Return the newly created nodeset after unlinking it from 13289 * they pseudo parent. 
13290 */ 13291 cur = newDoc->children->children; 13292 *lst = cur; 13293 while (cur != NULL) { 13294 cur->parent = NULL; 13295 cur = cur->next; 13296 } 13297 newDoc->children->children = NULL; 13298 } 13299 ret = 0; 13300 } 13301 ctxt->sax = oldsax; 13302 ctxt->dict = NULL; 13303 ctxt->attsDefault = NULL; 13304 ctxt->attsSpecial = NULL; 13305 xmlFreeParserCtxt(ctxt); 13306 newDoc->intSubset = NULL; 13307 newDoc->extSubset = NULL; 13308 xmlFreeDoc(newDoc); 13309 13310 return(ret); 13311 } 13312 13313 /** 13314 * xmlParseExternalEntityPrivate: 13315 * @doc: the document the chunk pertains to 13316 * @oldctxt: the previous parser context if available 13317 * @sax: the SAX handler bloc (possibly NULL) 13318 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13319 * @depth: Used for loop detection, use 0 13320 * @URL: the URL for the entity to load 13321 * @ID: the System ID for the entity to load 13322 * @list: the return value for the set of parsed nodes 13323 * 13324 * Private version of xmlParseExternalEntity() 13325 * 13326 * Returns 0 if the entity is well formed, -1 in case of args problem and 13327 * the parser error code otherwise 13328 */ 13329 13330 static xmlParserErrors 13331 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13332 xmlSAXHandlerPtr sax, 13333 void *user_data, int depth, const xmlChar *URL, 13334 const xmlChar *ID, xmlNodePtr *list) { 13335 xmlParserCtxtPtr ctxt; 13336 xmlDocPtr newDoc; 13337 xmlNodePtr newRoot; 13338 xmlSAXHandlerPtr oldsax = NULL; 13339 xmlParserErrors ret = XML_ERR_OK; 13340 xmlChar start[4]; 13341 xmlCharEncoding enc; 13342 13343 if (((depth > 40) && 13344 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 13345 (depth > 1024)) { 13346 return(XML_ERR_ENTITY_LOOP); 13347 } 13348 13349 if (list != NULL) 13350 *list = NULL; 13351 if ((URL == NULL) && (ID == NULL)) 13352 return(XML_ERR_INTERNAL_ERROR); 13353 if (doc == NULL) 13354 return(XML_ERR_INTERNAL_ERROR); 13355 
13356 13357 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13358 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13359 ctxt->userData = ctxt; 13360 if (oldctxt != NULL) { 13361 ctxt->_private = oldctxt->_private; 13362 ctxt->loadsubset = oldctxt->loadsubset; 13363 ctxt->validate = oldctxt->validate; 13364 ctxt->external = oldctxt->external; 13365 ctxt->record_info = oldctxt->record_info; 13366 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13367 ctxt->node_seq.length = oldctxt->node_seq.length; 13368 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13369 } else { 13370 /* 13371 * Doing validity checking on chunk without context 13372 * doesn't make sense 13373 */ 13374 ctxt->_private = NULL; 13375 ctxt->validate = 0; 13376 ctxt->external = 2; 13377 ctxt->loadsubset = 0; 13378 } 13379 if (sax != NULL) { 13380 oldsax = ctxt->sax; 13381 ctxt->sax = sax; 13382 if (user_data != NULL) 13383 ctxt->userData = user_data; 13384 } 13385 xmlDetectSAX2(ctxt); 13386 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13387 if (newDoc == NULL) { 13388 ctxt->node_seq.maximum = 0; 13389 ctxt->node_seq.length = 0; 13390 ctxt->node_seq.buffer = NULL; 13391 xmlFreeParserCtxt(ctxt); 13392 return(XML_ERR_INTERNAL_ERROR); 13393 } 13394 newDoc->properties = XML_DOC_INTERNAL; 13395 newDoc->intSubset = doc->intSubset; 13396 newDoc->extSubset = doc->extSubset; 13397 newDoc->dict = doc->dict; 13398 xmlDictReference(newDoc->dict); 13399 13400 if (doc->URL != NULL) { 13401 newDoc->URL = xmlStrdup(doc->URL); 13402 } 13403 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13404 if (newRoot == NULL) { 13405 if (sax != NULL) 13406 ctxt->sax = oldsax; 13407 ctxt->node_seq.maximum = 0; 13408 ctxt->node_seq.length = 0; 13409 ctxt->node_seq.buffer = NULL; 13410 xmlFreeParserCtxt(ctxt); 13411 newDoc->intSubset = NULL; 13412 newDoc->extSubset = NULL; 13413 xmlFreeDoc(newDoc); 13414 return(XML_ERR_INTERNAL_ERROR); 13415 } 13416 xmlAddChild((xmlNodePtr) newDoc, newRoot); 
13417 nodePush(ctxt, newDoc->children); 13418 ctxt->myDoc = doc; 13419 newRoot->doc = doc; 13420 13421 /* 13422 * Get the 4 first bytes and decode the charset 13423 * if enc != XML_CHAR_ENCODING_NONE 13424 * plug some encoding conversion routines. 13425 */ 13426 GROW; 13427 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13428 start[0] = RAW; 13429 start[1] = NXT(1); 13430 start[2] = NXT(2); 13431 start[3] = NXT(3); 13432 enc = xmlDetectCharEncoding(start, 4); 13433 if (enc != XML_CHAR_ENCODING_NONE) { 13434 xmlSwitchEncoding(ctxt, enc); 13435 } 13436 } 13437 13438 /* 13439 * Parse a possible text declaration first 13440 */ 13441 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13442 xmlParseTextDecl(ctxt); 13443 } 13444 13445 ctxt->instate = XML_PARSER_CONTENT; 13446 ctxt->depth = depth; 13447 13448 xmlParseContent(ctxt); 13449 13450 if ((RAW == '<') && (NXT(1) == '/')) { 13451 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13452 } else if (RAW != 0) { 13453 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13454 } 13455 if (ctxt->node != newDoc->children) { 13456 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13457 } 13458 13459 if (!ctxt->wellFormed) { 13460 if (ctxt->errNo == 0) 13461 ret = XML_ERR_INTERNAL_ERROR; 13462 else 13463 ret = (xmlParserErrors)ctxt->errNo; 13464 } else { 13465 if (list != NULL) { 13466 xmlNodePtr cur; 13467 13468 /* 13469 * Return the newly created nodeset after unlinking it from 13470 * they pseudo parent. 13471 */ 13472 cur = newDoc->children->children; 13473 *list = cur; 13474 while (cur != NULL) { 13475 cur->parent = NULL; 13476 cur = cur->next; 13477 } 13478 newDoc->children->children = NULL; 13479 } 13480 ret = XML_ERR_OK; 13481 } 13482 13483 /* 13484 * Record in the parent context the number of entities replacement 13485 * done when parsing that reference. 
13486 */ 13487 if (oldctxt != NULL) 13488 oldctxt->nbentities += ctxt->nbentities; 13489 13490 /* 13491 * Also record the size of the entity parsed 13492 */ 13493 if (ctxt->input != NULL && oldctxt != NULL) { 13494 oldctxt->sizeentities += ctxt->input->consumed; 13495 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13496 } 13497 /* 13498 * And record the last error if any 13499 */ 13500 if (ctxt->lastError.code != XML_ERR_OK) 13501 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13502 13503 if (sax != NULL) 13504 ctxt->sax = oldsax; 13505 if (oldctxt != NULL) { 13506 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13507 oldctxt->node_seq.length = ctxt->node_seq.length; 13508 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13509 } 13510 ctxt->node_seq.maximum = 0; 13511 ctxt->node_seq.length = 0; 13512 ctxt->node_seq.buffer = NULL; 13513 xmlFreeParserCtxt(ctxt); 13514 newDoc->intSubset = NULL; 13515 newDoc->extSubset = NULL; 13516 xmlFreeDoc(newDoc); 13517 13518 return(ret); 13519 } 13520 13521 #ifdef LIBXML_SAX1_ENABLED 13522 /** 13523 * xmlParseExternalEntity: 13524 * @doc: the document the chunk pertains to 13525 * @sax: the SAX handler bloc (possibly NULL) 13526 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13527 * @depth: Used for loop detection, use 0 13528 * @URL: the URL for the entity to load 13529 * @ID: the System ID for the entity to load 13530 * @lst: the return value for the set of parsed nodes 13531 * 13532 * Parse an external general entity 13533 * An external general parsed entity is well-formed if it matches the 13534 * production labeled extParsedEnt. 13535 * 13536 * [78] extParsedEnt ::= TextDecl? 
content 13537 * 13538 * Returns 0 if the entity is well formed, -1 in case of args problem and 13539 * the parser error code otherwise 13540 */ 13541 13542 int 13543 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13544 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13545 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13546 ID, lst)); 13547 } 13548 13549 /** 13550 * xmlParseBalancedChunkMemory: 13551 * @doc: the document the chunk pertains to 13552 * @sax: the SAX handler bloc (possibly NULL) 13553 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13554 * @depth: Used for loop detection, use 0 13555 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13556 * @lst: the return value for the set of parsed nodes 13557 * 13558 * Parse a well-balanced chunk of an XML document 13559 * called by the parser 13560 * The allowed sequence for the Well Balanced Chunk is the one defined by 13561 * the content production in the XML grammar: 13562 * 13563 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13564 * 13565 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13566 * the parser error code otherwise 13567 */ 13568 13569 int 13570 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13571 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13572 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13573 depth, string, lst, 0 ); 13574 } 13575 #endif /* LIBXML_SAX1_ENABLED */ 13576 13577 /** 13578 * xmlParseBalancedChunkMemoryInternal: 13579 * @oldctxt: the existing parsing context 13580 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13581 * @user_data: the user data field for the parser context 13582 * @lst: the return value for the set of parsed nodes 13583 * 13584 * 13585 * Parse a well-balanced chunk of an XML document 13586 * called by the parser 
13587 * The allowed sequence for the Well Balanced Chunk is the one defined by 13588 * the content production in the XML grammar: 13589 * 13590 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13591 * 13592 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13593 * error code otherwise 13594 * 13595 * In case recover is set to 1, the nodelist will not be empty even if 13596 * the parsed chunk is not well balanced. 13597 */ 13598 static xmlParserErrors 13599 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 13600 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 13601 xmlParserCtxtPtr ctxt; 13602 xmlDocPtr newDoc = NULL; 13603 xmlNodePtr newRoot; 13604 xmlSAXHandlerPtr oldsax = NULL; 13605 xmlNodePtr content = NULL; 13606 xmlNodePtr last = NULL; 13607 int size; 13608 xmlParserErrors ret = XML_ERR_OK; 13609 #ifdef SAX2 13610 int i; 13611 #endif 13612 13613 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 13614 (oldctxt->depth > 1024)) { 13615 return(XML_ERR_ENTITY_LOOP); 13616 } 13617 13618 13619 if (lst != NULL) 13620 *lst = NULL; 13621 if (string == NULL) 13622 return(XML_ERR_INTERNAL_ERROR); 13623 13624 size = xmlStrlen(string); 13625 13626 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13627 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13628 if (user_data != NULL) 13629 ctxt->userData = user_data; 13630 else 13631 ctxt->userData = ctxt; 13632 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13633 ctxt->dict = oldctxt->dict; 13634 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13635 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13636 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13637 13638 #ifdef SAX2 13639 /* propagate namespaces down the entity */ 13640 for (i = 0;i < oldctxt->nsNr;i += 2) { 13641 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 13642 } 13643 #endif 13644 13645 oldsax = 
ctxt->sax; 13646 ctxt->sax = oldctxt->sax; 13647 xmlDetectSAX2(ctxt); 13648 ctxt->replaceEntities = oldctxt->replaceEntities; 13649 ctxt->options = oldctxt->options; 13650 13651 ctxt->_private = oldctxt->_private; 13652 if (oldctxt->myDoc == NULL) { 13653 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13654 if (newDoc == NULL) { 13655 ctxt->sax = oldsax; 13656 ctxt->dict = NULL; 13657 xmlFreeParserCtxt(ctxt); 13658 return(XML_ERR_INTERNAL_ERROR); 13659 } 13660 newDoc->properties = XML_DOC_INTERNAL; 13661 newDoc->dict = ctxt->dict; 13662 xmlDictReference(newDoc->dict); 13663 ctxt->myDoc = newDoc; 13664 } else { 13665 ctxt->myDoc = oldctxt->myDoc; 13666 content = ctxt->myDoc->children; 13667 last = ctxt->myDoc->last; 13668 } 13669 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13670 if (newRoot == NULL) { 13671 ctxt->sax = oldsax; 13672 ctxt->dict = NULL; 13673 xmlFreeParserCtxt(ctxt); 13674 if (newDoc != NULL) { 13675 xmlFreeDoc(newDoc); 13676 } 13677 return(XML_ERR_INTERNAL_ERROR); 13678 } 13679 ctxt->myDoc->children = NULL; 13680 ctxt->myDoc->last = NULL; 13681 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13682 nodePush(ctxt, ctxt->myDoc->children); 13683 ctxt->instate = XML_PARSER_CONTENT; 13684 ctxt->depth = oldctxt->depth + 1; 13685 13686 ctxt->validate = 0; 13687 ctxt->loadsubset = oldctxt->loadsubset; 13688 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13689 /* 13690 * ID/IDREF registration will be done in xmlValidateElement below 13691 */ 13692 ctxt->loadsubset |= XML_SKIP_IDS; 13693 } 13694 ctxt->dictNames = oldctxt->dictNames; 13695 ctxt->attsDefault = oldctxt->attsDefault; 13696 ctxt->attsSpecial = oldctxt->attsSpecial; 13697 13698 xmlParseContent(ctxt); 13699 if ((RAW == '<') && (NXT(1) == '/')) { 13700 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13701 } else if (RAW != 0) { 13702 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13703 } 13704 if (ctxt->node != ctxt->myDoc->children) { 13705 xmlFatalErr(ctxt, 
XML_ERR_NOT_WELL_BALANCED, NULL); 13706 } 13707 13708 if (!ctxt->wellFormed) { 13709 if (ctxt->errNo == 0) 13710 ret = XML_ERR_INTERNAL_ERROR; 13711 else 13712 ret = (xmlParserErrors)ctxt->errNo; 13713 } else { 13714 ret = XML_ERR_OK; 13715 } 13716 13717 if ((lst != NULL) && (ret == XML_ERR_OK)) { 13718 xmlNodePtr cur; 13719 13720 /* 13721 * Return the newly created nodeset after unlinking it from 13722 * they pseudo parent. 13723 */ 13724 cur = ctxt->myDoc->children->children; 13725 *lst = cur; 13726 while (cur != NULL) { 13727 #ifdef LIBXML_VALID_ENABLED 13728 if ((oldctxt->validate) && (oldctxt->wellFormed) && 13729 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13730 (cur->type == XML_ELEMENT_NODE)) { 13731 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13732 oldctxt->myDoc, cur); 13733 } 13734 #endif /* LIBXML_VALID_ENABLED */ 13735 cur->parent = NULL; 13736 cur = cur->next; 13737 } 13738 ctxt->myDoc->children->children = NULL; 13739 } 13740 if (ctxt->myDoc != NULL) { 13741 xmlFreeNode(ctxt->myDoc->children); 13742 ctxt->myDoc->children = content; 13743 ctxt->myDoc->last = last; 13744 } 13745 13746 /* 13747 * Record in the parent context the number of entities replacement 13748 * done when parsing that reference. 
13749 */ 13750 if (oldctxt != NULL) 13751 oldctxt->nbentities += ctxt->nbentities; 13752 13753 /* 13754 * Also record the last error if any 13755 */ 13756 if (ctxt->lastError.code != XML_ERR_OK) 13757 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13758 13759 ctxt->sax = oldsax; 13760 ctxt->dict = NULL; 13761 ctxt->attsDefault = NULL; 13762 ctxt->attsSpecial = NULL; 13763 xmlFreeParserCtxt(ctxt); 13764 if (newDoc != NULL) { 13765 xmlFreeDoc(newDoc); 13766 } 13767 13768 return(ret); 13769 } 13770 13771 /** 13772 * xmlParseInNodeContext: 13773 * @node: the context node 13774 * @data: the input string 13775 * @datalen: the input string length in bytes 13776 * @options: a combination of xmlParserOption 13777 * @lst: the return value for the set of parsed nodes 13778 * 13779 * Parse a well-balanced chunk of an XML document 13780 * within the context (DTD, namespaces, etc ...) of the given node. 13781 * 13782 * The allowed sequence for the data is a Well Balanced Chunk defined by 13783 * the content production in the XML grammar: 13784 * 13785 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13786 * 13787 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13788 * error code otherwise 13789 */ 13790 xmlParserErrors 13791 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13792 int options, xmlNodePtr *lst) { 13793 #ifdef SAX2 13794 xmlParserCtxtPtr ctxt; 13795 xmlDocPtr doc = NULL; 13796 xmlNodePtr fake, cur; 13797 int nsnr = 0; 13798 13799 xmlParserErrors ret = XML_ERR_OK; 13800 13801 /* 13802 * check all input parameters, grab the document 13803 */ 13804 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13805 return(XML_ERR_INTERNAL_ERROR); 13806 switch (node->type) { 13807 case XML_ELEMENT_NODE: 13808 case XML_ATTRIBUTE_NODE: 13809 case XML_TEXT_NODE: 13810 case XML_CDATA_SECTION_NODE: 13811 case XML_ENTITY_REF_NODE: 13812 case XML_PI_NODE: 13813 case XML_COMMENT_NODE: 13814 case 
XML_DOCUMENT_NODE: 13815 case XML_HTML_DOCUMENT_NODE: 13816 break; 13817 default: 13818 return(XML_ERR_INTERNAL_ERROR); 13819 13820 } 13821 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13822 (node->type != XML_DOCUMENT_NODE) && 13823 (node->type != XML_HTML_DOCUMENT_NODE)) 13824 node = node->parent; 13825 if (node == NULL) 13826 return(XML_ERR_INTERNAL_ERROR); 13827 if (node->type == XML_ELEMENT_NODE) 13828 doc = node->doc; 13829 else 13830 doc = (xmlDocPtr) node; 13831 if (doc == NULL) 13832 return(XML_ERR_INTERNAL_ERROR); 13833 13834 /* 13835 * allocate a context and set-up everything not related to the 13836 * node position in the tree 13837 */ 13838 if (doc->type == XML_DOCUMENT_NODE) 13839 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13840 #ifdef LIBXML_HTML_ENABLED 13841 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13842 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13843 /* 13844 * When parsing in context, it makes no sense to add implied 13845 * elements like html/body/etc... 13846 */ 13847 options |= HTML_PARSE_NOIMPLIED; 13848 } 13849 #endif 13850 else 13851 return(XML_ERR_INTERNAL_ERROR); 13852 13853 if (ctxt == NULL) 13854 return(XML_ERR_NO_MEMORY); 13855 13856 /* 13857 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13858 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13859 * we must wait until the last moment to free the original one. 
13860 */ 13861 if (doc->dict != NULL) { 13862 if (ctxt->dict != NULL) 13863 xmlDictFree(ctxt->dict); 13864 ctxt->dict = doc->dict; 13865 } else 13866 options |= XML_PARSE_NODICT; 13867 13868 if (doc->encoding != NULL) { 13869 xmlCharEncodingHandlerPtr hdlr; 13870 13871 if (ctxt->encoding != NULL) 13872 xmlFree((xmlChar *) ctxt->encoding); 13873 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13874 13875 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13876 if (hdlr != NULL) { 13877 xmlSwitchToEncoding(ctxt, hdlr); 13878 } else { 13879 return(XML_ERR_UNSUPPORTED_ENCODING); 13880 } 13881 } 13882 13883 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13884 xmlDetectSAX2(ctxt); 13885 ctxt->myDoc = doc; 13886 /* parsing in context, i.e. as within existing content */ 13887 ctxt->instate = XML_PARSER_CONTENT; 13888 13889 fake = xmlNewComment(NULL); 13890 if (fake == NULL) { 13891 xmlFreeParserCtxt(ctxt); 13892 return(XML_ERR_NO_MEMORY); 13893 } 13894 xmlAddChild(node, fake); 13895 13896 if (node->type == XML_ELEMENT_NODE) { 13897 nodePush(ctxt, node); 13898 /* 13899 * initialize the SAX2 namespaces stack 13900 */ 13901 cur = node; 13902 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13903 xmlNsPtr ns = cur->nsDef; 13904 const xmlChar *iprefix, *ihref; 13905 13906 while (ns != NULL) { 13907 if (ctxt->dict) { 13908 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13909 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13910 } else { 13911 iprefix = ns->prefix; 13912 ihref = ns->href; 13913 } 13914 13915 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13916 nsPush(ctxt, iprefix, ihref); 13917 nsnr++; 13918 } 13919 ns = ns->next; 13920 } 13921 cur = cur->parent; 13922 } 13923 } 13924 13925 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13926 /* 13927 * ID/IDREF registration will be done in xmlValidateElement below 13928 */ 13929 ctxt->loadsubset |= XML_SKIP_IDS; 13930 } 13931 13932 #ifdef LIBXML_HTML_ENABLED 13933 if 
(doc->type == XML_HTML_DOCUMENT_NODE) 13934 __htmlParseContent(ctxt); 13935 else 13936 #endif 13937 xmlParseContent(ctxt); 13938 13939 nsPop(ctxt, nsnr); 13940 if ((RAW == '<') && (NXT(1) == '/')) { 13941 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13942 } else if (RAW != 0) { 13943 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13944 } 13945 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13946 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13947 ctxt->wellFormed = 0; 13948 } 13949 13950 if (!ctxt->wellFormed) { 13951 if (ctxt->errNo == 0) 13952 ret = XML_ERR_INTERNAL_ERROR; 13953 else 13954 ret = (xmlParserErrors)ctxt->errNo; 13955 } else { 13956 ret = XML_ERR_OK; 13957 } 13958 13959 /* 13960 * Return the newly created nodeset after unlinking it from 13961 * the pseudo sibling. 13962 */ 13963 13964 cur = fake->next; 13965 fake->next = NULL; 13966 node->last = fake; 13967 13968 if (cur != NULL) { 13969 cur->prev = NULL; 13970 } 13971 13972 *lst = cur; 13973 13974 while (cur != NULL) { 13975 cur->parent = NULL; 13976 cur = cur->next; 13977 } 13978 13979 xmlUnlinkNode(fake); 13980 xmlFreeNode(fake); 13981 13982 13983 if (ret != XML_ERR_OK) { 13984 xmlFreeNodeList(*lst); 13985 *lst = NULL; 13986 } 13987 13988 if (doc->dict != NULL) 13989 ctxt->dict = NULL; 13990 xmlFreeParserCtxt(ctxt); 13991 13992 return(ret); 13993 #else /* !SAX2 */ 13994 return(XML_ERR_INTERNAL_ERROR); 13995 #endif 13996 } 13997 13998 #ifdef LIBXML_SAX1_ENABLED 13999 /** 14000 * xmlParseBalancedChunkMemoryRecover: 14001 * @doc: the document the chunk pertains to 14002 * @sax: the SAX handler bloc (possibly NULL) 14003 * @user_data: The user data returned on SAX callbacks (possibly NULL) 14004 * @depth: Used for loop detection, use 0 14005 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 14006 * @lst: the return value for the set of parsed nodes 14007 * @recover: return nodes even if the data is broken (use 0) 14008 * 14009 * 14010 * Parse a well-balanced chunk 
of an XML document 14011 * called by the parser 14012 * The allowed sequence for the Well Balanced Chunk is the one defined by 14013 * the content production in the XML grammar: 14014 * 14015 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 14016 * 14017 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 14018 * the parser error code otherwise 14019 * 14020 * In case recover is set to 1, the nodelist will not be empty even if 14021 * the parsed chunk is not well balanced, assuming the parsing succeeded to 14022 * some extent. 14023 */ 14024 int 14025 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 14026 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 14027 int recover) { 14028 xmlParserCtxtPtr ctxt; 14029 xmlDocPtr newDoc; 14030 xmlSAXHandlerPtr oldsax = NULL; 14031 xmlNodePtr content, newRoot; 14032 int size; 14033 int ret = 0; 14034 14035 if (depth > 40) { 14036 return(XML_ERR_ENTITY_LOOP); 14037 } 14038 14039 14040 if (lst != NULL) 14041 *lst = NULL; 14042 if (string == NULL) 14043 return(-1); 14044 14045 size = xmlStrlen(string); 14046 14047 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 14048 if (ctxt == NULL) return(-1); 14049 ctxt->userData = ctxt; 14050 if (sax != NULL) { 14051 oldsax = ctxt->sax; 14052 ctxt->sax = sax; 14053 if (user_data != NULL) 14054 ctxt->userData = user_data; 14055 } 14056 newDoc = xmlNewDoc(BAD_CAST "1.0"); 14057 if (newDoc == NULL) { 14058 xmlFreeParserCtxt(ctxt); 14059 return(-1); 14060 } 14061 newDoc->properties = XML_DOC_INTERNAL; 14062 if ((doc != NULL) && (doc->dict != NULL)) { 14063 xmlDictFree(ctxt->dict); 14064 ctxt->dict = doc->dict; 14065 xmlDictReference(ctxt->dict); 14066 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 14067 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 14068 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 14069 ctxt->dictNames = 1; 14070 } else { 
14071 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 14072 } 14073 if (doc != NULL) { 14074 newDoc->intSubset = doc->intSubset; 14075 newDoc->extSubset = doc->extSubset; 14076 } 14077 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 14078 if (newRoot == NULL) { 14079 if (sax != NULL) 14080 ctxt->sax = oldsax; 14081 xmlFreeParserCtxt(ctxt); 14082 newDoc->intSubset = NULL; 14083 newDoc->extSubset = NULL; 14084 xmlFreeDoc(newDoc); 14085 return(-1); 14086 } 14087 xmlAddChild((xmlNodePtr) newDoc, newRoot); 14088 nodePush(ctxt, newRoot); 14089 if (doc == NULL) { 14090 ctxt->myDoc = newDoc; 14091 } else { 14092 ctxt->myDoc = newDoc; 14093 newDoc->children->doc = doc; 14094 /* Ensure that doc has XML spec namespace */ 14095 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 14096 newDoc->oldNs = doc->oldNs; 14097 } 14098 ctxt->instate = XML_PARSER_CONTENT; 14099 ctxt->depth = depth; 14100 14101 /* 14102 * Doing validity checking on chunk doesn't make sense 14103 */ 14104 ctxt->validate = 0; 14105 ctxt->loadsubset = 0; 14106 xmlDetectSAX2(ctxt); 14107 14108 if ( doc != NULL ){ 14109 content = doc->children; 14110 doc->children = NULL; 14111 xmlParseContent(ctxt); 14112 doc->children = content; 14113 } 14114 else { 14115 xmlParseContent(ctxt); 14116 } 14117 if ((RAW == '<') && (NXT(1) == '/')) { 14118 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 14119 } else if (RAW != 0) { 14120 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 14121 } 14122 if (ctxt->node != newDoc->children) { 14123 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 14124 } 14125 14126 if (!ctxt->wellFormed) { 14127 if (ctxt->errNo == 0) 14128 ret = 1; 14129 else 14130 ret = ctxt->errNo; 14131 } else { 14132 ret = 0; 14133 } 14134 14135 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 14136 xmlNodePtr cur; 14137 14138 /* 14139 * Return the newly created nodeset after unlinking it from 14140 * they pseudo parent. 
14141 */ 14142 cur = newDoc->children->children; 14143 *lst = cur; 14144 while (cur != NULL) { 14145 xmlSetTreeDoc(cur, doc); 14146 cur->parent = NULL; 14147 cur = cur->next; 14148 } 14149 newDoc->children->children = NULL; 14150 } 14151 14152 if (sax != NULL) 14153 ctxt->sax = oldsax; 14154 xmlFreeParserCtxt(ctxt); 14155 newDoc->intSubset = NULL; 14156 newDoc->extSubset = NULL; 14157 newDoc->oldNs = NULL; 14158 xmlFreeDoc(newDoc); 14159 14160 return(ret); 14161 } 14162 14163 /** 14164 * xmlSAXParseEntity: 14165 * @sax: the SAX handler block 14166 * @filename: the filename 14167 * 14168 * parse an XML external entity out of context and build a tree. 14169 * It use the given SAX function block to handle the parsing callback. 14170 * If sax is NULL, fallback to the default DOM tree building routines. 14171 * 14172 * [78] extParsedEnt ::= TextDecl? content 14173 * 14174 * This correspond to a "Well Balanced" chunk 14175 * 14176 * Returns the resulting document tree 14177 */ 14178 14179 xmlDocPtr 14180 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 14181 xmlDocPtr ret; 14182 xmlParserCtxtPtr ctxt; 14183 14184 ctxt = xmlCreateFileParserCtxt(filename); 14185 if (ctxt == NULL) { 14186 return(NULL); 14187 } 14188 if (sax != NULL) { 14189 if (ctxt->sax != NULL) 14190 xmlFree(ctxt->sax); 14191 ctxt->sax = sax; 14192 ctxt->userData = NULL; 14193 } 14194 14195 xmlParseExtParsedEnt(ctxt); 14196 14197 if (ctxt->wellFormed) 14198 ret = ctxt->myDoc; 14199 else { 14200 ret = NULL; 14201 xmlFreeDoc(ctxt->myDoc); 14202 ctxt->myDoc = NULL; 14203 } 14204 if (sax != NULL) 14205 ctxt->sax = NULL; 14206 xmlFreeParserCtxt(ctxt); 14207 14208 return(ret); 14209 } 14210 14211 /** 14212 * xmlParseEntity: 14213 * @filename: the filename 14214 * 14215 * parse an XML external entity out of context and build a tree. 14216 * 14217 * [78] extParsedEnt ::= TextDecl? 
content 14218 * 14219 * This correspond to a "Well Balanced" chunk 14220 * 14221 * Returns the resulting document tree 14222 */ 14223 14224 xmlDocPtr 14225 xmlParseEntity(const char *filename) { 14226 return(xmlSAXParseEntity(NULL, filename)); 14227 } 14228 #endif /* LIBXML_SAX1_ENABLED */ 14229 14230 /** 14231 * xmlCreateEntityParserCtxtInternal: 14232 * @URL: the entity URL 14233 * @ID: the entity PUBLIC ID 14234 * @base: a possible base for the target URI 14235 * @pctx: parser context used to set options on new context 14236 * 14237 * Create a parser context for an external entity 14238 * Automatic support for ZLIB/Compress compressed document is provided 14239 * by default if found at compile-time. 14240 * 14241 * Returns the new parser context or NULL 14242 */ 14243 static xmlParserCtxtPtr 14244 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 14245 const xmlChar *base, xmlParserCtxtPtr pctx) { 14246 xmlParserCtxtPtr ctxt; 14247 xmlParserInputPtr inputStream; 14248 char *directory = NULL; 14249 xmlChar *uri; 14250 14251 ctxt = xmlNewParserCtxt(); 14252 if (ctxt == NULL) { 14253 return(NULL); 14254 } 14255 14256 if (pctx != NULL) { 14257 ctxt->options = pctx->options; 14258 ctxt->_private = pctx->_private; 14259 } 14260 14261 uri = xmlBuildURI(URL, base); 14262 14263 if (uri == NULL) { 14264 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 14265 if (inputStream == NULL) { 14266 xmlFreeParserCtxt(ctxt); 14267 return(NULL); 14268 } 14269 14270 inputPush(ctxt, inputStream); 14271 14272 if ((ctxt->directory == NULL) && (directory == NULL)) 14273 directory = xmlParserGetDirectory((char *)URL); 14274 if ((ctxt->directory == NULL) && (directory != NULL)) 14275 ctxt->directory = directory; 14276 } else { 14277 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 14278 if (inputStream == NULL) { 14279 xmlFree(uri); 14280 xmlFreeParserCtxt(ctxt); 14281 return(NULL); 14282 } 14283 14284 inputPush(ctxt, 
inputStream); 14285 14286 if ((ctxt->directory == NULL) && (directory == NULL)) 14287 directory = xmlParserGetDirectory((char *)uri); 14288 if ((ctxt->directory == NULL) && (directory != NULL)) 14289 ctxt->directory = directory; 14290 xmlFree(uri); 14291 } 14292 return(ctxt); 14293 } 14294 14295 /** 14296 * xmlCreateEntityParserCtxt: 14297 * @URL: the entity URL 14298 * @ID: the entity PUBLIC ID 14299 * @base: a possible base for the target URI 14300 * 14301 * Create a parser context for an external entity 14302 * Automatic support for ZLIB/Compress compressed document is provided 14303 * by default if found at compile-time. 14304 * 14305 * Returns the new parser context or NULL 14306 */ 14307 xmlParserCtxtPtr 14308 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14309 const xmlChar *base) { 14310 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14311 14312 } 14313 14314 /************************************************************************ 14315 * * 14316 * Front ends when parsing from a file * 14317 * * 14318 ************************************************************************/ 14319 14320 /** 14321 * xmlCreateURLParserCtxt: 14322 * @filename: the filename or URL 14323 * @options: a combination of xmlParserOption 14324 * 14325 * Create a parser context for a file or URL content. 
14326 * Automatic support for ZLIB/Compress compressed document is provided 14327 * by default if found at compile-time and for file accesses 14328 * 14329 * Returns the new parser context or NULL 14330 */ 14331 xmlParserCtxtPtr 14332 xmlCreateURLParserCtxt(const char *filename, int options) 14333 { 14334 xmlParserCtxtPtr ctxt; 14335 xmlParserInputPtr inputStream; 14336 char *directory = NULL; 14337 14338 ctxt = xmlNewParserCtxt(); 14339 if (ctxt == NULL) { 14340 xmlErrMemory(NULL, "cannot allocate parser context"); 14341 return(NULL); 14342 } 14343 14344 if (options) 14345 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14346 ctxt->linenumbers = 1; 14347 14348 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14349 if (inputStream == NULL) { 14350 xmlFreeParserCtxt(ctxt); 14351 return(NULL); 14352 } 14353 14354 inputPush(ctxt, inputStream); 14355 if ((ctxt->directory == NULL) && (directory == NULL)) 14356 directory = xmlParserGetDirectory(filename); 14357 if ((ctxt->directory == NULL) && (directory != NULL)) 14358 ctxt->directory = directory; 14359 14360 return(ctxt); 14361 } 14362 14363 /** 14364 * xmlCreateFileParserCtxt: 14365 * @filename: the filename 14366 * 14367 * Create a parser context for a file content. 14368 * Automatic support for ZLIB/Compress compressed document is provided 14369 * by default if found at compile-time. 14370 * 14371 * Returns the new parser context or NULL 14372 */ 14373 xmlParserCtxtPtr 14374 xmlCreateFileParserCtxt(const char *filename) 14375 { 14376 return(xmlCreateURLParserCtxt(filename, 0)); 14377 } 14378 14379 #ifdef LIBXML_SAX1_ENABLED 14380 /** 14381 * xmlSAXParseFileWithData: 14382 * @sax: the SAX handler block 14383 * @filename: the filename 14384 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14385 * documents 14386 * @data: the userdata 14387 * 14388 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14389 * compressed document is provided by default if found at compile-time. 14390 * It use the given SAX function block to handle the parsing callback. 14391 * If sax is NULL, fallback to the default DOM tree building routines. 14392 * 14393 * User data (void *) is stored within the parser context in the 14394 * context's _private member, so it is available nearly everywhere in libxml 14395 * 14396 * Returns the resulting document tree 14397 */ 14398 14399 xmlDocPtr 14400 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14401 int recovery, void *data) { 14402 xmlDocPtr ret; 14403 xmlParserCtxtPtr ctxt; 14404 14405 xmlInitParser(); 14406 14407 ctxt = xmlCreateFileParserCtxt(filename); 14408 if (ctxt == NULL) { 14409 return(NULL); 14410 } 14411 if (sax != NULL) { 14412 if (ctxt->sax != NULL) 14413 xmlFree(ctxt->sax); 14414 ctxt->sax = sax; 14415 } 14416 xmlDetectSAX2(ctxt); 14417 if (data!=NULL) { 14418 ctxt->_private = data; 14419 } 14420 14421 if (ctxt->directory == NULL) 14422 ctxt->directory = xmlParserGetDirectory(filename); 14423 14424 ctxt->recovery = recovery; 14425 14426 xmlParseDocument(ctxt); 14427 14428 if ((ctxt->wellFormed) || recovery) { 14429 ret = ctxt->myDoc; 14430 if (ret != NULL) { 14431 if (ctxt->input->buf->compressed > 0) 14432 ret->compression = 9; 14433 else 14434 ret->compression = ctxt->input->buf->compressed; 14435 } 14436 } 14437 else { 14438 ret = NULL; 14439 xmlFreeDoc(ctxt->myDoc); 14440 ctxt->myDoc = NULL; 14441 } 14442 if (sax != NULL) 14443 ctxt->sax = NULL; 14444 xmlFreeParserCtxt(ctxt); 14445 14446 return(ret); 14447 } 14448 14449 /** 14450 * xmlSAXParseFile: 14451 * @sax: the SAX handler block 14452 * @filename: the filename 14453 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14454 * documents 14455 * 14456 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14457 * compressed document is provided by default if found at compile-time. 14458 * It use the given SAX function block to handle the parsing callback. 14459 * If sax is NULL, fallback to the default DOM tree building routines. 14460 * 14461 * Returns the resulting document tree 14462 */ 14463 14464 xmlDocPtr 14465 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14466 int recovery) { 14467 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14468 } 14469 14470 /** 14471 * xmlRecoverDoc: 14472 * @cur: a pointer to an array of xmlChar 14473 * 14474 * parse an XML in-memory document and build a tree. 14475 * In the case the document is not Well Formed, a attempt to build a 14476 * tree is tried anyway 14477 * 14478 * Returns the resulting document tree or NULL in case of failure 14479 */ 14480 14481 xmlDocPtr 14482 xmlRecoverDoc(const xmlChar *cur) { 14483 return(xmlSAXParseDoc(NULL, cur, 1)); 14484 } 14485 14486 /** 14487 * xmlParseFile: 14488 * @filename: the filename 14489 * 14490 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14491 * compressed document is provided by default if found at compile-time. 14492 * 14493 * Returns the resulting document tree if the file was wellformed, 14494 * NULL otherwise. 14495 */ 14496 14497 xmlDocPtr 14498 xmlParseFile(const char *filename) { 14499 return(xmlSAXParseFile(NULL, filename, 0)); 14500 } 14501 14502 /** 14503 * xmlRecoverFile: 14504 * @filename: the filename 14505 * 14506 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14507 * compressed document is provided by default if found at compile-time. 
14508 * In the case the document is not Well Formed, it attempts to build 14509 * a tree anyway 14510 * 14511 * Returns the resulting document tree or NULL in case of failure 14512 */ 14513 14514 xmlDocPtr 14515 xmlRecoverFile(const char *filename) { 14516 return(xmlSAXParseFile(NULL, filename, 1)); 14517 } 14518 14519 14520 /** 14521 * xmlSetupParserForBuffer: 14522 * @ctxt: an XML parser context 14523 * @buffer: a xmlChar * buffer 14524 * @filename: a file name 14525 * 14526 * Setup the parser context to parse a new buffer; Clears any prior 14527 * contents from the parser context. The buffer parameter must not be 14528 * NULL, but the filename parameter can be 14529 */ 14530 void 14531 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14532 const char* filename) 14533 { 14534 xmlParserInputPtr input; 14535 14536 if ((ctxt == NULL) || (buffer == NULL)) 14537 return; 14538 14539 input = xmlNewInputStream(ctxt); 14540 if (input == NULL) { 14541 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14542 xmlClearParserCtxt(ctxt); 14543 return; 14544 } 14545 14546 xmlClearParserCtxt(ctxt); 14547 if (filename != NULL) 14548 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14549 input->base = buffer; 14550 input->cur = buffer; 14551 input->end = &buffer[xmlStrlen(buffer)]; 14552 inputPush(ctxt, input); 14553 } 14554 14555 /** 14556 * xmlSAXUserParseFile: 14557 * @sax: a SAX handler 14558 * @user_data: The user data returned on SAX callbacks 14559 * @filename: a file name 14560 * 14561 * parse an XML file and call the given SAX handler routines. 
14562 * Automatic support for ZLIB/Compress compressed document is provided 14563 * 14564 * Returns 0 in case of success or a error number otherwise 14565 */ 14566 int 14567 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14568 const char *filename) { 14569 int ret = 0; 14570 xmlParserCtxtPtr ctxt; 14571 14572 ctxt = xmlCreateFileParserCtxt(filename); 14573 if (ctxt == NULL) return -1; 14574 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14575 xmlFree(ctxt->sax); 14576 ctxt->sax = sax; 14577 xmlDetectSAX2(ctxt); 14578 14579 if (user_data != NULL) 14580 ctxt->userData = user_data; 14581 14582 xmlParseDocument(ctxt); 14583 14584 if (ctxt->wellFormed) 14585 ret = 0; 14586 else { 14587 if (ctxt->errNo != 0) 14588 ret = ctxt->errNo; 14589 else 14590 ret = -1; 14591 } 14592 if (sax != NULL) 14593 ctxt->sax = NULL; 14594 if (ctxt->myDoc != NULL) { 14595 xmlFreeDoc(ctxt->myDoc); 14596 ctxt->myDoc = NULL; 14597 } 14598 xmlFreeParserCtxt(ctxt); 14599 14600 return ret; 14601 } 14602 #endif /* LIBXML_SAX1_ENABLED */ 14603 14604 /************************************************************************ 14605 * * 14606 * Front ends when parsing from memory * 14607 * * 14608 ************************************************************************/ 14609 14610 /** 14611 * xmlCreateMemoryParserCtxt: 14612 * @buffer: a pointer to a char array 14613 * @size: the size of the array 14614 * 14615 * Create a parser context for an XML in-memory document. 
14616 * 14617 * Returns the new parser context or NULL 14618 */ 14619 xmlParserCtxtPtr 14620 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14621 xmlParserCtxtPtr ctxt; 14622 xmlParserInputPtr input; 14623 xmlParserInputBufferPtr buf; 14624 14625 if (buffer == NULL) 14626 return(NULL); 14627 if (size <= 0) 14628 return(NULL); 14629 14630 ctxt = xmlNewParserCtxt(); 14631 if (ctxt == NULL) 14632 return(NULL); 14633 14634 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14635 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14636 if (buf == NULL) { 14637 xmlFreeParserCtxt(ctxt); 14638 return(NULL); 14639 } 14640 14641 input = xmlNewInputStream(ctxt); 14642 if (input == NULL) { 14643 xmlFreeParserInputBuffer(buf); 14644 xmlFreeParserCtxt(ctxt); 14645 return(NULL); 14646 } 14647 14648 input->filename = NULL; 14649 input->buf = buf; 14650 xmlBufResetInput(input->buf->buffer, input); 14651 14652 inputPush(ctxt, input); 14653 return(ctxt); 14654 } 14655 14656 #ifdef LIBXML_SAX1_ENABLED 14657 /** 14658 * xmlSAXParseMemoryWithData: 14659 * @sax: the SAX handler block 14660 * @buffer: an pointer to a char array 14661 * @size: the size of the array 14662 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14663 * documents 14664 * @data: the userdata 14665 * 14666 * parse an XML in-memory block and use the given SAX function block 14667 * to handle the parsing callback. If sax is NULL, fallback to the default 14668 * DOM tree building routines. 
14669 * 14670 * User data (void *) is stored within the parser context in the 14671 * context's _private member, so it is available nearly everywhere in libxml 14672 * 14673 * Returns the resulting document tree 14674 */ 14675 14676 xmlDocPtr 14677 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14678 int size, int recovery, void *data) { 14679 xmlDocPtr ret; 14680 xmlParserCtxtPtr ctxt; 14681 14682 xmlInitParser(); 14683 14684 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14685 if (ctxt == NULL) return(NULL); 14686 if (sax != NULL) { 14687 if (ctxt->sax != NULL) 14688 xmlFree(ctxt->sax); 14689 ctxt->sax = sax; 14690 } 14691 xmlDetectSAX2(ctxt); 14692 if (data!=NULL) { 14693 ctxt->_private=data; 14694 } 14695 14696 ctxt->recovery = recovery; 14697 14698 xmlParseDocument(ctxt); 14699 14700 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14701 else { 14702 ret = NULL; 14703 xmlFreeDoc(ctxt->myDoc); 14704 ctxt->myDoc = NULL; 14705 } 14706 if (sax != NULL) 14707 ctxt->sax = NULL; 14708 xmlFreeParserCtxt(ctxt); 14709 14710 return(ret); 14711 } 14712 14713 /** 14714 * xmlSAXParseMemory: 14715 * @sax: the SAX handler block 14716 * @buffer: an pointer to a char array 14717 * @size: the size of the array 14718 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14719 * documents 14720 * 14721 * parse an XML in-memory block and use the given SAX function block 14722 * to handle the parsing callback. If sax is NULL, fallback to the default 14723 * DOM tree building routines. 14724 * 14725 * Returns the resulting document tree 14726 */ 14727 xmlDocPtr 14728 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14729 int size, int recovery) { 14730 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14731 } 14732 14733 /** 14734 * xmlParseMemory: 14735 * @buffer: an pointer to a char array 14736 * @size: the size of the array 14737 * 14738 * parse an XML in-memory block and build a tree. 
14739 * 14740 * Returns the resulting document tree 14741 */ 14742 14743 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14744 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14745 } 14746 14747 /** 14748 * xmlRecoverMemory: 14749 * @buffer: an pointer to a char array 14750 * @size: the size of the array 14751 * 14752 * parse an XML in-memory block and build a tree. 14753 * In the case the document is not Well Formed, an attempt to 14754 * build a tree is tried anyway 14755 * 14756 * Returns the resulting document tree or NULL in case of error 14757 */ 14758 14759 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14760 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14761 } 14762 14763 /** 14764 * xmlSAXUserParseMemory: 14765 * @sax: a SAX handler 14766 * @user_data: The user data returned on SAX callbacks 14767 * @buffer: an in-memory XML document input 14768 * @size: the length of the XML document in bytes 14769 * 14770 * A better SAX parsing routine. 14771 * parse an XML in-memory buffer and call the given SAX handler routines. 
14772 * 14773 * Returns 0 in case of success or a error number otherwise 14774 */ 14775 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14776 const char *buffer, int size) { 14777 int ret = 0; 14778 xmlParserCtxtPtr ctxt; 14779 14780 xmlInitParser(); 14781 14782 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14783 if (ctxt == NULL) return -1; 14784 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14785 xmlFree(ctxt->sax); 14786 ctxt->sax = sax; 14787 xmlDetectSAX2(ctxt); 14788 14789 if (user_data != NULL) 14790 ctxt->userData = user_data; 14791 14792 xmlParseDocument(ctxt); 14793 14794 if (ctxt->wellFormed) 14795 ret = 0; 14796 else { 14797 if (ctxt->errNo != 0) 14798 ret = ctxt->errNo; 14799 else 14800 ret = -1; 14801 } 14802 if (sax != NULL) 14803 ctxt->sax = NULL; 14804 if (ctxt->myDoc != NULL) { 14805 xmlFreeDoc(ctxt->myDoc); 14806 ctxt->myDoc = NULL; 14807 } 14808 xmlFreeParserCtxt(ctxt); 14809 14810 return ret; 14811 } 14812 #endif /* LIBXML_SAX1_ENABLED */ 14813 14814 /** 14815 * xmlCreateDocParserCtxt: 14816 * @cur: a pointer to an array of xmlChar 14817 * 14818 * Creates a parser context for an XML in-memory document. 14819 * 14820 * Returns the new parser context or NULL 14821 */ 14822 xmlParserCtxtPtr 14823 xmlCreateDocParserCtxt(const xmlChar *cur) { 14824 int len; 14825 14826 if (cur == NULL) 14827 return(NULL); 14828 len = xmlStrlen(cur); 14829 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14830 } 14831 14832 #ifdef LIBXML_SAX1_ENABLED 14833 /** 14834 * xmlSAXParseDoc: 14835 * @sax: the SAX handler block 14836 * @cur: a pointer to an array of xmlChar 14837 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14838 * documents 14839 * 14840 * parse an XML in-memory document and build a tree. 14841 * It use the given SAX function block to handle the parsing callback. 14842 * If sax is NULL, fallback to the default DOM tree building routines. 
14843 * 14844 * Returns the resulting document tree 14845 */ 14846 14847 xmlDocPtr 14848 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14849 xmlDocPtr ret; 14850 xmlParserCtxtPtr ctxt; 14851 xmlSAXHandlerPtr oldsax = NULL; 14852 14853 if (cur == NULL) return(NULL); 14854 14855 14856 ctxt = xmlCreateDocParserCtxt(cur); 14857 if (ctxt == NULL) return(NULL); 14858 if (sax != NULL) { 14859 oldsax = ctxt->sax; 14860 ctxt->sax = sax; 14861 ctxt->userData = NULL; 14862 } 14863 xmlDetectSAX2(ctxt); 14864 14865 xmlParseDocument(ctxt); 14866 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14867 else { 14868 ret = NULL; 14869 xmlFreeDoc(ctxt->myDoc); 14870 ctxt->myDoc = NULL; 14871 } 14872 if (sax != NULL) 14873 ctxt->sax = oldsax; 14874 xmlFreeParserCtxt(ctxt); 14875 14876 return(ret); 14877 } 14878 14879 /** 14880 * xmlParseDoc: 14881 * @cur: a pointer to an array of xmlChar 14882 * 14883 * parse an XML in-memory document and build a tree. 14884 * 14885 * Returns the resulting document tree 14886 */ 14887 14888 xmlDocPtr 14889 xmlParseDoc(const xmlChar *cur) { 14890 return(xmlSAXParseDoc(NULL, cur, 0)); 14891 } 14892 #endif /* LIBXML_SAX1_ENABLED */ 14893 14894 #ifdef LIBXML_LEGACY_ENABLED 14895 /************************************************************************ 14896 * * 14897 * Specific function to keep track of entities references * 14898 * and used by the XSLT debugger * 14899 * * 14900 ************************************************************************/ 14901 14902 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14903 14904 /** 14905 * xmlAddEntityReference: 14906 * @ent : A valid entity 14907 * @firstNode : A valid first node for children of entity 14908 * @lastNode : A valid last node of children entity 14909 * 14910 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14911 */ 14912 static void 14913 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14914 xmlNodePtr lastNode) 
14915 { 14916 if (xmlEntityRefFunc != NULL) { 14917 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14918 } 14919 } 14920 14921 14922 /** 14923 * xmlSetEntityReferenceFunc: 14924 * @func: A valid function 14925 * 14926 * Set the function to call call back when a xml reference has been made 14927 */ 14928 void 14929 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14930 { 14931 xmlEntityRefFunc = func; 14932 } 14933 #endif /* LIBXML_LEGACY_ENABLED */ 14934 14935 /************************************************************************ 14936 * * 14937 * Miscellaneous * 14938 * * 14939 ************************************************************************/ 14940 14941 #ifdef LIBXML_XPATH_ENABLED 14942 #include <libxml/xpath.h> 14943 #endif 14944 14945 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14946 static int xmlParserInitialized = 0; 14947 14948 /** 14949 * xmlInitParser: 14950 * 14951 * Initialization function for the XML parser. 14952 * This is not reentrant. Call once before processing in case of 14953 * use in multithreaded programs. 
14954 */ 14955 14956 void 14957 xmlInitParser(void) { 14958 if (xmlParserInitialized != 0) 14959 return; 14960 14961 #ifdef LIBXML_THREAD_ENABLED 14962 __xmlGlobalInitMutexLock(); 14963 if (xmlParserInitialized == 0) { 14964 #endif 14965 xmlInitThreads(); 14966 xmlInitGlobals(); 14967 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14968 (xmlGenericError == NULL)) 14969 initGenericErrorDefaultFunc(NULL); 14970 xmlInitMemory(); 14971 xmlInitializeDict(); 14972 xmlInitCharEncodingHandlers(); 14973 xmlDefaultSAXHandlerInit(); 14974 xmlRegisterDefaultInputCallbacks(); 14975 #ifdef LIBXML_OUTPUT_ENABLED 14976 xmlRegisterDefaultOutputCallbacks(); 14977 #endif /* LIBXML_OUTPUT_ENABLED */ 14978 #ifdef LIBXML_HTML_ENABLED 14979 htmlInitAutoClose(); 14980 htmlDefaultSAXHandlerInit(); 14981 #endif 14982 #ifdef LIBXML_XPATH_ENABLED 14983 xmlXPathInit(); 14984 #endif 14985 xmlParserInitialized = 1; 14986 #ifdef LIBXML_THREAD_ENABLED 14987 } 14988 __xmlGlobalInitMutexUnlock(); 14989 #endif 14990 } 14991 14992 /** 14993 * xmlCleanupParser: 14994 * 14995 * This function name is somewhat misleading. It does not clean up 14996 * parser state, it cleans up memory allocated by the library itself. 14997 * It is a cleanup function for the XML library. It tries to reclaim all 14998 * related global memory allocated for the library processing. 14999 * It doesn't deallocate any document related memory. One should 15000 * call xmlCleanupParser() only when the process has finished using 15001 * the library and all XML/HTML documents built with it. 15002 * See also xmlInitParser() which has the opposite function of preparing 15003 * the library for operations. 15004 * 15005 * WARNING: if your application is multithreaded or has plugin support 15006 * calling this may crash the application if another thread or 15007 * a plugin is still using libxml2. 
It's sometimes very hard to 15008 * guess if libxml2 is in use in the application, some libraries 15009 * or plugins may use it without notice. In case of doubt abstain 15010 * from calling this function or do it just before calling exit() 15011 * to avoid leak reports from valgrind ! 15012 */ 15013 15014 void 15015 xmlCleanupParser(void) { 15016 if (!xmlParserInitialized) 15017 return; 15018 15019 xmlCleanupCharEncodingHandlers(); 15020 #ifdef LIBXML_CATALOG_ENABLED 15021 xmlCatalogCleanup(); 15022 #endif 15023 xmlDictCleanup(); 15024 xmlCleanupInputCallbacks(); 15025 #ifdef LIBXML_OUTPUT_ENABLED 15026 xmlCleanupOutputCallbacks(); 15027 #endif 15028 #ifdef LIBXML_SCHEMAS_ENABLED 15029 xmlSchemaCleanupTypes(); 15030 xmlRelaxNGCleanupTypes(); 15031 #endif 15032 xmlResetLastError(); 15033 xmlCleanupGlobals(); 15034 xmlCleanupThreads(); /* must be last if called not from the main thread */ 15035 xmlCleanupMemory(); 15036 xmlParserInitialized = 0; 15037 } 15038 15039 /************************************************************************ 15040 * * 15041 * New set (2.6.0) of simpler and more flexible APIs * 15042 * * 15043 ************************************************************************/ 15044 15045 /** 15046 * DICT_FREE: 15047 * @str: a string 15048 * 15049 * Free a string if it is not owned by the "dict" dictionary in the 15050 * current scope 15051 */ 15052 #define DICT_FREE(str) \ 15053 if ((str) && ((!dict) || \ 15054 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 15055 xmlFree((char *)(str)); 15056 15057 /** 15058 * xmlCtxtReset: 15059 * @ctxt: an XML parser context 15060 * 15061 * Reset a parser context 15062 */ 15063 void 15064 xmlCtxtReset(xmlParserCtxtPtr ctxt) 15065 { 15066 xmlParserInputPtr input; 15067 xmlDictPtr dict; 15068 15069 if (ctxt == NULL) 15070 return; 15071 15072 dict = ctxt->dict; 15073 15074 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 15075 xmlFreeInputStream(input); 15076 } 15077 ctxt->inputNr = 0; 15078 
ctxt->input = NULL; 15079 15080 ctxt->spaceNr = 0; 15081 if (ctxt->spaceTab != NULL) { 15082 ctxt->spaceTab[0] = -1; 15083 ctxt->space = &ctxt->spaceTab[0]; 15084 } else { 15085 ctxt->space = NULL; 15086 } 15087 15088 15089 ctxt->nodeNr = 0; 15090 ctxt->node = NULL; 15091 15092 ctxt->nameNr = 0; 15093 ctxt->name = NULL; 15094 15095 DICT_FREE(ctxt->version); 15096 ctxt->version = NULL; 15097 DICT_FREE(ctxt->encoding); 15098 ctxt->encoding = NULL; 15099 DICT_FREE(ctxt->directory); 15100 ctxt->directory = NULL; 15101 DICT_FREE(ctxt->extSubURI); 15102 ctxt->extSubURI = NULL; 15103 DICT_FREE(ctxt->extSubSystem); 15104 ctxt->extSubSystem = NULL; 15105 if (ctxt->myDoc != NULL) 15106 xmlFreeDoc(ctxt->myDoc); 15107 ctxt->myDoc = NULL; 15108 15109 ctxt->standalone = -1; 15110 ctxt->hasExternalSubset = 0; 15111 ctxt->hasPErefs = 0; 15112 ctxt->html = 0; 15113 ctxt->external = 0; 15114 ctxt->instate = XML_PARSER_START; 15115 ctxt->token = 0; 15116 15117 ctxt->wellFormed = 1; 15118 ctxt->nsWellFormed = 1; 15119 ctxt->disableSAX = 0; 15120 ctxt->valid = 1; 15121 #if 0 15122 ctxt->vctxt.userData = ctxt; 15123 ctxt->vctxt.error = xmlParserValidityError; 15124 ctxt->vctxt.warning = xmlParserValidityWarning; 15125 #endif 15126 ctxt->record_info = 0; 15127 ctxt->nbChars = 0; 15128 ctxt->checkIndex = 0; 15129 ctxt->inSubset = 0; 15130 ctxt->errNo = XML_ERR_OK; 15131 ctxt->depth = 0; 15132 ctxt->charset = XML_CHAR_ENCODING_UTF8; 15133 ctxt->catalogs = NULL; 15134 ctxt->nbentities = 0; 15135 ctxt->sizeentities = 0; 15136 ctxt->sizeentcopy = 0; 15137 xmlInitNodeInfoSeq(&ctxt->node_seq); 15138 15139 if (ctxt->attsDefault != NULL) { 15140 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 15141 ctxt->attsDefault = NULL; 15142 } 15143 if (ctxt->attsSpecial != NULL) { 15144 xmlHashFree(ctxt->attsSpecial, NULL); 15145 ctxt->attsSpecial = NULL; 15146 } 15147 15148 #ifdef LIBXML_CATALOG_ENABLED 15149 if (ctxt->catalogs != NULL) 15150 xmlCatalogFreeLocal(ctxt->catalogs); 15151 #endif 
15152 if (ctxt->lastError.code != XML_ERR_OK) 15153 xmlResetError(&ctxt->lastError); 15154 } 15155 15156 /** 15157 * xmlCtxtResetPush: 15158 * @ctxt: an XML parser context 15159 * @chunk: a pointer to an array of chars 15160 * @size: number of chars in the array 15161 * @filename: an optional file name or URI 15162 * @encoding: the document encoding, or NULL 15163 * 15164 * Reset a push parser context 15165 * 15166 * Returns 0 in case of success and 1 in case of error 15167 */ 15168 int 15169 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 15170 int size, const char *filename, const char *encoding) 15171 { 15172 xmlParserInputPtr inputStream; 15173 xmlParserInputBufferPtr buf; 15174 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 15175 15176 if (ctxt == NULL) 15177 return(1); 15178 15179 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 15180 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 15181 15182 buf = xmlAllocParserInputBuffer(enc); 15183 if (buf == NULL) 15184 return(1); 15185 15186 if (ctxt == NULL) { 15187 xmlFreeParserInputBuffer(buf); 15188 return(1); 15189 } 15190 15191 xmlCtxtReset(ctxt); 15192 15193 if (ctxt->pushTab == NULL) { 15194 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 15195 sizeof(xmlChar *)); 15196 if (ctxt->pushTab == NULL) { 15197 xmlErrMemory(ctxt, NULL); 15198 xmlFreeParserInputBuffer(buf); 15199 return(1); 15200 } 15201 } 15202 15203 if (filename == NULL) { 15204 ctxt->directory = NULL; 15205 } else { 15206 ctxt->directory = xmlParserGetDirectory(filename); 15207 } 15208 15209 inputStream = xmlNewInputStream(ctxt); 15210 if (inputStream == NULL) { 15211 xmlFreeParserInputBuffer(buf); 15212 return(1); 15213 } 15214 15215 if (filename == NULL) 15216 inputStream->filename = NULL; 15217 else 15218 inputStream->filename = (char *) 15219 xmlCanonicPath((const xmlChar *) filename); 15220 inputStream->buf = buf; 15221 xmlBufResetInput(buf->buffer, inputStream); 15222 15223 inputPush(ctxt, inputStream); 
15224 15225 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 15226 (ctxt->input->buf != NULL)) { 15227 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 15228 size_t cur = ctxt->input->cur - ctxt->input->base; 15229 15230 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 15231 15232 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 15233 #ifdef DEBUG_PUSH 15234 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 15235 #endif 15236 } 15237 15238 if (encoding != NULL) { 15239 xmlCharEncodingHandlerPtr hdlr; 15240 15241 if (ctxt->encoding != NULL) 15242 xmlFree((xmlChar *) ctxt->encoding); 15243 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15244 15245 hdlr = xmlFindCharEncodingHandler(encoding); 15246 if (hdlr != NULL) { 15247 xmlSwitchToEncoding(ctxt, hdlr); 15248 } else { 15249 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 15250 "Unsupported encoding %s\n", BAD_CAST encoding); 15251 } 15252 } else if (enc != XML_CHAR_ENCODING_NONE) { 15253 xmlSwitchEncoding(ctxt, enc); 15254 } 15255 15256 return(0); 15257 } 15258 15259 15260 /** 15261 * xmlCtxtUseOptionsInternal: 15262 * @ctxt: an XML parser context 15263 * @options: a combination of xmlParserOption 15264 * @encoding: the user provided encoding to use 15265 * 15266 * Applies the options to the parser context 15267 * 15268 * Returns 0 in case of success, the set of unknown or unimplemented options 15269 * in case of error. 
15270 */ 15271 static int 15272 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15273 { 15274 if (ctxt == NULL) 15275 return(-1); 15276 if (encoding != NULL) { 15277 if (ctxt->encoding != NULL) 15278 xmlFree((xmlChar *) ctxt->encoding); 15279 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15280 } 15281 if (options & XML_PARSE_RECOVER) { 15282 ctxt->recovery = 1; 15283 options -= XML_PARSE_RECOVER; 15284 ctxt->options |= XML_PARSE_RECOVER; 15285 } else 15286 ctxt->recovery = 0; 15287 if (options & XML_PARSE_DTDLOAD) { 15288 ctxt->loadsubset = XML_DETECT_IDS; 15289 options -= XML_PARSE_DTDLOAD; 15290 ctxt->options |= XML_PARSE_DTDLOAD; 15291 } else 15292 ctxt->loadsubset = 0; 15293 if (options & XML_PARSE_DTDATTR) { 15294 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15295 options -= XML_PARSE_DTDATTR; 15296 ctxt->options |= XML_PARSE_DTDATTR; 15297 } 15298 if (options & XML_PARSE_NOENT) { 15299 ctxt->replaceEntities = 1; 15300 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15301 options -= XML_PARSE_NOENT; 15302 ctxt->options |= XML_PARSE_NOENT; 15303 } else 15304 ctxt->replaceEntities = 0; 15305 if (options & XML_PARSE_PEDANTIC) { 15306 ctxt->pedantic = 1; 15307 options -= XML_PARSE_PEDANTIC; 15308 ctxt->options |= XML_PARSE_PEDANTIC; 15309 } else 15310 ctxt->pedantic = 0; 15311 if (options & XML_PARSE_NOBLANKS) { 15312 ctxt->keepBlanks = 0; 15313 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15314 options -= XML_PARSE_NOBLANKS; 15315 ctxt->options |= XML_PARSE_NOBLANKS; 15316 } else 15317 ctxt->keepBlanks = 1; 15318 if (options & XML_PARSE_DTDVALID) { 15319 ctxt->validate = 1; 15320 if (options & XML_PARSE_NOWARNING) 15321 ctxt->vctxt.warning = NULL; 15322 if (options & XML_PARSE_NOERROR) 15323 ctxt->vctxt.error = NULL; 15324 options -= XML_PARSE_DTDVALID; 15325 ctxt->options |= XML_PARSE_DTDVALID; 15326 } else 15327 ctxt->validate = 0; 15328 if (options & XML_PARSE_NOWARNING) { 15329 ctxt->sax->warning = NULL; 
15330 options -= XML_PARSE_NOWARNING; 15331 } 15332 if (options & XML_PARSE_NOERROR) { 15333 ctxt->sax->error = NULL; 15334 ctxt->sax->fatalError = NULL; 15335 options -= XML_PARSE_NOERROR; 15336 } 15337 #ifdef LIBXML_SAX1_ENABLED 15338 if (options & XML_PARSE_SAX1) { 15339 ctxt->sax->startElement = xmlSAX2StartElement; 15340 ctxt->sax->endElement = xmlSAX2EndElement; 15341 ctxt->sax->startElementNs = NULL; 15342 ctxt->sax->endElementNs = NULL; 15343 ctxt->sax->initialized = 1; 15344 options -= XML_PARSE_SAX1; 15345 ctxt->options |= XML_PARSE_SAX1; 15346 } 15347 #endif /* LIBXML_SAX1_ENABLED */ 15348 if (options & XML_PARSE_NODICT) { 15349 ctxt->dictNames = 0; 15350 options -= XML_PARSE_NODICT; 15351 ctxt->options |= XML_PARSE_NODICT; 15352 } else { 15353 ctxt->dictNames = 1; 15354 } 15355 if (options & XML_PARSE_NOCDATA) { 15356 ctxt->sax->cdataBlock = NULL; 15357 options -= XML_PARSE_NOCDATA; 15358 ctxt->options |= XML_PARSE_NOCDATA; 15359 } 15360 if (options & XML_PARSE_NSCLEAN) { 15361 ctxt->options |= XML_PARSE_NSCLEAN; 15362 options -= XML_PARSE_NSCLEAN; 15363 } 15364 if (options & XML_PARSE_NONET) { 15365 ctxt->options |= XML_PARSE_NONET; 15366 options -= XML_PARSE_NONET; 15367 } 15368 if (options & XML_PARSE_COMPACT) { 15369 ctxt->options |= XML_PARSE_COMPACT; 15370 options -= XML_PARSE_COMPACT; 15371 } 15372 if (options & XML_PARSE_OLD10) { 15373 ctxt->options |= XML_PARSE_OLD10; 15374 options -= XML_PARSE_OLD10; 15375 } 15376 if (options & XML_PARSE_NOBASEFIX) { 15377 ctxt->options |= XML_PARSE_NOBASEFIX; 15378 options -= XML_PARSE_NOBASEFIX; 15379 } 15380 if (options & XML_PARSE_HUGE) { 15381 ctxt->options |= XML_PARSE_HUGE; 15382 options -= XML_PARSE_HUGE; 15383 if (ctxt->dict != NULL) 15384 xmlDictSetLimit(ctxt->dict, 0); 15385 } 15386 if (options & XML_PARSE_OLDSAX) { 15387 ctxt->options |= XML_PARSE_OLDSAX; 15388 options -= XML_PARSE_OLDSAX; 15389 } 15390 if (options & XML_PARSE_IGNORE_ENC) { 15391 ctxt->options |= XML_PARSE_IGNORE_ENC; 15392 options 
-= XML_PARSE_IGNORE_ENC; 15393 } 15394 if (options & XML_PARSE_BIG_LINES) { 15395 ctxt->options |= XML_PARSE_BIG_LINES; 15396 options -= XML_PARSE_BIG_LINES; 15397 } 15398 ctxt->linenumbers = 1; 15399 return (options); 15400 } 15401 15402 /** 15403 * xmlCtxtUseOptions: 15404 * @ctxt: an XML parser context 15405 * @options: a combination of xmlParserOption 15406 * 15407 * Applies the options to the parser context 15408 * 15409 * Returns 0 in case of success, the set of unknown or unimplemented options 15410 * in case of error. 15411 */ 15412 int 15413 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15414 { 15415 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15416 } 15417 15418 /** 15419 * xmlDoRead: 15420 * @ctxt: an XML parser context 15421 * @URL: the base URL to use for the document 15422 * @encoding: the document encoding, or NULL 15423 * @options: a combination of xmlParserOption 15424 * @reuse: keep the context for reuse 15425 * 15426 * Common front-end for the xmlRead functions 15427 * 15428 * Returns the resulting document tree or NULL 15429 */ 15430 static xmlDocPtr 15431 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15432 int options, int reuse) 15433 { 15434 xmlDocPtr ret; 15435 15436 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15437 if (encoding != NULL) { 15438 xmlCharEncodingHandlerPtr hdlr; 15439 15440 hdlr = xmlFindCharEncodingHandler(encoding); 15441 if (hdlr != NULL) 15442 xmlSwitchToEncoding(ctxt, hdlr); 15443 } 15444 if ((URL != NULL) && (ctxt->input != NULL) && 15445 (ctxt->input->filename == NULL)) 15446 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15447 xmlParseDocument(ctxt); 15448 if ((ctxt->wellFormed) || ctxt->recovery) 15449 ret = ctxt->myDoc; 15450 else { 15451 ret = NULL; 15452 if (ctxt->myDoc != NULL) { 15453 xmlFreeDoc(ctxt->myDoc); 15454 } 15455 } 15456 ctxt->myDoc = NULL; 15457 if (!reuse) { 15458 xmlFreeParserCtxt(ctxt); 15459 } 15460 15461 return (ret); 15462 
} 15463 15464 /** 15465 * xmlReadDoc: 15466 * @cur: a pointer to a zero terminated string 15467 * @URL: the base URL to use for the document 15468 * @encoding: the document encoding, or NULL 15469 * @options: a combination of xmlParserOption 15470 * 15471 * parse an XML in-memory document and build a tree. 15472 * 15473 * Returns the resulting document tree 15474 */ 15475 xmlDocPtr 15476 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15477 { 15478 xmlParserCtxtPtr ctxt; 15479 15480 if (cur == NULL) 15481 return (NULL); 15482 xmlInitParser(); 15483 15484 ctxt = xmlCreateDocParserCtxt(cur); 15485 if (ctxt == NULL) 15486 return (NULL); 15487 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15488 } 15489 15490 /** 15491 * xmlReadFile: 15492 * @filename: a file or URL 15493 * @encoding: the document encoding, or NULL 15494 * @options: a combination of xmlParserOption 15495 * 15496 * parse an XML file from the filesystem or the network. 15497 * 15498 * Returns the resulting document tree 15499 */ 15500 xmlDocPtr 15501 xmlReadFile(const char *filename, const char *encoding, int options) 15502 { 15503 xmlParserCtxtPtr ctxt; 15504 15505 xmlInitParser(); 15506 ctxt = xmlCreateURLParserCtxt(filename, options); 15507 if (ctxt == NULL) 15508 return (NULL); 15509 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15510 } 15511 15512 /** 15513 * xmlReadMemory: 15514 * @buffer: a pointer to a char array 15515 * @size: the size of the array 15516 * @URL: the base URL to use for the document 15517 * @encoding: the document encoding, or NULL 15518 * @options: a combination of xmlParserOption 15519 * 15520 * parse an XML in-memory document and build a tree. 
15521 * 15522 * Returns the resulting document tree 15523 */ 15524 xmlDocPtr 15525 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15526 { 15527 xmlParserCtxtPtr ctxt; 15528 15529 xmlInitParser(); 15530 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15531 if (ctxt == NULL) 15532 return (NULL); 15533 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15534 } 15535 15536 /** 15537 * xmlReadFd: 15538 * @fd: an open file descriptor 15539 * @URL: the base URL to use for the document 15540 * @encoding: the document encoding, or NULL 15541 * @options: a combination of xmlParserOption 15542 * 15543 * parse an XML from a file descriptor and build a tree. 15544 * NOTE that the file descriptor will not be closed when the 15545 * reader is closed or reset. 15546 * 15547 * Returns the resulting document tree 15548 */ 15549 xmlDocPtr 15550 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15551 { 15552 xmlParserCtxtPtr ctxt; 15553 xmlParserInputBufferPtr input; 15554 xmlParserInputPtr stream; 15555 15556 if (fd < 0) 15557 return (NULL); 15558 xmlInitParser(); 15559 15560 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15561 if (input == NULL) 15562 return (NULL); 15563 input->closecallback = NULL; 15564 ctxt = xmlNewParserCtxt(); 15565 if (ctxt == NULL) { 15566 xmlFreeParserInputBuffer(input); 15567 return (NULL); 15568 } 15569 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15570 if (stream == NULL) { 15571 xmlFreeParserInputBuffer(input); 15572 xmlFreeParserCtxt(ctxt); 15573 return (NULL); 15574 } 15575 inputPush(ctxt, stream); 15576 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15577 } 15578 15579 /** 15580 * xmlReadIO: 15581 * @ioread: an I/O read function 15582 * @ioclose: an I/O close function 15583 * @ioctx: an I/O handler 15584 * @URL: the base URL to use for the document 15585 * @encoding: the document encoding, or NULL 15586 * @options: a combination of 
xmlParserOption 15587 * 15588 * parse an XML document from I/O functions and source and build a tree. 15589 * 15590 * Returns the resulting document tree 15591 */ 15592 xmlDocPtr 15593 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15594 void *ioctx, const char *URL, const char *encoding, int options) 15595 { 15596 xmlParserCtxtPtr ctxt; 15597 xmlParserInputBufferPtr input; 15598 xmlParserInputPtr stream; 15599 15600 if (ioread == NULL) 15601 return (NULL); 15602 xmlInitParser(); 15603 15604 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15605 XML_CHAR_ENCODING_NONE); 15606 if (input == NULL) { 15607 if (ioclose != NULL) 15608 ioclose(ioctx); 15609 return (NULL); 15610 } 15611 ctxt = xmlNewParserCtxt(); 15612 if (ctxt == NULL) { 15613 xmlFreeParserInputBuffer(input); 15614 return (NULL); 15615 } 15616 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15617 if (stream == NULL) { 15618 xmlFreeParserInputBuffer(input); 15619 xmlFreeParserCtxt(ctxt); 15620 return (NULL); 15621 } 15622 inputPush(ctxt, stream); 15623 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15624 } 15625 15626 /** 15627 * xmlCtxtReadDoc: 15628 * @ctxt: an XML parser context 15629 * @cur: a pointer to a zero terminated string 15630 * @URL: the base URL to use for the document 15631 * @encoding: the document encoding, or NULL 15632 * @options: a combination of xmlParserOption 15633 * 15634 * parse an XML in-memory document and build a tree. 
15635 * This reuses the existing @ctxt parser context 15636 * 15637 * Returns the resulting document tree 15638 */ 15639 xmlDocPtr 15640 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15641 const char *URL, const char *encoding, int options) 15642 { 15643 xmlParserInputPtr stream; 15644 15645 if (cur == NULL) 15646 return (NULL); 15647 if (ctxt == NULL) 15648 return (NULL); 15649 xmlInitParser(); 15650 15651 xmlCtxtReset(ctxt); 15652 15653 stream = xmlNewStringInputStream(ctxt, cur); 15654 if (stream == NULL) { 15655 return (NULL); 15656 } 15657 inputPush(ctxt, stream); 15658 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15659 } 15660 15661 /** 15662 * xmlCtxtReadFile: 15663 * @ctxt: an XML parser context 15664 * @filename: a file or URL 15665 * @encoding: the document encoding, or NULL 15666 * @options: a combination of xmlParserOption 15667 * 15668 * parse an XML file from the filesystem or the network. 15669 * This reuses the existing @ctxt parser context 15670 * 15671 * Returns the resulting document tree 15672 */ 15673 xmlDocPtr 15674 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15675 const char *encoding, int options) 15676 { 15677 xmlParserInputPtr stream; 15678 15679 if (filename == NULL) 15680 return (NULL); 15681 if (ctxt == NULL) 15682 return (NULL); 15683 xmlInitParser(); 15684 15685 xmlCtxtReset(ctxt); 15686 15687 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15688 if (stream == NULL) { 15689 return (NULL); 15690 } 15691 inputPush(ctxt, stream); 15692 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15693 } 15694 15695 /** 15696 * xmlCtxtReadMemory: 15697 * @ctxt: an XML parser context 15698 * @buffer: a pointer to a char array 15699 * @size: the size of the array 15700 * @URL: the base URL to use for the document 15701 * @encoding: the document encoding, or NULL 15702 * @options: a combination of xmlParserOption 15703 * 15704 * parse an XML in-memory document and build a tree. 
15705 * This reuses the existing @ctxt parser context 15706 * 15707 * Returns the resulting document tree 15708 */ 15709 xmlDocPtr 15710 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15711 const char *URL, const char *encoding, int options) 15712 { 15713 xmlParserInputBufferPtr input; 15714 xmlParserInputPtr stream; 15715 15716 if (ctxt == NULL) 15717 return (NULL); 15718 if (buffer == NULL) 15719 return (NULL); 15720 xmlInitParser(); 15721 15722 xmlCtxtReset(ctxt); 15723 15724 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15725 if (input == NULL) { 15726 return(NULL); 15727 } 15728 15729 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15730 if (stream == NULL) { 15731 xmlFreeParserInputBuffer(input); 15732 return(NULL); 15733 } 15734 15735 inputPush(ctxt, stream); 15736 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15737 } 15738 15739 /** 15740 * xmlCtxtReadFd: 15741 * @ctxt: an XML parser context 15742 * @fd: an open file descriptor 15743 * @URL: the base URL to use for the document 15744 * @encoding: the document encoding, or NULL 15745 * @options: a combination of xmlParserOption 15746 * 15747 * parse an XML from a file descriptor and build a tree. 15748 * This reuses the existing @ctxt parser context 15749 * NOTE that the file descriptor will not be closed when the 15750 * reader is closed or reset. 
15751 * 15752 * Returns the resulting document tree 15753 */ 15754 xmlDocPtr 15755 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15756 const char *URL, const char *encoding, int options) 15757 { 15758 xmlParserInputBufferPtr input; 15759 xmlParserInputPtr stream; 15760 15761 if (fd < 0) 15762 return (NULL); 15763 if (ctxt == NULL) 15764 return (NULL); 15765 xmlInitParser(); 15766 15767 xmlCtxtReset(ctxt); 15768 15769 15770 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15771 if (input == NULL) 15772 return (NULL); 15773 input->closecallback = NULL; 15774 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15775 if (stream == NULL) { 15776 xmlFreeParserInputBuffer(input); 15777 return (NULL); 15778 } 15779 inputPush(ctxt, stream); 15780 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15781 } 15782 15783 /** 15784 * xmlCtxtReadIO: 15785 * @ctxt: an XML parser context 15786 * @ioread: an I/O read function 15787 * @ioclose: an I/O close function 15788 * @ioctx: an I/O handler 15789 * @URL: the base URL to use for the document 15790 * @encoding: the document encoding, or NULL 15791 * @options: a combination of xmlParserOption 15792 * 15793 * parse an XML document from I/O functions and source and build a tree. 
15794 * This reuses the existing @ctxt parser context 15795 * 15796 * Returns the resulting document tree 15797 */ 15798 xmlDocPtr 15799 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15800 xmlInputCloseCallback ioclose, void *ioctx, 15801 const char *URL, 15802 const char *encoding, int options) 15803 { 15804 xmlParserInputBufferPtr input; 15805 xmlParserInputPtr stream; 15806 15807 if (ioread == NULL) 15808 return (NULL); 15809 if (ctxt == NULL) 15810 return (NULL); 15811 xmlInitParser(); 15812 15813 xmlCtxtReset(ctxt); 15814 15815 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15816 XML_CHAR_ENCODING_NONE); 15817 if (input == NULL) { 15818 if (ioclose != NULL) 15819 ioclose(ioctx); 15820 return (NULL); 15821 } 15822 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15823 if (stream == NULL) { 15824 xmlFreeParserInputBuffer(input); 15825 return (NULL); 15826 } 15827 inputPush(ctxt, stream); 15828 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15829 } 15830 15831 #define bottom_parser 15832 #include "elfgcchack.h" 15833