1 /* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel (at) veillard.com 31 */ 32 33 #define IN_LIBXML 34 #include "libxml.h" 35 36 #if defined(WIN32) && !defined (__CYGWIN__) 37 #define XML_DIR_SEP '\\' 38 #else 39 #define XML_DIR_SEP '/' 40 #endif 41 42 #include <stdlib.h> 43 #include <limits.h> 44 #include <string.h> 45 #include <stdarg.h> 46 #include <libxml/xmlmemory.h> 47 #include <libxml/threads.h> 48 #include <libxml/globals.h> 49 #include <libxml/tree.h> 50 #include <libxml/parser.h> 51 #include <libxml/parserInternals.h> 52 #include <libxml/valid.h> 53 #include <libxml/entities.h> 54 #include <libxml/xmlerror.h> 55 #include <libxml/encoding.h> 56 #include <libxml/xmlIO.h> 57 #include <libxml/uri.h> 58 #ifdef LIBXML_CATALOG_ENABLED 59 #include <libxml/catalog.h> 60 #endif 61 #ifdef LIBXML_SCHEMAS_ENABLED 62 #include <libxml/xmlschemastypes.h> 63 #include <libxml/relaxng.h> 64 #endif 65 #ifdef HAVE_CTYPE_H 66 #include <ctype.h> 67 #endif 68 #ifdef HAVE_STDLIB_H 69 #include <stdlib.h> 70 #endif 71 #ifdef HAVE_SYS_STAT_H 72 #include <sys/stat.h> 73 #endif 74 #ifdef HAVE_FCNTL_H 75 #include <fcntl.h> 76 #endif 77 #ifdef HAVE_UNISTD_H 78 #include <unistd.h> 79 #endif 80 #ifdef HAVE_ZLIB_H 81 #include <zlib.h> 82 #endif 83 #ifdef HAVE_LZMA_H 84 #include <lzma.h> 85 #endif 86 87 #include "buf.h" 88 #include "enc.h" 89 90 static void 91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 92 93 static xmlParserCtxtPtr 94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 95 const xmlChar *base, xmlParserCtxtPtr pctx); 96 97 /************************************************************************ 98 * * 99 * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 100 * * 101 ************************************************************************/ 102 103 #define XML_PARSER_BIG_ENTITY 1000 104 #define XML_PARSER_LOT_ENTITY 5000 105 106 /* 107 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 108 * replacement over the size in byte of the input indicates that you have 109 * and eponential behaviour. A value of 10 correspond to at least 3 entity 110 * replacement per byte of input. 111 */ 112 #define XML_PARSER_NON_LINEAR 10 113 114 /* 115 * xmlParserEntityCheck 116 * 117 * Function to check non-linear entity expansion behaviour 118 * This is here to detect and stop exponential linear entity expansion 119 * This is not a limitation of the parser but a safety 120 * boundary feature. It can be disabled with the XML_PARSE_HUGE 121 * parser option. 122 */ 123 static int 124 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 125 xmlEntityPtr ent, size_t replacement) 126 { 127 size_t consumed = 0; 128 129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 130 return (0); 131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 132 return (1); 133 134 /* 135 * This may look absurd but is needed to detect 136 * entities problems 137 */ 138 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 139 (ent->content != NULL) && (ent->checked == 0)) { 140 unsigned long oldnbent = ctxt->nbentities; 141 xmlChar *rep; 142 143 ent->checked = 1; 144 145 rep = xmlStringDecodeEntities(ctxt, ent->content, 146 XML_SUBSTITUTE_REF, 0, 0, 0); 147 148 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 149 if (rep != NULL) { 150 if (xmlStrchr(rep, '<')) 151 ent->checked |= 1; 152 xmlFree(rep); 153 rep = NULL; 154 } 155 } 156 if (replacement != 0) { 157 if (replacement < XML_MAX_TEXT_LENGTH) 158 return(0); 159 160 /* 161 * If the volume of entity copy reaches 10 times the 162 * amount of parsed data and over the large text threshold 163 * then that's very likely to be an abuse. 
164 */ 165 if (ctxt->input != NULL) { 166 consumed = ctxt->input->consumed + 167 (ctxt->input->cur - ctxt->input->base); 168 } 169 consumed += ctxt->sizeentities; 170 171 if (replacement < XML_PARSER_NON_LINEAR * consumed) 172 return(0); 173 } else if (size != 0) { 174 /* 175 * Do the check based on the replacement size of the entity 176 */ 177 if (size < XML_PARSER_BIG_ENTITY) 178 return(0); 179 180 /* 181 * A limit on the amount of text data reasonably used 182 */ 183 if (ctxt->input != NULL) { 184 consumed = ctxt->input->consumed + 185 (ctxt->input->cur - ctxt->input->base); 186 } 187 consumed += ctxt->sizeentities; 188 189 if ((size < XML_PARSER_NON_LINEAR * consumed) && 190 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 191 return (0); 192 } else if (ent != NULL) { 193 /* 194 * use the number of parsed entities in the replacement 195 */ 196 size = ent->checked / 2; 197 198 /* 199 * The amount of data parsed counting entities size only once 200 */ 201 if (ctxt->input != NULL) { 202 consumed = ctxt->input->consumed + 203 (ctxt->input->cur - ctxt->input->base); 204 } 205 consumed += ctxt->sizeentities; 206 207 /* 208 * Check the density of entities for the amount of data 209 * knowing an entity reference will take at least 3 bytes 210 */ 211 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 212 return (0); 213 } else { 214 /* 215 * strange we got no data for checking 216 */ 217 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && 218 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || 219 (ctxt->nbentities <= 10000)) 220 return (0); 221 } 222 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 223 return (1); 224 } 225 226 /** 227 * xmlParserMaxDepth: 228 * 229 * arbitrary depth limit for the XML documents that we allow to 230 * process. This is not a limitation of the parser but a safety 231 * boundary feature. It can be disabled with the XML_PARSE_HUGE 232 * parser option. 
233 */ 234 unsigned int xmlParserMaxDepth = 256; 235 236 237 238 #define SAX2 1 239 #define XML_PARSER_BIG_BUFFER_SIZE 300 240 #define XML_PARSER_BUFFER_SIZE 100 241 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 242 243 /** 244 * XML_PARSER_CHUNK_SIZE 245 * 246 * When calling GROW that's the minimal amount of data 247 * the parser expected to have received. It is not a hard 248 * limit but an optimization when reading strings like Names 249 * It is not strictly needed as long as inputs available characters 250 * are followed by 0, which should be provided by the I/O level 251 */ 252 #define XML_PARSER_CHUNK_SIZE 100 253 254 /* 255 * List of XML prefixed PI allowed by W3C specs 256 */ 257 258 static const char *xmlW3CPIs[] = { 259 "xml-stylesheet", 260 "xml-model", 261 NULL 262 }; 263 264 265 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 266 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 267 const xmlChar **str); 268 269 static xmlParserErrors 270 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 271 xmlSAXHandlerPtr sax, 272 void *user_data, int depth, const xmlChar *URL, 273 const xmlChar *ID, xmlNodePtr *list); 274 275 static int 276 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 277 const char *encoding); 278 #ifdef LIBXML_LEGACY_ENABLED 279 static void 280 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 281 xmlNodePtr lastNode); 282 #endif /* LIBXML_LEGACY_ENABLED */ 283 284 static xmlParserErrors 285 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 286 const xmlChar *string, void *user_data, xmlNodePtr *lst); 287 288 static int 289 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 290 291 /************************************************************************ 292 * * 293 * Some factorized error routines * 294 * * 295 ************************************************************************/ 296 297 /** 298 * 
xmlErrAttributeDup: 299 * @ctxt: an XML parser context 300 * @prefix: the attribute prefix 301 * @localname: the attribute localname 302 * 303 * Handle a redefinition of attribute error 304 */ 305 static void 306 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 307 const xmlChar * localname) 308 { 309 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 310 (ctxt->instate == XML_PARSER_EOF)) 311 return; 312 if (ctxt != NULL) 313 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 314 315 if (prefix == NULL) 316 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 317 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 318 (const char *) localname, NULL, NULL, 0, 0, 319 "Attribute %s redefined\n", localname); 320 else 321 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 322 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 323 (const char *) prefix, (const char *) localname, 324 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 325 localname); 326 if (ctxt != NULL) { 327 ctxt->wellFormed = 0; 328 if (ctxt->recovery == 0) 329 ctxt->disableSAX = 1; 330 } 331 } 332 333 /** 334 * xmlFatalErr: 335 * @ctxt: an XML parser context 336 * @error: the error number 337 * @extra: extra information string 338 * 339 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 340 */ 341 static void 342 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 343 { 344 const char *errmsg; 345 char errstr[129] = ""; 346 347 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 348 (ctxt->instate == XML_PARSER_EOF)) 349 return; 350 switch (error) { 351 case XML_ERR_INVALID_HEX_CHARREF: 352 errmsg = "CharRef: invalid hexadecimal value"; 353 break; 354 case XML_ERR_INVALID_DEC_CHARREF: 355 errmsg = "CharRef: invalid decimal value"; 356 break; 357 case XML_ERR_INVALID_CHARREF: 358 errmsg = "CharRef: invalid value"; 359 break; 360 case XML_ERR_INTERNAL_ERROR: 361 errmsg = "internal error"; 362 break; 363 case XML_ERR_PEREF_AT_EOF: 364 errmsg = "PEReference at end of document"; 365 break; 366 case XML_ERR_PEREF_IN_PROLOG: 367 errmsg = "PEReference in prolog"; 368 break; 369 case XML_ERR_PEREF_IN_EPILOG: 370 errmsg = "PEReference in epilog"; 371 break; 372 case XML_ERR_PEREF_NO_NAME: 373 errmsg = "PEReference: no name"; 374 break; 375 case XML_ERR_PEREF_SEMICOL_MISSING: 376 errmsg = "PEReference: expecting ';'"; 377 break; 378 case XML_ERR_ENTITY_LOOP: 379 errmsg = "Detected an entity reference loop"; 380 break; 381 case XML_ERR_ENTITY_NOT_STARTED: 382 errmsg = "EntityValue: \" or ' expected"; 383 break; 384 case XML_ERR_ENTITY_PE_INTERNAL: 385 errmsg = "PEReferences forbidden in internal subset"; 386 break; 387 case XML_ERR_ENTITY_NOT_FINISHED: 388 errmsg = "EntityValue: \" or ' expected"; 389 break; 390 case XML_ERR_ATTRIBUTE_NOT_STARTED: 391 errmsg = "AttValue: \" or ' expected"; 392 break; 393 case XML_ERR_LT_IN_ATTRIBUTE: 394 errmsg = "Unescaped '<' not allowed in attributes values"; 395 break; 396 case XML_ERR_LITERAL_NOT_STARTED: 397 errmsg = "SystemLiteral \" or ' expected"; 398 break; 399 case XML_ERR_LITERAL_NOT_FINISHED: 400 errmsg = "Unfinished System or Public ID \" or ' expected"; 401 break; 402 case XML_ERR_MISPLACED_CDATA_END: 403 errmsg = "Sequence ']]>' not allowed in 
content"; 404 break; 405 case XML_ERR_URI_REQUIRED: 406 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 407 break; 408 case XML_ERR_PUBID_REQUIRED: 409 errmsg = "PUBLIC, the Public Identifier is missing"; 410 break; 411 case XML_ERR_HYPHEN_IN_COMMENT: 412 errmsg = "Comment must not contain '--' (double-hyphen)"; 413 break; 414 case XML_ERR_PI_NOT_STARTED: 415 errmsg = "xmlParsePI : no target name"; 416 break; 417 case XML_ERR_RESERVED_XML_NAME: 418 errmsg = "Invalid PI name"; 419 break; 420 case XML_ERR_NOTATION_NOT_STARTED: 421 errmsg = "NOTATION: Name expected here"; 422 break; 423 case XML_ERR_NOTATION_NOT_FINISHED: 424 errmsg = "'>' required to close NOTATION declaration"; 425 break; 426 case XML_ERR_VALUE_REQUIRED: 427 errmsg = "Entity value required"; 428 break; 429 case XML_ERR_URI_FRAGMENT: 430 errmsg = "Fragment not allowed"; 431 break; 432 case XML_ERR_ATTLIST_NOT_STARTED: 433 errmsg = "'(' required to start ATTLIST enumeration"; 434 break; 435 case XML_ERR_NMTOKEN_REQUIRED: 436 errmsg = "NmToken expected in ATTLIST enumeration"; 437 break; 438 case XML_ERR_ATTLIST_NOT_FINISHED: 439 errmsg = "')' required to finish ATTLIST enumeration"; 440 break; 441 case XML_ERR_MIXED_NOT_STARTED: 442 errmsg = "MixedContentDecl : '|' or ')*' expected"; 443 break; 444 case XML_ERR_PCDATA_REQUIRED: 445 errmsg = "MixedContentDecl : '#PCDATA' expected"; 446 break; 447 case XML_ERR_ELEMCONTENT_NOT_STARTED: 448 errmsg = "ContentDecl : Name or '(' expected"; 449 break; 450 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 451 errmsg = "ContentDecl : ',' '|' or ')' expected"; 452 break; 453 case XML_ERR_PEREF_IN_INT_SUBSET: 454 errmsg = 455 "PEReference: forbidden within markup decl in internal subset"; 456 break; 457 case XML_ERR_GT_REQUIRED: 458 errmsg = "expected '>'"; 459 break; 460 case XML_ERR_CONDSEC_INVALID: 461 errmsg = "XML conditional section '[' expected"; 462 break; 463 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 464 errmsg = "Content error in the external subset"; 465 break; 466 
case XML_ERR_CONDSEC_INVALID_KEYWORD: 467 errmsg = 468 "conditional section INCLUDE or IGNORE keyword expected"; 469 break; 470 case XML_ERR_CONDSEC_NOT_FINISHED: 471 errmsg = "XML conditional section not closed"; 472 break; 473 case XML_ERR_XMLDECL_NOT_STARTED: 474 errmsg = "Text declaration '<?xml' required"; 475 break; 476 case XML_ERR_XMLDECL_NOT_FINISHED: 477 errmsg = "parsing XML declaration: '?>' expected"; 478 break; 479 case XML_ERR_EXT_ENTITY_STANDALONE: 480 errmsg = "external parsed entities cannot be standalone"; 481 break; 482 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 483 errmsg = "EntityRef: expecting ';'"; 484 break; 485 case XML_ERR_DOCTYPE_NOT_FINISHED: 486 errmsg = "DOCTYPE improperly terminated"; 487 break; 488 case XML_ERR_LTSLASH_REQUIRED: 489 errmsg = "EndTag: '</' not found"; 490 break; 491 case XML_ERR_EQUAL_REQUIRED: 492 errmsg = "expected '='"; 493 break; 494 case XML_ERR_STRING_NOT_CLOSED: 495 errmsg = "String not closed expecting \" or '"; 496 break; 497 case XML_ERR_STRING_NOT_STARTED: 498 errmsg = "String not started expecting ' or \""; 499 break; 500 case XML_ERR_ENCODING_NAME: 501 errmsg = "Invalid XML encoding name"; 502 break; 503 case XML_ERR_STANDALONE_VALUE: 504 errmsg = "standalone accepts only 'yes' or 'no'"; 505 break; 506 case XML_ERR_DOCUMENT_EMPTY: 507 errmsg = "Document is empty"; 508 break; 509 case XML_ERR_DOCUMENT_END: 510 errmsg = "Extra content at the end of the document"; 511 break; 512 case XML_ERR_NOT_WELL_BALANCED: 513 errmsg = "chunk is not well balanced"; 514 break; 515 case XML_ERR_EXTRA_CONTENT: 516 errmsg = "extra content at the end of well balanced chunk"; 517 break; 518 case XML_ERR_VERSION_MISSING: 519 errmsg = "Malformed declaration expecting version"; 520 break; 521 case XML_ERR_NAME_TOO_LONG: 522 errmsg = "Name too long use XML_PARSE_HUGE option"; 523 break; 524 #if 0 525 case: 526 errmsg = ""; 527 break; 528 #endif 529 default: 530 errmsg = "Unregistered error message"; 531 } 532 if (info == NULL) 533 
snprintf(errstr, 128, "%s\n", errmsg); 534 else 535 snprintf(errstr, 128, "%s: %%s\n", errmsg); 536 if (ctxt != NULL) 537 ctxt->errNo = error; 538 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 539 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0], 540 info); 541 if (ctxt != NULL) { 542 ctxt->wellFormed = 0; 543 if (ctxt->recovery == 0) 544 ctxt->disableSAX = 1; 545 } 546 } 547 548 /** 549 * xmlFatalErrMsg: 550 * @ctxt: an XML parser context 551 * @error: the error number 552 * @msg: the error message 553 * 554 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 555 */ 556 static void 557 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 558 const char *msg) 559 { 560 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 561 (ctxt->instate == XML_PARSER_EOF)) 562 return; 563 if (ctxt != NULL) 564 ctxt->errNo = error; 565 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 566 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 567 if (ctxt != NULL) { 568 ctxt->wellFormed = 0; 569 if (ctxt->recovery == 0) 570 ctxt->disableSAX = 1; 571 } 572 } 573 574 /** 575 * xmlWarningMsg: 576 * @ctxt: an XML parser context 577 * @error: the error number 578 * @msg: the error message 579 * @str1: extra data 580 * @str2: extra data 581 * 582 * Handle a warning. 583 */ 584 static void 585 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 586 const char *msg, const xmlChar *str1, const xmlChar *str2) 587 { 588 xmlStructuredErrorFunc schannel = NULL; 589 590 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 591 (ctxt->instate == XML_PARSER_EOF)) 592 return; 593 if ((ctxt != NULL) && (ctxt->sax != NULL) && 594 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 595 schannel = ctxt->sax->serror; 596 if (ctxt != NULL) { 597 __xmlRaiseError(schannel, 598 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 599 ctxt->userData, 600 ctxt, NULL, XML_FROM_PARSER, error, 601 XML_ERR_WARNING, NULL, 0, 602 (const char *) str1, (const char *) str2, NULL, 0, 0, 603 msg, (const char *) str1, (const char *) str2); 604 } else { 605 __xmlRaiseError(schannel, NULL, NULL, 606 ctxt, NULL, XML_FROM_PARSER, error, 607 XML_ERR_WARNING, NULL, 0, 608 (const char *) str1, (const char *) str2, NULL, 0, 0, 609 msg, (const char *) str1, (const char *) str2); 610 } 611 } 612 613 /** 614 * xmlValidityError: 615 * @ctxt: an XML parser context 616 * @error: the error number 617 * @msg: the error message 618 * @str1: extra data 619 * 620 * Handle a validity error. 621 */ 622 static void 623 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 624 const char *msg, const xmlChar *str1, const xmlChar *str2) 625 { 626 xmlStructuredErrorFunc schannel = NULL; 627 628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 629 (ctxt->instate == XML_PARSER_EOF)) 630 return; 631 if (ctxt != NULL) { 632 ctxt->errNo = error; 633 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 634 schannel = ctxt->sax->serror; 635 } 636 if (ctxt != NULL) { 637 __xmlRaiseError(schannel, 638 ctxt->vctxt.error, ctxt->vctxt.userData, 639 ctxt, NULL, XML_FROM_DTD, error, 640 XML_ERR_ERROR, NULL, 0, (const char *) str1, 641 (const char *) str2, NULL, 0, 0, 642 msg, (const char *) str1, (const char *) str2); 643 ctxt->valid = 0; 644 } else { 645 __xmlRaiseError(schannel, NULL, NULL, 646 ctxt, NULL, XML_FROM_DTD, error, 647 XML_ERR_ERROR, NULL, 0, (const char *) str1, 648 (const char *) str2, NULL, 0, 0, 649 msg, (const char *) str1, (const char *) str2); 650 } 651 } 652 653 /** 654 * xmlFatalErrMsgInt: 655 * @ctxt: an XML parser context 656 * @error: the error number 657 * @msg: the error message 658 * @val: an integer value 659 * 660 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 661 */ 662 static void 663 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 664 const char *msg, int val) 665 { 666 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 667 (ctxt->instate == XML_PARSER_EOF)) 668 return; 669 if (ctxt != NULL) 670 ctxt->errNo = error; 671 __xmlRaiseError(NULL, NULL, NULL, 672 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 673 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 674 if (ctxt != NULL) { 675 ctxt->wellFormed = 0; 676 if (ctxt->recovery == 0) 677 ctxt->disableSAX = 1; 678 } 679 } 680 681 /** 682 * xmlFatalErrMsgStrIntStr: 683 * @ctxt: an XML parser context 684 * @error: the error number 685 * @msg: the error message 686 * @str1: an string info 687 * @val: an integer value 688 * @str2: an string info 689 * 690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 691 */ 692 static void 693 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 694 const char *msg, const xmlChar *str1, int val, 695 const xmlChar *str2) 696 { 697 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 698 (ctxt->instate == XML_PARSER_EOF)) 699 return; 700 if (ctxt != NULL) 701 ctxt->errNo = error; 702 __xmlRaiseError(NULL, NULL, NULL, 703 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 704 NULL, 0, (const char *) str1, (const char *) str2, 705 NULL, val, 0, msg, str1, val, str2); 706 if (ctxt != NULL) { 707 ctxt->wellFormed = 0; 708 if (ctxt->recovery == 0) 709 ctxt->disableSAX = 1; 710 } 711 } 712 713 /** 714 * xmlFatalErrMsgStr: 715 * @ctxt: an XML parser context 716 * @error: the error number 717 * @msg: the error message 718 * @val: a string value 719 * 720 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 721 */ 722 static void 723 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 724 const char *msg, const xmlChar * val) 725 { 726 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 727 (ctxt->instate == XML_PARSER_EOF)) 728 return; 729 if (ctxt != NULL) 730 ctxt->errNo = error; 731 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 732 XML_FROM_PARSER, error, XML_ERR_FATAL, 733 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 734 val); 735 if (ctxt != NULL) { 736 ctxt->wellFormed = 0; 737 if (ctxt->recovery == 0) 738 ctxt->disableSAX = 1; 739 } 740 } 741 742 /** 743 * xmlErrMsgStr: 744 * @ctxt: an XML parser context 745 * @error: the error number 746 * @msg: the error message 747 * @val: a string value 748 * 749 * Handle a non fatal parser error 750 */ 751 static void 752 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 753 const char *msg, const xmlChar * val) 754 { 755 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 756 (ctxt->instate == XML_PARSER_EOF)) 757 return; 758 if (ctxt != NULL) 759 ctxt->errNo = error; 760 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 761 XML_FROM_PARSER, error, XML_ERR_ERROR, 762 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 763 val); 764 } 765 766 /** 767 * xmlNsErr: 768 * @ctxt: an XML parser context 769 * @error: the error number 770 * @msg: the message 771 * @info1: extra information string 772 * @info2: extra information string 773 * 774 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 775 */ 776 static void 777 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 778 const char *msg, 779 const xmlChar * info1, const xmlChar * info2, 780 const xmlChar * info3) 781 { 782 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 783 (ctxt->instate == XML_PARSER_EOF)) 784 return; 785 if (ctxt != NULL) 786 ctxt->errNo = error; 787 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 788 XML_ERR_ERROR, NULL, 0, (const char *) info1, 789 (const char *) info2, (const char *) info3, 0, 0, msg, 790 info1, info2, info3); 791 if (ctxt != NULL) 792 ctxt->nsWellFormed = 0; 793 } 794 795 /** 796 * xmlNsWarn 797 * @ctxt: an XML parser context 798 * @error: the error number 799 * @msg: the message 800 * @info1: extra information string 801 * @info2: extra information string 802 * 803 * Handle a namespace warning error 804 */ 805 static void 806 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 807 const char *msg, 808 const xmlChar * info1, const xmlChar * info2, 809 const xmlChar * info3) 810 { 811 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 812 (ctxt->instate == XML_PARSER_EOF)) 813 return; 814 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 815 XML_ERR_WARNING, NULL, 0, (const char *) info1, 816 (const char *) info2, (const char *) info3, 0, 0, msg, 817 info1, info2, info3); 818 } 819 820 /************************************************************************ 821 * * 822 * Library wide options * 823 * * 824 ************************************************************************/ 825 826 /** 827 * xmlHasFeature: 828 * @feature: the feature to be examined 829 * 830 * Examines if the library has been compiled with a given feature. 831 * 832 * Returns a non-zero value if the feature exist, otherwise zero. 833 * Returns zero (0) if the feature does not exist or an unknown 834 * unknown feature is requested, non-zero otherwise. 
835 */ 836 int 837 xmlHasFeature(xmlFeature feature) 838 { 839 switch (feature) { 840 case XML_WITH_THREAD: 841 #ifdef LIBXML_THREAD_ENABLED 842 return(1); 843 #else 844 return(0); 845 #endif 846 case XML_WITH_TREE: 847 #ifdef LIBXML_TREE_ENABLED 848 return(1); 849 #else 850 return(0); 851 #endif 852 case XML_WITH_OUTPUT: 853 #ifdef LIBXML_OUTPUT_ENABLED 854 return(1); 855 #else 856 return(0); 857 #endif 858 case XML_WITH_PUSH: 859 #ifdef LIBXML_PUSH_ENABLED 860 return(1); 861 #else 862 return(0); 863 #endif 864 case XML_WITH_READER: 865 #ifdef LIBXML_READER_ENABLED 866 return(1); 867 #else 868 return(0); 869 #endif 870 case XML_WITH_PATTERN: 871 #ifdef LIBXML_PATTERN_ENABLED 872 return(1); 873 #else 874 return(0); 875 #endif 876 case XML_WITH_WRITER: 877 #ifdef LIBXML_WRITER_ENABLED 878 return(1); 879 #else 880 return(0); 881 #endif 882 case XML_WITH_SAX1: 883 #ifdef LIBXML_SAX1_ENABLED 884 return(1); 885 #else 886 return(0); 887 #endif 888 case XML_WITH_FTP: 889 #ifdef LIBXML_FTP_ENABLED 890 return(1); 891 #else 892 return(0); 893 #endif 894 case XML_WITH_HTTP: 895 #ifdef LIBXML_HTTP_ENABLED 896 return(1); 897 #else 898 return(0); 899 #endif 900 case XML_WITH_VALID: 901 #ifdef LIBXML_VALID_ENABLED 902 return(1); 903 #else 904 return(0); 905 #endif 906 case XML_WITH_HTML: 907 #ifdef LIBXML_HTML_ENABLED 908 return(1); 909 #else 910 return(0); 911 #endif 912 case XML_WITH_LEGACY: 913 #ifdef LIBXML_LEGACY_ENABLED 914 return(1); 915 #else 916 return(0); 917 #endif 918 case XML_WITH_C14N: 919 #ifdef LIBXML_C14N_ENABLED 920 return(1); 921 #else 922 return(0); 923 #endif 924 case XML_WITH_CATALOG: 925 #ifdef LIBXML_CATALOG_ENABLED 926 return(1); 927 #else 928 return(0); 929 #endif 930 case XML_WITH_XPATH: 931 #ifdef LIBXML_XPATH_ENABLED 932 return(1); 933 #else 934 return(0); 935 #endif 936 case XML_WITH_XPTR: 937 #ifdef LIBXML_XPTR_ENABLED 938 return(1); 939 #else 940 return(0); 941 #endif 942 case XML_WITH_XINCLUDE: 943 #ifdef LIBXML_XINCLUDE_ENABLED 944 return(1); 945 
#else 946 return(0); 947 #endif 948 case XML_WITH_ICONV: 949 #ifdef LIBXML_ICONV_ENABLED 950 return(1); 951 #else 952 return(0); 953 #endif 954 case XML_WITH_ISO8859X: 955 #ifdef LIBXML_ISO8859X_ENABLED 956 return(1); 957 #else 958 return(0); 959 #endif 960 case XML_WITH_UNICODE: 961 #ifdef LIBXML_UNICODE_ENABLED 962 return(1); 963 #else 964 return(0); 965 #endif 966 case XML_WITH_REGEXP: 967 #ifdef LIBXML_REGEXP_ENABLED 968 return(1); 969 #else 970 return(0); 971 #endif 972 case XML_WITH_AUTOMATA: 973 #ifdef LIBXML_AUTOMATA_ENABLED 974 return(1); 975 #else 976 return(0); 977 #endif 978 case XML_WITH_EXPR: 979 #ifdef LIBXML_EXPR_ENABLED 980 return(1); 981 #else 982 return(0); 983 #endif 984 case XML_WITH_SCHEMAS: 985 #ifdef LIBXML_SCHEMAS_ENABLED 986 return(1); 987 #else 988 return(0); 989 #endif 990 case XML_WITH_SCHEMATRON: 991 #ifdef LIBXML_SCHEMATRON_ENABLED 992 return(1); 993 #else 994 return(0); 995 #endif 996 case XML_WITH_MODULES: 997 #ifdef LIBXML_MODULES_ENABLED 998 return(1); 999 #else 1000 return(0); 1001 #endif 1002 case XML_WITH_DEBUG: 1003 #ifdef LIBXML_DEBUG_ENABLED 1004 return(1); 1005 #else 1006 return(0); 1007 #endif 1008 case XML_WITH_DEBUG_MEM: 1009 #ifdef DEBUG_MEMORY_LOCATION 1010 return(1); 1011 #else 1012 return(0); 1013 #endif 1014 case XML_WITH_DEBUG_RUN: 1015 #ifdef LIBXML_DEBUG_RUNTIME 1016 return(1); 1017 #else 1018 return(0); 1019 #endif 1020 case XML_WITH_ZLIB: 1021 #ifdef LIBXML_ZLIB_ENABLED 1022 return(1); 1023 #else 1024 return(0); 1025 #endif 1026 case XML_WITH_LZMA: 1027 #ifdef LIBXML_LZMA_ENABLED 1028 return(1); 1029 #else 1030 return(0); 1031 #endif 1032 case XML_WITH_ICU: 1033 #ifdef LIBXML_ICU_ENABLED 1034 return(1); 1035 #else 1036 return(0); 1037 #endif 1038 default: 1039 break; 1040 } 1041 return(0); 1042 } 1043 1044 /************************************************************************ 1045 * * 1046 * SAX2 defaulted attributes handling * 1047 * * 1048 
************************************************************************/ 1049 1050 /** 1051 * xmlDetectSAX2: 1052 * @ctxt: an XML parser context 1053 * 1054 * Do the SAX2 detection and specific intialization 1055 */ 1056 static void 1057 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1058 if (ctxt == NULL) return; 1059 #ifdef LIBXML_SAX1_ENABLED 1060 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1061 ((ctxt->sax->startElementNs != NULL) || 1062 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1063 #else 1064 ctxt->sax2 = 1; 1065 #endif /* LIBXML_SAX1_ENABLED */ 1066 1067 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1068 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1069 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1070 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1071 (ctxt->str_xml_ns == NULL)) { 1072 xmlErrMemory(ctxt, NULL); 1073 } 1074 } 1075 1076 typedef struct _xmlDefAttrs xmlDefAttrs; 1077 typedef xmlDefAttrs *xmlDefAttrsPtr; 1078 struct _xmlDefAttrs { 1079 int nbAttrs; /* number of defaulted attributes on that element */ 1080 int maxAttrs; /* the size of the array */ 1081 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1082 }; 1083 1084 /** 1085 * xmlAttrNormalizeSpace: 1086 * @src: the source string 1087 * @dst: the target string 1088 * 1089 * Normalize the space in non CDATA attribute values: 1090 * If the attribute type is not CDATA, then the XML processor MUST further 1091 * process the normalized attribute value by discarding any leading and 1092 * trailing space (#x20) characters, and by replacing sequences of space 1093 * (#x20) characters by a single space (#x20) character. 1094 * Note that the size of dst need to be at least src, and if one doesn't need 1095 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1096 * passing src as dst is just fine. 
1097 * 1098 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1099 * is needed. 1100 */ 1101 static xmlChar * 1102 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1103 { 1104 if ((src == NULL) || (dst == NULL)) 1105 return(NULL); 1106 1107 while (*src == 0x20) src++; 1108 while (*src != 0) { 1109 if (*src == 0x20) { 1110 while (*src == 0x20) src++; 1111 if (*src != 0) 1112 *dst++ = 0x20; 1113 } else { 1114 *dst++ = *src++; 1115 } 1116 } 1117 *dst = 0; 1118 if (dst == src) 1119 return(NULL); 1120 return(dst); 1121 } 1122 1123 /** 1124 * xmlAttrNormalizeSpace2: 1125 * @src: the source string 1126 * 1127 * Normalize the space in non CDATA attribute values, a slightly more complex 1128 * front end to avoid allocation problems when running on attribute values 1129 * coming from the input. 1130 * 1131 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1132 * is needed. 1133 */ 1134 static const xmlChar * 1135 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1136 { 1137 int i; 1138 int remove_head = 0; 1139 int need_realloc = 0; 1140 const xmlChar *cur; 1141 1142 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1143 return(NULL); 1144 i = *len; 1145 if (i <= 0) 1146 return(NULL); 1147 1148 cur = src; 1149 while (*cur == 0x20) { 1150 cur++; 1151 remove_head++; 1152 } 1153 while (*cur != 0) { 1154 if (*cur == 0x20) { 1155 cur++; 1156 if ((*cur == 0x20) || (*cur == 0)) { 1157 need_realloc = 1; 1158 break; 1159 } 1160 } else 1161 cur++; 1162 } 1163 if (need_realloc) { 1164 xmlChar *ret; 1165 1166 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1167 if (ret == NULL) { 1168 xmlErrMemory(ctxt, NULL); 1169 return(NULL); 1170 } 1171 xmlAttrNormalizeSpace(ret, ret); 1172 *len = (int) strlen((const char *)ret); 1173 return(ret); 1174 } else if (remove_head) { 1175 *len -= remove_head; 1176 memmove(src, src + remove_head, 1 + *len); 1177 return(src); 1178 } 1179 return(NULL); 1180 } 1181 1182 
/** 1183 * xmlAddDefAttrs: 1184 * @ctxt: an XML parser context 1185 * @fullname: the element fullname 1186 * @fullattr: the attribute fullname 1187 * @value: the attribute value 1188 * 1189 * Add a defaulted attribute for an element 1190 */ 1191 static void 1192 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1193 const xmlChar *fullname, 1194 const xmlChar *fullattr, 1195 const xmlChar *value) { 1196 xmlDefAttrsPtr defaults; 1197 int len; 1198 const xmlChar *name; 1199 const xmlChar *prefix; 1200 1201 /* 1202 * Allows to detect attribute redefinitions 1203 */ 1204 if (ctxt->attsSpecial != NULL) { 1205 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1206 return; 1207 } 1208 1209 if (ctxt->attsDefault == NULL) { 1210 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1211 if (ctxt->attsDefault == NULL) 1212 goto mem_error; 1213 } 1214 1215 /* 1216 * split the element name into prefix:localname , the string found 1217 * are within the DTD and then not associated to namespace names. 
1218 */ 1219 name = xmlSplitQName3(fullname, &len); 1220 if (name == NULL) { 1221 name = xmlDictLookup(ctxt->dict, fullname, -1); 1222 prefix = NULL; 1223 } else { 1224 name = xmlDictLookup(ctxt->dict, name, -1); 1225 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1226 } 1227 1228 /* 1229 * make sure there is some storage 1230 */ 1231 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1232 if (defaults == NULL) { 1233 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1234 (4 * 5) * sizeof(const xmlChar *)); 1235 if (defaults == NULL) 1236 goto mem_error; 1237 defaults->nbAttrs = 0; 1238 defaults->maxAttrs = 4; 1239 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1240 defaults, NULL) < 0) { 1241 xmlFree(defaults); 1242 goto mem_error; 1243 } 1244 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1245 xmlDefAttrsPtr temp; 1246 1247 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1248 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1249 if (temp == NULL) 1250 goto mem_error; 1251 defaults = temp; 1252 defaults->maxAttrs *= 2; 1253 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1254 defaults, NULL) < 0) { 1255 xmlFree(defaults); 1256 goto mem_error; 1257 } 1258 } 1259 1260 /* 1261 * Split the element name into prefix:localname , the string found 1262 * are within the DTD and hen not associated to namespace names. 
1263 */ 1264 name = xmlSplitQName3(fullattr, &len); 1265 if (name == NULL) { 1266 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1267 prefix = NULL; 1268 } else { 1269 name = xmlDictLookup(ctxt->dict, name, -1); 1270 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1271 } 1272 1273 defaults->values[5 * defaults->nbAttrs] = name; 1274 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1275 /* intern the string and precompute the end */ 1276 len = xmlStrlen(value); 1277 value = xmlDictLookup(ctxt->dict, value, len); 1278 defaults->values[5 * defaults->nbAttrs + 2] = value; 1279 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1280 if (ctxt->external) 1281 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1282 else 1283 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1284 defaults->nbAttrs++; 1285 1286 return; 1287 1288 mem_error: 1289 xmlErrMemory(ctxt, NULL); 1290 return; 1291 } 1292 1293 /** 1294 * xmlAddSpecialAttr: 1295 * @ctxt: an XML parser context 1296 * @fullname: the element fullname 1297 * @fullattr: the attribute fullname 1298 * @type: the attribute type 1299 * 1300 * Register this attribute type 1301 */ 1302 static void 1303 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1304 const xmlChar *fullname, 1305 const xmlChar *fullattr, 1306 int type) 1307 { 1308 if (ctxt->attsSpecial == NULL) { 1309 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1310 if (ctxt->attsSpecial == NULL) 1311 goto mem_error; 1312 } 1313 1314 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1315 return; 1316 1317 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1318 (void *) (long) type); 1319 return; 1320 1321 mem_error: 1322 xmlErrMemory(ctxt, NULL); 1323 return; 1324 } 1325 1326 /** 1327 * xmlCleanSpecialAttrCallback: 1328 * 1329 * Removes CDATA attributes from the special attribute table 1330 */ 1331 static void 1332 xmlCleanSpecialAttrCallback(void *payload, void *data, 1333 const xmlChar *fullname, const xmlChar 
*fullattr, 1334 const xmlChar *unused ATTRIBUTE_UNUSED) { 1335 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1336 1337 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1338 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1339 } 1340 } 1341 1342 /** 1343 * xmlCleanSpecialAttr: 1344 * @ctxt: an XML parser context 1345 * 1346 * Trim the list of attributes defined to remove all those of type 1347 * CDATA as they are not special. This call should be done when finishing 1348 * to parse the DTD and before starting to parse the document root. 1349 */ 1350 static void 1351 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1352 { 1353 if (ctxt->attsSpecial == NULL) 1354 return; 1355 1356 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1357 1358 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1359 xmlHashFree(ctxt->attsSpecial, NULL); 1360 ctxt->attsSpecial = NULL; 1361 } 1362 return; 1363 } 1364 1365 /** 1366 * xmlCheckLanguageID: 1367 * @lang: pointer to the string value 1368 * 1369 * Checks that the value conforms to the LanguageID production: 1370 * 1371 * NOTE: this is somewhat deprecated, those productions were removed from 1372 * the XML Second edition. 
1373 * 1374 * [33] LanguageID ::= Langcode ('-' Subcode)* 1375 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1376 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1377 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1378 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1379 * [38] Subcode ::= ([a-z] | [A-Z])+ 1380 * 1381 * The current REC reference the sucessors of RFC 1766, currently 5646 1382 * 1383 * http://www.rfc-editor.org/rfc/rfc5646.txt 1384 * langtag = language 1385 * ["-" script] 1386 * ["-" region] 1387 * *("-" variant) 1388 * *("-" extension) 1389 * ["-" privateuse] 1390 * language = 2*3ALPHA ; shortest ISO 639 code 1391 * ["-" extlang] ; sometimes followed by 1392 * ; extended language subtags 1393 * / 4ALPHA ; or reserved for future use 1394 * / 5*8ALPHA ; or registered language subtag 1395 * 1396 * extlang = 3ALPHA ; selected ISO 639 codes 1397 * *2("-" 3ALPHA) ; permanently reserved 1398 * 1399 * script = 4ALPHA ; ISO 15924 code 1400 * 1401 * region = 2ALPHA ; ISO 3166-1 code 1402 * / 3DIGIT ; UN M.49 code 1403 * 1404 * variant = 5*8alphanum ; registered variants 1405 * / (DIGIT 3alphanum) 1406 * 1407 * extension = singleton 1*("-" (2*8alphanum)) 1408 * 1409 * ; Single alphanumerics 1410 * ; "x" reserved for private use 1411 * singleton = DIGIT ; 0 - 9 1412 * / %x41-57 ; A - W 1413 * / %x59-5A ; Y - Z 1414 * / %x61-77 ; a - w 1415 * / %x79-7A ; y - z 1416 * 1417 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1418 * The parser below doesn't try to cope with extension or privateuse 1419 * that could be added but that's not interoperable anyway 1420 * 1421 * Returns 1 if correct 0 otherwise 1422 **/ 1423 int 1424 xmlCheckLanguageID(const xmlChar * lang) 1425 { 1426 const xmlChar *cur = lang, *nxt; 1427 1428 if (cur == NULL) 1429 return (0); 1430 if (((cur[0] == 'i') && (cur[1] == '-')) || 1431 ((cur[0] == 'I') && (cur[1] == '-')) || 1432 ((cur[0] == 'x') && (cur[1] == '-')) || 1433 ((cur[0] == 'X') && 
(cur[1] == '-'))) { 1434 /* 1435 * Still allow IANA code and user code which were coming 1436 * from the previous version of the XML-1.0 specification 1437 * it's deprecated but we should not fail 1438 */ 1439 cur += 2; 1440 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1441 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1442 cur++; 1443 return(cur[0] == 0); 1444 } 1445 nxt = cur; 1446 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1447 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1448 nxt++; 1449 if (nxt - cur >= 4) { 1450 /* 1451 * Reserved 1452 */ 1453 if ((nxt - cur > 8) || (nxt[0] != 0)) 1454 return(0); 1455 return(1); 1456 } 1457 if (nxt - cur < 2) 1458 return(0); 1459 /* we got an ISO 639 code */ 1460 if (nxt[0] == 0) 1461 return(1); 1462 if (nxt[0] != '-') 1463 return(0); 1464 1465 nxt++; 1466 cur = nxt; 1467 /* now we can have extlang or script or region or variant */ 1468 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1469 goto region_m49; 1470 1471 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1472 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1473 nxt++; 1474 if (nxt - cur == 4) 1475 goto script; 1476 if (nxt - cur == 2) 1477 goto region; 1478 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1479 goto variant; 1480 if (nxt - cur != 3) 1481 return(0); 1482 /* we parsed an extlang */ 1483 if (nxt[0] == 0) 1484 return(1); 1485 if (nxt[0] != '-') 1486 return(0); 1487 1488 nxt++; 1489 cur = nxt; 1490 /* now we can have script or region or variant */ 1491 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1492 goto region_m49; 1493 1494 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1495 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1496 nxt++; 1497 if (nxt - cur == 2) 1498 goto region; 1499 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1500 goto variant; 1501 if (nxt - cur != 4) 1502 return(0); 1503 /* we parsed a script */ 1504 script: 1505 if (nxt[0] == 0) 1506 return(1); 1507 if (nxt[0] != '-') 1508 return(0); 1509 1510 nxt++; 1511 cur = nxt; 1512 /* now we can have region or variant */ 1513 if ((nxt[0] >= '0') 
&& (nxt[0] <= '9')) 1514 goto region_m49; 1515 1516 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1517 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1518 nxt++; 1519 1520 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1521 goto variant; 1522 if (nxt - cur != 2) 1523 return(0); 1524 /* we parsed a region */ 1525 region: 1526 if (nxt[0] == 0) 1527 return(1); 1528 if (nxt[0] != '-') 1529 return(0); 1530 1531 nxt++; 1532 cur = nxt; 1533 /* now we can just have a variant */ 1534 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1535 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1536 nxt++; 1537 1538 if ((nxt - cur < 5) || (nxt - cur > 8)) 1539 return(0); 1540 1541 /* we parsed a variant */ 1542 variant: 1543 if (nxt[0] == 0) 1544 return(1); 1545 if (nxt[0] != '-') 1546 return(0); 1547 /* extensions and private use subtags not checked */ 1548 return (1); 1549 1550 region_m49: 1551 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1552 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1553 nxt += 3; 1554 goto region; 1555 } 1556 return(0); 1557 } 1558 1559 /************************************************************************ 1560 * * 1561 * Parser stacks related functions and macros * 1562 * * 1563 ************************************************************************/ 1564 1565 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1566 const xmlChar ** str); 1567 1568 #ifdef SAX2 1569 /** 1570 * nsPush: 1571 * @ctxt: an XML parser context 1572 * @prefix: the namespace prefix or NULL 1573 * @URL: the namespace name 1574 * 1575 * Pushes a new parser namespace on top of the ns stack 1576 * 1577 * Returns -1 in case of error, -2 if the namespace should be discarded 1578 * and the index in the stack otherwise. 
1579 */ 1580 static int 1581 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1582 { 1583 if (ctxt->options & XML_PARSE_NSCLEAN) { 1584 int i; 1585 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1586 if (ctxt->nsTab[i] == prefix) { 1587 /* in scope */ 1588 if (ctxt->nsTab[i + 1] == URL) 1589 return(-2); 1590 /* out of scope keep it */ 1591 break; 1592 } 1593 } 1594 } 1595 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1596 ctxt->nsMax = 10; 1597 ctxt->nsNr = 0; 1598 ctxt->nsTab = (const xmlChar **) 1599 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1600 if (ctxt->nsTab == NULL) { 1601 xmlErrMemory(ctxt, NULL); 1602 ctxt->nsMax = 0; 1603 return (-1); 1604 } 1605 } else if (ctxt->nsNr >= ctxt->nsMax) { 1606 const xmlChar ** tmp; 1607 ctxt->nsMax *= 2; 1608 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1609 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1610 if (tmp == NULL) { 1611 xmlErrMemory(ctxt, NULL); 1612 ctxt->nsMax /= 2; 1613 return (-1); 1614 } 1615 ctxt->nsTab = tmp; 1616 } 1617 ctxt->nsTab[ctxt->nsNr++] = prefix; 1618 ctxt->nsTab[ctxt->nsNr++] = URL; 1619 return (ctxt->nsNr); 1620 } 1621 /** 1622 * nsPop: 1623 * @ctxt: an XML parser context 1624 * @nr: the number to pop 1625 * 1626 * Pops the top @nr parser prefix/namespace from the ns stack 1627 * 1628 * Returns the number of namespaces removed 1629 */ 1630 static int 1631 nsPop(xmlParserCtxtPtr ctxt, int nr) 1632 { 1633 int i; 1634 1635 if (ctxt->nsTab == NULL) return(0); 1636 if (ctxt->nsNr < nr) { 1637 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1638 nr = ctxt->nsNr; 1639 } 1640 if (ctxt->nsNr <= 0) 1641 return (0); 1642 1643 for (i = 0;i < nr;i++) { 1644 ctxt->nsNr--; 1645 ctxt->nsTab[ctxt->nsNr] = NULL; 1646 } 1647 return(nr); 1648 } 1649 #endif 1650 1651 static int 1652 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1653 const xmlChar **atts; 1654 int *attallocs; 1655 int maxatts; 1656 1657 if (ctxt->atts == NULL) { 1658 maxatts = 55; /* allow for 
10 attrs by default */ 1659 atts = (const xmlChar **) 1660 xmlMalloc(maxatts * sizeof(xmlChar *)); 1661 if (atts == NULL) goto mem_error; 1662 ctxt->atts = atts; 1663 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1664 if (attallocs == NULL) goto mem_error; 1665 ctxt->attallocs = attallocs; 1666 ctxt->maxatts = maxatts; 1667 } else if (nr + 5 > ctxt->maxatts) { 1668 maxatts = (nr + 5) * 2; 1669 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1670 maxatts * sizeof(const xmlChar *)); 1671 if (atts == NULL) goto mem_error; 1672 ctxt->atts = atts; 1673 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1674 (maxatts / 5) * sizeof(int)); 1675 if (attallocs == NULL) goto mem_error; 1676 ctxt->attallocs = attallocs; 1677 ctxt->maxatts = maxatts; 1678 } 1679 return(ctxt->maxatts); 1680 mem_error: 1681 xmlErrMemory(ctxt, NULL); 1682 return(-1); 1683 } 1684 1685 /** 1686 * inputPush: 1687 * @ctxt: an XML parser context 1688 * @value: the parser input 1689 * 1690 * Pushes a new parser input on top of the input stack 1691 * 1692 * Returns -1 in case of error, the index in the stack otherwise 1693 */ 1694 int 1695 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1696 { 1697 if ((ctxt == NULL) || (value == NULL)) 1698 return(-1); 1699 if (ctxt->inputNr >= ctxt->inputMax) { 1700 ctxt->inputMax *= 2; 1701 ctxt->inputTab = 1702 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1703 ctxt->inputMax * 1704 sizeof(ctxt->inputTab[0])); 1705 if (ctxt->inputTab == NULL) { 1706 xmlErrMemory(ctxt, NULL); 1707 xmlFreeInputStream(value); 1708 ctxt->inputMax /= 2; 1709 value = NULL; 1710 return (-1); 1711 } 1712 } 1713 ctxt->inputTab[ctxt->inputNr] = value; 1714 ctxt->input = value; 1715 return (ctxt->inputNr++); 1716 } 1717 /** 1718 * inputPop: 1719 * @ctxt: an XML parser context 1720 * 1721 * Pops the top parser input from the input stack 1722 * 1723 * Returns the input just removed 1724 */ 1725 xmlParserInputPtr 1726 inputPop(xmlParserCtxtPtr ctxt) 1727 
{ 1728 xmlParserInputPtr ret; 1729 1730 if (ctxt == NULL) 1731 return(NULL); 1732 if (ctxt->inputNr <= 0) 1733 return (NULL); 1734 ctxt->inputNr--; 1735 if (ctxt->inputNr > 0) 1736 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1737 else 1738 ctxt->input = NULL; 1739 ret = ctxt->inputTab[ctxt->inputNr]; 1740 ctxt->inputTab[ctxt->inputNr] = NULL; 1741 return (ret); 1742 } 1743 /** 1744 * nodePush: 1745 * @ctxt: an XML parser context 1746 * @value: the element node 1747 * 1748 * Pushes a new element node on top of the node stack 1749 * 1750 * Returns -1 in case of error, the index in the stack otherwise 1751 */ 1752 int 1753 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1754 { 1755 if (ctxt == NULL) return(0); 1756 if (ctxt->nodeNr >= ctxt->nodeMax) { 1757 xmlNodePtr *tmp; 1758 1759 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1760 ctxt->nodeMax * 2 * 1761 sizeof(ctxt->nodeTab[0])); 1762 if (tmp == NULL) { 1763 xmlErrMemory(ctxt, NULL); 1764 return (-1); 1765 } 1766 ctxt->nodeTab = tmp; 1767 ctxt->nodeMax *= 2; 1768 } 1769 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1770 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1771 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1772 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1773 xmlParserMaxDepth); 1774 ctxt->instate = XML_PARSER_EOF; 1775 return(-1); 1776 } 1777 ctxt->nodeTab[ctxt->nodeNr] = value; 1778 ctxt->node = value; 1779 return (ctxt->nodeNr++); 1780 } 1781 1782 /** 1783 * nodePop: 1784 * @ctxt: an XML parser context 1785 * 1786 * Pops the top element node from the node stack 1787 * 1788 * Returns the node just removed 1789 */ 1790 xmlNodePtr 1791 nodePop(xmlParserCtxtPtr ctxt) 1792 { 1793 xmlNodePtr ret; 1794 1795 if (ctxt == NULL) return(NULL); 1796 if (ctxt->nodeNr <= 0) 1797 return (NULL); 1798 ctxt->nodeNr--; 1799 if (ctxt->nodeNr > 0) 1800 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1801 else 1802 ctxt->node = NULL; 1803 ret = ctxt->nodeTab[ctxt->nodeNr]; 1804 
ctxt->nodeTab[ctxt->nodeNr] = NULL; 1805 return (ret); 1806 } 1807 1808 #ifdef LIBXML_PUSH_ENABLED 1809 /** 1810 * nameNsPush: 1811 * @ctxt: an XML parser context 1812 * @value: the element name 1813 * @prefix: the element prefix 1814 * @URI: the element namespace name 1815 * 1816 * Pushes a new element name/prefix/URL on top of the name stack 1817 * 1818 * Returns -1 in case of error, the index in the stack otherwise 1819 */ 1820 static int 1821 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1822 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1823 { 1824 if (ctxt->nameNr >= ctxt->nameMax) { 1825 const xmlChar * *tmp; 1826 void **tmp2; 1827 ctxt->nameMax *= 2; 1828 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1829 ctxt->nameMax * 1830 sizeof(ctxt->nameTab[0])); 1831 if (tmp == NULL) { 1832 ctxt->nameMax /= 2; 1833 goto mem_error; 1834 } 1835 ctxt->nameTab = tmp; 1836 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1837 ctxt->nameMax * 3 * 1838 sizeof(ctxt->pushTab[0])); 1839 if (tmp2 == NULL) { 1840 ctxt->nameMax /= 2; 1841 goto mem_error; 1842 } 1843 ctxt->pushTab = tmp2; 1844 } 1845 ctxt->nameTab[ctxt->nameNr] = value; 1846 ctxt->name = value; 1847 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1848 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1849 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1850 return (ctxt->nameNr++); 1851 mem_error: 1852 xmlErrMemory(ctxt, NULL); 1853 return (-1); 1854 } 1855 /** 1856 * nameNsPop: 1857 * @ctxt: an XML parser context 1858 * 1859 * Pops the top element/prefix/URI name from the name stack 1860 * 1861 * Returns the name just removed 1862 */ 1863 static const xmlChar * 1864 nameNsPop(xmlParserCtxtPtr ctxt) 1865 { 1866 const xmlChar *ret; 1867 1868 if (ctxt->nameNr <= 0) 1869 return (NULL); 1870 ctxt->nameNr--; 1871 if (ctxt->nameNr > 0) 1872 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1873 else 1874 ctxt->name = NULL; 1875 ret = ctxt->nameTab[ctxt->nameNr]; 1876 
ctxt->nameTab[ctxt->nameNr] = NULL; 1877 return (ret); 1878 } 1879 #endif /* LIBXML_PUSH_ENABLED */ 1880 1881 /** 1882 * namePush: 1883 * @ctxt: an XML parser context 1884 * @value: the element name 1885 * 1886 * Pushes a new element name on top of the name stack 1887 * 1888 * Returns -1 in case of error, the index in the stack otherwise 1889 */ 1890 int 1891 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1892 { 1893 if (ctxt == NULL) return (-1); 1894 1895 if (ctxt->nameNr >= ctxt->nameMax) { 1896 const xmlChar * *tmp; 1897 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1898 ctxt->nameMax * 2 * 1899 sizeof(ctxt->nameTab[0])); 1900 if (tmp == NULL) { 1901 goto mem_error; 1902 } 1903 ctxt->nameTab = tmp; 1904 ctxt->nameMax *= 2; 1905 } 1906 ctxt->nameTab[ctxt->nameNr] = value; 1907 ctxt->name = value; 1908 return (ctxt->nameNr++); 1909 mem_error: 1910 xmlErrMemory(ctxt, NULL); 1911 return (-1); 1912 } 1913 /** 1914 * namePop: 1915 * @ctxt: an XML parser context 1916 * 1917 * Pops the top element name from the name stack 1918 * 1919 * Returns the name just removed 1920 */ 1921 const xmlChar * 1922 namePop(xmlParserCtxtPtr ctxt) 1923 { 1924 const xmlChar *ret; 1925 1926 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1927 return (NULL); 1928 ctxt->nameNr--; 1929 if (ctxt->nameNr > 0) 1930 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1931 else 1932 ctxt->name = NULL; 1933 ret = ctxt->nameTab[ctxt->nameNr]; 1934 ctxt->nameTab[ctxt->nameNr] = NULL; 1935 return (ret); 1936 } 1937 1938 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1939 if (ctxt->spaceNr >= ctxt->spaceMax) { 1940 int *tmp; 1941 1942 ctxt->spaceMax *= 2; 1943 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1944 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1945 if (tmp == NULL) { 1946 xmlErrMemory(ctxt, NULL); 1947 ctxt->spaceMax /=2; 1948 return(-1); 1949 } 1950 ctxt->spaceTab = tmp; 1951 } 1952 ctxt->spaceTab[ctxt->spaceNr] = val; 1953 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 
1954 return(ctxt->spaceNr++); 1955 } 1956 1957 static int spacePop(xmlParserCtxtPtr ctxt) { 1958 int ret; 1959 if (ctxt->spaceNr <= 0) return(0); 1960 ctxt->spaceNr--; 1961 if (ctxt->spaceNr > 0) 1962 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1963 else 1964 ctxt->space = &ctxt->spaceTab[0]; 1965 ret = ctxt->spaceTab[ctxt->spaceNr]; 1966 ctxt->spaceTab[ctxt->spaceNr] = -1; 1967 return(ret); 1968 } 1969 1970 /* 1971 * Macros for accessing the content. Those should be used only by the parser, 1972 * and not exported. 1973 * 1974 * Dirty macros, i.e. one often need to make assumption on the context to 1975 * use them 1976 * 1977 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1978 * To be used with extreme caution since operations consuming 1979 * characters may move the input buffer to a different location ! 1980 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1981 * This should be used internally by the parser 1982 * only to compare to ASCII values otherwise it would break when 1983 * running with UTF-8 encoding. 1984 * RAW same as CUR but in the input buffer, bypass any token 1985 * extraction that may have been done 1986 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1987 * to compare on ASCII based substring. 1988 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1989 * strings without newlines within the parser. 1990 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1991 * defined char within the parser. 1992 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1993 * 1994 * NEXT Skip to the next character, this does the proper decoding 1995 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1996 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1997 * CUR_CHAR(l) returns the current unicode character (int), set l 1998 * to the number of xmlChars used for the encoding [0-5]. 
1999 * CUR_SCHAR same but operate on a string instead of the context 2000 * COPY_BUF copy the current unicode char to the target buffer, increment 2001 * the index 2002 * GROW, SHRINK handling of input buffers 2003 */ 2004 2005 #define RAW (*ctxt->input->cur) 2006 #define CUR (*ctxt->input->cur) 2007 #define NXT(val) ctxt->input->cur[(val)] 2008 #define CUR_PTR ctxt->input->cur 2009 2010 #define CMP4( s, c1, c2, c3, c4 ) \ 2011 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 2012 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 2013 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 2014 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 2015 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 2016 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 2017 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 2018 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 2019 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 2020 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 2021 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 2022 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 2023 ((unsigned char *) s)[ 8 ] == c9 ) 2024 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 2025 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2026 ((unsigned char *) s)[ 9 ] == c10 ) 2027 2028 #define SKIP(val) do { \ 2029 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 2030 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2031 if ((*ctxt->input->cur == 0) && \ 2032 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2033 xmlPopInput(ctxt); \ 2034 } while (0) 2035 2036 #define SKIPL(val) do { \ 2037 int skipl; \ 2038 for(skipl=0; skipl<val; skipl++) { \ 2039 if (*(ctxt->input->cur) == '\n') { \ 2040 ctxt->input->line++; ctxt->input->col = 1; \ 2041 } else ctxt->input->col++; \ 2042 ctxt->nbChars++; \ 2043 
ctxt->input->cur++; \ 2044 } \ 2045 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2046 if ((*ctxt->input->cur == 0) && \ 2047 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2048 xmlPopInput(ctxt); \ 2049 } while (0) 2050 2051 #define SHRINK if ((ctxt->progressive == 0) && \ 2052 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 2053 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 2054 xmlSHRINK (ctxt); 2055 2056 static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 2057 xmlParserInputShrink(ctxt->input); 2058 if ((*ctxt->input->cur == 0) && 2059 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2060 xmlPopInput(ctxt); 2061 } 2062 2063 #define GROW if ((ctxt->progressive == 0) && \ 2064 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2065 xmlGROW (ctxt); 2066 2067 static void xmlGROW (xmlParserCtxtPtr ctxt) { 2068 unsigned long curEnd = ctxt->input->end - ctxt->input->cur; 2069 unsigned long curBase = ctxt->input->cur - ctxt->input->base; 2070 2071 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) || 2072 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) && 2073 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) && 2074 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 2075 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 2076 ctxt->instate = XML_PARSER_EOF; 2077 } 2078 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2079 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 2080 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2081 xmlPopInput(ctxt); 2082 } 2083 2084 #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2085 2086 #define NEXT xmlNextChar(ctxt) 2087 2088 #define NEXT1 { \ 2089 ctxt->input->col++; \ 2090 ctxt->input->cur++; \ 2091 ctxt->nbChars++; \ 2092 if (*ctxt->input->cur == 0) \ 2093 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2094 } 2095 2096 #define NEXTL(l) do { \ 2097 if (*(ctxt->input->cur) == '\n') { \ 2098 ctxt->input->line++; 
ctxt->input->col = 1; \ 2099 } else ctxt->input->col++; \ 2100 ctxt->input->cur += l; \ 2101 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2102 } while (0) 2103 2104 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2105 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2106 2107 #define COPY_BUF(l,b,i,v) \ 2108 if (l == 1) b[i++] = (xmlChar) v; \ 2109 else i += xmlCopyCharMultiByte(&b[i],v) 2110 2111 /** 2112 * xmlSkipBlankChars: 2113 * @ctxt: the XML parser context 2114 * 2115 * skip all blanks character found at that point in the input streams. 2116 * It pops up finished entities in the process if allowable at that point. 2117 * 2118 * Returns the number of space chars skipped 2119 */ 2120 2121 int 2122 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2123 int res = 0; 2124 2125 /* 2126 * It's Okay to use CUR/NEXT here since all the blanks are on 2127 * the ASCII range. 2128 */ 2129 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2130 const xmlChar *cur; 2131 /* 2132 * if we are in the document content, go really fast 2133 */ 2134 cur = ctxt->input->cur; 2135 while (IS_BLANK_CH(*cur)) { 2136 if (*cur == '\n') { 2137 ctxt->input->line++; ctxt->input->col = 1; 2138 } else { 2139 ctxt->input->col++; 2140 } 2141 cur++; 2142 res++; 2143 if (*cur == 0) { 2144 ctxt->input->cur = cur; 2145 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2146 cur = ctxt->input->cur; 2147 } 2148 } 2149 ctxt->input->cur = cur; 2150 } else { 2151 int cur; 2152 do { 2153 cur = CUR; 2154 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 2155 NEXT; 2156 cur = CUR; 2157 res++; 2158 } 2159 while ((cur == 0) && (ctxt->inputNr > 1) && 2160 (ctxt->instate != XML_PARSER_COMMENT)) { 2161 xmlPopInput(ctxt); 2162 cur = CUR; 2163 } 2164 /* 2165 * Need to handle support of entities branching here 2166 */ 2167 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 2168 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 2169 } 2170 return(res); 2171 } 2172 
2173 /************************************************************************ 2174 * * 2175 * Commodity functions to handle entities * 2176 * * 2177 ************************************************************************/ 2178 2179 /** 2180 * xmlPopInput: 2181 * @ctxt: an XML parser context 2182 * 2183 * xmlPopInput: the current input pointed by ctxt->input came to an end 2184 * pop it and return the next char. 2185 * 2186 * Returns the current xmlChar in the parser context 2187 */ 2188 xmlChar 2189 xmlPopInput(xmlParserCtxtPtr ctxt) { 2190 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2191 if (xmlParserDebugEntities) 2192 xmlGenericError(xmlGenericErrorContext, 2193 "Popping input %d\n", ctxt->inputNr); 2194 xmlFreeInputStream(inputPop(ctxt)); 2195 if ((*ctxt->input->cur == 0) && 2196 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2197 return(xmlPopInput(ctxt)); 2198 return(CUR); 2199 } 2200 2201 /** 2202 * xmlPushInput: 2203 * @ctxt: an XML parser context 2204 * @input: an XML parser input fragment (entity, XML fragment ...). 2205 * 2206 * xmlPushInput: switch to a new input stream which is stacked on top 2207 * of the previous one(s). 
2208 * Returns -1 in case of error or the index in the input stack 2209 */ 2210 int 2211 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2212 int ret; 2213 if (input == NULL) return(-1); 2214 2215 if (xmlParserDebugEntities) { 2216 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2217 xmlGenericError(xmlGenericErrorContext, 2218 "%s(%d): ", ctxt->input->filename, 2219 ctxt->input->line); 2220 xmlGenericError(xmlGenericErrorContext, 2221 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2222 } 2223 ret = inputPush(ctxt, input); 2224 if (ctxt->instate == XML_PARSER_EOF) 2225 return(-1); 2226 GROW; 2227 return(ret); 2228 } 2229 2230 /** 2231 * xmlParseCharRef: 2232 * @ctxt: an XML parser context 2233 * 2234 * parse Reference declarations 2235 * 2236 * [66] CharRef ::= '&#' [0-9]+ ';' | 2237 * '&#x' [0-9a-fA-F]+ ';' 2238 * 2239 * [ WFC: Legal Character ] 2240 * Characters referred to using character references must match the 2241 * production for Char. 2242 * 2243 * Returns the value parsed (as an int), 0 in case of error 2244 */ 2245 int 2246 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2247 unsigned int val = 0; 2248 int count = 0; 2249 unsigned int outofrange = 0; 2250 2251 /* 2252 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2253 */ 2254 if ((RAW == '&') && (NXT(1) == '#') && 2255 (NXT(2) == 'x')) { 2256 SKIP(3); 2257 GROW; 2258 while (RAW != ';') { /* loop blocked by count */ 2259 if (count++ > 20) { 2260 count = 0; 2261 GROW; 2262 if (ctxt->instate == XML_PARSER_EOF) 2263 return(0); 2264 } 2265 if ((RAW >= '0') && (RAW <= '9')) 2266 val = val * 16 + (CUR - '0'); 2267 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2268 val = val * 16 + (CUR - 'a') + 10; 2269 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2270 val = val * 16 + (CUR - 'A') + 10; 2271 else { 2272 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2273 val = 0; 2274 break; 2275 } 2276 if (val > 0x10FFFF) 2277 outofrange = val; 2278 
2279 NEXT; 2280 count++; 2281 } 2282 if (RAW == ';') { 2283 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2284 ctxt->input->col++; 2285 ctxt->nbChars ++; 2286 ctxt->input->cur++; 2287 } 2288 } else if ((RAW == '&') && (NXT(1) == '#')) { 2289 SKIP(2); 2290 GROW; 2291 while (RAW != ';') { /* loop blocked by count */ 2292 if (count++ > 20) { 2293 count = 0; 2294 GROW; 2295 if (ctxt->instate == XML_PARSER_EOF) 2296 return(0); 2297 } 2298 if ((RAW >= '0') && (RAW <= '9')) 2299 val = val * 10 + (CUR - '0'); 2300 else { 2301 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2302 val = 0; 2303 break; 2304 } 2305 if (val > 0x10FFFF) 2306 outofrange = val; 2307 2308 NEXT; 2309 count++; 2310 } 2311 if (RAW == ';') { 2312 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2313 ctxt->input->col++; 2314 ctxt->nbChars ++; 2315 ctxt->input->cur++; 2316 } 2317 } else { 2318 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2319 } 2320 2321 /* 2322 * [ WFC: Legal Character ] 2323 * Characters referred to using character references must match the 2324 * production for Char. 2325 */ 2326 if ((IS_CHAR(val) && (outofrange == 0))) { 2327 return(val); 2328 } else { 2329 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2330 "xmlParseCharRef: invalid xmlChar value %d\n", 2331 val); 2332 } 2333 return(0); 2334 } 2335 2336 /** 2337 * xmlParseStringCharRef: 2338 * @ctxt: an XML parser context 2339 * @str: a pointer to an index in the string 2340 * 2341 * parse Reference declarations, variant parsing from a string rather 2342 * than an an input flow. 2343 * 2344 * [66] CharRef ::= '&#' [0-9]+ ';' | 2345 * '&#x' [0-9a-fA-F]+ ';' 2346 * 2347 * [ WFC: Legal Character ] 2348 * Characters referred to using character references must match the 2349 * production for Char. 
2350 * 2351 * Returns the value parsed (as an int), 0 in case of error, str will be 2352 * updated to the current value of the index 2353 */ 2354 static int 2355 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2356 const xmlChar *ptr; 2357 xmlChar cur; 2358 unsigned int val = 0; 2359 unsigned int outofrange = 0; 2360 2361 if ((str == NULL) || (*str == NULL)) return(0); 2362 ptr = *str; 2363 cur = *ptr; 2364 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2365 ptr += 3; 2366 cur = *ptr; 2367 while (cur != ';') { /* Non input consuming loop */ 2368 if ((cur >= '0') && (cur <= '9')) 2369 val = val * 16 + (cur - '0'); 2370 else if ((cur >= 'a') && (cur <= 'f')) 2371 val = val * 16 + (cur - 'a') + 10; 2372 else if ((cur >= 'A') && (cur <= 'F')) 2373 val = val * 16 + (cur - 'A') + 10; 2374 else { 2375 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2376 val = 0; 2377 break; 2378 } 2379 if (val > 0x10FFFF) 2380 outofrange = val; 2381 2382 ptr++; 2383 cur = *ptr; 2384 } 2385 if (cur == ';') 2386 ptr++; 2387 } else if ((cur == '&') && (ptr[1] == '#')){ 2388 ptr += 2; 2389 cur = *ptr; 2390 while (cur != ';') { /* Non input consuming loops */ 2391 if ((cur >= '0') && (cur <= '9')) 2392 val = val * 10 + (cur - '0'); 2393 else { 2394 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2395 val = 0; 2396 break; 2397 } 2398 if (val > 0x10FFFF) 2399 outofrange = val; 2400 2401 ptr++; 2402 cur = *ptr; 2403 } 2404 if (cur == ';') 2405 ptr++; 2406 } else { 2407 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2408 return(0); 2409 } 2410 *str = ptr; 2411 2412 /* 2413 * [ WFC: Legal Character ] 2414 * Characters referred to using character references must match the 2415 * production for Char. 
2416 */ 2417 if ((IS_CHAR(val) && (outofrange == 0))) { 2418 return(val); 2419 } else { 2420 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2421 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2422 val); 2423 } 2424 return(0); 2425 } 2426 2427 /** 2428 * xmlNewBlanksWrapperInputStream: 2429 * @ctxt: an XML parser context 2430 * @entity: an Entity pointer 2431 * 2432 * Create a new input stream for wrapping 2433 * blanks around a PEReference 2434 * 2435 * Returns the new input stream or NULL 2436 */ 2437 2438 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2439 2440 static xmlParserInputPtr 2441 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2442 xmlParserInputPtr input; 2443 xmlChar *buffer; 2444 size_t length; 2445 if (entity == NULL) { 2446 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2447 "xmlNewBlanksWrapperInputStream entity\n"); 2448 return(NULL); 2449 } 2450 if (xmlParserDebugEntities) 2451 xmlGenericError(xmlGenericErrorContext, 2452 "new blanks wrapper for entity: %s\n", entity->name); 2453 input = xmlNewInputStream(ctxt); 2454 if (input == NULL) { 2455 return(NULL); 2456 } 2457 length = xmlStrlen(entity->name) + 5; 2458 buffer = xmlMallocAtomic(length); 2459 if (buffer == NULL) { 2460 xmlErrMemory(ctxt, NULL); 2461 xmlFree(input); 2462 return(NULL); 2463 } 2464 buffer [0] = ' '; 2465 buffer [1] = '%'; 2466 buffer [length-3] = ';'; 2467 buffer [length-2] = ' '; 2468 buffer [length-1] = 0; 2469 memcpy(buffer + 2, entity->name, length - 5); 2470 input->free = deallocblankswrapper; 2471 input->base = buffer; 2472 input->cur = buffer; 2473 input->length = length; 2474 input->end = &buffer[length]; 2475 return(input); 2476 } 2477 2478 /** 2479 * xmlParserHandlePEReference: 2480 * @ctxt: the parser context 2481 * 2482 * [69] PEReference ::= '%' Name ';' 2483 * 2484 * [ WFC: No Recursion ] 2485 * A parsed entity must not contain a recursive 2486 * reference to itself, either directly or indirectly. 
2487 * 2488 * [ WFC: Entity Declared ] 2489 * In a document without any DTD, a document with only an internal DTD 2490 * subset which contains no parameter entity references, or a document 2491 * with "standalone='yes'", ... ... The declaration of a parameter 2492 * entity must precede any reference to it... 2493 * 2494 * [ VC: Entity Declared ] 2495 * In a document with an external subset or external parameter entities 2496 * with "standalone='no'", ... ... The declaration of a parameter entity 2497 * must precede any reference to it... 2498 * 2499 * [ WFC: In DTD ] 2500 * Parameter-entity references may only appear in the DTD. 2501 * NOTE: misleading but this is handled. 2502 * 2503 * A PEReference may have been detected in the current input stream 2504 * the handling is done accordingly to 2505 * http://www.w3.org/TR/REC-xml#entproc 2506 * i.e. 2507 * - Included in literal in entity values 2508 * - Included as Parameter Entity reference within DTDs 2509 */ 2510 void 2511 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2512 const xmlChar *name; 2513 xmlEntityPtr entity = NULL; 2514 xmlParserInputPtr input; 2515 2516 if (RAW != '%') return; 2517 switch(ctxt->instate) { 2518 case XML_PARSER_CDATA_SECTION: 2519 return; 2520 case XML_PARSER_COMMENT: 2521 return; 2522 case XML_PARSER_START_TAG: 2523 return; 2524 case XML_PARSER_END_TAG: 2525 return; 2526 case XML_PARSER_EOF: 2527 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2528 return; 2529 case XML_PARSER_PROLOG: 2530 case XML_PARSER_START: 2531 case XML_PARSER_MISC: 2532 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2533 return; 2534 case XML_PARSER_ENTITY_DECL: 2535 case XML_PARSER_CONTENT: 2536 case XML_PARSER_ATTRIBUTE_VALUE: 2537 case XML_PARSER_PI: 2538 case XML_PARSER_SYSTEM_LITERAL: 2539 case XML_PARSER_PUBLIC_LITERAL: 2540 /* we just ignore it there */ 2541 return; 2542 case XML_PARSER_EPILOG: 2543 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2544 return; 2545 case XML_PARSER_ENTITY_VALUE: 
2546 /* 2547 * NOTE: in the case of entity values, we don't do the 2548 * substitution here since we need the literal 2549 * entity value to be able to save the internal 2550 * subset of the document. 2551 * This will be handled by xmlStringDecodeEntities 2552 */ 2553 return; 2554 case XML_PARSER_DTD: 2555 /* 2556 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2557 * In the internal DTD subset, parameter-entity references 2558 * can occur only where markup declarations can occur, not 2559 * within markup declarations. 2560 * In that case this is handled in xmlParseMarkupDecl 2561 */ 2562 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2563 return; 2564 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2565 return; 2566 break; 2567 case XML_PARSER_IGNORE: 2568 return; 2569 } 2570 2571 NEXT; 2572 name = xmlParseName(ctxt); 2573 if (xmlParserDebugEntities) 2574 xmlGenericError(xmlGenericErrorContext, 2575 "PEReference: %s\n", name); 2576 if (name == NULL) { 2577 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2578 } else { 2579 if (RAW == ';') { 2580 NEXT; 2581 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2582 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2583 if (ctxt->instate == XML_PARSER_EOF) 2584 return; 2585 if (entity == NULL) { 2586 2587 /* 2588 * [ WFC: Entity Declared ] 2589 * In a document without any DTD, a document with only an 2590 * internal DTD subset which contains no parameter entity 2591 * references, or a document with "standalone='yes'", ... 2592 * ... The declaration of a parameter entity must precede 2593 * any reference to it... 
2594 */ 2595 if ((ctxt->standalone == 1) || 2596 ((ctxt->hasExternalSubset == 0) && 2597 (ctxt->hasPErefs == 0))) { 2598 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2599 "PEReference: %%%s; not found\n", name); 2600 } else { 2601 /* 2602 * [ VC: Entity Declared ] 2603 * In a document with an external subset or external 2604 * parameter entities with "standalone='no'", ... 2605 * ... The declaration of a parameter entity must precede 2606 * any reference to it... 2607 */ 2608 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2609 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2610 "PEReference: %%%s; not found\n", 2611 name, NULL); 2612 } else 2613 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2614 "PEReference: %%%s; not found\n", 2615 name, NULL); 2616 ctxt->valid = 0; 2617 } 2618 xmlParserEntityCheck(ctxt, 0, NULL, 0); 2619 } else if (ctxt->input->free != deallocblankswrapper) { 2620 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2621 if (xmlPushInput(ctxt, input) < 0) 2622 return; 2623 } else { 2624 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2625 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2626 xmlChar start[4]; 2627 xmlCharEncoding enc; 2628 2629 /* 2630 * Note: external parameter entities will not be loaded, it 2631 * is not required for a non-validating parser, unless the 2632 * option of validating, or substituting entities were 2633 * given. Doing so is far more secure as the parser will 2634 * only process data coming from the document entity by 2635 * default. 2636 */ 2637 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2638 ((ctxt->options & XML_PARSE_NOENT) == 0) && 2639 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 2640 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 2641 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 2642 (ctxt->replaceEntities == 0) && 2643 (ctxt->validate == 0)) 2644 return; 2645 2646 /* 2647 * handle the extra spaces added before and after 2648 * c.f. 
http://www.w3.org/TR/REC-xml#as-PE 2649 * this is done independently. 2650 */ 2651 input = xmlNewEntityInputStream(ctxt, entity); 2652 if (xmlPushInput(ctxt, input) < 0) 2653 return; 2654 2655 /* 2656 * Get the 4 first bytes and decode the charset 2657 * if enc != XML_CHAR_ENCODING_NONE 2658 * plug some encoding conversion routines. 2659 * Note that, since we may have some non-UTF8 2660 * encoding (like UTF16, bug 135229), the 'length' 2661 * is not known, but we can calculate based upon 2662 * the amount of data in the buffer. 2663 */ 2664 GROW 2665 if (ctxt->instate == XML_PARSER_EOF) 2666 return; 2667 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2668 start[0] = RAW; 2669 start[1] = NXT(1); 2670 start[2] = NXT(2); 2671 start[3] = NXT(3); 2672 enc = xmlDetectCharEncoding(start, 4); 2673 if (enc != XML_CHAR_ENCODING_NONE) { 2674 xmlSwitchEncoding(ctxt, enc); 2675 } 2676 } 2677 2678 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2679 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2680 (IS_BLANK_CH(NXT(5)))) { 2681 xmlParseTextDecl(ctxt); 2682 } 2683 } else { 2684 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2685 "PEReference: %s is not a parameter entity\n", 2686 name); 2687 } 2688 } 2689 } else { 2690 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2691 } 2692 } 2693 } 2694 2695 /* 2696 * Macro used to grow the current buffer. 
2697 * buffer##_size is expected to be a size_t 2698 * mem_error: is expected to handle memory allocation failures 2699 */ 2700 #define growBuffer(buffer, n) { \ 2701 xmlChar *tmp; \ 2702 size_t new_size = buffer##_size * 2 + n; \ 2703 if (new_size < buffer##_size) goto mem_error; \ 2704 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ 2705 if (tmp == NULL) goto mem_error; \ 2706 buffer = tmp; \ 2707 buffer##_size = new_size; \ 2708 } 2709 2710 /** 2711 * xmlStringLenDecodeEntities: 2712 * @ctxt: the parser context 2713 * @str: the input string 2714 * @len: the string length 2715 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2716 * @end: an end marker xmlChar, 0 if none 2717 * @end2: an end marker xmlChar, 0 if none 2718 * @end3: an end marker xmlChar, 0 if none 2719 * 2720 * Takes a entity string content and process to do the adequate substitutions. 2721 * 2722 * [67] Reference ::= EntityRef | CharRef 2723 * 2724 * [69] PEReference ::= '%' Name ';' 2725 * 2726 * Returns A newly allocated string with the substitution done. The caller 2727 * must deallocate it ! 2728 */ 2729 xmlChar * 2730 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2731 int what, xmlChar end, xmlChar end2, xmlChar end3) { 2732 xmlChar *buffer = NULL; 2733 size_t buffer_size = 0; 2734 size_t nbchars = 0; 2735 2736 xmlChar *current = NULL; 2737 xmlChar *rep = NULL; 2738 const xmlChar *last; 2739 xmlEntityPtr ent; 2740 int c,l; 2741 2742 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2743 return(NULL); 2744 last = str + len; 2745 2746 if (((ctxt->depth > 40) && 2747 ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2748 (ctxt->depth > 1024)) { 2749 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2750 return(NULL); 2751 } 2752 2753 /* 2754 * allocate a translation buffer. 
2755 */ 2756 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2757 buffer = (xmlChar *) xmlMallocAtomic(buffer_size); 2758 if (buffer == NULL) goto mem_error; 2759 2760 /* 2761 * OK loop until we reach one of the ending char or a size limit. 2762 * we are operating on already parsed values. 2763 */ 2764 if (str < last) 2765 c = CUR_SCHAR(str, l); 2766 else 2767 c = 0; 2768 while ((c != 0) && (c != end) && /* non input consuming loop */ 2769 (c != end2) && (c != end3)) { 2770 2771 if (c == 0) break; 2772 if ((c == '&') && (str[1] == '#')) { 2773 int val = xmlParseStringCharRef(ctxt, &str); 2774 if (val != 0) { 2775 COPY_BUF(0,buffer,nbchars,val); 2776 } 2777 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2778 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2779 } 2780 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2781 if (xmlParserDebugEntities) 2782 xmlGenericError(xmlGenericErrorContext, 2783 "String decoding Entity Reference: %.30s\n", 2784 str); 2785 ent = xmlParseStringEntityRef(ctxt, &str); 2786 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2787 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2788 goto int_error; 2789 xmlParserEntityCheck(ctxt, 0, ent, 0); 2790 if (ent != NULL) 2791 ctxt->nbentities += ent->checked / 2; 2792 if ((ent != NULL) && 2793 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2794 if (ent->content != NULL) { 2795 COPY_BUF(0,buffer,nbchars,ent->content[0]); 2796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2798 } 2799 } else { 2800 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2801 "predefined entity has no content\n"); 2802 } 2803 } else if ((ent != NULL) && (ent->content != NULL)) { 2804 ctxt->depth++; 2805 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2806 0, 0, 0); 2807 ctxt->depth--; 2808 2809 if (rep != NULL) { 2810 current = rep; 2811 while (*current != 0) { /* non input consuming loop */ 2812 buffer[nbchars++] = *current++; 2813 if (nbchars + 
XML_PARSER_BUFFER_SIZE > buffer_size) { 2814 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2815 goto int_error; 2816 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2817 } 2818 } 2819 xmlFree(rep); 2820 rep = NULL; 2821 } 2822 } else if (ent != NULL) { 2823 int i = xmlStrlen(ent->name); 2824 const xmlChar *cur = ent->name; 2825 2826 buffer[nbchars++] = '&'; 2827 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { 2828 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); 2829 } 2830 for (;i > 0;i--) 2831 buffer[nbchars++] = *cur++; 2832 buffer[nbchars++] = ';'; 2833 } 2834 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2835 if (xmlParserDebugEntities) 2836 xmlGenericError(xmlGenericErrorContext, 2837 "String decoding PE Reference: %.30s\n", str); 2838 ent = xmlParseStringPEReference(ctxt, &str); 2839 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 2840 goto int_error; 2841 xmlParserEntityCheck(ctxt, 0, ent, 0); 2842 if (ent != NULL) 2843 ctxt->nbentities += ent->checked / 2; 2844 if (ent != NULL) { 2845 if (ent->content == NULL) { 2846 xmlLoadEntityContent(ctxt, ent); 2847 } 2848 ctxt->depth++; 2849 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2850 0, 0, 0); 2851 ctxt->depth--; 2852 if (rep != NULL) { 2853 current = rep; 2854 while (*current != 0) { /* non input consuming loop */ 2855 buffer[nbchars++] = *current++; 2856 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2857 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2858 goto int_error; 2859 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2860 } 2861 } 2862 xmlFree(rep); 2863 rep = NULL; 2864 } 2865 } 2866 } else { 2867 COPY_BUF(l,buffer,nbchars,c); 2868 str += l; 2869 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2870 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2871 } 2872 } 2873 if (str < last) 2874 c = CUR_SCHAR(str, l); 2875 else 2876 c = 0; 2877 } 2878 buffer[nbchars] = 0; 2879 return(buffer); 2880 2881 mem_error: 2882 xmlErrMemory(ctxt, NULL); 2883 int_error: 2884 if (rep != 
NULL) 2885 xmlFree(rep); 2886 if (buffer != NULL) 2887 xmlFree(buffer); 2888 return(NULL); 2889 } 2890 2891 /** 2892 * xmlStringDecodeEntities: 2893 * @ctxt: the parser context 2894 * @str: the input string 2895 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2896 * @end: an end marker xmlChar, 0 if none 2897 * @end2: an end marker xmlChar, 0 if none 2898 * @end3: an end marker xmlChar, 0 if none 2899 * 2900 * Takes a entity string content and process to do the adequate substitutions. 2901 * 2902 * [67] Reference ::= EntityRef | CharRef 2903 * 2904 * [69] PEReference ::= '%' Name ';' 2905 * 2906 * Returns A newly allocated string with the substitution done. The caller 2907 * must deallocate it ! 2908 */ 2909 xmlChar * 2910 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2911 xmlChar end, xmlChar end2, xmlChar end3) { 2912 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2913 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2914 end, end2, end3)); 2915 } 2916 2917 /************************************************************************ 2918 * * 2919 * Commodity functions, cleanup needed ? * 2920 * * 2921 ************************************************************************/ 2922 2923 /** 2924 * areBlanks: 2925 * @ctxt: an XML parser context 2926 * @str: a xmlChar * 2927 * @len: the size of @str 2928 * @blank_chars: we know the chars are blanks 2929 * 2930 * Is this a sequence of blank chars that one can ignore ? 2931 * 2932 * Returns 1 if ignorable 0 otherwise. 2933 */ 2934 2935 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2936 int blank_chars) { 2937 int i, ret; 2938 xmlNodePtr lastChild; 2939 2940 /* 2941 * Don't spend time trying to differentiate them, the same callback is 2942 * used ! 2943 */ 2944 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2945 return(0); 2946 2947 /* 2948 * Check for xml:space value. 
2949 */ 2950 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2951 (*(ctxt->space) == -2)) 2952 return(0); 2953 2954 /* 2955 * Check that the string is made of blanks 2956 */ 2957 if (blank_chars == 0) { 2958 for (i = 0;i < len;i++) 2959 if (!(IS_BLANK_CH(str[i]))) return(0); 2960 } 2961 2962 /* 2963 * Look if the element is mixed content in the DTD if available 2964 */ 2965 if (ctxt->node == NULL) return(0); 2966 if (ctxt->myDoc != NULL) { 2967 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2968 if (ret == 0) return(1); 2969 if (ret == 1) return(0); 2970 } 2971 2972 /* 2973 * Otherwise, heuristic :-\ 2974 */ 2975 if ((RAW != '<') && (RAW != 0xD)) return(0); 2976 if ((ctxt->node->children == NULL) && 2977 (RAW == '<') && (NXT(1) == '/')) return(0); 2978 2979 lastChild = xmlGetLastChild(ctxt->node); 2980 if (lastChild == NULL) { 2981 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2982 (ctxt->node->content != NULL)) return(0); 2983 } else if (xmlNodeIsText(lastChild)) 2984 return(0); 2985 else if ((ctxt->node->children != NULL) && 2986 (xmlNodeIsText(ctxt->node->children))) 2987 return(0); 2988 return(1); 2989 } 2990 2991 /************************************************************************ 2992 * * 2993 * Extra stuff for namespace support * 2994 * Relates to http://www.w3.org/TR/WD-xml-names * 2995 * * 2996 ************************************************************************/ 2997 2998 /** 2999 * xmlSplitQName: 3000 * @ctxt: an XML parser context 3001 * @name: an XML parser context 3002 * @prefix: a xmlChar ** 3003 * 3004 * parse an UTF8 encoded XML qualified name string 3005 * 3006 * [NS 5] QName ::= (Prefix ':')? LocalPart 3007 * 3008 * [NS 6] Prefix ::= NCName 3009 * 3010 * [NS 7] LocalPart ::= NCName 3011 * 3012 * Returns the local part, and prefix is updated 3013 * to get the Prefix if any. 
3014 */ 3015 3016 xmlChar * 3017 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 3018 xmlChar buf[XML_MAX_NAMELEN + 5]; 3019 xmlChar *buffer = NULL; 3020 int len = 0; 3021 int max = XML_MAX_NAMELEN; 3022 xmlChar *ret = NULL; 3023 const xmlChar *cur = name; 3024 int c; 3025 3026 if (prefix == NULL) return(NULL); 3027 *prefix = NULL; 3028 3029 if (cur == NULL) return(NULL); 3030 3031 #ifndef XML_XML_NAMESPACE 3032 /* xml: prefix is not really a namespace */ 3033 if ((cur[0] == 'x') && (cur[1] == 'm') && 3034 (cur[2] == 'l') && (cur[3] == ':')) 3035 return(xmlStrdup(name)); 3036 #endif 3037 3038 /* nasty but well=formed */ 3039 if (cur[0] == ':') 3040 return(xmlStrdup(name)); 3041 3042 c = *cur++; 3043 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 3044 buf[len++] = c; 3045 c = *cur++; 3046 } 3047 if (len >= max) { 3048 /* 3049 * Okay someone managed to make a huge name, so he's ready to pay 3050 * for the processing speed. 3051 */ 3052 max = len * 2; 3053 3054 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3055 if (buffer == NULL) { 3056 xmlErrMemory(ctxt, NULL); 3057 return(NULL); 3058 } 3059 memcpy(buffer, buf, len); 3060 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 3061 if (len + 10 > max) { 3062 xmlChar *tmp; 3063 3064 max *= 2; 3065 tmp = (xmlChar *) xmlRealloc(buffer, 3066 max * sizeof(xmlChar)); 3067 if (tmp == NULL) { 3068 xmlFree(buffer); 3069 xmlErrMemory(ctxt, NULL); 3070 return(NULL); 3071 } 3072 buffer = tmp; 3073 } 3074 buffer[len++] = c; 3075 c = *cur++; 3076 } 3077 buffer[len] = 0; 3078 } 3079 3080 if ((c == ':') && (*cur == 0)) { 3081 if (buffer != NULL) 3082 xmlFree(buffer); 3083 *prefix = NULL; 3084 return(xmlStrdup(name)); 3085 } 3086 3087 if (buffer == NULL) 3088 ret = xmlStrndup(buf, len); 3089 else { 3090 ret = buffer; 3091 buffer = NULL; 3092 max = XML_MAX_NAMELEN; 3093 } 3094 3095 3096 if (c == ':') { 3097 c = *cur; 3098 *prefix = ret; 3099 if (c == 0) { 3100 
return(xmlStrndup(BAD_CAST "", 0)); 3101 } 3102 len = 0; 3103 3104 /* 3105 * Check that the first character is proper to start 3106 * a new name 3107 */ 3108 if (!(((c >= 0x61) && (c <= 0x7A)) || 3109 ((c >= 0x41) && (c <= 0x5A)) || 3110 (c == '_') || (c == ':'))) { 3111 int l; 3112 int first = CUR_SCHAR(cur, l); 3113 3114 if (!IS_LETTER(first) && (first != '_')) { 3115 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3116 "Name %s is not XML Namespace compliant\n", 3117 name); 3118 } 3119 } 3120 cur++; 3121 3122 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3123 buf[len++] = c; 3124 c = *cur++; 3125 } 3126 if (len >= max) { 3127 /* 3128 * Okay someone managed to make a huge name, so he's ready to pay 3129 * for the processing speed. 3130 */ 3131 max = len * 2; 3132 3133 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3134 if (buffer == NULL) { 3135 xmlErrMemory(ctxt, NULL); 3136 return(NULL); 3137 } 3138 memcpy(buffer, buf, len); 3139 while (c != 0) { /* tested bigname2.xml */ 3140 if (len + 10 > max) { 3141 xmlChar *tmp; 3142 3143 max *= 2; 3144 tmp = (xmlChar *) xmlRealloc(buffer, 3145 max * sizeof(xmlChar)); 3146 if (tmp == NULL) { 3147 xmlErrMemory(ctxt, NULL); 3148 xmlFree(buffer); 3149 return(NULL); 3150 } 3151 buffer = tmp; 3152 } 3153 buffer[len++] = c; 3154 c = *cur++; 3155 } 3156 buffer[len] = 0; 3157 } 3158 3159 if (buffer == NULL) 3160 ret = xmlStrndup(buf, len); 3161 else { 3162 ret = buffer; 3163 } 3164 } 3165 3166 return(ret); 3167 } 3168 3169 /************************************************************************ 3170 * * 3171 * The parser itself * 3172 * Relates to http://www.w3.org/TR/REC-xml * 3173 * * 3174 ************************************************************************/ 3175 3176 /************************************************************************ 3177 * * 3178 * Routines to parse Name, NCName and NmToken * 3179 * * 3180 ************************************************************************/ 3181 #ifdef 
DEBUG 3182 static unsigned long nbParseName = 0; 3183 static unsigned long nbParseNmToken = 0; 3184 static unsigned long nbParseNCName = 0; 3185 static unsigned long nbParseNCNameComplex = 0; 3186 static unsigned long nbParseNameComplex = 0; 3187 static unsigned long nbParseStringName = 0; 3188 #endif 3189 3190 /* 3191 * The two following functions are related to the change of accepted 3192 * characters for Name and NmToken in the Revision 5 of XML-1.0 3193 * They correspond to the modified production [4] and the new production [4a] 3194 * changes in that revision. Also note that the macros used for the 3195 * productions Letter, Digit, CombiningChar and Extender are not needed 3196 * anymore. 3197 * We still keep compatibility to pre-revision5 parsing semantic if the 3198 * new XML_PARSE_OLD10 option is given to the parser. 3199 */ 3200 static int 3201 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3202 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3203 /* 3204 * Use the new checks of production [4] [4a] amd [5] of the 3205 * Update 5 of XML-1.0 3206 */ 3207 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3208 (((c >= 'a') && (c <= 'z')) || 3209 ((c >= 'A') && (c <= 'Z')) || 3210 (c == '_') || (c == ':') || 3211 ((c >= 0xC0) && (c <= 0xD6)) || 3212 ((c >= 0xD8) && (c <= 0xF6)) || 3213 ((c >= 0xF8) && (c <= 0x2FF)) || 3214 ((c >= 0x370) && (c <= 0x37D)) || 3215 ((c >= 0x37F) && (c <= 0x1FFF)) || 3216 ((c >= 0x200C) && (c <= 0x200D)) || 3217 ((c >= 0x2070) && (c <= 0x218F)) || 3218 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3219 ((c >= 0x3001) && (c <= 0xD7FF)) || 3220 ((c >= 0xF900) && (c <= 0xFDCF)) || 3221 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3222 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3223 return(1); 3224 } else { 3225 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3226 return(1); 3227 } 3228 return(0); 3229 } 3230 3231 static int 3232 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3233 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3234 /* 3235 * Use 
the new checks of production [4] [4a] amd [5] of the 3236 * Update 5 of XML-1.0 3237 */ 3238 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3239 (((c >= 'a') && (c <= 'z')) || 3240 ((c >= 'A') && (c <= 'Z')) || 3241 ((c >= '0') && (c <= '9')) || /* !start */ 3242 (c == '_') || (c == ':') || 3243 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3244 ((c >= 0xC0) && (c <= 0xD6)) || 3245 ((c >= 0xD8) && (c <= 0xF6)) || 3246 ((c >= 0xF8) && (c <= 0x2FF)) || 3247 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3248 ((c >= 0x370) && (c <= 0x37D)) || 3249 ((c >= 0x37F) && (c <= 0x1FFF)) || 3250 ((c >= 0x200C) && (c <= 0x200D)) || 3251 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3252 ((c >= 0x2070) && (c <= 0x218F)) || 3253 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3254 ((c >= 0x3001) && (c <= 0xD7FF)) || 3255 ((c >= 0xF900) && (c <= 0xFDCF)) || 3256 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3257 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3258 return(1); 3259 } else { 3260 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3261 (c == '.') || (c == '-') || 3262 (c == '_') || (c == ':') || 3263 (IS_COMBINING(c)) || 3264 (IS_EXTENDER(c))) 3265 return(1); 3266 } 3267 return(0); 3268 } 3269 3270 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3271 int *len, int *alloc, int normalize); 3272 3273 static const xmlChar * 3274 xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3275 int len = 0, l; 3276 int c; 3277 int count = 0; 3278 3279 #ifdef DEBUG 3280 nbParseNameComplex++; 3281 #endif 3282 3283 /* 3284 * Handler for more complex cases 3285 */ 3286 GROW; 3287 if (ctxt->instate == XML_PARSER_EOF) 3288 return(NULL); 3289 c = CUR_CHAR(l); 3290 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3291 /* 3292 * Use the new checks of production [4] [4a] amd [5] of the 3293 * Update 5 of XML-1.0 3294 */ 3295 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3296 (!(((c >= 'a') && (c <= 'z')) || 3297 ((c >= 'A') && (c <= 'Z')) || 3298 (c == '_') || (c == ':') || 3299 
((c >= 0xC0) && (c <= 0xD6)) || 3300 ((c >= 0xD8) && (c <= 0xF6)) || 3301 ((c >= 0xF8) && (c <= 0x2FF)) || 3302 ((c >= 0x370) && (c <= 0x37D)) || 3303 ((c >= 0x37F) && (c <= 0x1FFF)) || 3304 ((c >= 0x200C) && (c <= 0x200D)) || 3305 ((c >= 0x2070) && (c <= 0x218F)) || 3306 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3307 ((c >= 0x3001) && (c <= 0xD7FF)) || 3308 ((c >= 0xF900) && (c <= 0xFDCF)) || 3309 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3310 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3311 return(NULL); 3312 } 3313 len += l; 3314 NEXTL(l); 3315 c = CUR_CHAR(l); 3316 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3317 (((c >= 'a') && (c <= 'z')) || 3318 ((c >= 'A') && (c <= 'Z')) || 3319 ((c >= '0') && (c <= '9')) || /* !start */ 3320 (c == '_') || (c == ':') || 3321 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3322 ((c >= 0xC0) && (c <= 0xD6)) || 3323 ((c >= 0xD8) && (c <= 0xF6)) || 3324 ((c >= 0xF8) && (c <= 0x2FF)) || 3325 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3326 ((c >= 0x370) && (c <= 0x37D)) || 3327 ((c >= 0x37F) && (c <= 0x1FFF)) || 3328 ((c >= 0x200C) && (c <= 0x200D)) || 3329 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3330 ((c >= 0x2070) && (c <= 0x218F)) || 3331 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3332 ((c >= 0x3001) && (c <= 0xD7FF)) || 3333 ((c >= 0xF900) && (c <= 0xFDCF)) || 3334 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3335 ((c >= 0x10000) && (c <= 0xEFFFF)) 3336 )) { 3337 if (count++ > XML_PARSER_CHUNK_SIZE) { 3338 count = 0; 3339 GROW; 3340 if (ctxt->instate == XML_PARSER_EOF) 3341 return(NULL); 3342 } 3343 len += l; 3344 NEXTL(l); 3345 c = CUR_CHAR(l); 3346 } 3347 } else { 3348 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3349 (!IS_LETTER(c) && (c != '_') && 3350 (c != ':'))) { 3351 return(NULL); 3352 } 3353 len += l; 3354 NEXTL(l); 3355 c = CUR_CHAR(l); 3356 3357 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3358 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3359 (c == '.') || (c == 
'-') || 3360 (c == '_') || (c == ':') || 3361 (IS_COMBINING(c)) || 3362 (IS_EXTENDER(c)))) { 3363 if (count++ > XML_PARSER_CHUNK_SIZE) { 3364 count = 0; 3365 GROW; 3366 if (ctxt->instate == XML_PARSER_EOF) 3367 return(NULL); 3368 } 3369 len += l; 3370 NEXTL(l); 3371 c = CUR_CHAR(l); 3372 if (c == 0) { 3373 count = 0; 3374 GROW; 3375 if (ctxt->instate == XML_PARSER_EOF) 3376 return(NULL); 3377 c = CUR_CHAR(l); 3378 } 3379 } 3380 } 3381 if ((len > XML_MAX_NAME_LENGTH) && 3382 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3383 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3384 return(NULL); 3385 } 3386 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3387 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3388 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3389 } 3390 3391 /** 3392 * xmlParseName: 3393 * @ctxt: an XML parser context 3394 * 3395 * parse an XML name. 3396 * 3397 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3398 * CombiningChar | Extender 3399 * 3400 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3401 * 3402 * [6] Names ::= Name (#x20 Name)* 3403 * 3404 * Returns the Name parsed or NULL 3405 */ 3406 3407 const xmlChar * 3408 xmlParseName(xmlParserCtxtPtr ctxt) { 3409 const xmlChar *in; 3410 const xmlChar *ret; 3411 int count = 0; 3412 3413 GROW; 3414 3415 #ifdef DEBUG 3416 nbParseName++; 3417 #endif 3418 3419 /* 3420 * Accelerator for simple ASCII names 3421 */ 3422 in = ctxt->input->cur; 3423 if (((*in >= 0x61) && (*in <= 0x7A)) || 3424 ((*in >= 0x41) && (*in <= 0x5A)) || 3425 (*in == '_') || (*in == ':')) { 3426 in++; 3427 while (((*in >= 0x61) && (*in <= 0x7A)) || 3428 ((*in >= 0x41) && (*in <= 0x5A)) || 3429 ((*in >= 0x30) && (*in <= 0x39)) || 3430 (*in == '_') || (*in == '-') || 3431 (*in == ':') || (*in == '.')) 3432 in++; 3433 if ((*in > 0) && (*in < 0x80)) { 3434 count = in - ctxt->input->cur; 3435 if ((count > XML_MAX_NAME_LENGTH) && 3436 ((ctxt->options & XML_PARSE_HUGE) == 
0)) { 3437 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3438 return(NULL); 3439 } 3440 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3441 ctxt->input->cur = in; 3442 ctxt->nbChars += count; 3443 ctxt->input->col += count; 3444 if (ret == NULL) 3445 xmlErrMemory(ctxt, NULL); 3446 return(ret); 3447 } 3448 } 3449 /* accelerator for special cases */ 3450 return(xmlParseNameComplex(ctxt)); 3451 } 3452 3453 static const xmlChar * 3454 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3455 int len = 0, l; 3456 int c; 3457 int count = 0; 3458 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */ 3459 3460 #ifdef DEBUG 3461 nbParseNCNameComplex++; 3462 #endif 3463 3464 /* 3465 * Handler for more complex cases 3466 */ 3467 GROW; 3468 end = ctxt->input->cur; 3469 c = CUR_CHAR(l); 3470 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3471 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3472 return(NULL); 3473 } 3474 3475 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3476 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3477 if (count++ > XML_PARSER_CHUNK_SIZE) { 3478 if ((len > XML_MAX_NAME_LENGTH) && 3479 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3480 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3481 return(NULL); 3482 } 3483 count = 0; 3484 GROW; 3485 if (ctxt->instate == XML_PARSER_EOF) 3486 return(NULL); 3487 } 3488 len += l; 3489 NEXTL(l); 3490 end = ctxt->input->cur; 3491 c = CUR_CHAR(l); 3492 if (c == 0) { 3493 count = 0; 3494 GROW; 3495 if (ctxt->instate == XML_PARSER_EOF) 3496 return(NULL); 3497 end = ctxt->input->cur; 3498 c = CUR_CHAR(l); 3499 } 3500 } 3501 if ((len > XML_MAX_NAME_LENGTH) && 3502 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3503 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3504 return(NULL); 3505 } 3506 return(xmlDictLookup(ctxt->dict, end - len, len)); 3507 } 3508 3509 /** 3510 * xmlParseNCName: 3511 * @ctxt: an XML parser context 3512 * @len: length of the string 
parsed 3513 * 3514 * parse an XML name. 3515 * 3516 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 3517 * CombiningChar | Extender 3518 * 3519 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3520 * 3521 * Returns the Name parsed or NULL 3522 */ 3523 3524 static const xmlChar * 3525 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3526 const xmlChar *in; 3527 const xmlChar *ret; 3528 int count = 0; 3529 3530 #ifdef DEBUG 3531 nbParseNCName++; 3532 #endif 3533 3534 /* 3535 * Accelerator for simple ASCII names 3536 */ 3537 in = ctxt->input->cur; 3538 if (((*in >= 0x61) && (*in <= 0x7A)) || 3539 ((*in >= 0x41) && (*in <= 0x5A)) || 3540 (*in == '_')) { 3541 in++; 3542 while (((*in >= 0x61) && (*in <= 0x7A)) || 3543 ((*in >= 0x41) && (*in <= 0x5A)) || 3544 ((*in >= 0x30) && (*in <= 0x39)) || 3545 (*in == '_') || (*in == '-') || 3546 (*in == '.')) 3547 in++; 3548 if ((*in > 0) && (*in < 0x80)) { 3549 count = in - ctxt->input->cur; 3550 if ((count > XML_MAX_NAME_LENGTH) && 3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3553 return(NULL); 3554 } 3555 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3556 ctxt->input->cur = in; 3557 ctxt->nbChars += count; 3558 ctxt->input->col += count; 3559 if (ret == NULL) { 3560 xmlErrMemory(ctxt, NULL); 3561 } 3562 return(ret); 3563 } 3564 } 3565 return(xmlParseNCNameComplex(ctxt)); 3566 } 3567 3568 /** 3569 * xmlParseNameAndCompare: 3570 * @ctxt: an XML parser context 3571 * 3572 * parse an XML name and compares for match 3573 * (specialized for endtag parsing) 3574 * 3575 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3576 * and the name for mismatch 3577 */ 3578 3579 static const xmlChar * 3580 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3581 register const xmlChar *cmp = other; 3582 register const xmlChar *in; 3583 const xmlChar *ret; 3584 3585 GROW; 3586 if (ctxt->instate == XML_PARSER_EOF) 3587 return(NULL); 3588 3589 in = 
ctxt->input->cur; 3590 while (*in != 0 && *in == *cmp) { 3591 ++in; 3592 ++cmp; 3593 ctxt->input->col++; 3594 } 3595 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3596 /* success */ 3597 ctxt->input->cur = in; 3598 return (const xmlChar*) 1; 3599 } 3600 /* failure (or end of input buffer), check with full function */ 3601 ret = xmlParseName (ctxt); 3602 /* strings coming from the dictionnary direct compare possible */ 3603 if (ret == other) { 3604 return (const xmlChar*) 1; 3605 } 3606 return ret; 3607 } 3608 3609 /** 3610 * xmlParseStringName: 3611 * @ctxt: an XML parser context 3612 * @str: a pointer to the string pointer (IN/OUT) 3613 * 3614 * parse an XML name. 3615 * 3616 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3617 * CombiningChar | Extender 3618 * 3619 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3620 * 3621 * [6] Names ::= Name (#x20 Name)* 3622 * 3623 * Returns the Name parsed or NULL. The @str pointer 3624 * is updated to the current location in the string. 3625 */ 3626 3627 static xmlChar * 3628 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3629 xmlChar buf[XML_MAX_NAMELEN + 5]; 3630 const xmlChar *cur = *str; 3631 int len = 0, l; 3632 int c; 3633 3634 #ifdef DEBUG 3635 nbParseStringName++; 3636 #endif 3637 3638 c = CUR_SCHAR(cur, l); 3639 if (!xmlIsNameStartChar(ctxt, c)) { 3640 return(NULL); 3641 } 3642 3643 COPY_BUF(l,buf,len,c); 3644 cur += l; 3645 c = CUR_SCHAR(cur, l); 3646 while (xmlIsNameChar(ctxt, c)) { 3647 COPY_BUF(l,buf,len,c); 3648 cur += l; 3649 c = CUR_SCHAR(cur, l); 3650 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3651 /* 3652 * Okay someone managed to make a huge name, so he's ready to pay 3653 * for the processing speed. 
3654 */ 3655 xmlChar *buffer; 3656 int max = len * 2; 3657 3658 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3659 if (buffer == NULL) { 3660 xmlErrMemory(ctxt, NULL); 3661 return(NULL); 3662 } 3663 memcpy(buffer, buf, len); 3664 while (xmlIsNameChar(ctxt, c)) { 3665 if (len + 10 > max) { 3666 xmlChar *tmp; 3667 3668 if ((len > XML_MAX_NAME_LENGTH) && 3669 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3670 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3671 xmlFree(buffer); 3672 return(NULL); 3673 } 3674 max *= 2; 3675 tmp = (xmlChar *) xmlRealloc(buffer, 3676 max * sizeof(xmlChar)); 3677 if (tmp == NULL) { 3678 xmlErrMemory(ctxt, NULL); 3679 xmlFree(buffer); 3680 return(NULL); 3681 } 3682 buffer = tmp; 3683 } 3684 COPY_BUF(l,buffer,len,c); 3685 cur += l; 3686 c = CUR_SCHAR(cur, l); 3687 } 3688 buffer[len] = 0; 3689 *str = cur; 3690 return(buffer); 3691 } 3692 } 3693 if ((len > XML_MAX_NAME_LENGTH) && 3694 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3695 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3696 return(NULL); 3697 } 3698 *str = cur; 3699 return(xmlStrndup(buf, len)); 3700 } 3701 3702 /** 3703 * xmlParseNmtoken: 3704 * @ctxt: an XML parser context 3705 * 3706 * parse an XML Nmtoken. 
3707 * 3708 * [7] Nmtoken ::= (NameChar)+ 3709 * 3710 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3711 * 3712 * Returns the Nmtoken parsed or NULL 3713 */ 3714 3715 xmlChar * 3716 xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3717 xmlChar buf[XML_MAX_NAMELEN + 5]; 3718 int len = 0, l; 3719 int c; 3720 int count = 0; 3721 3722 #ifdef DEBUG 3723 nbParseNmToken++; 3724 #endif 3725 3726 GROW; 3727 if (ctxt->instate == XML_PARSER_EOF) 3728 return(NULL); 3729 c = CUR_CHAR(l); 3730 3731 while (xmlIsNameChar(ctxt, c)) { 3732 if (count++ > XML_PARSER_CHUNK_SIZE) { 3733 count = 0; 3734 GROW; 3735 } 3736 COPY_BUF(l,buf,len,c); 3737 NEXTL(l); 3738 c = CUR_CHAR(l); 3739 if (c == 0) { 3740 count = 0; 3741 GROW; 3742 if (ctxt->instate == XML_PARSER_EOF) 3743 return(NULL); 3744 c = CUR_CHAR(l); 3745 } 3746 if (len >= XML_MAX_NAMELEN) { 3747 /* 3748 * Okay someone managed to make a huge token, so he's ready to pay 3749 * for the processing speed. 3750 */ 3751 xmlChar *buffer; 3752 int max = len * 2; 3753 3754 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3755 if (buffer == NULL) { 3756 xmlErrMemory(ctxt, NULL); 3757 return(NULL); 3758 } 3759 memcpy(buffer, buf, len); 3760 while (xmlIsNameChar(ctxt, c)) { 3761 if (count++ > XML_PARSER_CHUNK_SIZE) { 3762 count = 0; 3763 GROW; 3764 if (ctxt->instate == XML_PARSER_EOF) { 3765 xmlFree(buffer); 3766 return(NULL); 3767 } 3768 } 3769 if (len + 10 > max) { 3770 xmlChar *tmp; 3771 3772 if ((max > XML_MAX_NAME_LENGTH) && 3773 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3774 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3775 xmlFree(buffer); 3776 return(NULL); 3777 } 3778 max *= 2; 3779 tmp = (xmlChar *) xmlRealloc(buffer, 3780 max * sizeof(xmlChar)); 3781 if (tmp == NULL) { 3782 xmlErrMemory(ctxt, NULL); 3783 xmlFree(buffer); 3784 return(NULL); 3785 } 3786 buffer = tmp; 3787 } 3788 COPY_BUF(l,buffer,len,c); 3789 NEXTL(l); 3790 c = CUR_CHAR(l); 3791 } 3792 buffer[len] = 0; 3793 return(buffer); 3794 } 3795 } 3796 if (len == 0) 
3797 return(NULL); 3798 if ((len > XML_MAX_NAME_LENGTH) && 3799 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3800 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3801 return(NULL); 3802 } 3803 return(xmlStrndup(buf, len)); 3804 } 3805 3806 /** 3807 * xmlParseEntityValue: 3808 * @ctxt: an XML parser context 3809 * @orig: if non-NULL store a copy of the original entity value 3810 * 3811 * parse a value for ENTITY declarations 3812 * 3813 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3814 * "'" ([^%&'] | PEReference | Reference)* "'" 3815 * 3816 * Returns the EntityValue parsed with reference substituted or NULL 3817 */ 3818 3819 xmlChar * 3820 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3821 xmlChar *buf = NULL; 3822 int len = 0; 3823 int size = XML_PARSER_BUFFER_SIZE; 3824 int c, l; 3825 xmlChar stop; 3826 xmlChar *ret = NULL; 3827 const xmlChar *cur = NULL; 3828 xmlParserInputPtr input; 3829 3830 if (RAW == '"') stop = '"'; 3831 else if (RAW == '\'') stop = '\''; 3832 else { 3833 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3834 return(NULL); 3835 } 3836 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3837 if (buf == NULL) { 3838 xmlErrMemory(ctxt, NULL); 3839 return(NULL); 3840 } 3841 3842 /* 3843 * The content of the entity definition is copied in a buffer. 3844 */ 3845 3846 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3847 input = ctxt->input; 3848 GROW; 3849 if (ctxt->instate == XML_PARSER_EOF) { 3850 xmlFree(buf); 3851 return(NULL); 3852 } 3853 NEXT; 3854 c = CUR_CHAR(l); 3855 /* 3856 * NOTE: 4.4.5 Included in Literal 3857 * When a parameter entity reference appears in a literal entity 3858 * value, ... a single or double quote character in the replacement 3859 * text is always treated as a normal data character and will not 3860 * terminate the literal. 
3861 * In practice it means we stop the loop only when back at parsing 3862 * the initial entity and the quote is found 3863 */ 3864 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3865 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3866 if (len + 5 >= size) { 3867 xmlChar *tmp; 3868 3869 size *= 2; 3870 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3871 if (tmp == NULL) { 3872 xmlErrMemory(ctxt, NULL); 3873 xmlFree(buf); 3874 return(NULL); 3875 } 3876 buf = tmp; 3877 } 3878 COPY_BUF(l,buf,len,c); 3879 NEXTL(l); 3880 /* 3881 * Pop-up of finished entities. 3882 */ 3883 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3884 xmlPopInput(ctxt); 3885 3886 GROW; 3887 c = CUR_CHAR(l); 3888 if (c == 0) { 3889 GROW; 3890 c = CUR_CHAR(l); 3891 } 3892 } 3893 buf[len] = 0; 3894 if (ctxt->instate == XML_PARSER_EOF) { 3895 xmlFree(buf); 3896 return(NULL); 3897 } 3898 3899 /* 3900 * Raise problem w.r.t. '&' and '%' being used in non-entities 3901 * reference constructs. Note Charref will be handled in 3902 * xmlStringDecodeEntities() 3903 */ 3904 cur = buf; 3905 while (*cur != 0) { /* non input consuming */ 3906 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3907 xmlChar *name; 3908 xmlChar tmp = *cur; 3909 3910 cur++; 3911 name = xmlParseStringName(ctxt, &cur); 3912 if ((name == NULL) || (*cur != ';')) { 3913 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3914 "EntityValue: '%c' forbidden except for entities references\n", 3915 tmp); 3916 } 3917 if ((tmp == '%') && (ctxt->inSubset == 1) && 3918 (ctxt->inputNr == 1)) { 3919 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3920 } 3921 if (name != NULL) 3922 xmlFree(name); 3923 if (*cur == 0) 3924 break; 3925 } 3926 cur++; 3927 } 3928 3929 /* 3930 * Then PEReference entities are substituted. 
3931 */ 3932 if (c != stop) { 3933 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3934 xmlFree(buf); 3935 } else { 3936 NEXT; 3937 /* 3938 * NOTE: 4.4.7 Bypassed 3939 * When a general entity reference appears in the EntityValue in 3940 * an entity declaration, it is bypassed and left as is. 3941 * so XML_SUBSTITUTE_REF is not set here. 3942 */ 3943 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3944 0, 0, 0); 3945 if (orig != NULL) 3946 *orig = buf; 3947 else 3948 xmlFree(buf); 3949 } 3950 3951 return(ret); 3952 } 3953 3954 /** 3955 * xmlParseAttValueComplex: 3956 * @ctxt: an XML parser context 3957 * @len: the resulting attribute len 3958 * @normalize: wether to apply the inner normalization 3959 * 3960 * parse a value for an attribute, this is the fallback function 3961 * of xmlParseAttValue() when the attribute parsing requires handling 3962 * of non-ASCII characters, or normalization compaction. 3963 * 3964 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3965 */ 3966 static xmlChar * 3967 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3968 xmlChar limit = 0; 3969 xmlChar *buf = NULL; 3970 xmlChar *rep = NULL; 3971 size_t len = 0; 3972 size_t buf_size = 0; 3973 int c, l, in_space = 0; 3974 xmlChar *current = NULL; 3975 xmlEntityPtr ent; 3976 3977 if (NXT(0) == '"') { 3978 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3979 limit = '"'; 3980 NEXT; 3981 } else if (NXT(0) == '\'') { 3982 limit = '\''; 3983 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3984 NEXT; 3985 } else { 3986 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3987 return(NULL); 3988 } 3989 3990 /* 3991 * allocate a translation buffer. 3992 */ 3993 buf_size = XML_PARSER_BUFFER_SIZE; 3994 buf = (xmlChar *) xmlMallocAtomic(buf_size); 3995 if (buf == NULL) goto mem_error; 3996 3997 /* 3998 * OK loop until we reach one of the ending char or a size limit. 
3999 */ 4000 c = CUR_CHAR(l); 4001 while (((NXT(0) != limit) && /* checked */ 4002 (IS_CHAR(c)) && (c != '<')) && 4003 (ctxt->instate != XML_PARSER_EOF)) { 4004 /* 4005 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE 4006 * special option is given 4007 */ 4008 if ((len > XML_MAX_TEXT_LENGTH) && 4009 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4010 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4011 "AttValue length too long\n"); 4012 goto mem_error; 4013 } 4014 if (c == 0) break; 4015 if (c == '&') { 4016 in_space = 0; 4017 if (NXT(1) == '#') { 4018 int val = xmlParseCharRef(ctxt); 4019 4020 if (val == '&') { 4021 if (ctxt->replaceEntities) { 4022 if (len + 10 > buf_size) { 4023 growBuffer(buf, 10); 4024 } 4025 buf[len++] = '&'; 4026 } else { 4027 /* 4028 * The reparsing will be done in xmlStringGetNodeList() 4029 * called by the attribute() function in SAX.c 4030 */ 4031 if (len + 10 > buf_size) { 4032 growBuffer(buf, 10); 4033 } 4034 buf[len++] = '&'; 4035 buf[len++] = '#'; 4036 buf[len++] = '3'; 4037 buf[len++] = '8'; 4038 buf[len++] = ';'; 4039 } 4040 } else if (val != 0) { 4041 if (len + 10 > buf_size) { 4042 growBuffer(buf, 10); 4043 } 4044 len += xmlCopyChar(0, &buf[len], val); 4045 } 4046 } else { 4047 ent = xmlParseEntityRef(ctxt); 4048 ctxt->nbentities++; 4049 if (ent != NULL) 4050 ctxt->nbentities += ent->owner; 4051 if ((ent != NULL) && 4052 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 4053 if (len + 10 > buf_size) { 4054 growBuffer(buf, 10); 4055 } 4056 if ((ctxt->replaceEntities == 0) && 4057 (ent->content[0] == '&')) { 4058 buf[len++] = '&'; 4059 buf[len++] = '#'; 4060 buf[len++] = '3'; 4061 buf[len++] = '8'; 4062 buf[len++] = ';'; 4063 } else { 4064 buf[len++] = ent->content[0]; 4065 } 4066 } else if ((ent != NULL) && 4067 (ctxt->replaceEntities != 0)) { 4068 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 4069 rep = xmlStringDecodeEntities(ctxt, ent->content, 4070 XML_SUBSTITUTE_REF, 4071 0, 0, 0); 4072 if (rep != 
NULL) { 4073 current = rep; 4074 while (*current != 0) { /* non input consuming */ 4075 if ((*current == 0xD) || (*current == 0xA) || 4076 (*current == 0x9)) { 4077 buf[len++] = 0x20; 4078 current++; 4079 } else 4080 buf[len++] = *current++; 4081 if (len + 10 > buf_size) { 4082 growBuffer(buf, 10); 4083 } 4084 } 4085 xmlFree(rep); 4086 rep = NULL; 4087 } 4088 } else { 4089 if (len + 10 > buf_size) { 4090 growBuffer(buf, 10); 4091 } 4092 if (ent->content != NULL) 4093 buf[len++] = ent->content[0]; 4094 } 4095 } else if (ent != NULL) { 4096 int i = xmlStrlen(ent->name); 4097 const xmlChar *cur = ent->name; 4098 4099 /* 4100 * This may look absurd but is needed to detect 4101 * entities problems 4102 */ 4103 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 4104 (ent->content != NULL) && (ent->checked == 0)) { 4105 unsigned long oldnbent = ctxt->nbentities; 4106 4107 rep = xmlStringDecodeEntities(ctxt, ent->content, 4108 XML_SUBSTITUTE_REF, 0, 0, 0); 4109 4110 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 4111 if (rep != NULL) { 4112 if (xmlStrchr(rep, '<')) 4113 ent->checked |= 1; 4114 xmlFree(rep); 4115 rep = NULL; 4116 } 4117 } 4118 4119 /* 4120 * Just output the reference 4121 */ 4122 buf[len++] = '&'; 4123 while (len + i + 10 > buf_size) { 4124 growBuffer(buf, i + 10); 4125 } 4126 for (;i > 0;i--) 4127 buf[len++] = *cur++; 4128 buf[len++] = ';'; 4129 } 4130 } 4131 } else { 4132 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4133 if ((len != 0) || (!normalize)) { 4134 if ((!normalize) || (!in_space)) { 4135 COPY_BUF(l,buf,len,0x20); 4136 while (len + 10 > buf_size) { 4137 growBuffer(buf, 10); 4138 } 4139 } 4140 in_space = 1; 4141 } 4142 } else { 4143 in_space = 0; 4144 COPY_BUF(l,buf,len,c); 4145 if (len + 10 > buf_size) { 4146 growBuffer(buf, 10); 4147 } 4148 } 4149 NEXTL(l); 4150 } 4151 GROW; 4152 c = CUR_CHAR(l); 4153 } 4154 if (ctxt->instate == XML_PARSER_EOF) 4155 goto error; 4156 4157 if ((in_space) && (normalize)) { 4158 while 
((len > 0) && (buf[len - 1] == 0x20)) len--; 4159 } 4160 buf[len] = 0; 4161 if (RAW == '<') { 4162 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4163 } else if (RAW != limit) { 4164 if ((c != 0) && (!IS_CHAR(c))) { 4165 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4166 "invalid character in attribute value\n"); 4167 } else { 4168 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4169 "AttValue: ' expected\n"); 4170 } 4171 } else 4172 NEXT; 4173 4174 /* 4175 * There we potentially risk an overflow, don't allow attribute value of 4176 * length more than INT_MAX it is a very reasonnable assumption ! 4177 */ 4178 if (len >= INT_MAX) { 4179 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4180 "AttValue length too long\n"); 4181 goto mem_error; 4182 } 4183 4184 if (attlen != NULL) *attlen = (int) len; 4185 return(buf); 4186 4187 mem_error: 4188 xmlErrMemory(ctxt, NULL); 4189 error: 4190 if (buf != NULL) 4191 xmlFree(buf); 4192 if (rep != NULL) 4193 xmlFree(rep); 4194 return(NULL); 4195 } 4196 4197 /** 4198 * xmlParseAttValue: 4199 * @ctxt: an XML parser context 4200 * 4201 * parse a value for an attribute 4202 * Note: the parser won't do substitution of entities here, this 4203 * will be handled later in xmlStringGetNodeList 4204 * 4205 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4206 * "'" ([^<&'] | Reference)* "'" 4207 * 4208 * 3.3.3 Attribute-Value Normalization: 4209 * Before the value of an attribute is passed to the application or 4210 * checked for validity, the XML processor must normalize it as follows: 4211 * - a character reference is processed by appending the referenced 4212 * character to the attribute value 4213 * - an entity reference is processed by recursively processing the 4214 * replacement text of the entity 4215 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4216 * appending #x20 to the normalized value, except that only a single 4217 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4218 * 
parsed entity or the literal entity value of an internal parsed entity 4219 * - other characters are processed by appending them to the normalized value 4220 * If the declared value is not CDATA, then the XML processor must further 4221 * process the normalized attribute value by discarding any leading and 4222 * trailing space (#x20) characters, and by replacing sequences of space 4223 * (#x20) characters by a single space (#x20) character. 4224 * All attributes for which no declaration has been read should be treated 4225 * by a non-validating parser as if declared CDATA. 4226 * 4227 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4228 */ 4229 4230 4231 xmlChar * 4232 xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4233 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4234 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4235 } 4236 4237 /** 4238 * xmlParseSystemLiteral: 4239 * @ctxt: an XML parser context 4240 * 4241 * parse an XML Literal 4242 * 4243 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4244 * 4245 * Returns the SystemLiteral parsed or NULL 4246 */ 4247 4248 xmlChar * 4249 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4250 xmlChar *buf = NULL; 4251 int len = 0; 4252 int size = XML_PARSER_BUFFER_SIZE; 4253 int cur, l; 4254 xmlChar stop; 4255 int state = ctxt->instate; 4256 int count = 0; 4257 4258 SHRINK; 4259 if (RAW == '"') { 4260 NEXT; 4261 stop = '"'; 4262 } else if (RAW == '\'') { 4263 NEXT; 4264 stop = '\''; 4265 } else { 4266 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4267 return(NULL); 4268 } 4269 4270 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4271 if (buf == NULL) { 4272 xmlErrMemory(ctxt, NULL); 4273 return(NULL); 4274 } 4275 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4276 cur = CUR_CHAR(l); 4277 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4278 if (len + 5 >= size) { 4279 xmlChar *tmp; 4280 4281 if ((size > XML_MAX_NAME_LENGTH) && 4282 
((ctxt->options & XML_PARSE_HUGE) == 0)) { 4283 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4284 xmlFree(buf); 4285 ctxt->instate = (xmlParserInputState) state; 4286 return(NULL); 4287 } 4288 size *= 2; 4289 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4290 if (tmp == NULL) { 4291 xmlFree(buf); 4292 xmlErrMemory(ctxt, NULL); 4293 ctxt->instate = (xmlParserInputState) state; 4294 return(NULL); 4295 } 4296 buf = tmp; 4297 } 4298 count++; 4299 if (count > 50) { 4300 GROW; 4301 count = 0; 4302 if (ctxt->instate == XML_PARSER_EOF) { 4303 xmlFree(buf); 4304 return(NULL); 4305 } 4306 } 4307 COPY_BUF(l,buf,len,cur); 4308 NEXTL(l); 4309 cur = CUR_CHAR(l); 4310 if (cur == 0) { 4311 GROW; 4312 SHRINK; 4313 cur = CUR_CHAR(l); 4314 } 4315 } 4316 buf[len] = 0; 4317 ctxt->instate = (xmlParserInputState) state; 4318 if (!IS_CHAR(cur)) { 4319 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4320 } else { 4321 NEXT; 4322 } 4323 return(buf); 4324 } 4325 4326 /** 4327 * xmlParsePubidLiteral: 4328 * @ctxt: an XML parser context 4329 * 4330 * parse an XML public literal 4331 * 4332 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4333 * 4334 * Returns the PubidLiteral parsed or NULL. 
4335 */ 4336 4337 xmlChar * 4338 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4339 xmlChar *buf = NULL; 4340 int len = 0; 4341 int size = XML_PARSER_BUFFER_SIZE; 4342 xmlChar cur; 4343 xmlChar stop; 4344 int count = 0; 4345 xmlParserInputState oldstate = ctxt->instate; 4346 4347 SHRINK; 4348 if (RAW == '"') { 4349 NEXT; 4350 stop = '"'; 4351 } else if (RAW == '\'') { 4352 NEXT; 4353 stop = '\''; 4354 } else { 4355 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4356 return(NULL); 4357 } 4358 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4359 if (buf == NULL) { 4360 xmlErrMemory(ctxt, NULL); 4361 return(NULL); 4362 } 4363 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4364 cur = CUR; 4365 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4366 if (len + 1 >= size) { 4367 xmlChar *tmp; 4368 4369 if ((size > XML_MAX_NAME_LENGTH) && 4370 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4371 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4372 xmlFree(buf); 4373 return(NULL); 4374 } 4375 size *= 2; 4376 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4377 if (tmp == NULL) { 4378 xmlErrMemory(ctxt, NULL); 4379 xmlFree(buf); 4380 return(NULL); 4381 } 4382 buf = tmp; 4383 } 4384 buf[len++] = cur; 4385 count++; 4386 if (count > 50) { 4387 GROW; 4388 count = 0; 4389 if (ctxt->instate == XML_PARSER_EOF) { 4390 xmlFree(buf); 4391 return(NULL); 4392 } 4393 } 4394 NEXT; 4395 cur = CUR; 4396 if (cur == 0) { 4397 GROW; 4398 SHRINK; 4399 cur = CUR; 4400 } 4401 } 4402 buf[len] = 0; 4403 if (cur != stop) { 4404 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4405 } else { 4406 NEXT; 4407 } 4408 ctxt->instate = oldstate; 4409 return(buf); 4410 } 4411 4412 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4413 4414 /* 4415 * used for the test in the inner loop of the char data testing 4416 */ 4417 static const unsigned char test_char_data[256] = { 4418 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4419 0x00, 0x09, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4420 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4421 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4422 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4423 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4424 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4425 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4426 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4427 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4428 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4429 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4430 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4431 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4432 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4433 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4434 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4435 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4436 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4437 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4438 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4439 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4440 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4441 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4442 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4443 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4444 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4445 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4446 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4447 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4448 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4449 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4450 }; 4451 4452 /** 4453 * xmlParseCharData: 4454 * @ctxt: an XML parser context 4455 * @cdata: int indicating whether we are within a CDATA section 4456 * 4457 * parse a CharData section. 4458 * if we are within a CDATA section ']]>' marks an end of section. 
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero the function first tries a fast path that scans the
 * input buffer in place and hands runs of plain ASCII text directly to the
 * SAX characters / ignorableWhitespace callbacks.  Whatever the fast path
 * cannot handle is left to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position snapshot: refreshed after each SAX delivery below and
     * written back to the input just before calling the fallback.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
            /* Step 1: skip a leading run of spaces and line feeds. */
get_more_space:
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            /*
             * The run seen so far (spaces and line feeds only) is
             * directly followed by markup: deliver it and stop.
             */
            if (*in == '<') {
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    /*
                     * Only consult areBlanks() when the two callbacks
                     * differ; otherwise everything goes to characters().
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

            /*
             * Step 2: consume bytes for which test_char_data[] is
             * non-zero; the column is tracked in a local and stored
             * back once the run ends.
             */
get_more:
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in character data. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Step 3: deliver the run collected since input->cur. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            /*
             * Step 4: a CR directly followed by LF is stepped over here;
             * input->cur is left on the LF and the loop condition is
             * re-tested on the character after it.  A lone CR is not
             * consumed.
             */
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            /* Markup or an entity reference ends the character data. */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* Re-read the cursor after SHRINK/GROW. */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Fall back to the general routine, starting from the last recorded
     * line/column snapshot.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 * xmlParseCharDataComplex:
4612 * @ctxt: an XML parser context 4613 * @cdata: int indicating whether we are within a CDATA section 4614 * 4615 * parse a CharData section.this is the fallback function 4616 * of xmlParseCharData() when the parsing requires handling 4617 * of non-ASCII characters. 4618 */ 4619 static void 4620 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4622 int nbchar = 0; 4623 int cur, l; 4624 int count = 0; 4625 4626 SHRINK; 4627 GROW; 4628 cur = CUR_CHAR(l); 4629 while ((cur != '<') && /* checked */ 4630 (cur != '&') && 4631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4632 if ((cur == ']') && (NXT(1) == ']') && 4633 (NXT(2) == '>')) { 4634 if (cdata) break; 4635 else { 4636 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4637 } 4638 } 4639 COPY_BUF(l,buf,nbchar,cur); 4640 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4641 buf[nbchar] = 0; 4642 4643 /* 4644 * OK the segment is to be consumed as chars. 4645 */ 4646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4647 if (areBlanks(ctxt, buf, nbchar, 0)) { 4648 if (ctxt->sax->ignorableWhitespace != NULL) 4649 ctxt->sax->ignorableWhitespace(ctxt->userData, 4650 buf, nbchar); 4651 } else { 4652 if (ctxt->sax->characters != NULL) 4653 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4654 if ((ctxt->sax->characters != 4655 ctxt->sax->ignorableWhitespace) && 4656 (*ctxt->space == -1)) 4657 *ctxt->space = -2; 4658 } 4659 } 4660 nbchar = 0; 4661 /* something really bad happened in the SAX callback */ 4662 if (ctxt->instate != XML_PARSER_CONTENT) 4663 return; 4664 } 4665 count++; 4666 if (count > 50) { 4667 GROW; 4668 count = 0; 4669 if (ctxt->instate == XML_PARSER_EOF) 4670 return; 4671 } 4672 NEXTL(l); 4673 cur = CUR_CHAR(l); 4674 } 4675 if (nbchar != 0) { 4676 buf[nbchar] = 0; 4677 /* 4678 * OK the segment is to be consumed as chars. 
4679 */ 4680 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4681 if (areBlanks(ctxt, buf, nbchar, 0)) { 4682 if (ctxt->sax->ignorableWhitespace != NULL) 4683 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4684 } else { 4685 if (ctxt->sax->characters != NULL) 4686 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4687 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4688 (*ctxt->space == -1)) 4689 *ctxt->space = -2; 4690 } 4691 } 4692 } 4693 if ((cur != 0) && (!IS_CHAR(cur))) { 4694 /* Generate the error and skip the offending character */ 4695 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4696 "PCDATA invalid Char value %d\n", 4697 cur); 4698 NEXTL(l); 4699 } 4700 } 4701 4702 /** 4703 * xmlParseExternalID: 4704 * @ctxt: an XML parser context 4705 * @publicID: a xmlChar** receiving PubidLiteral 4706 * @strict: indicate whether we should restrict parsing to only 4707 * production [75], see NOTE below 4708 * 4709 * Parse an External ID or a Public ID 4710 * 4711 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4712 * 'PUBLIC' S PubidLiteral S SystemLiteral 4713 * 4714 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4715 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4716 * 4717 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4718 * 4719 * Returns the function returns SystemLiteral and in the second 4720 * case publicID receives PubidLiteral, is strict is off 4721 * it is possible to return NULL and have publicID set. 
4722 */ 4723 4724 xmlChar * 4725 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4726 xmlChar *URI = NULL; 4727 4728 SHRINK; 4729 4730 *publicID = NULL; 4731 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4732 SKIP(6); 4733 if (!IS_BLANK_CH(CUR)) { 4734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4735 "Space required after 'SYSTEM'\n"); 4736 } 4737 SKIP_BLANKS; 4738 URI = xmlParseSystemLiteral(ctxt); 4739 if (URI == NULL) { 4740 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4741 } 4742 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4743 SKIP(6); 4744 if (!IS_BLANK_CH(CUR)) { 4745 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4746 "Space required after 'PUBLIC'\n"); 4747 } 4748 SKIP_BLANKS; 4749 *publicID = xmlParsePubidLiteral(ctxt); 4750 if (*publicID == NULL) { 4751 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4752 } 4753 if (strict) { 4754 /* 4755 * We don't handle [83] so "S SystemLiteral" is required. 4756 */ 4757 if (!IS_BLANK_CH(CUR)) { 4758 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4759 "Space required after the Public Identifier\n"); 4760 } 4761 } else { 4762 /* 4763 * We handle [83] so we return immediately, if 4764 * "S SystemLiteral" is not detected. From a purely parsing 4765 * point of view that's a nice mess. 4766 */ 4767 const xmlChar *ptr; 4768 GROW; 4769 4770 ptr = CUR_PTR; 4771 if (!IS_BLANK_CH(*ptr)) return(NULL); 4772 4773 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4774 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4775 } 4776 SKIP_BLANKS; 4777 URI = xmlParseSystemLiteral(ctxt); 4778 if (URI == NULL) { 4779 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4780 } 4781 } 4782 return(URI); 4783 } 4784 4785 /** 4786 * xmlParseCommentComplex: 4787 * @ctxt: an XML parser context 4788 * @buf: the already parsed part of the buffer 4789 * @len: number of bytes filles in the buffer 4790 * @size: allocated size of the buffer 4791 * 4792 * Skip an XML (SGML) comment <!-- .... 
--> 4793 * The spec says that "For compatibility, the string "--" (double-hyphen) 4794 * must not occur within comments. " 4795 * This is the slow routine in case the accelerator for ascii didn't work 4796 * 4797 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4798 */ 4799 static void 4800 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4801 size_t len, size_t size) { 4802 int q, ql; 4803 int r, rl; 4804 int cur, l; 4805 size_t count = 0; 4806 int inputid; 4807 4808 inputid = ctxt->input->id; 4809 4810 if (buf == NULL) { 4811 len = 0; 4812 size = XML_PARSER_BUFFER_SIZE; 4813 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4814 if (buf == NULL) { 4815 xmlErrMemory(ctxt, NULL); 4816 return; 4817 } 4818 } 4819 GROW; /* Assure there's enough input data */ 4820 q = CUR_CHAR(ql); 4821 if (q == 0) 4822 goto not_terminated; 4823 if (!IS_CHAR(q)) { 4824 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4825 "xmlParseComment: invalid xmlChar value %d\n", 4826 q); 4827 xmlFree (buf); 4828 return; 4829 } 4830 NEXTL(ql); 4831 r = CUR_CHAR(rl); 4832 if (r == 0) 4833 goto not_terminated; 4834 if (!IS_CHAR(r)) { 4835 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4836 "xmlParseComment: invalid xmlChar value %d\n", 4837 q); 4838 xmlFree (buf); 4839 return; 4840 } 4841 NEXTL(rl); 4842 cur = CUR_CHAR(l); 4843 if (cur == 0) 4844 goto not_terminated; 4845 while (IS_CHAR(cur) && /* checked */ 4846 ((cur != '>') || 4847 (r != '-') || (q != '-'))) { 4848 if ((r == '-') && (q == '-')) { 4849 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4850 } 4851 if ((len > XML_MAX_TEXT_LENGTH) && 4852 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4853 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4854 "Comment too big found", NULL); 4855 xmlFree (buf); 4856 return; 4857 } 4858 if (len + 5 >= size) { 4859 xmlChar *new_buf; 4860 size_t new_size; 4861 4862 new_size = size * 2; 4863 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4864 if (new_buf == NULL) { 
4865 xmlFree (buf); 4866 xmlErrMemory(ctxt, NULL); 4867 return; 4868 } 4869 buf = new_buf; 4870 size = new_size; 4871 } 4872 COPY_BUF(ql,buf,len,q); 4873 q = r; 4874 ql = rl; 4875 r = cur; 4876 rl = l; 4877 4878 count++; 4879 if (count > 50) { 4880 GROW; 4881 count = 0; 4882 if (ctxt->instate == XML_PARSER_EOF) { 4883 xmlFree(buf); 4884 return; 4885 } 4886 } 4887 NEXTL(l); 4888 cur = CUR_CHAR(l); 4889 if (cur == 0) { 4890 SHRINK; 4891 GROW; 4892 cur = CUR_CHAR(l); 4893 } 4894 } 4895 buf[len] = 0; 4896 if (cur == 0) { 4897 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4898 "Comment not terminated \n<!--%.50s\n", buf); 4899 } else if (!IS_CHAR(cur)) { 4900 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4901 "xmlParseComment: invalid xmlChar value %d\n", 4902 cur); 4903 } else { 4904 if (inputid != ctxt->input->id) { 4905 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4906 "Comment doesn't start and stop in the same entity\n"); 4907 } 4908 NEXT; 4909 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4910 (!ctxt->disableSAX)) 4911 ctxt->sax->comment(ctxt->userData, buf); 4912 } 4913 xmlFree(buf); 4914 return; 4915 not_terminated: 4916 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4917 "Comment not terminated\n", NULL); 4918 xmlFree(buf); 4919 return; 4920 } 4921 4922 /** 4923 * xmlParseComment: 4924 * @ctxt: an XML parser context 4925 * 4926 * Skip an XML (SGML) comment <!-- .... --> 4927 * The spec says that "For compatibility, the string "--" (double-hyphen) 4928 * must not occur within comments. " 4929 * 4930 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4931 */ 4932 void 4933 xmlParseComment(xmlParserCtxtPtr ctxt) { 4934 xmlChar *buf = NULL; 4935 size_t size = XML_PARSER_BUFFER_SIZE; 4936 size_t len = 0; 4937 xmlParserInputState state; 4938 const xmlChar *in; 4939 size_t nbchar = 0; 4940 int ccol; 4941 int inputid; 4942 4943 /* 4944 * Check that there is a comment right here. 
4945 */ 4946 if ((RAW != '<') || (NXT(1) != '!') || 4947 (NXT(2) != '-') || (NXT(3) != '-')) return; 4948 state = ctxt->instate; 4949 ctxt->instate = XML_PARSER_COMMENT; 4950 inputid = ctxt->input->id; 4951 SKIP(4); 4952 SHRINK; 4953 GROW; 4954 4955 /* 4956 * Accelerated common case where input don't need to be 4957 * modified before passing it to the handler. 4958 */ 4959 in = ctxt->input->cur; 4960 do { 4961 if (*in == 0xA) { 4962 do { 4963 ctxt->input->line++; ctxt->input->col = 1; 4964 in++; 4965 } while (*in == 0xA); 4966 } 4967 get_more: 4968 ccol = ctxt->input->col; 4969 while (((*in > '-') && (*in <= 0x7F)) || 4970 ((*in >= 0x20) && (*in < '-')) || 4971 (*in == 0x09)) { 4972 in++; 4973 ccol++; 4974 } 4975 ctxt->input->col = ccol; 4976 if (*in == 0xA) { 4977 do { 4978 ctxt->input->line++; ctxt->input->col = 1; 4979 in++; 4980 } while (*in == 0xA); 4981 goto get_more; 4982 } 4983 nbchar = in - ctxt->input->cur; 4984 /* 4985 * save current set of data 4986 */ 4987 if (nbchar > 0) { 4988 if ((ctxt->sax != NULL) && 4989 (ctxt->sax->comment != NULL)) { 4990 if (buf == NULL) { 4991 if ((*in == '-') && (in[1] == '-')) 4992 size = nbchar + 1; 4993 else 4994 size = XML_PARSER_BUFFER_SIZE + nbchar; 4995 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4996 if (buf == NULL) { 4997 xmlErrMemory(ctxt, NULL); 4998 ctxt->instate = state; 4999 return; 5000 } 5001 len = 0; 5002 } else if (len + nbchar + 1 >= size) { 5003 xmlChar *new_buf; 5004 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 5005 new_buf = (xmlChar *) xmlRealloc(buf, 5006 size * sizeof(xmlChar)); 5007 if (new_buf == NULL) { 5008 xmlFree (buf); 5009 xmlErrMemory(ctxt, NULL); 5010 ctxt->instate = state; 5011 return; 5012 } 5013 buf = new_buf; 5014 } 5015 memcpy(&buf[len], ctxt->input->cur, nbchar); 5016 len += nbchar; 5017 buf[len] = 0; 5018 } 5019 } 5020 if ((len > XML_MAX_TEXT_LENGTH) && 5021 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5022 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 5023 
"Comment too big found", NULL); 5024 xmlFree (buf); 5025 return; 5026 } 5027 ctxt->input->cur = in; 5028 if (*in == 0xA) { 5029 in++; 5030 ctxt->input->line++; ctxt->input->col = 1; 5031 } 5032 if (*in == 0xD) { 5033 in++; 5034 if (*in == 0xA) { 5035 ctxt->input->cur = in; 5036 in++; 5037 ctxt->input->line++; ctxt->input->col = 1; 5038 continue; /* while */ 5039 } 5040 in--; 5041 } 5042 SHRINK; 5043 GROW; 5044 if (ctxt->instate == XML_PARSER_EOF) { 5045 xmlFree(buf); 5046 return; 5047 } 5048 in = ctxt->input->cur; 5049 if (*in == '-') { 5050 if (in[1] == '-') { 5051 if (in[2] == '>') { 5052 if (ctxt->input->id != inputid) { 5053 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5054 "comment doesn't start and stop in the same entity\n"); 5055 } 5056 SKIP(3); 5057 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5058 (!ctxt->disableSAX)) { 5059 if (buf != NULL) 5060 ctxt->sax->comment(ctxt->userData, buf); 5061 else 5062 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 5063 } 5064 if (buf != NULL) 5065 xmlFree(buf); 5066 if (ctxt->instate != XML_PARSER_EOF) 5067 ctxt->instate = state; 5068 return; 5069 } 5070 if (buf != NULL) { 5071 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5072 "Double hyphen within comment: " 5073 "<!--%.50s\n", 5074 buf); 5075 } else 5076 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5077 "Double hyphen within comment\n", NULL); 5078 in++; 5079 ctxt->input->col++; 5080 } 5081 in++; 5082 ctxt->input->col++; 5083 goto get_more; 5084 } 5085 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 5086 xmlParseCommentComplex(ctxt, buf, len, size); 5087 ctxt->instate = state; 5088 return; 5089 } 5090 5091 5092 /** 5093 * xmlParsePITarget: 5094 * @ctxt: an XML parser context 5095 * 5096 * parse the name of a PI 5097 * 5098 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 5099 * 5100 * Returns the PITarget name or NULL 5101 */ 5102 5103 const xmlChar * 5104 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5105 const 
xmlChar *name; 5106 5107 name = xmlParseName(ctxt); 5108 if ((name != NULL) && 5109 ((name[0] == 'x') || (name[0] == 'X')) && 5110 ((name[1] == 'm') || (name[1] == 'M')) && 5111 ((name[2] == 'l') || (name[2] == 'L'))) { 5112 int i; 5113 if ((name[0] == 'x') && (name[1] == 'm') && 5114 (name[2] == 'l') && (name[3] == 0)) { 5115 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5116 "XML declaration allowed only at the start of the document\n"); 5117 return(name); 5118 } else if (name[3] == 0) { 5119 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5120 return(name); 5121 } 5122 for (i = 0;;i++) { 5123 if (xmlW3CPIs[i] == NULL) break; 5124 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5125 return(name); 5126 } 5127 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5128 "xmlParsePITarget: invalid name prefix 'xml'\n", 5129 NULL, NULL); 5130 } 5131 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5132 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5133 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5134 } 5135 return(name); 5136 } 5137 5138 #ifdef LIBXML_CATALOG_ENABLED 5139 /** 5140 * xmlParseCatalogPI: 5141 * @ctxt: an XML parser context 5142 * @catalog: the PI value string 5143 * 5144 * parse an XML Catalog Processing Instruction. 
5145 * 5146 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5147 * 5148 * Occurs only if allowed by the user and if happening in the Misc 5149 * part of the document before any doctype informations 5150 * This will add the given catalog to the parsing context in order 5151 * to be used if there is a resolution need further down in the document 5152 */ 5153 5154 static void 5155 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5156 xmlChar *URL = NULL; 5157 const xmlChar *tmp, *base; 5158 xmlChar marker; 5159 5160 tmp = catalog; 5161 while (IS_BLANK_CH(*tmp)) tmp++; 5162 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5163 goto error; 5164 tmp += 7; 5165 while (IS_BLANK_CH(*tmp)) tmp++; 5166 if (*tmp != '=') { 5167 return; 5168 } 5169 tmp++; 5170 while (IS_BLANK_CH(*tmp)) tmp++; 5171 marker = *tmp; 5172 if ((marker != '\'') && (marker != '"')) 5173 goto error; 5174 tmp++; 5175 base = tmp; 5176 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5177 if (*tmp == 0) 5178 goto error; 5179 URL = xmlStrndup(base, tmp - base); 5180 tmp++; 5181 while (IS_BLANK_CH(*tmp)) tmp++; 5182 if (*tmp != 0) 5183 goto error; 5184 5185 if (URL != NULL) { 5186 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5187 xmlFree(URL); 5188 } 5189 return; 5190 5191 error: 5192 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5193 "Catalog PI syntax error: %s\n", 5194 catalog, NULL); 5195 if (URL != NULL) 5196 xmlFree(URL); 5197 } 5198 #endif 5199 5200 /** 5201 * xmlParsePI: 5202 * @ctxt: an XML parser context 5203 * 5204 * parse an XML Processing Instruction. 5205 * 5206 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5207 * 5208 * The processing is transfered to SAX once parsed. 
5209 */ 5210 5211 void 5212 xmlParsePI(xmlParserCtxtPtr ctxt) { 5213 xmlChar *buf = NULL; 5214 size_t len = 0; 5215 size_t size = XML_PARSER_BUFFER_SIZE; 5216 int cur, l; 5217 const xmlChar *target; 5218 xmlParserInputState state; 5219 int count = 0; 5220 5221 if ((RAW == '<') && (NXT(1) == '?')) { 5222 xmlParserInputPtr input = ctxt->input; 5223 state = ctxt->instate; 5224 ctxt->instate = XML_PARSER_PI; 5225 /* 5226 * this is a Processing Instruction. 5227 */ 5228 SKIP(2); 5229 SHRINK; 5230 5231 /* 5232 * Parse the target name and check for special support like 5233 * namespace. 5234 */ 5235 target = xmlParsePITarget(ctxt); 5236 if (target != NULL) { 5237 if ((RAW == '?') && (NXT(1) == '>')) { 5238 if (input != ctxt->input) { 5239 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5240 "PI declaration doesn't start and stop in the same entity\n"); 5241 } 5242 SKIP(2); 5243 5244 /* 5245 * SAX: PI detected. 5246 */ 5247 if ((ctxt->sax) && (!ctxt->disableSAX) && 5248 (ctxt->sax->processingInstruction != NULL)) 5249 ctxt->sax->processingInstruction(ctxt->userData, 5250 target, NULL); 5251 if (ctxt->instate != XML_PARSER_EOF) 5252 ctxt->instate = state; 5253 return; 5254 } 5255 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5256 if (buf == NULL) { 5257 xmlErrMemory(ctxt, NULL); 5258 ctxt->instate = state; 5259 return; 5260 } 5261 cur = CUR; 5262 if (!IS_BLANK(cur)) { 5263 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5264 "ParsePI: PI %s space expected\n", target); 5265 } 5266 SKIP_BLANKS; 5267 cur = CUR_CHAR(l); 5268 while (IS_CHAR(cur) && /* checked */ 5269 ((cur != '?') || (NXT(1) != '>'))) { 5270 if (len + 5 >= size) { 5271 xmlChar *tmp; 5272 size_t new_size = size * 2; 5273 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5274 if (tmp == NULL) { 5275 xmlErrMemory(ctxt, NULL); 5276 xmlFree(buf); 5277 ctxt->instate = state; 5278 return; 5279 } 5280 buf = tmp; 5281 size = new_size; 5282 } 5283 count++; 5284 if (count > 50) { 5285 GROW; 5286 if (ctxt->instate == 
XML_PARSER_EOF) { 5287 xmlFree(buf); 5288 return; 5289 } 5290 count = 0; 5291 if ((len > XML_MAX_TEXT_LENGTH) && 5292 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5293 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5294 "PI %s too big found", target); 5295 xmlFree(buf); 5296 ctxt->instate = state; 5297 return; 5298 } 5299 } 5300 COPY_BUF(l,buf,len,cur); 5301 NEXTL(l); 5302 cur = CUR_CHAR(l); 5303 if (cur == 0) { 5304 SHRINK; 5305 GROW; 5306 cur = CUR_CHAR(l); 5307 } 5308 } 5309 if ((len > XML_MAX_TEXT_LENGTH) && 5310 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5311 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5312 "PI %s too big found", target); 5313 xmlFree(buf); 5314 ctxt->instate = state; 5315 return; 5316 } 5317 buf[len] = 0; 5318 if (cur != '?') { 5319 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5320 "ParsePI: PI %s never end ...\n", target); 5321 } else { 5322 if (input != ctxt->input) { 5323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5324 "PI declaration doesn't start and stop in the same entity\n"); 5325 } 5326 SKIP(2); 5327 5328 #ifdef LIBXML_CATALOG_ENABLED 5329 if (((state == XML_PARSER_MISC) || 5330 (state == XML_PARSER_START)) && 5331 (xmlStrEqual(target, XML_CATALOG_PI))) { 5332 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5333 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5334 (allow == XML_CATA_ALLOW_ALL)) 5335 xmlParseCatalogPI(ctxt, buf); 5336 } 5337 #endif 5338 5339 5340 /* 5341 * SAX: PI detected. 
5342 */ 5343 if ((ctxt->sax) && (!ctxt->disableSAX) && 5344 (ctxt->sax->processingInstruction != NULL)) 5345 ctxt->sax->processingInstruction(ctxt->userData, 5346 target, buf); 5347 } 5348 xmlFree(buf); 5349 } else { 5350 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5351 } 5352 if (ctxt->instate != XML_PARSER_EOF) 5353 ctxt->instate = state; 5354 } 5355 } 5356 5357 /** 5358 * xmlParseNotationDecl: 5359 * @ctxt: an XML parser context 5360 * 5361 * parse a notation declaration 5362 * 5363 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5364 * 5365 * Hence there is actually 3 choices: 5366 * 'PUBLIC' S PubidLiteral 5367 * 'PUBLIC' S PubidLiteral S SystemLiteral 5368 * and 'SYSTEM' S SystemLiteral 5369 * 5370 * See the NOTE on xmlParseExternalID(). 5371 */ 5372 5373 void 5374 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5375 const xmlChar *name; 5376 xmlChar *Pubid; 5377 xmlChar *Systemid; 5378 5379 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5380 xmlParserInputPtr input = ctxt->input; 5381 SHRINK; 5382 SKIP(10); 5383 if (!IS_BLANK_CH(CUR)) { 5384 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5385 "Space required after '<!NOTATION'\n"); 5386 return; 5387 } 5388 SKIP_BLANKS; 5389 5390 name = xmlParseName(ctxt); 5391 if (name == NULL) { 5392 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5393 return; 5394 } 5395 if (!IS_BLANK_CH(CUR)) { 5396 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5397 "Space required after the NOTATION name'\n"); 5398 return; 5399 } 5400 if (xmlStrchr(name, ':') != NULL) { 5401 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5402 "colons are forbidden from notation names '%s'\n", 5403 name, NULL, NULL); 5404 } 5405 SKIP_BLANKS; 5406 5407 /* 5408 * Parse the IDs. 
5409 */ 5410 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5411 SKIP_BLANKS; 5412 5413 if (RAW == '>') { 5414 if (input != ctxt->input) { 5415 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5416 "Notation declaration doesn't start and stop in the same entity\n"); 5417 } 5418 NEXT; 5419 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5420 (ctxt->sax->notationDecl != NULL)) 5421 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5422 } else { 5423 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5424 } 5425 if (Systemid != NULL) xmlFree(Systemid); 5426 if (Pubid != NULL) xmlFree(Pubid); 5427 } 5428 } 5429 5430 /** 5431 * xmlParseEntityDecl: 5432 * @ctxt: an XML parser context 5433 * 5434 * parse <!ENTITY declarations 5435 * 5436 * [70] EntityDecl ::= GEDecl | PEDecl 5437 * 5438 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5439 * 5440 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5441 * 5442 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5443 * 5444 * [74] PEDef ::= EntityValue | ExternalID 5445 * 5446 * [76] NDataDecl ::= S 'NDATA' S Name 5447 * 5448 * [ VC: Notation Declared ] 5449 * The Name must match the declared name of a notation. 
5450 */ 5451 5452 void 5453 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5454 const xmlChar *name = NULL; 5455 xmlChar *value = NULL; 5456 xmlChar *URI = NULL, *literal = NULL; 5457 const xmlChar *ndata = NULL; 5458 int isParameter = 0; 5459 xmlChar *orig = NULL; 5460 int skipped; 5461 5462 /* GROW; done in the caller */ 5463 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5464 xmlParserInputPtr input = ctxt->input; 5465 SHRINK; 5466 SKIP(8); 5467 skipped = SKIP_BLANKS; 5468 if (skipped == 0) { 5469 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5470 "Space required after '<!ENTITY'\n"); 5471 } 5472 5473 if (RAW == '%') { 5474 NEXT; 5475 skipped = SKIP_BLANKS; 5476 if (skipped == 0) { 5477 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5478 "Space required after '%'\n"); 5479 } 5480 isParameter = 1; 5481 } 5482 5483 name = xmlParseName(ctxt); 5484 if (name == NULL) { 5485 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5486 "xmlParseEntityDecl: no name\n"); 5487 return; 5488 } 5489 if (xmlStrchr(name, ':') != NULL) { 5490 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5491 "colons are forbidden from entities names '%s'\n", 5492 name, NULL, NULL); 5493 } 5494 skipped = SKIP_BLANKS; 5495 if (skipped == 0) { 5496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5497 "Space required after the entity name\n"); 5498 } 5499 5500 ctxt->instate = XML_PARSER_ENTITY_DECL; 5501 /* 5502 * handle the various case of definitions... 
5503 */ 5504 if (isParameter) { 5505 if ((RAW == '"') || (RAW == '\'')) { 5506 value = xmlParseEntityValue(ctxt, &orig); 5507 if (value) { 5508 if ((ctxt->sax != NULL) && 5509 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5510 ctxt->sax->entityDecl(ctxt->userData, name, 5511 XML_INTERNAL_PARAMETER_ENTITY, 5512 NULL, NULL, value); 5513 } 5514 } else { 5515 URI = xmlParseExternalID(ctxt, &literal, 1); 5516 if ((URI == NULL) && (literal == NULL)) { 5517 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5518 } 5519 if (URI) { 5520 xmlURIPtr uri; 5521 5522 uri = xmlParseURI((const char *) URI); 5523 if (uri == NULL) { 5524 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5525 "Invalid URI: %s\n", URI); 5526 /* 5527 * This really ought to be a well formedness error 5528 * but the XML Core WG decided otherwise c.f. issue 5529 * E26 of the XML erratas. 5530 */ 5531 } else { 5532 if (uri->fragment != NULL) { 5533 /* 5534 * Okay this is foolish to block those but not 5535 * invalid URIs. 5536 */ 5537 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5538 } else { 5539 if ((ctxt->sax != NULL) && 5540 (!ctxt->disableSAX) && 5541 (ctxt->sax->entityDecl != NULL)) 5542 ctxt->sax->entityDecl(ctxt->userData, name, 5543 XML_EXTERNAL_PARAMETER_ENTITY, 5544 literal, URI, NULL); 5545 } 5546 xmlFreeURI(uri); 5547 } 5548 } 5549 } 5550 } else { 5551 if ((RAW == '"') || (RAW == '\'')) { 5552 value = xmlParseEntityValue(ctxt, &orig); 5553 if ((ctxt->sax != NULL) && 5554 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5555 ctxt->sax->entityDecl(ctxt->userData, name, 5556 XML_INTERNAL_GENERAL_ENTITY, 5557 NULL, NULL, value); 5558 /* 5559 * For expat compatibility in SAX mode. 
5560 */ 5561 if ((ctxt->myDoc == NULL) || 5562 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5563 if (ctxt->myDoc == NULL) { 5564 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5565 if (ctxt->myDoc == NULL) { 5566 xmlErrMemory(ctxt, "New Doc failed"); 5567 return; 5568 } 5569 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5570 } 5571 if (ctxt->myDoc->intSubset == NULL) 5572 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5573 BAD_CAST "fake", NULL, NULL); 5574 5575 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5576 NULL, NULL, value); 5577 } 5578 } else { 5579 URI = xmlParseExternalID(ctxt, &literal, 1); 5580 if ((URI == NULL) && (literal == NULL)) { 5581 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5582 } 5583 if (URI) { 5584 xmlURIPtr uri; 5585 5586 uri = xmlParseURI((const char *)URI); 5587 if (uri == NULL) { 5588 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5589 "Invalid URI: %s\n", URI); 5590 /* 5591 * This really ought to be a well formedness error 5592 * but the XML Core WG decided otherwise c.f. issue 5593 * E26 of the XML erratas. 5594 */ 5595 } else { 5596 if (uri->fragment != NULL) { 5597 /* 5598 * Okay this is foolish to block those but not 5599 * invalid URIs. 
5600 */ 5601 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5602 } 5603 xmlFreeURI(uri); 5604 } 5605 } 5606 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5607 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5608 "Space required before 'NDATA'\n"); 5609 } 5610 SKIP_BLANKS; 5611 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5612 SKIP(5); 5613 if (!IS_BLANK_CH(CUR)) { 5614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5615 "Space required after 'NDATA'\n"); 5616 } 5617 SKIP_BLANKS; 5618 ndata = xmlParseName(ctxt); 5619 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5620 (ctxt->sax->unparsedEntityDecl != NULL)) 5621 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5622 literal, URI, ndata); 5623 } else { 5624 if ((ctxt->sax != NULL) && 5625 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5626 ctxt->sax->entityDecl(ctxt->userData, name, 5627 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5628 literal, URI, NULL); 5629 /* 5630 * For expat compatibility in SAX mode. 5631 * assuming the entity repalcement was asked for 5632 */ 5633 if ((ctxt->replaceEntities != 0) && 5634 ((ctxt->myDoc == NULL) || 5635 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5636 if (ctxt->myDoc == NULL) { 5637 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5638 if (ctxt->myDoc == NULL) { 5639 xmlErrMemory(ctxt, "New Doc failed"); 5640 return; 5641 } 5642 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5643 } 5644 5645 if (ctxt->myDoc->intSubset == NULL) 5646 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5647 BAD_CAST "fake", NULL, NULL); 5648 xmlSAX2EntityDecl(ctxt, name, 5649 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5650 literal, URI, NULL); 5651 } 5652 } 5653 } 5654 } 5655 if (ctxt->instate == XML_PARSER_EOF) 5656 return; 5657 SKIP_BLANKS; 5658 if (RAW != '>') { 5659 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5660 "xmlParseEntityDecl: entity %s not terminated\n", name); 5661 } else { 5662 if (input != ctxt->input) { 5663 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5664 "Entity declaration 
doesn't start and stop in the same entity\n"); 5665 } 5666 NEXT; 5667 } 5668 if (orig != NULL) { 5669 /* 5670 * Ugly mechanism to save the raw entity value. 5671 */ 5672 xmlEntityPtr cur = NULL; 5673 5674 if (isParameter) { 5675 if ((ctxt->sax != NULL) && 5676 (ctxt->sax->getParameterEntity != NULL)) 5677 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5678 } else { 5679 if ((ctxt->sax != NULL) && 5680 (ctxt->sax->getEntity != NULL)) 5681 cur = ctxt->sax->getEntity(ctxt->userData, name); 5682 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5683 cur = xmlSAX2GetEntity(ctxt, name); 5684 } 5685 } 5686 if (cur != NULL) { 5687 if (cur->orig != NULL) 5688 xmlFree(orig); 5689 else 5690 cur->orig = orig; 5691 } else 5692 xmlFree(orig); 5693 } 5694 if (value != NULL) xmlFree(value); 5695 if (URI != NULL) xmlFree(URI); 5696 if (literal != NULL) xmlFree(literal); 5697 } 5698 } 5699 5700 /** 5701 * xmlParseDefaultDecl: 5702 * @ctxt: an XML parser context 5703 * @value: Receive a possible fixed default value for the attribute 5704 * 5705 * Parse an attribute default declaration 5706 * 5707 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5708 * 5709 * [ VC: Required Attribute ] 5710 * if the default declaration is the keyword #REQUIRED, then the 5711 * attribute must be specified for all elements of the type in the 5712 * attribute-list declaration. 5713 * 5714 * [ VC: Attribute Default Legal ] 5715 * The declared default value must meet the lexical constraints of 5716 * the declared attribute type c.f. xmlValidateAttributeDecl() 5717 * 5718 * [ VC: Fixed Attribute Default ] 5719 * if an attribute has a default value declared with the #FIXED 5720 * keyword, instances of that attribute must match the default value. 5721 * 5722 * [ WFC: No < in Attribute Values ] 5723 * handled in xmlParseAttValue() 5724 * 5725 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5726 * or XML_ATTRIBUTE_FIXED. 
5727 */ 5728 5729 int 5730 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5731 int val; 5732 xmlChar *ret; 5733 5734 *value = NULL; 5735 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5736 SKIP(9); 5737 return(XML_ATTRIBUTE_REQUIRED); 5738 } 5739 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5740 SKIP(8); 5741 return(XML_ATTRIBUTE_IMPLIED); 5742 } 5743 val = XML_ATTRIBUTE_NONE; 5744 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5745 SKIP(6); 5746 val = XML_ATTRIBUTE_FIXED; 5747 if (!IS_BLANK_CH(CUR)) { 5748 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5749 "Space required after '#FIXED'\n"); 5750 } 5751 SKIP_BLANKS; 5752 } 5753 ret = xmlParseAttValue(ctxt); 5754 ctxt->instate = XML_PARSER_DTD; 5755 if (ret == NULL) { 5756 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5757 "Attribute default value declaration error\n"); 5758 } else 5759 *value = ret; 5760 return(val); 5761 } 5762 5763 /** 5764 * xmlParseNotationType: 5765 * @ctxt: an XML parser context 5766 * 5767 * parse an Notation attribute type. 5768 * 5769 * Note: the leading 'NOTATION' S part has already being parsed... 5770 * 5771 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5772 * 5773 * [ VC: Notation Attributes ] 5774 * Values of this type must match one of the notation names included 5775 * in the declaration; all notation names in the declaration must be declared. 
5776 * 5777 * Returns: the notation attribute tree built while parsing 5778 */ 5779 5780 xmlEnumerationPtr 5781 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5782 const xmlChar *name; 5783 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5784 5785 if (RAW != '(') { 5786 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5787 return(NULL); 5788 } 5789 SHRINK; 5790 do { 5791 NEXT; 5792 SKIP_BLANKS; 5793 name = xmlParseName(ctxt); 5794 if (name == NULL) { 5795 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5796 "Name expected in NOTATION declaration\n"); 5797 xmlFreeEnumeration(ret); 5798 return(NULL); 5799 } 5800 tmp = ret; 5801 while (tmp != NULL) { 5802 if (xmlStrEqual(name, tmp->name)) { 5803 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5804 "standalone: attribute notation value token %s duplicated\n", 5805 name, NULL); 5806 if (!xmlDictOwns(ctxt->dict, name)) 5807 xmlFree((xmlChar *) name); 5808 break; 5809 } 5810 tmp = tmp->next; 5811 } 5812 if (tmp == NULL) { 5813 cur = xmlCreateEnumeration(name); 5814 if (cur == NULL) { 5815 xmlFreeEnumeration(ret); 5816 return(NULL); 5817 } 5818 if (last == NULL) ret = last = cur; 5819 else { 5820 last->next = cur; 5821 last = cur; 5822 } 5823 } 5824 SKIP_BLANKS; 5825 } while (RAW == '|'); 5826 if (RAW != ')') { 5827 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5828 xmlFreeEnumeration(ret); 5829 return(NULL); 5830 } 5831 NEXT; 5832 return(ret); 5833 } 5834 5835 /** 5836 * xmlParseEnumerationType: 5837 * @ctxt: an XML parser context 5838 * 5839 * parse an Enumeration attribute type. 5840 * 5841 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')'
 *
 * [ VC: Enumeration ]
 * Values of this type must match one of the Nmtoken tokens in
 * the declaration
 *
 * Returns: the enumeration attribute tree built while parsing.  NULL is
 *          returned when the opening '(' is missing or a node cannot be
 *          created.  When an Nmtoken or the closing ')' is missing, an
 *          error is raised and the entries parsed so far are returned.
 */

xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
    xmlEnumerationPtr head = NULL;
    xmlEnumerationPtr tail = NULL;

    if (RAW != '(') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
        return(NULL);
    }
    SHRINK;

    for (;;) {
        xmlChar *token;
        xmlEnumerationPtr seen;

        /* Step over the '(' or the '|' that introduced this entry. */
        NEXT;
        SKIP_BLANKS;
        token = xmlParseNmtoken(ctxt);
        if (token == NULL) {
            xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
            /*
             * NOTE(review): the partial list is handed back here, whereas
             * xmlParseNotationType() frees its list and returns NULL in
             * the equivalent situation - confirm this is intended.
             */
            return(head);
        }

        /* Look for an earlier entry carrying the same token. */
        for (seen = head; seen != NULL; seen = seen->next) {
            if (xmlStrEqual(token, seen->name))
                break;
        }

        if (seen != NULL) {
            /* Duplicate: report it and drop our copy of the token. */
            xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
          "standalone: attribute enumeration value token %s duplicated\n",
                             token, NULL);
            if (!xmlDictOwns(ctxt->dict, token))
                xmlFree(token);
        } else {
            xmlEnumerationPtr node = xmlCreateEnumeration(token);

            /* The token is released whether or not the node was built. */
            if (!xmlDictOwns(ctxt->dict, token))
                xmlFree(token);
            if (node == NULL) {
                xmlFreeEnumeration(head);
                return(NULL);
            }
            /* Append at the end so declaration order is kept. */
            if (tail == NULL)
                head = node;
            else
                tail->next = node;
            tail = node;
        }

        SKIP_BLANKS;
        if (RAW != '|')
            break;
    }

    if (RAW != ')') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
        return(head);
    }
    NEXT;
    return(head);
}

/**
 * xmlParseEnumeratedType:
 * @ctxt: an XML parser context
 * @tree: the enumeration tree built while parsing
 *
 * Parse an Enumerated attribute type.
 *
 * [57] EnumeratedType ::= NotationType | Enumeration
 *
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')' 5914 * 5915 * 5916 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5917 */ 5918 5919 int 5920 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5921 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5922 SKIP(8); 5923 if (!IS_BLANK_CH(CUR)) { 5924 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5925 "Space required after 'NOTATION'\n"); 5926 return(0); 5927 } 5928 SKIP_BLANKS; 5929 *tree = xmlParseNotationType(ctxt); 5930 if (*tree == NULL) return(0); 5931 return(XML_ATTRIBUTE_NOTATION); 5932 } 5933 *tree = xmlParseEnumerationType(ctxt); 5934 if (*tree == NULL) return(0); 5935 return(XML_ATTRIBUTE_ENUMERATION); 5936 } 5937 5938 /** 5939 * xmlParseAttributeType: 5940 * @ctxt: an XML parser context 5941 * @tree: the enumeration tree built while parsing 5942 * 5943 * parse the Attribute list def for an element 5944 * 5945 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5946 * 5947 * [55] StringType ::= 'CDATA' 5948 * 5949 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5950 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5951 * 5952 * Validity constraints for attribute values syntax are checked in 5953 * xmlValidateAttributeValue() 5954 * 5955 * [ VC: ID ] 5956 * Values of type ID must match the Name production. A name must not 5957 * appear more than once in an XML document as a value of this type; 5958 * i.e., ID values must uniquely identify the elements which bear them. 5959 * 5960 * [ VC: One ID per Element Type ] 5961 * No element type may have more than one ID attribute specified. 5962 * 5963 * [ VC: ID Attribute Default ] 5964 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5965 * 5966 * [ VC: IDREF ] 5967 * Values of type IDREF must match the Name production, and values 5968 * of type IDREFS must match Names; each IDREF Name must match the value 5969 * of an ID attribute on some element in the XML document; i.e. 
IDREF 5970 * values must match the value of some ID attribute. 5971 * 5972 * [ VC: Entity Name ] 5973 * Values of type ENTITY must match the Name production, values 5974 * of type ENTITIES must match Names; each Entity Name must match the 5975 * name of an unparsed entity declared in the DTD. 5976 * 5977 * [ VC: Name Token ] 5978 * Values of type NMTOKEN must match the Nmtoken production; values 5979 * of type NMTOKENS must match Nmtokens. 5980 * 5981 * Returns the attribute type 5982 */ 5983 int 5984 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5985 SHRINK; 5986 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5987 SKIP(5); 5988 return(XML_ATTRIBUTE_CDATA); 5989 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5990 SKIP(6); 5991 return(XML_ATTRIBUTE_IDREFS); 5992 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5993 SKIP(5); 5994 return(XML_ATTRIBUTE_IDREF); 5995 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5996 SKIP(2); 5997 return(XML_ATTRIBUTE_ID); 5998 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5999 SKIP(6); 6000 return(XML_ATTRIBUTE_ENTITY); 6001 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 6002 SKIP(8); 6003 return(XML_ATTRIBUTE_ENTITIES); 6004 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 6005 SKIP(8); 6006 return(XML_ATTRIBUTE_NMTOKENS); 6007 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 6008 SKIP(7); 6009 return(XML_ATTRIBUTE_NMTOKEN); 6010 } 6011 return(xmlParseEnumeratedType(ctxt, tree)); 6012 } 6013 6014 /** 6015 * xmlParseAttributeListDecl: 6016 * @ctxt: an XML parser context 6017 * 6018 * : parse the Attribute list def for an element 6019 * 6020 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 6021 * 6022 * [53] AttDef ::= S Name S AttType S DefaultDecl 6023 * 6024 */ 6025 void 6026 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 6027 const xmlChar *elemName; 6028 const xmlChar *attrName; 6029 xmlEnumerationPtr tree; 6030 6031 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 6032 xmlParserInputPtr input = ctxt->input; 6033 6034 SKIP(9); 6035 if (!IS_BLANK_CH(CUR)) { 6036 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6037 "Space required after '<!ATTLIST'\n"); 6038 } 6039 SKIP_BLANKS; 6040 elemName = xmlParseName(ctxt); 6041 if (elemName == NULL) { 6042 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6043 "ATTLIST: no name for Element\n"); 6044 return; 6045 } 6046 SKIP_BLANKS; 6047 GROW; 6048 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 6049 const xmlChar *check = CUR_PTR; 6050 int type; 6051 int def; 6052 xmlChar *defaultValue = NULL; 6053 6054 GROW; 6055 tree = NULL; 6056 attrName = xmlParseName(ctxt); 6057 if (attrName == NULL) { 6058 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6059 "ATTLIST: no name for Attribute\n"); 6060 break; 6061 } 6062 GROW; 6063 if (!IS_BLANK_CH(CUR)) { 6064 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6065 "Space required after the attribute name\n"); 6066 break; 6067 } 6068 SKIP_BLANKS; 6069 6070 type = xmlParseAttributeType(ctxt, &tree); 6071 if (type <= 0) { 6072 break; 6073 } 6074 6075 GROW; 6076 if (!IS_BLANK_CH(CUR)) { 6077 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6078 "Space required after the attribute type\n"); 6079 if (tree != NULL) 6080 xmlFreeEnumeration(tree); 6081 break; 6082 } 6083 SKIP_BLANKS; 6084 6085 def = xmlParseDefaultDecl(ctxt, &defaultValue); 6086 if (def <= 0) { 6087 if (defaultValue != NULL) 6088 xmlFree(defaultValue); 6089 if (tree != NULL) 6090 xmlFreeEnumeration(tree); 6091 break; 6092 } 6093 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 6094 xmlAttrNormalizeSpace(defaultValue, defaultValue); 6095 6096 GROW; 6097 if (RAW != '>') { 6098 if 
(!IS_BLANK_CH(CUR)) { 6099 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6100 "Space required after the attribute default value\n"); 6101 if (defaultValue != NULL) 6102 xmlFree(defaultValue); 6103 if (tree != NULL) 6104 xmlFreeEnumeration(tree); 6105 break; 6106 } 6107 SKIP_BLANKS; 6108 } 6109 if (check == CUR_PTR) { 6110 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6111 "in xmlParseAttributeListDecl\n"); 6112 if (defaultValue != NULL) 6113 xmlFree(defaultValue); 6114 if (tree != NULL) 6115 xmlFreeEnumeration(tree); 6116 break; 6117 } 6118 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6119 (ctxt->sax->attributeDecl != NULL)) 6120 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6121 type, def, defaultValue, tree); 6122 else if (tree != NULL) 6123 xmlFreeEnumeration(tree); 6124 6125 if ((ctxt->sax2) && (defaultValue != NULL) && 6126 (def != XML_ATTRIBUTE_IMPLIED) && 6127 (def != XML_ATTRIBUTE_REQUIRED)) { 6128 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6129 } 6130 if (ctxt->sax2) { 6131 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6132 } 6133 if (defaultValue != NULL) 6134 xmlFree(defaultValue); 6135 GROW; 6136 } 6137 if (RAW == '>') { 6138 if (input != ctxt->input) { 6139 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6140 "Attribute list declaration doesn't start and stop in the same entity\n", 6141 NULL, NULL); 6142 } 6143 NEXT; 6144 } 6145 } 6146 } 6147 6148 /** 6149 * xmlParseElementMixedContentDecl: 6150 * @ctxt: an XML parser context 6151 * @inputchk: the input used for the current entity, needed for boundary checks 6152 * 6153 * parse the declaration for a Mixed Element content 6154 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6155 * 6156 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6157 * '(' S? '#PCDATA' S? 
')' 6158 * 6159 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6160 * 6161 * [ VC: No Duplicate Types ] 6162 * The same name must not appear more than once in a single 6163 * mixed-content declaration. 6164 * 6165 * returns: the list of the xmlElementContentPtr describing the element choices 6166 */ 6167 xmlElementContentPtr 6168 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6169 xmlElementContentPtr ret = NULL, cur = NULL, n; 6170 const xmlChar *elem = NULL; 6171 6172 GROW; 6173 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6174 SKIP(7); 6175 SKIP_BLANKS; 6176 SHRINK; 6177 if (RAW == ')') { 6178 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6179 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6180 "Element content declaration doesn't start and stop in the same entity\n", 6181 NULL, NULL); 6182 } 6183 NEXT; 6184 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6185 if (ret == NULL) 6186 return(NULL); 6187 if (RAW == '*') { 6188 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6189 NEXT; 6190 } 6191 return(ret); 6192 } 6193 if ((RAW == '(') || (RAW == '|')) { 6194 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6195 if (ret == NULL) return(NULL); 6196 } 6197 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6198 NEXT; 6199 if (elem == NULL) { 6200 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6201 if (ret == NULL) return(NULL); 6202 ret->c1 = cur; 6203 if (cur != NULL) 6204 cur->parent = ret; 6205 cur = ret; 6206 } else { 6207 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6208 if (n == NULL) return(NULL); 6209 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6210 if (n->c1 != NULL) 6211 n->c1->parent = n; 6212 cur->c2 = n; 6213 if (n != NULL) 6214 n->parent = cur; 6215 cur = n; 6216 } 6217 SKIP_BLANKS; 6218 elem = xmlParseName(ctxt); 6219 if (elem == NULL) 
{ 6220 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6221 "xmlParseElementMixedContentDecl : Name expected\n"); 6222 xmlFreeDocElementContent(ctxt->myDoc, cur); 6223 return(NULL); 6224 } 6225 SKIP_BLANKS; 6226 GROW; 6227 } 6228 if ((RAW == ')') && (NXT(1) == '*')) { 6229 if (elem != NULL) { 6230 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6231 XML_ELEMENT_CONTENT_ELEMENT); 6232 if (cur->c2 != NULL) 6233 cur->c2->parent = cur; 6234 } 6235 if (ret != NULL) 6236 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6237 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6238 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6239 "Element content declaration doesn't start and stop in the same entity\n", 6240 NULL, NULL); 6241 } 6242 SKIP(2); 6243 } else { 6244 xmlFreeDocElementContent(ctxt->myDoc, ret); 6245 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6246 return(NULL); 6247 } 6248 6249 } else { 6250 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6251 } 6252 return(ret); 6253 } 6254 6255 /** 6256 * xmlParseElementChildrenContentDeclPriv: 6257 * @ctxt: an XML parser context 6258 * @inputchk: the input used for the current entity, needed for boundary checks 6259 * @depth: the level of recursion 6260 * 6261 * parse the declaration for a Mixed Element content 6262 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6263 * 6264 * 6265 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6266 * 6267 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6268 * 6269 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6270 * 6271 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6272 * 6273 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6274 * TODO Parameter-entity replacement text must be properly nested 6275 * with parenthesized groups. 
That is to say, if either of the 6276 * opening or closing parentheses in a choice, seq, or Mixed 6277 * construct is contained in the replacement text for a parameter 6278 * entity, both must be contained in the same replacement text. For 6279 * interoperability, if a parameter-entity reference appears in a 6280 * choice, seq, or Mixed construct, its replacement text should not 6281 * be empty, and neither the first nor last non-blank character of 6282 * the replacement text should be a connector (| or ,). 6283 * 6284 * Returns the tree of xmlElementContentPtr describing the element 6285 * hierarchy. 6286 */ 6287 static xmlElementContentPtr 6288 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6289 int depth) { 6290 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6291 const xmlChar *elem; 6292 xmlChar type = 0; 6293 6294 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6295 (depth > 2048)) { 6296 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6297 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6298 depth); 6299 return(NULL); 6300 } 6301 SKIP_BLANKS; 6302 GROW; 6303 if (RAW == '(') { 6304 int inputid = ctxt->input->id; 6305 6306 /* Recurse on first child */ 6307 NEXT; 6308 SKIP_BLANKS; 6309 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6310 depth + 1); 6311 SKIP_BLANKS; 6312 GROW; 6313 } else { 6314 elem = xmlParseName(ctxt); 6315 if (elem == NULL) { 6316 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6317 return(NULL); 6318 } 6319 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6320 if (cur == NULL) { 6321 xmlErrMemory(ctxt, NULL); 6322 return(NULL); 6323 } 6324 GROW; 6325 if (RAW == '?') { 6326 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6327 NEXT; 6328 } else if (RAW == '*') { 6329 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6330 NEXT; 6331 } else if (RAW == '+') { 6332 cur->ocur = 
XML_ELEMENT_CONTENT_PLUS; 6333 NEXT; 6334 } else { 6335 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6336 } 6337 GROW; 6338 } 6339 SKIP_BLANKS; 6340 SHRINK; 6341 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6342 /* 6343 * Each loop we parse one separator and one element. 6344 */ 6345 if (RAW == ',') { 6346 if (type == 0) type = CUR; 6347 6348 /* 6349 * Detect "Name | Name , Name" error 6350 */ 6351 else if (type != CUR) { 6352 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6353 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6354 type); 6355 if ((last != NULL) && (last != ret)) 6356 xmlFreeDocElementContent(ctxt->myDoc, last); 6357 if (ret != NULL) 6358 xmlFreeDocElementContent(ctxt->myDoc, ret); 6359 return(NULL); 6360 } 6361 NEXT; 6362 6363 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6364 if (op == NULL) { 6365 if ((last != NULL) && (last != ret)) 6366 xmlFreeDocElementContent(ctxt->myDoc, last); 6367 xmlFreeDocElementContent(ctxt->myDoc, ret); 6368 return(NULL); 6369 } 6370 if (last == NULL) { 6371 op->c1 = ret; 6372 if (ret != NULL) 6373 ret->parent = op; 6374 ret = cur = op; 6375 } else { 6376 cur->c2 = op; 6377 if (op != NULL) 6378 op->parent = cur; 6379 op->c1 = last; 6380 if (last != NULL) 6381 last->parent = op; 6382 cur =op; 6383 last = NULL; 6384 } 6385 } else if (RAW == '|') { 6386 if (type == 0) type = CUR; 6387 6388 /* 6389 * Detect "Name , Name | Name" error 6390 */ 6391 else if (type != CUR) { 6392 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6393 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6394 type); 6395 if ((last != NULL) && (last != ret)) 6396 xmlFreeDocElementContent(ctxt->myDoc, last); 6397 if (ret != NULL) 6398 xmlFreeDocElementContent(ctxt->myDoc, ret); 6399 return(NULL); 6400 } 6401 NEXT; 6402 6403 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6404 if (op == NULL) { 6405 if ((last != NULL) && (last != ret)) 6406 
xmlFreeDocElementContent(ctxt->myDoc, last); 6407 if (ret != NULL) 6408 xmlFreeDocElementContent(ctxt->myDoc, ret); 6409 return(NULL); 6410 } 6411 if (last == NULL) { 6412 op->c1 = ret; 6413 if (ret != NULL) 6414 ret->parent = op; 6415 ret = cur = op; 6416 } else { 6417 cur->c2 = op; 6418 if (op != NULL) 6419 op->parent = cur; 6420 op->c1 = last; 6421 if (last != NULL) 6422 last->parent = op; 6423 cur =op; 6424 last = NULL; 6425 } 6426 } else { 6427 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6428 if ((last != NULL) && (last != ret)) 6429 xmlFreeDocElementContent(ctxt->myDoc, last); 6430 if (ret != NULL) 6431 xmlFreeDocElementContent(ctxt->myDoc, ret); 6432 return(NULL); 6433 } 6434 GROW; 6435 SKIP_BLANKS; 6436 GROW; 6437 if (RAW == '(') { 6438 int inputid = ctxt->input->id; 6439 /* Recurse on second child */ 6440 NEXT; 6441 SKIP_BLANKS; 6442 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6443 depth + 1); 6444 SKIP_BLANKS; 6445 } else { 6446 elem = xmlParseName(ctxt); 6447 if (elem == NULL) { 6448 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6449 if (ret != NULL) 6450 xmlFreeDocElementContent(ctxt->myDoc, ret); 6451 return(NULL); 6452 } 6453 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6454 if (last == NULL) { 6455 if (ret != NULL) 6456 xmlFreeDocElementContent(ctxt->myDoc, ret); 6457 return(NULL); 6458 } 6459 if (RAW == '?') { 6460 last->ocur = XML_ELEMENT_CONTENT_OPT; 6461 NEXT; 6462 } else if (RAW == '*') { 6463 last->ocur = XML_ELEMENT_CONTENT_MULT; 6464 NEXT; 6465 } else if (RAW == '+') { 6466 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6467 NEXT; 6468 } else { 6469 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6470 } 6471 } 6472 SKIP_BLANKS; 6473 GROW; 6474 } 6475 if ((cur != NULL) && (last != NULL)) { 6476 cur->c2 = last; 6477 if (last != NULL) 6478 last->parent = cur; 6479 } 6480 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6481 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6482 
"Element content declaration doesn't start and stop in the same entity\n", 6483 NULL, NULL); 6484 } 6485 NEXT; 6486 if (RAW == '?') { 6487 if (ret != NULL) { 6488 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6489 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6490 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6491 else 6492 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6493 } 6494 NEXT; 6495 } else if (RAW == '*') { 6496 if (ret != NULL) { 6497 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6498 cur = ret; 6499 /* 6500 * Some normalization: 6501 * (a | b* | c?)* == (a | b | c)* 6502 */ 6503 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6504 if ((cur->c1 != NULL) && 6505 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6506 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6507 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6508 if ((cur->c2 != NULL) && 6509 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6510 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6511 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6512 cur = cur->c2; 6513 } 6514 } 6515 NEXT; 6516 } else if (RAW == '+') { 6517 if (ret != NULL) { 6518 int found = 0; 6519 6520 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6521 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6522 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6523 else 6524 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6525 /* 6526 * Some normalization: 6527 * (a | b*)+ == (a | b)* 6528 * (a | b?)+ == (a | b)* 6529 */ 6530 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6531 if ((cur->c1 != NULL) && 6532 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6533 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6534 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6535 found = 1; 6536 } 6537 if ((cur->c2 != NULL) && 6538 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6539 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6540 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6541 found = 1; 6542 } 6543 cur = cur->c2; 6544 } 6545 if (found) 6546 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6547 } 6548 NEXT; 6549 
} 6550 return(ret); 6551 } 6552 6553 /** 6554 * xmlParseElementChildrenContentDecl: 6555 * @ctxt: an XML parser context 6556 * @inputchk: the input used for the current entity, needed for boundary checks 6557 * 6558 * parse the declaration for a Mixed Element content 6559 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6560 * 6561 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6562 * 6563 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6564 * 6565 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6566 * 6567 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6568 * 6569 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6570 * TODO Parameter-entity replacement text must be properly nested 6571 * with parenthesized groups. That is to say, if either of the 6572 * opening or closing parentheses in a choice, seq, or Mixed 6573 * construct is contained in the replacement text for a parameter 6574 * entity, both must be contained in the same replacement text. For 6575 * interoperability, if a parameter-entity reference appears in a 6576 * choice, seq, or Mixed construct, its replacement text should not 6577 * be empty, and neither the first nor last non-blank character of 6578 * the replacement text should be a connector (| or ,). 6579 * 6580 * Returns the tree of xmlElementContentPtr describing the element 6581 * hierarchy. 6582 */ 6583 xmlElementContentPtr 6584 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6585 /* stub left for API/ABI compat */ 6586 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6587 } 6588 6589 /** 6590 * xmlParseElementContentDecl: 6591 * @ctxt: an XML parser context 6592 * @name: the name of the element being defined. 
6593 * @result: the Element Content pointer will be stored here if any 6594 * 6595 * parse the declaration for an Element content either Mixed or Children, 6596 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6597 * 6598 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6599 * 6600 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6601 */ 6602 6603 int 6604 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6605 xmlElementContentPtr *result) { 6606 6607 xmlElementContentPtr tree = NULL; 6608 int inputid = ctxt->input->id; 6609 int res; 6610 6611 *result = NULL; 6612 6613 if (RAW != '(') { 6614 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6615 "xmlParseElementContentDecl : %s '(' expected\n", name); 6616 return(-1); 6617 } 6618 NEXT; 6619 GROW; 6620 if (ctxt->instate == XML_PARSER_EOF) 6621 return(-1); 6622 SKIP_BLANKS; 6623 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6624 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6625 res = XML_ELEMENT_TYPE_MIXED; 6626 } else { 6627 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6628 res = XML_ELEMENT_TYPE_ELEMENT; 6629 } 6630 SKIP_BLANKS; 6631 *result = tree; 6632 return(res); 6633 } 6634 6635 /** 6636 * xmlParseElementDecl: 6637 * @ctxt: an XML parser context 6638 * 6639 * parse an Element declaration. 6640 * 6641 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlElementContentPtr content = NULL;
    xmlParserInputPtr input;
    int ret = -1;

    /* GROW; done in the caller */
    if (!(CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')))
        return(ret);

    /* Remembered to detect a declaration spanning several entities. */
    input = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'ELEMENT'\n");
    }
    SKIP_BLANKS;
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseElementDecl: no name for Element\n");
        return(-1);
    }
    /* Leave inputs that are exhausted before looking at the next char. */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after the element name\n");
    }
    SKIP_BLANKS;

    if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
        /*
         * Element must always be empty.
         */
        SKIP(5);
        ret = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') &&
               (NXT(2) == 'Y')) {
        /*
         * Element is a generic container.
         */
        SKIP(3);
        ret = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        ret = xmlParseElementContentDecl(ctxt, name, &content);
    } else {
        /*
         * [ WFC: PEs in Internal Subset ] error handling.
         */
        if ((RAW == '%') && (ctxt->external == 0) &&
            (ctxt->inputNr == 1)) {
            xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
      "PEReference: forbidden within markup decl in internal subset\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        }
        return(-1);
    }

    SKIP_BLANKS;
    /*
     * Pop-up of finished entities.
     */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    SKIP_BLANKS;

    if (RAW != '>') {
        /* Unterminated declaration: drop the model, keep the type. */
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
        if (content != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
        return(ret);
    }

    if (input != ctxt->input) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Element declaration doesn't start and stop in the same entity\n");
    }
    NEXT;

    if ((ctxt->sax == NULL) || (ctxt->disableSAX) ||
        (ctxt->sax->elementDecl == NULL)) {
        /* Nobody to hand the content model to. */
        if (content != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
        return(ret);
    }

    if (content != NULL)
        content->parent = NULL;
    ctxt->sax->elementDecl(ctxt->userData, name, ret, content);
    if ((content != NULL) && (content->parent == NULL)) {
        /*
         * this is a trick: if xmlAddElementDecl is called,
         * instead of copying the full tree it is plugged directly
         * if called from the parser. Avoid duplicating the
         * interfaces or change the API/ABI
         *
         * A parent still equal to NULL after the callback is taken to
         * mean that the tree was not plugged anywhere, so it is
         * released here.
         */
        xmlFreeDocElementContent(ctxt->myDoc, content);
    }
    return(ret);
}

/**
 * xmlParseConditionalSections
 * @ctxt: an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S?
'[' ignoreSectContents* ']]>' 6757 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6758 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6759 */ 6760 6761 static void 6762 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6763 int id = ctxt->input->id; 6764 6765 SKIP(3); 6766 SKIP_BLANKS; 6767 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6768 SKIP(7); 6769 SKIP_BLANKS; 6770 if (RAW != '[') { 6771 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6772 } else { 6773 if (ctxt->input->id != id) { 6774 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6775 "All markup of the conditional section is not in the same entity\n", 6776 NULL, NULL); 6777 } 6778 NEXT; 6779 } 6780 if (xmlParserDebugEntities) { 6781 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6782 xmlGenericError(xmlGenericErrorContext, 6783 "%s(%d): ", ctxt->input->filename, 6784 ctxt->input->line); 6785 xmlGenericError(xmlGenericErrorContext, 6786 "Entering INCLUDE Conditional Section\n"); 6787 } 6788 6789 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6790 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { 6791 const xmlChar *check = CUR_PTR; 6792 unsigned int cons = ctxt->input->consumed; 6793 6794 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6795 xmlParseConditionalSections(ctxt); 6796 } else if (IS_BLANK_CH(CUR)) { 6797 NEXT; 6798 } else if (RAW == '%') { 6799 xmlParsePEReference(ctxt); 6800 } else 6801 xmlParseMarkupDecl(ctxt); 6802 6803 /* 6804 * Pop-up of finished entities. 
6805 */ 6806 while ((RAW == 0) && (ctxt->inputNr > 1)) 6807 xmlPopInput(ctxt); 6808 6809 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6810 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6811 break; 6812 } 6813 } 6814 if (xmlParserDebugEntities) { 6815 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6816 xmlGenericError(xmlGenericErrorContext, 6817 "%s(%d): ", ctxt->input->filename, 6818 ctxt->input->line); 6819 xmlGenericError(xmlGenericErrorContext, 6820 "Leaving INCLUDE Conditional Section\n"); 6821 } 6822 6823 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6824 int state; 6825 xmlParserInputState instate; 6826 int depth = 0; 6827 6828 SKIP(6); 6829 SKIP_BLANKS; 6830 if (RAW != '[') { 6831 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6832 } else { 6833 if (ctxt->input->id != id) { 6834 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6835 "All markup of the conditional section is not in the same entity\n", 6836 NULL, NULL); 6837 } 6838 NEXT; 6839 } 6840 if (xmlParserDebugEntities) { 6841 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6842 xmlGenericError(xmlGenericErrorContext, 6843 "%s(%d): ", ctxt->input->filename, 6844 ctxt->input->line); 6845 xmlGenericError(xmlGenericErrorContext, 6846 "Entering IGNORE Conditional Section\n"); 6847 } 6848 6849 /* 6850 * Parse up to the end of the conditional section 6851 * But disable SAX event generating DTD building in the meantime 6852 */ 6853 state = ctxt->disableSAX; 6854 instate = ctxt->instate; 6855 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6856 ctxt->instate = XML_PARSER_IGNORE; 6857 6858 while (((depth >= 0) && (RAW != 0)) && 6859 (ctxt->instate != XML_PARSER_EOF)) { 6860 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6861 depth++; 6862 SKIP(3); 6863 continue; 6864 } 6865 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6866 if (--depth >= 0) SKIP(3); 6867 continue; 6868 } 6869 NEXT; 6870 continue; 6871 } 6872 6873 ctxt->disableSAX = 
state; 6874 ctxt->instate = instate; 6875 6876 if (xmlParserDebugEntities) { 6877 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6878 xmlGenericError(xmlGenericErrorContext, 6879 "%s(%d): ", ctxt->input->filename, 6880 ctxt->input->line); 6881 xmlGenericError(xmlGenericErrorContext, 6882 "Leaving IGNORE Conditional Section\n"); 6883 } 6884 6885 } else { 6886 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6887 } 6888 6889 if (RAW == 0) 6890 SHRINK; 6891 6892 if (RAW == 0) { 6893 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6894 } else { 6895 if (ctxt->input->id != id) { 6896 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6897 "All markup of the conditional section is not in the same entity\n", 6898 NULL, NULL); 6899 } 6900 SKIP(3); 6901 } 6902 } 6903 6904 /** 6905 * xmlParseMarkupDecl: 6906 * @ctxt: an XML parser context 6907 * 6908 * parse Markup declarations 6909 * 6910 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6911 * NotationDecl | PI | Comment 6912 * 6913 * [ VC: Proper Declaration/PE Nesting ] 6914 * Parameter-entity replacement text must be properly nested with 6915 * markup declarations. That is to say, if either the first character 6916 * or the last character of a markup declaration (markupdecl above) is 6917 * contained in the replacement text for a parameter-entity reference, 6918 * both must be contained in the same replacement text. 6919 * 6920 * [ WFC: PEs in Internal Subset ] 6921 * In the internal DTD subset, parameter-entity references can occur 6922 * only where markup declarations can occur, not within markup declarations. 6923 * (This does not apply to references that occur in external parameter 6924 * entities or to the external subset.) 
6925 */ 6926 void 6927 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6928 GROW; 6929 if (CUR == '<') { 6930 if (NXT(1) == '!') { 6931 switch (NXT(2)) { 6932 case 'E': 6933 if (NXT(3) == 'L') 6934 xmlParseElementDecl(ctxt); 6935 else if (NXT(3) == 'N') 6936 xmlParseEntityDecl(ctxt); 6937 break; 6938 case 'A': 6939 xmlParseAttributeListDecl(ctxt); 6940 break; 6941 case 'N': 6942 xmlParseNotationDecl(ctxt); 6943 break; 6944 case '-': 6945 xmlParseComment(ctxt); 6946 break; 6947 default: 6948 /* there is an error but it will be detected later */ 6949 break; 6950 } 6951 } else if (NXT(1) == '?') { 6952 xmlParsePI(ctxt); 6953 } 6954 } 6955 /* 6956 * This is only for internal subset. On external entities, 6957 * the replacement is done before parsing stage 6958 */ 6959 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6960 xmlParsePEReference(ctxt); 6961 6962 /* 6963 * Conditional sections are allowed from entities included 6964 * by PE References in the internal subset. 6965 */ 6966 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6967 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6968 xmlParseConditionalSections(ctxt); 6969 } 6970 } 6971 6972 ctxt->instate = XML_PARSER_DTD; 6973 } 6974 6975 /** 6976 * xmlParseTextDecl: 6977 * @ctxt: an XML parser context 6978 * 6979 * parse an XML declaration header for external entities 6980 * 6981 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6982 */ 6983 6984 void 6985 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6986 xmlChar *version; 6987 const xmlChar *encoding; 6988 6989 /* 6990 * We know that '<?xml' is here. 6991 */ 6992 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6993 SKIP(5); 6994 } else { 6995 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6996 return; 6997 } 6998 6999 if (!IS_BLANK_CH(CUR)) { 7000 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7001 "Space needed after '<?xml'\n"); 7002 } 7003 SKIP_BLANKS; 7004 7005 /* 7006 * We may have the VersionInfo here. 
7007 */ 7008 version = xmlParseVersionInfo(ctxt); 7009 if (version == NULL) 7010 version = xmlCharStrdup(XML_DEFAULT_VERSION); 7011 else { 7012 if (!IS_BLANK_CH(CUR)) { 7013 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7014 "Space needed here\n"); 7015 } 7016 } 7017 ctxt->input->version = version; 7018 7019 /* 7020 * We must have the encoding declaration 7021 */ 7022 encoding = xmlParseEncodingDecl(ctxt); 7023 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7024 /* 7025 * The XML REC instructs us to stop parsing right here 7026 */ 7027 return; 7028 } 7029 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 7030 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 7031 "Missing encoding in text declaration\n"); 7032 } 7033 7034 SKIP_BLANKS; 7035 if ((RAW == '?') && (NXT(1) == '>')) { 7036 SKIP(2); 7037 } else if (RAW == '>') { 7038 /* Deprecated old WD ... */ 7039 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7040 NEXT; 7041 } else { 7042 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7043 MOVETO_ENDTAG(CUR_PTR); 7044 NEXT; 7045 } 7046 } 7047 7048 /** 7049 * xmlParseExternalSubset: 7050 * @ctxt: an XML parser context 7051 * @ExternalID: the external identifier 7052 * @SystemID: the system identifier (or URL) 7053 * 7054 * parse Markup declarations from an external subset 7055 * 7056 * [30] extSubset ::= textDecl? 
extSubsetDecl 7057 * 7058 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 7059 */ 7060 void 7061 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 7062 const xmlChar *SystemID) { 7063 xmlDetectSAX2(ctxt); 7064 GROW; 7065 7066 if ((ctxt->encoding == NULL) && 7067 (ctxt->input->end - ctxt->input->cur >= 4)) { 7068 xmlChar start[4]; 7069 xmlCharEncoding enc; 7070 7071 start[0] = RAW; 7072 start[1] = NXT(1); 7073 start[2] = NXT(2); 7074 start[3] = NXT(3); 7075 enc = xmlDetectCharEncoding(start, 4); 7076 if (enc != XML_CHAR_ENCODING_NONE) 7077 xmlSwitchEncoding(ctxt, enc); 7078 } 7079 7080 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 7081 xmlParseTextDecl(ctxt); 7082 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7083 /* 7084 * The XML REC instructs us to stop parsing right here 7085 */ 7086 ctxt->instate = XML_PARSER_EOF; 7087 return; 7088 } 7089 } 7090 if (ctxt->myDoc == NULL) { 7091 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 7092 if (ctxt->myDoc == NULL) { 7093 xmlErrMemory(ctxt, "New Doc failed"); 7094 return; 7095 } 7096 ctxt->myDoc->properties = XML_DOC_INTERNAL; 7097 } 7098 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 7099 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 7100 7101 ctxt->instate = XML_PARSER_DTD; 7102 ctxt->external = 1; 7103 while (((RAW == '<') && (NXT(1) == '?')) || 7104 ((RAW == '<') && (NXT(1) == '!')) || 7105 (RAW == '%') || IS_BLANK_CH(CUR)) { 7106 const xmlChar *check = CUR_PTR; 7107 unsigned int cons = ctxt->input->consumed; 7108 7109 GROW; 7110 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7111 xmlParseConditionalSections(ctxt); 7112 } else if (IS_BLANK_CH(CUR)) { 7113 NEXT; 7114 } else if (RAW == '%') { 7115 xmlParsePEReference(ctxt); 7116 } else 7117 xmlParseMarkupDecl(ctxt); 7118 7119 /* 7120 * Pop-up of finished entities. 
7121 */ 7122 while ((RAW == 0) && (ctxt->inputNr > 1)) 7123 xmlPopInput(ctxt); 7124 7125 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7126 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7127 break; 7128 } 7129 } 7130 7131 if (RAW != 0) { 7132 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7133 } 7134 7135 } 7136 7137 /** 7138 * xmlParseReference: 7139 * @ctxt: an XML parser context 7140 * 7141 * parse and handle entity references in content, depending on the SAX 7142 * interface, this may end-up in a call to character() if this is a 7143 * CharRef, a predefined entity, if there is no reference() callback. 7144 * or if the parser was asked to switch to that mode. 7145 * 7146 * [67] Reference ::= EntityRef | CharRef 7147 */ 7148 void 7149 xmlParseReference(xmlParserCtxtPtr ctxt) { 7150 xmlEntityPtr ent; 7151 xmlChar *val; 7152 int was_checked; 7153 xmlNodePtr list = NULL; 7154 xmlParserErrors ret = XML_ERR_OK; 7155 7156 7157 if (RAW != '&') 7158 return; 7159 7160 /* 7161 * Simple case of a CharRef 7162 */ 7163 if (NXT(1) == '#') { 7164 int i = 0; 7165 xmlChar out[10]; 7166 int hex = NXT(2); 7167 int value = xmlParseCharRef(ctxt); 7168 7169 if (value == 0) 7170 return; 7171 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 7172 /* 7173 * So we are using non-UTF-8 buffers 7174 * Check that the char fit on 8bits, if not 7175 * generate a CharRef. 
7176 */ 7177 if (value <= 0xFF) { 7178 out[0] = value; 7179 out[1] = 0; 7180 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7181 (!ctxt->disableSAX)) 7182 ctxt->sax->characters(ctxt->userData, out, 1); 7183 } else { 7184 if ((hex == 'x') || (hex == 'X')) 7185 snprintf((char *)out, sizeof(out), "#x%X", value); 7186 else 7187 snprintf((char *)out, sizeof(out), "#%d", value); 7188 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7189 (!ctxt->disableSAX)) 7190 ctxt->sax->reference(ctxt->userData, out); 7191 } 7192 } else { 7193 /* 7194 * Just encode the value in UTF-8 7195 */ 7196 COPY_BUF(0 ,out, i, value); 7197 out[i] = 0; 7198 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7199 (!ctxt->disableSAX)) 7200 ctxt->sax->characters(ctxt->userData, out, i); 7201 } 7202 return; 7203 } 7204 7205 /* 7206 * We are seeing an entity reference 7207 */ 7208 ent = xmlParseEntityRef(ctxt); 7209 if (ent == NULL) return; 7210 if (!ctxt->wellFormed) 7211 return; 7212 was_checked = ent->checked; 7213 7214 /* special case of predefined entities */ 7215 if ((ent->name == NULL) || 7216 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 7217 val = ent->content; 7218 if (val == NULL) return; 7219 /* 7220 * inline the entity. 7221 */ 7222 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7223 (!ctxt->disableSAX)) 7224 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 7225 return; 7226 } 7227 7228 /* 7229 * The first reference to the entity trigger a parsing phase 7230 * where the ent->children is filled with the result from 7231 * the parsing. 7232 * Note: external parsed entities will not be loaded, it is not 7233 * required for a non-validating parser, unless the parsing option 7234 * of validating, or substituting entities were given. Doing so is 7235 * far more secure as the parser will only process data coming from 7236 * the document entity by default. 
7237 */ 7238 if (((ent->checked == 0) || 7239 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) && 7240 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || 7241 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { 7242 unsigned long oldnbent = ctxt->nbentities; 7243 7244 /* 7245 * This is a bit hackish but this seems the best 7246 * way to make sure both SAX and DOM entity support 7247 * behaves okay. 7248 */ 7249 void *user_data; 7250 if (ctxt->userData == ctxt) 7251 user_data = NULL; 7252 else 7253 user_data = ctxt->userData; 7254 7255 /* 7256 * Check that this entity is well formed 7257 * 4.3.2: An internal general parsed entity is well-formed 7258 * if its replacement text matches the production labeled 7259 * content. 7260 */ 7261 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7262 ctxt->depth++; 7263 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 7264 user_data, &list); 7265 ctxt->depth--; 7266 7267 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7268 ctxt->depth++; 7269 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 7270 user_data, ctxt->depth, ent->URI, 7271 ent->ExternalID, &list); 7272 ctxt->depth--; 7273 } else { 7274 ret = XML_ERR_ENTITY_PE_INTERNAL; 7275 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7276 "invalid entity type found\n", NULL); 7277 } 7278 7279 /* 7280 * Store the number of entities needing parsing for this entity 7281 * content and do checkings 7282 */ 7283 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 7284 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) 7285 ent->checked |= 1; 7286 if (ret == XML_ERR_ENTITY_LOOP) { 7287 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7288 xmlFreeNodeList(list); 7289 return; 7290 } 7291 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { 7292 xmlFreeNodeList(list); 7293 return; 7294 } 7295 7296 if ((ret == XML_ERR_OK) && (list != NULL)) { 7297 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 7298 (ent->etype == 
XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 7299 (ent->children == NULL)) { 7300 ent->children = list; 7301 if (ctxt->replaceEntities) { 7302 /* 7303 * Prune it directly in the generated document 7304 * except for single text nodes. 7305 */ 7306 if (((list->type == XML_TEXT_NODE) && 7307 (list->next == NULL)) || 7308 (ctxt->parseMode == XML_PARSE_READER)) { 7309 list->parent = (xmlNodePtr) ent; 7310 list = NULL; 7311 ent->owner = 1; 7312 } else { 7313 ent->owner = 0; 7314 while (list != NULL) { 7315 list->parent = (xmlNodePtr) ctxt->node; 7316 list->doc = ctxt->myDoc; 7317 if (list->next == NULL) 7318 ent->last = list; 7319 list = list->next; 7320 } 7321 list = ent->children; 7322 #ifdef LIBXML_LEGACY_ENABLED 7323 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7324 xmlAddEntityReference(ent, list, NULL); 7325 #endif /* LIBXML_LEGACY_ENABLED */ 7326 } 7327 } else { 7328 ent->owner = 1; 7329 while (list != NULL) { 7330 list->parent = (xmlNodePtr) ent; 7331 xmlSetTreeDoc(list, ent->doc); 7332 if (list->next == NULL) 7333 ent->last = list; 7334 list = list->next; 7335 } 7336 } 7337 } else { 7338 xmlFreeNodeList(list); 7339 list = NULL; 7340 } 7341 } else if ((ret != XML_ERR_OK) && 7342 (ret != XML_WAR_UNDECLARED_ENTITY)) { 7343 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7344 "Entity '%s' failed to parse\n", ent->name); 7345 xmlParserEntityCheck(ctxt, 0, ent, 0); 7346 } else if (list != NULL) { 7347 xmlFreeNodeList(list); 7348 list = NULL; 7349 } 7350 if (ent->checked == 0) 7351 ent->checked = 2; 7352 } else if (ent->checked != 1) { 7353 ctxt->nbentities += ent->checked / 2; 7354 } 7355 7356 /* 7357 * Now that the entity content has been gathered 7358 * provide it to the application, this can take different forms based 7359 * on the parsing modes. 7360 */ 7361 if (ent->children == NULL) { 7362 /* 7363 * Probably running in SAX mode and the callbacks don't 7364 * build the entity content. 
So unless we already went 7365 * though parsing for first checking go though the entity 7366 * content to generate callbacks associated to the entity 7367 */ 7368 if (was_checked != 0) { 7369 void *user_data; 7370 /* 7371 * This is a bit hackish but this seems the best 7372 * way to make sure both SAX and DOM entity support 7373 * behaves okay. 7374 */ 7375 if (ctxt->userData == ctxt) 7376 user_data = NULL; 7377 else 7378 user_data = ctxt->userData; 7379 7380 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7381 ctxt->depth++; 7382 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7383 ent->content, user_data, NULL); 7384 ctxt->depth--; 7385 } else if (ent->etype == 7386 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7387 ctxt->depth++; 7388 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7389 ctxt->sax, user_data, ctxt->depth, 7390 ent->URI, ent->ExternalID, NULL); 7391 ctxt->depth--; 7392 } else { 7393 ret = XML_ERR_ENTITY_PE_INTERNAL; 7394 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7395 "invalid entity type found\n", NULL); 7396 } 7397 if (ret == XML_ERR_ENTITY_LOOP) { 7398 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7399 return; 7400 } 7401 } 7402 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7403 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7404 /* 7405 * Entity reference callback comes second, it's somewhat 7406 * superfluous but a compatibility to historical behaviour 7407 */ 7408 ctxt->sax->reference(ctxt->userData, ent->name); 7409 } 7410 return; 7411 } 7412 7413 /* 7414 * If we didn't get any children for the entity being built 7415 */ 7416 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7417 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7418 /* 7419 * Create a node. 
7420 */ 7421 ctxt->sax->reference(ctxt->userData, ent->name); 7422 return; 7423 } 7424 7425 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7426 /* 7427 * There is a problem on the handling of _private for entities 7428 * (bug 155816): Should we copy the content of the field from 7429 * the entity (possibly overwriting some value set by the user 7430 * when a copy is created), should we leave it alone, or should 7431 * we try to take care of different situations? The problem 7432 * is exacerbated by the usage of this field by the xmlReader. 7433 * To fix this bug, we look at _private on the created node 7434 * and, if it's NULL, we copy in whatever was in the entity. 7435 * If it's not NULL we leave it alone. This is somewhat of a 7436 * hack - maybe we should have further tests to determine 7437 * what to do. 7438 */ 7439 if ((ctxt->node != NULL) && (ent->children != NULL)) { 7440 /* 7441 * Seems we are generating the DOM content, do 7442 * a simple tree copy for all references except the first 7443 * In the first occurrence list contains the replacement. 7444 */ 7445 if (((list == NULL) && (ent->owner == 0)) || 7446 (ctxt->parseMode == XML_PARSE_READER)) { 7447 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7448 7449 /* 7450 * We are copying here, make sure there is no abuse 7451 */ 7452 ctxt->sizeentcopy += ent->length + 5; 7453 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7454 return; 7455 7456 /* 7457 * when operating on a reader, the entities definitions 7458 * are always owning the entities subtree. 
7459 if (ctxt->parseMode == XML_PARSE_READER) 7460 ent->owner = 1; 7461 */ 7462 7463 cur = ent->children; 7464 while (cur != NULL) { 7465 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7466 if (nw != NULL) { 7467 if (nw->_private == NULL) 7468 nw->_private = cur->_private; 7469 if (firstChild == NULL){ 7470 firstChild = nw; 7471 } 7472 nw = xmlAddChild(ctxt->node, nw); 7473 } 7474 if (cur == ent->last) { 7475 /* 7476 * needed to detect some strange empty 7477 * node cases in the reader tests 7478 */ 7479 if ((ctxt->parseMode == XML_PARSE_READER) && 7480 (nw != NULL) && 7481 (nw->type == XML_ELEMENT_NODE) && 7482 (nw->children == NULL)) 7483 nw->extra = 1; 7484 7485 break; 7486 } 7487 cur = cur->next; 7488 } 7489 #ifdef LIBXML_LEGACY_ENABLED 7490 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7491 xmlAddEntityReference(ent, firstChild, nw); 7492 #endif /* LIBXML_LEGACY_ENABLED */ 7493 } else if ((list == NULL) || (ctxt->inputNr > 0)) { 7494 xmlNodePtr nw = NULL, cur, next, last, 7495 firstChild = NULL; 7496 7497 /* 7498 * We are copying here, make sure there is no abuse 7499 */ 7500 ctxt->sizeentcopy += ent->length + 5; 7501 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7502 return; 7503 7504 /* 7505 * Copy the entity child list and make it the new 7506 * entity child list. The goal is to make sure any 7507 * ID or REF referenced will be the one from the 7508 * document content and not the entity copy. 
7509 */ 7510 cur = ent->children; 7511 ent->children = NULL; 7512 last = ent->last; 7513 ent->last = NULL; 7514 while (cur != NULL) { 7515 next = cur->next; 7516 cur->next = NULL; 7517 cur->parent = NULL; 7518 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7519 if (nw != NULL) { 7520 if (nw->_private == NULL) 7521 nw->_private = cur->_private; 7522 if (firstChild == NULL){ 7523 firstChild = cur; 7524 } 7525 xmlAddChild((xmlNodePtr) ent, nw); 7526 xmlAddChild(ctxt->node, cur); 7527 } 7528 if (cur == last) 7529 break; 7530 cur = next; 7531 } 7532 if (ent->owner == 0) 7533 ent->owner = 1; 7534 #ifdef LIBXML_LEGACY_ENABLED 7535 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7536 xmlAddEntityReference(ent, firstChild, nw); 7537 #endif /* LIBXML_LEGACY_ENABLED */ 7538 } else { 7539 const xmlChar *nbktext; 7540 7541 /* 7542 * the name change is to avoid coalescing of the 7543 * node with a possible previous text one which 7544 * would make ent->children a dangling pointer 7545 */ 7546 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7547 -1); 7548 if (ent->children->type == XML_TEXT_NODE) 7549 ent->children->name = nbktext; 7550 if ((ent->last != ent->children) && 7551 (ent->last->type == XML_TEXT_NODE)) 7552 ent->last->name = nbktext; 7553 xmlAddChildList(ctxt->node, ent->children); 7554 } 7555 7556 /* 7557 * This is to avoid a nasty side effect, see 7558 * characters() in SAX.c 7559 */ 7560 ctxt->nodemem = 0; 7561 ctxt->nodelen = 0; 7562 return; 7563 } 7564 } 7565 } 7566 7567 /** 7568 * xmlParseEntityRef: 7569 * @ctxt: an XML parser context 7570 * 7571 * parse ENTITY references declarations 7572 * 7573 * [68] EntityRef ::= '&' Name ';' 7574 * 7575 * [ WFC: Entity Declared ] 7576 * In a document without any DTD, a document with only an internal DTD 7577 * subset which contains no parameter entity references, or a document 7578 * with "standalone='yes'", the Name given in the entity reference 7579 * must match that in an entity declaration, except that 
well-formed 7580 * documents need not declare any of the following entities: amp, lt, 7581 * gt, apos, quot. The declaration of a parameter entity must precede 7582 * any reference to it. Similarly, the declaration of a general entity 7583 * must precede any reference to it which appears in a default value in an 7584 * attribute-list declaration. Note that if entities are declared in the 7585 * external subset or in external parameter entities, a non-validating 7586 * processor is not obligated to read and process their declarations; 7587 * for such documents, the rule that an entity must be declared is a 7588 * well-formedness constraint only if standalone='yes'. 7589 * 7590 * [ WFC: Parsed Entity ] 7591 * An entity reference must not contain the name of an unparsed entity 7592 * 7593 * Returns the xmlEntityPtr if found, or NULL otherwise. 7594 */ 7595 xmlEntityPtr 7596 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7597 const xmlChar *name; 7598 xmlEntityPtr ent = NULL; 7599 7600 GROW; 7601 if (ctxt->instate == XML_PARSER_EOF) 7602 return(NULL); 7603 7604 if (RAW != '&') 7605 return(NULL); 7606 NEXT; 7607 name = xmlParseName(ctxt); 7608 if (name == NULL) { 7609 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7610 "xmlParseEntityRef: no name\n"); 7611 return(NULL); 7612 } 7613 if (RAW != ';') { 7614 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7615 return(NULL); 7616 } 7617 NEXT; 7618 7619 /* 7620 * Predefined entities override any extra definition 7621 */ 7622 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7623 ent = xmlGetPredefinedEntity(name); 7624 if (ent != NULL) 7625 return(ent); 7626 } 7627 7628 /* 7629 * Increase the number of entity references parsed 7630 */ 7631 ctxt->nbentities++; 7632 7633 /* 7634 * Ask first SAX for entity resolution, otherwise try the 7635 * entities which may have stored in the parser context. 
7636 */ 7637 if (ctxt->sax != NULL) { 7638 if (ctxt->sax->getEntity != NULL) 7639 ent = ctxt->sax->getEntity(ctxt->userData, name); 7640 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7641 (ctxt->options & XML_PARSE_OLDSAX)) 7642 ent = xmlGetPredefinedEntity(name); 7643 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7644 (ctxt->userData==ctxt)) { 7645 ent = xmlSAX2GetEntity(ctxt, name); 7646 } 7647 } 7648 if (ctxt->instate == XML_PARSER_EOF) 7649 return(NULL); 7650 /* 7651 * [ WFC: Entity Declared ] 7652 * In a document without any DTD, a document with only an 7653 * internal DTD subset which contains no parameter entity 7654 * references, or a document with "standalone='yes'", the 7655 * Name given in the entity reference must match that in an 7656 * entity declaration, except that well-formed documents 7657 * need not declare any of the following entities: amp, lt, 7658 * gt, apos, quot. 7659 * The declaration of a parameter entity must precede any 7660 * reference to it. 7661 * Similarly, the declaration of a general entity must 7662 * precede any reference to it which appears in a default 7663 * value in an attribute-list declaration. Note that if 7664 * entities are declared in the external subset or in 7665 * external parameter entities, a non-validating processor 7666 * is not obligated to read and process their declarations; 7667 * for such documents, the rule that an entity must be 7668 * declared is a well-formedness constraint only if 7669 * standalone='yes'. 
7670 */ 7671 if (ent == NULL) { 7672 if ((ctxt->standalone == 1) || 7673 ((ctxt->hasExternalSubset == 0) && 7674 (ctxt->hasPErefs == 0))) { 7675 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7676 "Entity '%s' not defined\n", name); 7677 } else { 7678 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7679 "Entity '%s' not defined\n", name); 7680 if ((ctxt->inSubset == 0) && 7681 (ctxt->sax != NULL) && 7682 (ctxt->sax->reference != NULL)) { 7683 ctxt->sax->reference(ctxt->userData, name); 7684 } 7685 } 7686 xmlParserEntityCheck(ctxt, 0, ent, 0); 7687 ctxt->valid = 0; 7688 } 7689 7690 /* 7691 * [ WFC: Parsed Entity ] 7692 * An entity reference must not contain the name of an 7693 * unparsed entity 7694 */ 7695 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7696 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7697 "Entity reference to unparsed entity %s\n", name); 7698 } 7699 7700 /* 7701 * [ WFC: No External Entity References ] 7702 * Attribute values cannot contain direct or indirect 7703 * entity references to external entities. 7704 */ 7705 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7706 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7707 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7708 "Attribute references external entity '%s'\n", name); 7709 } 7710 /* 7711 * [ WFC: No < in Attribute Values ] 7712 * The replacement text of any entity referred to directly or 7713 * indirectly in an attribute value (other than "<") must 7714 * not contain a <. 7715 */ 7716 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7717 (ent != NULL) && 7718 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7719 if (((ent->checked & 1) || (ent->checked == 0)) && 7720 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { 7721 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7722 "'<' in entity '%s' is not allowed in attributes values\n", name); 7723 } 7724 } 7725 7726 /* 7727 * Internal check, no parameter entities here ... 
7728 */ 7729 else { 7730 switch (ent->etype) { 7731 case XML_INTERNAL_PARAMETER_ENTITY: 7732 case XML_EXTERNAL_PARAMETER_ENTITY: 7733 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7734 "Attempt to reference the parameter entity '%s'\n", 7735 name); 7736 break; 7737 default: 7738 break; 7739 } 7740 } 7741 7742 /* 7743 * [ WFC: No Recursion ] 7744 * A parsed entity must not contain a recursive reference 7745 * to itself, either directly or indirectly. 7746 * Done somewhere else 7747 */ 7748 return(ent); 7749 } 7750 7751 /** 7752 * xmlParseStringEntityRef: 7753 * @ctxt: an XML parser context 7754 * @str: a pointer to an index in the string 7755 * 7756 * parse ENTITY references declarations, but this version parses it from 7757 * a string value. 7758 * 7759 * [68] EntityRef ::= '&' Name ';' 7760 * 7761 * [ WFC: Entity Declared ] 7762 * In a document without any DTD, a document with only an internal DTD 7763 * subset which contains no parameter entity references, or a document 7764 * with "standalone='yes'", the Name given in the entity reference 7765 * must match that in an entity declaration, except that well-formed 7766 * documents need not declare any of the following entities: amp, lt, 7767 * gt, apos, quot. The declaration of a parameter entity must precede 7768 * any reference to it. Similarly, the declaration of a general entity 7769 * must precede any reference to it which appears in a default value in an 7770 * attribute-list declaration. Note that if entities are declared in the 7771 * external subset or in external parameter entities, a non-validating 7772 * processor is not obligated to read and process their declarations; 7773 * for such documents, the rule that an entity must be declared is a 7774 * well-formedness constraint only if standalone='yes'. 7775 * 7776 * [ WFC: Parsed Entity ] 7777 * An entity reference must not contain the name of an unparsed entity 7778 * 7779 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7780 * is updated to the current location in the string. 7781 */ 7782 static xmlEntityPtr 7783 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7784 xmlChar *name; 7785 const xmlChar *ptr; 7786 xmlChar cur; 7787 xmlEntityPtr ent = NULL; 7788 7789 if ((str == NULL) || (*str == NULL)) 7790 return(NULL); 7791 ptr = *str; 7792 cur = *ptr; 7793 if (cur != '&') 7794 return(NULL); 7795 7796 ptr++; 7797 name = xmlParseStringName(ctxt, &ptr); 7798 if (name == NULL) { 7799 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7800 "xmlParseStringEntityRef: no name\n"); 7801 *str = ptr; 7802 return(NULL); 7803 } 7804 if (*ptr != ';') { 7805 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7806 xmlFree(name); 7807 *str = ptr; 7808 return(NULL); 7809 } 7810 ptr++; 7811 7812 7813 /* 7814 * Predefined entities override any extra definition 7815 */ 7816 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7817 ent = xmlGetPredefinedEntity(name); 7818 if (ent != NULL) { 7819 xmlFree(name); 7820 *str = ptr; 7821 return(ent); 7822 } 7823 } 7824 7825 /* 7826 * Increate the number of entity references parsed 7827 */ 7828 ctxt->nbentities++; 7829 7830 /* 7831 * Ask first SAX for entity resolution, otherwise try the 7832 * entities which may have stored in the parser context. 
7833 */ 7834 if (ctxt->sax != NULL) { 7835 if (ctxt->sax->getEntity != NULL) 7836 ent = ctxt->sax->getEntity(ctxt->userData, name); 7837 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7838 ent = xmlGetPredefinedEntity(name); 7839 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7840 ent = xmlSAX2GetEntity(ctxt, name); 7841 } 7842 } 7843 if (ctxt->instate == XML_PARSER_EOF) { 7844 xmlFree(name); 7845 return(NULL); 7846 } 7847 7848 /* 7849 * [ WFC: Entity Declared ] 7850 * In a document without any DTD, a document with only an 7851 * internal DTD subset which contains no parameter entity 7852 * references, or a document with "standalone='yes'", the 7853 * Name given in the entity reference must match that in an 7854 * entity declaration, except that well-formed documents 7855 * need not declare any of the following entities: amp, lt, 7856 * gt, apos, quot. 7857 * The declaration of a parameter entity must precede any 7858 * reference to it. 7859 * Similarly, the declaration of a general entity must 7860 * precede any reference to it which appears in a default 7861 * value in an attribute-list declaration. Note that if 7862 * entities are declared in the external subset or in 7863 * external parameter entities, a non-validating processor 7864 * is not obligated to read and process their declarations; 7865 * for such documents, the rule that an entity must be 7866 * declared is a well-formedness constraint only if 7867 * standalone='yes'. 7868 */ 7869 if (ent == NULL) { 7870 if ((ctxt->standalone == 1) || 7871 ((ctxt->hasExternalSubset == 0) && 7872 (ctxt->hasPErefs == 0))) { 7873 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7874 "Entity '%s' not defined\n", name); 7875 } else { 7876 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7877 "Entity '%s' not defined\n", 7878 name); 7879 } 7880 xmlParserEntityCheck(ctxt, 0, ent, 0); 7881 /* TODO ? 
check regressions ctxt->valid = 0; */ 7882 } 7883 7884 /* 7885 * [ WFC: Parsed Entity ] 7886 * An entity reference must not contain the name of an 7887 * unparsed entity 7888 */ 7889 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7890 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7891 "Entity reference to unparsed entity %s\n", name); 7892 } 7893 7894 /* 7895 * [ WFC: No External Entity References ] 7896 * Attribute values cannot contain direct or indirect 7897 * entity references to external entities. 7898 */ 7899 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7900 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7901 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7902 "Attribute references external entity '%s'\n", name); 7903 } 7904 /* 7905 * [ WFC: No < in Attribute Values ] 7906 * The replacement text of any entity referred to directly or 7907 * indirectly in an attribute value (other than "<") must 7908 * not contain a <. 7909 */ 7910 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7911 (ent != NULL) && (ent->content != NULL) && 7912 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7913 (xmlStrchr(ent->content, '<'))) { 7914 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7915 "'<' in entity '%s' is not allowed in attributes values\n", 7916 name); 7917 } 7918 7919 /* 7920 * Internal check, no parameter entities here ... 7921 */ 7922 else { 7923 switch (ent->etype) { 7924 case XML_INTERNAL_PARAMETER_ENTITY: 7925 case XML_EXTERNAL_PARAMETER_ENTITY: 7926 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7927 "Attempt to reference the parameter entity '%s'\n", 7928 name); 7929 break; 7930 default: 7931 break; 7932 } 7933 } 7934 7935 /* 7936 * [ WFC: No Recursion ] 7937 * A parsed entity must not contain a recursive reference 7938 * to itself, either directly or indirectly. 
7939 * Done somewhere else 7940 */ 7941 7942 xmlFree(name); 7943 *str = ptr; 7944 return(ent); 7945 } 7946 7947 /** 7948 * xmlParsePEReference: 7949 * @ctxt: an XML parser context 7950 * 7951 * parse PEReference declarations 7952 * The entity content is handled directly by pushing it's content as 7953 * a new input stream. 7954 * 7955 * [69] PEReference ::= '%' Name ';' 7956 * 7957 * [ WFC: No Recursion ] 7958 * A parsed entity must not contain a recursive 7959 * reference to itself, either directly or indirectly. 7960 * 7961 * [ WFC: Entity Declared ] 7962 * In a document without any DTD, a document with only an internal DTD 7963 * subset which contains no parameter entity references, or a document 7964 * with "standalone='yes'", ... ... The declaration of a parameter 7965 * entity must precede any reference to it... 7966 * 7967 * [ VC: Entity Declared ] 7968 * In a document with an external subset or external parameter entities 7969 * with "standalone='no'", ... ... The declaration of a parameter entity 7970 * must precede any reference to it... 7971 * 7972 * [ WFC: In DTD ] 7973 * Parameter-entity references may only appear in the DTD. 7974 * NOTE: misleading but this is handled. 
7975 */ 7976 void 7977 xmlParsePEReference(xmlParserCtxtPtr ctxt) 7978 { 7979 const xmlChar *name; 7980 xmlEntityPtr entity = NULL; 7981 xmlParserInputPtr input; 7982 7983 if (RAW != '%') 7984 return; 7985 NEXT; 7986 name = xmlParseName(ctxt); 7987 if (name == NULL) { 7988 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7989 "xmlParsePEReference: no name\n"); 7990 return; 7991 } 7992 if (RAW != ';') { 7993 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7994 return; 7995 } 7996 7997 NEXT; 7998 7999 /* 8000 * Increate the number of entity references parsed 8001 */ 8002 ctxt->nbentities++; 8003 8004 /* 8005 * Request the entity from SAX 8006 */ 8007 if ((ctxt->sax != NULL) && 8008 (ctxt->sax->getParameterEntity != NULL)) 8009 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8010 if (ctxt->instate == XML_PARSER_EOF) 8011 return; 8012 if (entity == NULL) { 8013 /* 8014 * [ WFC: Entity Declared ] 8015 * In a document without any DTD, a document with only an 8016 * internal DTD subset which contains no parameter entity 8017 * references, or a document with "standalone='yes'", ... 8018 * ... The declaration of a parameter entity must precede 8019 * any reference to it... 8020 */ 8021 if ((ctxt->standalone == 1) || 8022 ((ctxt->hasExternalSubset == 0) && 8023 (ctxt->hasPErefs == 0))) { 8024 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8025 "PEReference: %%%s; not found\n", 8026 name); 8027 } else { 8028 /* 8029 * [ VC: Entity Declared ] 8030 * In a document with an external subset or external 8031 * parameter entities with "standalone='no'", ... 8032 * ... The declaration of a parameter entity must 8033 * precede any reference to it... 
8034 */ 8035 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8036 "PEReference: %%%s; not found\n", 8037 name, NULL); 8038 ctxt->valid = 0; 8039 } 8040 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8041 } else { 8042 /* 8043 * Internal checking in case the entity quest barfed 8044 */ 8045 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8046 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8047 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8048 "Internal: %%%s; is not a parameter entity\n", 8049 name, NULL); 8050 } else if (ctxt->input->free != deallocblankswrapper) { 8051 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 8052 if (xmlPushInput(ctxt, input) < 0) 8053 return; 8054 } else { 8055 /* 8056 * TODO !!! 8057 * handle the extra spaces added before and after 8058 * c.f. http://www.w3.org/TR/REC-xml#as-PE 8059 */ 8060 input = xmlNewEntityInputStream(ctxt, entity); 8061 if (xmlPushInput(ctxt, input) < 0) 8062 return; 8063 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8064 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8065 (IS_BLANK_CH(NXT(5)))) { 8066 xmlParseTextDecl(ctxt); 8067 if (ctxt->errNo == 8068 XML_ERR_UNSUPPORTED_ENCODING) { 8069 /* 8070 * The XML REC instructs us to stop parsing 8071 * right here 8072 */ 8073 ctxt->instate = XML_PARSER_EOF; 8074 return; 8075 } 8076 } 8077 } 8078 } 8079 ctxt->hasPErefs = 1; 8080 } 8081 8082 /** 8083 * xmlLoadEntityContent: 8084 * @ctxt: an XML parser context 8085 * @entity: an unloaded system entity 8086 * 8087 * Load the original content of the given system entity from the 8088 * ExternalID/SystemID given. 
This is to be used for Included in Literal 8089 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8090 * 8091 * Returns 0 in case of success and -1 in case of failure 8092 */ 8093 static int 8094 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8095 xmlParserInputPtr input; 8096 xmlBufferPtr buf; 8097 int l, c; 8098 int count = 0; 8099 8100 if ((ctxt == NULL) || (entity == NULL) || 8101 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8102 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8103 (entity->content != NULL)) { 8104 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8105 "xmlLoadEntityContent parameter error"); 8106 return(-1); 8107 } 8108 8109 if (xmlParserDebugEntities) 8110 xmlGenericError(xmlGenericErrorContext, 8111 "Reading %s entity content input\n", entity->name); 8112 8113 buf = xmlBufferCreate(); 8114 if (buf == NULL) { 8115 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8116 "xmlLoadEntityContent parameter error"); 8117 return(-1); 8118 } 8119 8120 input = xmlNewEntityInputStream(ctxt, entity); 8121 if (input == NULL) { 8122 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8123 "xmlLoadEntityContent input error"); 8124 xmlBufferFree(buf); 8125 return(-1); 8126 } 8127 8128 /* 8129 * Push the entity as the current input, read char by char 8130 * saving to the buffer until the end of the entity or an error 8131 */ 8132 if (xmlPushInput(ctxt, input) < 0) { 8133 xmlBufferFree(buf); 8134 return(-1); 8135 } 8136 8137 GROW; 8138 c = CUR_CHAR(l); 8139 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8140 (IS_CHAR(c))) { 8141 xmlBufferAdd(buf, ctxt->input->cur, l); 8142 if (count++ > XML_PARSER_CHUNK_SIZE) { 8143 count = 0; 8144 GROW; 8145 if (ctxt->instate == XML_PARSER_EOF) { 8146 xmlBufferFree(buf); 8147 return(-1); 8148 } 8149 } 8150 NEXTL(l); 8151 c = CUR_CHAR(l); 8152 if (c == 0) { 8153 count = 0; 8154 GROW; 8155 if (ctxt->instate == XML_PARSER_EOF) { 8156 xmlBufferFree(buf); 8157 
return(-1); 8158 } 8159 c = CUR_CHAR(l); 8160 } 8161 } 8162 8163 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8164 xmlPopInput(ctxt); 8165 } else if (!IS_CHAR(c)) { 8166 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8167 "xmlLoadEntityContent: invalid char value %d\n", 8168 c); 8169 xmlBufferFree(buf); 8170 return(-1); 8171 } 8172 entity->content = buf->content; 8173 buf->content = NULL; 8174 xmlBufferFree(buf); 8175 8176 return(0); 8177 } 8178 8179 /** 8180 * xmlParseStringPEReference: 8181 * @ctxt: an XML parser context 8182 * @str: a pointer to an index in the string 8183 * 8184 * parse PEReference declarations 8185 * 8186 * [69] PEReference ::= '%' Name ';' 8187 * 8188 * [ WFC: No Recursion ] 8189 * A parsed entity must not contain a recursive 8190 * reference to itself, either directly or indirectly. 8191 * 8192 * [ WFC: Entity Declared ] 8193 * In a document without any DTD, a document with only an internal DTD 8194 * subset which contains no parameter entity references, or a document 8195 * with "standalone='yes'", ... ... The declaration of a parameter 8196 * entity must precede any reference to it... 8197 * 8198 * [ VC: Entity Declared ] 8199 * In a document with an external subset or external parameter entities 8200 * with "standalone='no'", ... ... The declaration of a parameter entity 8201 * must precede any reference to it... 8202 * 8203 * [ WFC: In DTD ] 8204 * Parameter-entity references may only appear in the DTD. 8205 * NOTE: misleading but this is handled. 8206 * 8207 * Returns the string of the entity content. 
8208 * str is updated to the current value of the index 8209 */ 8210 static xmlEntityPtr 8211 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8212 const xmlChar *ptr; 8213 xmlChar cur; 8214 xmlChar *name; 8215 xmlEntityPtr entity = NULL; 8216 8217 if ((str == NULL) || (*str == NULL)) return(NULL); 8218 ptr = *str; 8219 cur = *ptr; 8220 if (cur != '%') 8221 return(NULL); 8222 ptr++; 8223 name = xmlParseStringName(ctxt, &ptr); 8224 if (name == NULL) { 8225 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8226 "xmlParseStringPEReference: no name\n"); 8227 *str = ptr; 8228 return(NULL); 8229 } 8230 cur = *ptr; 8231 if (cur != ';') { 8232 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8233 xmlFree(name); 8234 *str = ptr; 8235 return(NULL); 8236 } 8237 ptr++; 8238 8239 /* 8240 * Increate the number of entity references parsed 8241 */ 8242 ctxt->nbentities++; 8243 8244 /* 8245 * Request the entity from SAX 8246 */ 8247 if ((ctxt->sax != NULL) && 8248 (ctxt->sax->getParameterEntity != NULL)) 8249 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8250 if (ctxt->instate == XML_PARSER_EOF) { 8251 xmlFree(name); 8252 return(NULL); 8253 } 8254 if (entity == NULL) { 8255 /* 8256 * [ WFC: Entity Declared ] 8257 * In a document without any DTD, a document with only an 8258 * internal DTD subset which contains no parameter entity 8259 * references, or a document with "standalone='yes'", ... 8260 * ... The declaration of a parameter entity must precede 8261 * any reference to it... 8262 */ 8263 if ((ctxt->standalone == 1) || 8264 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8265 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8266 "PEReference: %%%s; not found\n", name); 8267 } else { 8268 /* 8269 * [ VC: Entity Declared ] 8270 * In a document with an external subset or external 8271 * parameter entities with "standalone='no'", ... 8272 * ... 
The declaration of a parameter entity must 8273 * precede any reference to it... 8274 */ 8275 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8276 "PEReference: %%%s; not found\n", 8277 name, NULL); 8278 ctxt->valid = 0; 8279 } 8280 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8281 } else { 8282 /* 8283 * Internal checking in case the entity quest barfed 8284 */ 8285 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8286 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8287 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8288 "%%%s; is not a parameter entity\n", 8289 name, NULL); 8290 } 8291 } 8292 ctxt->hasPErefs = 1; 8293 xmlFree(name); 8294 *str = ptr; 8295 return(entity); 8296 } 8297 8298 /** 8299 * xmlParseDocTypeDecl: 8300 * @ctxt: an XML parser context 8301 * 8302 * parse a DOCTYPE declaration 8303 * 8304 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8305 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8306 * 8307 * [ VC: Root Element Type ] 8308 * The Name in the document type declaration must match the element 8309 * type of the root element. 8310 */ 8311 8312 void 8313 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8314 const xmlChar *name = NULL; 8315 xmlChar *ExternalID = NULL; 8316 xmlChar *URI = NULL; 8317 8318 /* 8319 * We know that '<!DOCTYPE' has been detected. 8320 */ 8321 SKIP(9); 8322 8323 SKIP_BLANKS; 8324 8325 /* 8326 * Parse the DOCTYPE name. 8327 */ 8328 name = xmlParseName(ctxt); 8329 if (name == NULL) { 8330 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8331 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8332 } 8333 ctxt->intSubName = name; 8334 8335 SKIP_BLANKS; 8336 8337 /* 8338 * Check for SystemID and ExternalID 8339 */ 8340 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8341 8342 if ((URI != NULL) || (ExternalID != NULL)) { 8343 ctxt->hasExternalSubset = 1; 8344 } 8345 ctxt->extSubURI = URI; 8346 ctxt->extSubSystem = ExternalID; 8347 8348 SKIP_BLANKS; 8349 8350 /* 8351 * Create and update the internal subset. 
8352 */ 8353 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8354 (!ctxt->disableSAX)) 8355 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8356 if (ctxt->instate == XML_PARSER_EOF) 8357 return; 8358 8359 /* 8360 * Is there any internal subset declarations ? 8361 * they are handled separately in xmlParseInternalSubset() 8362 */ 8363 if (RAW == '[') 8364 return; 8365 8366 /* 8367 * We should be at the end of the DOCTYPE declaration. 8368 */ 8369 if (RAW != '>') { 8370 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8371 } 8372 NEXT; 8373 } 8374 8375 /** 8376 * xmlParseInternalSubset: 8377 * @ctxt: an XML parser context 8378 * 8379 * parse the internal subset declaration 8380 * 8381 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8382 */ 8383 8384 static void 8385 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8386 /* 8387 * Is there any DTD definition ? 8388 */ 8389 if (RAW == '[') { 8390 ctxt->instate = XML_PARSER_DTD; 8391 NEXT; 8392 /* 8393 * Parse the succession of Markup declarations and 8394 * PEReferences. 8395 * Subsequence (markupdecl | PEReference | S)* 8396 */ 8397 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { 8398 const xmlChar *check = CUR_PTR; 8399 unsigned int cons = ctxt->input->consumed; 8400 8401 SKIP_BLANKS; 8402 xmlParseMarkupDecl(ctxt); 8403 xmlParsePEReference(ctxt); 8404 8405 /* 8406 * Pop-up of finished entities. 8407 */ 8408 while ((RAW == 0) && (ctxt->inputNr > 1)) 8409 xmlPopInput(ctxt); 8410 8411 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8412 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8413 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8414 break; 8415 } 8416 } 8417 if (RAW == ']') { 8418 NEXT; 8419 SKIP_BLANKS; 8420 } 8421 } 8422 8423 /* 8424 * We should be at the end of the DOCTYPE declaration. 
8425 */ 8426 if (RAW != '>') { 8427 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8428 } 8429 NEXT; 8430 } 8431 8432 #ifdef LIBXML_SAX1_ENABLED 8433 /** 8434 * xmlParseAttribute: 8435 * @ctxt: an XML parser context 8436 * @value: a xmlChar ** used to store the value of the attribute 8437 * 8438 * parse an attribute 8439 * 8440 * [41] Attribute ::= Name Eq AttValue 8441 * 8442 * [ WFC: No External Entity References ] 8443 * Attribute values cannot contain direct or indirect entity references 8444 * to external entities. 8445 * 8446 * [ WFC: No < in Attribute Values ] 8447 * The replacement text of any entity referred to directly or indirectly in 8448 * an attribute value (other than "<") must not contain a <. 8449 * 8450 * [ VC: Attribute Value Type ] 8451 * The attribute must have been declared; the value must be of the type 8452 * declared for it. 8453 * 8454 * [25] Eq ::= S? '=' S? 8455 * 8456 * With namespace: 8457 * 8458 * [NS 11] Attribute ::= QName Eq AttValue 8459 * 8460 * Also the case QName == xmlns:??? is handled independently as a namespace 8461 * definition. 8462 * 8463 * Returns the attribute name, and the value in *value. 
8464 */ 8465 8466 const xmlChar * 8467 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8468 const xmlChar *name; 8469 xmlChar *val; 8470 8471 *value = NULL; 8472 GROW; 8473 name = xmlParseName(ctxt); 8474 if (name == NULL) { 8475 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8476 "error parsing attribute name\n"); 8477 return(NULL); 8478 } 8479 8480 /* 8481 * read the value 8482 */ 8483 SKIP_BLANKS; 8484 if (RAW == '=') { 8485 NEXT; 8486 SKIP_BLANKS; 8487 val = xmlParseAttValue(ctxt); 8488 ctxt->instate = XML_PARSER_CONTENT; 8489 } else { 8490 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8491 "Specification mandate value for attribute %s\n", name); 8492 return(NULL); 8493 } 8494 8495 /* 8496 * Check that xml:lang conforms to the specification 8497 * No more registered as an error, just generate a warning now 8498 * since this was deprecated in XML second edition 8499 */ 8500 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8501 if (!xmlCheckLanguageID(val)) { 8502 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8503 "Malformed value for xml:lang : %s\n", 8504 val, NULL); 8505 } 8506 } 8507 8508 /* 8509 * Check that xml:space conforms to the specification 8510 */ 8511 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8512 if (xmlStrEqual(val, BAD_CAST "default")) 8513 *(ctxt->space) = 0; 8514 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8515 *(ctxt->space) = 1; 8516 else { 8517 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8518 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8519 val, NULL); 8520 } 8521 } 8522 8523 *value = val; 8524 return(name); 8525 } 8526 8527 /** 8528 * xmlParseStartTag: 8529 * @ctxt: an XML parser context 8530 * 8531 * parse a start of tag either for rule element or 8532 * EmptyElement. In both case we don't parse the tag closing chars. 8533 * 8534 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8535 * 8536 * [ WFC: Unique Att Spec ] 8537 * No attribute name may appear more than once in the same start-tag or 8538 * empty-element tag. 8539 * 8540 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8541 * 8542 * [ WFC: Unique Att Spec ] 8543 * No attribute name may appear more than once in the same start-tag or 8544 * empty-element tag. 8545 * 8546 * With namespace: 8547 * 8548 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8549 * 8550 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8551 * 8552 * Returns the element name parsed 8553 */ 8554 8555 const xmlChar * 8556 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8557 const xmlChar *name; 8558 const xmlChar *attname; 8559 xmlChar *attvalue; 8560 const xmlChar **atts = ctxt->atts; 8561 int nbatts = 0; 8562 int maxatts = ctxt->maxatts; 8563 int i; 8564 8565 if (RAW != '<') return(NULL); 8566 NEXT1; 8567 8568 name = xmlParseName(ctxt); 8569 if (name == NULL) { 8570 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8571 "xmlParseStartTag: invalid element name\n"); 8572 return(NULL); 8573 } 8574 8575 /* 8576 * Now parse the attributes, it ends up with the ending 8577 * 8578 * (S Attribute)* S? 8579 */ 8580 SKIP_BLANKS; 8581 GROW; 8582 8583 while (((RAW != '>') && 8584 ((RAW != '/') || (NXT(1) != '>')) && 8585 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8586 const xmlChar *q = CUR_PTR; 8587 unsigned int cons = ctxt->input->consumed; 8588 8589 attname = xmlParseAttribute(ctxt, &attvalue); 8590 if ((attname != NULL) && (attvalue != NULL)) { 8591 /* 8592 * [ WFC: Unique Att Spec ] 8593 * No attribute name may appear more than once in the same 8594 * start-tag or empty-element tag. 
8595 */ 8596 for (i = 0; i < nbatts;i += 2) { 8597 if (xmlStrEqual(atts[i], attname)) { 8598 xmlErrAttributeDup(ctxt, NULL, attname); 8599 xmlFree(attvalue); 8600 goto failed; 8601 } 8602 } 8603 /* 8604 * Add the pair to atts 8605 */ 8606 if (atts == NULL) { 8607 maxatts = 22; /* allow for 10 attrs by default */ 8608 atts = (const xmlChar **) 8609 xmlMalloc(maxatts * sizeof(xmlChar *)); 8610 if (atts == NULL) { 8611 xmlErrMemory(ctxt, NULL); 8612 if (attvalue != NULL) 8613 xmlFree(attvalue); 8614 goto failed; 8615 } 8616 ctxt->atts = atts; 8617 ctxt->maxatts = maxatts; 8618 } else if (nbatts + 4 > maxatts) { 8619 const xmlChar **n; 8620 8621 maxatts *= 2; 8622 n = (const xmlChar **) xmlRealloc((void *) atts, 8623 maxatts * sizeof(const xmlChar *)); 8624 if (n == NULL) { 8625 xmlErrMemory(ctxt, NULL); 8626 if (attvalue != NULL) 8627 xmlFree(attvalue); 8628 goto failed; 8629 } 8630 atts = n; 8631 ctxt->atts = atts; 8632 ctxt->maxatts = maxatts; 8633 } 8634 atts[nbatts++] = attname; 8635 atts[nbatts++] = attvalue; 8636 atts[nbatts] = NULL; 8637 atts[nbatts + 1] = NULL; 8638 } else { 8639 if (attvalue != NULL) 8640 xmlFree(attvalue); 8641 } 8642 8643 failed: 8644 8645 GROW 8646 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8647 break; 8648 if (!IS_BLANK_CH(RAW)) { 8649 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8650 "attributes construct error\n"); 8651 } 8652 SKIP_BLANKS; 8653 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8654 (attname == NULL) && (attvalue == NULL)) { 8655 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8656 "xmlParseStartTag: problem parsing attributes\n"); 8657 break; 8658 } 8659 SHRINK; 8660 GROW; 8661 } 8662 8663 /* 8664 * SAX: Start of Element ! 
8665 */ 8666 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8667 (!ctxt->disableSAX)) { 8668 if (nbatts > 0) 8669 ctxt->sax->startElement(ctxt->userData, name, atts); 8670 else 8671 ctxt->sax->startElement(ctxt->userData, name, NULL); 8672 } 8673 8674 if (atts != NULL) { 8675 /* Free only the content strings */ 8676 for (i = 1;i < nbatts;i+=2) 8677 if (atts[i] != NULL) 8678 xmlFree((xmlChar *) atts[i]); 8679 } 8680 return(name); 8681 } 8682 8683 /** 8684 * xmlParseEndTag1: 8685 * @ctxt: an XML parser context 8686 * @line: line of the start tag 8687 * @nsNr: number of namespaces on the start tag 8688 * 8689 * parse an end of tag 8690 * 8691 * [42] ETag ::= '</' Name S? '>' 8692 * 8693 * With namespace 8694 * 8695 * [NS 9] ETag ::= '</' QName S? '>' 8696 */ 8697 8698 static void 8699 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8700 const xmlChar *name; 8701 8702 GROW; 8703 if ((RAW != '<') || (NXT(1) != '/')) { 8704 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8705 "xmlParseEndTag: '</' not found\n"); 8706 return; 8707 } 8708 SKIP(2); 8709 8710 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8711 8712 /* 8713 * We should definitely be at the ending "S? '>'" part 8714 */ 8715 GROW; 8716 SKIP_BLANKS; 8717 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8718 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8719 } else 8720 NEXT1; 8721 8722 /* 8723 * [ WFC: Element Type Match ] 8724 * The Name in an element's end-tag must match the element type in the 8725 * start-tag. 
8726 * 8727 */ 8728 if (name != (xmlChar*)1) { 8729 if (name == NULL) name = BAD_CAST "unparseable"; 8730 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8731 "Opening and ending tag mismatch: %s line %d and %s\n", 8732 ctxt->name, line, name); 8733 } 8734 8735 /* 8736 * SAX: End of Tag 8737 */ 8738 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8739 (!ctxt->disableSAX)) 8740 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8741 8742 namePop(ctxt); 8743 spacePop(ctxt); 8744 return; 8745 } 8746 8747 /** 8748 * xmlParseEndTag: 8749 * @ctxt: an XML parser context 8750 * 8751 * parse an end of tag 8752 * 8753 * [42] ETag ::= '</' Name S? '>' 8754 * 8755 * With namespace 8756 * 8757 * [NS 9] ETag ::= '</' QName S? '>' 8758 */ 8759 8760 void 8761 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8762 xmlParseEndTag1(ctxt, 0); 8763 } 8764 #endif /* LIBXML_SAX1_ENABLED */ 8765 8766 /************************************************************************ 8767 * * 8768 * SAX 2 specific operations * 8769 * * 8770 ************************************************************************/ 8771 8772 /* 8773 * xmlGetNamespace: 8774 * @ctxt: an XML parser context 8775 * @prefix: the prefix to lookup 8776 * 8777 * Lookup the namespace name for the @prefix (which ca be NULL) 8778 * The prefix must come from the @ctxt->dict dictionnary 8779 * 8780 * Returns the namespace name or NULL if not bound 8781 */ 8782 static const xmlChar * 8783 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8784 int i; 8785 8786 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8787 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8788 if (ctxt->nsTab[i] == prefix) { 8789 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8790 return(NULL); 8791 return(ctxt->nsTab[i + 1]); 8792 } 8793 return(NULL); 8794 } 8795 8796 /** 8797 * xmlParseQName: 8798 * @ctxt: an XML parser context 8799 * @prefix: pointer to store the prefix part 8800 * 8801 * parse an XML Namespace QName 8802 * 8803 * 
[6]  QName  ::= (Prefix ':')? LocalPart
 * [7]  Prefix  ::= NCName
 * [8]  LocalPart  ::= NCName
 *
 * Malformed names are reported through xmlNsErr and recovered from by
 * returning the whole colon-joined text as the name.
 *
 * Returns the Name parsed or NULL
 */

static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
    const xmlChar *first;
    const xmlChar *local;
    const xmlChar *interned;
    xmlChar *joined;

    GROW;

    first = xmlParseNCName(ctxt);
    if (first == NULL) {
        /* Not an NCName: a leading ':' may still start a plain Name. */
        if (CUR != ':')
            return(NULL);
        first = xmlParseName(ctxt);
        if (first == NULL)
            return(NULL);
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s'\n", first, NULL, NULL);
        *prefix = NULL;
        return(first);
    }

    /* No colon: an unprefixed name. */
    if (CUR != ':') {
        *prefix = NULL;
        return(first);
    }

    /* "first" is the prefix, the local part follows the colon. */
    NEXT;
    local = xmlParseNCName(ctxt);
    if (local == NULL) {
        xmlChar *token;

        /*
         * Invalid local part: rebuild "prefix:rest" as a single name
         * and report it without a prefix.
         */
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s:'\n", first, NULL, NULL);
        token = xmlParseNmtoken(ctxt);
        if (token != NULL) {
            joined = xmlBuildQName(token, first, NULL, 0);
            xmlFree((char *) token);
        } else {
            joined = xmlBuildQName(BAD_CAST "", first, NULL, 0);
        }
        interned = xmlDictLookup(ctxt->dict, joined, -1);
        if (joined != NULL) xmlFree(joined);
        *prefix = NULL;
        return(interned);
    }

    if (CUR == ':') {
        const xmlChar *rest;

        /*
         * A second colon: keep the prefix and fold "local:rest" into
         * the returned local name.
         */
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s:%s:'\n", first, local, NULL);
        NEXT;
        rest = xmlParseName(ctxt);
        if (rest != NULL)
            joined = xmlBuildQName(rest, local, NULL, 0);
        else
            joined = xmlBuildQName(BAD_CAST "", local, NULL, 0);
        interned = xmlDictLookup(ctxt->dict, joined, -1);
        if (joined != NULL) xmlFree(joined);
        *prefix = first;
        return(interned);
    }

    *prefix = first;
    return(local);
}

/**
 * xmlParseQNameAndCompare:
 * @ctxt:  an XML parser context
 * @name:  the localname
 * @prefix:  the prefix, if any.
8881 * 8882 * parse an XML name and compares for match 8883 * (specialized for endtag parsing) 8884 * 8885 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8886 * and the name for mismatch 8887 */ 8888 8889 static const xmlChar * 8890 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8891 xmlChar const *prefix) { 8892 const xmlChar *cmp; 8893 const xmlChar *in; 8894 const xmlChar *ret; 8895 const xmlChar *prefix2; 8896 8897 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8898 8899 GROW; 8900 in = ctxt->input->cur; 8901 8902 cmp = prefix; 8903 while (*in != 0 && *in == *cmp) { 8904 ++in; 8905 ++cmp; 8906 } 8907 if ((*cmp == 0) && (*in == ':')) { 8908 in++; 8909 cmp = name; 8910 while (*in != 0 && *in == *cmp) { 8911 ++in; 8912 ++cmp; 8913 } 8914 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8915 /* success */ 8916 ctxt->input->cur = in; 8917 return((const xmlChar*) 1); 8918 } 8919 } 8920 /* 8921 * all strings coms from the dictionary, equality can be done directly 8922 */ 8923 ret = xmlParseQName (ctxt, &prefix2); 8924 if ((ret == name) && (prefix == prefix2)) 8925 return((const xmlChar*) 1); 8926 return ret; 8927 } 8928 8929 /** 8930 * xmlParseAttValueInternal: 8931 * @ctxt: an XML parser context 8932 * @len: attribute len result 8933 * @alloc: whether the attribute was reallocated as a new string 8934 * @normalize: if 1 then further non-CDATA normalization must be done 8935 * 8936 * parse a value for an attribute. 8937 * NOTE: if no normalization is needed, the routine will return pointers 8938 * directly from the data buffer. 
8939 * 8940 * 3.3.3 Attribute-Value Normalization: 8941 * Before the value of an attribute is passed to the application or 8942 * checked for validity, the XML processor must normalize it as follows: 8943 * - a character reference is processed by appending the referenced 8944 * character to the attribute value 8945 * - an entity reference is processed by recursively processing the 8946 * replacement text of the entity 8947 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8948 * appending #x20 to the normalized value, except that only a single 8949 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8950 * parsed entity or the literal entity value of an internal parsed entity 8951 * - other characters are processed by appending them to the normalized value 8952 * If the declared value is not CDATA, then the XML processor must further 8953 * process the normalized attribute value by discarding any leading and 8954 * trailing space (#x20) characters, and by replacing sequences of space 8955 * (#x20) characters by a single space (#x20) character. 8956 * All attributes for which no declaration has been read should be treated 8957 * by a non-validating parser as if declared CDATA. 8958 * 8959 * Returns the AttValue parsed or NULL. The value has to be freed by the 8960 * caller if it was copied, this can be detected by val[*len] == 0. 
8961 */ 8962 8963 static xmlChar * 8964 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8965 int normalize) 8966 { 8967 xmlChar limit = 0; 8968 const xmlChar *in = NULL, *start, *end, *last; 8969 xmlChar *ret = NULL; 8970 int line, col; 8971 8972 GROW; 8973 in = (xmlChar *) CUR_PTR; 8974 line = ctxt->input->line; 8975 col = ctxt->input->col; 8976 if (*in != '"' && *in != '\'') { 8977 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8978 return (NULL); 8979 } 8980 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8981 8982 /* 8983 * try to handle in this routine the most common case where no 8984 * allocation of a new string is required and where content is 8985 * pure ASCII. 8986 */ 8987 limit = *in++; 8988 col++; 8989 end = ctxt->input->end; 8990 start = in; 8991 if (in >= end) { 8992 const xmlChar *oldbase = ctxt->input->base; 8993 GROW; 8994 if (oldbase != ctxt->input->base) { 8995 long delta = ctxt->input->base - oldbase; 8996 start = start + delta; 8997 in = in + delta; 8998 } 8999 end = ctxt->input->end; 9000 } 9001 if (normalize) { 9002 /* 9003 * Skip any leading spaces 9004 */ 9005 while ((in < end) && (*in != limit) && 9006 ((*in == 0x20) || (*in == 0x9) || 9007 (*in == 0xA) || (*in == 0xD))) { 9008 if (*in == 0xA) { 9009 line++; col = 1; 9010 } else { 9011 col++; 9012 } 9013 in++; 9014 start = in; 9015 if (in >= end) { 9016 const xmlChar *oldbase = ctxt->input->base; 9017 GROW; 9018 if (ctxt->instate == XML_PARSER_EOF) 9019 return(NULL); 9020 if (oldbase != ctxt->input->base) { 9021 long delta = ctxt->input->base - oldbase; 9022 start = start + delta; 9023 in = in + delta; 9024 } 9025 end = ctxt->input->end; 9026 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9027 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9028 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9029 "AttValue length too long\n"); 9030 return(NULL); 9031 } 9032 } 9033 } 9034 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9035 (*in <= 0x7f) && (*in != '&') && 
(*in != '<')) { 9036 col++; 9037 if ((*in++ == 0x20) && (*in == 0x20)) break; 9038 if (in >= end) { 9039 const xmlChar *oldbase = ctxt->input->base; 9040 GROW; 9041 if (ctxt->instate == XML_PARSER_EOF) 9042 return(NULL); 9043 if (oldbase != ctxt->input->base) { 9044 long delta = ctxt->input->base - oldbase; 9045 start = start + delta; 9046 in = in + delta; 9047 } 9048 end = ctxt->input->end; 9049 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9050 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9051 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9052 "AttValue length too long\n"); 9053 return(NULL); 9054 } 9055 } 9056 } 9057 last = in; 9058 /* 9059 * skip the trailing blanks 9060 */ 9061 while ((last[-1] == 0x20) && (last > start)) last--; 9062 while ((in < end) && (*in != limit) && 9063 ((*in == 0x20) || (*in == 0x9) || 9064 (*in == 0xA) || (*in == 0xD))) { 9065 if (*in == 0xA) { 9066 line++, col = 1; 9067 } else { 9068 col++; 9069 } 9070 in++; 9071 if (in >= end) { 9072 const xmlChar *oldbase = ctxt->input->base; 9073 GROW; 9074 if (ctxt->instate == XML_PARSER_EOF) 9075 return(NULL); 9076 if (oldbase != ctxt->input->base) { 9077 long delta = ctxt->input->base - oldbase; 9078 start = start + delta; 9079 in = in + delta; 9080 last = last + delta; 9081 } 9082 end = ctxt->input->end; 9083 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9084 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9085 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9086 "AttValue length too long\n"); 9087 return(NULL); 9088 } 9089 } 9090 } 9091 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9092 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9093 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9094 "AttValue length too long\n"); 9095 return(NULL); 9096 } 9097 if (*in != limit) goto need_complex; 9098 } else { 9099 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9100 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9101 in++; 9102 col++; 9103 if (in >= end) { 9104 const xmlChar *oldbase = 
ctxt->input->base; 9105 GROW; 9106 if (ctxt->instate == XML_PARSER_EOF) 9107 return(NULL); 9108 if (oldbase != ctxt->input->base) { 9109 long delta = ctxt->input->base - oldbase; 9110 start = start + delta; 9111 in = in + delta; 9112 } 9113 end = ctxt->input->end; 9114 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9115 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9116 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9117 "AttValue length too long\n"); 9118 return(NULL); 9119 } 9120 } 9121 } 9122 last = in; 9123 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9124 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9125 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9126 "AttValue length too long\n"); 9127 return(NULL); 9128 } 9129 if (*in != limit) goto need_complex; 9130 } 9131 in++; 9132 col++; 9133 if (len != NULL) { 9134 *len = last - start; 9135 ret = (xmlChar *) start; 9136 } else { 9137 if (alloc) *alloc = 1; 9138 ret = xmlStrndup(start, last - start); 9139 } 9140 CUR_PTR = in; 9141 ctxt->input->line = line; 9142 ctxt->input->col = col; 9143 if (alloc) *alloc = 0; 9144 return ret; 9145 need_complex: 9146 if (alloc) *alloc = 1; 9147 return xmlParseAttValueComplex(ctxt, len, normalize); 9148 } 9149 9150 /** 9151 * xmlParseAttribute2: 9152 * @ctxt: an XML parser context 9153 * @pref: the element prefix 9154 * @elem: the element name 9155 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9156 * @value: a xmlChar ** used to store the value of the attribute 9157 * @len: an int * to save the length of the attribute 9158 * @alloc: an int * to indicate if the attribute was allocated 9159 * 9160 * parse an attribute in the new SAX2 framework. 9161 * 9162 * Returns the attribute name, and the value in *value, . 
9163 */ 9164 9165 static const xmlChar * 9166 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9167 const xmlChar * pref, const xmlChar * elem, 9168 const xmlChar ** prefix, xmlChar ** value, 9169 int *len, int *alloc) 9170 { 9171 const xmlChar *name; 9172 xmlChar *val, *internal_val = NULL; 9173 int normalize = 0; 9174 9175 *value = NULL; 9176 GROW; 9177 name = xmlParseQName(ctxt, prefix); 9178 if (name == NULL) { 9179 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9180 "error parsing attribute name\n"); 9181 return (NULL); 9182 } 9183 9184 /* 9185 * get the type if needed 9186 */ 9187 if (ctxt->attsSpecial != NULL) { 9188 int type; 9189 9190 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 9191 pref, elem, *prefix, name); 9192 if (type != 0) 9193 normalize = 1; 9194 } 9195 9196 /* 9197 * read the value 9198 */ 9199 SKIP_BLANKS; 9200 if (RAW == '=') { 9201 NEXT; 9202 SKIP_BLANKS; 9203 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9204 if (normalize) { 9205 /* 9206 * Sometimes a second normalisation pass for spaces is needed 9207 * but that only happens if charrefs or entities refernces 9208 * have been used in the attribute value, i.e. the attribute 9209 * value have been extracted in an allocated string already. 
9210 */ 9211 if (*alloc) { 9212 const xmlChar *val2; 9213 9214 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9215 if ((val2 != NULL) && (val2 != val)) { 9216 xmlFree(val); 9217 val = (xmlChar *) val2; 9218 } 9219 } 9220 } 9221 ctxt->instate = XML_PARSER_CONTENT; 9222 } else { 9223 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9224 "Specification mandate value for attribute %s\n", 9225 name); 9226 return (NULL); 9227 } 9228 9229 if (*prefix == ctxt->str_xml) { 9230 /* 9231 * Check that xml:lang conforms to the specification 9232 * No more registered as an error, just generate a warning now 9233 * since this was deprecated in XML second edition 9234 */ 9235 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9236 internal_val = xmlStrndup(val, *len); 9237 if (!xmlCheckLanguageID(internal_val)) { 9238 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9239 "Malformed value for xml:lang : %s\n", 9240 internal_val, NULL); 9241 } 9242 } 9243 9244 /* 9245 * Check that xml:space conforms to the specification 9246 */ 9247 if (xmlStrEqual(name, BAD_CAST "space")) { 9248 internal_val = xmlStrndup(val, *len); 9249 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9250 *(ctxt->space) = 0; 9251 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9252 *(ctxt->space) = 1; 9253 else { 9254 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9255 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9256 internal_val, NULL); 9257 } 9258 } 9259 if (internal_val) { 9260 xmlFree(internal_val); 9261 } 9262 } 9263 9264 *value = val; 9265 return (name); 9266 } 9267 /** 9268 * xmlParseStartTag2: 9269 * @ctxt: an XML parser context 9270 * 9271 * parse a start of tag either for rule element or 9272 * EmptyElement. In both case we don't parse the tag closing chars. 9273 * This routine is called when running SAX2 parsing 9274 * 9275 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9276 * 9277 * [ WFC: Unique Att Spec ] 9278 * No attribute name may appear more than once in the same start-tag or 9279 * empty-element tag. 9280 * 9281 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9282 * 9283 * [ WFC: Unique Att Spec ] 9284 * No attribute name may appear more than once in the same start-tag or 9285 * empty-element tag. 9286 * 9287 * With namespace: 9288 * 9289 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9290 * 9291 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9292 * 9293 * Returns the element name parsed 9294 */ 9295 9296 static const xmlChar * 9297 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9298 const xmlChar **URI, int *tlen) { 9299 const xmlChar *localname; 9300 const xmlChar *prefix; 9301 const xmlChar *attname; 9302 const xmlChar *aprefix; 9303 const xmlChar *nsname; 9304 xmlChar *attvalue; 9305 const xmlChar **atts = ctxt->atts; 9306 int maxatts = ctxt->maxatts; 9307 int nratts, nbatts, nbdef; 9308 int i, j, nbNs, attval, oldline, oldcol; 9309 const xmlChar *base; 9310 unsigned long cur; 9311 int nsNr = ctxt->nsNr; 9312 9313 if (RAW != '<') return(NULL); 9314 NEXT1; 9315 9316 /* 9317 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9318 * point since the attribute values may be stored as pointers to 9319 * the buffer and calling SHRINK would destroy them ! 9320 * The Shrinking is only possible once the full set of attribute 9321 * callbacks have been done. 9322 */ 9323 reparse: 9324 SHRINK; 9325 base = ctxt->input->base; 9326 cur = ctxt->input->cur - ctxt->input->base; 9327 oldline = ctxt->input->line; 9328 oldcol = ctxt->input->col; 9329 nbatts = 0; 9330 nratts = 0; 9331 nbdef = 0; 9332 nbNs = 0; 9333 attval = 0; 9334 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9335 ctxt->nsNr = nsNr; 9336 9337 localname = xmlParseQName(ctxt, &prefix); 9338 if (localname == NULL) { 9339 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9340 "StartTag: invalid element name\n"); 9341 return(NULL); 9342 } 9343 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9344 9345 /* 9346 * Now parse the attributes, it ends up with the ending 9347 * 9348 * (S Attribute)* S? 9349 */ 9350 SKIP_BLANKS; 9351 GROW; 9352 if (ctxt->input->base != base) goto base_changed; 9353 9354 while (((RAW != '>') && 9355 ((RAW != '/') || (NXT(1) != '>')) && 9356 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9357 const xmlChar *q = CUR_PTR; 9358 unsigned int cons = ctxt->input->consumed; 9359 int len = -1, alloc = 0; 9360 9361 attname = xmlParseAttribute2(ctxt, prefix, localname, 9362 &aprefix, &attvalue, &len, &alloc); 9363 if (ctxt->input->base != base) { 9364 if ((attvalue != NULL) && (alloc != 0)) 9365 xmlFree(attvalue); 9366 attvalue = NULL; 9367 goto base_changed; 9368 } 9369 if ((attname != NULL) && (attvalue != NULL)) { 9370 if (len < 0) len = xmlStrlen(attvalue); 9371 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9372 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9373 xmlURIPtr uri; 9374 9375 if (URL == NULL) { 9376 xmlErrMemory(ctxt, "dictionary allocation failure"); 9377 if ((attvalue != NULL) && (alloc != 0)) 9378 xmlFree(attvalue); 9379 return(NULL); 9380 } 9381 if (*URL != 0) { 9382 uri = xmlParseURI((const char *) URL); 9383 if (uri == NULL) { 9384 xmlNsErr(ctxt, XML_WAR_NS_URI, 9385 "xmlns: '%s' is not a valid URI\n", 9386 URL, NULL, NULL); 9387 } else { 9388 if (uri->scheme == NULL) { 9389 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9390 "xmlns: URI %s is not absolute\n", 9391 URL, NULL, NULL); 9392 } 9393 xmlFreeURI(uri); 9394 } 9395 if (URL == ctxt->str_xml_ns) { 9396 if (attname != ctxt->str_xml) { 9397 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9398 "xml namespace URI cannot be the default namespace\n", 9399 NULL, 
NULL, NULL); 9400 } 9401 goto skip_default_ns; 9402 } 9403 if ((len == 29) && 9404 (xmlStrEqual(URL, 9405 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9406 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9407 "reuse of the xmlns namespace name is forbidden\n", 9408 NULL, NULL, NULL); 9409 goto skip_default_ns; 9410 } 9411 } 9412 /* 9413 * check that it's not a defined namespace 9414 */ 9415 for (j = 1;j <= nbNs;j++) 9416 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9417 break; 9418 if (j <= nbNs) 9419 xmlErrAttributeDup(ctxt, NULL, attname); 9420 else 9421 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9422 skip_default_ns: 9423 if (alloc != 0) xmlFree(attvalue); 9424 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9425 break; 9426 if (!IS_BLANK_CH(RAW)) { 9427 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9428 "attributes construct error\n"); 9429 break; 9430 } 9431 SKIP_BLANKS; 9432 continue; 9433 } 9434 if (aprefix == ctxt->str_xmlns) { 9435 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9436 xmlURIPtr uri; 9437 9438 if (attname == ctxt->str_xml) { 9439 if (URL != ctxt->str_xml_ns) { 9440 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9441 "xml namespace prefix mapped to wrong URI\n", 9442 NULL, NULL, NULL); 9443 } 9444 /* 9445 * Do not keep a namespace definition node 9446 */ 9447 goto skip_ns; 9448 } 9449 if (URL == ctxt->str_xml_ns) { 9450 if (attname != ctxt->str_xml) { 9451 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9452 "xml namespace URI mapped to wrong prefix\n", 9453 NULL, NULL, NULL); 9454 } 9455 goto skip_ns; 9456 } 9457 if (attname == ctxt->str_xmlns) { 9458 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9459 "redefinition of the xmlns prefix is forbidden\n", 9460 NULL, NULL, NULL); 9461 goto skip_ns; 9462 } 9463 if ((len == 29) && 9464 (xmlStrEqual(URL, 9465 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9466 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9467 "reuse of the xmlns namespace name is forbidden\n", 9468 NULL, NULL, NULL); 9469 goto skip_ns; 9470 
} 9471 if ((URL == NULL) || (URL[0] == 0)) { 9472 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9473 "xmlns:%s: Empty XML namespace is not allowed\n", 9474 attname, NULL, NULL); 9475 goto skip_ns; 9476 } else { 9477 uri = xmlParseURI((const char *) URL); 9478 if (uri == NULL) { 9479 xmlNsErr(ctxt, XML_WAR_NS_URI, 9480 "xmlns:%s: '%s' is not a valid URI\n", 9481 attname, URL, NULL); 9482 } else { 9483 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9484 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9485 "xmlns:%s: URI %s is not absolute\n", 9486 attname, URL, NULL); 9487 } 9488 xmlFreeURI(uri); 9489 } 9490 } 9491 9492 /* 9493 * check that it's not a defined namespace 9494 */ 9495 for (j = 1;j <= nbNs;j++) 9496 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9497 break; 9498 if (j <= nbNs) 9499 xmlErrAttributeDup(ctxt, aprefix, attname); 9500 else 9501 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9502 skip_ns: 9503 if (alloc != 0) xmlFree(attvalue); 9504 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9505 break; 9506 if (!IS_BLANK_CH(RAW)) { 9507 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9508 "attributes construct error\n"); 9509 break; 9510 } 9511 SKIP_BLANKS; 9512 if (ctxt->input->base != base) goto base_changed; 9513 continue; 9514 } 9515 9516 /* 9517 * Add the pair to atts 9518 */ 9519 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9520 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9521 if (attvalue[len] == 0) 9522 xmlFree(attvalue); 9523 goto failed; 9524 } 9525 maxatts = ctxt->maxatts; 9526 atts = ctxt->atts; 9527 } 9528 ctxt->attallocs[nratts++] = alloc; 9529 atts[nbatts++] = attname; 9530 atts[nbatts++] = aprefix; 9531 atts[nbatts++] = NULL; /* the URI will be fetched later */ 9532 atts[nbatts++] = attvalue; 9533 attvalue += len; 9534 atts[nbatts++] = attvalue; 9535 /* 9536 * tag if some deallocation is needed 9537 */ 9538 if (alloc != 0) attval = 1; 9539 } else { 9540 if ((attvalue != NULL) && (attvalue[len] == 0)) 9541 xmlFree(attvalue); 9542 } 9543 
9544 failed: 9545 9546 GROW 9547 if (ctxt->instate == XML_PARSER_EOF) 9548 break; 9549 if (ctxt->input->base != base) goto base_changed; 9550 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9551 break; 9552 if (!IS_BLANK_CH(RAW)) { 9553 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9554 "attributes construct error\n"); 9555 break; 9556 } 9557 SKIP_BLANKS; 9558 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9559 (attname == NULL) && (attvalue == NULL)) { 9560 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9561 "xmlParseStartTag: problem parsing attributes\n"); 9562 break; 9563 } 9564 GROW; 9565 if (ctxt->input->base != base) goto base_changed; 9566 } 9567 9568 /* 9569 * The attributes defaulting 9570 */ 9571 if (ctxt->attsDefault != NULL) { 9572 xmlDefAttrsPtr defaults; 9573 9574 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9575 if (defaults != NULL) { 9576 for (i = 0;i < defaults->nbAttrs;i++) { 9577 attname = defaults->values[5 * i]; 9578 aprefix = defaults->values[5 * i + 1]; 9579 9580 /* 9581 * special work for namespaces defaulted defs 9582 */ 9583 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9584 /* 9585 * check that it's not a defined namespace 9586 */ 9587 for (j = 1;j <= nbNs;j++) 9588 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9589 break; 9590 if (j <= nbNs) continue; 9591 9592 nsname = xmlGetNamespace(ctxt, NULL); 9593 if (nsname != defaults->values[5 * i + 2]) { 9594 if (nsPush(ctxt, NULL, 9595 defaults->values[5 * i + 2]) > 0) 9596 nbNs++; 9597 } 9598 } else if (aprefix == ctxt->str_xmlns) { 9599 /* 9600 * check that it's not a defined namespace 9601 */ 9602 for (j = 1;j <= nbNs;j++) 9603 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9604 break; 9605 if (j <= nbNs) continue; 9606 9607 nsname = xmlGetNamespace(ctxt, attname); 9608 if (nsname != defaults->values[2]) { 9609 if (nsPush(ctxt, attname, 9610 defaults->values[5 * i + 2]) > 0) 9611 nbNs++; 9612 } 9613 } else { 9614 /* 9615 * check that it's not a 
defined attribute 9616 */ 9617 for (j = 0;j < nbatts;j+=5) { 9618 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9619 break; 9620 } 9621 if (j < nbatts) continue; 9622 9623 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9624 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9625 return(NULL); 9626 } 9627 maxatts = ctxt->maxatts; 9628 atts = ctxt->atts; 9629 } 9630 atts[nbatts++] = attname; 9631 atts[nbatts++] = aprefix; 9632 if (aprefix == NULL) 9633 atts[nbatts++] = NULL; 9634 else 9635 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9636 atts[nbatts++] = defaults->values[5 * i + 2]; 9637 atts[nbatts++] = defaults->values[5 * i + 3]; 9638 if ((ctxt->standalone == 1) && 9639 (defaults->values[5 * i + 4] != NULL)) { 9640 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9641 "standalone: attribute %s on %s defaulted from external subset\n", 9642 attname, localname); 9643 } 9644 nbdef++; 9645 } 9646 } 9647 } 9648 } 9649 9650 /* 9651 * The attributes checkings 9652 */ 9653 for (i = 0; i < nbatts;i += 5) { 9654 /* 9655 * The default namespace does not apply to attribute names. 9656 */ 9657 if (atts[i + 1] != NULL) { 9658 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9659 if (nsname == NULL) { 9660 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9661 "Namespace prefix %s for %s on %s is not defined\n", 9662 atts[i + 1], atts[i], localname); 9663 } 9664 atts[i + 2] = nsname; 9665 } else 9666 nsname = NULL; 9667 /* 9668 * [ WFC: Unique Att Spec ] 9669 * No attribute name may appear more than once in the same 9670 * start-tag or empty-element tag. 9671 * As extended by the Namespace in XML REC. 
9672 */ 9673 for (j = 0; j < i;j += 5) { 9674 if (atts[i] == atts[j]) { 9675 if (atts[i+1] == atts[j+1]) { 9676 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9677 break; 9678 } 9679 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9680 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9681 "Namespaced Attribute %s in '%s' redefined\n", 9682 atts[i], nsname, NULL); 9683 break; 9684 } 9685 } 9686 } 9687 } 9688 9689 nsname = xmlGetNamespace(ctxt, prefix); 9690 if ((prefix != NULL) && (nsname == NULL)) { 9691 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9692 "Namespace prefix %s on %s is not defined\n", 9693 prefix, localname, NULL); 9694 } 9695 *pref = prefix; 9696 *URI = nsname; 9697 9698 /* 9699 * SAX: Start of Element ! 9700 */ 9701 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9702 (!ctxt->disableSAX)) { 9703 if (nbNs > 0) 9704 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9705 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9706 nbatts / 5, nbdef, atts); 9707 else 9708 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9709 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9710 } 9711 9712 /* 9713 * Free up attribute allocated strings if needed 9714 */ 9715 if (attval != 0) { 9716 for (i = 3,j = 0; j < nratts;i += 5,j++) 9717 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9718 xmlFree((xmlChar *) atts[i]); 9719 } 9720 9721 return(localname); 9722 9723 base_changed: 9724 /* 9725 * the attribute strings are valid iif the base didn't changed 9726 */ 9727 if (attval != 0) { 9728 for (i = 3,j = 0; j < nratts;i += 5,j++) 9729 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9730 xmlFree((xmlChar *) atts[i]); 9731 } 9732 ctxt->input->cur = ctxt->input->base + cur; 9733 ctxt->input->line = oldline; 9734 ctxt->input->col = oldcol; 9735 if (ctxt->wellFormed == 1) { 9736 goto reparse; 9737 } 9738 return(NULL); 9739 } 9740 9741 /** 9742 * xmlParseEndTag2: 9743 * @ctxt: an XML parser context 9744 * @line: line of the start tag 
9745 * @nsNr: number of namespaces on the start tag 9746 * 9747 * parse an end of tag 9748 * 9749 * [42] ETag ::= '</' Name S? '>' 9750 * 9751 * With namespace 9752 * 9753 * [NS 9] ETag ::= '</' QName S? '>' 9754 */ 9755 9756 static void 9757 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9758 const xmlChar *URI, int line, int nsNr, int tlen) { 9759 const xmlChar *name; 9760 9761 GROW; 9762 if ((RAW != '<') || (NXT(1) != '/')) { 9763 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9764 return; 9765 } 9766 SKIP(2); 9767 9768 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9769 if (ctxt->input->cur[tlen] == '>') { 9770 ctxt->input->cur += tlen + 1; 9771 ctxt->input->col += tlen + 1; 9772 goto done; 9773 } 9774 ctxt->input->cur += tlen; 9775 ctxt->input->col += tlen; 9776 name = (xmlChar*)1; 9777 } else { 9778 if (prefix == NULL) 9779 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9780 else 9781 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9782 } 9783 9784 /* 9785 * We should definitely be at the ending "S? '>'" part 9786 */ 9787 GROW; 9788 if (ctxt->instate == XML_PARSER_EOF) 9789 return; 9790 SKIP_BLANKS; 9791 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9792 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9793 } else 9794 NEXT1; 9795 9796 /* 9797 * [ WFC: Element Type Match ] 9798 * The Name in an element's end-tag must match the element type in the 9799 * start-tag. 
9800 * 9801 */ 9802 if (name != (xmlChar*)1) { 9803 if (name == NULL) name = BAD_CAST "unparseable"; 9804 if ((line == 0) && (ctxt->node != NULL)) 9805 line = ctxt->node->line; 9806 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9807 "Opening and ending tag mismatch: %s line %d and %s\n", 9808 ctxt->name, line, name); 9809 } 9810 9811 /* 9812 * SAX: End of Tag 9813 */ 9814 done: 9815 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9816 (!ctxt->disableSAX)) 9817 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9818 9819 spacePop(ctxt); 9820 if (nsNr != 0) 9821 nsPop(ctxt, nsNr); 9822 return; 9823 } 9824 9825 /** 9826 * xmlParseCDSect: 9827 * @ctxt: an XML parser context 9828 * 9829 * Parse escaped pure raw content. 9830 * 9831 * [18] CDSect ::= CDStart CData CDEnd 9832 * 9833 * [19] CDStart ::= '<![CDATA[' 9834 * 9835 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9836 * 9837 * [21] CDEnd ::= ']]>' 9838 */ 9839 void 9840 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9841 xmlChar *buf = NULL; 9842 int len = 0; 9843 int size = XML_PARSER_BUFFER_SIZE; 9844 int r, rl; 9845 int s, sl; 9846 int cur, l; 9847 int count = 0; 9848 9849 /* Check 2.6.0 was NXT(0) not RAW */ 9850 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9851 SKIP(9); 9852 } else 9853 return; 9854 9855 ctxt->instate = XML_PARSER_CDATA_SECTION; 9856 r = CUR_CHAR(rl); 9857 if (!IS_CHAR(r)) { 9858 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9859 ctxt->instate = XML_PARSER_CONTENT; 9860 return; 9861 } 9862 NEXTL(rl); 9863 s = CUR_CHAR(sl); 9864 if (!IS_CHAR(s)) { 9865 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9866 ctxt->instate = XML_PARSER_CONTENT; 9867 return; 9868 } 9869 NEXTL(sl); 9870 cur = CUR_CHAR(l); 9871 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9872 if (buf == NULL) { 9873 xmlErrMemory(ctxt, NULL); 9874 return; 9875 } 9876 while (IS_CHAR(cur) && 9877 ((r != ']') || (s != ']') || (cur != '>'))) { 9878 if (len + 
5 >= size) { 9879 xmlChar *tmp; 9880 9881 if ((size > XML_MAX_TEXT_LENGTH) && 9882 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9883 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9884 "CData section too big found", NULL); 9885 xmlFree (buf); 9886 return; 9887 } 9888 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9889 if (tmp == NULL) { 9890 xmlFree(buf); 9891 xmlErrMemory(ctxt, NULL); 9892 return; 9893 } 9894 buf = tmp; 9895 size *= 2; 9896 } 9897 COPY_BUF(rl,buf,len,r); 9898 r = s; 9899 rl = sl; 9900 s = cur; 9901 sl = l; 9902 count++; 9903 if (count > 50) { 9904 GROW; 9905 if (ctxt->instate == XML_PARSER_EOF) { 9906 xmlFree(buf); 9907 return; 9908 } 9909 count = 0; 9910 } 9911 NEXTL(l); 9912 cur = CUR_CHAR(l); 9913 } 9914 buf[len] = 0; 9915 ctxt->instate = XML_PARSER_CONTENT; 9916 if (cur != '>') { 9917 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9918 "CData section not finished\n%.50s\n", buf); 9919 xmlFree(buf); 9920 return; 9921 } 9922 NEXTL(l); 9923 9924 /* 9925 * OK the buffer is to be consumed as cdata. 9926 */ 9927 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9928 if (ctxt->sax->cdataBlock != NULL) 9929 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9930 else if (ctxt->sax->characters != NULL) 9931 ctxt->sax->characters(ctxt->userData, buf, len); 9932 } 9933 xmlFree(buf); 9934 } 9935 9936 /** 9937 * xmlParseContent: 9938 * @ctxt: an XML parser context 9939 * 9940 * Parse a content: 9941 * 9942 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9943 */ 9944 9945 void 9946 xmlParseContent(xmlParserCtxtPtr ctxt) { 9947 GROW; 9948 while ((RAW != 0) && 9949 ((RAW != '<') || (NXT(1) != '/')) && 9950 (ctxt->instate != XML_PARSER_EOF)) { 9951 const xmlChar *test = CUR_PTR; 9952 unsigned int cons = ctxt->input->consumed; 9953 const xmlChar *cur = ctxt->input->cur; 9954 9955 /* 9956 * First case : a Processing Instruction. 
9957 */ 9958 if ((*cur == '<') && (cur[1] == '?')) { 9959 xmlParsePI(ctxt); 9960 } 9961 9962 /* 9963 * Second case : a CDSection 9964 */ 9965 /* 2.6.0 test was *cur not RAW */ 9966 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9967 xmlParseCDSect(ctxt); 9968 } 9969 9970 /* 9971 * Third case : a comment 9972 */ 9973 else if ((*cur == '<') && (NXT(1) == '!') && 9974 (NXT(2) == '-') && (NXT(3) == '-')) { 9975 xmlParseComment(ctxt); 9976 ctxt->instate = XML_PARSER_CONTENT; 9977 } 9978 9979 /* 9980 * Fourth case : a sub-element. 9981 */ 9982 else if (*cur == '<') { 9983 xmlParseElement(ctxt); 9984 } 9985 9986 /* 9987 * Fifth case : a reference. If if has not been resolved, 9988 * parsing returns it's Name, create the node 9989 */ 9990 9991 else if (*cur == '&') { 9992 xmlParseReference(ctxt); 9993 } 9994 9995 /* 9996 * Last case, text. Note that References are handled directly. 9997 */ 9998 else { 9999 xmlParseCharData(ctxt, 0); 10000 } 10001 10002 GROW; 10003 /* 10004 * Pop-up of finished entities. 10005 */ 10006 while ((RAW == 0) && (ctxt->inputNr > 1)) 10007 xmlPopInput(ctxt); 10008 SHRINK; 10009 10010 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 10011 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 10012 "detected an error in element content\n"); 10013 ctxt->instate = XML_PARSER_EOF; 10014 break; 10015 } 10016 } 10017 } 10018 10019 /** 10020 * xmlParseElement: 10021 * @ctxt: an XML parser context 10022 * 10023 * parse an XML element, this is highly recursive 10024 * 10025 * [39] element ::= EmptyElemTag | STag content ETag 10026 * 10027 * [ WFC: Element Type Match ] 10028 * The Name in an element's end-tag must match the element type in the 10029 * start-tag. 
10030 * 10031 */ 10032 10033 void 10034 xmlParseElement(xmlParserCtxtPtr ctxt) { 10035 const xmlChar *name; 10036 const xmlChar *prefix = NULL; 10037 const xmlChar *URI = NULL; 10038 xmlParserNodeInfo node_info; 10039 int line, tlen = 0; 10040 xmlNodePtr ret; 10041 int nsNr = ctxt->nsNr; 10042 10043 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 10044 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 10045 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 10046 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 10047 xmlParserMaxDepth); 10048 ctxt->instate = XML_PARSER_EOF; 10049 return; 10050 } 10051 10052 /* Capture start position */ 10053 if (ctxt->record_info) { 10054 node_info.begin_pos = ctxt->input->consumed + 10055 (CUR_PTR - ctxt->input->base); 10056 node_info.begin_line = ctxt->input->line; 10057 } 10058 10059 if (ctxt->spaceNr == 0) 10060 spacePush(ctxt, -1); 10061 else if (*ctxt->space == -2) 10062 spacePush(ctxt, -1); 10063 else 10064 spacePush(ctxt, *ctxt->space); 10065 10066 line = ctxt->input->line; 10067 #ifdef LIBXML_SAX1_ENABLED 10068 if (ctxt->sax2) 10069 #endif /* LIBXML_SAX1_ENABLED */ 10070 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10071 #ifdef LIBXML_SAX1_ENABLED 10072 else 10073 name = xmlParseStartTag(ctxt); 10074 #endif /* LIBXML_SAX1_ENABLED */ 10075 if (ctxt->instate == XML_PARSER_EOF) 10076 return; 10077 if (name == NULL) { 10078 spacePop(ctxt); 10079 return; 10080 } 10081 namePush(ctxt, name); 10082 ret = ctxt->node; 10083 10084 #ifdef LIBXML_VALID_ENABLED 10085 /* 10086 * [ VC: Root Element Type ] 10087 * The Name in the document type declaration must match the element 10088 * type of the root element. 10089 */ 10090 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10091 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10092 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10093 #endif /* LIBXML_VALID_ENABLED */ 10094 10095 /* 10096 * Check for an Empty Element. 
10097 */ 10098 if ((RAW == '/') && (NXT(1) == '>')) { 10099 SKIP(2); 10100 if (ctxt->sax2) { 10101 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10102 (!ctxt->disableSAX)) 10103 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10104 #ifdef LIBXML_SAX1_ENABLED 10105 } else { 10106 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10107 (!ctxt->disableSAX)) 10108 ctxt->sax->endElement(ctxt->userData, name); 10109 #endif /* LIBXML_SAX1_ENABLED */ 10110 } 10111 namePop(ctxt); 10112 spacePop(ctxt); 10113 if (nsNr != ctxt->nsNr) 10114 nsPop(ctxt, ctxt->nsNr - nsNr); 10115 if ( ret != NULL && ctxt->record_info ) { 10116 node_info.end_pos = ctxt->input->consumed + 10117 (CUR_PTR - ctxt->input->base); 10118 node_info.end_line = ctxt->input->line; 10119 node_info.node = ret; 10120 xmlParserAddNodeInfo(ctxt, &node_info); 10121 } 10122 return; 10123 } 10124 if (RAW == '>') { 10125 NEXT1; 10126 } else { 10127 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10128 "Couldn't find end of Start Tag %s line %d\n", 10129 name, line, NULL); 10130 10131 /* 10132 * end of parsing of this node. 10133 */ 10134 nodePop(ctxt); 10135 namePop(ctxt); 10136 spacePop(ctxt); 10137 if (nsNr != ctxt->nsNr) 10138 nsPop(ctxt, ctxt->nsNr - nsNr); 10139 10140 /* 10141 * Capture end position and add node 10142 */ 10143 if ( ret != NULL && ctxt->record_info ) { 10144 node_info.end_pos = ctxt->input->consumed + 10145 (CUR_PTR - ctxt->input->base); 10146 node_info.end_line = ctxt->input->line; 10147 node_info.node = ret; 10148 xmlParserAddNodeInfo(ctxt, &node_info); 10149 } 10150 return; 10151 } 10152 10153 /* 10154 * Parse the content of the element: 10155 */ 10156 xmlParseContent(ctxt); 10157 if (ctxt->instate == XML_PARSER_EOF) 10158 return; 10159 if (!IS_BYTE_CHAR(RAW)) { 10160 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 10161 "Premature end of data in tag %s line %d\n", 10162 name, line, NULL); 10163 10164 /* 10165 * end of parsing of this node. 
10166 */ 10167 nodePop(ctxt); 10168 namePop(ctxt); 10169 spacePop(ctxt); 10170 if (nsNr != ctxt->nsNr) 10171 nsPop(ctxt, ctxt->nsNr - nsNr); 10172 return; 10173 } 10174 10175 /* 10176 * parse the end of tag: '</' should be here. 10177 */ 10178 if (ctxt->sax2) { 10179 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 10180 namePop(ctxt); 10181 } 10182 #ifdef LIBXML_SAX1_ENABLED 10183 else 10184 xmlParseEndTag1(ctxt, line); 10185 #endif /* LIBXML_SAX1_ENABLED */ 10186 10187 /* 10188 * Capture end position and add node 10189 */ 10190 if ( ret != NULL && ctxt->record_info ) { 10191 node_info.end_pos = ctxt->input->consumed + 10192 (CUR_PTR - ctxt->input->base); 10193 node_info.end_line = ctxt->input->line; 10194 node_info.node = ret; 10195 xmlParserAddNodeInfo(ctxt, &node_info); 10196 } 10197 } 10198 10199 /** 10200 * xmlParseVersionNum: 10201 * @ctxt: an XML parser context 10202 * 10203 * parse the XML version value. 10204 * 10205 * [26] VersionNum ::= '1.' [0-9]+ 10206 * 10207 * In practice allow [0-9].[0-9]+ at that level 10208 * 10209 * Returns the string giving the XML version number, or NULL 10210 */ 10211 xmlChar * 10212 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10213 xmlChar *buf = NULL; 10214 int len = 0; 10215 int size = 10; 10216 xmlChar cur; 10217 10218 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10219 if (buf == NULL) { 10220 xmlErrMemory(ctxt, NULL); 10221 return(NULL); 10222 } 10223 cur = CUR; 10224 if (!((cur >= '0') && (cur <= '9'))) { 10225 xmlFree(buf); 10226 return(NULL); 10227 } 10228 buf[len++] = cur; 10229 NEXT; 10230 cur=CUR; 10231 if (cur != '.') { 10232 xmlFree(buf); 10233 return(NULL); 10234 } 10235 buf[len++] = cur; 10236 NEXT; 10237 cur=CUR; 10238 while ((cur >= '0') && (cur <= '9')) { 10239 if (len + 1 >= size) { 10240 xmlChar *tmp; 10241 10242 size *= 2; 10243 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10244 if (tmp == NULL) { 10245 xmlFree(buf); 10246 xmlErrMemory(ctxt, NULL); 10247 
return(NULL); 10248 } 10249 buf = tmp; 10250 } 10251 buf[len++] = cur; 10252 NEXT; 10253 cur=CUR; 10254 } 10255 buf[len] = 0; 10256 return(buf); 10257 } 10258 10259 /** 10260 * xmlParseVersionInfo: 10261 * @ctxt: an XML parser context 10262 * 10263 * parse the XML version. 10264 * 10265 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10266 * 10267 * [25] Eq ::= S? '=' S? 10268 * 10269 * Returns the version string, e.g. "1.0" 10270 */ 10271 10272 xmlChar * 10273 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10274 xmlChar *version = NULL; 10275 10276 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10277 SKIP(7); 10278 SKIP_BLANKS; 10279 if (RAW != '=') { 10280 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10281 return(NULL); 10282 } 10283 NEXT; 10284 SKIP_BLANKS; 10285 if (RAW == '"') { 10286 NEXT; 10287 version = xmlParseVersionNum(ctxt); 10288 if (RAW != '"') { 10289 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10290 } else 10291 NEXT; 10292 } else if (RAW == '\''){ 10293 NEXT; 10294 version = xmlParseVersionNum(ctxt); 10295 if (RAW != '\'') { 10296 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10297 } else 10298 NEXT; 10299 } else { 10300 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10301 } 10302 } 10303 return(version); 10304 } 10305 10306 /** 10307 * xmlParseEncName: 10308 * @ctxt: an XML parser context 10309 * 10310 * parse the XML encoding name 10311 * 10312 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10313 * 10314 * Returns the encoding name value or NULL 10315 */ 10316 xmlChar * 10317 xmlParseEncName(xmlParserCtxtPtr ctxt) { 10318 xmlChar *buf = NULL; 10319 int len = 0; 10320 int size = 10; 10321 xmlChar cur; 10322 10323 cur = CUR; 10324 if (((cur >= 'a') && (cur <= 'z')) || 10325 ((cur >= 'A') && (cur <= 'Z'))) { 10326 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10327 if (buf == NULL) { 10328 xmlErrMemory(ctxt, NULL); 10329 return(NULL); 10330 } 10331 10332 buf[len++] = cur; 10333 
NEXT; 10334 cur = CUR; 10335 while (((cur >= 'a') && (cur <= 'z')) || 10336 ((cur >= 'A') && (cur <= 'Z')) || 10337 ((cur >= '0') && (cur <= '9')) || 10338 (cur == '.') || (cur == '_') || 10339 (cur == '-')) { 10340 if (len + 1 >= size) { 10341 xmlChar *tmp; 10342 10343 size *= 2; 10344 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10345 if (tmp == NULL) { 10346 xmlErrMemory(ctxt, NULL); 10347 xmlFree(buf); 10348 return(NULL); 10349 } 10350 buf = tmp; 10351 } 10352 buf[len++] = cur; 10353 NEXT; 10354 cur = CUR; 10355 if (cur == 0) { 10356 SHRINK; 10357 GROW; 10358 cur = CUR; 10359 } 10360 } 10361 buf[len] = 0; 10362 } else { 10363 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10364 } 10365 return(buf); 10366 } 10367 10368 /** 10369 * xmlParseEncodingDecl: 10370 * @ctxt: an XML parser context 10371 * 10372 * parse the XML encoding declaration 10373 * 10374 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10375 * 10376 * this setups the conversion filters. 
10377 * 10378 * Returns the encoding value or NULL 10379 */ 10380 10381 const xmlChar * 10382 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10383 xmlChar *encoding = NULL; 10384 10385 SKIP_BLANKS; 10386 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10387 SKIP(8); 10388 SKIP_BLANKS; 10389 if (RAW != '=') { 10390 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10391 return(NULL); 10392 } 10393 NEXT; 10394 SKIP_BLANKS; 10395 if (RAW == '"') { 10396 NEXT; 10397 encoding = xmlParseEncName(ctxt); 10398 if (RAW != '"') { 10399 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10400 } else 10401 NEXT; 10402 } else if (RAW == '\''){ 10403 NEXT; 10404 encoding = xmlParseEncName(ctxt); 10405 if (RAW != '\'') { 10406 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10407 } else 10408 NEXT; 10409 } else { 10410 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10411 } 10412 10413 /* 10414 * Non standard parsing, allowing the user to ignore encoding 10415 */ 10416 if (ctxt->options & XML_PARSE_IGNORE_ENC) { 10417 xmlFree((xmlChar *) encoding); 10418 return(NULL); 10419 } 10420 10421 /* 10422 * UTF-16 encoding stwich has already taken place at this stage, 10423 * more over the little-endian/big-endian selection is already done 10424 */ 10425 if ((encoding != NULL) && 10426 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10427 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10428 /* 10429 * If no encoding was passed to the parser, that we are 10430 * using UTF-16 and no decoder is present i.e. 
the 10431 * document is apparently UTF-8 compatible, then raise an 10432 * encoding mismatch fatal error 10433 */ 10434 if ((ctxt->encoding == NULL) && 10435 (ctxt->input->buf != NULL) && 10436 (ctxt->input->buf->encoder == NULL)) { 10437 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10438 "Document labelled UTF-16 but has UTF-8 content\n"); 10439 } 10440 if (ctxt->encoding != NULL) 10441 xmlFree((xmlChar *) ctxt->encoding); 10442 ctxt->encoding = encoding; 10443 } 10444 /* 10445 * UTF-8 encoding is handled natively 10446 */ 10447 else if ((encoding != NULL) && 10448 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10449 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10450 if (ctxt->encoding != NULL) 10451 xmlFree((xmlChar *) ctxt->encoding); 10452 ctxt->encoding = encoding; 10453 } 10454 else if (encoding != NULL) { 10455 xmlCharEncodingHandlerPtr handler; 10456 10457 if (ctxt->input->encoding != NULL) 10458 xmlFree((xmlChar *) ctxt->input->encoding); 10459 ctxt->input->encoding = encoding; 10460 10461 handler = xmlFindCharEncodingHandler((const char *) encoding); 10462 if (handler != NULL) { 10463 xmlSwitchToEncoding(ctxt, handler); 10464 } else { 10465 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10466 "Unsupported encoding %s\n", encoding); 10467 return(NULL); 10468 } 10469 } 10470 } 10471 return(encoding); 10472 } 10473 10474 /** 10475 * xmlParseSDDecl: 10476 * @ctxt: an XML parser context 10477 * 10478 * parse the XML standalone declaration 10479 * 10480 * [32] SDDecl ::= S 'standalone' Eq 10481 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10482 * 10483 * [ VC: Standalone Document Declaration ] 10484 * TODO The standalone document declaration must have the value "no" 10485 * if any external markup declarations contain declarations of: 10486 * - attributes with default values, if elements to which these 10487 * attributes apply appear in the document without specifications 10488 * of values for these attributes, or 10489 * - entities 
(other than amp, lt, gt, apos, quot), if references 10490 * to those entities appear in the document, or 10491 * - attributes with values subject to normalization, where the 10492 * attribute appears in the document with a value which will change 10493 * as a result of normalization, or 10494 * - element types with element content, if white space occurs directly 10495 * within any instance of those types. 10496 * 10497 * Returns: 10498 * 1 if standalone="yes" 10499 * 0 if standalone="no" 10500 * -2 if standalone attribute is missing or invalid 10501 * (A standalone value of -2 means that the XML declaration was found, 10502 * but no value was specified for the standalone attribute). 10503 */ 10504 10505 int 10506 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10507 int standalone = -2; 10508 10509 SKIP_BLANKS; 10510 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10511 SKIP(10); 10512 SKIP_BLANKS; 10513 if (RAW != '=') { 10514 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10515 return(standalone); 10516 } 10517 NEXT; 10518 SKIP_BLANKS; 10519 if (RAW == '\''){ 10520 NEXT; 10521 if ((RAW == 'n') && (NXT(1) == 'o')) { 10522 standalone = 0; 10523 SKIP(2); 10524 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10525 (NXT(2) == 's')) { 10526 standalone = 1; 10527 SKIP(3); 10528 } else { 10529 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10530 } 10531 if (RAW != '\'') { 10532 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10533 } else 10534 NEXT; 10535 } else if (RAW == '"'){ 10536 NEXT; 10537 if ((RAW == 'n') && (NXT(1) == 'o')) { 10538 standalone = 0; 10539 SKIP(2); 10540 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10541 (NXT(2) == 's')) { 10542 standalone = 1; 10543 SKIP(3); 10544 } else { 10545 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10546 } 10547 if (RAW != '"') { 10548 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10549 } else 10550 NEXT; 10551 } else { 10552 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10553 } 
10554 } 10555 return(standalone); 10556 } 10557 10558 /** 10559 * xmlParseXMLDecl: 10560 * @ctxt: an XML parser context 10561 * 10562 * parse an XML declaration header 10563 * 10564 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10565 */ 10566 10567 void 10568 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10569 xmlChar *version; 10570 10571 /* 10572 * This value for standalone indicates that the document has an 10573 * XML declaration but it does not have a standalone attribute. 10574 * It will be overwritten later if a standalone attribute is found. 10575 */ 10576 ctxt->input->standalone = -2; 10577 10578 /* 10579 * We know that '<?xml' is here. 10580 */ 10581 SKIP(5); 10582 10583 if (!IS_BLANK_CH(RAW)) { 10584 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10585 "Blank needed after '<?xml'\n"); 10586 } 10587 SKIP_BLANKS; 10588 10589 /* 10590 * We must have the VersionInfo here. 10591 */ 10592 version = xmlParseVersionInfo(ctxt); 10593 if (version == NULL) { 10594 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10595 } else { 10596 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10597 /* 10598 * Changed here for XML-1.0 5th edition 10599 */ 10600 if (ctxt->options & XML_PARSE_OLD10) { 10601 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10602 "Unsupported version '%s'\n", 10603 version); 10604 } else { 10605 if ((version[0] == '1') && ((version[1] == '.'))) { 10606 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10607 "Unsupported version '%s'\n", 10608 version, NULL); 10609 } else { 10610 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10611 "Unsupported version '%s'\n", 10612 version); 10613 } 10614 } 10615 } 10616 if (ctxt->version != NULL) 10617 xmlFree((void *) ctxt->version); 10618 ctxt->version = version; 10619 } 10620 10621 /* 10622 * We may have the encoding declaration 10623 */ 10624 if (!IS_BLANK_CH(RAW)) { 10625 if ((RAW == '?') && (NXT(1) == '>')) { 10626 SKIP(2); 10627 return; 10628 } 10629 xmlFatalErrMsg(ctxt, 
XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10630 } 10631 xmlParseEncodingDecl(ctxt); 10632 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10633 /* 10634 * The XML REC instructs us to stop parsing right here 10635 */ 10636 return; 10637 } 10638 10639 /* 10640 * We may have the standalone status. 10641 */ 10642 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10643 if ((RAW == '?') && (NXT(1) == '>')) { 10644 SKIP(2); 10645 return; 10646 } 10647 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10648 } 10649 10650 /* 10651 * We can grow the input buffer freely at that point 10652 */ 10653 GROW; 10654 10655 SKIP_BLANKS; 10656 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10657 10658 SKIP_BLANKS; 10659 if ((RAW == '?') && (NXT(1) == '>')) { 10660 SKIP(2); 10661 } else if (RAW == '>') { 10662 /* Deprecated old WD ... */ 10663 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10664 NEXT; 10665 } else { 10666 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10667 MOVETO_ENDTAG(CUR_PTR); 10668 NEXT; 10669 } 10670 } 10671 10672 /** 10673 * xmlParseMisc: 10674 * @ctxt: an XML parser context 10675 * 10676 * parse an XML Misc* optional field. 10677 * 10678 * [27] Misc ::= Comment | PI | S 10679 */ 10680 10681 void 10682 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10683 while ((ctxt->instate != XML_PARSER_EOF) && 10684 (((RAW == '<') && (NXT(1) == '?')) || 10685 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10686 IS_BLANK_CH(CUR))) { 10687 if ((RAW == '<') && (NXT(1) == '?')) { 10688 xmlParsePI(ctxt); 10689 } else if (IS_BLANK_CH(CUR)) { 10690 NEXT; 10691 } else 10692 xmlParseComment(ctxt); 10693 } 10694 } 10695 10696 /** 10697 * xmlParseDocument: 10698 * @ctxt: an XML parser context 10699 * 10700 * parse an XML document (and build a tree if using the standard SAX 10701 * interface). 10702 * 10703 * [1] document ::= prolog element Misc* 10704 * 10705 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 
10706 * 10707 * Returns 0, -1 in case of error. the parser context is augmented 10708 * as a result of the parsing. 10709 */ 10710 10711 int 10712 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10713 xmlChar start[4]; 10714 xmlCharEncoding enc; 10715 10716 xmlInitParser(); 10717 10718 if ((ctxt == NULL) || (ctxt->input == NULL)) 10719 return(-1); 10720 10721 GROW; 10722 10723 /* 10724 * SAX: detecting the level. 10725 */ 10726 xmlDetectSAX2(ctxt); 10727 10728 /* 10729 * SAX: beginning of the document processing. 10730 */ 10731 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10732 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10733 if (ctxt->instate == XML_PARSER_EOF) 10734 return(-1); 10735 10736 if ((ctxt->encoding == NULL) && 10737 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10738 /* 10739 * Get the 4 first bytes and decode the charset 10740 * if enc != XML_CHAR_ENCODING_NONE 10741 * plug some encoding conversion routines. 10742 */ 10743 start[0] = RAW; 10744 start[1] = NXT(1); 10745 start[2] = NXT(2); 10746 start[3] = NXT(3); 10747 enc = xmlDetectCharEncoding(&start[0], 4); 10748 if (enc != XML_CHAR_ENCODING_NONE) { 10749 xmlSwitchEncoding(ctxt, enc); 10750 } 10751 } 10752 10753 10754 if (CUR == 0) { 10755 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10756 } 10757 10758 /* 10759 * Check for the XMLDecl in the Prolog. 10760 * do not GROW here to avoid the detected encoder to decode more 10761 * than just the first line, unless the amount of data is really 10762 * too small to hold "<?xml version="1.0" encoding="foo" 10763 */ 10764 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10765 GROW; 10766 } 10767 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10768 10769 /* 10770 * Note that we will switch encoding on the fly. 
10771 */ 10772 xmlParseXMLDecl(ctxt); 10773 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10774 /* 10775 * The XML REC instructs us to stop parsing right here 10776 */ 10777 return(-1); 10778 } 10779 ctxt->standalone = ctxt->input->standalone; 10780 SKIP_BLANKS; 10781 } else { 10782 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10783 } 10784 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10785 ctxt->sax->startDocument(ctxt->userData); 10786 if (ctxt->instate == XML_PARSER_EOF) 10787 return(-1); 10788 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 10789 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 10790 ctxt->myDoc->compression = ctxt->input->buf->compressed; 10791 } 10792 10793 /* 10794 * The Misc part of the Prolog 10795 */ 10796 GROW; 10797 xmlParseMisc(ctxt); 10798 10799 /* 10800 * Then possibly doc type declaration(s) and more Misc 10801 * (doctypedecl Misc*)? 10802 */ 10803 GROW; 10804 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10805 10806 ctxt->inSubset = 1; 10807 xmlParseDocTypeDecl(ctxt); 10808 if (RAW == '[') { 10809 ctxt->instate = XML_PARSER_DTD; 10810 xmlParseInternalSubset(ctxt); 10811 if (ctxt->instate == XML_PARSER_EOF) 10812 return(-1); 10813 } 10814 10815 /* 10816 * Create and update the external subset. 
10817 */ 10818 ctxt->inSubset = 2; 10819 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10820 (!ctxt->disableSAX)) 10821 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10822 ctxt->extSubSystem, ctxt->extSubURI); 10823 if (ctxt->instate == XML_PARSER_EOF) 10824 return(-1); 10825 ctxt->inSubset = 0; 10826 10827 xmlCleanSpecialAttr(ctxt); 10828 10829 ctxt->instate = XML_PARSER_PROLOG; 10830 xmlParseMisc(ctxt); 10831 } 10832 10833 /* 10834 * Time to start parsing the tree itself 10835 */ 10836 GROW; 10837 if (RAW != '<') { 10838 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10839 "Start tag expected, '<' not found\n"); 10840 } else { 10841 ctxt->instate = XML_PARSER_CONTENT; 10842 xmlParseElement(ctxt); 10843 ctxt->instate = XML_PARSER_EPILOG; 10844 10845 10846 /* 10847 * The Misc part at the end 10848 */ 10849 xmlParseMisc(ctxt); 10850 10851 if (RAW != 0) { 10852 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10853 } 10854 ctxt->instate = XML_PARSER_EOF; 10855 } 10856 10857 /* 10858 * SAX: end of the document processing. 10859 */ 10860 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10861 ctxt->sax->endDocument(ctxt->userData); 10862 10863 /* 10864 * Remove locally kept entity definitions if the tree was not built 10865 */ 10866 if ((ctxt->myDoc != NULL) && 10867 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10868 xmlFreeDoc(ctxt->myDoc); 10869 ctxt->myDoc = NULL; 10870 } 10871 10872 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10873 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10874 if (ctxt->valid) 10875 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10876 if (ctxt->nsWellFormed) 10877 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10878 if (ctxt->options & XML_PARSE_OLD10) 10879 ctxt->myDoc->properties |= XML_DOC_OLD10; 10880 } 10881 if (! 
ctxt->wellFormed) { 10882 ctxt->valid = 0; 10883 return(-1); 10884 } 10885 return(0); 10886 } 10887 10888 /** 10889 * xmlParseExtParsedEnt: 10890 * @ctxt: an XML parser context 10891 * 10892 * parse a general parsed entity 10893 * An external general parsed entity is well-formed if it matches the 10894 * production labeled extParsedEnt. 10895 * 10896 * [78] extParsedEnt ::= TextDecl? content 10897 * 10898 * Returns 0, -1 in case of error. the parser context is augmented 10899 * as a result of the parsing. 10900 */ 10901 10902 int 10903 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10904 xmlChar start[4]; 10905 xmlCharEncoding enc; 10906 10907 if ((ctxt == NULL) || (ctxt->input == NULL)) 10908 return(-1); 10909 10910 xmlDefaultSAXHandlerInit(); 10911 10912 xmlDetectSAX2(ctxt); 10913 10914 GROW; 10915 10916 /* 10917 * SAX: beginning of the document processing. 10918 */ 10919 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10920 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10921 10922 /* 10923 * Get the 4 first bytes and decode the charset 10924 * if enc != XML_CHAR_ENCODING_NONE 10925 * plug some encoding conversion routines. 10926 */ 10927 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10928 start[0] = RAW; 10929 start[1] = NXT(1); 10930 start[2] = NXT(2); 10931 start[3] = NXT(3); 10932 enc = xmlDetectCharEncoding(start, 4); 10933 if (enc != XML_CHAR_ENCODING_NONE) { 10934 xmlSwitchEncoding(ctxt, enc); 10935 } 10936 } 10937 10938 10939 if (CUR == 0) { 10940 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10941 } 10942 10943 /* 10944 * Check for the XMLDecl in the Prolog. 10945 */ 10946 GROW; 10947 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10948 10949 /* 10950 * Note that we will switch encoding on the fly. 
10951 */ 10952 xmlParseXMLDecl(ctxt); 10953 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10954 /* 10955 * The XML REC instructs us to stop parsing right here 10956 */ 10957 return(-1); 10958 } 10959 SKIP_BLANKS; 10960 } else { 10961 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10962 } 10963 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10964 ctxt->sax->startDocument(ctxt->userData); 10965 if (ctxt->instate == XML_PARSER_EOF) 10966 return(-1); 10967 10968 /* 10969 * Doing validity checking on chunk doesn't make sense 10970 */ 10971 ctxt->instate = XML_PARSER_CONTENT; 10972 ctxt->validate = 0; 10973 ctxt->loadsubset = 0; 10974 ctxt->depth = 0; 10975 10976 xmlParseContent(ctxt); 10977 if (ctxt->instate == XML_PARSER_EOF) 10978 return(-1); 10979 10980 if ((RAW == '<') && (NXT(1) == '/')) { 10981 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10982 } else if (RAW != 0) { 10983 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10984 } 10985 10986 /* 10987 * SAX: end of the document processing. 10988 */ 10989 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10990 ctxt->sax->endDocument(ctxt->userData); 10991 10992 if (! ctxt->wellFormed) return(-1); 10993 return(0); 10994 } 10995 10996 #ifdef LIBXML_PUSH_ENABLED 10997 /************************************************************************ 10998 * * 10999 * Progressive parsing interfaces * 11000 * * 11001 ************************************************************************/ 11002 11003 /** 11004 * xmlParseLookupSequence: 11005 * @ctxt: an XML parser context 11006 * @first: the first char to lookup 11007 * @next: the next char to lookup or zero 11008 * @third: the next char to lookup or zero 11009 * 11010 * Try to find if a sequence (first, next, third) or just (first next) or 11011 * (first) is available in the input stream. 
11012 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 11013 * to avoid rescanning sequences of bytes, it DOES change the state of the 11014 * parser, do not use liberally. 11015 * 11016 * Returns the index to the current parsing point if the full sequence 11017 * is available, -1 otherwise. 11018 */ 11019 static int 11020 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 11021 xmlChar next, xmlChar third) { 11022 int base, len; 11023 xmlParserInputPtr in; 11024 const xmlChar *buf; 11025 11026 in = ctxt->input; 11027 if (in == NULL) return(-1); 11028 base = in->cur - in->base; 11029 if (base < 0) return(-1); 11030 if (ctxt->checkIndex > base) 11031 base = ctxt->checkIndex; 11032 if (in->buf == NULL) { 11033 buf = in->base; 11034 len = in->length; 11035 } else { 11036 buf = xmlBufContent(in->buf->buffer); 11037 len = xmlBufUse(in->buf->buffer); 11038 } 11039 /* take into account the sequence length */ 11040 if (third) len -= 2; 11041 else if (next) len --; 11042 for (;base < len;base++) { 11043 if (buf[base] == first) { 11044 if (third != 0) { 11045 if ((buf[base + 1] != next) || 11046 (buf[base + 2] != third)) continue; 11047 } else if (next != 0) { 11048 if (buf[base + 1] != next) continue; 11049 } 11050 ctxt->checkIndex = 0; 11051 #ifdef DEBUG_PUSH 11052 if (next == 0) 11053 xmlGenericError(xmlGenericErrorContext, 11054 "PP: lookup '%c' found at %d\n", 11055 first, base); 11056 else if (third == 0) 11057 xmlGenericError(xmlGenericErrorContext, 11058 "PP: lookup '%c%c' found at %d\n", 11059 first, next, base); 11060 else 11061 xmlGenericError(xmlGenericErrorContext, 11062 "PP: lookup '%c%c%c' found at %d\n", 11063 first, next, third, base); 11064 #endif 11065 return(base - (in->cur - in->base)); 11066 } 11067 } 11068 ctxt->checkIndex = base; 11069 #ifdef DEBUG_PUSH 11070 if (next == 0) 11071 xmlGenericError(xmlGenericErrorContext, 11072 "PP: lookup '%c' failed\n", first); 11073 else if (third == 0) 11074 
xmlGenericError(xmlGenericErrorContext, 11075 "PP: lookup '%c%c' failed\n", first, next); 11076 else 11077 xmlGenericError(xmlGenericErrorContext, 11078 "PP: lookup '%c%c%c' failed\n", first, next, third); 11079 #endif 11080 return(-1); 11081 } 11082 11083 /** 11084 * xmlParseGetLasts: 11085 * @ctxt: an XML parser context 11086 * @lastlt: pointer to store the last '<' from the input 11087 * @lastgt: pointer to store the last '>' from the input 11088 * 11089 * Lookup the last < and > in the current chunk 11090 */ 11091 static void 11092 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 11093 const xmlChar **lastgt) { 11094 const xmlChar *tmp; 11095 11096 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 11097 xmlGenericError(xmlGenericErrorContext, 11098 "Internal error: xmlParseGetLasts\n"); 11099 return; 11100 } 11101 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 11102 tmp = ctxt->input->end; 11103 tmp--; 11104 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 11105 if (tmp < ctxt->input->base) { 11106 *lastlt = NULL; 11107 *lastgt = NULL; 11108 } else { 11109 *lastlt = tmp; 11110 tmp++; 11111 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 11112 if (*tmp == '\'') { 11113 tmp++; 11114 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 11115 if (tmp < ctxt->input->end) tmp++; 11116 } else if (*tmp == '"') { 11117 tmp++; 11118 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 11119 if (tmp < ctxt->input->end) tmp++; 11120 } else 11121 tmp++; 11122 } 11123 if (tmp < ctxt->input->end) 11124 *lastgt = tmp; 11125 else { 11126 tmp = *lastlt; 11127 tmp--; 11128 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11129 if (tmp >= ctxt->input->base) 11130 *lastgt = tmp; 11131 else 11132 *lastgt = NULL; 11133 } 11134 } 11135 } else { 11136 *lastlt = NULL; 11137 *lastgt = NULL; 11138 } 11139 } 11140 /** 11141 * xmlCheckCdataPush: 11142 * @cur: pointer to the bock of characters 11143 * @len: length of the 
block in bytes 11144 * 11145 * Check that the block of characters is okay as SCdata content [20] 11146 * 11147 * Returns the number of bytes to pass if okay, a negative index where an 11148 * UTF-8 error occured otherwise 11149 */ 11150 static int 11151 xmlCheckCdataPush(const xmlChar *utf, int len) { 11152 int ix; 11153 unsigned char c; 11154 int codepoint; 11155 11156 if ((utf == NULL) || (len <= 0)) 11157 return(0); 11158 11159 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11160 c = utf[ix]; 11161 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11162 if (c >= 0x20) 11163 ix++; 11164 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11165 ix++; 11166 else 11167 return(-ix); 11168 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11169 if (ix + 2 > len) return(ix); 11170 if ((utf[ix+1] & 0xc0 ) != 0x80) 11171 return(-ix); 11172 codepoint = (utf[ix] & 0x1f) << 6; 11173 codepoint |= utf[ix+1] & 0x3f; 11174 if (!xmlIsCharQ(codepoint)) 11175 return(-ix); 11176 ix += 2; 11177 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11178 if (ix + 3 > len) return(ix); 11179 if (((utf[ix+1] & 0xc0) != 0x80) || 11180 ((utf[ix+2] & 0xc0) != 0x80)) 11181 return(-ix); 11182 codepoint = (utf[ix] & 0xf) << 12; 11183 codepoint |= (utf[ix+1] & 0x3f) << 6; 11184 codepoint |= utf[ix+2] & 0x3f; 11185 if (!xmlIsCharQ(codepoint)) 11186 return(-ix); 11187 ix += 3; 11188 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11189 if (ix + 4 > len) return(ix); 11190 if (((utf[ix+1] & 0xc0) != 0x80) || 11191 ((utf[ix+2] & 0xc0) != 0x80) || 11192 ((utf[ix+3] & 0xc0) != 0x80)) 11193 return(-ix); 11194 codepoint = (utf[ix] & 0x7) << 18; 11195 codepoint |= (utf[ix+1] & 0x3f) << 12; 11196 codepoint |= (utf[ix+2] & 0x3f) << 6; 11197 codepoint |= utf[ix+3] & 0x3f; 11198 if (!xmlIsCharQ(codepoint)) 11199 return(-ix); 11200 ix += 4; 11201 } else /* unknown encoding */ 11202 return(-ix); 11203 } 11204 return(ix); 11205 } 11206 11207 
/** 11208 * xmlParseTryOrFinish: 11209 * @ctxt: an XML parser context 11210 * @terminate: last chunk indicator 11211 * 11212 * Try to progress on parsing 11213 * 11214 * Returns zero if no parsing was possible 11215 */ 11216 static int 11217 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11218 int ret = 0; 11219 int avail, tlen; 11220 xmlChar cur, next; 11221 const xmlChar *lastlt, *lastgt; 11222 11223 if (ctxt->input == NULL) 11224 return(0); 11225 11226 #ifdef DEBUG_PUSH 11227 switch (ctxt->instate) { 11228 case XML_PARSER_EOF: 11229 xmlGenericError(xmlGenericErrorContext, 11230 "PP: try EOF\n"); break; 11231 case XML_PARSER_START: 11232 xmlGenericError(xmlGenericErrorContext, 11233 "PP: try START\n"); break; 11234 case XML_PARSER_MISC: 11235 xmlGenericError(xmlGenericErrorContext, 11236 "PP: try MISC\n");break; 11237 case XML_PARSER_COMMENT: 11238 xmlGenericError(xmlGenericErrorContext, 11239 "PP: try COMMENT\n");break; 11240 case XML_PARSER_PROLOG: 11241 xmlGenericError(xmlGenericErrorContext, 11242 "PP: try PROLOG\n");break; 11243 case XML_PARSER_START_TAG: 11244 xmlGenericError(xmlGenericErrorContext, 11245 "PP: try START_TAG\n");break; 11246 case XML_PARSER_CONTENT: 11247 xmlGenericError(xmlGenericErrorContext, 11248 "PP: try CONTENT\n");break; 11249 case XML_PARSER_CDATA_SECTION: 11250 xmlGenericError(xmlGenericErrorContext, 11251 "PP: try CDATA_SECTION\n");break; 11252 case XML_PARSER_END_TAG: 11253 xmlGenericError(xmlGenericErrorContext, 11254 "PP: try END_TAG\n");break; 11255 case XML_PARSER_ENTITY_DECL: 11256 xmlGenericError(xmlGenericErrorContext, 11257 "PP: try ENTITY_DECL\n");break; 11258 case XML_PARSER_ENTITY_VALUE: 11259 xmlGenericError(xmlGenericErrorContext, 11260 "PP: try ENTITY_VALUE\n");break; 11261 case XML_PARSER_ATTRIBUTE_VALUE: 11262 xmlGenericError(xmlGenericErrorContext, 11263 "PP: try ATTRIBUTE_VALUE\n");break; 11264 case XML_PARSER_DTD: 11265 xmlGenericError(xmlGenericErrorContext, 11266 "PP: try DTD\n");break; 11267 case 
XML_PARSER_EPILOG: 11268 xmlGenericError(xmlGenericErrorContext, 11269 "PP: try EPILOG\n");break; 11270 case XML_PARSER_PI: 11271 xmlGenericError(xmlGenericErrorContext, 11272 "PP: try PI\n");break; 11273 case XML_PARSER_IGNORE: 11274 xmlGenericError(xmlGenericErrorContext, 11275 "PP: try IGNORE\n");break; 11276 } 11277 #endif 11278 11279 if ((ctxt->input != NULL) && 11280 (ctxt->input->cur - ctxt->input->base > 4096)) { 11281 xmlSHRINK(ctxt); 11282 ctxt->checkIndex = 0; 11283 } 11284 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11285 11286 while (ctxt->instate != XML_PARSER_EOF) { 11287 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11288 return(0); 11289 11290 11291 /* 11292 * Pop-up of finished entities. 11293 */ 11294 while ((RAW == 0) && (ctxt->inputNr > 1)) 11295 xmlPopInput(ctxt); 11296 11297 if (ctxt->input == NULL) break; 11298 if (ctxt->input->buf == NULL) 11299 avail = ctxt->input->length - 11300 (ctxt->input->cur - ctxt->input->base); 11301 else { 11302 /* 11303 * If we are operating on converted input, try to flush 11304 * remainng chars to avoid them stalling in the non-converted 11305 * buffer. But do not do this in document start where 11306 * encoding="..." may not have been read and we work on a 11307 * guessed encoding. 11308 */ 11309 if ((ctxt->instate != XML_PARSER_START) && 11310 (ctxt->input->buf->raw != NULL) && 11311 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11312 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11313 ctxt->input); 11314 size_t current = ctxt->input->cur - ctxt->input->base; 11315 11316 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11317 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11318 base, current); 11319 } 11320 avail = xmlBufUse(ctxt->input->buf->buffer) - 11321 (ctxt->input->cur - ctxt->input->base); 11322 } 11323 if (avail < 1) 11324 goto done; 11325 switch (ctxt->instate) { 11326 case XML_PARSER_EOF: 11327 /* 11328 * Document parsing is done ! 
11329 */ 11330 goto done; 11331 case XML_PARSER_START: 11332 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11333 xmlChar start[4]; 11334 xmlCharEncoding enc; 11335 11336 /* 11337 * Very first chars read from the document flow. 11338 */ 11339 if (avail < 4) 11340 goto done; 11341 11342 /* 11343 * Get the 4 first bytes and decode the charset 11344 * if enc != XML_CHAR_ENCODING_NONE 11345 * plug some encoding conversion routines, 11346 * else xmlSwitchEncoding will set to (default) 11347 * UTF8. 11348 */ 11349 start[0] = RAW; 11350 start[1] = NXT(1); 11351 start[2] = NXT(2); 11352 start[3] = NXT(3); 11353 enc = xmlDetectCharEncoding(start, 4); 11354 xmlSwitchEncoding(ctxt, enc); 11355 break; 11356 } 11357 11358 if (avail < 2) 11359 goto done; 11360 cur = ctxt->input->cur[0]; 11361 next = ctxt->input->cur[1]; 11362 if (cur == 0) { 11363 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11364 ctxt->sax->setDocumentLocator(ctxt->userData, 11365 &xmlDefaultSAXLocator); 11366 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11367 ctxt->instate = XML_PARSER_EOF; 11368 #ifdef DEBUG_PUSH 11369 xmlGenericError(xmlGenericErrorContext, 11370 "PP: entering EOF\n"); 11371 #endif 11372 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11373 ctxt->sax->endDocument(ctxt->userData); 11374 goto done; 11375 } 11376 if ((cur == '<') && (next == '?')) { 11377 /* PI or XML decl */ 11378 if (avail < 5) return(ret); 11379 if ((!terminate) && 11380 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11381 return(ret); 11382 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11383 ctxt->sax->setDocumentLocator(ctxt->userData, 11384 &xmlDefaultSAXLocator); 11385 if ((ctxt->input->cur[2] == 'x') && 11386 (ctxt->input->cur[3] == 'm') && 11387 (ctxt->input->cur[4] == 'l') && 11388 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11389 ret += 5; 11390 #ifdef DEBUG_PUSH 11391 xmlGenericError(xmlGenericErrorContext, 11392 "PP: Parsing XML Decl\n"); 11393 #endif 11394 xmlParseXMLDecl(ctxt); 11395 if 
(ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11396 /* 11397 * The XML REC instructs us to stop parsing right 11398 * here 11399 */ 11400 ctxt->instate = XML_PARSER_EOF; 11401 return(0); 11402 } 11403 ctxt->standalone = ctxt->input->standalone; 11404 if ((ctxt->encoding == NULL) && 11405 (ctxt->input->encoding != NULL)) 11406 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11407 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11408 (!ctxt->disableSAX)) 11409 ctxt->sax->startDocument(ctxt->userData); 11410 ctxt->instate = XML_PARSER_MISC; 11411 #ifdef DEBUG_PUSH 11412 xmlGenericError(xmlGenericErrorContext, 11413 "PP: entering MISC\n"); 11414 #endif 11415 } else { 11416 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11417 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11418 (!ctxt->disableSAX)) 11419 ctxt->sax->startDocument(ctxt->userData); 11420 ctxt->instate = XML_PARSER_MISC; 11421 #ifdef DEBUG_PUSH 11422 xmlGenericError(xmlGenericErrorContext, 11423 "PP: entering MISC\n"); 11424 #endif 11425 } 11426 } else { 11427 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11428 ctxt->sax->setDocumentLocator(ctxt->userData, 11429 &xmlDefaultSAXLocator); 11430 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11431 if (ctxt->version == NULL) { 11432 xmlErrMemory(ctxt, NULL); 11433 break; 11434 } 11435 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11436 (!ctxt->disableSAX)) 11437 ctxt->sax->startDocument(ctxt->userData); 11438 ctxt->instate = XML_PARSER_MISC; 11439 #ifdef DEBUG_PUSH 11440 xmlGenericError(xmlGenericErrorContext, 11441 "PP: entering MISC\n"); 11442 #endif 11443 } 11444 break; 11445 case XML_PARSER_START_TAG: { 11446 const xmlChar *name; 11447 const xmlChar *prefix = NULL; 11448 const xmlChar *URI = NULL; 11449 int nsNr = ctxt->nsNr; 11450 11451 if ((avail < 2) && (ctxt->inputNr == 1)) 11452 goto done; 11453 cur = ctxt->input->cur[0]; 11454 if (cur != '<') { 11455 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11456 ctxt->instate = 
XML_PARSER_EOF; 11457 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11458 ctxt->sax->endDocument(ctxt->userData); 11459 goto done; 11460 } 11461 if (!terminate) { 11462 if (ctxt->progressive) { 11463 /* > can be found unescaped in attribute values */ 11464 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11465 goto done; 11466 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11467 goto done; 11468 } 11469 } 11470 if (ctxt->spaceNr == 0) 11471 spacePush(ctxt, -1); 11472 else if (*ctxt->space == -2) 11473 spacePush(ctxt, -1); 11474 else 11475 spacePush(ctxt, *ctxt->space); 11476 #ifdef LIBXML_SAX1_ENABLED 11477 if (ctxt->sax2) 11478 #endif /* LIBXML_SAX1_ENABLED */ 11479 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11480 #ifdef LIBXML_SAX1_ENABLED 11481 else 11482 name = xmlParseStartTag(ctxt); 11483 #endif /* LIBXML_SAX1_ENABLED */ 11484 if (ctxt->instate == XML_PARSER_EOF) 11485 goto done; 11486 if (name == NULL) { 11487 spacePop(ctxt); 11488 ctxt->instate = XML_PARSER_EOF; 11489 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11490 ctxt->sax->endDocument(ctxt->userData); 11491 goto done; 11492 } 11493 #ifdef LIBXML_VALID_ENABLED 11494 /* 11495 * [ VC: Root Element Type ] 11496 * The Name in the document type declaration must match 11497 * the element type of the root element. 11498 */ 11499 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11500 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11501 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11502 #endif /* LIBXML_VALID_ENABLED */ 11503 11504 /* 11505 * Check for an Empty Element. 
11506 */ 11507 if ((RAW == '/') && (NXT(1) == '>')) { 11508 SKIP(2); 11509 11510 if (ctxt->sax2) { 11511 if ((ctxt->sax != NULL) && 11512 (ctxt->sax->endElementNs != NULL) && 11513 (!ctxt->disableSAX)) 11514 ctxt->sax->endElementNs(ctxt->userData, name, 11515 prefix, URI); 11516 if (ctxt->nsNr - nsNr > 0) 11517 nsPop(ctxt, ctxt->nsNr - nsNr); 11518 #ifdef LIBXML_SAX1_ENABLED 11519 } else { 11520 if ((ctxt->sax != NULL) && 11521 (ctxt->sax->endElement != NULL) && 11522 (!ctxt->disableSAX)) 11523 ctxt->sax->endElement(ctxt->userData, name); 11524 #endif /* LIBXML_SAX1_ENABLED */ 11525 } 11526 if (ctxt->instate == XML_PARSER_EOF) 11527 goto done; 11528 spacePop(ctxt); 11529 if (ctxt->nameNr == 0) { 11530 ctxt->instate = XML_PARSER_EPILOG; 11531 } else { 11532 ctxt->instate = XML_PARSER_CONTENT; 11533 } 11534 ctxt->progressive = 1; 11535 break; 11536 } 11537 if (RAW == '>') { 11538 NEXT; 11539 } else { 11540 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11541 "Couldn't find end of Start Tag %s\n", 11542 name); 11543 nodePop(ctxt); 11544 spacePop(ctxt); 11545 } 11546 if (ctxt->sax2) 11547 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11548 #ifdef LIBXML_SAX1_ENABLED 11549 else 11550 namePush(ctxt, name); 11551 #endif /* LIBXML_SAX1_ENABLED */ 11552 11553 ctxt->instate = XML_PARSER_CONTENT; 11554 ctxt->progressive = 1; 11555 break; 11556 } 11557 case XML_PARSER_CONTENT: { 11558 const xmlChar *test; 11559 unsigned int cons; 11560 if ((avail < 2) && (ctxt->inputNr == 1)) 11561 goto done; 11562 cur = ctxt->input->cur[0]; 11563 next = ctxt->input->cur[1]; 11564 11565 test = CUR_PTR; 11566 cons = ctxt->input->consumed; 11567 if ((cur == '<') && (next == '/')) { 11568 ctxt->instate = XML_PARSER_END_TAG; 11569 break; 11570 } else if ((cur == '<') && (next == '?')) { 11571 if ((!terminate) && 11572 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11573 ctxt->progressive = XML_PARSER_PI; 11574 goto done; 11575 } 11576 xmlParsePI(ctxt); 11577 ctxt->instate = 
XML_PARSER_CONTENT; 11578 ctxt->progressive = 1; 11579 } else if ((cur == '<') && (next != '!')) { 11580 ctxt->instate = XML_PARSER_START_TAG; 11581 break; 11582 } else if ((cur == '<') && (next == '!') && 11583 (ctxt->input->cur[2] == '-') && 11584 (ctxt->input->cur[3] == '-')) { 11585 int term; 11586 11587 if (avail < 4) 11588 goto done; 11589 ctxt->input->cur += 4; 11590 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11591 ctxt->input->cur -= 4; 11592 if ((!terminate) && (term < 0)) { 11593 ctxt->progressive = XML_PARSER_COMMENT; 11594 goto done; 11595 } 11596 xmlParseComment(ctxt); 11597 ctxt->instate = XML_PARSER_CONTENT; 11598 ctxt->progressive = 1; 11599 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11600 (ctxt->input->cur[2] == '[') && 11601 (ctxt->input->cur[3] == 'C') && 11602 (ctxt->input->cur[4] == 'D') && 11603 (ctxt->input->cur[5] == 'A') && 11604 (ctxt->input->cur[6] == 'T') && 11605 (ctxt->input->cur[7] == 'A') && 11606 (ctxt->input->cur[8] == '[')) { 11607 SKIP(9); 11608 ctxt->instate = XML_PARSER_CDATA_SECTION; 11609 break; 11610 } else if ((cur == '<') && (next == '!') && 11611 (avail < 9)) { 11612 goto done; 11613 } else if (cur == '&') { 11614 if ((!terminate) && 11615 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11616 goto done; 11617 xmlParseReference(ctxt); 11618 } else { 11619 /* TODO Avoid the extra copy, handle directly !!! */ 11620 /* 11621 * Goal of the following test is: 11622 * - minimize calls to the SAX 'character' callback 11623 * when they are mergeable 11624 * - handle an problem for isBlank when we only parse 11625 * a sequence of blank chars and the next one is 11626 * not available to check against '<' presence. 11627 * - tries to homogenize the differences in SAX 11628 * callbacks between the push and pull versions 11629 * of the parser. 
11630 */ 11631 if ((ctxt->inputNr == 1) && 11632 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11633 if (!terminate) { 11634 if (ctxt->progressive) { 11635 if ((lastlt == NULL) || 11636 (ctxt->input->cur > lastlt)) 11637 goto done; 11638 } else if (xmlParseLookupSequence(ctxt, 11639 '<', 0, 0) < 0) { 11640 goto done; 11641 } 11642 } 11643 } 11644 ctxt->checkIndex = 0; 11645 xmlParseCharData(ctxt, 0); 11646 } 11647 /* 11648 * Pop-up of finished entities. 11649 */ 11650 while ((RAW == 0) && (ctxt->inputNr > 1)) 11651 xmlPopInput(ctxt); 11652 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11653 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11654 "detected an error in element content\n"); 11655 ctxt->instate = XML_PARSER_EOF; 11656 break; 11657 } 11658 break; 11659 } 11660 case XML_PARSER_END_TAG: 11661 if (avail < 2) 11662 goto done; 11663 if (!terminate) { 11664 if (ctxt->progressive) { 11665 /* > can be found unescaped in attribute values */ 11666 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11667 goto done; 11668 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11669 goto done; 11670 } 11671 } 11672 if (ctxt->sax2) { 11673 xmlParseEndTag2(ctxt, 11674 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11675 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11676 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11677 nameNsPop(ctxt); 11678 } 11679 #ifdef LIBXML_SAX1_ENABLED 11680 else 11681 xmlParseEndTag1(ctxt, 0); 11682 #endif /* LIBXML_SAX1_ENABLED */ 11683 if (ctxt->instate == XML_PARSER_EOF) { 11684 /* Nothing */ 11685 } else if (ctxt->nameNr == 0) { 11686 ctxt->instate = XML_PARSER_EPILOG; 11687 } else { 11688 ctxt->instate = XML_PARSER_CONTENT; 11689 } 11690 break; 11691 case XML_PARSER_CDATA_SECTION: { 11692 /* 11693 * The Push mode need to have the SAX callback for 11694 * cdataBlock merge back contiguous callbacks. 
11695 */ 11696 int base; 11697 11698 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11699 if (base < 0) { 11700 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11701 int tmp; 11702 11703 tmp = xmlCheckCdataPush(ctxt->input->cur, 11704 XML_PARSER_BIG_BUFFER_SIZE); 11705 if (tmp < 0) { 11706 tmp = -tmp; 11707 ctxt->input->cur += tmp; 11708 goto encoding_error; 11709 } 11710 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11711 if (ctxt->sax->cdataBlock != NULL) 11712 ctxt->sax->cdataBlock(ctxt->userData, 11713 ctxt->input->cur, tmp); 11714 else if (ctxt->sax->characters != NULL) 11715 ctxt->sax->characters(ctxt->userData, 11716 ctxt->input->cur, tmp); 11717 } 11718 if (ctxt->instate == XML_PARSER_EOF) 11719 goto done; 11720 SKIPL(tmp); 11721 ctxt->checkIndex = 0; 11722 } 11723 goto done; 11724 } else { 11725 int tmp; 11726 11727 tmp = xmlCheckCdataPush(ctxt->input->cur, base); 11728 if ((tmp < 0) || (tmp != base)) { 11729 tmp = -tmp; 11730 ctxt->input->cur += tmp; 11731 goto encoding_error; 11732 } 11733 if ((ctxt->sax != NULL) && (base == 0) && 11734 (ctxt->sax->cdataBlock != NULL) && 11735 (!ctxt->disableSAX)) { 11736 /* 11737 * Special case to provide identical behaviour 11738 * between pull and push parsers on enpty CDATA 11739 * sections 11740 */ 11741 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11742 (!strncmp((const char *)&ctxt->input->cur[-9], 11743 "<![CDATA[", 9))) 11744 ctxt->sax->cdataBlock(ctxt->userData, 11745 BAD_CAST "", 0); 11746 } else if ((ctxt->sax != NULL) && (base > 0) && 11747 (!ctxt->disableSAX)) { 11748 if (ctxt->sax->cdataBlock != NULL) 11749 ctxt->sax->cdataBlock(ctxt->userData, 11750 ctxt->input->cur, base); 11751 else if (ctxt->sax->characters != NULL) 11752 ctxt->sax->characters(ctxt->userData, 11753 ctxt->input->cur, base); 11754 } 11755 if (ctxt->instate == XML_PARSER_EOF) 11756 goto done; 11757 SKIPL(base + 3); 11758 ctxt->checkIndex = 0; 11759 ctxt->instate = XML_PARSER_CONTENT; 11760 #ifdef DEBUG_PUSH 11761 
xmlGenericError(xmlGenericErrorContext, 11762 "PP: entering CONTENT\n"); 11763 #endif 11764 } 11765 break; 11766 } 11767 case XML_PARSER_MISC: 11768 SKIP_BLANKS; 11769 if (ctxt->input->buf == NULL) 11770 avail = ctxt->input->length - 11771 (ctxt->input->cur - ctxt->input->base); 11772 else 11773 avail = xmlBufUse(ctxt->input->buf->buffer) - 11774 (ctxt->input->cur - ctxt->input->base); 11775 if (avail < 2) 11776 goto done; 11777 cur = ctxt->input->cur[0]; 11778 next = ctxt->input->cur[1]; 11779 if ((cur == '<') && (next == '?')) { 11780 if ((!terminate) && 11781 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11782 ctxt->progressive = XML_PARSER_PI; 11783 goto done; 11784 } 11785 #ifdef DEBUG_PUSH 11786 xmlGenericError(xmlGenericErrorContext, 11787 "PP: Parsing PI\n"); 11788 #endif 11789 xmlParsePI(ctxt); 11790 if (ctxt->instate == XML_PARSER_EOF) 11791 goto done; 11792 ctxt->instate = XML_PARSER_MISC; 11793 ctxt->progressive = 1; 11794 ctxt->checkIndex = 0; 11795 } else if ((cur == '<') && (next == '!') && 11796 (ctxt->input->cur[2] == '-') && 11797 (ctxt->input->cur[3] == '-')) { 11798 if ((!terminate) && 11799 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11800 ctxt->progressive = XML_PARSER_COMMENT; 11801 goto done; 11802 } 11803 #ifdef DEBUG_PUSH 11804 xmlGenericError(xmlGenericErrorContext, 11805 "PP: Parsing Comment\n"); 11806 #endif 11807 xmlParseComment(ctxt); 11808 if (ctxt->instate == XML_PARSER_EOF) 11809 goto done; 11810 ctxt->instate = XML_PARSER_MISC; 11811 ctxt->progressive = 1; 11812 ctxt->checkIndex = 0; 11813 } else if ((cur == '<') && (next == '!') && 11814 (ctxt->input->cur[2] == 'D') && 11815 (ctxt->input->cur[3] == 'O') && 11816 (ctxt->input->cur[4] == 'C') && 11817 (ctxt->input->cur[5] == 'T') && 11818 (ctxt->input->cur[6] == 'Y') && 11819 (ctxt->input->cur[7] == 'P') && 11820 (ctxt->input->cur[8] == 'E')) { 11821 if ((!terminate) && 11822 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11823 ctxt->progressive = XML_PARSER_DTD; 
11824 goto done; 11825 } 11826 #ifdef DEBUG_PUSH 11827 xmlGenericError(xmlGenericErrorContext, 11828 "PP: Parsing internal subset\n"); 11829 #endif 11830 ctxt->inSubset = 1; 11831 ctxt->progressive = 0; 11832 ctxt->checkIndex = 0; 11833 xmlParseDocTypeDecl(ctxt); 11834 if (ctxt->instate == XML_PARSER_EOF) 11835 goto done; 11836 if (RAW == '[') { 11837 ctxt->instate = XML_PARSER_DTD; 11838 #ifdef DEBUG_PUSH 11839 xmlGenericError(xmlGenericErrorContext, 11840 "PP: entering DTD\n"); 11841 #endif 11842 } else { 11843 /* 11844 * Create and update the external subset. 11845 */ 11846 ctxt->inSubset = 2; 11847 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11848 (ctxt->sax->externalSubset != NULL)) 11849 ctxt->sax->externalSubset(ctxt->userData, 11850 ctxt->intSubName, ctxt->extSubSystem, 11851 ctxt->extSubURI); 11852 ctxt->inSubset = 0; 11853 xmlCleanSpecialAttr(ctxt); 11854 ctxt->instate = XML_PARSER_PROLOG; 11855 #ifdef DEBUG_PUSH 11856 xmlGenericError(xmlGenericErrorContext, 11857 "PP: entering PROLOG\n"); 11858 #endif 11859 } 11860 } else if ((cur == '<') && (next == '!') && 11861 (avail < 9)) { 11862 goto done; 11863 } else { 11864 ctxt->instate = XML_PARSER_START_TAG; 11865 ctxt->progressive = XML_PARSER_START_TAG; 11866 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11867 #ifdef DEBUG_PUSH 11868 xmlGenericError(xmlGenericErrorContext, 11869 "PP: entering START_TAG\n"); 11870 #endif 11871 } 11872 break; 11873 case XML_PARSER_PROLOG: 11874 SKIP_BLANKS; 11875 if (ctxt->input->buf == NULL) 11876 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11877 else 11878 avail = xmlBufUse(ctxt->input->buf->buffer) - 11879 (ctxt->input->cur - ctxt->input->base); 11880 if (avail < 2) 11881 goto done; 11882 cur = ctxt->input->cur[0]; 11883 next = ctxt->input->cur[1]; 11884 if ((cur == '<') && (next == '?')) { 11885 if ((!terminate) && 11886 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11887 ctxt->progressive = XML_PARSER_PI; 11888 goto done; 11889 } 11890 
#ifdef DEBUG_PUSH 11891 xmlGenericError(xmlGenericErrorContext, 11892 "PP: Parsing PI\n"); 11893 #endif 11894 xmlParsePI(ctxt); 11895 if (ctxt->instate == XML_PARSER_EOF) 11896 goto done; 11897 ctxt->instate = XML_PARSER_PROLOG; 11898 ctxt->progressive = 1; 11899 } else if ((cur == '<') && (next == '!') && 11900 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11901 if ((!terminate) && 11902 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11903 ctxt->progressive = XML_PARSER_COMMENT; 11904 goto done; 11905 } 11906 #ifdef DEBUG_PUSH 11907 xmlGenericError(xmlGenericErrorContext, 11908 "PP: Parsing Comment\n"); 11909 #endif 11910 xmlParseComment(ctxt); 11911 if (ctxt->instate == XML_PARSER_EOF) 11912 goto done; 11913 ctxt->instate = XML_PARSER_PROLOG; 11914 ctxt->progressive = 1; 11915 } else if ((cur == '<') && (next == '!') && 11916 (avail < 4)) { 11917 goto done; 11918 } else { 11919 ctxt->instate = XML_PARSER_START_TAG; 11920 if (ctxt->progressive == 0) 11921 ctxt->progressive = XML_PARSER_START_TAG; 11922 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11923 #ifdef DEBUG_PUSH 11924 xmlGenericError(xmlGenericErrorContext, 11925 "PP: entering START_TAG\n"); 11926 #endif 11927 } 11928 break; 11929 case XML_PARSER_EPILOG: 11930 SKIP_BLANKS; 11931 if (ctxt->input->buf == NULL) 11932 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11933 else 11934 avail = xmlBufUse(ctxt->input->buf->buffer) - 11935 (ctxt->input->cur - ctxt->input->base); 11936 if (avail < 2) 11937 goto done; 11938 cur = ctxt->input->cur[0]; 11939 next = ctxt->input->cur[1]; 11940 if ((cur == '<') && (next == '?')) { 11941 if ((!terminate) && 11942 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11943 ctxt->progressive = XML_PARSER_PI; 11944 goto done; 11945 } 11946 #ifdef DEBUG_PUSH 11947 xmlGenericError(xmlGenericErrorContext, 11948 "PP: Parsing PI\n"); 11949 #endif 11950 xmlParsePI(ctxt); 11951 if (ctxt->instate == XML_PARSER_EOF) 11952 goto done; 11953 
ctxt->instate = XML_PARSER_EPILOG; 11954 ctxt->progressive = 1; 11955 } else if ((cur == '<') && (next == '!') && 11956 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11957 if ((!terminate) && 11958 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11959 ctxt->progressive = XML_PARSER_COMMENT; 11960 goto done; 11961 } 11962 #ifdef DEBUG_PUSH 11963 xmlGenericError(xmlGenericErrorContext, 11964 "PP: Parsing Comment\n"); 11965 #endif 11966 xmlParseComment(ctxt); 11967 if (ctxt->instate == XML_PARSER_EOF) 11968 goto done; 11969 ctxt->instate = XML_PARSER_EPILOG; 11970 ctxt->progressive = 1; 11971 } else if ((cur == '<') && (next == '!') && 11972 (avail < 4)) { 11973 goto done; 11974 } else { 11975 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11976 ctxt->instate = XML_PARSER_EOF; 11977 #ifdef DEBUG_PUSH 11978 xmlGenericError(xmlGenericErrorContext, 11979 "PP: entering EOF\n"); 11980 #endif 11981 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11982 ctxt->sax->endDocument(ctxt->userData); 11983 goto done; 11984 } 11985 break; 11986 case XML_PARSER_DTD: { 11987 /* 11988 * Sorry but progressive parsing of the internal subset 11989 * is not expected to be supported. We first check that 11990 * the full content of the internal subset is available and 11991 * the parsing is launched only at that point. 11992 * Internal subset ends up with "']' S? '>'" in an unescaped 11993 * section and not in a ']]>' sequence which are conditional 11994 * sections (whoever argued to keep that crap in XML deserve 11995 * a place in hell !). 
11996 */ 11997 int base, i; 11998 xmlChar *buf; 11999 xmlChar quote = 0; 12000 size_t use; 12001 12002 base = ctxt->input->cur - ctxt->input->base; 12003 if (base < 0) return(0); 12004 if (ctxt->checkIndex > base) 12005 base = ctxt->checkIndex; 12006 buf = xmlBufContent(ctxt->input->buf->buffer); 12007 use = xmlBufUse(ctxt->input->buf->buffer); 12008 for (;(unsigned int) base < use; base++) { 12009 if (quote != 0) { 12010 if (buf[base] == quote) 12011 quote = 0; 12012 continue; 12013 } 12014 if ((quote == 0) && (buf[base] == '<')) { 12015 int found = 0; 12016 /* special handling of comments */ 12017 if (((unsigned int) base + 4 < use) && 12018 (buf[base + 1] == '!') && 12019 (buf[base + 2] == '-') && 12020 (buf[base + 3] == '-')) { 12021 for (;(unsigned int) base + 3 < use; base++) { 12022 if ((buf[base] == '-') && 12023 (buf[base + 1] == '-') && 12024 (buf[base + 2] == '>')) { 12025 found = 1; 12026 base += 2; 12027 break; 12028 } 12029 } 12030 if (!found) { 12031 #if 0 12032 fprintf(stderr, "unfinished comment\n"); 12033 #endif 12034 break; /* for */ 12035 } 12036 continue; 12037 } 12038 } 12039 if (buf[base] == '"') { 12040 quote = '"'; 12041 continue; 12042 } 12043 if (buf[base] == '\'') { 12044 quote = '\''; 12045 continue; 12046 } 12047 if (buf[base] == ']') { 12048 #if 0 12049 fprintf(stderr, "%c%c%c%c: ", buf[base], 12050 buf[base + 1], buf[base + 2], buf[base + 3]); 12051 #endif 12052 if ((unsigned int) base +1 >= use) 12053 break; 12054 if (buf[base + 1] == ']') { 12055 /* conditional crap, skip both ']' ! 
*/ 12056 base++; 12057 continue; 12058 } 12059 for (i = 1; (unsigned int) base + i < use; i++) { 12060 if (buf[base + i] == '>') { 12061 #if 0 12062 fprintf(stderr, "found\n"); 12063 #endif 12064 goto found_end_int_subset; 12065 } 12066 if (!IS_BLANK_CH(buf[base + i])) { 12067 #if 0 12068 fprintf(stderr, "not found\n"); 12069 #endif 12070 goto not_end_of_int_subset; 12071 } 12072 } 12073 #if 0 12074 fprintf(stderr, "end of stream\n"); 12075 #endif 12076 break; 12077 12078 } 12079 not_end_of_int_subset: 12080 continue; /* for */ 12081 } 12082 /* 12083 * We didn't found the end of the Internal subset 12084 */ 12085 if (quote == 0) 12086 ctxt->checkIndex = base; 12087 else 12088 ctxt->checkIndex = 0; 12089 #ifdef DEBUG_PUSH 12090 if (next == 0) 12091 xmlGenericError(xmlGenericErrorContext, 12092 "PP: lookup of int subset end filed\n"); 12093 #endif 12094 goto done; 12095 12096 found_end_int_subset: 12097 ctxt->checkIndex = 0; 12098 xmlParseInternalSubset(ctxt); 12099 if (ctxt->instate == XML_PARSER_EOF) 12100 goto done; 12101 ctxt->inSubset = 2; 12102 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 12103 (ctxt->sax->externalSubset != NULL)) 12104 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 12105 ctxt->extSubSystem, ctxt->extSubURI); 12106 ctxt->inSubset = 0; 12107 xmlCleanSpecialAttr(ctxt); 12108 if (ctxt->instate == XML_PARSER_EOF) 12109 goto done; 12110 ctxt->instate = XML_PARSER_PROLOG; 12111 ctxt->checkIndex = 0; 12112 #ifdef DEBUG_PUSH 12113 xmlGenericError(xmlGenericErrorContext, 12114 "PP: entering PROLOG\n"); 12115 #endif 12116 break; 12117 } 12118 case XML_PARSER_COMMENT: 12119 xmlGenericError(xmlGenericErrorContext, 12120 "PP: internal error, state == COMMENT\n"); 12121 ctxt->instate = XML_PARSER_CONTENT; 12122 #ifdef DEBUG_PUSH 12123 xmlGenericError(xmlGenericErrorContext, 12124 "PP: entering CONTENT\n"); 12125 #endif 12126 break; 12127 case XML_PARSER_IGNORE: 12128 xmlGenericError(xmlGenericErrorContext, 12129 "PP: internal error, state 
== IGNORE"); 12130 ctxt->instate = XML_PARSER_DTD; 12131 #ifdef DEBUG_PUSH 12132 xmlGenericError(xmlGenericErrorContext, 12133 "PP: entering DTD\n"); 12134 #endif 12135 break; 12136 case XML_PARSER_PI: 12137 xmlGenericError(xmlGenericErrorContext, 12138 "PP: internal error, state == PI\n"); 12139 ctxt->instate = XML_PARSER_CONTENT; 12140 #ifdef DEBUG_PUSH 12141 xmlGenericError(xmlGenericErrorContext, 12142 "PP: entering CONTENT\n"); 12143 #endif 12144 break; 12145 case XML_PARSER_ENTITY_DECL: 12146 xmlGenericError(xmlGenericErrorContext, 12147 "PP: internal error, state == ENTITY_DECL\n"); 12148 ctxt->instate = XML_PARSER_DTD; 12149 #ifdef DEBUG_PUSH 12150 xmlGenericError(xmlGenericErrorContext, 12151 "PP: entering DTD\n"); 12152 #endif 12153 break; 12154 case XML_PARSER_ENTITY_VALUE: 12155 xmlGenericError(xmlGenericErrorContext, 12156 "PP: internal error, state == ENTITY_VALUE\n"); 12157 ctxt->instate = XML_PARSER_CONTENT; 12158 #ifdef DEBUG_PUSH 12159 xmlGenericError(xmlGenericErrorContext, 12160 "PP: entering DTD\n"); 12161 #endif 12162 break; 12163 case XML_PARSER_ATTRIBUTE_VALUE: 12164 xmlGenericError(xmlGenericErrorContext, 12165 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12166 ctxt->instate = XML_PARSER_START_TAG; 12167 #ifdef DEBUG_PUSH 12168 xmlGenericError(xmlGenericErrorContext, 12169 "PP: entering START_TAG\n"); 12170 #endif 12171 break; 12172 case XML_PARSER_SYSTEM_LITERAL: 12173 xmlGenericError(xmlGenericErrorContext, 12174 "PP: internal error, state == SYSTEM_LITERAL\n"); 12175 ctxt->instate = XML_PARSER_START_TAG; 12176 #ifdef DEBUG_PUSH 12177 xmlGenericError(xmlGenericErrorContext, 12178 "PP: entering START_TAG\n"); 12179 #endif 12180 break; 12181 case XML_PARSER_PUBLIC_LITERAL: 12182 xmlGenericError(xmlGenericErrorContext, 12183 "PP: internal error, state == PUBLIC_LITERAL\n"); 12184 ctxt->instate = XML_PARSER_START_TAG; 12185 #ifdef DEBUG_PUSH 12186 xmlGenericError(xmlGenericErrorContext, 12187 "PP: entering START_TAG\n"); 12188 #endif 
12189 break; 12190 } 12191 } 12192 done: 12193 #ifdef DEBUG_PUSH 12194 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12195 #endif 12196 return(ret); 12197 encoding_error: 12198 { 12199 char buffer[150]; 12200 12201 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12202 ctxt->input->cur[0], ctxt->input->cur[1], 12203 ctxt->input->cur[2], ctxt->input->cur[3]); 12204 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12205 "Input is not proper UTF-8, indicate encoding !\n%s", 12206 BAD_CAST buffer, NULL); 12207 } 12208 return(0); 12209 } 12210 12211 /** 12212 * xmlParseCheckTransition: 12213 * @ctxt: an XML parser context 12214 * @chunk: a char array 12215 * @size: the size in byte of the chunk 12216 * 12217 * Check depending on the current parser state if the chunk given must be 12218 * processed immediately or one need more data to advance on parsing. 12219 * 12220 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12221 */ 12222 static int 12223 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12224 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12225 return(-1); 12226 if (ctxt->instate == XML_PARSER_START_TAG) { 12227 if (memchr(chunk, '>', size) != NULL) 12228 return(1); 12229 return(0); 12230 } 12231 if (ctxt->progressive == XML_PARSER_COMMENT) { 12232 if (memchr(chunk, '>', size) != NULL) 12233 return(1); 12234 return(0); 12235 } 12236 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12237 if (memchr(chunk, '>', size) != NULL) 12238 return(1); 12239 return(0); 12240 } 12241 if (ctxt->progressive == XML_PARSER_PI) { 12242 if (memchr(chunk, '>', size) != NULL) 12243 return(1); 12244 return(0); 12245 } 12246 if (ctxt->instate == XML_PARSER_END_TAG) { 12247 if (memchr(chunk, '>', size) != NULL) 12248 return(1); 12249 return(0); 12250 } 12251 if ((ctxt->progressive == XML_PARSER_DTD) || 12252 (ctxt->instate == XML_PARSER_DTD)) { 12253 if (memchr(chunk, '>', size) != NULL) 12254 
return(1); 12255 return(0); 12256 } 12257 return(1); 12258 } 12259 12260 /** 12261 * xmlParseChunk: 12262 * @ctxt: an XML parser context 12263 * @chunk: an char array 12264 * @size: the size in byte of the chunk 12265 * @terminate: last chunk indicator 12266 * 12267 * Parse a Chunk of memory 12268 * 12269 * Returns zero if no error, the xmlParserErrors otherwise. 12270 */ 12271 int 12272 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12273 int terminate) { 12274 int end_in_lf = 0; 12275 int remain = 0; 12276 size_t old_avail = 0; 12277 size_t avail = 0; 12278 12279 if (ctxt == NULL) 12280 return(XML_ERR_INTERNAL_ERROR); 12281 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12282 return(ctxt->errNo); 12283 if (ctxt->instate == XML_PARSER_EOF) 12284 return(-1); 12285 if (ctxt->instate == XML_PARSER_START) 12286 xmlDetectSAX2(ctxt); 12287 if ((size > 0) && (chunk != NULL) && (!terminate) && 12288 (chunk[size - 1] == '\r')) { 12289 end_in_lf = 1; 12290 size--; 12291 } 12292 12293 xmldecl_done: 12294 12295 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12296 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12297 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12298 size_t cur = ctxt->input->cur - ctxt->input->base; 12299 int res; 12300 12301 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12302 /* 12303 * Specific handling if we autodetected an encoding, we should not 12304 * push more than the first line ... 
which depend on the encoding 12305 * And only push the rest once the final encoding was detected 12306 */ 12307 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12308 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12309 unsigned int len = 45; 12310 12311 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12312 BAD_CAST "UTF-16")) || 12313 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12314 BAD_CAST "UTF16"))) 12315 len = 90; 12316 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12317 BAD_CAST "UCS-4")) || 12318 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12319 BAD_CAST "UCS4"))) 12320 len = 180; 12321 12322 if (ctxt->input->buf->rawconsumed < len) 12323 len -= ctxt->input->buf->rawconsumed; 12324 12325 /* 12326 * Change size for reading the initial declaration only 12327 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12328 * will blindly copy extra bytes from memory. 12329 */ 12330 if ((unsigned int) size > len) { 12331 remain = size - len; 12332 size = len; 12333 } else { 12334 remain = 0; 12335 } 12336 } 12337 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12338 if (res < 0) { 12339 ctxt->errNo = XML_PARSER_EOF; 12340 ctxt->disableSAX = 1; 12341 return (XML_PARSER_EOF); 12342 } 12343 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12344 #ifdef DEBUG_PUSH 12345 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12346 #endif 12347 12348 } else if (ctxt->instate != XML_PARSER_EOF) { 12349 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12350 xmlParserInputBufferPtr in = ctxt->input->buf; 12351 if ((in->encoder != NULL) && (in->buffer != NULL) && 12352 (in->raw != NULL)) { 12353 int nbchars; 12354 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12355 size_t current = ctxt->input->cur - ctxt->input->base; 12356 12357 nbchars = xmlCharEncInput(in, terminate); 12358 if (nbchars < 0) { 
12359 /* TODO 2.6.0 */ 12360 xmlGenericError(xmlGenericErrorContext, 12361 "xmlParseChunk: encoder error\n"); 12362 return(XML_ERR_INVALID_ENCODING); 12363 } 12364 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12365 } 12366 } 12367 } 12368 if (remain != 0) { 12369 xmlParseTryOrFinish(ctxt, 0); 12370 } else { 12371 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12372 avail = xmlBufUse(ctxt->input->buf->buffer); 12373 /* 12374 * Depending on the current state it may not be such 12375 * a good idea to try parsing if there is nothing in the chunk 12376 * which would be worth doing a parser state transition and we 12377 * need to wait for more data 12378 */ 12379 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12380 (old_avail == 0) || (avail == 0) || 12381 (xmlParseCheckTransition(ctxt, 12382 (const char *)&ctxt->input->base[old_avail], 12383 avail - old_avail))) 12384 xmlParseTryOrFinish(ctxt, terminate); 12385 } 12386 if (ctxt->instate == XML_PARSER_EOF) 12387 return(ctxt->errNo); 12388 12389 if ((ctxt->input != NULL) && 12390 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12391 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12392 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12393 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12394 ctxt->instate = XML_PARSER_EOF; 12395 } 12396 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12397 return(ctxt->errNo); 12398 12399 if (remain != 0) { 12400 chunk += size; 12401 size = remain; 12402 remain = 0; 12403 goto xmldecl_done; 12404 } 12405 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12406 (ctxt->input->buf != NULL)) { 12407 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12408 ctxt->input); 12409 size_t current = ctxt->input->cur - ctxt->input->base; 12410 12411 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12412 12413 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 12414 base, current); 12415 } 
12416 if (terminate) { 12417 /* 12418 * Check for termination 12419 */ 12420 int cur_avail = 0; 12421 12422 if (ctxt->input != NULL) { 12423 if (ctxt->input->buf == NULL) 12424 cur_avail = ctxt->input->length - 12425 (ctxt->input->cur - ctxt->input->base); 12426 else 12427 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12428 (ctxt->input->cur - ctxt->input->base); 12429 } 12430 12431 if ((ctxt->instate != XML_PARSER_EOF) && 12432 (ctxt->instate != XML_PARSER_EPILOG)) { 12433 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12434 } 12435 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12436 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12437 } 12438 if (ctxt->instate != XML_PARSER_EOF) { 12439 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12440 ctxt->sax->endDocument(ctxt->userData); 12441 } 12442 ctxt->instate = XML_PARSER_EOF; 12443 } 12444 if (ctxt->wellFormed == 0) 12445 return((xmlParserErrors) ctxt->errNo); 12446 else 12447 return(0); 12448 } 12449 12450 /************************************************************************ 12451 * * 12452 * I/O front end functions to the parser * 12453 * * 12454 ************************************************************************/ 12455 12456 /** 12457 * xmlCreatePushParserCtxt: 12458 * @sax: a SAX handler 12459 * @user_data: The user data returned on SAX callbacks 12460 * @chunk: a pointer to an array of chars 12461 * @size: number of chars in the array 12462 * @filename: an optional file name or URI 12463 * 12464 * Create a parser context for using the XML parser in push mode. 12465 * If @buffer and @size are non-NULL, the data is used to detect 12466 * the encoding. The remaining characters will be parsed so they 12467 * don't need to be fed in again through xmlParseChunk. 12468 * To allow content encoding detection, @size should be >= 4 12469 * The value of @filename is used for fetching external entities 12470 * and error/warning reports. 
12471 * 12472 * Returns the new parser context or NULL 12473 */ 12474 12475 xmlParserCtxtPtr 12476 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12477 const char *chunk, int size, const char *filename) { 12478 xmlParserCtxtPtr ctxt; 12479 xmlParserInputPtr inputStream; 12480 xmlParserInputBufferPtr buf; 12481 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12482 12483 /* 12484 * plug some encoding conversion routines 12485 */ 12486 if ((chunk != NULL) && (size >= 4)) 12487 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12488 12489 buf = xmlAllocParserInputBuffer(enc); 12490 if (buf == NULL) return(NULL); 12491 12492 ctxt = xmlNewParserCtxt(); 12493 if (ctxt == NULL) { 12494 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12495 xmlFreeParserInputBuffer(buf); 12496 return(NULL); 12497 } 12498 ctxt->dictNames = 1; 12499 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 12500 if (ctxt->pushTab == NULL) { 12501 xmlErrMemory(ctxt, NULL); 12502 xmlFreeParserInputBuffer(buf); 12503 xmlFreeParserCtxt(ctxt); 12504 return(NULL); 12505 } 12506 if (sax != NULL) { 12507 #ifdef LIBXML_SAX1_ENABLED 12508 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12509 #endif /* LIBXML_SAX1_ENABLED */ 12510 xmlFree(ctxt->sax); 12511 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12512 if (ctxt->sax == NULL) { 12513 xmlErrMemory(ctxt, NULL); 12514 xmlFreeParserInputBuffer(buf); 12515 xmlFreeParserCtxt(ctxt); 12516 return(NULL); 12517 } 12518 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12519 if (sax->initialized == XML_SAX2_MAGIC) 12520 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12521 else 12522 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12523 if (user_data != NULL) 12524 ctxt->userData = user_data; 12525 } 12526 if (filename == NULL) { 12527 ctxt->directory = NULL; 12528 } else { 12529 ctxt->directory = xmlParserGetDirectory(filename); 12530 } 12531 12532 inputStream = xmlNewInputStream(ctxt); 
12533 if (inputStream == NULL) { 12534 xmlFreeParserCtxt(ctxt); 12535 xmlFreeParserInputBuffer(buf); 12536 return(NULL); 12537 } 12538 12539 if (filename == NULL) 12540 inputStream->filename = NULL; 12541 else { 12542 inputStream->filename = (char *) 12543 xmlCanonicPath((const xmlChar *) filename); 12544 if (inputStream->filename == NULL) { 12545 xmlFreeParserCtxt(ctxt); 12546 xmlFreeParserInputBuffer(buf); 12547 return(NULL); 12548 } 12549 } 12550 inputStream->buf = buf; 12551 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12552 inputPush(ctxt, inputStream); 12553 12554 /* 12555 * If the caller didn't provide an initial 'chunk' for determining 12556 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12557 * that it can be automatically determined later 12558 */ 12559 if ((size == 0) || (chunk == NULL)) { 12560 ctxt->charset = XML_CHAR_ENCODING_NONE; 12561 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12562 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12563 size_t cur = ctxt->input->cur - ctxt->input->base; 12564 12565 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12566 12567 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12568 #ifdef DEBUG_PUSH 12569 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12570 #endif 12571 } 12572 12573 if (enc != XML_CHAR_ENCODING_NONE) { 12574 xmlSwitchEncoding(ctxt, enc); 12575 } 12576 12577 return(ctxt); 12578 } 12579 #endif /* LIBXML_PUSH_ENABLED */ 12580 12581 /** 12582 * xmlStopParser: 12583 * @ctxt: an XML parser context 12584 * 12585 * Blocks further parser processing 12586 */ 12587 void 12588 xmlStopParser(xmlParserCtxtPtr ctxt) { 12589 if (ctxt == NULL) 12590 return; 12591 ctxt->instate = XML_PARSER_EOF; 12592 ctxt->errNo = XML_ERR_USER_STOP; 12593 ctxt->disableSAX = 1; 12594 if (ctxt->input != NULL) { 12595 ctxt->input->cur = BAD_CAST""; 12596 ctxt->input->base = ctxt->input->cur; 12597 } 12598 } 
12599 12600 /** 12601 * xmlCreateIOParserCtxt: 12602 * @sax: a SAX handler 12603 * @user_data: The user data returned on SAX callbacks 12604 * @ioread: an I/O read function 12605 * @ioclose: an I/O close function 12606 * @ioctx: an I/O handler 12607 * @enc: the charset encoding if known 12608 * 12609 * Create a parser context for using the XML parser with an existing 12610 * I/O stream 12611 * 12612 * Returns the new parser context or NULL 12613 */ 12614 xmlParserCtxtPtr 12615 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12616 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12617 void *ioctx, xmlCharEncoding enc) { 12618 xmlParserCtxtPtr ctxt; 12619 xmlParserInputPtr inputStream; 12620 xmlParserInputBufferPtr buf; 12621 12622 if (ioread == NULL) return(NULL); 12623 12624 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12625 if (buf == NULL) { 12626 if (ioclose != NULL) 12627 ioclose(ioctx); 12628 return (NULL); 12629 } 12630 12631 ctxt = xmlNewParserCtxt(); 12632 if (ctxt == NULL) { 12633 xmlFreeParserInputBuffer(buf); 12634 return(NULL); 12635 } 12636 if (sax != NULL) { 12637 #ifdef LIBXML_SAX1_ENABLED 12638 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12639 #endif /* LIBXML_SAX1_ENABLED */ 12640 xmlFree(ctxt->sax); 12641 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12642 if (ctxt->sax == NULL) { 12643 xmlErrMemory(ctxt, NULL); 12644 xmlFreeParserCtxt(ctxt); 12645 return(NULL); 12646 } 12647 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12648 if (sax->initialized == XML_SAX2_MAGIC) 12649 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12650 else 12651 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12652 if (user_data != NULL) 12653 ctxt->userData = user_data; 12654 } 12655 12656 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12657 if (inputStream == NULL) { 12658 xmlFreeParserCtxt(ctxt); 12659 return(NULL); 12660 } 12661 inputPush(ctxt, inputStream); 12662 12663 return(ctxt); 12664 
} 12665 12666 #ifdef LIBXML_VALID_ENABLED 12667 /************************************************************************ 12668 * * 12669 * Front ends when parsing a DTD * 12670 * * 12671 ************************************************************************/ 12672 12673 /** 12674 * xmlIOParseDTD: 12675 * @sax: the SAX handler block or NULL 12676 * @input: an Input Buffer 12677 * @enc: the charset encoding if known 12678 * 12679 * Load and parse a DTD 12680 * 12681 * Returns the resulting xmlDtdPtr or NULL in case of error. 12682 * @input will be freed by the function in any case. 12683 */ 12684 12685 xmlDtdPtr 12686 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12687 xmlCharEncoding enc) { 12688 xmlDtdPtr ret = NULL; 12689 xmlParserCtxtPtr ctxt; 12690 xmlParserInputPtr pinput = NULL; 12691 xmlChar start[4]; 12692 12693 if (input == NULL) 12694 return(NULL); 12695 12696 ctxt = xmlNewParserCtxt(); 12697 if (ctxt == NULL) { 12698 xmlFreeParserInputBuffer(input); 12699 return(NULL); 12700 } 12701 12702 /* We are loading a DTD */ 12703 ctxt->options |= XML_PARSE_DTDLOAD; 12704 12705 /* 12706 * Set-up the SAX context 12707 */ 12708 if (sax != NULL) { 12709 if (ctxt->sax != NULL) 12710 xmlFree(ctxt->sax); 12711 ctxt->sax = sax; 12712 ctxt->userData = ctxt; 12713 } 12714 xmlDetectSAX2(ctxt); 12715 12716 /* 12717 * generate a parser input from the I/O handler 12718 */ 12719 12720 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12721 if (pinput == NULL) { 12722 if (sax != NULL) ctxt->sax = NULL; 12723 xmlFreeParserInputBuffer(input); 12724 xmlFreeParserCtxt(ctxt); 12725 return(NULL); 12726 } 12727 12728 /* 12729 * plug some encoding conversion routines here. 
12730 */ 12731 if (xmlPushInput(ctxt, pinput) < 0) { 12732 if (sax != NULL) ctxt->sax = NULL; 12733 xmlFreeParserCtxt(ctxt); 12734 return(NULL); 12735 } 12736 if (enc != XML_CHAR_ENCODING_NONE) { 12737 xmlSwitchEncoding(ctxt, enc); 12738 } 12739 12740 pinput->filename = NULL; 12741 pinput->line = 1; 12742 pinput->col = 1; 12743 pinput->base = ctxt->input->cur; 12744 pinput->cur = ctxt->input->cur; 12745 pinput->free = NULL; 12746 12747 /* 12748 * let's parse that entity knowing it's an external subset. 12749 */ 12750 ctxt->inSubset = 2; 12751 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12752 if (ctxt->myDoc == NULL) { 12753 xmlErrMemory(ctxt, "New Doc failed"); 12754 return(NULL); 12755 } 12756 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12757 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12758 BAD_CAST "none", BAD_CAST "none"); 12759 12760 if ((enc == XML_CHAR_ENCODING_NONE) && 12761 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12762 /* 12763 * Get the 4 first bytes and decode the charset 12764 * if enc != XML_CHAR_ENCODING_NONE 12765 * plug some encoding conversion routines. 
12766 */ 12767 start[0] = RAW; 12768 start[1] = NXT(1); 12769 start[2] = NXT(2); 12770 start[3] = NXT(3); 12771 enc = xmlDetectCharEncoding(start, 4); 12772 if (enc != XML_CHAR_ENCODING_NONE) { 12773 xmlSwitchEncoding(ctxt, enc); 12774 } 12775 } 12776 12777 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12778 12779 if (ctxt->myDoc != NULL) { 12780 if (ctxt->wellFormed) { 12781 ret = ctxt->myDoc->extSubset; 12782 ctxt->myDoc->extSubset = NULL; 12783 if (ret != NULL) { 12784 xmlNodePtr tmp; 12785 12786 ret->doc = NULL; 12787 tmp = ret->children; 12788 while (tmp != NULL) { 12789 tmp->doc = NULL; 12790 tmp = tmp->next; 12791 } 12792 } 12793 } else { 12794 ret = NULL; 12795 } 12796 xmlFreeDoc(ctxt->myDoc); 12797 ctxt->myDoc = NULL; 12798 } 12799 if (sax != NULL) ctxt->sax = NULL; 12800 xmlFreeParserCtxt(ctxt); 12801 12802 return(ret); 12803 } 12804 12805 /** 12806 * xmlSAXParseDTD: 12807 * @sax: the SAX handler block 12808 * @ExternalID: a NAME* containing the External ID of the DTD 12809 * @SystemID: a NAME* containing the URL to the DTD 12810 * 12811 * Load and parse an external subset. 12812 * 12813 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12814 */ 12815 12816 xmlDtdPtr 12817 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12818 const xmlChar *SystemID) { 12819 xmlDtdPtr ret = NULL; 12820 xmlParserCtxtPtr ctxt; 12821 xmlParserInputPtr input = NULL; 12822 xmlCharEncoding enc; 12823 xmlChar* systemIdCanonic; 12824 12825 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12826 12827 ctxt = xmlNewParserCtxt(); 12828 if (ctxt == NULL) { 12829 return(NULL); 12830 } 12831 12832 /* We are loading a DTD */ 12833 ctxt->options |= XML_PARSE_DTDLOAD; 12834 12835 /* 12836 * Set-up the SAX context 12837 */ 12838 if (sax != NULL) { 12839 if (ctxt->sax != NULL) 12840 xmlFree(ctxt->sax); 12841 ctxt->sax = sax; 12842 ctxt->userData = ctxt; 12843 } 12844 12845 /* 12846 * Canonicalise the system ID 12847 */ 12848 systemIdCanonic = xmlCanonicPath(SystemID); 12849 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12850 xmlFreeParserCtxt(ctxt); 12851 return(NULL); 12852 } 12853 12854 /* 12855 * Ask the Entity resolver to load the damn thing 12856 */ 12857 12858 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12859 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12860 systemIdCanonic); 12861 if (input == NULL) { 12862 if (sax != NULL) ctxt->sax = NULL; 12863 xmlFreeParserCtxt(ctxt); 12864 if (systemIdCanonic != NULL) 12865 xmlFree(systemIdCanonic); 12866 return(NULL); 12867 } 12868 12869 /* 12870 * plug some encoding conversion routines here. 
12871 */ 12872 if (xmlPushInput(ctxt, input) < 0) { 12873 if (sax != NULL) ctxt->sax = NULL; 12874 xmlFreeParserCtxt(ctxt); 12875 if (systemIdCanonic != NULL) 12876 xmlFree(systemIdCanonic); 12877 return(NULL); 12878 } 12879 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12880 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12881 xmlSwitchEncoding(ctxt, enc); 12882 } 12883 12884 if (input->filename == NULL) 12885 input->filename = (char *) systemIdCanonic; 12886 else 12887 xmlFree(systemIdCanonic); 12888 input->line = 1; 12889 input->col = 1; 12890 input->base = ctxt->input->cur; 12891 input->cur = ctxt->input->cur; 12892 input->free = NULL; 12893 12894 /* 12895 * let's parse that entity knowing it's an external subset. 12896 */ 12897 ctxt->inSubset = 2; 12898 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12899 if (ctxt->myDoc == NULL) { 12900 xmlErrMemory(ctxt, "New Doc failed"); 12901 if (sax != NULL) ctxt->sax = NULL; 12902 xmlFreeParserCtxt(ctxt); 12903 return(NULL); 12904 } 12905 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12906 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12907 ExternalID, SystemID); 12908 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12909 12910 if (ctxt->myDoc != NULL) { 12911 if (ctxt->wellFormed) { 12912 ret = ctxt->myDoc->extSubset; 12913 ctxt->myDoc->extSubset = NULL; 12914 if (ret != NULL) { 12915 xmlNodePtr tmp; 12916 12917 ret->doc = NULL; 12918 tmp = ret->children; 12919 while (tmp != NULL) { 12920 tmp->doc = NULL; 12921 tmp = tmp->next; 12922 } 12923 } 12924 } else { 12925 ret = NULL; 12926 } 12927 xmlFreeDoc(ctxt->myDoc); 12928 ctxt->myDoc = NULL; 12929 } 12930 if (sax != NULL) ctxt->sax = NULL; 12931 xmlFreeParserCtxt(ctxt); 12932 12933 return(ret); 12934 } 12935 12936 12937 /** 12938 * xmlParseDTD: 12939 * @ExternalID: a NAME* containing the External ID of the DTD 12940 * @SystemID: a NAME* containing the URL to the DTD 12941 * 12942 * Load and parse an external subset. 
12943 * 12944 * Returns the resulting xmlDtdPtr or NULL in case of error. 12945 */ 12946 12947 xmlDtdPtr 12948 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12949 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12950 } 12951 #endif /* LIBXML_VALID_ENABLED */ 12952 12953 /************************************************************************ 12954 * * 12955 * Front ends when parsing an Entity * 12956 * * 12957 ************************************************************************/ 12958 12959 /** 12960 * xmlParseCtxtExternalEntity: 12961 * @ctx: the existing parsing context 12962 * @URL: the URL for the entity to load 12963 * @ID: the System ID for the entity to load 12964 * @lst: the return value for the set of parsed nodes 12965 * 12966 * Parse an external general entity within an existing parsing context 12967 * An external general parsed entity is well-formed if it matches the 12968 * production labeled extParsedEnt. 12969 * 12970 * [78] extParsedEnt ::= TextDecl? 
content 12971 * 12972 * Returns 0 if the entity is well formed, -1 in case of args problem and 12973 * the parser error code otherwise 12974 */ 12975 12976 int 12977 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12978 const xmlChar *ID, xmlNodePtr *lst) { 12979 xmlParserCtxtPtr ctxt; 12980 xmlDocPtr newDoc; 12981 xmlNodePtr newRoot; 12982 xmlSAXHandlerPtr oldsax = NULL; 12983 int ret = 0; 12984 xmlChar start[4]; 12985 xmlCharEncoding enc; 12986 12987 if (ctx == NULL) return(-1); 12988 12989 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12990 (ctx->depth > 1024)) { 12991 return(XML_ERR_ENTITY_LOOP); 12992 } 12993 12994 if (lst != NULL) 12995 *lst = NULL; 12996 if ((URL == NULL) && (ID == NULL)) 12997 return(-1); 12998 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12999 return(-1); 13000 13001 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 13002 if (ctxt == NULL) { 13003 return(-1); 13004 } 13005 13006 oldsax = ctxt->sax; 13007 ctxt->sax = ctx->sax; 13008 xmlDetectSAX2(ctxt); 13009 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13010 if (newDoc == NULL) { 13011 xmlFreeParserCtxt(ctxt); 13012 return(-1); 13013 } 13014 newDoc->properties = XML_DOC_INTERNAL; 13015 if (ctx->myDoc->dict) { 13016 newDoc->dict = ctx->myDoc->dict; 13017 xmlDictReference(newDoc->dict); 13018 } 13019 if (ctx->myDoc != NULL) { 13020 newDoc->intSubset = ctx->myDoc->intSubset; 13021 newDoc->extSubset = ctx->myDoc->extSubset; 13022 } 13023 if (ctx->myDoc->URL != NULL) { 13024 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 13025 } 13026 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13027 if (newRoot == NULL) { 13028 ctxt->sax = oldsax; 13029 xmlFreeParserCtxt(ctxt); 13030 newDoc->intSubset = NULL; 13031 newDoc->extSubset = NULL; 13032 xmlFreeDoc(newDoc); 13033 return(-1); 13034 } 13035 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13036 nodePush(ctxt, newDoc->children); 13037 if (ctx->myDoc == NULL) { 13038 
ctxt->myDoc = newDoc; 13039 } else { 13040 ctxt->myDoc = ctx->myDoc; 13041 newDoc->children->doc = ctx->myDoc; 13042 } 13043 13044 /* 13045 * Get the 4 first bytes and decode the charset 13046 * if enc != XML_CHAR_ENCODING_NONE 13047 * plug some encoding conversion routines. 13048 */ 13049 GROW 13050 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13051 start[0] = RAW; 13052 start[1] = NXT(1); 13053 start[2] = NXT(2); 13054 start[3] = NXT(3); 13055 enc = xmlDetectCharEncoding(start, 4); 13056 if (enc != XML_CHAR_ENCODING_NONE) { 13057 xmlSwitchEncoding(ctxt, enc); 13058 } 13059 } 13060 13061 /* 13062 * Parse a possible text declaration first 13063 */ 13064 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13065 xmlParseTextDecl(ctxt); 13066 /* 13067 * An XML-1.0 document can't reference an entity not XML-1.0 13068 */ 13069 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 13070 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 13071 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 13072 "Version mismatch between document and entity\n"); 13073 } 13074 } 13075 13076 /* 13077 * If the user provided its own SAX callbacks then reuse the 13078 * useData callback field, otherwise the expected setup in a 13079 * DOM builder is to have userData == ctxt 13080 */ 13081 if (ctx->userData == ctx) 13082 ctxt->userData = ctxt; 13083 else 13084 ctxt->userData = ctx->userData; 13085 13086 /* 13087 * Doing validity checking on chunk doesn't make sense 13088 */ 13089 ctxt->instate = XML_PARSER_CONTENT; 13090 ctxt->validate = ctx->validate; 13091 ctxt->valid = ctx->valid; 13092 ctxt->loadsubset = ctx->loadsubset; 13093 ctxt->depth = ctx->depth + 1; 13094 ctxt->replaceEntities = ctx->replaceEntities; 13095 if (ctxt->validate) { 13096 ctxt->vctxt.error = ctx->vctxt.error; 13097 ctxt->vctxt.warning = ctx->vctxt.warning; 13098 } else { 13099 ctxt->vctxt.error = NULL; 13100 ctxt->vctxt.warning = NULL; 13101 } 13102 ctxt->vctxt.nodeTab = NULL; 13103 
ctxt->vctxt.nodeNr = 0; 13104 ctxt->vctxt.nodeMax = 0; 13105 ctxt->vctxt.node = NULL; 13106 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13107 ctxt->dict = ctx->dict; 13108 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13109 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13110 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13111 ctxt->dictNames = ctx->dictNames; 13112 ctxt->attsDefault = ctx->attsDefault; 13113 ctxt->attsSpecial = ctx->attsSpecial; 13114 ctxt->linenumbers = ctx->linenumbers; 13115 13116 xmlParseContent(ctxt); 13117 13118 ctx->validate = ctxt->validate; 13119 ctx->valid = ctxt->valid; 13120 if ((RAW == '<') && (NXT(1) == '/')) { 13121 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13122 } else if (RAW != 0) { 13123 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13124 } 13125 if (ctxt->node != newDoc->children) { 13126 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13127 } 13128 13129 if (!ctxt->wellFormed) { 13130 if (ctxt->errNo == 0) 13131 ret = 1; 13132 else 13133 ret = ctxt->errNo; 13134 } else { 13135 if (lst != NULL) { 13136 xmlNodePtr cur; 13137 13138 /* 13139 * Return the newly created nodeset after unlinking it from 13140 * they pseudo parent. 
13141 */ 13142 cur = newDoc->children->children; 13143 *lst = cur; 13144 while (cur != NULL) { 13145 cur->parent = NULL; 13146 cur = cur->next; 13147 } 13148 newDoc->children->children = NULL; 13149 } 13150 ret = 0; 13151 } 13152 ctxt->sax = oldsax; 13153 ctxt->dict = NULL; 13154 ctxt->attsDefault = NULL; 13155 ctxt->attsSpecial = NULL; 13156 xmlFreeParserCtxt(ctxt); 13157 newDoc->intSubset = NULL; 13158 newDoc->extSubset = NULL; 13159 xmlFreeDoc(newDoc); 13160 13161 return(ret); 13162 } 13163 13164 /** 13165 * xmlParseExternalEntityPrivate: 13166 * @doc: the document the chunk pertains to 13167 * @oldctxt: the previous parser context if available 13168 * @sax: the SAX handler bloc (possibly NULL) 13169 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13170 * @depth: Used for loop detection, use 0 13171 * @URL: the URL for the entity to load 13172 * @ID: the System ID for the entity to load 13173 * @list: the return value for the set of parsed nodes 13174 * 13175 * Private version of xmlParseExternalEntity() 13176 * 13177 * Returns 0 if the entity is well formed, -1 in case of args problem and 13178 * the parser error code otherwise 13179 */ 13180 13181 static xmlParserErrors 13182 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13183 xmlSAXHandlerPtr sax, 13184 void *user_data, int depth, const xmlChar *URL, 13185 const xmlChar *ID, xmlNodePtr *list) { 13186 xmlParserCtxtPtr ctxt; 13187 xmlDocPtr newDoc; 13188 xmlNodePtr newRoot; 13189 xmlSAXHandlerPtr oldsax = NULL; 13190 xmlParserErrors ret = XML_ERR_OK; 13191 xmlChar start[4]; 13192 xmlCharEncoding enc; 13193 13194 if (((depth > 40) && 13195 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 13196 (depth > 1024)) { 13197 return(XML_ERR_ENTITY_LOOP); 13198 } 13199 13200 if (list != NULL) 13201 *list = NULL; 13202 if ((URL == NULL) && (ID == NULL)) 13203 return(XML_ERR_INTERNAL_ERROR); 13204 if (doc == NULL) 13205 return(XML_ERR_INTERNAL_ERROR); 13206 
13207 13208 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13209 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13210 ctxt->userData = ctxt; 13211 if (oldctxt != NULL) { 13212 ctxt->_private = oldctxt->_private; 13213 ctxt->loadsubset = oldctxt->loadsubset; 13214 ctxt->validate = oldctxt->validate; 13215 ctxt->external = oldctxt->external; 13216 ctxt->record_info = oldctxt->record_info; 13217 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13218 ctxt->node_seq.length = oldctxt->node_seq.length; 13219 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13220 } else { 13221 /* 13222 * Doing validity checking on chunk without context 13223 * doesn't make sense 13224 */ 13225 ctxt->_private = NULL; 13226 ctxt->validate = 0; 13227 ctxt->external = 2; 13228 ctxt->loadsubset = 0; 13229 } 13230 if (sax != NULL) { 13231 oldsax = ctxt->sax; 13232 ctxt->sax = sax; 13233 if (user_data != NULL) 13234 ctxt->userData = user_data; 13235 } 13236 xmlDetectSAX2(ctxt); 13237 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13238 if (newDoc == NULL) { 13239 ctxt->node_seq.maximum = 0; 13240 ctxt->node_seq.length = 0; 13241 ctxt->node_seq.buffer = NULL; 13242 xmlFreeParserCtxt(ctxt); 13243 return(XML_ERR_INTERNAL_ERROR); 13244 } 13245 newDoc->properties = XML_DOC_INTERNAL; 13246 newDoc->intSubset = doc->intSubset; 13247 newDoc->extSubset = doc->extSubset; 13248 newDoc->dict = doc->dict; 13249 xmlDictReference(newDoc->dict); 13250 13251 if (doc->URL != NULL) { 13252 newDoc->URL = xmlStrdup(doc->URL); 13253 } 13254 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13255 if (newRoot == NULL) { 13256 if (sax != NULL) 13257 ctxt->sax = oldsax; 13258 ctxt->node_seq.maximum = 0; 13259 ctxt->node_seq.length = 0; 13260 ctxt->node_seq.buffer = NULL; 13261 xmlFreeParserCtxt(ctxt); 13262 newDoc->intSubset = NULL; 13263 newDoc->extSubset = NULL; 13264 xmlFreeDoc(newDoc); 13265 return(XML_ERR_INTERNAL_ERROR); 13266 } 13267 xmlAddChild((xmlNodePtr) newDoc, newRoot); 
13268 nodePush(ctxt, newDoc->children); 13269 ctxt->myDoc = doc; 13270 newRoot->doc = doc; 13271 13272 /* 13273 * Get the 4 first bytes and decode the charset 13274 * if enc != XML_CHAR_ENCODING_NONE 13275 * plug some encoding conversion routines. 13276 */ 13277 GROW; 13278 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13279 start[0] = RAW; 13280 start[1] = NXT(1); 13281 start[2] = NXT(2); 13282 start[3] = NXT(3); 13283 enc = xmlDetectCharEncoding(start, 4); 13284 if (enc != XML_CHAR_ENCODING_NONE) { 13285 xmlSwitchEncoding(ctxt, enc); 13286 } 13287 } 13288 13289 /* 13290 * Parse a possible text declaration first 13291 */ 13292 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13293 xmlParseTextDecl(ctxt); 13294 } 13295 13296 ctxt->instate = XML_PARSER_CONTENT; 13297 ctxt->depth = depth; 13298 13299 xmlParseContent(ctxt); 13300 13301 if ((RAW == '<') && (NXT(1) == '/')) { 13302 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13303 } else if (RAW != 0) { 13304 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13305 } 13306 if (ctxt->node != newDoc->children) { 13307 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13308 } 13309 13310 if (!ctxt->wellFormed) { 13311 if (ctxt->errNo == 0) 13312 ret = XML_ERR_INTERNAL_ERROR; 13313 else 13314 ret = (xmlParserErrors)ctxt->errNo; 13315 } else { 13316 if (list != NULL) { 13317 xmlNodePtr cur; 13318 13319 /* 13320 * Return the newly created nodeset after unlinking it from 13321 * they pseudo parent. 13322 */ 13323 cur = newDoc->children->children; 13324 *list = cur; 13325 while (cur != NULL) { 13326 cur->parent = NULL; 13327 cur = cur->next; 13328 } 13329 newDoc->children->children = NULL; 13330 } 13331 ret = XML_ERR_OK; 13332 } 13333 13334 /* 13335 * Record in the parent context the number of entities replacement 13336 * done when parsing that reference. 
13337 */ 13338 if (oldctxt != NULL) 13339 oldctxt->nbentities += ctxt->nbentities; 13340 13341 /* 13342 * Also record the size of the entity parsed 13343 */ 13344 if (ctxt->input != NULL) { 13345 oldctxt->sizeentities += ctxt->input->consumed; 13346 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13347 } 13348 /* 13349 * And record the last error if any 13350 */ 13351 if (ctxt->lastError.code != XML_ERR_OK) 13352 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13353 13354 if (sax != NULL) 13355 ctxt->sax = oldsax; 13356 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13357 oldctxt->node_seq.length = ctxt->node_seq.length; 13358 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13359 ctxt->node_seq.maximum = 0; 13360 ctxt->node_seq.length = 0; 13361 ctxt->node_seq.buffer = NULL; 13362 xmlFreeParserCtxt(ctxt); 13363 newDoc->intSubset = NULL; 13364 newDoc->extSubset = NULL; 13365 xmlFreeDoc(newDoc); 13366 13367 return(ret); 13368 } 13369 13370 #ifdef LIBXML_SAX1_ENABLED 13371 /** 13372 * xmlParseExternalEntity: 13373 * @doc: the document the chunk pertains to 13374 * @sax: the SAX handler bloc (possibly NULL) 13375 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13376 * @depth: Used for loop detection, use 0 13377 * @URL: the URL for the entity to load 13378 * @ID: the System ID for the entity to load 13379 * @lst: the return value for the set of parsed nodes 13380 * 13381 * Parse an external general entity 13382 * An external general parsed entity is well-formed if it matches the 13383 * production labeled extParsedEnt. 13384 * 13385 * [78] extParsedEnt ::= TextDecl? 
content 13386 * 13387 * Returns 0 if the entity is well formed, -1 in case of args problem and 13388 * the parser error code otherwise 13389 */ 13390 13391 int 13392 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13393 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13394 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13395 ID, lst)); 13396 } 13397 13398 /** 13399 * xmlParseBalancedChunkMemory: 13400 * @doc: the document the chunk pertains to 13401 * @sax: the SAX handler bloc (possibly NULL) 13402 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13403 * @depth: Used for loop detection, use 0 13404 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13405 * @lst: the return value for the set of parsed nodes 13406 * 13407 * Parse a well-balanced chunk of an XML document 13408 * called by the parser 13409 * The allowed sequence for the Well Balanced Chunk is the one defined by 13410 * the content production in the XML grammar: 13411 * 13412 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13413 * 13414 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13415 * the parser error code otherwise 13416 */ 13417 13418 int 13419 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13420 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13421 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13422 depth, string, lst, 0 ); 13423 } 13424 #endif /* LIBXML_SAX1_ENABLED */ 13425 13426 /** 13427 * xmlParseBalancedChunkMemoryInternal: 13428 * @oldctxt: the existing parsing context 13429 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13430 * @user_data: the user data field for the parser context 13431 * @lst: the return value for the set of parsed nodes 13432 * 13433 * 13434 * Parse a well-balanced chunk of an XML document 13435 * called by the parser 
13436 * The allowed sequence for the Well Balanced Chunk is the one defined by 13437 * the content production in the XML grammar: 13438 * 13439 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13440 * 13441 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13442 * error code otherwise 13443 * 13444 * In case recover is set to 1, the nodelist will not be empty even if 13445 * the parsed chunk is not well balanced. 13446 */ 13447 static xmlParserErrors 13448 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 13449 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 13450 xmlParserCtxtPtr ctxt; 13451 xmlDocPtr newDoc = NULL; 13452 xmlNodePtr newRoot; 13453 xmlSAXHandlerPtr oldsax = NULL; 13454 xmlNodePtr content = NULL; 13455 xmlNodePtr last = NULL; 13456 int size; 13457 xmlParserErrors ret = XML_ERR_OK; 13458 #ifdef SAX2 13459 int i; 13460 #endif 13461 13462 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 13463 (oldctxt->depth > 1024)) { 13464 return(XML_ERR_ENTITY_LOOP); 13465 } 13466 13467 13468 if (lst != NULL) 13469 *lst = NULL; 13470 if (string == NULL) 13471 return(XML_ERR_INTERNAL_ERROR); 13472 13473 size = xmlStrlen(string); 13474 13475 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13476 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13477 if (user_data != NULL) 13478 ctxt->userData = user_data; 13479 else 13480 ctxt->userData = ctxt; 13481 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13482 ctxt->dict = oldctxt->dict; 13483 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13484 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13485 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13486 13487 #ifdef SAX2 13488 /* propagate namespaces down the entity */ 13489 for (i = 0;i < oldctxt->nsNr;i += 2) { 13490 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 13491 } 13492 #endif 13493 13494 oldsax = 
ctxt->sax; 13495 ctxt->sax = oldctxt->sax; 13496 xmlDetectSAX2(ctxt); 13497 ctxt->replaceEntities = oldctxt->replaceEntities; 13498 ctxt->options = oldctxt->options; 13499 13500 ctxt->_private = oldctxt->_private; 13501 if (oldctxt->myDoc == NULL) { 13502 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13503 if (newDoc == NULL) { 13504 ctxt->sax = oldsax; 13505 ctxt->dict = NULL; 13506 xmlFreeParserCtxt(ctxt); 13507 return(XML_ERR_INTERNAL_ERROR); 13508 } 13509 newDoc->properties = XML_DOC_INTERNAL; 13510 newDoc->dict = ctxt->dict; 13511 xmlDictReference(newDoc->dict); 13512 ctxt->myDoc = newDoc; 13513 } else { 13514 ctxt->myDoc = oldctxt->myDoc; 13515 content = ctxt->myDoc->children; 13516 last = ctxt->myDoc->last; 13517 } 13518 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13519 if (newRoot == NULL) { 13520 ctxt->sax = oldsax; 13521 ctxt->dict = NULL; 13522 xmlFreeParserCtxt(ctxt); 13523 if (newDoc != NULL) { 13524 xmlFreeDoc(newDoc); 13525 } 13526 return(XML_ERR_INTERNAL_ERROR); 13527 } 13528 ctxt->myDoc->children = NULL; 13529 ctxt->myDoc->last = NULL; 13530 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13531 nodePush(ctxt, ctxt->myDoc->children); 13532 ctxt->instate = XML_PARSER_CONTENT; 13533 ctxt->depth = oldctxt->depth + 1; 13534 13535 ctxt->validate = 0; 13536 ctxt->loadsubset = oldctxt->loadsubset; 13537 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13538 /* 13539 * ID/IDREF registration will be done in xmlValidateElement below 13540 */ 13541 ctxt->loadsubset |= XML_SKIP_IDS; 13542 } 13543 ctxt->dictNames = oldctxt->dictNames; 13544 ctxt->attsDefault = oldctxt->attsDefault; 13545 ctxt->attsSpecial = oldctxt->attsSpecial; 13546 13547 xmlParseContent(ctxt); 13548 if ((RAW == '<') && (NXT(1) == '/')) { 13549 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13550 } else if (RAW != 0) { 13551 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13552 } 13553 if (ctxt->node != ctxt->myDoc->children) { 13554 xmlFatalErr(ctxt, 
XML_ERR_NOT_WELL_BALANCED, NULL); 13555 } 13556 13557 if (!ctxt->wellFormed) { 13558 if (ctxt->errNo == 0) 13559 ret = XML_ERR_INTERNAL_ERROR; 13560 else 13561 ret = (xmlParserErrors)ctxt->errNo; 13562 } else { 13563 ret = XML_ERR_OK; 13564 } 13565 13566 if ((lst != NULL) && (ret == XML_ERR_OK)) { 13567 xmlNodePtr cur; 13568 13569 /* 13570 * Return the newly created nodeset after unlinking it from 13571 * they pseudo parent. 13572 */ 13573 cur = ctxt->myDoc->children->children; 13574 *lst = cur; 13575 while (cur != NULL) { 13576 #ifdef LIBXML_VALID_ENABLED 13577 if ((oldctxt->validate) && (oldctxt->wellFormed) && 13578 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13579 (cur->type == XML_ELEMENT_NODE)) { 13580 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13581 oldctxt->myDoc, cur); 13582 } 13583 #endif /* LIBXML_VALID_ENABLED */ 13584 cur->parent = NULL; 13585 cur = cur->next; 13586 } 13587 ctxt->myDoc->children->children = NULL; 13588 } 13589 if (ctxt->myDoc != NULL) { 13590 xmlFreeNode(ctxt->myDoc->children); 13591 ctxt->myDoc->children = content; 13592 ctxt->myDoc->last = last; 13593 } 13594 13595 /* 13596 * Record in the parent context the number of entities replacement 13597 * done when parsing that reference. 
13598 */ 13599 if (oldctxt != NULL) 13600 oldctxt->nbentities += ctxt->nbentities; 13601 13602 /* 13603 * Also record the last error if any 13604 */ 13605 if (ctxt->lastError.code != XML_ERR_OK) 13606 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13607 13608 ctxt->sax = oldsax; 13609 ctxt->dict = NULL; 13610 ctxt->attsDefault = NULL; 13611 ctxt->attsSpecial = NULL; 13612 xmlFreeParserCtxt(ctxt); 13613 if (newDoc != NULL) { 13614 xmlFreeDoc(newDoc); 13615 } 13616 13617 return(ret); 13618 } 13619 13620 /** 13621 * xmlParseInNodeContext: 13622 * @node: the context node 13623 * @data: the input string 13624 * @datalen: the input string length in bytes 13625 * @options: a combination of xmlParserOption 13626 * @lst: the return value for the set of parsed nodes 13627 * 13628 * Parse a well-balanced chunk of an XML document 13629 * within the context (DTD, namespaces, etc ...) of the given node. 13630 * 13631 * The allowed sequence for the data is a Well Balanced Chunk defined by 13632 * the content production in the XML grammar: 13633 * 13634 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13635 * 13636 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13637 * error code otherwise 13638 */ 13639 xmlParserErrors 13640 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13641 int options, xmlNodePtr *lst) { 13642 #ifdef SAX2 13643 xmlParserCtxtPtr ctxt; 13644 xmlDocPtr doc = NULL; 13645 xmlNodePtr fake, cur; 13646 int nsnr = 0; 13647 13648 xmlParserErrors ret = XML_ERR_OK; 13649 13650 /* 13651 * check all input parameters, grab the document 13652 */ 13653 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13654 return(XML_ERR_INTERNAL_ERROR); 13655 switch (node->type) { 13656 case XML_ELEMENT_NODE: 13657 case XML_ATTRIBUTE_NODE: 13658 case XML_TEXT_NODE: 13659 case XML_CDATA_SECTION_NODE: 13660 case XML_ENTITY_REF_NODE: 13661 case XML_PI_NODE: 13662 case XML_COMMENT_NODE: 13663 case 
XML_DOCUMENT_NODE: 13664 case XML_HTML_DOCUMENT_NODE: 13665 break; 13666 default: 13667 return(XML_ERR_INTERNAL_ERROR); 13668 13669 } 13670 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13671 (node->type != XML_DOCUMENT_NODE) && 13672 (node->type != XML_HTML_DOCUMENT_NODE)) 13673 node = node->parent; 13674 if (node == NULL) 13675 return(XML_ERR_INTERNAL_ERROR); 13676 if (node->type == XML_ELEMENT_NODE) 13677 doc = node->doc; 13678 else 13679 doc = (xmlDocPtr) node; 13680 if (doc == NULL) 13681 return(XML_ERR_INTERNAL_ERROR); 13682 13683 /* 13684 * allocate a context and set-up everything not related to the 13685 * node position in the tree 13686 */ 13687 if (doc->type == XML_DOCUMENT_NODE) 13688 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13689 #ifdef LIBXML_HTML_ENABLED 13690 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13691 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13692 /* 13693 * When parsing in context, it makes no sense to add implied 13694 * elements like html/body/etc... 13695 */ 13696 options |= HTML_PARSE_NOIMPLIED; 13697 } 13698 #endif 13699 else 13700 return(XML_ERR_INTERNAL_ERROR); 13701 13702 if (ctxt == NULL) 13703 return(XML_ERR_NO_MEMORY); 13704 13705 /* 13706 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13707 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13708 * we must wait until the last moment to free the original one. 
13709 */ 13710 if (doc->dict != NULL) { 13711 if (ctxt->dict != NULL) 13712 xmlDictFree(ctxt->dict); 13713 ctxt->dict = doc->dict; 13714 } else 13715 options |= XML_PARSE_NODICT; 13716 13717 if (doc->encoding != NULL) { 13718 xmlCharEncodingHandlerPtr hdlr; 13719 13720 if (ctxt->encoding != NULL) 13721 xmlFree((xmlChar *) ctxt->encoding); 13722 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13723 13724 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13725 if (hdlr != NULL) { 13726 xmlSwitchToEncoding(ctxt, hdlr); 13727 } else { 13728 return(XML_ERR_UNSUPPORTED_ENCODING); 13729 } 13730 } 13731 13732 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13733 xmlDetectSAX2(ctxt); 13734 ctxt->myDoc = doc; 13735 /* parsing in context, i.e. as within existing content */ 13736 ctxt->instate = XML_PARSER_CONTENT; 13737 13738 fake = xmlNewComment(NULL); 13739 if (fake == NULL) { 13740 xmlFreeParserCtxt(ctxt); 13741 return(XML_ERR_NO_MEMORY); 13742 } 13743 xmlAddChild(node, fake); 13744 13745 if (node->type == XML_ELEMENT_NODE) { 13746 nodePush(ctxt, node); 13747 /* 13748 * initialize the SAX2 namespaces stack 13749 */ 13750 cur = node; 13751 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13752 xmlNsPtr ns = cur->nsDef; 13753 const xmlChar *iprefix, *ihref; 13754 13755 while (ns != NULL) { 13756 if (ctxt->dict) { 13757 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13758 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13759 } else { 13760 iprefix = ns->prefix; 13761 ihref = ns->href; 13762 } 13763 13764 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13765 nsPush(ctxt, iprefix, ihref); 13766 nsnr++; 13767 } 13768 ns = ns->next; 13769 } 13770 cur = cur->parent; 13771 } 13772 } 13773 13774 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13775 /* 13776 * ID/IDREF registration will be done in xmlValidateElement below 13777 */ 13778 ctxt->loadsubset |= XML_SKIP_IDS; 13779 } 13780 13781 #ifdef LIBXML_HTML_ENABLED 13782 if 
(doc->type == XML_HTML_DOCUMENT_NODE) 13783 __htmlParseContent(ctxt); 13784 else 13785 #endif 13786 xmlParseContent(ctxt); 13787 13788 nsPop(ctxt, nsnr); 13789 if ((RAW == '<') && (NXT(1) == '/')) { 13790 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13791 } else if (RAW != 0) { 13792 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13793 } 13794 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13795 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13796 ctxt->wellFormed = 0; 13797 } 13798 13799 if (!ctxt->wellFormed) { 13800 if (ctxt->errNo == 0) 13801 ret = XML_ERR_INTERNAL_ERROR; 13802 else 13803 ret = (xmlParserErrors)ctxt->errNo; 13804 } else { 13805 ret = XML_ERR_OK; 13806 } 13807 13808 /* 13809 * Return the newly created nodeset after unlinking it from 13810 * the pseudo sibling. 13811 */ 13812 13813 cur = fake->next; 13814 fake->next = NULL; 13815 node->last = fake; 13816 13817 if (cur != NULL) { 13818 cur->prev = NULL; 13819 } 13820 13821 *lst = cur; 13822 13823 while (cur != NULL) { 13824 cur->parent = NULL; 13825 cur = cur->next; 13826 } 13827 13828 xmlUnlinkNode(fake); 13829 xmlFreeNode(fake); 13830 13831 13832 if (ret != XML_ERR_OK) { 13833 xmlFreeNodeList(*lst); 13834 *lst = NULL; 13835 } 13836 13837 if (doc->dict != NULL) 13838 ctxt->dict = NULL; 13839 xmlFreeParserCtxt(ctxt); 13840 13841 return(ret); 13842 #else /* !SAX2 */ 13843 return(XML_ERR_INTERNAL_ERROR); 13844 #endif 13845 } 13846 13847 #ifdef LIBXML_SAX1_ENABLED 13848 /** 13849 * xmlParseBalancedChunkMemoryRecover: 13850 * @doc: the document the chunk pertains to 13851 * @sax: the SAX handler bloc (possibly NULL) 13852 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13853 * @depth: Used for loop detection, use 0 13854 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13855 * @lst: the return value for the set of parsed nodes 13856 * @recover: return nodes even if the data is broken (use 0) 13857 * 13858 * 13859 * Parse a well-balanced chunk 
of an XML document 13860 * called by the parser 13861 * The allowed sequence for the Well Balanced Chunk is the one defined by 13862 * the content production in the XML grammar: 13863 * 13864 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13865 * 13866 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13867 * the parser error code otherwise 13868 * 13869 * In case recover is set to 1, the nodelist will not be empty even if 13870 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13871 * some extent. 13872 */ 13873 int 13874 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13875 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13876 int recover) { 13877 xmlParserCtxtPtr ctxt; 13878 xmlDocPtr newDoc; 13879 xmlSAXHandlerPtr oldsax = NULL; 13880 xmlNodePtr content, newRoot; 13881 int size; 13882 int ret = 0; 13883 13884 if (depth > 40) { 13885 return(XML_ERR_ENTITY_LOOP); 13886 } 13887 13888 13889 if (lst != NULL) 13890 *lst = NULL; 13891 if (string == NULL) 13892 return(-1); 13893 13894 size = xmlStrlen(string); 13895 13896 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13897 if (ctxt == NULL) return(-1); 13898 ctxt->userData = ctxt; 13899 if (sax != NULL) { 13900 oldsax = ctxt->sax; 13901 ctxt->sax = sax; 13902 if (user_data != NULL) 13903 ctxt->userData = user_data; 13904 } 13905 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13906 if (newDoc == NULL) { 13907 xmlFreeParserCtxt(ctxt); 13908 return(-1); 13909 } 13910 newDoc->properties = XML_DOC_INTERNAL; 13911 if ((doc != NULL) && (doc->dict != NULL)) { 13912 xmlDictFree(ctxt->dict); 13913 ctxt->dict = doc->dict; 13914 xmlDictReference(ctxt->dict); 13915 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13916 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13917 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13918 ctxt->dictNames = 1; 13919 } else { 
13920 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13921 } 13922 if (doc != NULL) { 13923 newDoc->intSubset = doc->intSubset; 13924 newDoc->extSubset = doc->extSubset; 13925 } 13926 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13927 if (newRoot == NULL) { 13928 if (sax != NULL) 13929 ctxt->sax = oldsax; 13930 xmlFreeParserCtxt(ctxt); 13931 newDoc->intSubset = NULL; 13932 newDoc->extSubset = NULL; 13933 xmlFreeDoc(newDoc); 13934 return(-1); 13935 } 13936 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13937 nodePush(ctxt, newRoot); 13938 if (doc == NULL) { 13939 ctxt->myDoc = newDoc; 13940 } else { 13941 ctxt->myDoc = newDoc; 13942 newDoc->children->doc = doc; 13943 /* Ensure that doc has XML spec namespace */ 13944 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13945 newDoc->oldNs = doc->oldNs; 13946 } 13947 ctxt->instate = XML_PARSER_CONTENT; 13948 ctxt->depth = depth; 13949 13950 /* 13951 * Doing validity checking on chunk doesn't make sense 13952 */ 13953 ctxt->validate = 0; 13954 ctxt->loadsubset = 0; 13955 xmlDetectSAX2(ctxt); 13956 13957 if ( doc != NULL ){ 13958 content = doc->children; 13959 doc->children = NULL; 13960 xmlParseContent(ctxt); 13961 doc->children = content; 13962 } 13963 else { 13964 xmlParseContent(ctxt); 13965 } 13966 if ((RAW == '<') && (NXT(1) == '/')) { 13967 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13968 } else if (RAW != 0) { 13969 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13970 } 13971 if (ctxt->node != newDoc->children) { 13972 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13973 } 13974 13975 if (!ctxt->wellFormed) { 13976 if (ctxt->errNo == 0) 13977 ret = 1; 13978 else 13979 ret = ctxt->errNo; 13980 } else { 13981 ret = 0; 13982 } 13983 13984 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 13985 xmlNodePtr cur; 13986 13987 /* 13988 * Return the newly created nodeset after unlinking it from 13989 * they pseudo parent. 
13990 */ 13991 cur = newDoc->children->children; 13992 *lst = cur; 13993 while (cur != NULL) { 13994 xmlSetTreeDoc(cur, doc); 13995 cur->parent = NULL; 13996 cur = cur->next; 13997 } 13998 newDoc->children->children = NULL; 13999 } 14000 14001 if (sax != NULL) 14002 ctxt->sax = oldsax; 14003 xmlFreeParserCtxt(ctxt); 14004 newDoc->intSubset = NULL; 14005 newDoc->extSubset = NULL; 14006 newDoc->oldNs = NULL; 14007 xmlFreeDoc(newDoc); 14008 14009 return(ret); 14010 } 14011 14012 /** 14013 * xmlSAXParseEntity: 14014 * @sax: the SAX handler block 14015 * @filename: the filename 14016 * 14017 * parse an XML external entity out of context and build a tree. 14018 * It use the given SAX function block to handle the parsing callback. 14019 * If sax is NULL, fallback to the default DOM tree building routines. 14020 * 14021 * [78] extParsedEnt ::= TextDecl? content 14022 * 14023 * This correspond to a "Well Balanced" chunk 14024 * 14025 * Returns the resulting document tree 14026 */ 14027 14028 xmlDocPtr 14029 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 14030 xmlDocPtr ret; 14031 xmlParserCtxtPtr ctxt; 14032 14033 ctxt = xmlCreateFileParserCtxt(filename); 14034 if (ctxt == NULL) { 14035 return(NULL); 14036 } 14037 if (sax != NULL) { 14038 if (ctxt->sax != NULL) 14039 xmlFree(ctxt->sax); 14040 ctxt->sax = sax; 14041 ctxt->userData = NULL; 14042 } 14043 14044 xmlParseExtParsedEnt(ctxt); 14045 14046 if (ctxt->wellFormed) 14047 ret = ctxt->myDoc; 14048 else { 14049 ret = NULL; 14050 xmlFreeDoc(ctxt->myDoc); 14051 ctxt->myDoc = NULL; 14052 } 14053 if (sax != NULL) 14054 ctxt->sax = NULL; 14055 xmlFreeParserCtxt(ctxt); 14056 14057 return(ret); 14058 } 14059 14060 /** 14061 * xmlParseEntity: 14062 * @filename: the filename 14063 * 14064 * parse an XML external entity out of context and build a tree. 14065 * 14066 * [78] extParsedEnt ::= TextDecl? 
content 14067 * 14068 * This correspond to a "Well Balanced" chunk 14069 * 14070 * Returns the resulting document tree 14071 */ 14072 14073 xmlDocPtr 14074 xmlParseEntity(const char *filename) { 14075 return(xmlSAXParseEntity(NULL, filename)); 14076 } 14077 #endif /* LIBXML_SAX1_ENABLED */ 14078 14079 /** 14080 * xmlCreateEntityParserCtxtInternal: 14081 * @URL: the entity URL 14082 * @ID: the entity PUBLIC ID 14083 * @base: a possible base for the target URI 14084 * @pctx: parser context used to set options on new context 14085 * 14086 * Create a parser context for an external entity 14087 * Automatic support for ZLIB/Compress compressed document is provided 14088 * by default if found at compile-time. 14089 * 14090 * Returns the new parser context or NULL 14091 */ 14092 static xmlParserCtxtPtr 14093 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 14094 const xmlChar *base, xmlParserCtxtPtr pctx) { 14095 xmlParserCtxtPtr ctxt; 14096 xmlParserInputPtr inputStream; 14097 char *directory = NULL; 14098 xmlChar *uri; 14099 14100 ctxt = xmlNewParserCtxt(); 14101 if (ctxt == NULL) { 14102 return(NULL); 14103 } 14104 14105 if (pctx != NULL) { 14106 ctxt->options = pctx->options; 14107 ctxt->_private = pctx->_private; 14108 } 14109 14110 uri = xmlBuildURI(URL, base); 14111 14112 if (uri == NULL) { 14113 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 14114 if (inputStream == NULL) { 14115 xmlFreeParserCtxt(ctxt); 14116 return(NULL); 14117 } 14118 14119 inputPush(ctxt, inputStream); 14120 14121 if ((ctxt->directory == NULL) && (directory == NULL)) 14122 directory = xmlParserGetDirectory((char *)URL); 14123 if ((ctxt->directory == NULL) && (directory != NULL)) 14124 ctxt->directory = directory; 14125 } else { 14126 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 14127 if (inputStream == NULL) { 14128 xmlFree(uri); 14129 xmlFreeParserCtxt(ctxt); 14130 return(NULL); 14131 } 14132 14133 inputPush(ctxt, 
inputStream); 14134 14135 if ((ctxt->directory == NULL) && (directory == NULL)) 14136 directory = xmlParserGetDirectory((char *)uri); 14137 if ((ctxt->directory == NULL) && (directory != NULL)) 14138 ctxt->directory = directory; 14139 xmlFree(uri); 14140 } 14141 return(ctxt); 14142 } 14143 14144 /** 14145 * xmlCreateEntityParserCtxt: 14146 * @URL: the entity URL 14147 * @ID: the entity PUBLIC ID 14148 * @base: a possible base for the target URI 14149 * 14150 * Create a parser context for an external entity 14151 * Automatic support for ZLIB/Compress compressed document is provided 14152 * by default if found at compile-time. 14153 * 14154 * Returns the new parser context or NULL 14155 */ 14156 xmlParserCtxtPtr 14157 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14158 const xmlChar *base) { 14159 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14160 14161 } 14162 14163 /************************************************************************ 14164 * * 14165 * Front ends when parsing from a file * 14166 * * 14167 ************************************************************************/ 14168 14169 /** 14170 * xmlCreateURLParserCtxt: 14171 * @filename: the filename or URL 14172 * @options: a combination of xmlParserOption 14173 * 14174 * Create a parser context for a file or URL content. 
14175 * Automatic support for ZLIB/Compress compressed document is provided 14176 * by default if found at compile-time and for file accesses 14177 * 14178 * Returns the new parser context or NULL 14179 */ 14180 xmlParserCtxtPtr 14181 xmlCreateURLParserCtxt(const char *filename, int options) 14182 { 14183 xmlParserCtxtPtr ctxt; 14184 xmlParserInputPtr inputStream; 14185 char *directory = NULL; 14186 14187 ctxt = xmlNewParserCtxt(); 14188 if (ctxt == NULL) { 14189 xmlErrMemory(NULL, "cannot allocate parser context"); 14190 return(NULL); 14191 } 14192 14193 if (options) 14194 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14195 ctxt->linenumbers = 1; 14196 14197 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14198 if (inputStream == NULL) { 14199 xmlFreeParserCtxt(ctxt); 14200 return(NULL); 14201 } 14202 14203 inputPush(ctxt, inputStream); 14204 if ((ctxt->directory == NULL) && (directory == NULL)) 14205 directory = xmlParserGetDirectory(filename); 14206 if ((ctxt->directory == NULL) && (directory != NULL)) 14207 ctxt->directory = directory; 14208 14209 return(ctxt); 14210 } 14211 14212 /** 14213 * xmlCreateFileParserCtxt: 14214 * @filename: the filename 14215 * 14216 * Create a parser context for a file content. 14217 * Automatic support for ZLIB/Compress compressed document is provided 14218 * by default if found at compile-time. 14219 * 14220 * Returns the new parser context or NULL 14221 */ 14222 xmlParserCtxtPtr 14223 xmlCreateFileParserCtxt(const char *filename) 14224 { 14225 return(xmlCreateURLParserCtxt(filename, 0)); 14226 } 14227 14228 #ifdef LIBXML_SAX1_ENABLED 14229 /** 14230 * xmlSAXParseFileWithData: 14231 * @sax: the SAX handler block 14232 * @filename: the filename 14233 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14234 * documents 14235 * @data: the userdata 14236 * 14237 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14238 * compressed document is provided by default if found at compile-time. 14239 * It use the given SAX function block to handle the parsing callback. 14240 * If sax is NULL, fallback to the default DOM tree building routines. 14241 * 14242 * User data (void *) is stored within the parser context in the 14243 * context's _private member, so it is available nearly everywhere in libxml 14244 * 14245 * Returns the resulting document tree 14246 */ 14247 14248 xmlDocPtr 14249 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14250 int recovery, void *data) { 14251 xmlDocPtr ret; 14252 xmlParserCtxtPtr ctxt; 14253 14254 xmlInitParser(); 14255 14256 ctxt = xmlCreateFileParserCtxt(filename); 14257 if (ctxt == NULL) { 14258 return(NULL); 14259 } 14260 if (sax != NULL) { 14261 if (ctxt->sax != NULL) 14262 xmlFree(ctxt->sax); 14263 ctxt->sax = sax; 14264 } 14265 xmlDetectSAX2(ctxt); 14266 if (data!=NULL) { 14267 ctxt->_private = data; 14268 } 14269 14270 if (ctxt->directory == NULL) 14271 ctxt->directory = xmlParserGetDirectory(filename); 14272 14273 ctxt->recovery = recovery; 14274 14275 xmlParseDocument(ctxt); 14276 14277 if ((ctxt->wellFormed) || recovery) { 14278 ret = ctxt->myDoc; 14279 if (ret != NULL) { 14280 if (ctxt->input->buf->compressed > 0) 14281 ret->compression = 9; 14282 else 14283 ret->compression = ctxt->input->buf->compressed; 14284 } 14285 } 14286 else { 14287 ret = NULL; 14288 xmlFreeDoc(ctxt->myDoc); 14289 ctxt->myDoc = NULL; 14290 } 14291 if (sax != NULL) 14292 ctxt->sax = NULL; 14293 xmlFreeParserCtxt(ctxt); 14294 14295 return(ret); 14296 } 14297 14298 /** 14299 * xmlSAXParseFile: 14300 * @sax: the SAX handler block 14301 * @filename: the filename 14302 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14303 * documents 14304 * 14305 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14306 * compressed document is provided by default if found at compile-time. 14307 * It use the given SAX function block to handle the parsing callback. 14308 * If sax is NULL, fallback to the default DOM tree building routines. 14309 * 14310 * Returns the resulting document tree 14311 */ 14312 14313 xmlDocPtr 14314 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14315 int recovery) { 14316 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14317 } 14318 14319 /** 14320 * xmlRecoverDoc: 14321 * @cur: a pointer to an array of xmlChar 14322 * 14323 * parse an XML in-memory document and build a tree. 14324 * In the case the document is not Well Formed, a attempt to build a 14325 * tree is tried anyway 14326 * 14327 * Returns the resulting document tree or NULL in case of failure 14328 */ 14329 14330 xmlDocPtr 14331 xmlRecoverDoc(const xmlChar *cur) { 14332 return(xmlSAXParseDoc(NULL, cur, 1)); 14333 } 14334 14335 /** 14336 * xmlParseFile: 14337 * @filename: the filename 14338 * 14339 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14340 * compressed document is provided by default if found at compile-time. 14341 * 14342 * Returns the resulting document tree if the file was wellformed, 14343 * NULL otherwise. 14344 */ 14345 14346 xmlDocPtr 14347 xmlParseFile(const char *filename) { 14348 return(xmlSAXParseFile(NULL, filename, 0)); 14349 } 14350 14351 /** 14352 * xmlRecoverFile: 14353 * @filename: the filename 14354 * 14355 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14356 * compressed document is provided by default if found at compile-time. 
14357 * In the case the document is not Well Formed, it attempts to build 14358 * a tree anyway 14359 * 14360 * Returns the resulting document tree or NULL in case of failure 14361 */ 14362 14363 xmlDocPtr 14364 xmlRecoverFile(const char *filename) { 14365 return(xmlSAXParseFile(NULL, filename, 1)); 14366 } 14367 14368 14369 /** 14370 * xmlSetupParserForBuffer: 14371 * @ctxt: an XML parser context 14372 * @buffer: a xmlChar * buffer 14373 * @filename: a file name 14374 * 14375 * Setup the parser context to parse a new buffer; Clears any prior 14376 * contents from the parser context. The buffer parameter must not be 14377 * NULL, but the filename parameter can be 14378 */ 14379 void 14380 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14381 const char* filename) 14382 { 14383 xmlParserInputPtr input; 14384 14385 if ((ctxt == NULL) || (buffer == NULL)) 14386 return; 14387 14388 input = xmlNewInputStream(ctxt); 14389 if (input == NULL) { 14390 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14391 xmlClearParserCtxt(ctxt); 14392 return; 14393 } 14394 14395 xmlClearParserCtxt(ctxt); 14396 if (filename != NULL) 14397 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14398 input->base = buffer; 14399 input->cur = buffer; 14400 input->end = &buffer[xmlStrlen(buffer)]; 14401 inputPush(ctxt, input); 14402 } 14403 14404 /** 14405 * xmlSAXUserParseFile: 14406 * @sax: a SAX handler 14407 * @user_data: The user data returned on SAX callbacks 14408 * @filename: a file name 14409 * 14410 * parse an XML file and call the given SAX handler routines. 
14411 * Automatic support for ZLIB/Compress compressed document is provided 14412 * 14413 * Returns 0 in case of success or a error number otherwise 14414 */ 14415 int 14416 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14417 const char *filename) { 14418 int ret = 0; 14419 xmlParserCtxtPtr ctxt; 14420 14421 ctxt = xmlCreateFileParserCtxt(filename); 14422 if (ctxt == NULL) return -1; 14423 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14424 xmlFree(ctxt->sax); 14425 ctxt->sax = sax; 14426 xmlDetectSAX2(ctxt); 14427 14428 if (user_data != NULL) 14429 ctxt->userData = user_data; 14430 14431 xmlParseDocument(ctxt); 14432 14433 if (ctxt->wellFormed) 14434 ret = 0; 14435 else { 14436 if (ctxt->errNo != 0) 14437 ret = ctxt->errNo; 14438 else 14439 ret = -1; 14440 } 14441 if (sax != NULL) 14442 ctxt->sax = NULL; 14443 if (ctxt->myDoc != NULL) { 14444 xmlFreeDoc(ctxt->myDoc); 14445 ctxt->myDoc = NULL; 14446 } 14447 xmlFreeParserCtxt(ctxt); 14448 14449 return ret; 14450 } 14451 #endif /* LIBXML_SAX1_ENABLED */ 14452 14453 /************************************************************************ 14454 * * 14455 * Front ends when parsing from memory * 14456 * * 14457 ************************************************************************/ 14458 14459 /** 14460 * xmlCreateMemoryParserCtxt: 14461 * @buffer: a pointer to a char array 14462 * @size: the size of the array 14463 * 14464 * Create a parser context for an XML in-memory document. 
14465 * 14466 * Returns the new parser context or NULL 14467 */ 14468 xmlParserCtxtPtr 14469 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14470 xmlParserCtxtPtr ctxt; 14471 xmlParserInputPtr input; 14472 xmlParserInputBufferPtr buf; 14473 14474 if (buffer == NULL) 14475 return(NULL); 14476 if (size <= 0) 14477 return(NULL); 14478 14479 ctxt = xmlNewParserCtxt(); 14480 if (ctxt == NULL) 14481 return(NULL); 14482 14483 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14484 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14485 if (buf == NULL) { 14486 xmlFreeParserCtxt(ctxt); 14487 return(NULL); 14488 } 14489 14490 input = xmlNewInputStream(ctxt); 14491 if (input == NULL) { 14492 xmlFreeParserInputBuffer(buf); 14493 xmlFreeParserCtxt(ctxt); 14494 return(NULL); 14495 } 14496 14497 input->filename = NULL; 14498 input->buf = buf; 14499 xmlBufResetInput(input->buf->buffer, input); 14500 14501 inputPush(ctxt, input); 14502 return(ctxt); 14503 } 14504 14505 #ifdef LIBXML_SAX1_ENABLED 14506 /** 14507 * xmlSAXParseMemoryWithData: 14508 * @sax: the SAX handler block 14509 * @buffer: an pointer to a char array 14510 * @size: the size of the array 14511 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14512 * documents 14513 * @data: the userdata 14514 * 14515 * parse an XML in-memory block and use the given SAX function block 14516 * to handle the parsing callback. If sax is NULL, fallback to the default 14517 * DOM tree building routines. 
14518 * 14519 * User data (void *) is stored within the parser context in the 14520 * context's _private member, so it is available nearly everywhere in libxml 14521 * 14522 * Returns the resulting document tree 14523 */ 14524 14525 xmlDocPtr 14526 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14527 int size, int recovery, void *data) { 14528 xmlDocPtr ret; 14529 xmlParserCtxtPtr ctxt; 14530 14531 xmlInitParser(); 14532 14533 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14534 if (ctxt == NULL) return(NULL); 14535 if (sax != NULL) { 14536 if (ctxt->sax != NULL) 14537 xmlFree(ctxt->sax); 14538 ctxt->sax = sax; 14539 } 14540 xmlDetectSAX2(ctxt); 14541 if (data!=NULL) { 14542 ctxt->_private=data; 14543 } 14544 14545 ctxt->recovery = recovery; 14546 14547 xmlParseDocument(ctxt); 14548 14549 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14550 else { 14551 ret = NULL; 14552 xmlFreeDoc(ctxt->myDoc); 14553 ctxt->myDoc = NULL; 14554 } 14555 if (sax != NULL) 14556 ctxt->sax = NULL; 14557 xmlFreeParserCtxt(ctxt); 14558 14559 return(ret); 14560 } 14561 14562 /** 14563 * xmlSAXParseMemory: 14564 * @sax: the SAX handler block 14565 * @buffer: an pointer to a char array 14566 * @size: the size of the array 14567 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14568 * documents 14569 * 14570 * parse an XML in-memory block and use the given SAX function block 14571 * to handle the parsing callback. If sax is NULL, fallback to the default 14572 * DOM tree building routines. 14573 * 14574 * Returns the resulting document tree 14575 */ 14576 xmlDocPtr 14577 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14578 int size, int recovery) { 14579 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14580 } 14581 14582 /** 14583 * xmlParseMemory: 14584 * @buffer: an pointer to a char array 14585 * @size: the size of the array 14586 * 14587 * parse an XML in-memory block and build a tree. 
14588 * 14589 * Returns the resulting document tree 14590 */ 14591 14592 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14593 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14594 } 14595 14596 /** 14597 * xmlRecoverMemory: 14598 * @buffer: an pointer to a char array 14599 * @size: the size of the array 14600 * 14601 * parse an XML in-memory block and build a tree. 14602 * In the case the document is not Well Formed, an attempt to 14603 * build a tree is tried anyway 14604 * 14605 * Returns the resulting document tree or NULL in case of error 14606 */ 14607 14608 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14609 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14610 } 14611 14612 /** 14613 * xmlSAXUserParseMemory: 14614 * @sax: a SAX handler 14615 * @user_data: The user data returned on SAX callbacks 14616 * @buffer: an in-memory XML document input 14617 * @size: the length of the XML document in bytes 14618 * 14619 * A better SAX parsing routine. 14620 * parse an XML in-memory buffer and call the given SAX handler routines. 
14621 * 14622 * Returns 0 in case of success or a error number otherwise 14623 */ 14624 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14625 const char *buffer, int size) { 14626 int ret = 0; 14627 xmlParserCtxtPtr ctxt; 14628 14629 xmlInitParser(); 14630 14631 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14632 if (ctxt == NULL) return -1; 14633 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14634 xmlFree(ctxt->sax); 14635 ctxt->sax = sax; 14636 xmlDetectSAX2(ctxt); 14637 14638 if (user_data != NULL) 14639 ctxt->userData = user_data; 14640 14641 xmlParseDocument(ctxt); 14642 14643 if (ctxt->wellFormed) 14644 ret = 0; 14645 else { 14646 if (ctxt->errNo != 0) 14647 ret = ctxt->errNo; 14648 else 14649 ret = -1; 14650 } 14651 if (sax != NULL) 14652 ctxt->sax = NULL; 14653 if (ctxt->myDoc != NULL) { 14654 xmlFreeDoc(ctxt->myDoc); 14655 ctxt->myDoc = NULL; 14656 } 14657 xmlFreeParserCtxt(ctxt); 14658 14659 return ret; 14660 } 14661 #endif /* LIBXML_SAX1_ENABLED */ 14662 14663 /** 14664 * xmlCreateDocParserCtxt: 14665 * @cur: a pointer to an array of xmlChar 14666 * 14667 * Creates a parser context for an XML in-memory document. 14668 * 14669 * Returns the new parser context or NULL 14670 */ 14671 xmlParserCtxtPtr 14672 xmlCreateDocParserCtxt(const xmlChar *cur) { 14673 int len; 14674 14675 if (cur == NULL) 14676 return(NULL); 14677 len = xmlStrlen(cur); 14678 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14679 } 14680 14681 #ifdef LIBXML_SAX1_ENABLED 14682 /** 14683 * xmlSAXParseDoc: 14684 * @sax: the SAX handler block 14685 * @cur: a pointer to an array of xmlChar 14686 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14687 * documents 14688 * 14689 * parse an XML in-memory document and build a tree. 14690 * It use the given SAX function block to handle the parsing callback. 14691 * If sax is NULL, fallback to the default DOM tree building routines. 
14692 * 14693 * Returns the resulting document tree 14694 */ 14695 14696 xmlDocPtr 14697 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14698 xmlDocPtr ret; 14699 xmlParserCtxtPtr ctxt; 14700 xmlSAXHandlerPtr oldsax = NULL; 14701 14702 if (cur == NULL) return(NULL); 14703 14704 14705 ctxt = xmlCreateDocParserCtxt(cur); 14706 if (ctxt == NULL) return(NULL); 14707 if (sax != NULL) { 14708 oldsax = ctxt->sax; 14709 ctxt->sax = sax; 14710 ctxt->userData = NULL; 14711 } 14712 xmlDetectSAX2(ctxt); 14713 14714 xmlParseDocument(ctxt); 14715 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14716 else { 14717 ret = NULL; 14718 xmlFreeDoc(ctxt->myDoc); 14719 ctxt->myDoc = NULL; 14720 } 14721 if (sax != NULL) 14722 ctxt->sax = oldsax; 14723 xmlFreeParserCtxt(ctxt); 14724 14725 return(ret); 14726 } 14727 14728 /** 14729 * xmlParseDoc: 14730 * @cur: a pointer to an array of xmlChar 14731 * 14732 * parse an XML in-memory document and build a tree. 14733 * 14734 * Returns the resulting document tree 14735 */ 14736 14737 xmlDocPtr 14738 xmlParseDoc(const xmlChar *cur) { 14739 return(xmlSAXParseDoc(NULL, cur, 0)); 14740 } 14741 #endif /* LIBXML_SAX1_ENABLED */ 14742 14743 #ifdef LIBXML_LEGACY_ENABLED 14744 /************************************************************************ 14745 * * 14746 * Specific function to keep track of entities references * 14747 * and used by the XSLT debugger * 14748 * * 14749 ************************************************************************/ 14750 14751 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14752 14753 /** 14754 * xmlAddEntityReference: 14755 * @ent : A valid entity 14756 * @firstNode : A valid first node for children of entity 14757 * @lastNode : A valid last node of children entity 14758 * 14759 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14760 */ 14761 static void 14762 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14763 xmlNodePtr lastNode) 
14764 { 14765 if (xmlEntityRefFunc != NULL) { 14766 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14767 } 14768 } 14769 14770 14771 /** 14772 * xmlSetEntityReferenceFunc: 14773 * @func: A valid function 14774 * 14775 * Set the function to call call back when a xml reference has been made 14776 */ 14777 void 14778 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14779 { 14780 xmlEntityRefFunc = func; 14781 } 14782 #endif /* LIBXML_LEGACY_ENABLED */ 14783 14784 /************************************************************************ 14785 * * 14786 * Miscellaneous * 14787 * * 14788 ************************************************************************/ 14789 14790 #ifdef LIBXML_XPATH_ENABLED 14791 #include <libxml/xpath.h> 14792 #endif 14793 14794 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14795 static int xmlParserInitialized = 0; 14796 14797 /** 14798 * xmlInitParser: 14799 * 14800 * Initialization function for the XML parser. 14801 * This is not reentrant. Call once before processing in case of 14802 * use in multithreaded programs. 
14803 */ 14804 14805 void 14806 xmlInitParser(void) { 14807 if (xmlParserInitialized != 0) 14808 return; 14809 14810 #ifdef LIBXML_THREAD_ENABLED 14811 __xmlGlobalInitMutexLock(); 14812 if (xmlParserInitialized == 0) { 14813 #endif 14814 xmlInitThreads(); 14815 xmlInitGlobals(); 14816 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14817 (xmlGenericError == NULL)) 14818 initGenericErrorDefaultFunc(NULL); 14819 xmlInitMemory(); 14820 xmlInitializeDict(); 14821 xmlInitCharEncodingHandlers(); 14822 xmlDefaultSAXHandlerInit(); 14823 xmlRegisterDefaultInputCallbacks(); 14824 #ifdef LIBXML_OUTPUT_ENABLED 14825 xmlRegisterDefaultOutputCallbacks(); 14826 #endif /* LIBXML_OUTPUT_ENABLED */ 14827 #ifdef LIBXML_HTML_ENABLED 14828 htmlInitAutoClose(); 14829 htmlDefaultSAXHandlerInit(); 14830 #endif 14831 #ifdef LIBXML_XPATH_ENABLED 14832 xmlXPathInit(); 14833 #endif 14834 xmlParserInitialized = 1; 14835 #ifdef LIBXML_THREAD_ENABLED 14836 } 14837 __xmlGlobalInitMutexUnlock(); 14838 #endif 14839 } 14840 14841 /** 14842 * xmlCleanupParser: 14843 * 14844 * This function name is somewhat misleading. It does not clean up 14845 * parser state, it cleans up memory allocated by the library itself. 14846 * It is a cleanup function for the XML library. It tries to reclaim all 14847 * related global memory allocated for the library processing. 14848 * It doesn't deallocate any document related memory. One should 14849 * call xmlCleanupParser() only when the process has finished using 14850 * the library and all XML/HTML documents built with it. 14851 * See also xmlInitParser() which has the opposite function of preparing 14852 * the library for operations. 14853 * 14854 * WARNING: if your application is multithreaded or has plugin support 14855 * calling this may crash the application if another thread or 14856 * a plugin is still using libxml2. 
It's sometimes very hard to 14857 * guess if libxml2 is in use in the application, some libraries 14858 * or plugins may use it without notice. In case of doubt abstain 14859 * from calling this function or do it just before calling exit() 14860 * to avoid leak reports from valgrind ! 14861 */ 14862 14863 void 14864 xmlCleanupParser(void) { 14865 if (!xmlParserInitialized) 14866 return; 14867 14868 xmlCleanupCharEncodingHandlers(); 14869 #ifdef LIBXML_CATALOG_ENABLED 14870 xmlCatalogCleanup(); 14871 #endif 14872 xmlDictCleanup(); 14873 xmlCleanupInputCallbacks(); 14874 #ifdef LIBXML_OUTPUT_ENABLED 14875 xmlCleanupOutputCallbacks(); 14876 #endif 14877 #ifdef LIBXML_SCHEMAS_ENABLED 14878 xmlSchemaCleanupTypes(); 14879 xmlRelaxNGCleanupTypes(); 14880 #endif 14881 xmlResetLastError(); 14882 xmlCleanupGlobals(); 14883 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14884 xmlCleanupMemory(); 14885 xmlParserInitialized = 0; 14886 } 14887 14888 /************************************************************************ 14889 * * 14890 * New set (2.6.0) of simpler and more flexible APIs * 14891 * * 14892 ************************************************************************/ 14893 14894 /** 14895 * DICT_FREE: 14896 * @str: a string 14897 * 14898 * Free a string if it is not owned by the "dict" dictionnary in the 14899 * current scope 14900 */ 14901 #define DICT_FREE(str) \ 14902 if ((str) && ((!dict) || \ 14903 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14904 xmlFree((char *)(str)); 14905 14906 /** 14907 * xmlCtxtReset: 14908 * @ctxt: an XML parser context 14909 * 14910 * Reset a parser context 14911 */ 14912 void 14913 xmlCtxtReset(xmlParserCtxtPtr ctxt) 14914 { 14915 xmlParserInputPtr input; 14916 xmlDictPtr dict; 14917 14918 if (ctxt == NULL) 14919 return; 14920 14921 dict = ctxt->dict; 14922 14923 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14924 xmlFreeInputStream(input); 14925 } 14926 ctxt->inputNr = 0; 14927 
ctxt->input = NULL; 14928 14929 ctxt->spaceNr = 0; 14930 if (ctxt->spaceTab != NULL) { 14931 ctxt->spaceTab[0] = -1; 14932 ctxt->space = &ctxt->spaceTab[0]; 14933 } else { 14934 ctxt->space = NULL; 14935 } 14936 14937 14938 ctxt->nodeNr = 0; 14939 ctxt->node = NULL; 14940 14941 ctxt->nameNr = 0; 14942 ctxt->name = NULL; 14943 14944 DICT_FREE(ctxt->version); 14945 ctxt->version = NULL; 14946 DICT_FREE(ctxt->encoding); 14947 ctxt->encoding = NULL; 14948 DICT_FREE(ctxt->directory); 14949 ctxt->directory = NULL; 14950 DICT_FREE(ctxt->extSubURI); 14951 ctxt->extSubURI = NULL; 14952 DICT_FREE(ctxt->extSubSystem); 14953 ctxt->extSubSystem = NULL; 14954 if (ctxt->myDoc != NULL) 14955 xmlFreeDoc(ctxt->myDoc); 14956 ctxt->myDoc = NULL; 14957 14958 ctxt->standalone = -1; 14959 ctxt->hasExternalSubset = 0; 14960 ctxt->hasPErefs = 0; 14961 ctxt->html = 0; 14962 ctxt->external = 0; 14963 ctxt->instate = XML_PARSER_START; 14964 ctxt->token = 0; 14965 14966 ctxt->wellFormed = 1; 14967 ctxt->nsWellFormed = 1; 14968 ctxt->disableSAX = 0; 14969 ctxt->valid = 1; 14970 #if 0 14971 ctxt->vctxt.userData = ctxt; 14972 ctxt->vctxt.error = xmlParserValidityError; 14973 ctxt->vctxt.warning = xmlParserValidityWarning; 14974 #endif 14975 ctxt->record_info = 0; 14976 ctxt->nbChars = 0; 14977 ctxt->checkIndex = 0; 14978 ctxt->inSubset = 0; 14979 ctxt->errNo = XML_ERR_OK; 14980 ctxt->depth = 0; 14981 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14982 ctxt->catalogs = NULL; 14983 ctxt->nbentities = 0; 14984 ctxt->sizeentities = 0; 14985 ctxt->sizeentcopy = 0; 14986 xmlInitNodeInfoSeq(&ctxt->node_seq); 14987 14988 if (ctxt->attsDefault != NULL) { 14989 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 14990 ctxt->attsDefault = NULL; 14991 } 14992 if (ctxt->attsSpecial != NULL) { 14993 xmlHashFree(ctxt->attsSpecial, NULL); 14994 ctxt->attsSpecial = NULL; 14995 } 14996 14997 #ifdef LIBXML_CATALOG_ENABLED 14998 if (ctxt->catalogs != NULL) 14999 xmlCatalogFreeLocal(ctxt->catalogs); 15000 #endif 
15001 if (ctxt->lastError.code != XML_ERR_OK) 15002 xmlResetError(&ctxt->lastError); 15003 } 15004 15005 /** 15006 * xmlCtxtResetPush: 15007 * @ctxt: an XML parser context 15008 * @chunk: a pointer to an array of chars 15009 * @size: number of chars in the array 15010 * @filename: an optional file name or URI 15011 * @encoding: the document encoding, or NULL 15012 * 15013 * Reset a push parser context 15014 * 15015 * Returns 0 in case of success and 1 in case of error 15016 */ 15017 int 15018 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 15019 int size, const char *filename, const char *encoding) 15020 { 15021 xmlParserInputPtr inputStream; 15022 xmlParserInputBufferPtr buf; 15023 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 15024 15025 if (ctxt == NULL) 15026 return(1); 15027 15028 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 15029 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 15030 15031 buf = xmlAllocParserInputBuffer(enc); 15032 if (buf == NULL) 15033 return(1); 15034 15035 if (ctxt == NULL) { 15036 xmlFreeParserInputBuffer(buf); 15037 return(1); 15038 } 15039 15040 xmlCtxtReset(ctxt); 15041 15042 if (ctxt->pushTab == NULL) { 15043 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 15044 sizeof(xmlChar *)); 15045 if (ctxt->pushTab == NULL) { 15046 xmlErrMemory(ctxt, NULL); 15047 xmlFreeParserInputBuffer(buf); 15048 return(1); 15049 } 15050 } 15051 15052 if (filename == NULL) { 15053 ctxt->directory = NULL; 15054 } else { 15055 ctxt->directory = xmlParserGetDirectory(filename); 15056 } 15057 15058 inputStream = xmlNewInputStream(ctxt); 15059 if (inputStream == NULL) { 15060 xmlFreeParserInputBuffer(buf); 15061 return(1); 15062 } 15063 15064 if (filename == NULL) 15065 inputStream->filename = NULL; 15066 else 15067 inputStream->filename = (char *) 15068 xmlCanonicPath((const xmlChar *) filename); 15069 inputStream->buf = buf; 15070 xmlBufResetInput(buf->buffer, inputStream); 15071 15072 inputPush(ctxt, inputStream); 
15073 15074 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 15075 (ctxt->input->buf != NULL)) { 15076 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 15077 size_t cur = ctxt->input->cur - ctxt->input->base; 15078 15079 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 15080 15081 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 15082 #ifdef DEBUG_PUSH 15083 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 15084 #endif 15085 } 15086 15087 if (encoding != NULL) { 15088 xmlCharEncodingHandlerPtr hdlr; 15089 15090 if (ctxt->encoding != NULL) 15091 xmlFree((xmlChar *) ctxt->encoding); 15092 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15093 15094 hdlr = xmlFindCharEncodingHandler(encoding); 15095 if (hdlr != NULL) { 15096 xmlSwitchToEncoding(ctxt, hdlr); 15097 } else { 15098 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 15099 "Unsupported encoding %s\n", BAD_CAST encoding); 15100 } 15101 } else if (enc != XML_CHAR_ENCODING_NONE) { 15102 xmlSwitchEncoding(ctxt, enc); 15103 } 15104 15105 return(0); 15106 } 15107 15108 15109 /** 15110 * xmlCtxtUseOptionsInternal: 15111 * @ctxt: an XML parser context 15112 * @options: a combination of xmlParserOption 15113 * @encoding: the user provided encoding to use 15114 * 15115 * Applies the options to the parser context 15116 * 15117 * Returns 0 in case of success, the set of unknown or unimplemented options 15118 * in case of error. 
15119 */ 15120 static int 15121 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15122 { 15123 if (ctxt == NULL) 15124 return(-1); 15125 if (encoding != NULL) { 15126 if (ctxt->encoding != NULL) 15127 xmlFree((xmlChar *) ctxt->encoding); 15128 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15129 } 15130 if (options & XML_PARSE_RECOVER) { 15131 ctxt->recovery = 1; 15132 options -= XML_PARSE_RECOVER; 15133 ctxt->options |= XML_PARSE_RECOVER; 15134 } else 15135 ctxt->recovery = 0; 15136 if (options & XML_PARSE_DTDLOAD) { 15137 ctxt->loadsubset = XML_DETECT_IDS; 15138 options -= XML_PARSE_DTDLOAD; 15139 ctxt->options |= XML_PARSE_DTDLOAD; 15140 } else 15141 ctxt->loadsubset = 0; 15142 if (options & XML_PARSE_DTDATTR) { 15143 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15144 options -= XML_PARSE_DTDATTR; 15145 ctxt->options |= XML_PARSE_DTDATTR; 15146 } 15147 if (options & XML_PARSE_NOENT) { 15148 ctxt->replaceEntities = 1; 15149 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15150 options -= XML_PARSE_NOENT; 15151 ctxt->options |= XML_PARSE_NOENT; 15152 } else 15153 ctxt->replaceEntities = 0; 15154 if (options & XML_PARSE_PEDANTIC) { 15155 ctxt->pedantic = 1; 15156 options -= XML_PARSE_PEDANTIC; 15157 ctxt->options |= XML_PARSE_PEDANTIC; 15158 } else 15159 ctxt->pedantic = 0; 15160 if (options & XML_PARSE_NOBLANKS) { 15161 ctxt->keepBlanks = 0; 15162 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15163 options -= XML_PARSE_NOBLANKS; 15164 ctxt->options |= XML_PARSE_NOBLANKS; 15165 } else 15166 ctxt->keepBlanks = 1; 15167 if (options & XML_PARSE_DTDVALID) { 15168 ctxt->validate = 1; 15169 if (options & XML_PARSE_NOWARNING) 15170 ctxt->vctxt.warning = NULL; 15171 if (options & XML_PARSE_NOERROR) 15172 ctxt->vctxt.error = NULL; 15173 options -= XML_PARSE_DTDVALID; 15174 ctxt->options |= XML_PARSE_DTDVALID; 15175 } else 15176 ctxt->validate = 0; 15177 if (options & XML_PARSE_NOWARNING) { 15178 ctxt->sax->warning = NULL; 
15179 options -= XML_PARSE_NOWARNING; 15180 } 15181 if (options & XML_PARSE_NOERROR) { 15182 ctxt->sax->error = NULL; 15183 ctxt->sax->fatalError = NULL; 15184 options -= XML_PARSE_NOERROR; 15185 } 15186 #ifdef LIBXML_SAX1_ENABLED 15187 if (options & XML_PARSE_SAX1) { 15188 ctxt->sax->startElement = xmlSAX2StartElement; 15189 ctxt->sax->endElement = xmlSAX2EndElement; 15190 ctxt->sax->startElementNs = NULL; 15191 ctxt->sax->endElementNs = NULL; 15192 ctxt->sax->initialized = 1; 15193 options -= XML_PARSE_SAX1; 15194 ctxt->options |= XML_PARSE_SAX1; 15195 } 15196 #endif /* LIBXML_SAX1_ENABLED */ 15197 if (options & XML_PARSE_NODICT) { 15198 ctxt->dictNames = 0; 15199 options -= XML_PARSE_NODICT; 15200 ctxt->options |= XML_PARSE_NODICT; 15201 } else { 15202 ctxt->dictNames = 1; 15203 } 15204 if (options & XML_PARSE_NOCDATA) { 15205 ctxt->sax->cdataBlock = NULL; 15206 options -= XML_PARSE_NOCDATA; 15207 ctxt->options |= XML_PARSE_NOCDATA; 15208 } 15209 if (options & XML_PARSE_NSCLEAN) { 15210 ctxt->options |= XML_PARSE_NSCLEAN; 15211 options -= XML_PARSE_NSCLEAN; 15212 } 15213 if (options & XML_PARSE_NONET) { 15214 ctxt->options |= XML_PARSE_NONET; 15215 options -= XML_PARSE_NONET; 15216 } 15217 if (options & XML_PARSE_COMPACT) { 15218 ctxt->options |= XML_PARSE_COMPACT; 15219 options -= XML_PARSE_COMPACT; 15220 } 15221 if (options & XML_PARSE_OLD10) { 15222 ctxt->options |= XML_PARSE_OLD10; 15223 options -= XML_PARSE_OLD10; 15224 } 15225 if (options & XML_PARSE_NOBASEFIX) { 15226 ctxt->options |= XML_PARSE_NOBASEFIX; 15227 options -= XML_PARSE_NOBASEFIX; 15228 } 15229 if (options & XML_PARSE_HUGE) { 15230 ctxt->options |= XML_PARSE_HUGE; 15231 options -= XML_PARSE_HUGE; 15232 if (ctxt->dict != NULL) 15233 xmlDictSetLimit(ctxt->dict, 0); 15234 } 15235 if (options & XML_PARSE_OLDSAX) { 15236 ctxt->options |= XML_PARSE_OLDSAX; 15237 options -= XML_PARSE_OLDSAX; 15238 } 15239 if (options & XML_PARSE_IGNORE_ENC) { 15240 ctxt->options |= XML_PARSE_IGNORE_ENC; 15241 options 
-= XML_PARSE_IGNORE_ENC; 15242 } 15243 if (options & XML_PARSE_BIG_LINES) { 15244 ctxt->options |= XML_PARSE_BIG_LINES; 15245 options -= XML_PARSE_BIG_LINES; 15246 } 15247 ctxt->linenumbers = 1; 15248 return (options); 15249 } 15250 15251 /** 15252 * xmlCtxtUseOptions: 15253 * @ctxt: an XML parser context 15254 * @options: a combination of xmlParserOption 15255 * 15256 * Applies the options to the parser context 15257 * 15258 * Returns 0 in case of success, the set of unknown or unimplemented options 15259 * in case of error. 15260 */ 15261 int 15262 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15263 { 15264 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15265 } 15266 15267 /** 15268 * xmlDoRead: 15269 * @ctxt: an XML parser context 15270 * @URL: the base URL to use for the document 15271 * @encoding: the document encoding, or NULL 15272 * @options: a combination of xmlParserOption 15273 * @reuse: keep the context for reuse 15274 * 15275 * Common front-end for the xmlRead functions 15276 * 15277 * Returns the resulting document tree or NULL 15278 */ 15279 static xmlDocPtr 15280 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15281 int options, int reuse) 15282 { 15283 xmlDocPtr ret; 15284 15285 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15286 if (encoding != NULL) { 15287 xmlCharEncodingHandlerPtr hdlr; 15288 15289 hdlr = xmlFindCharEncodingHandler(encoding); 15290 if (hdlr != NULL) 15291 xmlSwitchToEncoding(ctxt, hdlr); 15292 } 15293 if ((URL != NULL) && (ctxt->input != NULL) && 15294 (ctxt->input->filename == NULL)) 15295 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15296 xmlParseDocument(ctxt); 15297 if ((ctxt->wellFormed) || ctxt->recovery) 15298 ret = ctxt->myDoc; 15299 else { 15300 ret = NULL; 15301 if (ctxt->myDoc != NULL) { 15302 xmlFreeDoc(ctxt->myDoc); 15303 } 15304 } 15305 ctxt->myDoc = NULL; 15306 if (!reuse) { 15307 xmlFreeParserCtxt(ctxt); 15308 } 15309 15310 return (ret); 15311 
} 15312 15313 /** 15314 * xmlReadDoc: 15315 * @cur: a pointer to a zero terminated string 15316 * @URL: the base URL to use for the document 15317 * @encoding: the document encoding, or NULL 15318 * @options: a combination of xmlParserOption 15319 * 15320 * parse an XML in-memory document and build a tree. 15321 * 15322 * Returns the resulting document tree 15323 */ 15324 xmlDocPtr 15325 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15326 { 15327 xmlParserCtxtPtr ctxt; 15328 15329 if (cur == NULL) 15330 return (NULL); 15331 xmlInitParser(); 15332 15333 ctxt = xmlCreateDocParserCtxt(cur); 15334 if (ctxt == NULL) 15335 return (NULL); 15336 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15337 } 15338 15339 /** 15340 * xmlReadFile: 15341 * @filename: a file or URL 15342 * @encoding: the document encoding, or NULL 15343 * @options: a combination of xmlParserOption 15344 * 15345 * parse an XML file from the filesystem or the network. 15346 * 15347 * Returns the resulting document tree 15348 */ 15349 xmlDocPtr 15350 xmlReadFile(const char *filename, const char *encoding, int options) 15351 { 15352 xmlParserCtxtPtr ctxt; 15353 15354 xmlInitParser(); 15355 ctxt = xmlCreateURLParserCtxt(filename, options); 15356 if (ctxt == NULL) 15357 return (NULL); 15358 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15359 } 15360 15361 /** 15362 * xmlReadMemory: 15363 * @buffer: a pointer to a char array 15364 * @size: the size of the array 15365 * @URL: the base URL to use for the document 15366 * @encoding: the document encoding, or NULL 15367 * @options: a combination of xmlParserOption 15368 * 15369 * parse an XML in-memory document and build a tree. 
15370 * 15371 * Returns the resulting document tree 15372 */ 15373 xmlDocPtr 15374 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15375 { 15376 xmlParserCtxtPtr ctxt; 15377 15378 xmlInitParser(); 15379 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15380 if (ctxt == NULL) 15381 return (NULL); 15382 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15383 } 15384 15385 /** 15386 * xmlReadFd: 15387 * @fd: an open file descriptor 15388 * @URL: the base URL to use for the document 15389 * @encoding: the document encoding, or NULL 15390 * @options: a combination of xmlParserOption 15391 * 15392 * parse an XML from a file descriptor and build a tree. 15393 * NOTE that the file descriptor will not be closed when the 15394 * reader is closed or reset. 15395 * 15396 * Returns the resulting document tree 15397 */ 15398 xmlDocPtr 15399 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15400 { 15401 xmlParserCtxtPtr ctxt; 15402 xmlParserInputBufferPtr input; 15403 xmlParserInputPtr stream; 15404 15405 if (fd < 0) 15406 return (NULL); 15407 xmlInitParser(); 15408 15409 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15410 if (input == NULL) 15411 return (NULL); 15412 input->closecallback = NULL; 15413 ctxt = xmlNewParserCtxt(); 15414 if (ctxt == NULL) { 15415 xmlFreeParserInputBuffer(input); 15416 return (NULL); 15417 } 15418 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15419 if (stream == NULL) { 15420 xmlFreeParserInputBuffer(input); 15421 xmlFreeParserCtxt(ctxt); 15422 return (NULL); 15423 } 15424 inputPush(ctxt, stream); 15425 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15426 } 15427 15428 /** 15429 * xmlReadIO: 15430 * @ioread: an I/O read function 15431 * @ioclose: an I/O close function 15432 * @ioctx: an I/O handler 15433 * @URL: the base URL to use for the document 15434 * @encoding: the document encoding, or NULL 15435 * @options: a combination of 
xmlParserOption 15436 * 15437 * parse an XML document from I/O functions and source and build a tree. 15438 * 15439 * Returns the resulting document tree 15440 */ 15441 xmlDocPtr 15442 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15443 void *ioctx, const char *URL, const char *encoding, int options) 15444 { 15445 xmlParserCtxtPtr ctxt; 15446 xmlParserInputBufferPtr input; 15447 xmlParserInputPtr stream; 15448 15449 if (ioread == NULL) 15450 return (NULL); 15451 xmlInitParser(); 15452 15453 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15454 XML_CHAR_ENCODING_NONE); 15455 if (input == NULL) { 15456 if (ioclose != NULL) 15457 ioclose(ioctx); 15458 return (NULL); 15459 } 15460 ctxt = xmlNewParserCtxt(); 15461 if (ctxt == NULL) { 15462 xmlFreeParserInputBuffer(input); 15463 return (NULL); 15464 } 15465 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15466 if (stream == NULL) { 15467 xmlFreeParserInputBuffer(input); 15468 xmlFreeParserCtxt(ctxt); 15469 return (NULL); 15470 } 15471 inputPush(ctxt, stream); 15472 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15473 } 15474 15475 /** 15476 * xmlCtxtReadDoc: 15477 * @ctxt: an XML parser context 15478 * @cur: a pointer to a zero terminated string 15479 * @URL: the base URL to use for the document 15480 * @encoding: the document encoding, or NULL 15481 * @options: a combination of xmlParserOption 15482 * 15483 * parse an XML in-memory document and build a tree. 
15484 * This reuses the existing @ctxt parser context 15485 * 15486 * Returns the resulting document tree 15487 */ 15488 xmlDocPtr 15489 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15490 const char *URL, const char *encoding, int options) 15491 { 15492 xmlParserInputPtr stream; 15493 15494 if (cur == NULL) 15495 return (NULL); 15496 if (ctxt == NULL) 15497 return (NULL); 15498 xmlInitParser(); 15499 15500 xmlCtxtReset(ctxt); 15501 15502 stream = xmlNewStringInputStream(ctxt, cur); 15503 if (stream == NULL) { 15504 return (NULL); 15505 } 15506 inputPush(ctxt, stream); 15507 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15508 } 15509 15510 /** 15511 * xmlCtxtReadFile: 15512 * @ctxt: an XML parser context 15513 * @filename: a file or URL 15514 * @encoding: the document encoding, or NULL 15515 * @options: a combination of xmlParserOption 15516 * 15517 * parse an XML file from the filesystem or the network. 15518 * This reuses the existing @ctxt parser context 15519 * 15520 * Returns the resulting document tree 15521 */ 15522 xmlDocPtr 15523 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15524 const char *encoding, int options) 15525 { 15526 xmlParserInputPtr stream; 15527 15528 if (filename == NULL) 15529 return (NULL); 15530 if (ctxt == NULL) 15531 return (NULL); 15532 xmlInitParser(); 15533 15534 xmlCtxtReset(ctxt); 15535 15536 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15537 if (stream == NULL) { 15538 return (NULL); 15539 } 15540 inputPush(ctxt, stream); 15541 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15542 } 15543 15544 /** 15545 * xmlCtxtReadMemory: 15546 * @ctxt: an XML parser context 15547 * @buffer: a pointer to a char array 15548 * @size: the size of the array 15549 * @URL: the base URL to use for the document 15550 * @encoding: the document encoding, or NULL 15551 * @options: a combination of xmlParserOption 15552 * 15553 * parse an XML in-memory document and build a tree. 
15554 * This reuses the existing @ctxt parser context 15555 * 15556 * Returns the resulting document tree 15557 */ 15558 xmlDocPtr 15559 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15560 const char *URL, const char *encoding, int options) 15561 { 15562 xmlParserInputBufferPtr input; 15563 xmlParserInputPtr stream; 15564 15565 if (ctxt == NULL) 15566 return (NULL); 15567 if (buffer == NULL) 15568 return (NULL); 15569 xmlInitParser(); 15570 15571 xmlCtxtReset(ctxt); 15572 15573 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15574 if (input == NULL) { 15575 return(NULL); 15576 } 15577 15578 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15579 if (stream == NULL) { 15580 xmlFreeParserInputBuffer(input); 15581 return(NULL); 15582 } 15583 15584 inputPush(ctxt, stream); 15585 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15586 } 15587 15588 /** 15589 * xmlCtxtReadFd: 15590 * @ctxt: an XML parser context 15591 * @fd: an open file descriptor 15592 * @URL: the base URL to use for the document 15593 * @encoding: the document encoding, or NULL 15594 * @options: a combination of xmlParserOption 15595 * 15596 * parse an XML from a file descriptor and build a tree. 15597 * This reuses the existing @ctxt parser context 15598 * NOTE that the file descriptor will not be closed when the 15599 * reader is closed or reset. 
15600 * 15601 * Returns the resulting document tree 15602 */ 15603 xmlDocPtr 15604 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15605 const char *URL, const char *encoding, int options) 15606 { 15607 xmlParserInputBufferPtr input; 15608 xmlParserInputPtr stream; 15609 15610 if (fd < 0) 15611 return (NULL); 15612 if (ctxt == NULL) 15613 return (NULL); 15614 xmlInitParser(); 15615 15616 xmlCtxtReset(ctxt); 15617 15618 15619 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15620 if (input == NULL) 15621 return (NULL); 15622 input->closecallback = NULL; 15623 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15624 if (stream == NULL) { 15625 xmlFreeParserInputBuffer(input); 15626 return (NULL); 15627 } 15628 inputPush(ctxt, stream); 15629 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15630 } 15631 15632 /** 15633 * xmlCtxtReadIO: 15634 * @ctxt: an XML parser context 15635 * @ioread: an I/O read function 15636 * @ioclose: an I/O close function 15637 * @ioctx: an I/O handler 15638 * @URL: the base URL to use for the document 15639 * @encoding: the document encoding, or NULL 15640 * @options: a combination of xmlParserOption 15641 * 15642 * parse an XML document from I/O functions and source and build a tree. 
15643 * This reuses the existing @ctxt parser context 15644 * 15645 * Returns the resulting document tree 15646 */ 15647 xmlDocPtr 15648 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15649 xmlInputCloseCallback ioclose, void *ioctx, 15650 const char *URL, 15651 const char *encoding, int options) 15652 { 15653 xmlParserInputBufferPtr input; 15654 xmlParserInputPtr stream; 15655 15656 if (ioread == NULL) 15657 return (NULL); 15658 if (ctxt == NULL) 15659 return (NULL); 15660 xmlInitParser(); 15661 15662 xmlCtxtReset(ctxt); 15663 15664 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15665 XML_CHAR_ENCODING_NONE); 15666 if (input == NULL) { 15667 if (ioclose != NULL) 15668 ioclose(ioctx); 15669 return (NULL); 15670 } 15671 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15672 if (stream == NULL) { 15673 xmlFreeParserInputBuffer(input); 15674 return (NULL); 15675 } 15676 inputPush(ctxt, stream); 15677 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15678 } 15679 15680 #define bottom_parser 15681 #include "elfgcchack.h" 15682