/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * daniel (at) veillard.com 31 */ 32 33 #define IN_LIBXML 34 #include "libxml.h" 35 36 #if defined(WIN32) && !defined (__CYGWIN__) 37 #define XML_DIR_SEP '\\' 38 #else 39 #define XML_DIR_SEP '/' 40 #endif 41 42 #include <stdlib.h> 43 #include <string.h> 44 #include <stdarg.h> 45 #include <libxml/xmlmemory.h> 46 #include <libxml/threads.h> 47 #include <libxml/globals.h> 48 #include <libxml/tree.h> 49 #include <libxml/parser.h> 50 #include <libxml/parserInternals.h> 51 #include <libxml/valid.h> 52 #include <libxml/entities.h> 53 #include <libxml/xmlerror.h> 54 #include <libxml/encoding.h> 55 #include <libxml/xmlIO.h> 56 #include <libxml/uri.h> 57 #ifdef LIBXML_CATALOG_ENABLED 58 #include <libxml/catalog.h> 59 #endif 60 #ifdef LIBXML_SCHEMAS_ENABLED 61 #include <libxml/xmlschemastypes.h> 62 #include <libxml/relaxng.h> 63 #endif 64 #ifdef HAVE_CTYPE_H 65 #include <ctype.h> 66 #endif 67 #ifdef HAVE_STDLIB_H 68 #include <stdlib.h> 69 #endif 70 #ifdef HAVE_SYS_STAT_H 71 #include <sys/stat.h> 72 #endif 73 #ifdef HAVE_FCNTL_H 74 #include <fcntl.h> 75 #endif 76 #ifdef HAVE_UNISTD_H 77 #include <unistd.h> 78 #endif 79 #ifdef HAVE_ZLIB_H 80 #include <zlib.h> 81 #endif 82 83 static void 84 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 85 86 static xmlParserCtxtPtr 87 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 88 const xmlChar *base, xmlParserCtxtPtr pctx); 89 90 /************************************************************************ 91 * * 92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE * 93 * * 94 ************************************************************************/ 95 96 #define XML_PARSER_BIG_ENTITY 1000 97 #define XML_PARSER_LOT_ENTITY 5000 98 99 /* 100 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 101 * replacement over the size in byte of the input indicates that you have 102 * and eponential behaviour. 
A value of 10 correspond to at least 3 entity 103 * replacement per byte of input. 104 */ 105 #define XML_PARSER_NON_LINEAR 10 106 107 /* 108 * xmlParserEntityCheck 109 * 110 * Function to check non-linear entity expansion behaviour 111 * This is here to detect and stop exponential linear entity expansion 112 * This is not a limitation of the parser but a safety 113 * boundary feature. It can be disabled with the XML_PARSE_HUGE 114 * parser option. 115 */ 116 static int 117 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, 118 xmlEntityPtr ent) 119 { 120 unsigned long consumed = 0; 121 122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 123 return (0); 124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 125 return (1); 126 if (size != 0) { 127 /* 128 * Do the check based on the replacement size of the entity 129 */ 130 if (size < XML_PARSER_BIG_ENTITY) 131 return(0); 132 133 /* 134 * A limit on the amount of text data reasonably used 135 */ 136 if (ctxt->input != NULL) { 137 consumed = ctxt->input->consumed + 138 (ctxt->input->cur - ctxt->input->base); 139 } 140 consumed += ctxt->sizeentities; 141 142 if ((size < XML_PARSER_NON_LINEAR * consumed) && 143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 144 return (0); 145 } else if (ent != NULL) { 146 /* 147 * use the number of parsed entities in the replacement 148 */ 149 size = ent->checked; 150 151 /* 152 * The amount of data parsed counting entities size only once 153 */ 154 if (ctxt->input != NULL) { 155 consumed = ctxt->input->consumed + 156 (ctxt->input->cur - ctxt->input->base); 157 } 158 consumed += ctxt->sizeentities; 159 160 /* 161 * Check the density of entities for the amount of data 162 * knowing an entity reference will take at least 3 bytes 163 */ 164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 165 return (0); 166 } else { 167 /* 168 * strange we got no data for checking just return 169 */ 170 return (0); 171 } 172 173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, 
NULL); 174 return (1); 175 } 176 177 /** 178 * xmlParserMaxDepth: 179 * 180 * arbitrary depth limit for the XML documents that we allow to 181 * process. This is not a limitation of the parser but a safety 182 * boundary feature. It can be disabled with the XML_PARSE_HUGE 183 * parser option. 184 */ 185 unsigned int xmlParserMaxDepth = 256; 186 187 188 189 #define SAX2 1 190 #define XML_PARSER_BIG_BUFFER_SIZE 300 191 #define XML_PARSER_BUFFER_SIZE 100 192 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 193 194 /* 195 * List of XML prefixed PI allowed by W3C specs 196 */ 197 198 static const char *xmlW3CPIs[] = { 199 "xml-stylesheet", 200 NULL 201 }; 202 203 204 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 205 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 206 const xmlChar **str); 207 208 static xmlParserErrors 209 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 210 xmlSAXHandlerPtr sax, 211 void *user_data, int depth, const xmlChar *URL, 212 const xmlChar *ID, xmlNodePtr *list); 213 214 static int 215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 216 const char *encoding); 217 #ifdef LIBXML_LEGACY_ENABLED 218 static void 219 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 220 xmlNodePtr lastNode); 221 #endif /* LIBXML_LEGACY_ENABLED */ 222 223 static xmlParserErrors 224 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 225 const xmlChar *string, void *user_data, xmlNodePtr *lst); 226 227 static int 228 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 229 230 /************************************************************************ 231 * * 232 * Some factorized error routines * 233 * * 234 ************************************************************************/ 235 236 /** 237 * xmlErrAttributeDup: 238 * @ctxt: an XML parser context 239 * @prefix: the attribute prefix 240 * @localname: the attribute localname 241 * 242 * 
Handle a redefinition of attribute error 243 */ 244 static void 245 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 246 const xmlChar * localname) 247 { 248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 249 (ctxt->instate == XML_PARSER_EOF)) 250 return; 251 if (ctxt != NULL) 252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 253 254 if (prefix == NULL) 255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 257 (const char *) localname, NULL, NULL, 0, 0, 258 "Attribute %s redefined\n", localname); 259 else 260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 262 (const char *) prefix, (const char *) localname, 263 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 264 localname); 265 if (ctxt != NULL) { 266 ctxt->wellFormed = 0; 267 if (ctxt->recovery == 0) 268 ctxt->disableSAX = 1; 269 } 270 } 271 272 /** 273 * xmlFatalErr: 274 * @ctxt: an XML parser context 275 * @error: the error number 276 * @extra: extra information string 277 * 278 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 279 */ 280 static void 281 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 282 { 283 const char *errmsg; 284 285 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 286 (ctxt->instate == XML_PARSER_EOF)) 287 return; 288 switch (error) { 289 case XML_ERR_INVALID_HEX_CHARREF: 290 errmsg = "CharRef: invalid hexadecimal value\n"; 291 break; 292 case XML_ERR_INVALID_DEC_CHARREF: 293 errmsg = "CharRef: invalid decimal value\n"; 294 break; 295 case XML_ERR_INVALID_CHARREF: 296 errmsg = "CharRef: invalid value\n"; 297 break; 298 case XML_ERR_INTERNAL_ERROR: 299 errmsg = "internal error"; 300 break; 301 case XML_ERR_PEREF_AT_EOF: 302 errmsg = "PEReference at end of document\n"; 303 break; 304 case XML_ERR_PEREF_IN_PROLOG: 305 errmsg = "PEReference in prolog\n"; 306 break; 307 case XML_ERR_PEREF_IN_EPILOG: 308 errmsg = "PEReference in epilog\n"; 309 break; 310 case XML_ERR_PEREF_NO_NAME: 311 errmsg = "PEReference: no name\n"; 312 break; 313 case XML_ERR_PEREF_SEMICOL_MISSING: 314 errmsg = "PEReference: expecting ';'\n"; 315 break; 316 case XML_ERR_ENTITY_LOOP: 317 errmsg = "Detected an entity reference loop\n"; 318 break; 319 case XML_ERR_ENTITY_NOT_STARTED: 320 errmsg = "EntityValue: \" or ' expected\n"; 321 break; 322 case XML_ERR_ENTITY_PE_INTERNAL: 323 errmsg = "PEReferences forbidden in internal subset\n"; 324 break; 325 case XML_ERR_ENTITY_NOT_FINISHED: 326 errmsg = "EntityValue: \" or ' expected\n"; 327 break; 328 case XML_ERR_ATTRIBUTE_NOT_STARTED: 329 errmsg = "AttValue: \" or ' expected\n"; 330 break; 331 case XML_ERR_LT_IN_ATTRIBUTE: 332 errmsg = "Unescaped '<' not allowed in attributes values\n"; 333 break; 334 case XML_ERR_LITERAL_NOT_STARTED: 335 errmsg = "SystemLiteral \" or ' expected\n"; 336 break; 337 case XML_ERR_LITERAL_NOT_FINISHED: 338 errmsg = "Unfinished System or Public ID \" or ' expected\n"; 339 break; 340 case XML_ERR_MISPLACED_CDATA_END: 341 errmsg = "Sequence ']]>' not allowed in 
content\n"; 342 break; 343 case XML_ERR_URI_REQUIRED: 344 errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 345 break; 346 case XML_ERR_PUBID_REQUIRED: 347 errmsg = "PUBLIC, the Public Identifier is missing\n"; 348 break; 349 case XML_ERR_HYPHEN_IN_COMMENT: 350 errmsg = "Comment must not contain '--' (double-hyphen)\n"; 351 break; 352 case XML_ERR_PI_NOT_STARTED: 353 errmsg = "xmlParsePI : no target name\n"; 354 break; 355 case XML_ERR_RESERVED_XML_NAME: 356 errmsg = "Invalid PI name\n"; 357 break; 358 case XML_ERR_NOTATION_NOT_STARTED: 359 errmsg = "NOTATION: Name expected here\n"; 360 break; 361 case XML_ERR_NOTATION_NOT_FINISHED: 362 errmsg = "'>' required to close NOTATION declaration\n"; 363 break; 364 case XML_ERR_VALUE_REQUIRED: 365 errmsg = "Entity value required\n"; 366 break; 367 case XML_ERR_URI_FRAGMENT: 368 errmsg = "Fragment not allowed"; 369 break; 370 case XML_ERR_ATTLIST_NOT_STARTED: 371 errmsg = "'(' required to start ATTLIST enumeration\n"; 372 break; 373 case XML_ERR_NMTOKEN_REQUIRED: 374 errmsg = "NmToken expected in ATTLIST enumeration\n"; 375 break; 376 case XML_ERR_ATTLIST_NOT_FINISHED: 377 errmsg = "')' required to finish ATTLIST enumeration\n"; 378 break; 379 case XML_ERR_MIXED_NOT_STARTED: 380 errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 381 break; 382 case XML_ERR_PCDATA_REQUIRED: 383 errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 384 break; 385 case XML_ERR_ELEMCONTENT_NOT_STARTED: 386 errmsg = "ContentDecl : Name or '(' expected\n"; 387 break; 388 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 389 errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 390 break; 391 case XML_ERR_PEREF_IN_INT_SUBSET: 392 errmsg = 393 "PEReference: forbidden within markup decl in internal subset\n"; 394 break; 395 case XML_ERR_GT_REQUIRED: 396 errmsg = "expected '>'\n"; 397 break; 398 case XML_ERR_CONDSEC_INVALID: 399 errmsg = "XML conditional section '[' expected\n"; 400 break; 401 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 402 errmsg = "Content error in 
the external subset\n"; 403 break; 404 case XML_ERR_CONDSEC_INVALID_KEYWORD: 405 errmsg = 406 "conditional section INCLUDE or IGNORE keyword expected\n"; 407 break; 408 case XML_ERR_CONDSEC_NOT_FINISHED: 409 errmsg = "XML conditional section not closed\n"; 410 break; 411 case XML_ERR_XMLDECL_NOT_STARTED: 412 errmsg = "Text declaration '<?xml' required\n"; 413 break; 414 case XML_ERR_XMLDECL_NOT_FINISHED: 415 errmsg = "parsing XML declaration: '?>' expected\n"; 416 break; 417 case XML_ERR_EXT_ENTITY_STANDALONE: 418 errmsg = "external parsed entities cannot be standalone\n"; 419 break; 420 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 421 errmsg = "EntityRef: expecting ';'\n"; 422 break; 423 case XML_ERR_DOCTYPE_NOT_FINISHED: 424 errmsg = "DOCTYPE improperly terminated\n"; 425 break; 426 case XML_ERR_LTSLASH_REQUIRED: 427 errmsg = "EndTag: '</' not found\n"; 428 break; 429 case XML_ERR_EQUAL_REQUIRED: 430 errmsg = "expected '='\n"; 431 break; 432 case XML_ERR_STRING_NOT_CLOSED: 433 errmsg = "String not closed expecting \" or '\n"; 434 break; 435 case XML_ERR_STRING_NOT_STARTED: 436 errmsg = "String not started expecting ' or \"\n"; 437 break; 438 case XML_ERR_ENCODING_NAME: 439 errmsg = "Invalid XML encoding name\n"; 440 break; 441 case XML_ERR_STANDALONE_VALUE: 442 errmsg = "standalone accepts only 'yes' or 'no'\n"; 443 break; 444 case XML_ERR_DOCUMENT_EMPTY: 445 errmsg = "Document is empty\n"; 446 break; 447 case XML_ERR_DOCUMENT_END: 448 errmsg = "Extra content at the end of the document\n"; 449 break; 450 case XML_ERR_NOT_WELL_BALANCED: 451 errmsg = "chunk is not well balanced\n"; 452 break; 453 case XML_ERR_EXTRA_CONTENT: 454 errmsg = "extra content at the end of well balanced chunk\n"; 455 break; 456 case XML_ERR_VERSION_MISSING: 457 errmsg = "Malformed declaration expecting version\n"; 458 break; 459 #if 0 460 case: 461 errmsg = "\n"; 462 break; 463 #endif 464 default: 465 errmsg = "Unregistered error message\n"; 466 } 467 if (ctxt != NULL) 468 ctxt->errNo = error; 
469 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 470 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, 471 info); 472 if (ctxt != NULL) { 473 ctxt->wellFormed = 0; 474 if (ctxt->recovery == 0) 475 ctxt->disableSAX = 1; 476 } 477 } 478 479 /** 480 * xmlFatalErrMsg: 481 * @ctxt: an XML parser context 482 * @error: the error number 483 * @msg: the error message 484 * 485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 486 */ 487 static void 488 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 489 const char *msg) 490 { 491 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 492 (ctxt->instate == XML_PARSER_EOF)) 493 return; 494 if (ctxt != NULL) 495 ctxt->errNo = error; 496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 497 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 498 if (ctxt != NULL) { 499 ctxt->wellFormed = 0; 500 if (ctxt->recovery == 0) 501 ctxt->disableSAX = 1; 502 } 503 } 504 505 /** 506 * xmlWarningMsg: 507 * @ctxt: an XML parser context 508 * @error: the error number 509 * @msg: the error message 510 * @str1: extra data 511 * @str2: extra data 512 * 513 * Handle a warning. 514 */ 515 static void 516 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 517 const char *msg, const xmlChar *str1, const xmlChar *str2) 518 { 519 xmlStructuredErrorFunc schannel = NULL; 520 521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 522 (ctxt->instate == XML_PARSER_EOF)) 523 return; 524 if ((ctxt != NULL) && (ctxt->sax != NULL) && 525 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 526 schannel = ctxt->sax->serror; 527 if (ctxt != NULL) { 528 __xmlRaiseError(schannel, 529 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 530 ctxt->userData, 531 ctxt, NULL, XML_FROM_PARSER, error, 532 XML_ERR_WARNING, NULL, 0, 533 (const char *) str1, (const char *) str2, NULL, 0, 0, 534 msg, (const char *) str1, (const char *) str2); 535 } else { 536 __xmlRaiseError(schannel, NULL, NULL, 537 ctxt, NULL, XML_FROM_PARSER, error, 538 XML_ERR_WARNING, NULL, 0, 539 (const char *) str1, (const char *) str2, NULL, 0, 0, 540 msg, (const char *) str1, (const char *) str2); 541 } 542 } 543 544 /** 545 * xmlValidityError: 546 * @ctxt: an XML parser context 547 * @error: the error number 548 * @msg: the error message 549 * @str1: extra data 550 * 551 * Handle a validity error. 552 */ 553 static void 554 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 555 const char *msg, const xmlChar *str1, const xmlChar *str2) 556 { 557 xmlStructuredErrorFunc schannel = NULL; 558 559 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 560 (ctxt->instate == XML_PARSER_EOF)) 561 return; 562 if (ctxt != NULL) { 563 ctxt->errNo = error; 564 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 565 schannel = ctxt->sax->serror; 566 } 567 if (ctxt != NULL) { 568 __xmlRaiseError(schannel, 569 ctxt->vctxt.error, ctxt->vctxt.userData, 570 ctxt, NULL, XML_FROM_DTD, error, 571 XML_ERR_ERROR, NULL, 0, (const char *) str1, 572 (const char *) str2, NULL, 0, 0, 573 msg, (const char *) str1, (const char *) str2); 574 ctxt->valid = 0; 575 } else { 576 __xmlRaiseError(schannel, NULL, NULL, 577 ctxt, NULL, XML_FROM_DTD, error, 578 XML_ERR_ERROR, NULL, 0, (const char *) str1, 579 (const char *) str2, NULL, 0, 0, 580 msg, (const char *) str1, (const char *) str2); 581 } 582 } 583 584 /** 585 * xmlFatalErrMsgInt: 586 * @ctxt: an XML parser context 587 * @error: the error number 588 * @msg: the error message 589 * @val: an integer value 590 * 591 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 592 */ 593 static void 594 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 595 const char *msg, int val) 596 { 597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 598 (ctxt->instate == XML_PARSER_EOF)) 599 return; 600 if (ctxt != NULL) 601 ctxt->errNo = error; 602 __xmlRaiseError(NULL, NULL, NULL, 603 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 604 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 605 if (ctxt != NULL) { 606 ctxt->wellFormed = 0; 607 if (ctxt->recovery == 0) 608 ctxt->disableSAX = 1; 609 } 610 } 611 612 /** 613 * xmlFatalErrMsgStrIntStr: 614 * @ctxt: an XML parser context 615 * @error: the error number 616 * @msg: the error message 617 * @str1: an string info 618 * @val: an integer value 619 * @str2: an string info 620 * 621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 622 */ 623 static void 624 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 625 const char *msg, const xmlChar *str1, int val, 626 const xmlChar *str2) 627 { 628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 629 (ctxt->instate == XML_PARSER_EOF)) 630 return; 631 if (ctxt != NULL) 632 ctxt->errNo = error; 633 __xmlRaiseError(NULL, NULL, NULL, 634 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 635 NULL, 0, (const char *) str1, (const char *) str2, 636 NULL, val, 0, msg, str1, val, str2); 637 if (ctxt != NULL) { 638 ctxt->wellFormed = 0; 639 if (ctxt->recovery == 0) 640 ctxt->disableSAX = 1; 641 } 642 } 643 644 /** 645 * xmlFatalErrMsgStr: 646 * @ctxt: an XML parser context 647 * @error: the error number 648 * @msg: the error message 649 * @val: a string value 650 * 651 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 652 */ 653 static void 654 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 655 const char *msg, const xmlChar * val) 656 { 657 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 658 (ctxt->instate == XML_PARSER_EOF)) 659 return; 660 if (ctxt != NULL) 661 ctxt->errNo = error; 662 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 663 XML_FROM_PARSER, error, XML_ERR_FATAL, 664 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 665 val); 666 if (ctxt != NULL) { 667 ctxt->wellFormed = 0; 668 if (ctxt->recovery == 0) 669 ctxt->disableSAX = 1; 670 } 671 } 672 673 /** 674 * xmlErrMsgStr: 675 * @ctxt: an XML parser context 676 * @error: the error number 677 * @msg: the error message 678 * @val: a string value 679 * 680 * Handle a non fatal parser error 681 */ 682 static void 683 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 684 const char *msg, const xmlChar * val) 685 { 686 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 687 (ctxt->instate == XML_PARSER_EOF)) 688 return; 689 if (ctxt != NULL) 690 ctxt->errNo = error; 691 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 692 XML_FROM_PARSER, error, XML_ERR_ERROR, 693 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 694 val); 695 } 696 697 /** 698 * xmlNsErr: 699 * @ctxt: an XML parser context 700 * @error: the error number 701 * @msg: the message 702 * @info1: extra information string 703 * @info2: extra information string 704 * 705 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 706 */ 707 static void 708 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 709 const char *msg, 710 const xmlChar * info1, const xmlChar * info2, 711 const xmlChar * info3) 712 { 713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 714 (ctxt->instate == XML_PARSER_EOF)) 715 return; 716 if (ctxt != NULL) 717 ctxt->errNo = error; 718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 719 XML_ERR_ERROR, NULL, 0, (const char *) info1, 720 (const char *) info2, (const char *) info3, 0, 0, msg, 721 info1, info2, info3); 722 if (ctxt != NULL) 723 ctxt->nsWellFormed = 0; 724 } 725 726 /** 727 * xmlNsWarn 728 * @ctxt: an XML parser context 729 * @error: the error number 730 * @msg: the message 731 * @info1: extra information string 732 * @info2: extra information string 733 * 734 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 735 */ 736 static void 737 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 738 const char *msg, 739 const xmlChar * info1, const xmlChar * info2, 740 const xmlChar * info3) 741 { 742 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 743 (ctxt->instate == XML_PARSER_EOF)) 744 return; 745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 746 XML_ERR_WARNING, NULL, 0, (const char *) info1, 747 (const char *) info2, (const char *) info3, 0, 0, msg, 748 info1, info2, info3); 749 } 750 751 /************************************************************************ 752 * * 753 * Library wide options * 754 * * 755 ************************************************************************/ 756 757 /** 758 * xmlHasFeature: 759 * @feature: the feature to be examined 760 * 761 * Examines if the library has been compiled with a given feature. 762 * 763 * Returns a non-zero value if the feature exist, otherwise zero. 764 * Returns zero (0) if the feature does not exist or an unknown 765 * unknown feature is requested, non-zero otherwise. 
766 */ 767 int 768 xmlHasFeature(xmlFeature feature) 769 { 770 switch (feature) { 771 case XML_WITH_THREAD: 772 #ifdef LIBXML_THREAD_ENABLED 773 return(1); 774 #else 775 return(0); 776 #endif 777 case XML_WITH_TREE: 778 #ifdef LIBXML_TREE_ENABLED 779 return(1); 780 #else 781 return(0); 782 #endif 783 case XML_WITH_OUTPUT: 784 #ifdef LIBXML_OUTPUT_ENABLED 785 return(1); 786 #else 787 return(0); 788 #endif 789 case XML_WITH_PUSH: 790 #ifdef LIBXML_PUSH_ENABLED 791 return(1); 792 #else 793 return(0); 794 #endif 795 case XML_WITH_READER: 796 #ifdef LIBXML_READER_ENABLED 797 return(1); 798 #else 799 return(0); 800 #endif 801 case XML_WITH_PATTERN: 802 #ifdef LIBXML_PATTERN_ENABLED 803 return(1); 804 #else 805 return(0); 806 #endif 807 case XML_WITH_WRITER: 808 #ifdef LIBXML_WRITER_ENABLED 809 return(1); 810 #else 811 return(0); 812 #endif 813 case XML_WITH_SAX1: 814 #ifdef LIBXML_SAX1_ENABLED 815 return(1); 816 #else 817 return(0); 818 #endif 819 case XML_WITH_FTP: 820 #ifdef LIBXML_FTP_ENABLED 821 return(1); 822 #else 823 return(0); 824 #endif 825 case XML_WITH_HTTP: 826 #ifdef LIBXML_HTTP_ENABLED 827 return(1); 828 #else 829 return(0); 830 #endif 831 case XML_WITH_VALID: 832 #ifdef LIBXML_VALID_ENABLED 833 return(1); 834 #else 835 return(0); 836 #endif 837 case XML_WITH_HTML: 838 #ifdef LIBXML_HTML_ENABLED 839 return(1); 840 #else 841 return(0); 842 #endif 843 case XML_WITH_LEGACY: 844 #ifdef LIBXML_LEGACY_ENABLED 845 return(1); 846 #else 847 return(0); 848 #endif 849 case XML_WITH_C14N: 850 #ifdef LIBXML_C14N_ENABLED 851 return(1); 852 #else 853 return(0); 854 #endif 855 case XML_WITH_CATALOG: 856 #ifdef LIBXML_CATALOG_ENABLED 857 return(1); 858 #else 859 return(0); 860 #endif 861 case XML_WITH_XPATH: 862 #ifdef LIBXML_XPATH_ENABLED 863 return(1); 864 #else 865 return(0); 866 #endif 867 case XML_WITH_XPTR: 868 #ifdef LIBXML_XPTR_ENABLED 869 return(1); 870 #else 871 return(0); 872 #endif 873 case XML_WITH_XINCLUDE: 874 #ifdef LIBXML_XINCLUDE_ENABLED 875 return(1); 876 
#else 877 return(0); 878 #endif 879 case XML_WITH_ICONV: 880 #ifdef LIBXML_ICONV_ENABLED 881 return(1); 882 #else 883 return(0); 884 #endif 885 case XML_WITH_ISO8859X: 886 #ifdef LIBXML_ISO8859X_ENABLED 887 return(1); 888 #else 889 return(0); 890 #endif 891 case XML_WITH_UNICODE: 892 #ifdef LIBXML_UNICODE_ENABLED 893 return(1); 894 #else 895 return(0); 896 #endif 897 case XML_WITH_REGEXP: 898 #ifdef LIBXML_REGEXP_ENABLED 899 return(1); 900 #else 901 return(0); 902 #endif 903 case XML_WITH_AUTOMATA: 904 #ifdef LIBXML_AUTOMATA_ENABLED 905 return(1); 906 #else 907 return(0); 908 #endif 909 case XML_WITH_EXPR: 910 #ifdef LIBXML_EXPR_ENABLED 911 return(1); 912 #else 913 return(0); 914 #endif 915 case XML_WITH_SCHEMAS: 916 #ifdef LIBXML_SCHEMAS_ENABLED 917 return(1); 918 #else 919 return(0); 920 #endif 921 case XML_WITH_SCHEMATRON: 922 #ifdef LIBXML_SCHEMATRON_ENABLED 923 return(1); 924 #else 925 return(0); 926 #endif 927 case XML_WITH_MODULES: 928 #ifdef LIBXML_MODULES_ENABLED 929 return(1); 930 #else 931 return(0); 932 #endif 933 case XML_WITH_DEBUG: 934 #ifdef LIBXML_DEBUG_ENABLED 935 return(1); 936 #else 937 return(0); 938 #endif 939 case XML_WITH_DEBUG_MEM: 940 #ifdef DEBUG_MEMORY_LOCATION 941 return(1); 942 #else 943 return(0); 944 #endif 945 case XML_WITH_DEBUG_RUN: 946 #ifdef LIBXML_DEBUG_RUNTIME 947 return(1); 948 #else 949 return(0); 950 #endif 951 case XML_WITH_ZLIB: 952 #ifdef LIBXML_ZLIB_ENABLED 953 return(1); 954 #else 955 return(0); 956 #endif 957 case XML_WITH_ICU: 958 #ifdef LIBXML_ICU_ENABLED 959 return(1); 960 #else 961 return(0); 962 #endif 963 default: 964 break; 965 } 966 return(0); 967 } 968 969 /************************************************************************ 970 * * 971 * SAX2 defaulted attributes handling * 972 * * 973 ************************************************************************/ 974 975 /** 976 * xmlDetectSAX2: 977 * @ctxt: an XML parser context 978 * 979 * Do the SAX2 detection and specific intialization 980 */ 981 static 
void 982 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 983 if (ctxt == NULL) return; 984 #ifdef LIBXML_SAX1_ENABLED 985 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 986 ((ctxt->sax->startElementNs != NULL) || 987 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 988 #else 989 ctxt->sax2 = 1; 990 #endif /* LIBXML_SAX1_ENABLED */ 991 992 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 993 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 994 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 995 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 996 (ctxt->str_xml_ns == NULL)) { 997 xmlErrMemory(ctxt, NULL); 998 } 999 } 1000 1001 typedef struct _xmlDefAttrs xmlDefAttrs; 1002 typedef xmlDefAttrs *xmlDefAttrsPtr; 1003 struct _xmlDefAttrs { 1004 int nbAttrs; /* number of defaulted attributes on that element */ 1005 int maxAttrs; /* the size of the array */ 1006 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1007 }; 1008 1009 /** 1010 * xmlAttrNormalizeSpace: 1011 * @src: the source string 1012 * @dst: the target string 1013 * 1014 * Normalize the space in non CDATA attribute values: 1015 * If the attribute type is not CDATA, then the XML processor MUST further 1016 * process the normalized attribute value by discarding any leading and 1017 * trailing space (#x20) characters, and by replacing sequences of space 1018 * (#x20) characters by a single space (#x20) character. 1019 * Note that the size of dst need to be at least src, and if one doesn't need 1020 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1021 * passing src as dst is just fine. 1022 * 1023 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1024 * is needed. 
1025 */ 1026 static xmlChar * 1027 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1028 { 1029 if ((src == NULL) || (dst == NULL)) 1030 return(NULL); 1031 1032 while (*src == 0x20) src++; 1033 while (*src != 0) { 1034 if (*src == 0x20) { 1035 while (*src == 0x20) src++; 1036 if (*src != 0) 1037 *dst++ = 0x20; 1038 } else { 1039 *dst++ = *src++; 1040 } 1041 } 1042 *dst = 0; 1043 if (dst == src) 1044 return(NULL); 1045 return(dst); 1046 } 1047 1048 /** 1049 * xmlAttrNormalizeSpace2: 1050 * @src: the source string 1051 * 1052 * Normalize the space in non CDATA attribute values, a slightly more complex 1053 * front end to avoid allocation problems when running on attribute values 1054 * coming from the input. 1055 * 1056 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1057 * is needed. 1058 */ 1059 static const xmlChar * 1060 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1061 { 1062 int i; 1063 int remove_head = 0; 1064 int need_realloc = 0; 1065 const xmlChar *cur; 1066 1067 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1068 return(NULL); 1069 i = *len; 1070 if (i <= 0) 1071 return(NULL); 1072 1073 cur = src; 1074 while (*cur == 0x20) { 1075 cur++; 1076 remove_head++; 1077 } 1078 while (*cur != 0) { 1079 if (*cur == 0x20) { 1080 cur++; 1081 if ((*cur == 0x20) || (*cur == 0)) { 1082 need_realloc = 1; 1083 break; 1084 } 1085 } else 1086 cur++; 1087 } 1088 if (need_realloc) { 1089 xmlChar *ret; 1090 1091 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1092 if (ret == NULL) { 1093 xmlErrMemory(ctxt, NULL); 1094 return(NULL); 1095 } 1096 xmlAttrNormalizeSpace(ret, ret); 1097 *len = (int) strlen((const char *)ret); 1098 return(ret); 1099 } else if (remove_head) { 1100 *len -= remove_head; 1101 memmove(src, src + remove_head, 1 + *len); 1102 return(src); 1103 } 1104 return(NULL); 1105 } 1106 1107 /** 1108 * xmlAddDefAttrs: 1109 * @ctxt: an XML parser context 1110 * @fullname: the element fullname 
1111 * @fullattr: the attribute fullname 1112 * @value: the attribute value 1113 * 1114 * Add a defaulted attribute for an element 1115 */ 1116 static void 1117 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1118 const xmlChar *fullname, 1119 const xmlChar *fullattr, 1120 const xmlChar *value) { 1121 xmlDefAttrsPtr defaults; 1122 int len; 1123 const xmlChar *name; 1124 const xmlChar *prefix; 1125 1126 /* 1127 * Allows to detect attribute redefinitions 1128 */ 1129 if (ctxt->attsSpecial != NULL) { 1130 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1131 return; 1132 } 1133 1134 if (ctxt->attsDefault == NULL) { 1135 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1136 if (ctxt->attsDefault == NULL) 1137 goto mem_error; 1138 } 1139 1140 /* 1141 * split the element name into prefix:localname , the string found 1142 * are within the DTD and then not associated to namespace names. 1143 */ 1144 name = xmlSplitQName3(fullname, &len); 1145 if (name == NULL) { 1146 name = xmlDictLookup(ctxt->dict, fullname, -1); 1147 prefix = NULL; 1148 } else { 1149 name = xmlDictLookup(ctxt->dict, name, -1); 1150 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1151 } 1152 1153 /* 1154 * make sure there is some storage 1155 */ 1156 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1157 if (defaults == NULL) { 1158 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1159 (4 * 5) * sizeof(const xmlChar *)); 1160 if (defaults == NULL) 1161 goto mem_error; 1162 defaults->nbAttrs = 0; 1163 defaults->maxAttrs = 4; 1164 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1165 defaults, NULL) < 0) { 1166 xmlFree(defaults); 1167 goto mem_error; 1168 } 1169 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1170 xmlDefAttrsPtr temp; 1171 1172 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1173 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1174 if (temp == NULL) 1175 goto mem_error; 1176 defaults = temp; 1177 
defaults->maxAttrs *= 2; 1178 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1179 defaults, NULL) < 0) { 1180 xmlFree(defaults); 1181 goto mem_error; 1182 } 1183 } 1184 1185 /* 1186 * Split the element name into prefix:localname , the string found 1187 * are within the DTD and hen not associated to namespace names. 1188 */ 1189 name = xmlSplitQName3(fullattr, &len); 1190 if (name == NULL) { 1191 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1192 prefix = NULL; 1193 } else { 1194 name = xmlDictLookup(ctxt->dict, name, -1); 1195 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1196 } 1197 1198 defaults->values[5 * defaults->nbAttrs] = name; 1199 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1200 /* intern the string and precompute the end */ 1201 len = xmlStrlen(value); 1202 value = xmlDictLookup(ctxt->dict, value, len); 1203 defaults->values[5 * defaults->nbAttrs + 2] = value; 1204 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1205 if (ctxt->external) 1206 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1207 else 1208 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1209 defaults->nbAttrs++; 1210 1211 return; 1212 1213 mem_error: 1214 xmlErrMemory(ctxt, NULL); 1215 return; 1216 } 1217 1218 /** 1219 * xmlAddSpecialAttr: 1220 * @ctxt: an XML parser context 1221 * @fullname: the element fullname 1222 * @fullattr: the attribute fullname 1223 * @type: the attribute type 1224 * 1225 * Register this attribute type 1226 */ 1227 static void 1228 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1229 const xmlChar *fullname, 1230 const xmlChar *fullattr, 1231 int type) 1232 { 1233 if (ctxt->attsSpecial == NULL) { 1234 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1235 if (ctxt->attsSpecial == NULL) 1236 goto mem_error; 1237 } 1238 1239 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1240 return; 1241 1242 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1243 (void *) (long) type); 1244 return; 
1245 1246 mem_error: 1247 xmlErrMemory(ctxt, NULL); 1248 return; 1249 } 1250 1251 /** 1252 * xmlCleanSpecialAttrCallback: 1253 * 1254 * Removes CDATA attributes from the special attribute table 1255 */ 1256 static void 1257 xmlCleanSpecialAttrCallback(void *payload, void *data, 1258 const xmlChar *fullname, const xmlChar *fullattr, 1259 const xmlChar *unused ATTRIBUTE_UNUSED) { 1260 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1261 1262 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1263 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1264 } 1265 } 1266 1267 /** 1268 * xmlCleanSpecialAttr: 1269 * @ctxt: an XML parser context 1270 * 1271 * Trim the list of attributes defined to remove all those of type 1272 * CDATA as they are not special. This call should be done when finishing 1273 * to parse the DTD and before starting to parse the document root. 1274 */ 1275 static void 1276 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1277 { 1278 if (ctxt->attsSpecial == NULL) 1279 return; 1280 1281 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1282 1283 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1284 xmlHashFree(ctxt->attsSpecial, NULL); 1285 ctxt->attsSpecial = NULL; 1286 } 1287 return; 1288 } 1289 1290 /** 1291 * xmlCheckLanguageID: 1292 * @lang: pointer to the string value 1293 * 1294 * Checks that the value conforms to the LanguageID production: 1295 * 1296 * NOTE: this is somewhat deprecated, those productions were removed from 1297 * the XML Second edition. 
1298 * 1299 * [33] LanguageID ::= Langcode ('-' Subcode)* 1300 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1301 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1302 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1303 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1304 * [38] Subcode ::= ([a-z] | [A-Z])+ 1305 * 1306 * Returns 1 if correct 0 otherwise 1307 **/ 1308 int 1309 xmlCheckLanguageID(const xmlChar * lang) 1310 { 1311 const xmlChar *cur = lang; 1312 1313 if (cur == NULL) 1314 return (0); 1315 if (((cur[0] == 'i') && (cur[1] == '-')) || 1316 ((cur[0] == 'I') && (cur[1] == '-'))) { 1317 /* 1318 * IANA code 1319 */ 1320 cur += 2; 1321 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1322 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1323 cur++; 1324 } else if (((cur[0] == 'x') && (cur[1] == '-')) || 1325 ((cur[0] == 'X') && (cur[1] == '-'))) { 1326 /* 1327 * User code 1328 */ 1329 cur += 2; 1330 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1331 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1332 cur++; 1333 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1334 ((cur[0] >= 'a') && (cur[0] <= 'z'))) { 1335 /* 1336 * ISO639 1337 */ 1338 cur++; 1339 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1340 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1341 cur++; 1342 else 1343 return (0); 1344 } else 1345 return (0); 1346 while (cur[0] != 0) { /* non input consuming */ 1347 if (cur[0] != '-') 1348 return (0); 1349 cur++; 1350 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1351 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1352 cur++; 1353 else 1354 return (0); 1355 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1356 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1357 cur++; 1358 } 1359 return (1); 1360 } 1361 1362 /************************************************************************ 1363 * * 1364 * Parser stacks related functions and macros * 1365 * * 1366 
************************************************************************/ 1367 1368 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1369 const xmlChar ** str); 1370 1371 #ifdef SAX2 1372 /** 1373 * nsPush: 1374 * @ctxt: an XML parser context 1375 * @prefix: the namespace prefix or NULL 1376 * @URL: the namespace name 1377 * 1378 * Pushes a new parser namespace on top of the ns stack 1379 * 1380 * Returns -1 in case of error, -2 if the namespace should be discarded 1381 * and the index in the stack otherwise. 1382 */ 1383 static int 1384 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1385 { 1386 if (ctxt->options & XML_PARSE_NSCLEAN) { 1387 int i; 1388 for (i = 0;i < ctxt->nsNr;i += 2) { 1389 if (ctxt->nsTab[i] == prefix) { 1390 /* in scope */ 1391 if (ctxt->nsTab[i + 1] == URL) 1392 return(-2); 1393 /* out of scope keep it */ 1394 break; 1395 } 1396 } 1397 } 1398 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1399 ctxt->nsMax = 10; 1400 ctxt->nsNr = 0; 1401 ctxt->nsTab = (const xmlChar **) 1402 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1403 if (ctxt->nsTab == NULL) { 1404 xmlErrMemory(ctxt, NULL); 1405 ctxt->nsMax = 0; 1406 return (-1); 1407 } 1408 } else if (ctxt->nsNr >= ctxt->nsMax) { 1409 const xmlChar ** tmp; 1410 ctxt->nsMax *= 2; 1411 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1412 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1413 if (tmp == NULL) { 1414 xmlErrMemory(ctxt, NULL); 1415 ctxt->nsMax /= 2; 1416 return (-1); 1417 } 1418 ctxt->nsTab = tmp; 1419 } 1420 ctxt->nsTab[ctxt->nsNr++] = prefix; 1421 ctxt->nsTab[ctxt->nsNr++] = URL; 1422 return (ctxt->nsNr); 1423 } 1424 /** 1425 * nsPop: 1426 * @ctxt: an XML parser context 1427 * @nr: the number to pop 1428 * 1429 * Pops the top @nr parser prefix/namespace from the ns stack 1430 * 1431 * Returns the number of namespaces removed 1432 */ 1433 static int 1434 nsPop(xmlParserCtxtPtr ctxt, int nr) 1435 { 1436 int i; 1437 1438 if (ctxt->nsTab == NULL) 
return(0); 1439 if (ctxt->nsNr < nr) { 1440 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1441 nr = ctxt->nsNr; 1442 } 1443 if (ctxt->nsNr <= 0) 1444 return (0); 1445 1446 for (i = 0;i < nr;i++) { 1447 ctxt->nsNr--; 1448 ctxt->nsTab[ctxt->nsNr] = NULL; 1449 } 1450 return(nr); 1451 } 1452 #endif 1453 1454 static int 1455 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1456 const xmlChar **atts; 1457 int *attallocs; 1458 int maxatts; 1459 1460 if (ctxt->atts == NULL) { 1461 maxatts = 55; /* allow for 10 attrs by default */ 1462 atts = (const xmlChar **) 1463 xmlMalloc(maxatts * sizeof(xmlChar *)); 1464 if (atts == NULL) goto mem_error; 1465 ctxt->atts = atts; 1466 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1467 if (attallocs == NULL) goto mem_error; 1468 ctxt->attallocs = attallocs; 1469 ctxt->maxatts = maxatts; 1470 } else if (nr + 5 > ctxt->maxatts) { 1471 maxatts = (nr + 5) * 2; 1472 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1473 maxatts * sizeof(const xmlChar *)); 1474 if (atts == NULL) goto mem_error; 1475 ctxt->atts = atts; 1476 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1477 (maxatts / 5) * sizeof(int)); 1478 if (attallocs == NULL) goto mem_error; 1479 ctxt->attallocs = attallocs; 1480 ctxt->maxatts = maxatts; 1481 } 1482 return(ctxt->maxatts); 1483 mem_error: 1484 xmlErrMemory(ctxt, NULL); 1485 return(-1); 1486 } 1487 1488 /** 1489 * inputPush: 1490 * @ctxt: an XML parser context 1491 * @value: the parser input 1492 * 1493 * Pushes a new parser input on top of the input stack 1494 * 1495 * Returns -1 in case of error, the index in the stack otherwise 1496 */ 1497 int 1498 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1499 { 1500 if ((ctxt == NULL) || (value == NULL)) 1501 return(-1); 1502 if (ctxt->inputNr >= ctxt->inputMax) { 1503 ctxt->inputMax *= 2; 1504 ctxt->inputTab = 1505 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1506 ctxt->inputMax * 1507 
sizeof(ctxt->inputTab[0])); 1508 if (ctxt->inputTab == NULL) { 1509 xmlErrMemory(ctxt, NULL); 1510 xmlFreeInputStream(value); 1511 ctxt->inputMax /= 2; 1512 value = NULL; 1513 return (-1); 1514 } 1515 } 1516 ctxt->inputTab[ctxt->inputNr] = value; 1517 ctxt->input = value; 1518 return (ctxt->inputNr++); 1519 } 1520 /** 1521 * inputPop: 1522 * @ctxt: an XML parser context 1523 * 1524 * Pops the top parser input from the input stack 1525 * 1526 * Returns the input just removed 1527 */ 1528 xmlParserInputPtr 1529 inputPop(xmlParserCtxtPtr ctxt) 1530 { 1531 xmlParserInputPtr ret; 1532 1533 if (ctxt == NULL) 1534 return(NULL); 1535 if (ctxt->inputNr <= 0) 1536 return (NULL); 1537 ctxt->inputNr--; 1538 if (ctxt->inputNr > 0) 1539 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1540 else 1541 ctxt->input = NULL; 1542 ret = ctxt->inputTab[ctxt->inputNr]; 1543 ctxt->inputTab[ctxt->inputNr] = NULL; 1544 return (ret); 1545 } 1546 /** 1547 * nodePush: 1548 * @ctxt: an XML parser context 1549 * @value: the element node 1550 * 1551 * Pushes a new element node on top of the node stack 1552 * 1553 * Returns -1 in case of error, the index in the stack otherwise 1554 */ 1555 int 1556 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1557 { 1558 if (ctxt == NULL) return(0); 1559 if (ctxt->nodeNr >= ctxt->nodeMax) { 1560 xmlNodePtr *tmp; 1561 1562 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1563 ctxt->nodeMax * 2 * 1564 sizeof(ctxt->nodeTab[0])); 1565 if (tmp == NULL) { 1566 xmlErrMemory(ctxt, NULL); 1567 return (-1); 1568 } 1569 ctxt->nodeTab = tmp; 1570 ctxt->nodeMax *= 2; 1571 } 1572 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1573 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1574 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1575 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1576 xmlParserMaxDepth); 1577 ctxt->instate = XML_PARSER_EOF; 1578 return(-1); 1579 } 1580 ctxt->nodeTab[ctxt->nodeNr] = value; 1581 ctxt->node = value; 1582 return 
(ctxt->nodeNr++); 1583 } 1584 1585 /** 1586 * nodePop: 1587 * @ctxt: an XML parser context 1588 * 1589 * Pops the top element node from the node stack 1590 * 1591 * Returns the node just removed 1592 */ 1593 xmlNodePtr 1594 nodePop(xmlParserCtxtPtr ctxt) 1595 { 1596 xmlNodePtr ret; 1597 1598 if (ctxt == NULL) return(NULL); 1599 if (ctxt->nodeNr <= 0) 1600 return (NULL); 1601 ctxt->nodeNr--; 1602 if (ctxt->nodeNr > 0) 1603 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1604 else 1605 ctxt->node = NULL; 1606 ret = ctxt->nodeTab[ctxt->nodeNr]; 1607 ctxt->nodeTab[ctxt->nodeNr] = NULL; 1608 return (ret); 1609 } 1610 1611 #ifdef LIBXML_PUSH_ENABLED 1612 /** 1613 * nameNsPush: 1614 * @ctxt: an XML parser context 1615 * @value: the element name 1616 * @prefix: the element prefix 1617 * @URI: the element namespace name 1618 * 1619 * Pushes a new element name/prefix/URL on top of the name stack 1620 * 1621 * Returns -1 in case of error, the index in the stack otherwise 1622 */ 1623 static int 1624 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1625 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1626 { 1627 if (ctxt->nameNr >= ctxt->nameMax) { 1628 const xmlChar * *tmp; 1629 void **tmp2; 1630 ctxt->nameMax *= 2; 1631 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1632 ctxt->nameMax * 1633 sizeof(ctxt->nameTab[0])); 1634 if (tmp == NULL) { 1635 ctxt->nameMax /= 2; 1636 goto mem_error; 1637 } 1638 ctxt->nameTab = tmp; 1639 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1640 ctxt->nameMax * 3 * 1641 sizeof(ctxt->pushTab[0])); 1642 if (tmp2 == NULL) { 1643 ctxt->nameMax /= 2; 1644 goto mem_error; 1645 } 1646 ctxt->pushTab = tmp2; 1647 } 1648 ctxt->nameTab[ctxt->nameNr] = value; 1649 ctxt->name = value; 1650 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1651 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1652 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1653 return (ctxt->nameNr++); 1654 mem_error: 1655 
xmlErrMemory(ctxt, NULL); 1656 return (-1); 1657 } 1658 /** 1659 * nameNsPop: 1660 * @ctxt: an XML parser context 1661 * 1662 * Pops the top element/prefix/URI name from the name stack 1663 * 1664 * Returns the name just removed 1665 */ 1666 static const xmlChar * 1667 nameNsPop(xmlParserCtxtPtr ctxt) 1668 { 1669 const xmlChar *ret; 1670 1671 if (ctxt->nameNr <= 0) 1672 return (NULL); 1673 ctxt->nameNr--; 1674 if (ctxt->nameNr > 0) 1675 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1676 else 1677 ctxt->name = NULL; 1678 ret = ctxt->nameTab[ctxt->nameNr]; 1679 ctxt->nameTab[ctxt->nameNr] = NULL; 1680 return (ret); 1681 } 1682 #endif /* LIBXML_PUSH_ENABLED */ 1683 1684 /** 1685 * namePush: 1686 * @ctxt: an XML parser context 1687 * @value: the element name 1688 * 1689 * Pushes a new element name on top of the name stack 1690 * 1691 * Returns -1 in case of error, the index in the stack otherwise 1692 */ 1693 int 1694 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1695 { 1696 if (ctxt == NULL) return (-1); 1697 1698 if (ctxt->nameNr >= ctxt->nameMax) { 1699 const xmlChar * *tmp; 1700 ctxt->nameMax *= 2; 1701 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1702 ctxt->nameMax * 1703 sizeof(ctxt->nameTab[0])); 1704 if (tmp == NULL) { 1705 ctxt->nameMax /= 2; 1706 goto mem_error; 1707 } 1708 ctxt->nameTab = tmp; 1709 } 1710 ctxt->nameTab[ctxt->nameNr] = value; 1711 ctxt->name = value; 1712 return (ctxt->nameNr++); 1713 mem_error: 1714 xmlErrMemory(ctxt, NULL); 1715 return (-1); 1716 } 1717 /** 1718 * namePop: 1719 * @ctxt: an XML parser context 1720 * 1721 * Pops the top element name from the name stack 1722 * 1723 * Returns the name just removed 1724 */ 1725 const xmlChar * 1726 namePop(xmlParserCtxtPtr ctxt) 1727 { 1728 const xmlChar *ret; 1729 1730 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1731 return (NULL); 1732 ctxt->nameNr--; 1733 if (ctxt->nameNr > 0) 1734 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1735 else 1736 ctxt->name = NULL; 1737 
ret = ctxt->nameTab[ctxt->nameNr]; 1738 ctxt->nameTab[ctxt->nameNr] = NULL; 1739 return (ret); 1740 } 1741 1742 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1743 if (ctxt->spaceNr >= ctxt->spaceMax) { 1744 int *tmp; 1745 1746 ctxt->spaceMax *= 2; 1747 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1748 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1749 if (tmp == NULL) { 1750 xmlErrMemory(ctxt, NULL); 1751 ctxt->spaceMax /=2; 1752 return(-1); 1753 } 1754 ctxt->spaceTab = tmp; 1755 } 1756 ctxt->spaceTab[ctxt->spaceNr] = val; 1757 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1758 return(ctxt->spaceNr++); 1759 } 1760 1761 static int spacePop(xmlParserCtxtPtr ctxt) { 1762 int ret; 1763 if (ctxt->spaceNr <= 0) return(0); 1764 ctxt->spaceNr--; 1765 if (ctxt->spaceNr > 0) 1766 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1767 else 1768 ctxt->space = &ctxt->spaceTab[0]; 1769 ret = ctxt->spaceTab[ctxt->spaceNr]; 1770 ctxt->spaceTab[ctxt->spaceNr] = -1; 1771 return(ret); 1772 } 1773 1774 /* 1775 * Macros for accessing the content. Those should be used only by the parser, 1776 * and not exported. 1777 * 1778 * Dirty macros, i.e. one often need to make assumption on the context to 1779 * use them 1780 * 1781 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1782 * To be used with extreme caution since operations consuming 1783 * characters may move the input buffer to a different location ! 1784 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1785 * This should be used internally by the parser 1786 * only to compare to ASCII values otherwise it would break when 1787 * running with UTF-8 encoding. 1788 * RAW same as CUR but in the input buffer, bypass any token 1789 * extraction that may have been done 1790 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1791 * to compare on ASCII based substring. 
1792 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1793 * strings without newlines within the parser. 1794 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1795 * defined char within the parser. 1796 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1797 * 1798 * NEXT Skip to the next character, this does the proper decoding 1799 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1800 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1801 * CUR_CHAR(l) returns the current unicode character (int), set l 1802 * to the number of xmlChars used for the encoding [0-5]. 1803 * CUR_SCHAR same but operate on a string instead of the context 1804 * COPY_BUF copy the current unicode char to the target buffer, increment 1805 * the index 1806 * GROW, SHRINK handling of input buffers 1807 */ 1808 1809 #define RAW (*ctxt->input->cur) 1810 #define CUR (*ctxt->input->cur) 1811 #define NXT(val) ctxt->input->cur[(val)] 1812 #define CUR_PTR ctxt->input->cur 1813 1814 #define CMP4( s, c1, c2, c3, c4 ) \ 1815 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1816 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1817 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 1818 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1819 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1820 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1821 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1822 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1823 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1824 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1825 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1826 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1827 ((unsigned char *) s)[ 8 ] == c9 ) 1828 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) 
\ 1829 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1830 ((unsigned char *) s)[ 9 ] == c10 ) 1831 1832 #define SKIP(val) do { \ 1833 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 1834 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1835 if ((*ctxt->input->cur == 0) && \ 1836 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1837 xmlPopInput(ctxt); \ 1838 } while (0) 1839 1840 #define SKIPL(val) do { \ 1841 int skipl; \ 1842 for(skipl=0; skipl<val; skipl++) { \ 1843 if (*(ctxt->input->cur) == '\n') { \ 1844 ctxt->input->line++; ctxt->input->col = 1; \ 1845 } else ctxt->input->col++; \ 1846 ctxt->nbChars++; \ 1847 ctxt->input->cur++; \ 1848 } \ 1849 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1850 if ((*ctxt->input->cur == 0) && \ 1851 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1852 xmlPopInput(ctxt); \ 1853 } while (0) 1854 1855 #define SHRINK if ((ctxt->progressive == 0) && \ 1856 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 1857 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 1858 xmlSHRINK (ctxt); 1859 1860 static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 1861 xmlParserInputShrink(ctxt->input); 1862 if ((*ctxt->input->cur == 0) && 1863 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1864 xmlPopInput(ctxt); 1865 } 1866 1867 #define GROW if ((ctxt->progressive == 0) && \ 1868 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 1869 xmlGROW (ctxt); 1870 1871 static void xmlGROW (xmlParserCtxtPtr ctxt) { 1872 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1873 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 1874 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1875 xmlPopInput(ctxt); 1876 } 1877 1878 #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 1879 1880 #define NEXT xmlNextChar(ctxt) 1881 1882 #define NEXT1 { \ 1883 ctxt->input->col++; \ 1884 ctxt->input->cur++; \ 1885 ctxt->nbChars++; \ 1886 if (*ctxt->input->cur == 0) \ 
1887 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 1888 } 1889 1890 #define NEXTL(l) do { \ 1891 if (*(ctxt->input->cur) == '\n') { \ 1892 ctxt->input->line++; ctxt->input->col = 1; \ 1893 } else ctxt->input->col++; \ 1894 ctxt->input->cur += l; \ 1895 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1896 } while (0) 1897 1898 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 1899 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 1900 1901 #define COPY_BUF(l,b,i,v) \ 1902 if (l == 1) b[i++] = (xmlChar) v; \ 1903 else i += xmlCopyCharMultiByte(&b[i],v) 1904 1905 /** 1906 * xmlSkipBlankChars: 1907 * @ctxt: the XML parser context 1908 * 1909 * skip all blanks character found at that point in the input streams. 1910 * It pops up finished entities in the process if allowable at that point. 1911 * 1912 * Returns the number of space chars skipped 1913 */ 1914 1915 int 1916 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 1917 int res = 0; 1918 1919 /* 1920 * It's Okay to use CUR/NEXT here since all the blanks are on 1921 * the ASCII range. 
1922 */ 1923 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 1924 const xmlChar *cur; 1925 /* 1926 * if we are in the document content, go really fast 1927 */ 1928 cur = ctxt->input->cur; 1929 while (IS_BLANK_CH(*cur)) { 1930 if (*cur == '\n') { 1931 ctxt->input->line++; ctxt->input->col = 1; 1932 } 1933 cur++; 1934 res++; 1935 if (*cur == 0) { 1936 ctxt->input->cur = cur; 1937 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1938 cur = ctxt->input->cur; 1939 } 1940 } 1941 ctxt->input->cur = cur; 1942 } else { 1943 int cur; 1944 do { 1945 cur = CUR; 1946 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 1947 NEXT; 1948 cur = CUR; 1949 res++; 1950 } 1951 while ((cur == 0) && (ctxt->inputNr > 1) && 1952 (ctxt->instate != XML_PARSER_COMMENT)) { 1953 xmlPopInput(ctxt); 1954 cur = CUR; 1955 } 1956 /* 1957 * Need to handle support of entities branching here 1958 */ 1959 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 1960 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 1961 } 1962 return(res); 1963 } 1964 1965 /************************************************************************ 1966 * * 1967 * Commodity functions to handle entities * 1968 * * 1969 ************************************************************************/ 1970 1971 /** 1972 * xmlPopInput: 1973 * @ctxt: an XML parser context 1974 * 1975 * xmlPopInput: the current input pointed by ctxt->input came to an end 1976 * pop it and return the next char. 
1977 * 1978 * Returns the current xmlChar in the parser context 1979 */ 1980 xmlChar 1981 xmlPopInput(xmlParserCtxtPtr ctxt) { 1982 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 1983 if (xmlParserDebugEntities) 1984 xmlGenericError(xmlGenericErrorContext, 1985 "Popping input %d\n", ctxt->inputNr); 1986 xmlFreeInputStream(inputPop(ctxt)); 1987 if ((*ctxt->input->cur == 0) && 1988 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1989 return(xmlPopInput(ctxt)); 1990 return(CUR); 1991 } 1992 1993 /** 1994 * xmlPushInput: 1995 * @ctxt: an XML parser context 1996 * @input: an XML parser input fragment (entity, XML fragment ...). 1997 * 1998 * xmlPushInput: switch to a new input stream which is stacked on top 1999 * of the previous one(s). 2000 * Returns -1 in case of error or the index in the input stack 2001 */ 2002 int 2003 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2004 int ret; 2005 if (input == NULL) return(-1); 2006 2007 if (xmlParserDebugEntities) { 2008 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2009 xmlGenericError(xmlGenericErrorContext, 2010 "%s(%d): ", ctxt->input->filename, 2011 ctxt->input->line); 2012 xmlGenericError(xmlGenericErrorContext, 2013 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2014 } 2015 ret = inputPush(ctxt, input); 2016 if (ctxt->instate == XML_PARSER_EOF) 2017 return(-1); 2018 GROW; 2019 return(ret); 2020 } 2021 2022 /** 2023 * xmlParseCharRef: 2024 * @ctxt: an XML parser context 2025 * 2026 * parse Reference declarations 2027 * 2028 * [66] CharRef ::= '&#' [0-9]+ ';' | 2029 * '&#x' [0-9a-fA-F]+ ';' 2030 * 2031 * [ WFC: Legal Character ] 2032 * Characters referred to using character references must match the 2033 * production for Char. 
2034 * 2035 * Returns the value parsed (as an int), 0 in case of error 2036 */ 2037 int 2038 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2039 unsigned int val = 0; 2040 int count = 0; 2041 unsigned int outofrange = 0; 2042 2043 /* 2044 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2045 */ 2046 if ((RAW == '&') && (NXT(1) == '#') && 2047 (NXT(2) == 'x')) { 2048 SKIP(3); 2049 GROW; 2050 while (RAW != ';') { /* loop blocked by count */ 2051 if (count++ > 20) { 2052 count = 0; 2053 GROW; 2054 if (ctxt->instate == XML_PARSER_EOF) 2055 return(0); 2056 } 2057 if ((RAW >= '0') && (RAW <= '9')) 2058 val = val * 16 + (CUR - '0'); 2059 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2060 val = val * 16 + (CUR - 'a') + 10; 2061 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2062 val = val * 16 + (CUR - 'A') + 10; 2063 else { 2064 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2065 val = 0; 2066 break; 2067 } 2068 if (val > 0x10FFFF) 2069 outofrange = val; 2070 2071 NEXT; 2072 count++; 2073 } 2074 if (RAW == ';') { 2075 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2076 ctxt->input->col++; 2077 ctxt->nbChars ++; 2078 ctxt->input->cur++; 2079 } 2080 } else if ((RAW == '&') && (NXT(1) == '#')) { 2081 SKIP(2); 2082 GROW; 2083 while (RAW != ';') { /* loop blocked by count */ 2084 if (count++ > 20) { 2085 count = 0; 2086 GROW; 2087 if (ctxt->instate == XML_PARSER_EOF) 2088 return(0); 2089 } 2090 if ((RAW >= '0') && (RAW <= '9')) 2091 val = val * 10 + (CUR - '0'); 2092 else { 2093 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2094 val = 0; 2095 break; 2096 } 2097 if (val > 0x10FFFF) 2098 outofrange = val; 2099 2100 NEXT; 2101 count++; 2102 } 2103 if (RAW == ';') { 2104 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2105 ctxt->input->col++; 2106 ctxt->nbChars ++; 2107 ctxt->input->cur++; 2108 } 2109 } else { 2110 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2111 } 2112 2113 /* 2114 * [ WFC: 
Legal Character ] 2115 * Characters referred to using character references must match the 2116 * production for Char. 2117 */ 2118 if ((IS_CHAR(val) && (outofrange == 0))) { 2119 return(val); 2120 } else { 2121 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2122 "xmlParseCharRef: invalid xmlChar value %d\n", 2123 val); 2124 } 2125 return(0); 2126 } 2127 2128 /** 2129 * xmlParseStringCharRef: 2130 * @ctxt: an XML parser context 2131 * @str: a pointer to an index in the string 2132 * 2133 * parse Reference declarations, variant parsing from a string rather 2134 * than an an input flow. 2135 * 2136 * [66] CharRef ::= '&#' [0-9]+ ';' | 2137 * '&#x' [0-9a-fA-F]+ ';' 2138 * 2139 * [ WFC: Legal Character ] 2140 * Characters referred to using character references must match the 2141 * production for Char. 2142 * 2143 * Returns the value parsed (as an int), 0 in case of error, str will be 2144 * updated to the current value of the index 2145 */ 2146 static int 2147 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2148 const xmlChar *ptr; 2149 xmlChar cur; 2150 unsigned int val = 0; 2151 unsigned int outofrange = 0; 2152 2153 if ((str == NULL) || (*str == NULL)) return(0); 2154 ptr = *str; 2155 cur = *ptr; 2156 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2157 ptr += 3; 2158 cur = *ptr; 2159 while (cur != ';') { /* Non input consuming loop */ 2160 if ((cur >= '0') && (cur <= '9')) 2161 val = val * 16 + (cur - '0'); 2162 else if ((cur >= 'a') && (cur <= 'f')) 2163 val = val * 16 + (cur - 'a') + 10; 2164 else if ((cur >= 'A') && (cur <= 'F')) 2165 val = val * 16 + (cur - 'A') + 10; 2166 else { 2167 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2168 val = 0; 2169 break; 2170 } 2171 if (val > 0x10FFFF) 2172 outofrange = val; 2173 2174 ptr++; 2175 cur = *ptr; 2176 } 2177 if (cur == ';') 2178 ptr++; 2179 } else if ((cur == '&') && (ptr[1] == '#')){ 2180 ptr += 2; 2181 cur = *ptr; 2182 while (cur != ';') { /* Non input consuming loops */ 2183 
if ((cur >= '0') && (cur <= '9')) 2184 val = val * 10 + (cur - '0'); 2185 else { 2186 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2187 val = 0; 2188 break; 2189 } 2190 if (val > 0x10FFFF) 2191 outofrange = val; 2192 2193 ptr++; 2194 cur = *ptr; 2195 } 2196 if (cur == ';') 2197 ptr++; 2198 } else { 2199 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2200 return(0); 2201 } 2202 *str = ptr; 2203 2204 /* 2205 * [ WFC: Legal Character ] 2206 * Characters referred to using character references must match the 2207 * production for Char. 2208 */ 2209 if ((IS_CHAR(val) && (outofrange == 0))) { 2210 return(val); 2211 } else { 2212 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2213 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2214 val); 2215 } 2216 return(0); 2217 } 2218 2219 /** 2220 * xmlNewBlanksWrapperInputStream: 2221 * @ctxt: an XML parser context 2222 * @entity: an Entity pointer 2223 * 2224 * Create a new input stream for wrapping 2225 * blanks around a PEReference 2226 * 2227 * Returns the new input stream or NULL 2228 */ 2229 2230 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2231 2232 static xmlParserInputPtr 2233 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2234 xmlParserInputPtr input; 2235 xmlChar *buffer; 2236 size_t length; 2237 if (entity == NULL) { 2238 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2239 "xmlNewBlanksWrapperInputStream entity\n"); 2240 return(NULL); 2241 } 2242 if (xmlParserDebugEntities) 2243 xmlGenericError(xmlGenericErrorContext, 2244 "new blanks wrapper for entity: %s\n", entity->name); 2245 input = xmlNewInputStream(ctxt); 2246 if (input == NULL) { 2247 return(NULL); 2248 } 2249 length = xmlStrlen(entity->name) + 5; 2250 buffer = xmlMallocAtomic(length); 2251 if (buffer == NULL) { 2252 xmlErrMemory(ctxt, NULL); 2253 xmlFree(input); 2254 return(NULL); 2255 } 2256 buffer [0] = ' '; 2257 buffer [1] = '%'; 2258 buffer [length-3] = ';'; 2259 buffer [length-2] = ' '; 2260 
buffer [length-1] = 0; 2261 memcpy(buffer + 2, entity->name, length - 5); 2262 input->free = deallocblankswrapper; 2263 input->base = buffer; 2264 input->cur = buffer; 2265 input->length = length; 2266 input->end = &buffer[length]; 2267 return(input); 2268 } 2269 2270 /** 2271 * xmlParserHandlePEReference: 2272 * @ctxt: the parser context 2273 * 2274 * [69] PEReference ::= '%' Name ';' 2275 * 2276 * [ WFC: No Recursion ] 2277 * A parsed entity must not contain a recursive 2278 * reference to itself, either directly or indirectly. 2279 * 2280 * [ WFC: Entity Declared ] 2281 * In a document without any DTD, a document with only an internal DTD 2282 * subset which contains no parameter entity references, or a document 2283 * with "standalone='yes'", ... ... The declaration of a parameter 2284 * entity must precede any reference to it... 2285 * 2286 * [ VC: Entity Declared ] 2287 * In a document with an external subset or external parameter entities 2288 * with "standalone='no'", ... ... The declaration of a parameter entity 2289 * must precede any reference to it... 2290 * 2291 * [ WFC: In DTD ] 2292 * Parameter-entity references may only appear in the DTD. 2293 * NOTE: misleading but this is handled. 2294 * 2295 * A PEReference may have been detected in the current input stream 2296 * the handling is done accordingly to 2297 * http://www.w3.org/TR/REC-xml#entproc 2298 * i.e. 
2299 * - Included in literal in entity values 2300 * - Included as Parameter Entity reference within DTDs 2301 */ 2302 void 2303 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2304 const xmlChar *name; 2305 xmlEntityPtr entity = NULL; 2306 xmlParserInputPtr input; 2307 2308 if (RAW != '%') return; 2309 switch(ctxt->instate) { 2310 case XML_PARSER_CDATA_SECTION: 2311 return; 2312 case XML_PARSER_COMMENT: 2313 return; 2314 case XML_PARSER_START_TAG: 2315 return; 2316 case XML_PARSER_END_TAG: 2317 return; 2318 case XML_PARSER_EOF: 2319 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2320 return; 2321 case XML_PARSER_PROLOG: 2322 case XML_PARSER_START: 2323 case XML_PARSER_MISC: 2324 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2325 return; 2326 case XML_PARSER_ENTITY_DECL: 2327 case XML_PARSER_CONTENT: 2328 case XML_PARSER_ATTRIBUTE_VALUE: 2329 case XML_PARSER_PI: 2330 case XML_PARSER_SYSTEM_LITERAL: 2331 case XML_PARSER_PUBLIC_LITERAL: 2332 /* we just ignore it there */ 2333 return; 2334 case XML_PARSER_EPILOG: 2335 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2336 return; 2337 case XML_PARSER_ENTITY_VALUE: 2338 /* 2339 * NOTE: in the case of entity values, we don't do the 2340 * substitution here since we need the literal 2341 * entity value to be able to save the internal 2342 * subset of the document. 2343 * This will be handled by xmlStringDecodeEntities 2344 */ 2345 return; 2346 case XML_PARSER_DTD: 2347 /* 2348 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2349 * In the internal DTD subset, parameter-entity references 2350 * can occur only where markup declarations can occur, not 2351 * within markup declarations. 
2352 * In that case this is handled in xmlParseMarkupDecl 2353 */ 2354 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2355 return; 2356 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2357 return; 2358 break; 2359 case XML_PARSER_IGNORE: 2360 return; 2361 } 2362 2363 NEXT; 2364 name = xmlParseName(ctxt); 2365 if (xmlParserDebugEntities) 2366 xmlGenericError(xmlGenericErrorContext, 2367 "PEReference: %s\n", name); 2368 if (name == NULL) { 2369 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2370 } else { 2371 if (RAW == ';') { 2372 NEXT; 2373 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2374 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2375 if (ctxt->instate == XML_PARSER_EOF) 2376 return; 2377 if (entity == NULL) { 2378 2379 /* 2380 * [ WFC: Entity Declared ] 2381 * In a document without any DTD, a document with only an 2382 * internal DTD subset which contains no parameter entity 2383 * references, or a document with "standalone='yes'", ... 2384 * ... The declaration of a parameter entity must precede 2385 * any reference to it... 2386 */ 2387 if ((ctxt->standalone == 1) || 2388 ((ctxt->hasExternalSubset == 0) && 2389 (ctxt->hasPErefs == 0))) { 2390 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2391 "PEReference: %%%s; not found\n", name); 2392 } else { 2393 /* 2394 * [ VC: Entity Declared ] 2395 * In a document with an external subset or external 2396 * parameter entities with "standalone='no'", ... 2397 * ... The declaration of a parameter entity must precede 2398 * any reference to it... 
2399 */ 2400 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2401 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2402 "PEReference: %%%s; not found\n", 2403 name, NULL); 2404 } else 2405 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2406 "PEReference: %%%s; not found\n", 2407 name, NULL); 2408 ctxt->valid = 0; 2409 } 2410 } else if (ctxt->input->free != deallocblankswrapper) { 2411 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2412 if (xmlPushInput(ctxt, input) < 0) 2413 return; 2414 } else { 2415 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2416 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2417 xmlChar start[4]; 2418 xmlCharEncoding enc; 2419 2420 /* 2421 * handle the extra spaces added before and after 2422 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2423 * this is done independently. 2424 */ 2425 input = xmlNewEntityInputStream(ctxt, entity); 2426 if (xmlPushInput(ctxt, input) < 0) 2427 return; 2428 2429 /* 2430 * Get the 4 first bytes and decode the charset 2431 * if enc != XML_CHAR_ENCODING_NONE 2432 * plug some encoding conversion routines. 2433 * Note that, since we may have some non-UTF8 2434 * encoding (like UTF16, bug 135229), the 'length' 2435 * is not known, but we can calculate based upon 2436 * the amount of data in the buffer. 
2437 */ 2438 GROW 2439 if (ctxt->instate == XML_PARSER_EOF) 2440 return; 2441 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2442 start[0] = RAW; 2443 start[1] = NXT(1); 2444 start[2] = NXT(2); 2445 start[3] = NXT(3); 2446 enc = xmlDetectCharEncoding(start, 4); 2447 if (enc != XML_CHAR_ENCODING_NONE) { 2448 xmlSwitchEncoding(ctxt, enc); 2449 } 2450 } 2451 2452 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2453 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2454 (IS_BLANK_CH(NXT(5)))) { 2455 xmlParseTextDecl(ctxt); 2456 } 2457 } else { 2458 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2459 "PEReference: %s is not a parameter entity\n", 2460 name); 2461 } 2462 } 2463 } else { 2464 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2465 } 2466 } 2467 } 2468 2469 /* 2470 * Macro used to grow the current buffer. 2471 */ 2472 #define growBuffer(buffer, n) { \ 2473 xmlChar *tmp; \ 2474 buffer##_size *= 2; \ 2475 buffer##_size += n; \ 2476 tmp = (xmlChar *) \ 2477 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 2478 if (tmp == NULL) goto mem_error; \ 2479 buffer = tmp; \ 2480 } 2481 2482 /** 2483 * xmlStringLenDecodeEntities: 2484 * @ctxt: the parser context 2485 * @str: the input string 2486 * @len: the string length 2487 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2488 * @end: an end marker xmlChar, 0 if none 2489 * @end2: an end marker xmlChar, 0 if none 2490 * @end3: an end marker xmlChar, 0 if none 2491 * 2492 * Takes a entity string content and process to do the adequate substitutions. 2493 * 2494 * [67] Reference ::= EntityRef | CharRef 2495 * 2496 * [69] PEReference ::= '%' Name ';' 2497 * 2498 * Returns A newly allocated string with the substitution done. The caller 2499 * must deallocate it ! 
2500 */ 2501 xmlChar * 2502 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2503 int what, xmlChar end, xmlChar end2, xmlChar end3) { 2504 xmlChar *buffer = NULL; 2505 int buffer_size = 0; 2506 2507 xmlChar *current = NULL; 2508 xmlChar *rep = NULL; 2509 const xmlChar *last; 2510 xmlEntityPtr ent; 2511 int c,l; 2512 int nbchars = 0; 2513 2514 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2515 return(NULL); 2516 last = str + len; 2517 2518 if (((ctxt->depth > 40) && 2519 ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2520 (ctxt->depth > 1024)) { 2521 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2522 return(NULL); 2523 } 2524 2525 /* 2526 * allocate a translation buffer. 2527 */ 2528 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2529 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 2530 if (buffer == NULL) goto mem_error; 2531 2532 /* 2533 * OK loop until we reach one of the ending char or a size limit. 2534 * we are operating on already parsed values. 
2535 */ 2536 if (str < last) 2537 c = CUR_SCHAR(str, l); 2538 else 2539 c = 0; 2540 while ((c != 0) && (c != end) && /* non input consuming loop */ 2541 (c != end2) && (c != end3)) { 2542 2543 if (c == 0) break; 2544 if ((c == '&') && (str[1] == '#')) { 2545 int val = xmlParseStringCharRef(ctxt, &str); 2546 if (val != 0) { 2547 COPY_BUF(0,buffer,nbchars,val); 2548 } 2549 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2550 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2551 } 2552 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2553 if (xmlParserDebugEntities) 2554 xmlGenericError(xmlGenericErrorContext, 2555 "String decoding Entity Reference: %.30s\n", 2556 str); 2557 ent = xmlParseStringEntityRef(ctxt, &str); 2558 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2559 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2560 goto int_error; 2561 if (ent != NULL) 2562 ctxt->nbentities += ent->checked; 2563 if ((ent != NULL) && 2564 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2565 if (ent->content != NULL) { 2566 COPY_BUF(0,buffer,nbchars,ent->content[0]); 2567 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2568 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2569 } 2570 } else { 2571 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2572 "predefined entity has no content\n"); 2573 } 2574 } else if ((ent != NULL) && (ent->content != NULL)) { 2575 ctxt->depth++; 2576 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2577 0, 0, 0); 2578 ctxt->depth--; 2579 2580 if (rep != NULL) { 2581 current = rep; 2582 while (*current != 0) { /* non input consuming loop */ 2583 buffer[nbchars++] = *current++; 2584 if (nbchars > 2585 buffer_size - XML_PARSER_BUFFER_SIZE) { 2586 if (xmlParserEntityCheck(ctxt, nbchars, ent)) 2587 goto int_error; 2588 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2589 } 2590 } 2591 xmlFree(rep); 2592 rep = NULL; 2593 } 2594 } else if (ent != NULL) { 2595 int i = xmlStrlen(ent->name); 2596 const xmlChar *cur = ent->name; 2597 2598 
buffer[nbchars++] = '&'; 2599 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 2600 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); 2601 } 2602 for (;i > 0;i--) 2603 buffer[nbchars++] = *cur++; 2604 buffer[nbchars++] = ';'; 2605 } 2606 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2607 if (xmlParserDebugEntities) 2608 xmlGenericError(xmlGenericErrorContext, 2609 "String decoding PE Reference: %.30s\n", str); 2610 ent = xmlParseStringPEReference(ctxt, &str); 2611 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 2612 goto int_error; 2613 if (ent != NULL) 2614 ctxt->nbentities += ent->checked; 2615 if (ent != NULL) { 2616 if (ent->content == NULL) { 2617 xmlLoadEntityContent(ctxt, ent); 2618 } 2619 ctxt->depth++; 2620 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2621 0, 0, 0); 2622 ctxt->depth--; 2623 if (rep != NULL) { 2624 current = rep; 2625 while (*current != 0) { /* non input consuming loop */ 2626 buffer[nbchars++] = *current++; 2627 if (nbchars > 2628 buffer_size - XML_PARSER_BUFFER_SIZE) { 2629 if (xmlParserEntityCheck(ctxt, nbchars, ent)) 2630 goto int_error; 2631 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2632 } 2633 } 2634 xmlFree(rep); 2635 rep = NULL; 2636 } 2637 } 2638 } else { 2639 COPY_BUF(l,buffer,nbchars,c); 2640 str += l; 2641 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2642 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2643 } 2644 } 2645 if (str < last) 2646 c = CUR_SCHAR(str, l); 2647 else 2648 c = 0; 2649 } 2650 buffer[nbchars] = 0; 2651 return(buffer); 2652 2653 mem_error: 2654 xmlErrMemory(ctxt, NULL); 2655 int_error: 2656 if (rep != NULL) 2657 xmlFree(rep); 2658 if (buffer != NULL) 2659 xmlFree(buffer); 2660 return(NULL); 2661 } 2662 2663 /** 2664 * xmlStringDecodeEntities: 2665 * @ctxt: the parser context 2666 * @str: the input string 2667 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2668 * @end: an end marker xmlChar, 0 if none 2669 * @end2: an end marker xmlChar, 0 if none 2670 * 
@end3: an end marker xmlChar, 0 if none 2671 * 2672 * Takes a entity string content and process to do the adequate substitutions. 2673 * 2674 * [67] Reference ::= EntityRef | CharRef 2675 * 2676 * [69] PEReference ::= '%' Name ';' 2677 * 2678 * Returns A newly allocated string with the substitution done. The caller 2679 * must deallocate it ! 2680 */ 2681 xmlChar * 2682 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2683 xmlChar end, xmlChar end2, xmlChar end3) { 2684 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2685 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2686 end, end2, end3)); 2687 } 2688 2689 /************************************************************************ 2690 * * 2691 * Commodity functions, cleanup needed ? * 2692 * * 2693 ************************************************************************/ 2694 2695 /** 2696 * areBlanks: 2697 * @ctxt: an XML parser context 2698 * @str: a xmlChar * 2699 * @len: the size of @str 2700 * @blank_chars: we know the chars are blanks 2701 * 2702 * Is this a sequence of blank chars that one can ignore ? 2703 * 2704 * Returns 1 if ignorable 0 otherwise. 2705 */ 2706 2707 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2708 int blank_chars) { 2709 int i, ret; 2710 xmlNodePtr lastChild; 2711 2712 /* 2713 * Don't spend time trying to differentiate them, the same callback is 2714 * used ! 2715 */ 2716 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2717 return(0); 2718 2719 /* 2720 * Check for xml:space value. 
2721 */ 2722 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2723 (*(ctxt->space) == -2)) 2724 return(0); 2725 2726 /* 2727 * Check that the string is made of blanks 2728 */ 2729 if (blank_chars == 0) { 2730 for (i = 0;i < len;i++) 2731 if (!(IS_BLANK_CH(str[i]))) return(0); 2732 } 2733 2734 /* 2735 * Look if the element is mixed content in the DTD if available 2736 */ 2737 if (ctxt->node == NULL) return(0); 2738 if (ctxt->myDoc != NULL) { 2739 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2740 if (ret == 0) return(1); 2741 if (ret == 1) return(0); 2742 } 2743 2744 /* 2745 * Otherwise, heuristic :-\ 2746 */ 2747 if ((RAW != '<') && (RAW != 0xD)) return(0); 2748 if ((ctxt->node->children == NULL) && 2749 (RAW == '<') && (NXT(1) == '/')) return(0); 2750 2751 lastChild = xmlGetLastChild(ctxt->node); 2752 if (lastChild == NULL) { 2753 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2754 (ctxt->node->content != NULL)) return(0); 2755 } else if (xmlNodeIsText(lastChild)) 2756 return(0); 2757 else if ((ctxt->node->children != NULL) && 2758 (xmlNodeIsText(ctxt->node->children))) 2759 return(0); 2760 return(1); 2761 } 2762 2763 /************************************************************************ 2764 * * 2765 * Extra stuff for namespace support * 2766 * Relates to http://www.w3.org/TR/WD-xml-names * 2767 * * 2768 ************************************************************************/ 2769 2770 /** 2771 * xmlSplitQName: 2772 * @ctxt: an XML parser context 2773 * @name: an XML parser context 2774 * @prefix: a xmlChar ** 2775 * 2776 * parse an UTF8 encoded XML qualified name string 2777 * 2778 * [NS 5] QName ::= (Prefix ':')? LocalPart 2779 * 2780 * [NS 6] Prefix ::= NCName 2781 * 2782 * [NS 7] LocalPart ::= NCName 2783 * 2784 * Returns the local part, and prefix is updated 2785 * to get the Prefix if any. 
2786 */ 2787 2788 xmlChar * 2789 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2790 xmlChar buf[XML_MAX_NAMELEN + 5]; 2791 xmlChar *buffer = NULL; 2792 int len = 0; 2793 int max = XML_MAX_NAMELEN; 2794 xmlChar *ret = NULL; 2795 const xmlChar *cur = name; 2796 int c; 2797 2798 if (prefix == NULL) return(NULL); 2799 *prefix = NULL; 2800 2801 if (cur == NULL) return(NULL); 2802 2803 #ifndef XML_XML_NAMESPACE 2804 /* xml: prefix is not really a namespace */ 2805 if ((cur[0] == 'x') && (cur[1] == 'm') && 2806 (cur[2] == 'l') && (cur[3] == ':')) 2807 return(xmlStrdup(name)); 2808 #endif 2809 2810 /* nasty but well=formed */ 2811 if (cur[0] == ':') 2812 return(xmlStrdup(name)); 2813 2814 c = *cur++; 2815 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2816 buf[len++] = c; 2817 c = *cur++; 2818 } 2819 if (len >= max) { 2820 /* 2821 * Okay someone managed to make a huge name, so he's ready to pay 2822 * for the processing speed. 2823 */ 2824 max = len * 2; 2825 2826 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2827 if (buffer == NULL) { 2828 xmlErrMemory(ctxt, NULL); 2829 return(NULL); 2830 } 2831 memcpy(buffer, buf, len); 2832 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2833 if (len + 10 > max) { 2834 xmlChar *tmp; 2835 2836 max *= 2; 2837 tmp = (xmlChar *) xmlRealloc(buffer, 2838 max * sizeof(xmlChar)); 2839 if (tmp == NULL) { 2840 xmlFree(buffer); 2841 xmlErrMemory(ctxt, NULL); 2842 return(NULL); 2843 } 2844 buffer = tmp; 2845 } 2846 buffer[len++] = c; 2847 c = *cur++; 2848 } 2849 buffer[len] = 0; 2850 } 2851 2852 if ((c == ':') && (*cur == 0)) { 2853 if (buffer != NULL) 2854 xmlFree(buffer); 2855 *prefix = NULL; 2856 return(xmlStrdup(name)); 2857 } 2858 2859 if (buffer == NULL) 2860 ret = xmlStrndup(buf, len); 2861 else { 2862 ret = buffer; 2863 buffer = NULL; 2864 max = XML_MAX_NAMELEN; 2865 } 2866 2867 2868 if (c == ':') { 2869 c = *cur; 2870 *prefix = ret; 2871 if (c == 0) { 2872 
return(xmlStrndup(BAD_CAST "", 0)); 2873 } 2874 len = 0; 2875 2876 /* 2877 * Check that the first character is proper to start 2878 * a new name 2879 */ 2880 if (!(((c >= 0x61) && (c <= 0x7A)) || 2881 ((c >= 0x41) && (c <= 0x5A)) || 2882 (c == '_') || (c == ':'))) { 2883 int l; 2884 int first = CUR_SCHAR(cur, l); 2885 2886 if (!IS_LETTER(first) && (first != '_')) { 2887 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 2888 "Name %s is not XML Namespace compliant\n", 2889 name); 2890 } 2891 } 2892 cur++; 2893 2894 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 2895 buf[len++] = c; 2896 c = *cur++; 2897 } 2898 if (len >= max) { 2899 /* 2900 * Okay someone managed to make a huge name, so he's ready to pay 2901 * for the processing speed. 2902 */ 2903 max = len * 2; 2904 2905 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2906 if (buffer == NULL) { 2907 xmlErrMemory(ctxt, NULL); 2908 return(NULL); 2909 } 2910 memcpy(buffer, buf, len); 2911 while (c != 0) { /* tested bigname2.xml */ 2912 if (len + 10 > max) { 2913 xmlChar *tmp; 2914 2915 max *= 2; 2916 tmp = (xmlChar *) xmlRealloc(buffer, 2917 max * sizeof(xmlChar)); 2918 if (tmp == NULL) { 2919 xmlErrMemory(ctxt, NULL); 2920 xmlFree(buffer); 2921 return(NULL); 2922 } 2923 buffer = tmp; 2924 } 2925 buffer[len++] = c; 2926 c = *cur++; 2927 } 2928 buffer[len] = 0; 2929 } 2930 2931 if (buffer == NULL) 2932 ret = xmlStrndup(buf, len); 2933 else { 2934 ret = buffer; 2935 } 2936 } 2937 2938 return(ret); 2939 } 2940 2941 /************************************************************************ 2942 * * 2943 * The parser itself * 2944 * Relates to http://www.w3.org/TR/REC-xml * 2945 * * 2946 ************************************************************************/ 2947 2948 /************************************************************************ 2949 * * 2950 * Routines to parse Name, NCName and NmToken * 2951 * * 2952 ************************************************************************/ 2953 #ifdef 
DEBUG 2954 static unsigned long nbParseName = 0; 2955 static unsigned long nbParseNmToken = 0; 2956 static unsigned long nbParseNCName = 0; 2957 static unsigned long nbParseNCNameComplex = 0; 2958 static unsigned long nbParseNameComplex = 0; 2959 static unsigned long nbParseStringName = 0; 2960 #endif 2961 2962 /* 2963 * The two following functions are related to the change of accepted 2964 * characters for Name and NmToken in the Revision 5 of XML-1.0 2965 * They correspond to the modified production [4] and the new production [4a] 2966 * changes in that revision. Also note that the macros used for the 2967 * productions Letter, Digit, CombiningChar and Extender are not needed 2968 * anymore. 2969 * We still keep compatibility to pre-revision5 parsing semantic if the 2970 * new XML_PARSE_OLD10 option is given to the parser. 2971 */ 2972 static int 2973 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 2974 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 2975 /* 2976 * Use the new checks of production [4] [4a] amd [5] of the 2977 * Update 5 of XML-1.0 2978 */ 2979 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 2980 (((c >= 'a') && (c <= 'z')) || 2981 ((c >= 'A') && (c <= 'Z')) || 2982 (c == '_') || (c == ':') || 2983 ((c >= 0xC0) && (c <= 0xD6)) || 2984 ((c >= 0xD8) && (c <= 0xF6)) || 2985 ((c >= 0xF8) && (c <= 0x2FF)) || 2986 ((c >= 0x370) && (c <= 0x37D)) || 2987 ((c >= 0x37F) && (c <= 0x1FFF)) || 2988 ((c >= 0x200C) && (c <= 0x200D)) || 2989 ((c >= 0x2070) && (c <= 0x218F)) || 2990 ((c >= 0x2C00) && (c <= 0x2FEF)) || 2991 ((c >= 0x3001) && (c <= 0xD7FF)) || 2992 ((c >= 0xF900) && (c <= 0xFDCF)) || 2993 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 2994 ((c >= 0x10000) && (c <= 0xEFFFF)))) 2995 return(1); 2996 } else { 2997 if (IS_LETTER(c) || (c == '_') || (c == ':')) 2998 return(1); 2999 } 3000 return(0); 3001 } 3002 3003 static int 3004 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3005 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3006 /* 3007 * Use 
the new checks of production [4] [4a] amd [5] of the 3008 * Update 5 of XML-1.0 3009 */ 3010 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3011 (((c >= 'a') && (c <= 'z')) || 3012 ((c >= 'A') && (c <= 'Z')) || 3013 ((c >= '0') && (c <= '9')) || /* !start */ 3014 (c == '_') || (c == ':') || 3015 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3016 ((c >= 0xC0) && (c <= 0xD6)) || 3017 ((c >= 0xD8) && (c <= 0xF6)) || 3018 ((c >= 0xF8) && (c <= 0x2FF)) || 3019 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3020 ((c >= 0x370) && (c <= 0x37D)) || 3021 ((c >= 0x37F) && (c <= 0x1FFF)) || 3022 ((c >= 0x200C) && (c <= 0x200D)) || 3023 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3024 ((c >= 0x2070) && (c <= 0x218F)) || 3025 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3026 ((c >= 0x3001) && (c <= 0xD7FF)) || 3027 ((c >= 0xF900) && (c <= 0xFDCF)) || 3028 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3029 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3030 return(1); 3031 } else { 3032 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3033 (c == '.') || (c == '-') || 3034 (c == '_') || (c == ':') || 3035 (IS_COMBINING(c)) || 3036 (IS_EXTENDER(c))) 3037 return(1); 3038 } 3039 return(0); 3040 } 3041 3042 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3043 int *len, int *alloc, int normalize); 3044 3045 static const xmlChar * 3046 xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3047 int len = 0, l; 3048 int c; 3049 int count = 0; 3050 3051 #ifdef DEBUG 3052 nbParseNameComplex++; 3053 #endif 3054 3055 /* 3056 * Handler for more complex cases 3057 */ 3058 GROW; 3059 if (ctxt->instate == XML_PARSER_EOF) 3060 return(NULL); 3061 c = CUR_CHAR(l); 3062 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3063 /* 3064 * Use the new checks of production [4] [4a] amd [5] of the 3065 * Update 5 of XML-1.0 3066 */ 3067 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3068 (!(((c >= 'a') && (c <= 'z')) || 3069 ((c >= 'A') && (c <= 'Z')) || 3070 (c == '_') || (c == ':') || 3071 
((c >= 0xC0) && (c <= 0xD6)) || 3072 ((c >= 0xD8) && (c <= 0xF6)) || 3073 ((c >= 0xF8) && (c <= 0x2FF)) || 3074 ((c >= 0x370) && (c <= 0x37D)) || 3075 ((c >= 0x37F) && (c <= 0x1FFF)) || 3076 ((c >= 0x200C) && (c <= 0x200D)) || 3077 ((c >= 0x2070) && (c <= 0x218F)) || 3078 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3079 ((c >= 0x3001) && (c <= 0xD7FF)) || 3080 ((c >= 0xF900) && (c <= 0xFDCF)) || 3081 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3082 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3083 return(NULL); 3084 } 3085 len += l; 3086 NEXTL(l); 3087 c = CUR_CHAR(l); 3088 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3089 (((c >= 'a') && (c <= 'z')) || 3090 ((c >= 'A') && (c <= 'Z')) || 3091 ((c >= '0') && (c <= '9')) || /* !start */ 3092 (c == '_') || (c == ':') || 3093 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3094 ((c >= 0xC0) && (c <= 0xD6)) || 3095 ((c >= 0xD8) && (c <= 0xF6)) || 3096 ((c >= 0xF8) && (c <= 0x2FF)) || 3097 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3098 ((c >= 0x370) && (c <= 0x37D)) || 3099 ((c >= 0x37F) && (c <= 0x1FFF)) || 3100 ((c >= 0x200C) && (c <= 0x200D)) || 3101 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3102 ((c >= 0x2070) && (c <= 0x218F)) || 3103 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3104 ((c >= 0x3001) && (c <= 0xD7FF)) || 3105 ((c >= 0xF900) && (c <= 0xFDCF)) || 3106 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3107 ((c >= 0x10000) && (c <= 0xEFFFF)) 3108 )) { 3109 if (count++ > 100) { 3110 count = 0; 3111 GROW; 3112 if (ctxt->instate == XML_PARSER_EOF) 3113 return(NULL); 3114 } 3115 len += l; 3116 NEXTL(l); 3117 c = CUR_CHAR(l); 3118 } 3119 } else { 3120 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3121 (!IS_LETTER(c) && (c != '_') && 3122 (c != ':'))) { 3123 return(NULL); 3124 } 3125 len += l; 3126 NEXTL(l); 3127 c = CUR_CHAR(l); 3128 3129 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3130 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3131 (c == '.') || (c == '-') || 3132 (c == 
'_') || (c == ':') || 3133 (IS_COMBINING(c)) || 3134 (IS_EXTENDER(c)))) { 3135 if (count++ > 100) { 3136 count = 0; 3137 GROW; 3138 if (ctxt->instate == XML_PARSER_EOF) 3139 return(NULL); 3140 } 3141 len += l; 3142 NEXTL(l); 3143 c = CUR_CHAR(l); 3144 } 3145 } 3146 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3147 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3148 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3149 } 3150 3151 /** 3152 * xmlParseName: 3153 * @ctxt: an XML parser context 3154 * 3155 * parse an XML name. 3156 * 3157 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3158 * CombiningChar | Extender 3159 * 3160 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3161 * 3162 * [6] Names ::= Name (#x20 Name)* 3163 * 3164 * Returns the Name parsed or NULL 3165 */ 3166 3167 const xmlChar * 3168 xmlParseName(xmlParserCtxtPtr ctxt) { 3169 const xmlChar *in; 3170 const xmlChar *ret; 3171 int count = 0; 3172 3173 GROW; 3174 3175 #ifdef DEBUG 3176 nbParseName++; 3177 #endif 3178 3179 /* 3180 * Accelerator for simple ASCII names 3181 */ 3182 in = ctxt->input->cur; 3183 if (((*in >= 0x61) && (*in <= 0x7A)) || 3184 ((*in >= 0x41) && (*in <= 0x5A)) || 3185 (*in == '_') || (*in == ':')) { 3186 in++; 3187 while (((*in >= 0x61) && (*in <= 0x7A)) || 3188 ((*in >= 0x41) && (*in <= 0x5A)) || 3189 ((*in >= 0x30) && (*in <= 0x39)) || 3190 (*in == '_') || (*in == '-') || 3191 (*in == ':') || (*in == '.')) 3192 in++; 3193 if ((*in > 0) && (*in < 0x80)) { 3194 count = in - ctxt->input->cur; 3195 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3196 ctxt->input->cur = in; 3197 ctxt->nbChars += count; 3198 ctxt->input->col += count; 3199 if (ret == NULL) 3200 xmlErrMemory(ctxt, NULL); 3201 return(ret); 3202 } 3203 } 3204 /* accelerator for special cases */ 3205 return(xmlParseNameComplex(ctxt)); 3206 } 3207 3208 static const xmlChar * 3209 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3210 int len = 
0, l; 3211 int c; 3212 int count = 0; 3213 3214 #ifdef DEBUG 3215 nbParseNCNameComplex++; 3216 #endif 3217 3218 /* 3219 * Handler for more complex cases 3220 */ 3221 GROW; 3222 c = CUR_CHAR(l); 3223 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3224 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3225 return(NULL); 3226 } 3227 3228 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3229 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3230 if (count++ > 100) { 3231 count = 0; 3232 GROW; 3233 if (ctxt->instate == XML_PARSER_EOF) 3234 return(NULL); 3235 } 3236 len += l; 3237 NEXTL(l); 3238 c = CUR_CHAR(l); 3239 } 3240 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3241 } 3242 3243 /** 3244 * xmlParseNCName: 3245 * @ctxt: an XML parser context 3246 * @len: lenght of the string parsed 3247 * 3248 * parse an XML name. 3249 * 3250 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 3251 * CombiningChar | Extender 3252 * 3253 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3254 * 3255 * Returns the Name parsed or NULL 3256 */ 3257 3258 static const xmlChar * 3259 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3260 const xmlChar *in; 3261 const xmlChar *ret; 3262 int count = 0; 3263 3264 #ifdef DEBUG 3265 nbParseNCName++; 3266 #endif 3267 3268 /* 3269 * Accelerator for simple ASCII names 3270 */ 3271 in = ctxt->input->cur; 3272 if (((*in >= 0x61) && (*in <= 0x7A)) || 3273 ((*in >= 0x41) && (*in <= 0x5A)) || 3274 (*in == '_')) { 3275 in++; 3276 while (((*in >= 0x61) && (*in <= 0x7A)) || 3277 ((*in >= 0x41) && (*in <= 0x5A)) || 3278 ((*in >= 0x30) && (*in <= 0x39)) || 3279 (*in == '_') || (*in == '-') || 3280 (*in == '.')) 3281 in++; 3282 if ((*in > 0) && (*in < 0x80)) { 3283 count = in - ctxt->input->cur; 3284 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3285 ctxt->input->cur = in; 3286 ctxt->nbChars += count; 3287 ctxt->input->col += count; 3288 if (ret == NULL) { 3289 xmlErrMemory(ctxt, NULL); 3290 } 3291 
return(ret); 3292 } 3293 } 3294 return(xmlParseNCNameComplex(ctxt)); 3295 } 3296 3297 /** 3298 * xmlParseNameAndCompare: 3299 * @ctxt: an XML parser context 3300 * 3301 * parse an XML name and compares for match 3302 * (specialized for endtag parsing) 3303 * 3304 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3305 * and the name for mismatch 3306 */ 3307 3308 static const xmlChar * 3309 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3310 register const xmlChar *cmp = other; 3311 register const xmlChar *in; 3312 const xmlChar *ret; 3313 3314 GROW; 3315 if (ctxt->instate == XML_PARSER_EOF) 3316 return(NULL); 3317 3318 in = ctxt->input->cur; 3319 while (*in != 0 && *in == *cmp) { 3320 ++in; 3321 ++cmp; 3322 ctxt->input->col++; 3323 } 3324 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3325 /* success */ 3326 ctxt->input->cur = in; 3327 return (const xmlChar*) 1; 3328 } 3329 /* failure (or end of input buffer), check with full function */ 3330 ret = xmlParseName (ctxt); 3331 /* strings coming from the dictionnary direct compare possible */ 3332 if (ret == other) { 3333 return (const xmlChar*) 1; 3334 } 3335 return ret; 3336 } 3337 3338 /** 3339 * xmlParseStringName: 3340 * @ctxt: an XML parser context 3341 * @str: a pointer to the string pointer (IN/OUT) 3342 * 3343 * parse an XML name. 3344 * 3345 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3346 * CombiningChar | Extender 3347 * 3348 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3349 * 3350 * [6] Names ::= Name (#x20 Name)* 3351 * 3352 * Returns the Name parsed or NULL. The @str pointer 3353 * is updated to the current location in the string. 
3354 */ 3355 3356 static xmlChar * 3357 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3358 xmlChar buf[XML_MAX_NAMELEN + 5]; 3359 const xmlChar *cur = *str; 3360 int len = 0, l; 3361 int c; 3362 3363 #ifdef DEBUG 3364 nbParseStringName++; 3365 #endif 3366 3367 c = CUR_SCHAR(cur, l); 3368 if (!xmlIsNameStartChar(ctxt, c)) { 3369 return(NULL); 3370 } 3371 3372 COPY_BUF(l,buf,len,c); 3373 cur += l; 3374 c = CUR_SCHAR(cur, l); 3375 while (xmlIsNameChar(ctxt, c)) { 3376 COPY_BUF(l,buf,len,c); 3377 cur += l; 3378 c = CUR_SCHAR(cur, l); 3379 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3380 /* 3381 * Okay someone managed to make a huge name, so he's ready to pay 3382 * for the processing speed. 3383 */ 3384 xmlChar *buffer; 3385 int max = len * 2; 3386 3387 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3388 if (buffer == NULL) { 3389 xmlErrMemory(ctxt, NULL); 3390 return(NULL); 3391 } 3392 memcpy(buffer, buf, len); 3393 while (xmlIsNameChar(ctxt, c)) { 3394 if (len + 10 > max) { 3395 xmlChar *tmp; 3396 max *= 2; 3397 tmp = (xmlChar *) xmlRealloc(buffer, 3398 max * sizeof(xmlChar)); 3399 if (tmp == NULL) { 3400 xmlErrMemory(ctxt, NULL); 3401 xmlFree(buffer); 3402 return(NULL); 3403 } 3404 buffer = tmp; 3405 } 3406 COPY_BUF(l,buffer,len,c); 3407 cur += l; 3408 c = CUR_SCHAR(cur, l); 3409 } 3410 buffer[len] = 0; 3411 *str = cur; 3412 return(buffer); 3413 } 3414 } 3415 *str = cur; 3416 return(xmlStrndup(buf, len)); 3417 } 3418 3419 /** 3420 * xmlParseNmtoken: 3421 * @ctxt: an XML parser context 3422 * 3423 * parse an XML Nmtoken. 
3424 * 3425 * [7] Nmtoken ::= (NameChar)+ 3426 * 3427 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3428 * 3429 * Returns the Nmtoken parsed or NULL 3430 */ 3431 3432 xmlChar * 3433 xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3434 xmlChar buf[XML_MAX_NAMELEN + 5]; 3435 int len = 0, l; 3436 int c; 3437 int count = 0; 3438 3439 #ifdef DEBUG 3440 nbParseNmToken++; 3441 #endif 3442 3443 GROW; 3444 if (ctxt->instate == XML_PARSER_EOF) 3445 return(NULL); 3446 c = CUR_CHAR(l); 3447 3448 while (xmlIsNameChar(ctxt, c)) { 3449 if (count++ > 100) { 3450 count = 0; 3451 GROW; 3452 } 3453 COPY_BUF(l,buf,len,c); 3454 NEXTL(l); 3455 c = CUR_CHAR(l); 3456 if (len >= XML_MAX_NAMELEN) { 3457 /* 3458 * Okay someone managed to make a huge token, so he's ready to pay 3459 * for the processing speed. 3460 */ 3461 xmlChar *buffer; 3462 int max = len * 2; 3463 3464 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3465 if (buffer == NULL) { 3466 xmlErrMemory(ctxt, NULL); 3467 return(NULL); 3468 } 3469 memcpy(buffer, buf, len); 3470 while (xmlIsNameChar(ctxt, c)) { 3471 if (count++ > 100) { 3472 count = 0; 3473 GROW; 3474 if (ctxt->instate == XML_PARSER_EOF) { 3475 xmlFree(buffer); 3476 return(NULL); 3477 } 3478 } 3479 if (len + 10 > max) { 3480 xmlChar *tmp; 3481 3482 max *= 2; 3483 tmp = (xmlChar *) xmlRealloc(buffer, 3484 max * sizeof(xmlChar)); 3485 if (tmp == NULL) { 3486 xmlErrMemory(ctxt, NULL); 3487 xmlFree(buffer); 3488 return(NULL); 3489 } 3490 buffer = tmp; 3491 } 3492 COPY_BUF(l,buffer,len,c); 3493 NEXTL(l); 3494 c = CUR_CHAR(l); 3495 } 3496 buffer[len] = 0; 3497 return(buffer); 3498 } 3499 } 3500 if (len == 0) 3501 return(NULL); 3502 return(xmlStrndup(buf, len)); 3503 } 3504 3505 /** 3506 * xmlParseEntityValue: 3507 * @ctxt: an XML parser context 3508 * @orig: if non-NULL store a copy of the original entity value 3509 * 3510 * parse a value for ENTITY declarations 3511 * 3512 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3513 * "'" ([^%&'] | 
PEReference | Reference)* "'"
 *
 * On success the raw (undecoded) buffer is either handed to the caller
 * through @orig or freed here; when the literal is not properly
 * terminated the buffer is freed and @orig is left untouched.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* the opening quote decides which quote closes the literal */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* remember the input the literal started in, see the NOTE below */
    input = ctxt->input;
    GROW;
    if (ctxt->instate == XML_PARSER_EOF) {
        xmlFree(buf);
        return(NULL);
    }
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
            (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
        /* keep room for one multi-byte character plus the terminator */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        if (c == 0) {
            /* one more refill attempt before giving up on this input */
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    if (ctxt->instate == XML_PARSER_EOF) {
        xmlFree(buf);
        return(NULL);
    }

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;

            cur++;
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            if (name != NULL)
                xmlFree(name);
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     */
    if (c != stop) {
        /* closing quote never seen: drop the buffer, ret stays NULL */
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        NEXT;
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}

/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the length of the resulting value
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * NOTE(review): growBuffer() is a macro defined elsewhere in the file;
 * it presumably enlarges buf/buf_size and jumps to mem_error when the
 * reallocation fails - confirm against its definition.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;
    xmlChar *buf = NULL;
    xmlChar *rep = NULL;
    int len = 0;
    int buf_size = 0;
    int c, l, in_space = 0;
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    /* the opening quote decides which quote terminates the value */
    if (NXT(0) == '"') {
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
        limit = '\'';
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        if (c == 0) break;
        if (c == '&') {
            in_space = 0;
            if (NXT(1) == '#') {
                /* character reference */
                int val = xmlParseCharRef(ctxt);

                if (val == '&') {
                    if (ctxt->replaceEntities) {
                        if (len > buf_size - 10) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                    } else {
                        /*
                         * The reparsing will be done in xmlStringGetNodeList()
                         * called by the attribute() function in SAX.c
                         */
                        if (len > buf_size - 10) {
                            growBuffer(buf, 10);
                        }
                        /* keep the ampersand escaped as "&#38;" */
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    }
                } else if (val != 0) {
                    if (len > buf_size - 10) {
                        growBuffer(buf, 10);
                    }
                    len += xmlCopyChar(0, &buf[len], val);
                }
            } else {
                /* general entity reference */
                ent = xmlParseEntityRef(ctxt);
                ctxt->nbentities++;
                if (ent != NULL)
                    ctxt->nbentities += ent->owner;
                if ((ent != NULL) &&
                    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                    if (len > buf_size - 10) {
                        growBuffer(buf, 10);
                    }
                    if ((ctxt->replaceEntities == 0) &&
                        (ent->content[0] == '&')) {
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    } else {
                        buf[len++] = ent->content[0];
                    }
                } else if ((ent != NULL) &&
                           (ctxt->replaceEntities != 0)) {
                    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                      XML_SUBSTITUTE_REF,
                                                      0, 0, 0);
                        if (rep != NULL) {
                            /*
                             * copy the replacement text, turning CR, LF
                             * and TAB into spaces
                             */
                            current = rep;
                            while (*current != 0) { /* non input consuming */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
                                if (len > buf_size - 10) {
                                    growBuffer(buf, 10);
                                }
                            }
                            xmlFree(rep);
                            rep = NULL;
                        }
                    } else {
                        if (len > buf_size - 10) {
                            growBuffer(buf, 10);
                        }
                        if (ent->content != NULL)
                            buf[len++] = ent->content[0];
                    }
                } else if (ent != NULL) {
                    int i = xmlStrlen(ent->name);
                    const xmlChar *cur = ent->name;

                    /*
                     * This may look absurd but is needed to detect
                     * entities problems
                     */
                    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
                        (ent->content != NULL)) {
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                  XML_SUBSTITUTE_REF, 0, 0, 0);
                        if (rep != NULL) {
                            xmlFree(rep);
                            rep = NULL;
                        }
                    }

                    /*
                     * Just output the reference
                     */
                    buf[len++] = '&';
                    while (len > buf_size - i - 10) {
                        growBuffer(buf, i + 10);
                    }
                    for (;i > 0;i--)
                        buf[len++] = *cur++;
                    buf[len++] = ';';
                }
            }
        } else {
            if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
                /*
                 * White space becomes #x20; when normalizing, leading
                 * blanks are dropped and runs are collapsed to one.
                 */
                if ((len != 0) || (!normalize)) {
                    if ((!normalize) || (!in_space)) {
                        COPY_BUF(l,buf,len,0x20);
                        while (len > buf_size - 10) {
                            growBuffer(buf, 10);
                        }
                    }
                    in_space = 1;
                }
            } else {
                in_space = 0;
                COPY_BUF(l,buf,len,c);
                if (len > buf_size - 10) {
                    growBuffer(buf, 10);
                }
            }
            NEXTL(l);
        }
        GROW;
        c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* when normalizing, strip the trailing space(s) */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    /*
     * Errors below are reported but the value collected so far is
     * still returned to the caller.
     */
    if (RAW == '<') {
        xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
        if ((c != 0) && (!IS_CHAR(c))) {
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                           "invalid character in attribute value\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue: ' expected\n");
        }
    } else
        NEXT;
    if (attlen != NULL) *attlen = len;
    return(buf);

mem_error:
    xmlErrMemory(ctxt, NULL);
error:
    /* release whatever is still owned by this function */
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}

/**
 * xmlParseAttValue:
 * @ctxt:  an XML parser context
 *
 * parse a value for an attribute
 * Note: the parser won't do substitution of entities here, this
 * will be handled later in xmlStringGetNodeList
 *
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
 *                   "'" ([^<&'] | Reference)* "'"
 *
 * 3.3.3 Attribute-Value Normalization:
 * Before the value of an attribute is passed to the application or
 * checked for validity, the XML processor must normalize it as follows:
 * - a character reference is processed by appending the referenced
 *   character to the attribute value
 * - an entity reference is processed by recursively processing the
 *   replacement text of the entity
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
 *   appending #x20 to the normalized value, except that only a single
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
 *   parsed entity or the literal entity value of an internal parsed entity
 * - other characters are processed by appending them to the normalized value
 * If the declared value is not CDATA, then the XML processor must further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * All attributes for which no declaration has been read should be treated
 * by a non-validating parser as if declared CDATA.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
*/


xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
    /* public entry point: guard against a missing context or input */
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
}

/**
 * xmlParseSystemLiteral:
 * @ctxt:  an XML parser context
 *
 * parse an XML Literal
 *
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
 *
 * The parser state is switched to XML_PARSER_SYSTEM_LITERAL while the
 * literal is read and set back to its previous value on the normal exit
 * and on reallocation failure (not on the XML_PARSER_EOF exit).
 * An unterminated literal is reported but the text read so far is still
 * returned.
 *
 * Returns the SystemLiteral parsed or NULL
 */

xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int cur, l;
    xmlChar stop;
    int state = ctxt->instate;
    int count = 0;

    SHRINK;
    /* the opening quote decides which quote closes the literal */
    if (RAW == '"') {
        NEXT;
        stop = '"';
    } else if (RAW == '\'') {
        NEXT;
        stop = '\'';
    } else {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
        return(NULL);
    }

    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
    cur = CUR_CHAR(l);
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
        /* keep room for one multi-byte character plus the terminator */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlFree(buf);
                xmlErrMemory(ctxt, NULL);
                ctxt->instate = (xmlParserInputState) state;
                return(NULL);
            }
            buf = tmp;
        }
        /* refill the input roughly every 50 characters */
        count++;
        if (count > 50) {
            GROW;
            count = 0;
            if (ctxt->instate == XML_PARSER_EOF) {
                xmlFree(buf);
                return(NULL);
            }
        }
        COPY_BUF(l,buf,len,cur);
        NEXTL(l);
        cur = CUR_CHAR(l);
        if (cur == 0) {
            /* buffer looks exhausted: try to get more data and retry */
            GROW;
            SHRINK;
            cur = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    ctxt->instate = (xmlParserInputState) state;
    if (!IS_CHAR(cur)) {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
    } else {
        /* skip the closing quote */
        NEXT;
    }
    return(buf);
}

/**
 * xmlParsePubidLiteral:
 * @ctxt:  an XML parser context
 *
 * parse an XML public literal
 *
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
 *
 * The parser state is switched to XML_PARSER_PUBLIC_LITERAL while the
 * literal is read; it is set back to its previous value only on the
 * normal exit path. An unterminated literal is reported but the text
 * read so far is still returned.
 *
 * Returns the PubidLiteral parsed or NULL.
 */

xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    xmlChar cur;
    xmlChar stop;
    int count = 0;
    xmlParserInputState oldstate = ctxt->instate;

    SHRINK;
    /* the opening quote decides which quote closes the literal */
    if (RAW == '"') {
        NEXT;
        stop = '"';
    } else if (RAW == '\'') {
        NEXT;
        stop = '\'';
    } else {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
    cur = CUR;
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
        /* one byte per character here, keep room for the terminator */
        if (len + 1 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        buf[len++] = cur;
        /* refill the input roughly every 50 characters */
        count++;
        if (count > 50) {
            GROW;
            count = 0;
            if (ctxt->instate == XML_PARSER_EOF) {
                xmlFree(buf);
                return(NULL);
            }
        }
        NEXT;
        cur = CUR;
        if (cur == 0) {
            /* buffer looks exhausted: try to get more data and retry */
            GROW;
            SHRINK;
            cur = CUR;
        }
    }
    buf[len] = 0;
    if (cur != stop) {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
    } else {
        /* skip the closing quote */
        NEXT;
    }
    ctxt->instate = oldstate;
    return(buf);
}

static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * used for the test in the inner loop of the char data testing
 *
 * Entry i is non-zero (it holds i itself) for 0x09 and for 0x20..0x7F
 * except '&' (0x26), '<' (0x3C) and ']' (0x5D); every other entry,
 * including LF, CR and all bytes >= 0x80, is zero and stops the scan.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
* if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero an accelerated loop scans plain ASCII text directly
 * in the input buffer and hands it to the SAX callbacks without copying;
 * everything else is delegated to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * line/col remember the position that goes with ctxt->input->cur;
     * the fast path advances ctxt->input->line/col ahead of cur and the
     * saved values are restored before falling back to the complex parser.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* first skip a leading run of spaces and line feeds */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /* only spaces and line feeds before the next markup */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* scan the run of characters accepted by test_char_data[] */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver what was scanned so far */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /* CR LF pair: step over the CR, the LF is handled next turn */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* resynchronize the position with cur before the slow path */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 * xmlParseCharDataComplex:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section. This is the fallback function
 * of xmlParseCharData() when the parsing requires handling
 * of non-ASCII characters.
 *
 * Characters are copied into a local buffer which is flushed to the SAX
 * callbacks each time it fills up and once more at the end.
 */
static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
    int nbchar = 0;
    int cur, l;
    int count = 0;

    SHRINK;
    GROW;
    cur = CUR_CHAR(l);
    while ((cur != '<') && /* checked */
           (cur != '&') &&
           (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
        if ((cur == ']') && (NXT(1) == ']') &&
            (NXT(2) == '>')) {
            /* end of the CDATA section, or an error in plain content */
            if (cdata) break;
            else {
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
            }
        }
        COPY_BUF(l,buf,nbchar,cur);
        if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
            buf[nbchar] = 0;

            /*
             * OK the segment is to be consumed as chars.
             */
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                if (areBlanks(ctxt, buf, nbchar, 0)) {
                    if (ctxt->sax->ignorableWhitespace != NULL)
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       buf, nbchar);
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData, buf, nbchar);
                    if ((ctxt->sax->characters !=
                         ctxt->sax->ignorableWhitespace) &&
                        (*ctxt->space == -1))
                        *ctxt->space = -2;
                }
            }
            nbchar = 0;
            /* something really bad happened in the SAX callback */
            if (ctxt->instate != XML_PARSER_CONTENT)
                return;
        }
        /* refill the input roughly every 50 characters */
        count++;
        if (count > 50) {
            GROW;
            count = 0;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
        }
        NEXTL(l);
        cur = CUR_CHAR(l);
    }
    if (nbchar != 0) {
        buf[nbchar] = 0;
        /*
         * OK the segment is to be consumed as chars.
         */
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
            if (areBlanks(ctxt, buf, nbchar, 0)) {
                if (ctxt->sax->ignorableWhitespace != NULL)
                    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
            } else {
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt->userData, buf, nbchar);
                if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
                    (*ctxt->space == -1))
                    *ctxt->space = -2;
            }
        }
    }
    if ((cur != 0) && (!IS_CHAR(cur))) {
        /* Generate the error and skip the offending character */
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "PCDATA invalid Char value %d\n",
                          cur);
        NEXTL(l);
    }
}

/**
 * xmlParseExternalID:
 * @ctxt:  an XML parser context
 * @publicID:  a xmlChar** receiving PubidLiteral
 * @strict: indicate whether we should restrict parsing to only
 *          production [75], see NOTE below
 *
 * Parse an External ID or a Public ID
 *
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
 *       'PUBLIC' S PubidLiteral S SystemLiteral
 *
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
 *
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
 *
 * Returns the function returns SystemLiteral and in the second
 *                case publicID receives PubidLiteral, if strict is off
 *                it is possible to return NULL and have publicID set.
4381 */ 4382 4383 xmlChar * 4384 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4385 xmlChar *URI = NULL; 4386 4387 SHRINK; 4388 4389 *publicID = NULL; 4390 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4391 SKIP(6); 4392 if (!IS_BLANK_CH(CUR)) { 4393 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4394 "Space required after 'SYSTEM'\n"); 4395 } 4396 SKIP_BLANKS; 4397 URI = xmlParseSystemLiteral(ctxt); 4398 if (URI == NULL) { 4399 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4400 } 4401 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4402 SKIP(6); 4403 if (!IS_BLANK_CH(CUR)) { 4404 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4405 "Space required after 'PUBLIC'\n"); 4406 } 4407 SKIP_BLANKS; 4408 *publicID = xmlParsePubidLiteral(ctxt); 4409 if (*publicID == NULL) { 4410 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4411 } 4412 if (strict) { 4413 /* 4414 * We don't handle [83] so "S SystemLiteral" is required. 4415 */ 4416 if (!IS_BLANK_CH(CUR)) { 4417 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4418 "Space required after the Public Identifier\n"); 4419 } 4420 } else { 4421 /* 4422 * We handle [83] so we return immediately, if 4423 * "S SystemLiteral" is not detected. From a purely parsing 4424 * point of view that's a nice mess. 4425 */ 4426 const xmlChar *ptr; 4427 GROW; 4428 4429 ptr = CUR_PTR; 4430 if (!IS_BLANK_CH(*ptr)) return(NULL); 4431 4432 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4433 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4434 } 4435 SKIP_BLANKS; 4436 URI = xmlParseSystemLiteral(ctxt); 4437 if (URI == NULL) { 4438 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4439 } 4440 } 4441 return(URI); 4442 } 4443 4444 /** 4445 * xmlParseCommentComplex: 4446 * @ctxt: an XML parser context 4447 * @buf: the already parsed part of the buffer 4448 * @len: number of bytes filles in the buffer 4449 * @size: allocated size of the buffer 4450 * 4451 * Skip an XML (SGML) comment <!-- .... 
--> 4452 * The spec says that "For compatibility, the string "--" (double-hyphen) 4453 * must not occur within comments. " 4454 * This is the slow routine in case the accelerator for ascii didn't work 4455 * 4456 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4457 */ 4458 static void 4459 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { 4460 int q, ql; 4461 int r, rl; 4462 int cur, l; 4463 int count = 0; 4464 int inputid; 4465 4466 inputid = ctxt->input->id; 4467 4468 if (buf == NULL) { 4469 len = 0; 4470 size = XML_PARSER_BUFFER_SIZE; 4471 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4472 if (buf == NULL) { 4473 xmlErrMemory(ctxt, NULL); 4474 return; 4475 } 4476 } 4477 GROW; /* Assure there's enough input data */ 4478 q = CUR_CHAR(ql); 4479 if (q == 0) 4480 goto not_terminated; 4481 if (!IS_CHAR(q)) { 4482 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4483 "xmlParseComment: invalid xmlChar value %d\n", 4484 q); 4485 xmlFree (buf); 4486 return; 4487 } 4488 NEXTL(ql); 4489 r = CUR_CHAR(rl); 4490 if (r == 0) 4491 goto not_terminated; 4492 if (!IS_CHAR(r)) { 4493 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4494 "xmlParseComment: invalid xmlChar value %d\n", 4495 q); 4496 xmlFree (buf); 4497 return; 4498 } 4499 NEXTL(rl); 4500 cur = CUR_CHAR(l); 4501 if (cur == 0) 4502 goto not_terminated; 4503 while (IS_CHAR(cur) && /* checked */ 4504 ((cur != '>') || 4505 (r != '-') || (q != '-'))) { 4506 if ((r == '-') && (q == '-')) { 4507 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4508 } 4509 if (len + 5 >= size) { 4510 xmlChar *new_buf; 4511 size *= 2; 4512 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4513 if (new_buf == NULL) { 4514 xmlFree (buf); 4515 xmlErrMemory(ctxt, NULL); 4516 return; 4517 } 4518 buf = new_buf; 4519 } 4520 COPY_BUF(ql,buf,len,q); 4521 q = r; 4522 ql = rl; 4523 r = cur; 4524 rl = l; 4525 4526 count++; 4527 if (count > 50) { 4528 GROW; 4529 count = 0; 4530 if 
(ctxt->instate == XML_PARSER_EOF) { 4531 xmlFree(buf); 4532 return; 4533 } 4534 } 4535 NEXTL(l); 4536 cur = CUR_CHAR(l); 4537 if (cur == 0) { 4538 SHRINK; 4539 GROW; 4540 cur = CUR_CHAR(l); 4541 } 4542 } 4543 buf[len] = 0; 4544 if (cur == 0) { 4545 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4546 "Comment not terminated \n<!--%.50s\n", buf); 4547 } else if (!IS_CHAR(cur)) { 4548 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4549 "xmlParseComment: invalid xmlChar value %d\n", 4550 cur); 4551 } else { 4552 if (inputid != ctxt->input->id) { 4553 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4554 "Comment doesn't start and stop in the same entity\n"); 4555 } 4556 NEXT; 4557 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4558 (!ctxt->disableSAX)) 4559 ctxt->sax->comment(ctxt->userData, buf); 4560 } 4561 xmlFree(buf); 4562 return; 4563 not_terminated: 4564 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4565 "Comment not terminated\n", NULL); 4566 xmlFree(buf); 4567 return; 4568 } 4569 4570 /** 4571 * xmlParseComment: 4572 * @ctxt: an XML parser context 4573 * 4574 * Skip an XML (SGML) comment <!-- .... --> 4575 * The spec says that "For compatibility, the string "--" (double-hyphen) 4576 * must not occur within comments. " 4577 * 4578 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4579 */ 4580 void 4581 xmlParseComment(xmlParserCtxtPtr ctxt) { 4582 xmlChar *buf = NULL; 4583 int size = XML_PARSER_BUFFER_SIZE; 4584 int len = 0; 4585 xmlParserInputState state; 4586 const xmlChar *in; 4587 int nbchar = 0, ccol; 4588 int inputid; 4589 4590 /* 4591 * Check that there is a comment right here. 
4592 */ 4593 if ((RAW != '<') || (NXT(1) != '!') || 4594 (NXT(2) != '-') || (NXT(3) != '-')) return; 4595 state = ctxt->instate; 4596 ctxt->instate = XML_PARSER_COMMENT; 4597 inputid = ctxt->input->id; 4598 SKIP(4); 4599 SHRINK; 4600 GROW; 4601 4602 /* 4603 * Accelerated common case where input don't need to be 4604 * modified before passing it to the handler. 4605 */ 4606 in = ctxt->input->cur; 4607 do { 4608 if (*in == 0xA) { 4609 do { 4610 ctxt->input->line++; ctxt->input->col = 1; 4611 in++; 4612 } while (*in == 0xA); 4613 } 4614 get_more: 4615 ccol = ctxt->input->col; 4616 while (((*in > '-') && (*in <= 0x7F)) || 4617 ((*in >= 0x20) && (*in < '-')) || 4618 (*in == 0x09)) { 4619 in++; 4620 ccol++; 4621 } 4622 ctxt->input->col = ccol; 4623 if (*in == 0xA) { 4624 do { 4625 ctxt->input->line++; ctxt->input->col = 1; 4626 in++; 4627 } while (*in == 0xA); 4628 goto get_more; 4629 } 4630 nbchar = in - ctxt->input->cur; 4631 /* 4632 * save current set of data 4633 */ 4634 if (nbchar > 0) { 4635 if ((ctxt->sax != NULL) && 4636 (ctxt->sax->comment != NULL)) { 4637 if (buf == NULL) { 4638 if ((*in == '-') && (in[1] == '-')) 4639 size = nbchar + 1; 4640 else 4641 size = XML_PARSER_BUFFER_SIZE + nbchar; 4642 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4643 if (buf == NULL) { 4644 xmlErrMemory(ctxt, NULL); 4645 ctxt->instate = state; 4646 return; 4647 } 4648 len = 0; 4649 } else if (len + nbchar + 1 >= size) { 4650 xmlChar *new_buf; 4651 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4652 new_buf = (xmlChar *) xmlRealloc(buf, 4653 size * sizeof(xmlChar)); 4654 if (new_buf == NULL) { 4655 xmlFree (buf); 4656 xmlErrMemory(ctxt, NULL); 4657 ctxt->instate = state; 4658 return; 4659 } 4660 buf = new_buf; 4661 } 4662 memcpy(&buf[len], ctxt->input->cur, nbchar); 4663 len += nbchar; 4664 buf[len] = 0; 4665 } 4666 } 4667 ctxt->input->cur = in; 4668 if (*in == 0xA) { 4669 in++; 4670 ctxt->input->line++; ctxt->input->col = 1; 4671 } 4672 if (*in == 0xD) { 4673 in++; 
4674 if (*in == 0xA) { 4675 ctxt->input->cur = in; 4676 in++; 4677 ctxt->input->line++; ctxt->input->col = 1; 4678 continue; /* while */ 4679 } 4680 in--; 4681 } 4682 SHRINK; 4683 GROW; 4684 if (ctxt->instate == XML_PARSER_EOF) { 4685 xmlFree(buf); 4686 return; 4687 } 4688 in = ctxt->input->cur; 4689 if (*in == '-') { 4690 if (in[1] == '-') { 4691 if (in[2] == '>') { 4692 if (ctxt->input->id != inputid) { 4693 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4694 "comment doesn't start and stop in the same entity\n"); 4695 } 4696 SKIP(3); 4697 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4698 (!ctxt->disableSAX)) { 4699 if (buf != NULL) 4700 ctxt->sax->comment(ctxt->userData, buf); 4701 else 4702 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4703 } 4704 if (buf != NULL) 4705 xmlFree(buf); 4706 if (ctxt->instate != XML_PARSER_EOF) 4707 ctxt->instate = state; 4708 return; 4709 } 4710 if (buf != NULL) 4711 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4712 "Comment not terminated \n<!--%.50s\n", 4713 buf); 4714 else 4715 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4716 "Comment not terminated \n", NULL); 4717 in++; 4718 ctxt->input->col++; 4719 } 4720 in++; 4721 ctxt->input->col++; 4722 goto get_more; 4723 } 4724 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4725 xmlParseCommentComplex(ctxt, buf, len, size); 4726 ctxt->instate = state; 4727 return; 4728 } 4729 4730 4731 /** 4732 * xmlParsePITarget: 4733 * @ctxt: an XML parser context 4734 * 4735 * parse the name of a PI 4736 * 4737 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 4738 * 4739 * Returns the PITarget name or NULL 4740 */ 4741 4742 const xmlChar * 4743 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 4744 const xmlChar *name; 4745 4746 name = xmlParseName(ctxt); 4747 if ((name != NULL) && 4748 ((name[0] == 'x') || (name[0] == 'X')) && 4749 ((name[1] == 'm') || (name[1] == 'M')) && 4750 ((name[2] == 'l') || (name[2] == 'L'))) { 4751 int i; 4752 if 
((name[0] == 'x') && (name[1] == 'm') && 4753 (name[2] == 'l') && (name[3] == 0)) { 4754 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4755 "XML declaration allowed only at the start of the document\n"); 4756 return(name); 4757 } else if (name[3] == 0) { 4758 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 4759 return(name); 4760 } 4761 for (i = 0;;i++) { 4762 if (xmlW3CPIs[i] == NULL) break; 4763 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 4764 return(name); 4765 } 4766 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4767 "xmlParsePITarget: invalid name prefix 'xml'\n", 4768 NULL, NULL); 4769 } 4770 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 4771 xmlNsErr(ctxt, XML_NS_ERR_COLON, 4772 "colon are forbidden from PI names '%s'\n", name, NULL, NULL); 4773 } 4774 return(name); 4775 } 4776 4777 #ifdef LIBXML_CATALOG_ENABLED 4778 /** 4779 * xmlParseCatalogPI: 4780 * @ctxt: an XML parser context 4781 * @catalog: the PI value string 4782 * 4783 * parse an XML Catalog Processing Instruction. 
4784 * 4785 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 4786 * 4787 * Occurs only if allowed by the user and if happening in the Misc 4788 * part of the document before any doctype informations 4789 * This will add the given catalog to the parsing context in order 4790 * to be used if there is a resolution need further down in the document 4791 */ 4792 4793 static void 4794 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 4795 xmlChar *URL = NULL; 4796 const xmlChar *tmp, *base; 4797 xmlChar marker; 4798 4799 tmp = catalog; 4800 while (IS_BLANK_CH(*tmp)) tmp++; 4801 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 4802 goto error; 4803 tmp += 7; 4804 while (IS_BLANK_CH(*tmp)) tmp++; 4805 if (*tmp != '=') { 4806 return; 4807 } 4808 tmp++; 4809 while (IS_BLANK_CH(*tmp)) tmp++; 4810 marker = *tmp; 4811 if ((marker != '\'') && (marker != '"')) 4812 goto error; 4813 tmp++; 4814 base = tmp; 4815 while ((*tmp != 0) && (*tmp != marker)) tmp++; 4816 if (*tmp == 0) 4817 goto error; 4818 URL = xmlStrndup(base, tmp - base); 4819 tmp++; 4820 while (IS_BLANK_CH(*tmp)) tmp++; 4821 if (*tmp != 0) 4822 goto error; 4823 4824 if (URL != NULL) { 4825 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 4826 xmlFree(URL); 4827 } 4828 return; 4829 4830 error: 4831 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 4832 "Catalog PI syntax error: %s\n", 4833 catalog, NULL); 4834 if (URL != NULL) 4835 xmlFree(URL); 4836 } 4837 #endif 4838 4839 /** 4840 * xmlParsePI: 4841 * @ctxt: an XML parser context 4842 * 4843 * parse an XML Processing Instruction. 4844 * 4845 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 4846 * 4847 * The processing is transfered to SAX once parsed. 
4848 */ 4849 4850 void 4851 xmlParsePI(xmlParserCtxtPtr ctxt) { 4852 xmlChar *buf = NULL; 4853 int len = 0; 4854 int size = XML_PARSER_BUFFER_SIZE; 4855 int cur, l; 4856 const xmlChar *target; 4857 xmlParserInputState state; 4858 int count = 0; 4859 4860 if ((RAW == '<') && (NXT(1) == '?')) { 4861 xmlParserInputPtr input = ctxt->input; 4862 state = ctxt->instate; 4863 ctxt->instate = XML_PARSER_PI; 4864 /* 4865 * this is a Processing Instruction. 4866 */ 4867 SKIP(2); 4868 SHRINK; 4869 4870 /* 4871 * Parse the target name and check for special support like 4872 * namespace. 4873 */ 4874 target = xmlParsePITarget(ctxt); 4875 if (target != NULL) { 4876 if ((RAW == '?') && (NXT(1) == '>')) { 4877 if (input != ctxt->input) { 4878 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4879 "PI declaration doesn't start and stop in the same entity\n"); 4880 } 4881 SKIP(2); 4882 4883 /* 4884 * SAX: PI detected. 4885 */ 4886 if ((ctxt->sax) && (!ctxt->disableSAX) && 4887 (ctxt->sax->processingInstruction != NULL)) 4888 ctxt->sax->processingInstruction(ctxt->userData, 4889 target, NULL); 4890 if (ctxt->instate != XML_PARSER_EOF) 4891 ctxt->instate = state; 4892 return; 4893 } 4894 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4895 if (buf == NULL) { 4896 xmlErrMemory(ctxt, NULL); 4897 ctxt->instate = state; 4898 return; 4899 } 4900 cur = CUR; 4901 if (!IS_BLANK(cur)) { 4902 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 4903 "ParsePI: PI %s space expected\n", target); 4904 } 4905 SKIP_BLANKS; 4906 cur = CUR_CHAR(l); 4907 while (IS_CHAR(cur) && /* checked */ 4908 ((cur != '?') || (NXT(1) != '>'))) { 4909 if (len + 5 >= size) { 4910 xmlChar *tmp; 4911 4912 size *= 2; 4913 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4914 if (tmp == NULL) { 4915 xmlErrMemory(ctxt, NULL); 4916 xmlFree(buf); 4917 ctxt->instate = state; 4918 return; 4919 } 4920 buf = tmp; 4921 } 4922 count++; 4923 if (count > 50) { 4924 GROW; 4925 if (ctxt->instate == XML_PARSER_EOF) { 4926 
xmlFree(buf); 4927 return; 4928 } 4929 count = 0; 4930 } 4931 COPY_BUF(l,buf,len,cur); 4932 NEXTL(l); 4933 cur = CUR_CHAR(l); 4934 if (cur == 0) { 4935 SHRINK; 4936 GROW; 4937 cur = CUR_CHAR(l); 4938 } 4939 } 4940 buf[len] = 0; 4941 if (cur != '?') { 4942 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 4943 "ParsePI: PI %s never end ...\n", target); 4944 } else { 4945 if (input != ctxt->input) { 4946 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4947 "PI declaration doesn't start and stop in the same entity\n"); 4948 } 4949 SKIP(2); 4950 4951 #ifdef LIBXML_CATALOG_ENABLED 4952 if (((state == XML_PARSER_MISC) || 4953 (state == XML_PARSER_START)) && 4954 (xmlStrEqual(target, XML_CATALOG_PI))) { 4955 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 4956 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 4957 (allow == XML_CATA_ALLOW_ALL)) 4958 xmlParseCatalogPI(ctxt, buf); 4959 } 4960 #endif 4961 4962 4963 /* 4964 * SAX: PI detected. 4965 */ 4966 if ((ctxt->sax) && (!ctxt->disableSAX) && 4967 (ctxt->sax->processingInstruction != NULL)) 4968 ctxt->sax->processingInstruction(ctxt->userData, 4969 target, buf); 4970 } 4971 xmlFree(buf); 4972 } else { 4973 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 4974 } 4975 if (ctxt->instate != XML_PARSER_EOF) 4976 ctxt->instate = state; 4977 } 4978 } 4979 4980 /** 4981 * xmlParseNotationDecl: 4982 * @ctxt: an XML parser context 4983 * 4984 * parse a notation declaration 4985 * 4986 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 4987 * 4988 * Hence there is actually 3 choices: 4989 * 'PUBLIC' S PubidLiteral 4990 * 'PUBLIC' S PubidLiteral S SystemLiteral 4991 * and 'SYSTEM' S SystemLiteral 4992 * 4993 * See the NOTE on xmlParseExternalID(). 
4994 */ 4995 4996 void 4997 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 4998 const xmlChar *name; 4999 xmlChar *Pubid; 5000 xmlChar *Systemid; 5001 5002 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5003 xmlParserInputPtr input = ctxt->input; 5004 SHRINK; 5005 SKIP(10); 5006 if (!IS_BLANK_CH(CUR)) { 5007 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5008 "Space required after '<!NOTATION'\n"); 5009 return; 5010 } 5011 SKIP_BLANKS; 5012 5013 name = xmlParseName(ctxt); 5014 if (name == NULL) { 5015 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5016 return; 5017 } 5018 if (!IS_BLANK_CH(CUR)) { 5019 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5020 "Space required after the NOTATION name'\n"); 5021 return; 5022 } 5023 if (xmlStrchr(name, ':') != NULL) { 5024 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5025 "colon are forbidden from notation names '%s'\n", 5026 name, NULL, NULL); 5027 } 5028 SKIP_BLANKS; 5029 5030 /* 5031 * Parse the IDs. 5032 */ 5033 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5034 SKIP_BLANKS; 5035 5036 if (RAW == '>') { 5037 if (input != ctxt->input) { 5038 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5039 "Notation declaration doesn't start and stop in the same entity\n"); 5040 } 5041 NEXT; 5042 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5043 (ctxt->sax->notationDecl != NULL)) 5044 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5045 } else { 5046 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5047 } 5048 if (Systemid != NULL) xmlFree(Systemid); 5049 if (Pubid != NULL) xmlFree(Pubid); 5050 } 5051 } 5052 5053 /** 5054 * xmlParseEntityDecl: 5055 * @ctxt: an XML parser context 5056 * 5057 * parse <!ENTITY declarations 5058 * 5059 * [70] EntityDecl ::= GEDecl | PEDecl 5060 * 5061 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5062 * 5063 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5064 * 5065 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
5066 * 5067 * [74] PEDef ::= EntityValue | ExternalID 5068 * 5069 * [76] NDataDecl ::= S 'NDATA' S Name 5070 * 5071 * [ VC: Notation Declared ] 5072 * The Name must match the declared name of a notation. 5073 */ 5074 5075 void 5076 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5077 const xmlChar *name = NULL; 5078 xmlChar *value = NULL; 5079 xmlChar *URI = NULL, *literal = NULL; 5080 const xmlChar *ndata = NULL; 5081 int isParameter = 0; 5082 xmlChar *orig = NULL; 5083 int skipped; 5084 5085 /* GROW; done in the caller */ 5086 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5087 xmlParserInputPtr input = ctxt->input; 5088 SHRINK; 5089 SKIP(8); 5090 skipped = SKIP_BLANKS; 5091 if (skipped == 0) { 5092 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5093 "Space required after '<!ENTITY'\n"); 5094 } 5095 5096 if (RAW == '%') { 5097 NEXT; 5098 skipped = SKIP_BLANKS; 5099 if (skipped == 0) { 5100 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5101 "Space required after '%'\n"); 5102 } 5103 isParameter = 1; 5104 } 5105 5106 name = xmlParseName(ctxt); 5107 if (name == NULL) { 5108 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5109 "xmlParseEntityDecl: no name\n"); 5110 return; 5111 } 5112 if (xmlStrchr(name, ':') != NULL) { 5113 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5114 "colon are forbidden from entities names '%s'\n", 5115 name, NULL, NULL); 5116 } 5117 skipped = SKIP_BLANKS; 5118 if (skipped == 0) { 5119 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5120 "Space required after the entity name\n"); 5121 } 5122 5123 ctxt->instate = XML_PARSER_ENTITY_DECL; 5124 /* 5125 * handle the various case of definitions... 
5126 */ 5127 if (isParameter) { 5128 if ((RAW == '"') || (RAW == '\'')) { 5129 value = xmlParseEntityValue(ctxt, &orig); 5130 if (value) { 5131 if ((ctxt->sax != NULL) && 5132 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5133 ctxt->sax->entityDecl(ctxt->userData, name, 5134 XML_INTERNAL_PARAMETER_ENTITY, 5135 NULL, NULL, value); 5136 } 5137 } else { 5138 URI = xmlParseExternalID(ctxt, &literal, 1); 5139 if ((URI == NULL) && (literal == NULL)) { 5140 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5141 } 5142 if (URI) { 5143 xmlURIPtr uri; 5144 5145 uri = xmlParseURI((const char *) URI); 5146 if (uri == NULL) { 5147 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5148 "Invalid URI: %s\n", URI); 5149 /* 5150 * This really ought to be a well formedness error 5151 * but the XML Core WG decided otherwise c.f. issue 5152 * E26 of the XML erratas. 5153 */ 5154 } else { 5155 if (uri->fragment != NULL) { 5156 /* 5157 * Okay this is foolish to block those but not 5158 * invalid URIs. 5159 */ 5160 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5161 } else { 5162 if ((ctxt->sax != NULL) && 5163 (!ctxt->disableSAX) && 5164 (ctxt->sax->entityDecl != NULL)) 5165 ctxt->sax->entityDecl(ctxt->userData, name, 5166 XML_EXTERNAL_PARAMETER_ENTITY, 5167 literal, URI, NULL); 5168 } 5169 xmlFreeURI(uri); 5170 } 5171 } 5172 } 5173 } else { 5174 if ((RAW == '"') || (RAW == '\'')) { 5175 value = xmlParseEntityValue(ctxt, &orig); 5176 if ((ctxt->sax != NULL) && 5177 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5178 ctxt->sax->entityDecl(ctxt->userData, name, 5179 XML_INTERNAL_GENERAL_ENTITY, 5180 NULL, NULL, value); 5181 /* 5182 * For expat compatibility in SAX mode. 
5183 */ 5184 if ((ctxt->myDoc == NULL) || 5185 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5186 if (ctxt->myDoc == NULL) { 5187 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5188 if (ctxt->myDoc == NULL) { 5189 xmlErrMemory(ctxt, "New Doc failed"); 5190 return; 5191 } 5192 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5193 } 5194 if (ctxt->myDoc->intSubset == NULL) 5195 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5196 BAD_CAST "fake", NULL, NULL); 5197 5198 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5199 NULL, NULL, value); 5200 } 5201 } else { 5202 URI = xmlParseExternalID(ctxt, &literal, 1); 5203 if ((URI == NULL) && (literal == NULL)) { 5204 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5205 } 5206 if (URI) { 5207 xmlURIPtr uri; 5208 5209 uri = xmlParseURI((const char *)URI); 5210 if (uri == NULL) { 5211 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5212 "Invalid URI: %s\n", URI); 5213 /* 5214 * This really ought to be a well formedness error 5215 * but the XML Core WG decided otherwise c.f. issue 5216 * E26 of the XML erratas. 5217 */ 5218 } else { 5219 if (uri->fragment != NULL) { 5220 /* 5221 * Okay this is foolish to block those but not 5222 * invalid URIs. 
5223 */ 5224 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5225 } 5226 xmlFreeURI(uri); 5227 } 5228 } 5229 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5230 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5231 "Space required before 'NDATA'\n"); 5232 } 5233 SKIP_BLANKS; 5234 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5235 SKIP(5); 5236 if (!IS_BLANK_CH(CUR)) { 5237 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5238 "Space required after 'NDATA'\n"); 5239 } 5240 SKIP_BLANKS; 5241 ndata = xmlParseName(ctxt); 5242 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5243 (ctxt->sax->unparsedEntityDecl != NULL)) 5244 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5245 literal, URI, ndata); 5246 } else { 5247 if ((ctxt->sax != NULL) && 5248 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5249 ctxt->sax->entityDecl(ctxt->userData, name, 5250 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5251 literal, URI, NULL); 5252 /* 5253 * For expat compatibility in SAX mode. 5254 * assuming the entity repalcement was asked for 5255 */ 5256 if ((ctxt->replaceEntities != 0) && 5257 ((ctxt->myDoc == NULL) || 5258 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5259 if (ctxt->myDoc == NULL) { 5260 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5261 if (ctxt->myDoc == NULL) { 5262 xmlErrMemory(ctxt, "New Doc failed"); 5263 return; 5264 } 5265 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5266 } 5267 5268 if (ctxt->myDoc->intSubset == NULL) 5269 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5270 BAD_CAST "fake", NULL, NULL); 5271 xmlSAX2EntityDecl(ctxt, name, 5272 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5273 literal, URI, NULL); 5274 } 5275 } 5276 } 5277 } 5278 if (ctxt->instate == XML_PARSER_EOF) 5279 return; 5280 SKIP_BLANKS; 5281 if (RAW != '>') { 5282 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5283 "xmlParseEntityDecl: entity %s not terminated\n", name); 5284 } else { 5285 if (input != ctxt->input) { 5286 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5287 "Entity declaration 
doesn't start and stop in the same entity\n"); 5288 } 5289 NEXT; 5290 } 5291 if (orig != NULL) { 5292 /* 5293 * Ugly mechanism to save the raw entity value. 5294 */ 5295 xmlEntityPtr cur = NULL; 5296 5297 if (isParameter) { 5298 if ((ctxt->sax != NULL) && 5299 (ctxt->sax->getParameterEntity != NULL)) 5300 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5301 } else { 5302 if ((ctxt->sax != NULL) && 5303 (ctxt->sax->getEntity != NULL)) 5304 cur = ctxt->sax->getEntity(ctxt->userData, name); 5305 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5306 cur = xmlSAX2GetEntity(ctxt, name); 5307 } 5308 } 5309 if (cur != NULL) { 5310 if (cur->orig != NULL) 5311 xmlFree(orig); 5312 else 5313 cur->orig = orig; 5314 } else 5315 xmlFree(orig); 5316 } 5317 if (value != NULL) xmlFree(value); 5318 if (URI != NULL) xmlFree(URI); 5319 if (literal != NULL) xmlFree(literal); 5320 } 5321 } 5322 5323 /** 5324 * xmlParseDefaultDecl: 5325 * @ctxt: an XML parser context 5326 * @value: Receive a possible fixed default value for the attribute 5327 * 5328 * Parse an attribute default declaration 5329 * 5330 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5331 * 5332 * [ VC: Required Attribute ] 5333 * if the default declaration is the keyword #REQUIRED, then the 5334 * attribute must be specified for all elements of the type in the 5335 * attribute-list declaration. 5336 * 5337 * [ VC: Attribute Default Legal ] 5338 * The declared default value must meet the lexical constraints of 5339 * the declared attribute type c.f. xmlValidateAttributeDecl() 5340 * 5341 * [ VC: Fixed Attribute Default ] 5342 * if an attribute has a default value declared with the #FIXED 5343 * keyword, instances of that attribute must match the default value. 5344 * 5345 * [ WFC: No < in Attribute Values ] 5346 * handled in xmlParseAttValue() 5347 * 5348 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5349 * or XML_ATTRIBUTE_FIXED. 
5350 */ 5351 5352 int 5353 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5354 int val; 5355 xmlChar *ret; 5356 5357 *value = NULL; 5358 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5359 SKIP(9); 5360 return(XML_ATTRIBUTE_REQUIRED); 5361 } 5362 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5363 SKIP(8); 5364 return(XML_ATTRIBUTE_IMPLIED); 5365 } 5366 val = XML_ATTRIBUTE_NONE; 5367 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5368 SKIP(6); 5369 val = XML_ATTRIBUTE_FIXED; 5370 if (!IS_BLANK_CH(CUR)) { 5371 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5372 "Space required after '#FIXED'\n"); 5373 } 5374 SKIP_BLANKS; 5375 } 5376 ret = xmlParseAttValue(ctxt); 5377 ctxt->instate = XML_PARSER_DTD; 5378 if (ret == NULL) { 5379 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5380 "Attribute default value declaration error\n"); 5381 } else 5382 *value = ret; 5383 return(val); 5384 } 5385 5386 /** 5387 * xmlParseNotationType: 5388 * @ctxt: an XML parser context 5389 * 5390 * parse an Notation attribute type. 5391 * 5392 * Note: the leading 'NOTATION' S part has already being parsed... 5393 * 5394 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5395 * 5396 * [ VC: Notation Attributes ] 5397 * Values of this type must match one of the notation names included 5398 * in the declaration; all notation names in the declaration must be declared. 
5399 * 5400 * Returns: the notation attribute tree built while parsing 5401 */ 5402 5403 xmlEnumerationPtr 5404 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5405 const xmlChar *name; 5406 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5407 5408 if (RAW != '(') { 5409 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5410 return(NULL); 5411 } 5412 SHRINK; 5413 do { 5414 NEXT; 5415 SKIP_BLANKS; 5416 name = xmlParseName(ctxt); 5417 if (name == NULL) { 5418 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5419 "Name expected in NOTATION declaration\n"); 5420 xmlFreeEnumeration(ret); 5421 return(NULL); 5422 } 5423 tmp = ret; 5424 while (tmp != NULL) { 5425 if (xmlStrEqual(name, tmp->name)) { 5426 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5427 "standalone: attribute notation value token %s duplicated\n", 5428 name, NULL); 5429 if (!xmlDictOwns(ctxt->dict, name)) 5430 xmlFree((xmlChar *) name); 5431 break; 5432 } 5433 tmp = tmp->next; 5434 } 5435 if (tmp == NULL) { 5436 cur = xmlCreateEnumeration(name); 5437 if (cur == NULL) { 5438 xmlFreeEnumeration(ret); 5439 return(NULL); 5440 } 5441 if (last == NULL) ret = last = cur; 5442 else { 5443 last->next = cur; 5444 last = cur; 5445 } 5446 } 5447 SKIP_BLANKS; 5448 } while (RAW == '|'); 5449 if (RAW != ')') { 5450 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5451 xmlFreeEnumeration(ret); 5452 return(NULL); 5453 } 5454 NEXT; 5455 return(ret); 5456 } 5457 5458 /** 5459 * xmlParseEnumerationType: 5460 * @ctxt: an XML parser context 5461 * 5462 * parse an Enumeration attribute type. 5463 * 5464 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')'
 *
 * [ VC: Enumeration ]
 * Values of this type must match one of the Nmtoken tokens in
 * the declaration
 *
 * A token listed twice raises a validity error and is kept only once.
 * When a token or the closing ')' is missing the list built so far is
 * returned as is.
 *
 * Returns: the enumeration attribute tree built while parsing
 */

xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
    xmlChar *token;
    xmlEnumerationPtr head = NULL, tail = NULL, node, dup;

    if (RAW != '(') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
        return(NULL);
    }
    SHRINK;
    do {
        /* Step over the '(' on the first pass, over a '|' afterwards. */
        NEXT;
        SKIP_BLANKS;
        token = xmlParseNmtoken(ctxt);
        if (token == NULL) {
            xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
            return(head);
        }

        /* Look for an earlier occurrence of the same token. */
        for (dup = head; dup != NULL; dup = dup->next) {
            if (xmlStrEqual(token, dup->name))
                break;
        }
        if (dup != NULL) {
            xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
      "standalone: attribute enumeration value token %s duplicated\n",
                             token, NULL);
            if (!xmlDictOwns(ctxt->dict, token))
                xmlFree(token);
        } else {
            node = xmlCreateEnumeration(token);
            /* The token string is not needed anymore at this point. */
            if (!xmlDictOwns(ctxt->dict, token))
                xmlFree(token);
            if (node == NULL) {
                xmlFreeEnumeration(head);
                return(NULL);
            }
            if (tail == NULL)
                head = node;
            else
                tail->next = node;
            tail = node;
        }
        SKIP_BLANKS;
    } while (RAW == '|');

    if (RAW != ')') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
        return(head);
    }
    NEXT;
    return(head);
}

/**
 * xmlParseEnumeratedType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse an Enumerated attribute type.
 *
 * [57] EnumeratedType ::= NotationType | Enumeration
 *
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')'
 *
 *
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION, or 0 if
 *          the space after 'NOTATION' is missing or no list was built
 */

int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
        SKIP(8);
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after 'NOTATION'\n");
            return(0);
        }
        SKIP_BLANKS;
        *tree = xmlParseNotationType(ctxt);
        return((*tree != NULL) ? XML_ATTRIBUTE_NOTATION : 0);
    }

    /* Anything else is tried as a plain enumeration. */
    *tree = xmlParseEnumerationType(ctxt);
    return((*tree != NULL) ? XML_ATTRIBUTE_ENUMERATION : 0);
}

/**
 * xmlParseAttributeType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse the Attribute list def for an element
 *
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
 *
 * [55] StringType ::= 'CDATA'
 *
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
 *
 * Validity constraints for attribute values syntax are checked in
 * xmlValidateAttributeValue()
 *
 * [ VC: ID ]
 * Values of type ID must match the Name production. A name must not
 * appear more than once in an XML document as a value of this type;
 * i.e., ID values must uniquely identify the elements which bear them.
 *
 * [ VC: One ID per Element Type ]
 * No element type may have more than one ID attribute specified.
 *
 * [ VC: ID Attribute Default ]
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
 *
 * [ VC: IDREF ]
 * Values of type IDREF must match the Name production, and values
 * of type IDREFS must match Names; each IDREF Name must match the value
 * of an ID attribute on some element in the XML document; i.e.
IDREF 5593 * values must match the value of some ID attribute. 5594 * 5595 * [ VC: Entity Name ] 5596 * Values of type ENTITY must match the Name production, values 5597 * of type ENTITIES must match Names; each Entity Name must match the 5598 * name of an unparsed entity declared in the DTD. 5599 * 5600 * [ VC: Name Token ] 5601 * Values of type NMTOKEN must match the Nmtoken production; values 5602 * of type NMTOKENS must match Nmtokens. 5603 * 5604 * Returns the attribute type 5605 */ 5606 int 5607 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5608 SHRINK; 5609 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5610 SKIP(5); 5611 return(XML_ATTRIBUTE_CDATA); 5612 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5613 SKIP(6); 5614 return(XML_ATTRIBUTE_IDREFS); 5615 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5616 SKIP(5); 5617 return(XML_ATTRIBUTE_IDREF); 5618 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5619 SKIP(2); 5620 return(XML_ATTRIBUTE_ID); 5621 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5622 SKIP(6); 5623 return(XML_ATTRIBUTE_ENTITY); 5624 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5625 SKIP(8); 5626 return(XML_ATTRIBUTE_ENTITIES); 5627 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5628 SKIP(8); 5629 return(XML_ATTRIBUTE_NMTOKENS); 5630 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5631 SKIP(7); 5632 return(XML_ATTRIBUTE_NMTOKEN); 5633 } 5634 return(xmlParseEnumeratedType(ctxt, tree)); 5635 } 5636 5637 /** 5638 * xmlParseAttributeListDecl: 5639 * @ctxt: an XML parser context 5640 * 5641 * : parse the Attribute list def for an element 5642 * 5643 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>'
 *
 * [53] AttDef ::= S Name S AttType S DefaultDecl
 *
 * Does nothing unless the input is positioned on '<!ATTLIST'. Each AttDef
 * that parses cleanly is reported through the SAX attributeDecl callback
 * (when SAX is enabled); parsing of the declaration stops at the first
 * AttDef that fails.
 */
void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *elemName;
    const xmlChar *attrName;
    xmlEnumerationPtr tree;

    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
        /* Remembered to check that '>' is found in the same input. */
        xmlParserInputPtr input = ctxt->input;

        SKIP(9);
        if (!IS_BLANK_CH(CUR)) {
            /* Reported, but parsing carries on. */
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after '<!ATTLIST'\n");
        }
        SKIP_BLANKS;
        elemName = xmlParseName(ctxt);
        if (elemName == NULL) {
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                           "ATTLIST: no name for Element\n");
            return;
        }
        SKIP_BLANKS;
        GROW;
        /* One iteration per AttDef, until '>' or the parser is stopped. */
        while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
            /* Start position, used below to detect a lack of progress. */
            const xmlChar *check = CUR_PTR;
            int type;
            int def;
            xmlChar *defaultValue = NULL;

            GROW;
            tree = NULL;
            attrName = xmlParseName(ctxt);
            if (attrName == NULL) {
                xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                               "ATTLIST: no name for Attribute\n");
                break;
            }
            GROW;
            if (!IS_BLANK_CH(CUR)) {
                xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                        "Space required after the attribute name\n");
                break;
            }
            SKIP_BLANKS;

            /* May fill tree with the enumeration of allowed values. */
            type = xmlParseAttributeType(ctxt, &tree);
            if (type <= 0) {
                break;
            }

            GROW;
            if (!IS_BLANK_CH(CUR)) {
                xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                               "Space required after the attribute type\n");
                if (tree != NULL)
                    xmlFreeEnumeration(tree);
                break;
            }
            SKIP_BLANKS;

            /* May allocate defaultValue; it is released on every path below. */
            def = xmlParseDefaultDecl(ctxt, &defaultValue);
            if (def <= 0) {
                if (defaultValue != NULL)
                    xmlFree(defaultValue);
                if (tree != NULL)
                    xmlFreeEnumeration(tree);
                break;
            }
            /* Non-CDATA defaults are space-normalized in place. */
            if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
                xmlAttrNormalizeSpace(defaultValue, defaultValue);

            GROW;
            /* Unless the declaration ends here, a blank must separate AttDefs. */
            if (RAW != '>') {
                if (!IS_BLANK_CH(CUR)) {
                    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                        "Space required after the attribute default value\n");
                    if (defaultValue != NULL)
                        xmlFree(defaultValue);
                    if (tree != NULL)
                        xmlFreeEnumeration(tree);
                    break;
                }
                SKIP_BLANKS;
            }
            /* Nothing was consumed in this iteration: bail out. */
            if (check == CUR_PTR) {
                xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                            "in xmlParseAttributeListDecl\n");
                if (defaultValue != NULL)
                    xmlFree(defaultValue);
                if (tree != NULL)
                    xmlFreeEnumeration(tree);
                break;
            }
            /*
             * tree is handed to the callback and not freed here afterwards
             * (presumably the callback takes ownership -- TODO confirm);
             * without a usable callback it is freed right away.
             */
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                (ctxt->sax->attributeDecl != NULL))
                ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
                                type, def, defaultValue, tree);
            else if (tree != NULL)
                xmlFreeEnumeration(tree);

            /*
             * In SAX2 mode, defaults other than #IMPLIED/#REQUIRED are
             * recorded with xmlAddDefAttrs() and the attribute type with
             * xmlAddSpecialAttr().
             */
            if ((ctxt->sax2) && (defaultValue != NULL) &&
                (def != XML_ATTRIBUTE_IMPLIED) &&
                (def != XML_ATTRIBUTE_REQUIRED)) {
                xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
            }
            if (ctxt->sax2) {
                xmlAddSpecialAttr(ctxt, elemName, attrName, type);
            }
            if (defaultValue != NULL)
                xmlFree(defaultValue);
            GROW;
        }
        if (RAW == '>') {
            /* The declaration must start and end in the same input. */
            if (input != ctxt->input) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Attribute list declaration doesn't start and stop in the same entity\n",
                                 NULL, NULL);
            }
            NEXT;
        }
    }
}

/**
 * xmlParseElementMixedContentDecl:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 *
 * parse the declaration for a Mixed Element content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
 *                '(' S? '#PCDATA' S?
')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
 *
 * [ VC: No Duplicate Types ]
 * The same name must not appear more than once in a single
 * mixed-content declaration.
 *
 * returns: the list of the xmlElementContentPtr describing the element
 *          choices, or NULL on error (any partially built tree is freed)
 */
xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
    xmlElementContentPtr ret = NULL, cur = NULL, n;
    const xmlChar *elem = NULL;

    GROW;
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
        SKIP(7);
        SKIP_BLANKS;
        SHRINK;
        if (RAW == ')') {
            /* '(#PCDATA)' possibly followed by '*': a lone PCDATA node. */
            if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                                 NULL, NULL);
            }
            NEXT;
            ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
            if (ret == NULL)
                return(NULL);
            if (RAW == '*') {
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
                NEXT;
            }
            return(ret);
        }
        if ((RAW == '(') || (RAW == '|')) {
            ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
            if (ret == NULL) return(NULL);
        }
        /*
         * Each iteration consumes one '|' and one Name. The Name read in
         * an iteration is only attached to the tree by the following
         * iteration (or after the loop for the last one), so elem == NULL
         * identifies the first iteration.
         */
        while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
            NEXT;
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
            if (n == NULL) {
                /* Do not leak what has been built so far. */
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (elem == NULL) {
                /* First alternative: the OR node becomes the new root. */
                n->c1 = cur;
                if (cur != NULL)
                    cur->parent = n;
                ret = cur = n;
            } else {
                /* Chain a new OR node holding the previously read Name. */
                n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
                if (n->c1 != NULL)
                    n->c1->parent = n;
                cur->c2 = n;
                n->parent = cur;
                cur = n;
            }
            SKIP_BLANKS;
            elem = xmlParseName(ctxt);
            if (elem == NULL) {
                xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                        "xmlParseElementMixedContentDecl : Name expected\n");
                /*
                 * Free from the root: cur may be an inner node, and
                 * freeing only that subtree would leak its ancestors.
                 */
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            SKIP_BLANKS;
            GROW;
        }
        if ((RAW == ')') && (NXT(1) == '*')) {
            if (elem != NULL) {
                /* Attach the last Name as right child of the last OR. */
                cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                               XML_ELEMENT_CONTENT_ELEMENT);
                if (cur->c2 != NULL)
                    cur->c2->parent = cur;
            }
            if (ret != NULL)
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                                 NULL, NULL);
            }
            SKIP(2);
        } else {
            /* ret may still be NULL here. */
            xmlFreeDocElementContent(ctxt->myDoc, ret);
            xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
            return(NULL);
        }

    } else {
        xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
    }
    return(ret);
}

/**
 * xmlParseElementChildrenContentDeclPriv:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 * @depth: the level of recursion
 *
 * parse the declaration for an element children content model
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *      with parenthesized groups.
That is to say, if either of the
 *      opening or closing parentheses in a choice, seq, or Mixed
 *      construct is contained in the replacement text for a parameter
 *      entity, both must be contained in the same replacement text. For
 *      interoperability, if a parameter-entity reference appears in a
 *      choice, seq, or Mixed construct, its replacement text should not
 *      be empty, and neither the first nor last non-blank character of
 *      the replacement text should be a connector (| or ,).
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy, or NULL on error.
 */
static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
                                       int depth) {
    /*
     * ret:  root of the tree built for this group
     * cur:  most recently created operator node (or the first child)
     * last: most recently parsed child, not yet linked into the tree
     * op:   operator node (SEQ or OR) being created
     * type: separator used by this group (',' or '|'), 0 until one is seen
     */
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
    const xmlChar *elem;
    xmlChar type = 0;

    /* Bound the recursion: 128 levels by default, 2048 with XML_PARSE_HUGE. */
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (depth > 2048)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
                          depth);
        return(NULL);
    }
    SKIP_BLANKS;
    GROW;
    if (RAW == '(') {
        int inputid = ctxt->input->id;

        /* Recurse on first child */
        NEXT;
        SKIP_BLANKS;
        /* The result may be NULL; later code checks ret before using it. */
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                           depth + 1);
        SKIP_BLANKS;
        GROW;
    } else {
        elem = xmlParseName(ctxt);
        if (elem == NULL) {
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
            return(NULL);
        }
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
        if (cur == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        GROW;
        /* Optional occurrence indicator right after the Name. */
        if (RAW == '?') {
            cur->ocur = XML_ELEMENT_CONTENT_OPT;
            NEXT;
        } else if (RAW == '*') {
            cur->ocur = XML_ELEMENT_CONTENT_MULT;
            NEXT;
        } else if (RAW == '+') {
            cur->ocur = XML_ELEMENT_CONTENT_PLUS;
            NEXT;
        } else {
            cur->ocur = XML_ELEMENT_CONTENT_ONCE;
        }
        GROW;
    }
    SKIP_BLANKS;
    SHRINK;
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Each loop we parse one separator and one element.
         */
        if (RAW == ',') {
            if (type == 0) type = CUR;

            /*
             * Detect "Name | Name , Name" error
             */
            else if (type != CUR) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
                    "xmlParseElementChildrenContentDecl : '%c' expected\n",
                                  type);
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            NEXT;

            op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
            if (op == NULL) {
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (last == NULL) {
                /* No pending child: op becomes the root above ret. */
                op->c1 = ret;
                if (ret != NULL)
                    ret->parent = op;
                ret = cur = op;
            } else {
                /*
                 * Chain op as right child of the previous operator and
                 * move the pending child under it as left child.
                 */
                cur->c2 = op;
                if (op != NULL)
                    op->parent = cur;
                op->c1 = last;
                if (last != NULL)
                    last->parent = op;
                cur =op;
                last = NULL;
            }
        } else if (RAW == '|') {
            if (type == 0) type = CUR;

            /*
             * Detect "Name , Name | Name" error
             */
            else if (type != CUR) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
                    "xmlParseElementChildrenContentDecl : '%c' expected\n",
                                  type);
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            NEXT;

            op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
            if (op == NULL) {
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            /* Same linking scheme as for the ',' separator above. */
            if (last == NULL) {
                op->c1 = ret;
                if (ret != NULL)
                    ret->parent = op;
                ret = cur = op;
            } else {
                cur->c2 = op;
                if (op != NULL)
                    op->parent = cur;
                op->c1 = last;
                if (last != NULL)
                    last->parent = op;
                cur =op;
                last = NULL;
            }
        } else {
            /* Neither ')' nor a separator: the group is malformed. */
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
            if ((last != NULL) && (last != ret))
                xmlFreeDocElementContent(ctxt->myDoc, last);
            if (ret != NULL)
                xmlFreeDocElementContent(ctxt->myDoc, ret);
            return(NULL);
        }
        GROW;
        SKIP_BLANKS;
        GROW;
        if (RAW == '(') {
            int inputid = ctxt->input->id;
            /* Recurse on second child */
            NEXT;
            SKIP_BLANKS;
            last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                          depth + 1);
            SKIP_BLANKS;
        } else {
            elem = xmlParseName(ctxt);
            if (elem == NULL) {
                xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
            if (last == NULL) {
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (RAW == '?') {
                last->ocur = XML_ELEMENT_CONTENT_OPT;
                NEXT;
            } else if (RAW == '*') {
                last->ocur = XML_ELEMENT_CONTENT_MULT;
                NEXT;
            } else if (RAW == '+') {
                last->ocur = XML_ELEMENT_CONTENT_PLUS;
                NEXT;
            } else {
                last->ocur = XML_ELEMENT_CONTENT_ONCE;
            }
        }
        SKIP_BLANKS;
        GROW;
    }
    /* Link the final pending child as right child of the last operator. */
    if ((cur != NULL) && (last != NULL)) {
        cur->c2 = last;
        if (last != NULL)
            last->parent = cur;
    }
    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                         NULL, NULL);
    }
    /*
     * Step over the closing ')'. This also runs when the loop stopped
     * because instate became XML_PARSER_EOF.
     */
    NEXT;
    /* Occurrence indicator of the whole group, merged with any set on ret. */
    if (RAW == '?') {
        if (ret != NULL) {
            if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_OPT;
        }
        NEXT;
    } else if (RAW == '*') {
        if (ret != NULL) {
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
            cur = ret;
            /*
             * Some normalization:
             * (a | b* | c?)* == (a | b | c)*
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                cur = cur->c2;
            }
        }
        NEXT;
    } else if (RAW == '+') {
        if (ret != NULL) {
            int found = 0;

            if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_PLUS;
            /*
             * Some normalization:
             * (a | b*)+ == (a | b)*
             * (a | b?)+ == (a | b)*
             *
             * NOTE(review): unlike the '*' branch above, cur is not reset
             * to ret here, so the walk starts from wherever the parsing
             * loop left cur -- confirm this is intended.
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                cur = cur->c2;
            }
            if (found)
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
        }
        NEXT;
    }
    return(ret);
}

/**
 * xmlParseElementChildrenContentDecl:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 *
 * parse the declaration for an element children content model; this is a
 * thin wrapper starting xmlParseElementChildrenContentDeclPriv() at depth 1
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *      with parenthesized groups. That is to say, if either of the
 *      opening or closing parentheses in a choice, seq, or Mixed
 *      construct is contained in the replacement text for a parameter
 *      entity, both must be contained in the same replacement text. For
 *      interoperability, if a parameter-entity reference appears in a
 *      choice, seq, or Mixed construct, its replacement text should not
 *      be empty, and neither the first nor last non-blank character of
 *      the replacement text should be a connector (| or ,).
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy.
 */
xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
    /* stub left for API/ABI compat */
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
}

/**
 * xmlParseElementContentDecl:
 * @ctxt:  an XML parser context
 * @name:  the name of the element being defined.
6216 * @result: the Element Content pointer will be stored here if any 6217 * 6218 * parse the declaration for an Element content either Mixed or Children, 6219 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6220 * 6221 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6222 * 6223 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6224 */ 6225 6226 int 6227 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6228 xmlElementContentPtr *result) { 6229 6230 xmlElementContentPtr tree = NULL; 6231 int inputid = ctxt->input->id; 6232 int res; 6233 6234 *result = NULL; 6235 6236 if (RAW != '(') { 6237 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6238 "xmlParseElementContentDecl : %s '(' expected\n", name); 6239 return(-1); 6240 } 6241 NEXT; 6242 GROW; 6243 if (ctxt->instate == XML_PARSER_EOF) 6244 return(-1); 6245 SKIP_BLANKS; 6246 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6247 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6248 res = XML_ELEMENT_TYPE_MIXED; 6249 } else { 6250 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6251 res = XML_ELEMENT_TYPE_ELEMENT; 6252 } 6253 SKIP_BLANKS; 6254 *result = tree; 6255 return(res); 6256 } 6257 6258 /** 6259 * xmlParseElementDecl: 6260 * @ctxt: an XML parser context 6261 * 6262 * parse an Element declaration. 6263 * 6264 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlParserInputPtr startInput;
    xmlElementContentPtr model = NULL;
    int etype = -1;

    /* GROW; done in the caller */
    if (!CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T'))
        return(etype);

    /* Kept to verify that the closing '>' is in the same input. */
    startInput = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'ELEMENT'\n");
    }
    SKIP_BLANKS;
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseElementDecl: no name for Element\n");
        return(-1);
    }
    /* Leave any entity input that is now exhausted. */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after the element name\n");
    }
    SKIP_BLANKS;

    if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
        /* Element must always be empty. */
        SKIP(5);
        etype = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') && (NXT(2) == 'Y')) {
        /* Element is a generic container. */
        SKIP(3);
        etype = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        etype = xmlParseElementContentDecl(ctxt, name, &model);
    } else {
        /*
         * [ WFC: PEs in Internal Subset ] error handling: a '%' met in
         * the top-level input of the internal subset gets its own error.
         */
        int peInIntSubset = (RAW == '%') && (ctxt->external == 0) &&
                            (ctxt->inputNr == 1);

        if (peInIntSubset) {
            xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
      "PEReference: forbidden within markup decl in internal subset\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        }
        return(-1);
    }

    SKIP_BLANKS;
    /*
     * Pop-up of finished entities.
     */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    SKIP_BLANKS;

    if (RAW != '>') {
        /* Unterminated declaration: drop the content model, if any. */
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
        if (model != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, model);
        }
        return(etype);
    }

    if (startInput != ctxt->input) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element declaration doesn't start and stop in the same entity\n");
    }

    NEXT;
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
        (ctxt->sax->elementDecl != NULL)) {
        if (model != NULL)
            model->parent = NULL;
        ctxt->sax->elementDecl(ctxt->userData, name, etype,
                               model);
        if ((model != NULL) && (model->parent == NULL)) {
            /*
             * this is a trick: if xmlAddElementDecl is called,
             * instead of copying the full tree it is plugged directly
             * if called from the parser. Avoid duplicating the
             * interfaces or change the API/ABI
             */
            xmlFreeDocElementContent(ctxt->myDoc, model);
        }
    } else if (model != NULL) {
        xmlFreeDocElementContent(ctxt->myDoc, model);
    }
    return(etype);
}

/**
 * xmlParseConditionalSections
 * @ctxt:  an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S?
'[' ignoreSectContents* ']]>'
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
 *
 * The three characters at the current position are skipped without being
 * inspected; the callers in this file check for '<![' before calling.
 */

static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
    /* Id of the input holding the opening markup, for boundary checks. */
    int id = ctxt->input->id;

    SKIP(3);
    SKIP_BLANKS;
    if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
        SKIP(7);
        SKIP_BLANKS;
        if (RAW != '[') {
            /* Reported, but the section is still parsed below. */
            xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
        } else {
            if (ctxt->input->id != id) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
            "All markup of the conditional section is not in the same entity\n",
                                     NULL, NULL);
            }
            NEXT;
        }
        if (xmlParserDebugEntities) {
            if ((ctxt->input != NULL) && (ctxt->input->filename))
                xmlGenericError(xmlGenericErrorContext,
                        "%s(%d): ", ctxt->input->filename,
                        ctxt->input->line);
            xmlGenericError(xmlGenericErrorContext,
                    "Entering INCLUDE Conditional Section\n");
        }

        /*
         * Parse declarations until ']]>', end of input, or until the
         * parser is stopped.
         */
        while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
                (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
            /* Position and consumed count, to detect a lack of progress. */
            const xmlChar *check = CUR_PTR;
            unsigned int cons = ctxt->input->consumed;

            if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
                /* Nested conditional section: handled by recursion. */
                xmlParseConditionalSections(ctxt);
            } else if (IS_BLANK_CH(CUR)) {
                NEXT;
            } else if (RAW == '%') {
                xmlParsePEReference(ctxt);
            } else
                xmlParseMarkupDecl(ctxt);

            /*
             * Pop-up of finished entities.
             */
            while ((RAW == 0) && (ctxt->inputNr > 1))
                xmlPopInput(ctxt);

            if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
                break;
            }
        }
        if (xmlParserDebugEntities) {
            if ((ctxt->input != NULL) && (ctxt->input->filename))
                xmlGenericError(xmlGenericErrorContext,
                        "%s(%d): ", ctxt->input->filename,
                        ctxt->input->line);
            xmlGenericError(xmlGenericErrorContext,
                    "Leaving INCLUDE Conditional Section\n");
        }

    } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
        int state;                    /* saved ctxt->disableSAX */
        xmlParserInputState instate;  /* saved ctxt->instate */
        int depth = 0;                /* nesting level of '<![' ... ']]>' */

        SKIP(6);
        SKIP_BLANKS;
        if (RAW != '[') {
            xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
        } else {
            if (ctxt->input->id != id) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
            "All markup of the conditional section is not in the same entity\n",
                                     NULL, NULL);
            }
            NEXT;
        }
        if (xmlParserDebugEntities) {
            if ((ctxt->input != NULL) && (ctxt->input->filename))
                xmlGenericError(xmlGenericErrorContext,
                        "%s(%d): ", ctxt->input->filename,
                        ctxt->input->line);
            xmlGenericError(xmlGenericErrorContext,
                    "Entering IGNORE Conditional Section\n");
        }

        /*
         * Parse up to the end of the conditional section
         * But disable SAX event generating DTD building in the meantime
         */
        state = ctxt->disableSAX;
        instate = ctxt->instate;
        if (ctxt->recovery == 0) ctxt->disableSAX = 1;
        ctxt->instate = XML_PARSER_IGNORE;

        /*
         * Scan the raw content, only tracking nested '<![' / ']]>' pairs.
         * The ']]>' matching the outermost section is left unconsumed
         * (depth drops below 0 without the SKIP) for the common tail.
         */
        while (((depth >= 0) && (RAW != 0)) &&
               (ctxt->instate != XML_PARSER_EOF)) {
          if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
            depth++;
            SKIP(3);
            continue;
          }
          if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
            if (--depth >= 0) SKIP(3);
            continue;
          }
          NEXT;
          continue;
        }

        /*
         * NOTE(review): instate is restored unconditionally, which also
         * overwrites an XML_PARSER_EOF set while scanning -- confirm
         * whether that is intended.
         */
        ctxt->disableSAX = state;
        ctxt->instate = instate;

        if (xmlParserDebugEntities) {
            if ((ctxt->input != NULL) && (ctxt->input->filename))
                xmlGenericError(xmlGenericErrorContext,
                        "%s(%d): ", ctxt->input->filename,
                        ctxt->input->line);
            xmlGenericError(xmlGenericErrorContext,
                    "Leaving IGNORE Conditional Section\n");
        }

    } else {
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
    }

    if (RAW == 0)
        SHRINK;

    if (RAW == 0) {
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
    } else {
        if (ctxt->input->id != id) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
        "All markup of the conditional section is not in the same entity\n",
                                 NULL, NULL);
        }
        /*
         * Skips three characters, expected to be the closing ']]>'.
         * NOTE(review): this runs on every path where RAW != 0, including
         * after the invalid-keyword error above, without checking what
         * the three characters are.
         */
        SKIP(3);
    }
}

/**
 * xmlParseMarkupDecl:
 * @ctxt:  an XML parser context
 *
 * parse Markup declarations
 *
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
 *                     NotationDecl | PI | Comment
 *
 * [ VC: Proper Declaration/PE Nesting ]
 * Parameter-entity replacement text must be properly nested with
 * markup declarations. That is to say, if either the first character
 * or the last character of a markup declaration (markupdecl above) is
 * contained in the replacement text for a parameter-entity reference,
 * both must be contained in the same replacement text.
 *
 * [ WFC: PEs in Internal Subset ]
 * In the internal DTD subset, parameter-entity references can occur
 * only where markup declarations can occur, not within markup declarations.
 * (This does not apply to references that occur in external parameter
 * entities or to the external subset.)
6548 */ 6549 void 6550 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6551 GROW; 6552 if (CUR == '<') { 6553 if (NXT(1) == '!') { 6554 switch (NXT(2)) { 6555 case 'E': 6556 if (NXT(3) == 'L') 6557 xmlParseElementDecl(ctxt); 6558 else if (NXT(3) == 'N') 6559 xmlParseEntityDecl(ctxt); 6560 break; 6561 case 'A': 6562 xmlParseAttributeListDecl(ctxt); 6563 break; 6564 case 'N': 6565 xmlParseNotationDecl(ctxt); 6566 break; 6567 case '-': 6568 xmlParseComment(ctxt); 6569 break; 6570 default: 6571 /* there is an error but it will be detected later */ 6572 break; 6573 } 6574 } else if (NXT(1) == '?') { 6575 xmlParsePI(ctxt); 6576 } 6577 } 6578 /* 6579 * This is only for internal subset. On external entities, 6580 * the replacement is done before parsing stage 6581 */ 6582 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6583 xmlParsePEReference(ctxt); 6584 6585 /* 6586 * Conditional sections are allowed from entities included 6587 * by PE References in the internal subset. 6588 */ 6589 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6590 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6591 xmlParseConditionalSections(ctxt); 6592 } 6593 } 6594 6595 ctxt->instate = XML_PARSER_DTD; 6596 } 6597 6598 /** 6599 * xmlParseTextDecl: 6600 * @ctxt: an XML parser context 6601 * 6602 * parse an XML declaration header for external entities 6603 * 6604 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6605 */ 6606 6607 void 6608 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6609 xmlChar *version; 6610 const xmlChar *encoding; 6611 6612 /* 6613 * We know that '<?xml' is here. 6614 */ 6615 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6616 SKIP(5); 6617 } else { 6618 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6619 return; 6620 } 6621 6622 if (!IS_BLANK_CH(CUR)) { 6623 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6624 "Space needed after '<?xml'\n"); 6625 } 6626 SKIP_BLANKS; 6627 6628 /* 6629 * We may have the VersionInfo here. 
6630 */ 6631 version = xmlParseVersionInfo(ctxt); 6632 if (version == NULL) 6633 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6634 else { 6635 if (!IS_BLANK_CH(CUR)) { 6636 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6637 "Space needed here\n"); 6638 } 6639 } 6640 ctxt->input->version = version; 6641 6642 /* 6643 * We must have the encoding declaration 6644 */ 6645 encoding = xmlParseEncodingDecl(ctxt); 6646 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6647 /* 6648 * The XML REC instructs us to stop parsing right here 6649 */ 6650 return; 6651 } 6652 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6653 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6654 "Missing encoding in text declaration\n"); 6655 } 6656 6657 SKIP_BLANKS; 6658 if ((RAW == '?') && (NXT(1) == '>')) { 6659 SKIP(2); 6660 } else if (RAW == '>') { 6661 /* Deprecated old WD ... */ 6662 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6663 NEXT; 6664 } else { 6665 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6666 MOVETO_ENDTAG(CUR_PTR); 6667 NEXT; 6668 } 6669 } 6670 6671 /** 6672 * xmlParseExternalSubset: 6673 * @ctxt: an XML parser context 6674 * @ExternalID: the external identifier 6675 * @SystemID: the system identifier (or URL) 6676 * 6677 * parse Markup declarations from an external subset 6678 * 6679 * [30] extSubset ::= textDecl? 
extSubsetDecl 6680 * 6681 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6682 */ 6683 void 6684 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6685 const xmlChar *SystemID) { 6686 xmlDetectSAX2(ctxt); 6687 GROW; 6688 6689 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 6690 (ctxt->input->end - ctxt->input->cur >= 4)) { 6691 xmlChar start[4]; 6692 xmlCharEncoding enc; 6693 6694 start[0] = RAW; 6695 start[1] = NXT(1); 6696 start[2] = NXT(2); 6697 start[3] = NXT(3); 6698 enc = xmlDetectCharEncoding(start, 4); 6699 if (enc != XML_CHAR_ENCODING_NONE) 6700 xmlSwitchEncoding(ctxt, enc); 6701 } 6702 6703 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6704 xmlParseTextDecl(ctxt); 6705 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6706 /* 6707 * The XML REC instructs us to stop parsing right here 6708 */ 6709 ctxt->instate = XML_PARSER_EOF; 6710 return; 6711 } 6712 } 6713 if (ctxt->myDoc == NULL) { 6714 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6715 if (ctxt->myDoc == NULL) { 6716 xmlErrMemory(ctxt, "New Doc failed"); 6717 return; 6718 } 6719 ctxt->myDoc->properties = XML_DOC_INTERNAL; 6720 } 6721 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6722 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6723 6724 ctxt->instate = XML_PARSER_DTD; 6725 ctxt->external = 1; 6726 while (((RAW == '<') && (NXT(1) == '?')) || 6727 ((RAW == '<') && (NXT(1) == '!')) || 6728 (RAW == '%') || IS_BLANK_CH(CUR)) { 6729 const xmlChar *check = CUR_PTR; 6730 unsigned int cons = ctxt->input->consumed; 6731 6732 GROW; 6733 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6734 xmlParseConditionalSections(ctxt); 6735 } else if (IS_BLANK_CH(CUR)) { 6736 NEXT; 6737 } else if (RAW == '%') { 6738 xmlParsePEReference(ctxt); 6739 } else 6740 xmlParseMarkupDecl(ctxt); 6741 6742 /* 6743 * Pop-up of finished entities. 
6744 */ 6745 while ((RAW == 0) && (ctxt->inputNr > 1)) 6746 xmlPopInput(ctxt); 6747 6748 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6749 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6750 break; 6751 } 6752 } 6753 6754 if (RAW != 0) { 6755 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6756 } 6757 6758 } 6759 6760 /** 6761 * xmlParseReference: 6762 * @ctxt: an XML parser context 6763 * 6764 * parse and handle entity references in content, depending on the SAX 6765 * interface, this may end-up in a call to character() if this is a 6766 * CharRef, a predefined entity, if there is no reference() callback. 6767 * or if the parser was asked to switch to that mode. 6768 * 6769 * [67] Reference ::= EntityRef | CharRef 6770 */ 6771 void 6772 xmlParseReference(xmlParserCtxtPtr ctxt) { 6773 xmlEntityPtr ent; 6774 xmlChar *val; 6775 int was_checked; 6776 xmlNodePtr list = NULL; 6777 xmlParserErrors ret = XML_ERR_OK; 6778 6779 6780 if (RAW != '&') 6781 return; 6782 6783 /* 6784 * Simple case of a CharRef 6785 */ 6786 if (NXT(1) == '#') { 6787 int i = 0; 6788 xmlChar out[10]; 6789 int hex = NXT(2); 6790 int value = xmlParseCharRef(ctxt); 6791 6792 if (value == 0) 6793 return; 6794 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 6795 /* 6796 * So we are using non-UTF-8 buffers 6797 * Check that the char fit on 8bits, if not 6798 * generate a CharRef. 
6799 */ 6800 if (value <= 0xFF) { 6801 out[0] = value; 6802 out[1] = 0; 6803 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6804 (!ctxt->disableSAX)) 6805 ctxt->sax->characters(ctxt->userData, out, 1); 6806 } else { 6807 if ((hex == 'x') || (hex == 'X')) 6808 snprintf((char *)out, sizeof(out), "#x%X", value); 6809 else 6810 snprintf((char *)out, sizeof(out), "#%d", value); 6811 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6812 (!ctxt->disableSAX)) 6813 ctxt->sax->reference(ctxt->userData, out); 6814 } 6815 } else { 6816 /* 6817 * Just encode the value in UTF-8 6818 */ 6819 COPY_BUF(0 ,out, i, value); 6820 out[i] = 0; 6821 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6822 (!ctxt->disableSAX)) 6823 ctxt->sax->characters(ctxt->userData, out, i); 6824 } 6825 return; 6826 } 6827 6828 /* 6829 * We are seeing an entity reference 6830 */ 6831 ent = xmlParseEntityRef(ctxt); 6832 if (ent == NULL) return; 6833 if (!ctxt->wellFormed) 6834 return; 6835 was_checked = ent->checked; 6836 6837 /* special case of predefined entities */ 6838 if ((ent->name == NULL) || 6839 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 6840 val = ent->content; 6841 if (val == NULL) return; 6842 /* 6843 * inline the entity. 6844 */ 6845 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6846 (!ctxt->disableSAX)) 6847 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 6848 return; 6849 } 6850 6851 /* 6852 * The first reference to the entity trigger a parsing phase 6853 * where the ent->children is filled with the result from 6854 * the parsing. 6855 */ 6856 if (ent->checked == 0) { 6857 unsigned long oldnbent = ctxt->nbentities; 6858 6859 /* 6860 * This is a bit hackish but this seems the best 6861 * way to make sure both SAX and DOM entity support 6862 * behaves okay. 
6863 */ 6864 void *user_data; 6865 if (ctxt->userData == ctxt) 6866 user_data = NULL; 6867 else 6868 user_data = ctxt->userData; 6869 6870 /* 6871 * Check that this entity is well formed 6872 * 4.3.2: An internal general parsed entity is well-formed 6873 * if its replacement text matches the production labeled 6874 * content. 6875 */ 6876 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6877 ctxt->depth++; 6878 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 6879 user_data, &list); 6880 ctxt->depth--; 6881 6882 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6883 ctxt->depth++; 6884 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 6885 user_data, ctxt->depth, ent->URI, 6886 ent->ExternalID, &list); 6887 ctxt->depth--; 6888 } else { 6889 ret = XML_ERR_ENTITY_PE_INTERNAL; 6890 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6891 "invalid entity type found\n", NULL); 6892 } 6893 6894 /* 6895 * Store the number of entities needing parsing for this entity 6896 * content and do checkings 6897 */ 6898 ent->checked = ctxt->nbentities - oldnbent; 6899 if (ret == XML_ERR_ENTITY_LOOP) { 6900 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6901 xmlFreeNodeList(list); 6902 return; 6903 } 6904 if (xmlParserEntityCheck(ctxt, 0, ent)) { 6905 xmlFreeNodeList(list); 6906 return; 6907 } 6908 6909 if ((ret == XML_ERR_OK) && (list != NULL)) { 6910 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 6911 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 6912 (ent->children == NULL)) { 6913 ent->children = list; 6914 if (ctxt->replaceEntities) { 6915 /* 6916 * Prune it directly in the generated document 6917 * except for single text nodes. 
6918 */ 6919 if (((list->type == XML_TEXT_NODE) && 6920 (list->next == NULL)) || 6921 (ctxt->parseMode == XML_PARSE_READER)) { 6922 list->parent = (xmlNodePtr) ent; 6923 list = NULL; 6924 ent->owner = 1; 6925 } else { 6926 ent->owner = 0; 6927 while (list != NULL) { 6928 list->parent = (xmlNodePtr) ctxt->node; 6929 list->doc = ctxt->myDoc; 6930 if (list->next == NULL) 6931 ent->last = list; 6932 list = list->next; 6933 } 6934 list = ent->children; 6935 #ifdef LIBXML_LEGACY_ENABLED 6936 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6937 xmlAddEntityReference(ent, list, NULL); 6938 #endif /* LIBXML_LEGACY_ENABLED */ 6939 } 6940 } else { 6941 ent->owner = 1; 6942 while (list != NULL) { 6943 list->parent = (xmlNodePtr) ent; 6944 if (list->next == NULL) 6945 ent->last = list; 6946 list = list->next; 6947 } 6948 } 6949 } else { 6950 xmlFreeNodeList(list); 6951 list = NULL; 6952 } 6953 } else if ((ret != XML_ERR_OK) && 6954 (ret != XML_WAR_UNDECLARED_ENTITY)) { 6955 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6956 "Entity '%s' failed to parse\n", ent->name); 6957 } else if (list != NULL) { 6958 xmlFreeNodeList(list); 6959 list = NULL; 6960 } 6961 if (ent->checked == 0) 6962 ent->checked = 1; 6963 } else if (ent->checked != 1) { 6964 ctxt->nbentities += ent->checked; 6965 } 6966 6967 /* 6968 * Now that the entity content has been gathered 6969 * provide it to the application, this can take different forms based 6970 * on the parsing modes. 6971 */ 6972 if (ent->children == NULL) { 6973 /* 6974 * Probably running in SAX mode and the callbacks don't 6975 * build the entity content. So unless we already went 6976 * though parsing for first checking go though the entity 6977 * content to generate callbacks associated to the entity 6978 */ 6979 if (was_checked != 0) { 6980 void *user_data; 6981 /* 6982 * This is a bit hackish but this seems the best 6983 * way to make sure both SAX and DOM entity support 6984 * behaves okay. 
6985 */ 6986 if (ctxt->userData == ctxt) 6987 user_data = NULL; 6988 else 6989 user_data = ctxt->userData; 6990 6991 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6992 ctxt->depth++; 6993 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 6994 ent->content, user_data, NULL); 6995 ctxt->depth--; 6996 } else if (ent->etype == 6997 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6998 ctxt->depth++; 6999 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7000 ctxt->sax, user_data, ctxt->depth, 7001 ent->URI, ent->ExternalID, NULL); 7002 ctxt->depth--; 7003 } else { 7004 ret = XML_ERR_ENTITY_PE_INTERNAL; 7005 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7006 "invalid entity type found\n", NULL); 7007 } 7008 if (ret == XML_ERR_ENTITY_LOOP) { 7009 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7010 return; 7011 } 7012 } 7013 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7014 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7015 /* 7016 * Entity reference callback comes second, it's somewhat 7017 * superfluous but a compatibility to historical behaviour 7018 */ 7019 ctxt->sax->reference(ctxt->userData, ent->name); 7020 } 7021 return; 7022 } 7023 7024 /* 7025 * If we didn't get any children for the entity being built 7026 */ 7027 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7028 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7029 /* 7030 * Create a node. 7031 */ 7032 ctxt->sax->reference(ctxt->userData, ent->name); 7033 return; 7034 } 7035 7036 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7037 /* 7038 * There is a problem on the handling of _private for entities 7039 * (bug 155816): Should we copy the content of the field from 7040 * the entity (possibly overwriting some value set by the user 7041 * when a copy is created), should we leave it alone, or should 7042 * we try to take care of different situations? The problem 7043 * is exacerbated by the usage of this field by the xmlReader. 
7044 * To fix this bug, we look at _private on the created node 7045 * and, if it's NULL, we copy in whatever was in the entity. 7046 * If it's not NULL we leave it alone. This is somewhat of a 7047 * hack - maybe we should have further tests to determine 7048 * what to do. 7049 */ 7050 if ((ctxt->node != NULL) && (ent->children != NULL)) { 7051 /* 7052 * Seems we are generating the DOM content, do 7053 * a simple tree copy for all references except the first 7054 * In the first occurrence list contains the replacement. 7055 * progressive == 2 means we are operating on the Reader 7056 * and since nodes are discarded we must copy all the time. 7057 */ 7058 if (((list == NULL) && (ent->owner == 0)) || 7059 (ctxt->parseMode == XML_PARSE_READER)) { 7060 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7061 7062 /* 7063 * when operating on a reader, the entities definitions 7064 * are always owning the entities subtree. 7065 if (ctxt->parseMode == XML_PARSE_READER) 7066 ent->owner = 1; 7067 */ 7068 7069 cur = ent->children; 7070 while (cur != NULL) { 7071 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7072 if (nw != NULL) { 7073 if (nw->_private == NULL) 7074 nw->_private = cur->_private; 7075 if (firstChild == NULL){ 7076 firstChild = nw; 7077 } 7078 nw = xmlAddChild(ctxt->node, nw); 7079 } 7080 if (cur == ent->last) { 7081 /* 7082 * needed to detect some strange empty 7083 * node cases in the reader tests 7084 */ 7085 if ((ctxt->parseMode == XML_PARSE_READER) && 7086 (nw != NULL) && 7087 (nw->type == XML_ELEMENT_NODE) && 7088 (nw->children == NULL)) 7089 nw->extra = 1; 7090 7091 break; 7092 } 7093 cur = cur->next; 7094 } 7095 #ifdef LIBXML_LEGACY_ENABLED 7096 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7097 xmlAddEntityReference(ent, firstChild, nw); 7098 #endif /* LIBXML_LEGACY_ENABLED */ 7099 } else if (list == NULL) { 7100 xmlNodePtr nw = NULL, cur, next, last, 7101 firstChild = NULL; 7102 /* 7103 * Copy the entity child list and make it the new 7104 * entity 
child list. The goal is to make sure any 7105 * ID or REF referenced will be the one from the 7106 * document content and not the entity copy. 7107 */ 7108 cur = ent->children; 7109 ent->children = NULL; 7110 last = ent->last; 7111 ent->last = NULL; 7112 while (cur != NULL) { 7113 next = cur->next; 7114 cur->next = NULL; 7115 cur->parent = NULL; 7116 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7117 if (nw != NULL) { 7118 if (nw->_private == NULL) 7119 nw->_private = cur->_private; 7120 if (firstChild == NULL){ 7121 firstChild = cur; 7122 } 7123 xmlAddChild((xmlNodePtr) ent, nw); 7124 xmlAddChild(ctxt->node, cur); 7125 } 7126 if (cur == last) 7127 break; 7128 cur = next; 7129 } 7130 if (ent->owner == 0) 7131 ent->owner = 1; 7132 #ifdef LIBXML_LEGACY_ENABLED 7133 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7134 xmlAddEntityReference(ent, firstChild, nw); 7135 #endif /* LIBXML_LEGACY_ENABLED */ 7136 } else { 7137 const xmlChar *nbktext; 7138 7139 /* 7140 * the name change is to avoid coalescing of the 7141 * node with a possible previous text one which 7142 * would make ent->children a dangling pointer 7143 */ 7144 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7145 -1); 7146 if (ent->children->type == XML_TEXT_NODE) 7147 ent->children->name = nbktext; 7148 if ((ent->last != ent->children) && 7149 (ent->last->type == XML_TEXT_NODE)) 7150 ent->last->name = nbktext; 7151 xmlAddChildList(ctxt->node, ent->children); 7152 } 7153 7154 /* 7155 * This is to avoid a nasty side effect, see 7156 * characters() in SAX.c 7157 */ 7158 ctxt->nodemem = 0; 7159 ctxt->nodelen = 0; 7160 return; 7161 } 7162 } 7163 } 7164 7165 /** 7166 * xmlParseEntityRef: 7167 * @ctxt: an XML parser context 7168 * 7169 * parse ENTITY references declarations 7170 * 7171 * [68] EntityRef ::= '&' Name ';' 7172 * 7173 * [ WFC: Entity Declared ] 7174 * In a document without any DTD, a document with only an internal DTD 7175 * subset which contains no parameter entity references, or a document 
7176 * with "standalone='yes'", the Name given in the entity reference 7177 * must match that in an entity declaration, except that well-formed 7178 * documents need not declare any of the following entities: amp, lt, 7179 * gt, apos, quot. The declaration of a parameter entity must precede 7180 * any reference to it. Similarly, the declaration of a general entity 7181 * must precede any reference to it which appears in a default value in an 7182 * attribute-list declaration. Note that if entities are declared in the 7183 * external subset or in external parameter entities, a non-validating 7184 * processor is not obligated to read and process their declarations; 7185 * for such documents, the rule that an entity must be declared is a 7186 * well-formedness constraint only if standalone='yes'. 7187 * 7188 * [ WFC: Parsed Entity ] 7189 * An entity reference must not contain the name of an unparsed entity 7190 * 7191 * Returns the xmlEntityPtr if found, or NULL otherwise. 7192 */ 7193 xmlEntityPtr 7194 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7195 const xmlChar *name; 7196 xmlEntityPtr ent = NULL; 7197 7198 GROW; 7199 if (ctxt->instate == XML_PARSER_EOF) 7200 return(NULL); 7201 7202 if (RAW != '&') 7203 return(NULL); 7204 NEXT; 7205 name = xmlParseName(ctxt); 7206 if (name == NULL) { 7207 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7208 "xmlParseEntityRef: no name\n"); 7209 return(NULL); 7210 } 7211 if (RAW != ';') { 7212 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7213 return(NULL); 7214 } 7215 NEXT; 7216 7217 /* 7218 * Predefined entites override any extra definition 7219 */ 7220 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7221 ent = xmlGetPredefinedEntity(name); 7222 if (ent != NULL) 7223 return(ent); 7224 } 7225 7226 /* 7227 * Increate the number of entity references parsed 7228 */ 7229 ctxt->nbentities++; 7230 7231 /* 7232 * Ask first SAX for entity resolution, otherwise try the 7233 * entities which may have stored in the parser context. 
7234 */ 7235 if (ctxt->sax != NULL) { 7236 if (ctxt->sax->getEntity != NULL) 7237 ent = ctxt->sax->getEntity(ctxt->userData, name); 7238 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7239 (ctxt->options & XML_PARSE_OLDSAX)) 7240 ent = xmlGetPredefinedEntity(name); 7241 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7242 (ctxt->userData==ctxt)) { 7243 ent = xmlSAX2GetEntity(ctxt, name); 7244 } 7245 } 7246 if (ctxt->instate == XML_PARSER_EOF) 7247 return(NULL); 7248 /* 7249 * [ WFC: Entity Declared ] 7250 * In a document without any DTD, a document with only an 7251 * internal DTD subset which contains no parameter entity 7252 * references, or a document with "standalone='yes'", the 7253 * Name given in the entity reference must match that in an 7254 * entity declaration, except that well-formed documents 7255 * need not declare any of the following entities: amp, lt, 7256 * gt, apos, quot. 7257 * The declaration of a parameter entity must precede any 7258 * reference to it. 7259 * Similarly, the declaration of a general entity must 7260 * precede any reference to it which appears in a default 7261 * value in an attribute-list declaration. Note that if 7262 * entities are declared in the external subset or in 7263 * external parameter entities, a non-validating processor 7264 * is not obligated to read and process their declarations; 7265 * for such documents, the rule that an entity must be 7266 * declared is a well-formedness constraint only if 7267 * standalone='yes'. 
7268 */ 7269 if (ent == NULL) { 7270 if ((ctxt->standalone == 1) || 7271 ((ctxt->hasExternalSubset == 0) && 7272 (ctxt->hasPErefs == 0))) { 7273 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7274 "Entity '%s' not defined\n", name); 7275 } else { 7276 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7277 "Entity '%s' not defined\n", name); 7278 if ((ctxt->inSubset == 0) && 7279 (ctxt->sax != NULL) && 7280 (ctxt->sax->reference != NULL)) { 7281 ctxt->sax->reference(ctxt->userData, name); 7282 } 7283 } 7284 ctxt->valid = 0; 7285 } 7286 7287 /* 7288 * [ WFC: Parsed Entity ] 7289 * An entity reference must not contain the name of an 7290 * unparsed entity 7291 */ 7292 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7293 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7294 "Entity reference to unparsed entity %s\n", name); 7295 } 7296 7297 /* 7298 * [ WFC: No External Entity References ] 7299 * Attribute values cannot contain direct or indirect 7300 * entity references to external entities. 7301 */ 7302 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7303 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7304 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7305 "Attribute references external entity '%s'\n", name); 7306 } 7307 /* 7308 * [ WFC: No < in Attribute Values ] 7309 * The replacement text of any entity referred to directly or 7310 * indirectly in an attribute value (other than "<") must 7311 * not contain a <. 7312 */ 7313 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7314 (ent != NULL) && (ent->content != NULL) && 7315 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7316 (xmlStrchr(ent->content, '<'))) { 7317 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7318 "'<' in entity '%s' is not allowed in attributes values\n", name); 7319 } 7320 7321 /* 7322 * Internal check, no parameter entities here ... 
7323 */ 7324 else { 7325 switch (ent->etype) { 7326 case XML_INTERNAL_PARAMETER_ENTITY: 7327 case XML_EXTERNAL_PARAMETER_ENTITY: 7328 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7329 "Attempt to reference the parameter entity '%s'\n", 7330 name); 7331 break; 7332 default: 7333 break; 7334 } 7335 } 7336 7337 /* 7338 * [ WFC: No Recursion ] 7339 * A parsed entity must not contain a recursive reference 7340 * to itself, either directly or indirectly. 7341 * Done somewhere else 7342 */ 7343 return(ent); 7344 } 7345 7346 /** 7347 * xmlParseStringEntityRef: 7348 * @ctxt: an XML parser context 7349 * @str: a pointer to an index in the string 7350 * 7351 * parse ENTITY references declarations, but this version parses it from 7352 * a string value. 7353 * 7354 * [68] EntityRef ::= '&' Name ';' 7355 * 7356 * [ WFC: Entity Declared ] 7357 * In a document without any DTD, a document with only an internal DTD 7358 * subset which contains no parameter entity references, or a document 7359 * with "standalone='yes'", the Name given in the entity reference 7360 * must match that in an entity declaration, except that well-formed 7361 * documents need not declare any of the following entities: amp, lt, 7362 * gt, apos, quot. The declaration of a parameter entity must precede 7363 * any reference to it. Similarly, the declaration of a general entity 7364 * must precede any reference to it which appears in a default value in an 7365 * attribute-list declaration. Note that if entities are declared in the 7366 * external subset or in external parameter entities, a non-validating 7367 * processor is not obligated to read and process their declarations; 7368 * for such documents, the rule that an entity must be declared is a 7369 * well-formedness constraint only if standalone='yes'. 7370 * 7371 * [ WFC: Parsed Entity ] 7372 * An entity reference must not contain the name of an unparsed entity 7373 * 7374 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7375 * is updated to the current location in the string. 7376 */ 7377 static xmlEntityPtr 7378 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7379 xmlChar *name; 7380 const xmlChar *ptr; 7381 xmlChar cur; 7382 xmlEntityPtr ent = NULL; 7383 7384 if ((str == NULL) || (*str == NULL)) 7385 return(NULL); 7386 ptr = *str; 7387 cur = *ptr; 7388 if (cur != '&') 7389 return(NULL); 7390 7391 ptr++; 7392 name = xmlParseStringName(ctxt, &ptr); 7393 if (name == NULL) { 7394 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7395 "xmlParseStringEntityRef: no name\n"); 7396 *str = ptr; 7397 return(NULL); 7398 } 7399 if (*ptr != ';') { 7400 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7401 xmlFree(name); 7402 *str = ptr; 7403 return(NULL); 7404 } 7405 ptr++; 7406 7407 7408 /* 7409 * Predefined entites override any extra definition 7410 */ 7411 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7412 ent = xmlGetPredefinedEntity(name); 7413 if (ent != NULL) { 7414 xmlFree(name); 7415 *str = ptr; 7416 return(ent); 7417 } 7418 } 7419 7420 /* 7421 * Increate the number of entity references parsed 7422 */ 7423 ctxt->nbentities++; 7424 7425 /* 7426 * Ask first SAX for entity resolution, otherwise try the 7427 * entities which may have stored in the parser context. 
7428 */ 7429 if (ctxt->sax != NULL) { 7430 if (ctxt->sax->getEntity != NULL) 7431 ent = ctxt->sax->getEntity(ctxt->userData, name); 7432 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7433 ent = xmlGetPredefinedEntity(name); 7434 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7435 ent = xmlSAX2GetEntity(ctxt, name); 7436 } 7437 } 7438 if (ctxt->instate == XML_PARSER_EOF) { 7439 xmlFree(name); 7440 return(NULL); 7441 } 7442 7443 /* 7444 * [ WFC: Entity Declared ] 7445 * In a document without any DTD, a document with only an 7446 * internal DTD subset which contains no parameter entity 7447 * references, or a document with "standalone='yes'", the 7448 * Name given in the entity reference must match that in an 7449 * entity declaration, except that well-formed documents 7450 * need not declare any of the following entities: amp, lt, 7451 * gt, apos, quot. 7452 * The declaration of a parameter entity must precede any 7453 * reference to it. 7454 * Similarly, the declaration of a general entity must 7455 * precede any reference to it which appears in a default 7456 * value in an attribute-list declaration. Note that if 7457 * entities are declared in the external subset or in 7458 * external parameter entities, a non-validating processor 7459 * is not obligated to read and process their declarations; 7460 * for such documents, the rule that an entity must be 7461 * declared is a well-formedness constraint only if 7462 * standalone='yes'. 7463 */ 7464 if (ent == NULL) { 7465 if ((ctxt->standalone == 1) || 7466 ((ctxt->hasExternalSubset == 0) && 7467 (ctxt->hasPErefs == 0))) { 7468 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7469 "Entity '%s' not defined\n", name); 7470 } else { 7471 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7472 "Entity '%s' not defined\n", 7473 name); 7474 } 7475 /* TODO ? 
check regressions ctxt->valid = 0; */ 7476 } 7477 7478 /* 7479 * [ WFC: Parsed Entity ] 7480 * An entity reference must not contain the name of an 7481 * unparsed entity 7482 */ 7483 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7484 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7485 "Entity reference to unparsed entity %s\n", name); 7486 } 7487 7488 /* 7489 * [ WFC: No External Entity References ] 7490 * Attribute values cannot contain direct or indirect 7491 * entity references to external entities. 7492 */ 7493 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7494 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7495 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7496 "Attribute references external entity '%s'\n", name); 7497 } 7498 /* 7499 * [ WFC: No < in Attribute Values ] 7500 * The replacement text of any entity referred to directly or 7501 * indirectly in an attribute value (other than "<") must 7502 * not contain a <. 7503 */ 7504 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7505 (ent != NULL) && (ent->content != NULL) && 7506 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7507 (xmlStrchr(ent->content, '<'))) { 7508 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7509 "'<' in entity '%s' is not allowed in attributes values\n", 7510 name); 7511 } 7512 7513 /* 7514 * Internal check, no parameter entities here ... 7515 */ 7516 else { 7517 switch (ent->etype) { 7518 case XML_INTERNAL_PARAMETER_ENTITY: 7519 case XML_EXTERNAL_PARAMETER_ENTITY: 7520 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7521 "Attempt to reference the parameter entity '%s'\n", 7522 name); 7523 break; 7524 default: 7525 break; 7526 } 7527 } 7528 7529 /* 7530 * [ WFC: No Recursion ] 7531 * A parsed entity must not contain a recursive reference 7532 * to itself, either directly or indirectly. 
7533 * Done somewhere else 7534 */ 7535 7536 xmlFree(name); 7537 *str = ptr; 7538 return(ent); 7539 } 7540 7541 /** 7542 * xmlParsePEReference: 7543 * @ctxt: an XML parser context 7544 * 7545 * parse PEReference declarations 7546 * The entity content is handled directly by pushing it's content as 7547 * a new input stream. 7548 * 7549 * [69] PEReference ::= '%' Name ';' 7550 * 7551 * [ WFC: No Recursion ] 7552 * A parsed entity must not contain a recursive 7553 * reference to itself, either directly or indirectly. 7554 * 7555 * [ WFC: Entity Declared ] 7556 * In a document without any DTD, a document with only an internal DTD 7557 * subset which contains no parameter entity references, or a document 7558 * with "standalone='yes'", ... ... The declaration of a parameter 7559 * entity must precede any reference to it... 7560 * 7561 * [ VC: Entity Declared ] 7562 * In a document with an external subset or external parameter entities 7563 * with "standalone='no'", ... ... The declaration of a parameter entity 7564 * must precede any reference to it... 7565 * 7566 * [ WFC: In DTD ] 7567 * Parameter-entity references may only appear in the DTD. 7568 * NOTE: misleading but this is handled. 
7569 */ 7570 void 7571 xmlParsePEReference(xmlParserCtxtPtr ctxt) 7572 { 7573 const xmlChar *name; 7574 xmlEntityPtr entity = NULL; 7575 xmlParserInputPtr input; 7576 7577 if (RAW != '%') 7578 return; 7579 NEXT; 7580 name = xmlParseName(ctxt); 7581 if (name == NULL) { 7582 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7583 "xmlParsePEReference: no name\n"); 7584 return; 7585 } 7586 if (RAW != ';') { 7587 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7588 return; 7589 } 7590 7591 NEXT; 7592 7593 /* 7594 * Increate the number of entity references parsed 7595 */ 7596 ctxt->nbentities++; 7597 7598 /* 7599 * Request the entity from SAX 7600 */ 7601 if ((ctxt->sax != NULL) && 7602 (ctxt->sax->getParameterEntity != NULL)) 7603 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 7604 if (ctxt->instate == XML_PARSER_EOF) 7605 return; 7606 if (entity == NULL) { 7607 /* 7608 * [ WFC: Entity Declared ] 7609 * In a document without any DTD, a document with only an 7610 * internal DTD subset which contains no parameter entity 7611 * references, or a document with "standalone='yes'", ... 7612 * ... The declaration of a parameter entity must precede 7613 * any reference to it... 7614 */ 7615 if ((ctxt->standalone == 1) || 7616 ((ctxt->hasExternalSubset == 0) && 7617 (ctxt->hasPErefs == 0))) { 7618 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7619 "PEReference: %%%s; not found\n", 7620 name); 7621 } else { 7622 /* 7623 * [ VC: Entity Declared ] 7624 * In a document with an external subset or external 7625 * parameter entities with "standalone='no'", ... 7626 * ... The declaration of a parameter entity must 7627 * precede any reference to it... 
7628 */ 7629 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7630 "PEReference: %%%s; not found\n", 7631 name, NULL); 7632 ctxt->valid = 0; 7633 } 7634 } else { 7635 /* 7636 * Internal checking in case the entity quest barfed 7637 */ 7638 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7639 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7640 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7641 "Internal: %%%s; is not a parameter entity\n", 7642 name, NULL); 7643 } else if (ctxt->input->free != deallocblankswrapper) { 7644 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 7645 if (xmlPushInput(ctxt, input) < 0) 7646 return; 7647 } else { 7648 /* 7649 * TODO !!! 7650 * handle the extra spaces added before and after 7651 * c.f. http://www.w3.org/TR/REC-xml#as-PE 7652 */ 7653 input = xmlNewEntityInputStream(ctxt, entity); 7654 if (xmlPushInput(ctxt, input) < 0) 7655 return; 7656 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7657 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 7658 (IS_BLANK_CH(NXT(5)))) { 7659 xmlParseTextDecl(ctxt); 7660 if (ctxt->errNo == 7661 XML_ERR_UNSUPPORTED_ENCODING) { 7662 /* 7663 * The XML REC instructs us to stop parsing 7664 * right here 7665 */ 7666 ctxt->instate = XML_PARSER_EOF; 7667 return; 7668 } 7669 } 7670 } 7671 } 7672 ctxt->hasPErefs = 1; 7673 } 7674 7675 /** 7676 * xmlLoadEntityContent: 7677 * @ctxt: an XML parser context 7678 * @entity: an unloaded system entity 7679 * 7680 * Load the original content of the given system entity from the 7681 * ExternalID/SystemID given. 
This is to be used for Included in Literal 7682 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7683 * 7684 * Returns 0 in case of success and -1 in case of failure 7685 */ 7686 static int 7687 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7688 xmlParserInputPtr input; 7689 xmlBufferPtr buf; 7690 int l, c; 7691 int count = 0; 7692 7693 if ((ctxt == NULL) || (entity == NULL) || 7694 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7695 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7696 (entity->content != NULL)) { 7697 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7698 "xmlLoadEntityContent parameter error"); 7699 return(-1); 7700 } 7701 7702 if (xmlParserDebugEntities) 7703 xmlGenericError(xmlGenericErrorContext, 7704 "Reading %s entity content input\n", entity->name); 7705 7706 buf = xmlBufferCreate(); 7707 if (buf == NULL) { 7708 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7709 "xmlLoadEntityContent parameter error"); 7710 return(-1); 7711 } 7712 7713 input = xmlNewEntityInputStream(ctxt, entity); 7714 if (input == NULL) { 7715 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7716 "xmlLoadEntityContent input error"); 7717 xmlBufferFree(buf); 7718 return(-1); 7719 } 7720 7721 /* 7722 * Push the entity as the current input, read char by char 7723 * saving to the buffer until the end of the entity or an error 7724 */ 7725 if (xmlPushInput(ctxt, input) < 0) { 7726 xmlBufferFree(buf); 7727 return(-1); 7728 } 7729 7730 GROW; 7731 c = CUR_CHAR(l); 7732 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 7733 (IS_CHAR(c))) { 7734 xmlBufferAdd(buf, ctxt->input->cur, l); 7735 if (count++ > 100) { 7736 count = 0; 7737 GROW; 7738 if (ctxt->instate == XML_PARSER_EOF) { 7739 xmlBufferFree(buf); 7740 return(-1); 7741 } 7742 } 7743 NEXTL(l); 7744 c = CUR_CHAR(l); 7745 } 7746 7747 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 7748 xmlPopInput(ctxt); 7749 } else if (!IS_CHAR(c)) { 
7750 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 7751 "xmlLoadEntityContent: invalid char value %d\n", 7752 c); 7753 xmlBufferFree(buf); 7754 return(-1); 7755 } 7756 entity->content = buf->content; 7757 buf->content = NULL; 7758 xmlBufferFree(buf); 7759 7760 return(0); 7761 } 7762 7763 /** 7764 * xmlParseStringPEReference: 7765 * @ctxt: an XML parser context 7766 * @str: a pointer to an index in the string 7767 * 7768 * parse PEReference declarations 7769 * 7770 * [69] PEReference ::= '%' Name ';' 7771 * 7772 * [ WFC: No Recursion ] 7773 * A parsed entity must not contain a recursive 7774 * reference to itself, either directly or indirectly. 7775 * 7776 * [ WFC: Entity Declared ] 7777 * In a document without any DTD, a document with only an internal DTD 7778 * subset which contains no parameter entity references, or a document 7779 * with "standalone='yes'", ... ... The declaration of a parameter 7780 * entity must precede any reference to it... 7781 * 7782 * [ VC: Entity Declared ] 7783 * In a document with an external subset or external parameter entities 7784 * with "standalone='no'", ... ... The declaration of a parameter entity 7785 * must precede any reference to it... 7786 * 7787 * [ WFC: In DTD ] 7788 * Parameter-entity references may only appear in the DTD. 7789 * NOTE: misleading but this is handled. 7790 * 7791 * Returns the string of the entity content. 
7792 * str is updated to the current value of the index 7793 */ 7794 static xmlEntityPtr 7795 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 7796 const xmlChar *ptr; 7797 xmlChar cur; 7798 xmlChar *name; 7799 xmlEntityPtr entity = NULL; 7800 7801 if ((str == NULL) || (*str == NULL)) return(NULL); 7802 ptr = *str; 7803 cur = *ptr; 7804 if (cur != '%') 7805 return(NULL); 7806 ptr++; 7807 name = xmlParseStringName(ctxt, &ptr); 7808 if (name == NULL) { 7809 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7810 "xmlParseStringPEReference: no name\n"); 7811 *str = ptr; 7812 return(NULL); 7813 } 7814 cur = *ptr; 7815 if (cur != ';') { 7816 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7817 xmlFree(name); 7818 *str = ptr; 7819 return(NULL); 7820 } 7821 ptr++; 7822 7823 /* 7824 * Increate the number of entity references parsed 7825 */ 7826 ctxt->nbentities++; 7827 7828 /* 7829 * Request the entity from SAX 7830 */ 7831 if ((ctxt->sax != NULL) && 7832 (ctxt->sax->getParameterEntity != NULL)) 7833 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 7834 if (ctxt->instate == XML_PARSER_EOF) { 7835 xmlFree(name); 7836 return(NULL); 7837 } 7838 if (entity == NULL) { 7839 /* 7840 * [ WFC: Entity Declared ] 7841 * In a document without any DTD, a document with only an 7842 * internal DTD subset which contains no parameter entity 7843 * references, or a document with "standalone='yes'", ... 7844 * ... The declaration of a parameter entity must precede 7845 * any reference to it... 7846 */ 7847 if ((ctxt->standalone == 1) || 7848 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 7849 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7850 "PEReference: %%%s; not found\n", name); 7851 } else { 7852 /* 7853 * [ VC: Entity Declared ] 7854 * In a document with an external subset or external 7855 * parameter entities with "standalone='no'", ... 7856 * ... 
The declaration of a parameter entity must 7857 * precede any reference to it... 7858 */ 7859 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7860 "PEReference: %%%s; not found\n", 7861 name, NULL); 7862 ctxt->valid = 0; 7863 } 7864 } else { 7865 /* 7866 * Internal checking in case the entity quest barfed 7867 */ 7868 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7869 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7870 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7871 "%%%s; is not a parameter entity\n", 7872 name, NULL); 7873 } 7874 } 7875 ctxt->hasPErefs = 1; 7876 xmlFree(name); 7877 *str = ptr; 7878 return(entity); 7879 } 7880 7881 /** 7882 * xmlParseDocTypeDecl: 7883 * @ctxt: an XML parser context 7884 * 7885 * parse a DOCTYPE declaration 7886 * 7887 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 7888 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7889 * 7890 * [ VC: Root Element Type ] 7891 * The Name in the document type declaration must match the element 7892 * type of the root element. 7893 */ 7894 7895 void 7896 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 7897 const xmlChar *name = NULL; 7898 xmlChar *ExternalID = NULL; 7899 xmlChar *URI = NULL; 7900 7901 /* 7902 * We know that '<!DOCTYPE' has been detected. 7903 */ 7904 SKIP(9); 7905 7906 SKIP_BLANKS; 7907 7908 /* 7909 * Parse the DOCTYPE name. 7910 */ 7911 name = xmlParseName(ctxt); 7912 if (name == NULL) { 7913 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7914 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 7915 } 7916 ctxt->intSubName = name; 7917 7918 SKIP_BLANKS; 7919 7920 /* 7921 * Check for SystemID and ExternalID 7922 */ 7923 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 7924 7925 if ((URI != NULL) || (ExternalID != NULL)) { 7926 ctxt->hasExternalSubset = 1; 7927 } 7928 ctxt->extSubURI = URI; 7929 ctxt->extSubSystem = ExternalID; 7930 7931 SKIP_BLANKS; 7932 7933 /* 7934 * Create and update the internal subset. 
7935 */ 7936 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 7937 (!ctxt->disableSAX)) 7938 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 7939 if (ctxt->instate == XML_PARSER_EOF) 7940 return; 7941 7942 /* 7943 * Is there any internal subset declarations ? 7944 * they are handled separately in xmlParseInternalSubset() 7945 */ 7946 if (RAW == '[') 7947 return; 7948 7949 /* 7950 * We should be at the end of the DOCTYPE declaration. 7951 */ 7952 if (RAW != '>') { 7953 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 7954 } 7955 NEXT; 7956 } 7957 7958 /** 7959 * xmlParseInternalSubset: 7960 * @ctxt: an XML parser context 7961 * 7962 * parse the internal subset declaration 7963 * 7964 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7965 */ 7966 7967 static void 7968 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 7969 /* 7970 * Is there any DTD definition ? 7971 */ 7972 if (RAW == '[') { 7973 ctxt->instate = XML_PARSER_DTD; 7974 NEXT; 7975 /* 7976 * Parse the succession of Markup declarations and 7977 * PEReferences. 7978 * Subsequence (markupdecl | PEReference | S)* 7979 */ 7980 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { 7981 const xmlChar *check = CUR_PTR; 7982 unsigned int cons = ctxt->input->consumed; 7983 7984 SKIP_BLANKS; 7985 xmlParseMarkupDecl(ctxt); 7986 xmlParsePEReference(ctxt); 7987 7988 /* 7989 * Pop-up of finished entities. 7990 */ 7991 while ((RAW == 0) && (ctxt->inputNr > 1)) 7992 xmlPopInput(ctxt); 7993 7994 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7995 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7996 "xmlParseInternalSubset: error detected in Markup declaration\n"); 7997 break; 7998 } 7999 } 8000 if (RAW == ']') { 8001 NEXT; 8002 SKIP_BLANKS; 8003 } 8004 } 8005 8006 /* 8007 * We should be at the end of the DOCTYPE declaration. 
8008 */ 8009 if (RAW != '>') { 8010 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8011 } 8012 NEXT; 8013 } 8014 8015 #ifdef LIBXML_SAX1_ENABLED 8016 /** 8017 * xmlParseAttribute: 8018 * @ctxt: an XML parser context 8019 * @value: a xmlChar ** used to store the value of the attribute 8020 * 8021 * parse an attribute 8022 * 8023 * [41] Attribute ::= Name Eq AttValue 8024 * 8025 * [ WFC: No External Entity References ] 8026 * Attribute values cannot contain direct or indirect entity references 8027 * to external entities. 8028 * 8029 * [ WFC: No < in Attribute Values ] 8030 * The replacement text of any entity referred to directly or indirectly in 8031 * an attribute value (other than "<") must not contain a <. 8032 * 8033 * [ VC: Attribute Value Type ] 8034 * The attribute must have been declared; the value must be of the type 8035 * declared for it. 8036 * 8037 * [25] Eq ::= S? '=' S? 8038 * 8039 * With namespace: 8040 * 8041 * [NS 11] Attribute ::= QName Eq AttValue 8042 * 8043 * Also the case QName == xmlns:??? is handled independently as a namespace 8044 * definition. 8045 * 8046 * Returns the attribute name, and the value in *value. 
8047 */ 8048 8049 const xmlChar * 8050 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8051 const xmlChar *name; 8052 xmlChar *val; 8053 8054 *value = NULL; 8055 GROW; 8056 name = xmlParseName(ctxt); 8057 if (name == NULL) { 8058 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8059 "error parsing attribute name\n"); 8060 return(NULL); 8061 } 8062 8063 /* 8064 * read the value 8065 */ 8066 SKIP_BLANKS; 8067 if (RAW == '=') { 8068 NEXT; 8069 SKIP_BLANKS; 8070 val = xmlParseAttValue(ctxt); 8071 ctxt->instate = XML_PARSER_CONTENT; 8072 } else { 8073 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8074 "Specification mandate value for attribute %s\n", name); 8075 return(NULL); 8076 } 8077 8078 /* 8079 * Check that xml:lang conforms to the specification 8080 * No more registered as an error, just generate a warning now 8081 * since this was deprecated in XML second edition 8082 */ 8083 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8084 if (!xmlCheckLanguageID(val)) { 8085 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8086 "Malformed value for xml:lang : %s\n", 8087 val, NULL); 8088 } 8089 } 8090 8091 /* 8092 * Check that xml:space conforms to the specification 8093 */ 8094 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8095 if (xmlStrEqual(val, BAD_CAST "default")) 8096 *(ctxt->space) = 0; 8097 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8098 *(ctxt->space) = 1; 8099 else { 8100 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8101 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8102 val, NULL); 8103 } 8104 } 8105 8106 *value = val; 8107 return(name); 8108 } 8109 8110 /** 8111 * xmlParseStartTag: 8112 * @ctxt: an XML parser context 8113 * 8114 * parse a start of tag either for rule element or 8115 * EmptyElement. In both case we don't parse the tag closing chars. 8116 * 8117 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8118 * 8119 * [ WFC: Unique Att Spec ] 8120 * No attribute name may appear more than once in the same start-tag or 8121 * empty-element tag. 8122 * 8123 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8124 * 8125 * [ WFC: Unique Att Spec ] 8126 * No attribute name may appear more than once in the same start-tag or 8127 * empty-element tag. 8128 * 8129 * With namespace: 8130 * 8131 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8132 * 8133 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8134 * 8135 * Returns the element name parsed 8136 */ 8137 8138 const xmlChar * 8139 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8140 const xmlChar *name; 8141 const xmlChar *attname; 8142 xmlChar *attvalue; 8143 const xmlChar **atts = ctxt->atts; 8144 int nbatts = 0; 8145 int maxatts = ctxt->maxatts; 8146 int i; 8147 8148 if (RAW != '<') return(NULL); 8149 NEXT1; 8150 8151 name = xmlParseName(ctxt); 8152 if (name == NULL) { 8153 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8154 "xmlParseStartTag: invalid element name\n"); 8155 return(NULL); 8156 } 8157 8158 /* 8159 * Now parse the attributes, it ends up with the ending 8160 * 8161 * (S Attribute)* S? 8162 */ 8163 SKIP_BLANKS; 8164 GROW; 8165 8166 while (((RAW != '>') && 8167 ((RAW != '/') || (NXT(1) != '>')) && 8168 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8169 const xmlChar *q = CUR_PTR; 8170 unsigned int cons = ctxt->input->consumed; 8171 8172 attname = xmlParseAttribute(ctxt, &attvalue); 8173 if ((attname != NULL) && (attvalue != NULL)) { 8174 /* 8175 * [ WFC: Unique Att Spec ] 8176 * No attribute name may appear more than once in the same 8177 * start-tag or empty-element tag. 
8178 */ 8179 for (i = 0; i < nbatts;i += 2) { 8180 if (xmlStrEqual(atts[i], attname)) { 8181 xmlErrAttributeDup(ctxt, NULL, attname); 8182 xmlFree(attvalue); 8183 goto failed; 8184 } 8185 } 8186 /* 8187 * Add the pair to atts 8188 */ 8189 if (atts == NULL) { 8190 maxatts = 22; /* allow for 10 attrs by default */ 8191 atts = (const xmlChar **) 8192 xmlMalloc(maxatts * sizeof(xmlChar *)); 8193 if (atts == NULL) { 8194 xmlErrMemory(ctxt, NULL); 8195 if (attvalue != NULL) 8196 xmlFree(attvalue); 8197 goto failed; 8198 } 8199 ctxt->atts = atts; 8200 ctxt->maxatts = maxatts; 8201 } else if (nbatts + 4 > maxatts) { 8202 const xmlChar **n; 8203 8204 maxatts *= 2; 8205 n = (const xmlChar **) xmlRealloc((void *) atts, 8206 maxatts * sizeof(const xmlChar *)); 8207 if (n == NULL) { 8208 xmlErrMemory(ctxt, NULL); 8209 if (attvalue != NULL) 8210 xmlFree(attvalue); 8211 goto failed; 8212 } 8213 atts = n; 8214 ctxt->atts = atts; 8215 ctxt->maxatts = maxatts; 8216 } 8217 atts[nbatts++] = attname; 8218 atts[nbatts++] = attvalue; 8219 atts[nbatts] = NULL; 8220 atts[nbatts + 1] = NULL; 8221 } else { 8222 if (attvalue != NULL) 8223 xmlFree(attvalue); 8224 } 8225 8226 failed: 8227 8228 GROW 8229 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8230 break; 8231 if (!IS_BLANK_CH(RAW)) { 8232 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8233 "attributes construct error\n"); 8234 } 8235 SKIP_BLANKS; 8236 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8237 (attname == NULL) && (attvalue == NULL)) { 8238 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8239 "xmlParseStartTag: problem parsing attributes\n"); 8240 break; 8241 } 8242 SHRINK; 8243 GROW; 8244 } 8245 8246 /* 8247 * SAX: Start of Element ! 
8248 */ 8249 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8250 (!ctxt->disableSAX)) { 8251 if (nbatts > 0) 8252 ctxt->sax->startElement(ctxt->userData, name, atts); 8253 else 8254 ctxt->sax->startElement(ctxt->userData, name, NULL); 8255 } 8256 8257 if (atts != NULL) { 8258 /* Free only the content strings */ 8259 for (i = 1;i < nbatts;i+=2) 8260 if (atts[i] != NULL) 8261 xmlFree((xmlChar *) atts[i]); 8262 } 8263 return(name); 8264 } 8265 8266 /** 8267 * xmlParseEndTag1: 8268 * @ctxt: an XML parser context 8269 * @line: line of the start tag 8270 * @nsNr: number of namespaces on the start tag 8271 * 8272 * parse an end of tag 8273 * 8274 * [42] ETag ::= '</' Name S? '>' 8275 * 8276 * With namespace 8277 * 8278 * [NS 9] ETag ::= '</' QName S? '>' 8279 */ 8280 8281 static void 8282 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8283 const xmlChar *name; 8284 8285 GROW; 8286 if ((RAW != '<') || (NXT(1) != '/')) { 8287 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8288 "xmlParseEndTag: '</' not found\n"); 8289 return; 8290 } 8291 SKIP(2); 8292 8293 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8294 8295 /* 8296 * We should definitely be at the ending "S? '>'" part 8297 */ 8298 GROW; 8299 SKIP_BLANKS; 8300 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8301 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8302 } else 8303 NEXT1; 8304 8305 /* 8306 * [ WFC: Element Type Match ] 8307 * The Name in an element's end-tag must match the element type in the 8308 * start-tag. 
8309 * 8310 */ 8311 if (name != (xmlChar*)1) { 8312 if (name == NULL) name = BAD_CAST "unparseable"; 8313 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8314 "Opening and ending tag mismatch: %s line %d and %s\n", 8315 ctxt->name, line, name); 8316 } 8317 8318 /* 8319 * SAX: End of Tag 8320 */ 8321 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8322 (!ctxt->disableSAX)) 8323 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8324 8325 namePop(ctxt); 8326 spacePop(ctxt); 8327 return; 8328 } 8329 8330 /** 8331 * xmlParseEndTag: 8332 * @ctxt: an XML parser context 8333 * 8334 * parse an end of tag 8335 * 8336 * [42] ETag ::= '</' Name S? '>' 8337 * 8338 * With namespace 8339 * 8340 * [NS 9] ETag ::= '</' QName S? '>' 8341 */ 8342 8343 void 8344 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8345 xmlParseEndTag1(ctxt, 0); 8346 } 8347 #endif /* LIBXML_SAX1_ENABLED */ 8348 8349 /************************************************************************ 8350 * * 8351 * SAX 2 specific operations * 8352 * * 8353 ************************************************************************/ 8354 8355 /* 8356 * xmlGetNamespace: 8357 * @ctxt: an XML parser context 8358 * @prefix: the prefix to lookup 8359 * 8360 * Lookup the namespace name for the @prefix (which ca be NULL) 8361 * The prefix must come from the @ctxt->dict dictionnary 8362 * 8363 * Returns the namespace name or NULL if not bound 8364 */ 8365 static const xmlChar * 8366 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8367 int i; 8368 8369 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8370 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8371 if (ctxt->nsTab[i] == prefix) { 8372 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8373 return(NULL); 8374 return(ctxt->nsTab[i + 1]); 8375 } 8376 return(NULL); 8377 } 8378 8379 /** 8380 * xmlParseQName: 8381 * @ctxt: an XML parser context 8382 * @prefix: pointer to store the prefix part 8383 * 8384 * parse an XML Namespace QName 8385 * 8386 * 
[6]  QName  ::= (Prefix ':')? LocalPart
 * [7]  Prefix  ::= NCName
 * [8]  LocalPart  ::= NCName
 *
 * Malformed names are reported with XML_NS_ERR_QNAME and a best-effort
 * name is still built from what could be read.
 *
 * Returns the Name parsed or NULL
 */

static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
    const xmlChar *local, *pfx;
    xmlChar *qname;

    GROW;

    local = xmlParseNCName(ctxt);
    if (local == NULL) {
        /* Not an NCName: the only recovery is a name starting with ':'. */
        if (CUR != ':')
            return(NULL);
        local = xmlParseName(ctxt);
        if (local == NULL)
            return(NULL);
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s'\n", local, NULL, NULL);
        *prefix = NULL;
        return(local);
    }

    /* Plain NCName, no prefix. */
    if (CUR != ':') {
        *prefix = NULL;
        return(local);
    }

    /* What was read so far is the prefix. */
    NEXT;
    pfx = local;
    local = xmlParseNCName(ctxt);
    if (local == NULL) {
        /* "prefix:" followed by something which is not an NCName. */
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s:'\n", pfx, NULL, NULL);
        local = xmlParseNmtoken(ctxt);
        if (local != NULL) {
            qname = xmlBuildQName(local, pfx, NULL, 0);
            xmlFree((char *)local);
        } else {
            qname = xmlBuildQName(BAD_CAST "", pfx, NULL, 0);
        }
        pfx = xmlDictLookup(ctxt->dict, qname, -1);
        if (qname != NULL) xmlFree(qname);
        *prefix = NULL;
        return(pfx);
    }

    if (CUR == ':') {
        /* A second colon: glue the remainder onto the local part. */
        const xmlChar *rest;

        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s:%s:'\n", pfx, local, NULL);
        NEXT;
        rest = xmlParseName(ctxt);
        if (rest == NULL)
            rest = BAD_CAST "";
        qname = xmlBuildQName(rest, local, NULL, 0);
        local = xmlDictLookup(ctxt->dict, qname, -1);
        if (qname != NULL) xmlFree(qname);
    }

    *prefix = pfx;
    return(local);
}

/**
 * xmlParseQNameAndCompare:
 * @ctxt:  an XML parser context
 * @name:  the localname
 * @prefix:  the prefix, if any.
8464 * 8465 * parse an XML name and compares for match 8466 * (specialized for endtag parsing) 8467 * 8468 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8469 * and the name for mismatch 8470 */ 8471 8472 static const xmlChar * 8473 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8474 xmlChar const *prefix) { 8475 const xmlChar *cmp; 8476 const xmlChar *in; 8477 const xmlChar *ret; 8478 const xmlChar *prefix2; 8479 8480 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8481 8482 GROW; 8483 in = ctxt->input->cur; 8484 8485 cmp = prefix; 8486 while (*in != 0 && *in == *cmp) { 8487 ++in; 8488 ++cmp; 8489 } 8490 if ((*cmp == 0) && (*in == ':')) { 8491 in++; 8492 cmp = name; 8493 while (*in != 0 && *in == *cmp) { 8494 ++in; 8495 ++cmp; 8496 } 8497 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8498 /* success */ 8499 ctxt->input->cur = in; 8500 return((const xmlChar*) 1); 8501 } 8502 } 8503 /* 8504 * all strings coms from the dictionary, equality can be done directly 8505 */ 8506 ret = xmlParseQName (ctxt, &prefix2); 8507 if ((ret == name) && (prefix == prefix2)) 8508 return((const xmlChar*) 1); 8509 return ret; 8510 } 8511 8512 /** 8513 * xmlParseAttValueInternal: 8514 * @ctxt: an XML parser context 8515 * @len: attribute len result 8516 * @alloc: whether the attribute was reallocated as a new string 8517 * @normalize: if 1 then further non-CDATA normalization must be done 8518 * 8519 * parse a value for an attribute. 8520 * NOTE: if no normalization is needed, the routine will return pointers 8521 * directly from the data buffer. 
8522 * 8523 * 3.3.3 Attribute-Value Normalization: 8524 * Before the value of an attribute is passed to the application or 8525 * checked for validity, the XML processor must normalize it as follows: 8526 * - a character reference is processed by appending the referenced 8527 * character to the attribute value 8528 * - an entity reference is processed by recursively processing the 8529 * replacement text of the entity 8530 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8531 * appending #x20 to the normalized value, except that only a single 8532 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8533 * parsed entity or the literal entity value of an internal parsed entity 8534 * - other characters are processed by appending them to the normalized value 8535 * If the declared value is not CDATA, then the XML processor must further 8536 * process the normalized attribute value by discarding any leading and 8537 * trailing space (#x20) characters, and by replacing sequences of space 8538 * (#x20) characters by a single space (#x20) character. 8539 * All attributes for which no declaration has been read should be treated 8540 * by a non-validating parser as if declared CDATA. 8541 * 8542 * Returns the AttValue parsed or NULL. The value has to be freed by the 8543 * caller if it was copied, this can be detected by val[*len] == 0. 
8544 */ 8545 8546 static xmlChar * 8547 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8548 int normalize) 8549 { 8550 xmlChar limit = 0; 8551 const xmlChar *in = NULL, *start, *end, *last; 8552 xmlChar *ret = NULL; 8553 8554 GROW; 8555 in = (xmlChar *) CUR_PTR; 8556 if (*in != '"' && *in != '\'') { 8557 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8558 return (NULL); 8559 } 8560 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8561 8562 /* 8563 * try to handle in this routine the most common case where no 8564 * allocation of a new string is required and where content is 8565 * pure ASCII. 8566 */ 8567 limit = *in++; 8568 end = ctxt->input->end; 8569 start = in; 8570 if (in >= end) { 8571 const xmlChar *oldbase = ctxt->input->base; 8572 GROW; 8573 if (oldbase != ctxt->input->base) { 8574 long delta = ctxt->input->base - oldbase; 8575 start = start + delta; 8576 in = in + delta; 8577 } 8578 end = ctxt->input->end; 8579 } 8580 if (normalize) { 8581 /* 8582 * Skip any leading spaces 8583 */ 8584 while ((in < end) && (*in != limit) && 8585 ((*in == 0x20) || (*in == 0x9) || 8586 (*in == 0xA) || (*in == 0xD))) { 8587 in++; 8588 start = in; 8589 if (in >= end) { 8590 const xmlChar *oldbase = ctxt->input->base; 8591 GROW; 8592 if (ctxt->instate == XML_PARSER_EOF) 8593 return(NULL); 8594 if (oldbase != ctxt->input->base) { 8595 long delta = ctxt->input->base - oldbase; 8596 start = start + delta; 8597 in = in + delta; 8598 } 8599 end = ctxt->input->end; 8600 } 8601 } 8602 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8603 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8604 if ((*in++ == 0x20) && (*in == 0x20)) break; 8605 if (in >= end) { 8606 const xmlChar *oldbase = ctxt->input->base; 8607 GROW; 8608 if (ctxt->instate == XML_PARSER_EOF) 8609 return(NULL); 8610 if (oldbase != ctxt->input->base) { 8611 long delta = ctxt->input->base - oldbase; 8612 start = start + delta; 8613 in = in + delta; 8614 } 8615 end = ctxt->input->end; 8616 
} 8617 } 8618 last = in; 8619 /* 8620 * skip the trailing blanks 8621 */ 8622 while ((last[-1] == 0x20) && (last > start)) last--; 8623 while ((in < end) && (*in != limit) && 8624 ((*in == 0x20) || (*in == 0x9) || 8625 (*in == 0xA) || (*in == 0xD))) { 8626 in++; 8627 if (in >= end) { 8628 const xmlChar *oldbase = ctxt->input->base; 8629 GROW; 8630 if (ctxt->instate == XML_PARSER_EOF) 8631 return(NULL); 8632 if (oldbase != ctxt->input->base) { 8633 long delta = ctxt->input->base - oldbase; 8634 start = start + delta; 8635 in = in + delta; 8636 last = last + delta; 8637 } 8638 end = ctxt->input->end; 8639 } 8640 } 8641 if (*in != limit) goto need_complex; 8642 } else { 8643 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8644 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8645 in++; 8646 if (in >= end) { 8647 const xmlChar *oldbase = ctxt->input->base; 8648 GROW; 8649 if (ctxt->instate == XML_PARSER_EOF) 8650 return(NULL); 8651 if (oldbase != ctxt->input->base) { 8652 long delta = ctxt->input->base - oldbase; 8653 start = start + delta; 8654 in = in + delta; 8655 } 8656 end = ctxt->input->end; 8657 } 8658 } 8659 last = in; 8660 if (*in != limit) goto need_complex; 8661 } 8662 in++; 8663 if (len != NULL) { 8664 *len = last - start; 8665 ret = (xmlChar *) start; 8666 } else { 8667 if (alloc) *alloc = 1; 8668 ret = xmlStrndup(start, last - start); 8669 } 8670 CUR_PTR = in; 8671 if (alloc) *alloc = 0; 8672 return ret; 8673 need_complex: 8674 if (alloc) *alloc = 1; 8675 return xmlParseAttValueComplex(ctxt, len, normalize); 8676 } 8677 8678 /** 8679 * xmlParseAttribute2: 8680 * @ctxt: an XML parser context 8681 * @pref: the element prefix 8682 * @elem: the element name 8683 * @prefix: a xmlChar ** used to store the value of the attribute prefix 8684 * @value: a xmlChar ** used to store the value of the attribute 8685 * @len: an int * to save the length of the attribute 8686 * @alloc: an int * to indicate if the attribute was allocated 8687 * 8688 * parse an 
attribute in the new SAX2 framework.
 *
 * Returns the attribute name, and the value in *value.
 */

static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
                   const xmlChar * pref, const xmlChar * elem,
                   const xmlChar ** prefix, xmlChar ** value,
                   int *len, int *alloc)
{
    const xmlChar *attrName;
    xmlChar *attrVal;
    xmlChar *copy = NULL;
    int needNorm = 0;

    *value = NULL;
    GROW;
    attrName = xmlParseQName(ctxt, prefix);
    if (attrName == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "error parsing attribute name\n");
        return (NULL);
    }

    /*
     * An entry in the attsSpecial table means the attribute needs the
     * extra (non-CDATA) normalization.
     */
    if (ctxt->attsSpecial != NULL) {
        int type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
                                                pref, elem, *prefix,
                                                attrName);

        if (type != 0)
            needNorm = 1;
    }

    /*
     * The '=' sign is mandatory before the value.
     */
    SKIP_BLANKS;
    if (RAW != '=') {
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
                          "Specification mandate value for attribute %s\n",
                          attrName);
        return (NULL);
    }
    NEXT;
    SKIP_BLANKS;
    attrVal = xmlParseAttValueInternal(ctxt, len, alloc, needNorm);

    /*
     * Sometimes a second normalisation pass for spaces is needed
     * but that only happens if charrefs or entity references
     * have been used in the attribute value, i.e. the attribute
     * value has been extracted in an allocated string already.
     */
    if (needNorm && (*alloc)) {
        const xmlChar *normalized = xmlAttrNormalizeSpace2(ctxt, attrVal, len);

        if ((normalized != NULL) && (normalized != attrVal)) {
            xmlFree(attrVal);
            attrVal = (xmlChar *) normalized;
        }
    }
    ctxt->instate = XML_PARSER_CONTENT;

    if (*prefix == ctxt->str_xml) {
        /*
         * Check that xml:lang conforms to the specification
         * No more registered as an error, just generate a warning now
         * since this was deprecated in XML second edition
         */
        if ((ctxt->pedantic) && (xmlStrEqual(attrName, BAD_CAST "lang"))) {
            copy = xmlStrndup(attrVal, *len);
            if (!xmlCheckLanguageID(copy)) {
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
                              "Malformed value for xml:lang : %s\n",
                              copy, NULL);
            }
        }

        /*
         * Check that xml:space conforms to the specification
         */
        if (xmlStrEqual(attrName, BAD_CAST "space")) {
            copy = xmlStrndup(attrVal, *len);
            if (xmlStrEqual(copy, BAD_CAST "default")) {
                *(ctxt->space) = 0;
            } else if (xmlStrEqual(copy, BAD_CAST "preserve")) {
                *(ctxt->space) = 1;
            } else {
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
                              copy, NULL);
            }
        }
        if (copy) {
            xmlFree(copy);
        }
    }

    *value = attrVal;
    return (attrName);
}
/**
 * xmlParseStartTag2:
 * @ctxt:  an XML parser context
 *
 * parse a start of tag either for rule element or
 * EmptyElement. In both case we don't parse the tag closing chars.
 * This routine is called when running SAX2 parsing
 *
 * [40] STag ::= '<' Name (S Attribute)* S?
'>' 8804 * 8805 * [ WFC: Unique Att Spec ] 8806 * No attribute name may appear more than once in the same start-tag or 8807 * empty-element tag. 8808 * 8809 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8810 * 8811 * [ WFC: Unique Att Spec ] 8812 * No attribute name may appear more than once in the same start-tag or 8813 * empty-element tag. 8814 * 8815 * With namespace: 8816 * 8817 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8818 * 8819 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8820 * 8821 * Returns the element name parsed 8822 */ 8823 8824 static const xmlChar * 8825 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 8826 const xmlChar **URI, int *tlen) { 8827 const xmlChar *localname; 8828 const xmlChar *prefix; 8829 const xmlChar *attname; 8830 const xmlChar *aprefix; 8831 const xmlChar *nsname; 8832 xmlChar *attvalue; 8833 const xmlChar **atts = ctxt->atts; 8834 int maxatts = ctxt->maxatts; 8835 int nratts, nbatts, nbdef; 8836 int i, j, nbNs, attval, oldline, oldcol; 8837 const xmlChar *base; 8838 unsigned long cur; 8839 int nsNr = ctxt->nsNr; 8840 8841 if (RAW != '<') return(NULL); 8842 NEXT1; 8843 8844 /* 8845 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 8846 * point since the attribute values may be stored as pointers to 8847 * the buffer and calling SHRINK would destroy them ! 8848 * The Shrinking is only possible once the full set of attribute 8849 * callbacks have been done. 8850 */ 8851 reparse: 8852 SHRINK; 8853 base = ctxt->input->base; 8854 cur = ctxt->input->cur - ctxt->input->base; 8855 oldline = ctxt->input->line; 8856 oldcol = ctxt->input->col; 8857 nbatts = 0; 8858 nratts = 0; 8859 nbdef = 0; 8860 nbNs = 0; 8861 attval = 0; 8862 /* Forget any namespaces added during an earlier parse of this element. 
*/ 8863 ctxt->nsNr = nsNr; 8864 8865 localname = xmlParseQName(ctxt, &prefix); 8866 if (localname == NULL) { 8867 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8868 "StartTag: invalid element name\n"); 8869 return(NULL); 8870 } 8871 *tlen = ctxt->input->cur - ctxt->input->base - cur; 8872 8873 /* 8874 * Now parse the attributes, it ends up with the ending 8875 * 8876 * (S Attribute)* S? 8877 */ 8878 SKIP_BLANKS; 8879 GROW; 8880 if (ctxt->input->base != base) goto base_changed; 8881 8882 while (((RAW != '>') && 8883 ((RAW != '/') || (NXT(1) != '>')) && 8884 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8885 const xmlChar *q = CUR_PTR; 8886 unsigned int cons = ctxt->input->consumed; 8887 int len = -1, alloc = 0; 8888 8889 attname = xmlParseAttribute2(ctxt, prefix, localname, 8890 &aprefix, &attvalue, &len, &alloc); 8891 if (ctxt->input->base != base) { 8892 if ((attvalue != NULL) && (alloc != 0)) 8893 xmlFree(attvalue); 8894 attvalue = NULL; 8895 goto base_changed; 8896 } 8897 if ((attname != NULL) && (attvalue != NULL)) { 8898 if (len < 0) len = xmlStrlen(attvalue); 8899 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8900 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8901 xmlURIPtr uri; 8902 8903 if (*URL != 0) { 8904 uri = xmlParseURI((const char *) URL); 8905 if (uri == NULL) { 8906 xmlNsErr(ctxt, XML_WAR_NS_URI, 8907 "xmlns: '%s' is not a valid URI\n", 8908 URL, NULL, NULL); 8909 } else { 8910 if (uri->scheme == NULL) { 8911 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 8912 "xmlns: URI %s is not absolute\n", 8913 URL, NULL, NULL); 8914 } 8915 xmlFreeURI(uri); 8916 } 8917 if (URL == ctxt->str_xml_ns) { 8918 if (attname != ctxt->str_xml) { 8919 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8920 "xml namespace URI cannot be the default namespace\n", 8921 NULL, NULL, NULL); 8922 } 8923 goto skip_default_ns; 8924 } 8925 if ((len == 29) && 8926 (xmlStrEqual(URL, 8927 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 8928 xmlNsErr(ctxt, 
XML_NS_ERR_XML_NAMESPACE, 8929 "reuse of the xmlns namespace name is forbidden\n", 8930 NULL, NULL, NULL); 8931 goto skip_default_ns; 8932 } 8933 } 8934 /* 8935 * check that it's not a defined namespace 8936 */ 8937 for (j = 1;j <= nbNs;j++) 8938 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8939 break; 8940 if (j <= nbNs) 8941 xmlErrAttributeDup(ctxt, NULL, attname); 8942 else 8943 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 8944 skip_default_ns: 8945 if (alloc != 0) xmlFree(attvalue); 8946 SKIP_BLANKS; 8947 continue; 8948 } 8949 if (aprefix == ctxt->str_xmlns) { 8950 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8951 xmlURIPtr uri; 8952 8953 if (attname == ctxt->str_xml) { 8954 if (URL != ctxt->str_xml_ns) { 8955 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8956 "xml namespace prefix mapped to wrong URI\n", 8957 NULL, NULL, NULL); 8958 } 8959 /* 8960 * Do not keep a namespace definition node 8961 */ 8962 goto skip_ns; 8963 } 8964 if (URL == ctxt->str_xml_ns) { 8965 if (attname != ctxt->str_xml) { 8966 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8967 "xml namespace URI mapped to wrong prefix\n", 8968 NULL, NULL, NULL); 8969 } 8970 goto skip_ns; 8971 } 8972 if (attname == ctxt->str_xmlns) { 8973 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8974 "redefinition of the xmlns prefix is forbidden\n", 8975 NULL, NULL, NULL); 8976 goto skip_ns; 8977 } 8978 if ((len == 29) && 8979 (xmlStrEqual(URL, 8980 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 8981 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8982 "reuse of the xmlns namespace name is forbidden\n", 8983 NULL, NULL, NULL); 8984 goto skip_ns; 8985 } 8986 if ((URL == NULL) || (URL[0] == 0)) { 8987 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8988 "xmlns:%s: Empty XML namespace is not allowed\n", 8989 attname, NULL, NULL); 8990 goto skip_ns; 8991 } else { 8992 uri = xmlParseURI((const char *) URL); 8993 if (uri == NULL) { 8994 xmlNsErr(ctxt, XML_WAR_NS_URI, 8995 "xmlns:%s: '%s' is not a valid URI\n", 8996 attname, URL, NULL); 
8997 } else { 8998 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 8999 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9000 "xmlns:%s: URI %s is not absolute\n", 9001 attname, URL, NULL); 9002 } 9003 xmlFreeURI(uri); 9004 } 9005 } 9006 9007 /* 9008 * check that it's not a defined namespace 9009 */ 9010 for (j = 1;j <= nbNs;j++) 9011 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9012 break; 9013 if (j <= nbNs) 9014 xmlErrAttributeDup(ctxt, aprefix, attname); 9015 else 9016 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9017 skip_ns: 9018 if (alloc != 0) xmlFree(attvalue); 9019 SKIP_BLANKS; 9020 if (ctxt->input->base != base) goto base_changed; 9021 continue; 9022 } 9023 9024 /* 9025 * Add the pair to atts 9026 */ 9027 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9028 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9029 if (attvalue[len] == 0) 9030 xmlFree(attvalue); 9031 goto failed; 9032 } 9033 maxatts = ctxt->maxatts; 9034 atts = ctxt->atts; 9035 } 9036 ctxt->attallocs[nratts++] = alloc; 9037 atts[nbatts++] = attname; 9038 atts[nbatts++] = aprefix; 9039 atts[nbatts++] = NULL; /* the URI will be fetched later */ 9040 atts[nbatts++] = attvalue; 9041 attvalue += len; 9042 atts[nbatts++] = attvalue; 9043 /* 9044 * tag if some deallocation is needed 9045 */ 9046 if (alloc != 0) attval = 1; 9047 } else { 9048 if ((attvalue != NULL) && (attvalue[len] == 0)) 9049 xmlFree(attvalue); 9050 } 9051 9052 failed: 9053 9054 GROW 9055 if (ctxt->instate == XML_PARSER_EOF) 9056 break; 9057 if (ctxt->input->base != base) goto base_changed; 9058 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9059 break; 9060 if (!IS_BLANK_CH(RAW)) { 9061 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9062 "attributes construct error\n"); 9063 break; 9064 } 9065 SKIP_BLANKS; 9066 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9067 (attname == NULL) && (attvalue == NULL)) { 9068 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9069 "xmlParseStartTag: problem parsing attributes\n"); 9070 break; 
9071 } 9072 GROW; 9073 if (ctxt->input->base != base) goto base_changed; 9074 } 9075 9076 /* 9077 * The attributes defaulting 9078 */ 9079 if (ctxt->attsDefault != NULL) { 9080 xmlDefAttrsPtr defaults; 9081 9082 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9083 if (defaults != NULL) { 9084 for (i = 0;i < defaults->nbAttrs;i++) { 9085 attname = defaults->values[5 * i]; 9086 aprefix = defaults->values[5 * i + 1]; 9087 9088 /* 9089 * special work for namespaces defaulted defs 9090 */ 9091 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9092 /* 9093 * check that it's not a defined namespace 9094 */ 9095 for (j = 1;j <= nbNs;j++) 9096 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9097 break; 9098 if (j <= nbNs) continue; 9099 9100 nsname = xmlGetNamespace(ctxt, NULL); 9101 if (nsname != defaults->values[5 * i + 2]) { 9102 if (nsPush(ctxt, NULL, 9103 defaults->values[5 * i + 2]) > 0) 9104 nbNs++; 9105 } 9106 } else if (aprefix == ctxt->str_xmlns) { 9107 /* 9108 * check that it's not a defined namespace 9109 */ 9110 for (j = 1;j <= nbNs;j++) 9111 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9112 break; 9113 if (j <= nbNs) continue; 9114 9115 nsname = xmlGetNamespace(ctxt, attname); 9116 if (nsname != defaults->values[2]) { 9117 if (nsPush(ctxt, attname, 9118 defaults->values[5 * i + 2]) > 0) 9119 nbNs++; 9120 } 9121 } else { 9122 /* 9123 * check that it's not a defined attribute 9124 */ 9125 for (j = 0;j < nbatts;j+=5) { 9126 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9127 break; 9128 } 9129 if (j < nbatts) continue; 9130 9131 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9132 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9133 return(NULL); 9134 } 9135 maxatts = ctxt->maxatts; 9136 atts = ctxt->atts; 9137 } 9138 atts[nbatts++] = attname; 9139 atts[nbatts++] = aprefix; 9140 if (aprefix == NULL) 9141 atts[nbatts++] = NULL; 9142 else 9143 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9144 atts[nbatts++] = defaults->values[5 * i + 
2]; 9145 atts[nbatts++] = defaults->values[5 * i + 3]; 9146 if ((ctxt->standalone == 1) && 9147 (defaults->values[5 * i + 4] != NULL)) { 9148 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9149 "standalone: attribute %s on %s defaulted from external subset\n", 9150 attname, localname); 9151 } 9152 nbdef++; 9153 } 9154 } 9155 } 9156 } 9157 9158 /* 9159 * The attributes checkings 9160 */ 9161 for (i = 0; i < nbatts;i += 5) { 9162 /* 9163 * The default namespace does not apply to attribute names. 9164 */ 9165 if (atts[i + 1] != NULL) { 9166 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9167 if (nsname == NULL) { 9168 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9169 "Namespace prefix %s for %s on %s is not defined\n", 9170 atts[i + 1], atts[i], localname); 9171 } 9172 atts[i + 2] = nsname; 9173 } else 9174 nsname = NULL; 9175 /* 9176 * [ WFC: Unique Att Spec ] 9177 * No attribute name may appear more than once in the same 9178 * start-tag or empty-element tag. 9179 * As extended by the Namespace in XML REC. 9180 */ 9181 for (j = 0; j < i;j += 5) { 9182 if (atts[i] == atts[j]) { 9183 if (atts[i+1] == atts[j+1]) { 9184 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9185 break; 9186 } 9187 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9188 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9189 "Namespaced Attribute %s in '%s' redefined\n", 9190 atts[i], nsname, NULL); 9191 break; 9192 } 9193 } 9194 } 9195 } 9196 9197 nsname = xmlGetNamespace(ctxt, prefix); 9198 if ((prefix != NULL) && (nsname == NULL)) { 9199 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9200 "Namespace prefix %s on %s is not defined\n", 9201 prefix, localname, NULL); 9202 } 9203 *pref = prefix; 9204 *URI = nsname; 9205 9206 /* 9207 * SAX: Start of Element ! 
9208 */ 9209 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9210 (!ctxt->disableSAX)) { 9211 if (nbNs > 0) 9212 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9213 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9214 nbatts / 5, nbdef, atts); 9215 else 9216 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9217 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9218 } 9219 9220 /* 9221 * Free up attribute allocated strings if needed 9222 */ 9223 if (attval != 0) { 9224 for (i = 3,j = 0; j < nratts;i += 5,j++) 9225 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9226 xmlFree((xmlChar *) atts[i]); 9227 } 9228 9229 return(localname); 9230 9231 base_changed: 9232 /* 9233 * the attribute strings are valid iif the base didn't changed 9234 */ 9235 if (attval != 0) { 9236 for (i = 3,j = 0; j < nratts;i += 5,j++) 9237 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9238 xmlFree((xmlChar *) atts[i]); 9239 } 9240 ctxt->input->cur = ctxt->input->base + cur; 9241 ctxt->input->line = oldline; 9242 ctxt->input->col = oldcol; 9243 if (ctxt->wellFormed == 1) { 9244 goto reparse; 9245 } 9246 return(NULL); 9247 } 9248 9249 /** 9250 * xmlParseEndTag2: 9251 * @ctxt: an XML parser context 9252 * @line: line of the start tag 9253 * @nsNr: number of namespaces on the start tag 9254 * 9255 * parse an end of tag 9256 * 9257 * [42] ETag ::= '</' Name S? '>' 9258 * 9259 * With namespace 9260 * 9261 * [NS 9] ETag ::= '</' QName S? 
'>'
 */

/*
 * Additional notes on xmlParseEndTag2 arguments and behaviour:
 *
 * @prefix: namespace prefix of the element being closed, or NULL. It is
 *          passed to the endElementNs SAX callback and selects between
 *          plain-name and QName comparison on the slow path.
 * @URI:    namespace name passed unchanged to endElementNs.
 * @tlen:   length, in bytes, of the element name as it appeared in the
 *          start tag (computed by xmlParseStartTag2). When positive and
 *          the input matches ctxt->name over that many bytes, the name is
 *          skipped without being re-parsed.
 *
 * The value (xmlChar*)1 stored in the local "name" is a sentinel meaning
 * "name matched"; any other value triggers the tag-mismatch error.
 * NOTE(review): the compare helpers presumably return the same sentinel
 * on a match - confirm against their definitions.
 *
 * If the parser reaches XML_PARSER_EOF while looking for the closing '>',
 * the function returns without calling the SAX callback and without
 * popping the space / namespace stacks.
 */
static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
                const xmlChar *URI, int line, int nsNr, int tlen) {
    const xmlChar *name;

    GROW;
    if ((RAW != '<') || (NXT(1) != '/')) {
        xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
        return;
    }
    SKIP(2);

    if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
        /*
         * Fast path: the bytes at the cursor equal the start-tag name.
         * The cursor is moved by hand here, so the column counter has to
         * be moved by the same amount; otherwise every position reported
         * after this end tag is shifted. A name cannot contain a line
         * break, so the line counter needs no adjustment.
         */
        if (ctxt->input->cur[tlen] == '>') {
            ctxt->input->cur += tlen + 1;
            ctxt->input->col += tlen + 1;
            goto done;
        }
        ctxt->input->cur += tlen;
        ctxt->input->col += tlen;
        name = (xmlChar*)1;
    } else {
        if (prefix == NULL)
            name = xmlParseNameAndCompare(ctxt, ctxt->name);
        else
            name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
    }

    /*
     * We should definitely be at the ending "S? '>'" part
     */
    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        return;
    SKIP_BLANKS;
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
    } else
        NEXT1;

    /*
     * [ WFC: Element Type Match ]
     * The Name in an element's end-tag must match the element type in the
     * start-tag.
     *
     */
    if (name != (xmlChar*)1) {
        if (name == NULL) name = BAD_CAST "unparseable";
        /* line == 0 means "unknown": fall back to the current node's line */
        if ((line == 0) && (ctxt->node != NULL))
            line = ctxt->node->line;
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
                     "Opening and ending tag mismatch: %s line %d and %s\n",
                                ctxt->name, line, name);
    }

    /*
     * SAX: End of Tag
     */
done:
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
        (!ctxt->disableSAX))
        ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);

    /* Unwind what the start tag pushed: xml:space state and namespaces. */
    spacePop(ctxt);
    if (nsNr != 0)
        nsPop(ctxt, nsNr);
    return;
}

/**
 * xmlParseCDSect:
 * @ctxt: an XML parser context
 *
 * Parse escaped pure raw content.
*
 * [18] CDSect ::= CDStart CData CDEnd
 *
 * [19] CDStart ::= '<![CDATA['
 *
 * [20] CData ::= (Char* - (Char* ']]>' Char*))
 *
 * [21] CDEnd ::= ']]>'
 *
 * Does nothing unless the input is positioned on '<![CDATA['. The section
 * content is accumulated in a heap buffer and delivered through the
 * cdataBlock SAX callback, or through characters when cdataBlock is not
 * set. ctxt->instate is XML_PARSER_CDATA_SECTION while scanning and is put
 * back to XML_PARSER_CONTENT on the normal and on the "not finished"
 * exits; the allocation-failure and EOF exits return without resetting it.
 */
void
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    /*
     * r, s and cur form a three character sliding window (oldest first)
     * used to recognise the ']]>' terminator; rl, sl and l hold the
     * lengths reported by CUR_CHAR for each of them.
     */
    int r, rl;
    int s, sl;
    int cur, l;
    int count = 0;

    /* Check 2.6.0 was NXT(0) not RAW */
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
        SKIP(9);
    } else
        return;

    ctxt->instate = XML_PARSER_CDATA_SECTION;
    /* Prime the window with the first two characters of the section. */
    r = CUR_CHAR(rl);
    if (!IS_CHAR(r)) {
        xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
        ctxt->instate = XML_PARSER_CONTENT;
        return;
    }
    NEXTL(rl);
    s = CUR_CHAR(sl);
    if (!IS_CHAR(s)) {
        xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
        ctxt->instate = XML_PARSER_CONTENT;
        return;
    }
    NEXTL(sl);
    cur = CUR_CHAR(l);
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return;
    }
    /*
     * Only the oldest character of the window (r) is stored on each turn,
     * so when the loop stops on ']]>' the two ']' still sitting in r and
     * s have not been written to buf.
     */
    while (IS_CHAR(cur) &&
           ((r != ']') || (s != ']') || (cur != '>'))) {
        /*
         * Grow early: presumably the 5 bytes of slack cover one encoded
         * character plus the terminating 0 - confirm against COPY_BUF.
         */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlFree(buf);
                xmlErrMemory(ctxt, NULL);
                return;
            }
            buf = tmp;
        }
        COPY_BUF(rl,buf,len,r);
        /* Slide the window one character forward. */
        r = s;
        rl = sl;
        s = cur;
        sl = l;
        count++;
        /* Every 50 characters run GROW and check for a parser stop. */
        if (count > 50) {
            GROW;
            if (ctxt->instate == XML_PARSER_EOF) {
                xmlFree(buf);
                return;
            }
            count = 0;
        }
        NEXTL(l);
        cur = CUR_CHAR(l);
    }
    buf[len] = 0;
    ctxt->instate = XML_PARSER_CONTENT;
    /* The loop also ends on a non-Char; only '>' means ']]>' was found. */
    if (cur != '>') {
        xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
                             "CData section not finished\n%.50s\n", buf);
        xmlFree(buf);
        return;
    }
    NEXTL(l);

    /*
     * OK the buffer is to be consumed as cdata.
     */
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
        if (ctxt->sax->cdataBlock != NULL)
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
        else if (ctxt->sax->characters != NULL)
            ctxt->sax->characters(ctxt->userData, buf, len);
    }
    xmlFree(buf);
}

/**
 * xmlParseContent:
 * @ctxt: an XML parser context
 *
 * Parse a content:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Loops until the input is exhausted, an end tag start ('</') is seen or
 * the parser state becomes XML_PARSER_EOF, dispatching on the characters
 * at the cursor. An iteration that consumes nothing is reported as an
 * internal error and stops the parser, which prevents an endless loop.
 */

void
xmlParseContent(xmlParserCtxtPtr ctxt) {
    GROW;
    while ((RAW != 0) &&
           ((RAW != '<') || (NXT(1) != '/')) &&
           (ctxt->instate != XML_PARSER_EOF)) {
        /* Snapshot of the position, compared below to detect no progress. */
        const xmlChar *test = CUR_PTR;
        unsigned int cons = ctxt->input->consumed;
        const xmlChar *cur = ctxt->input->cur;

        /*
         * First case : a Processing Instruction.
         */
        if ((*cur == '<') && (cur[1] == '?')) {
            xmlParsePI(ctxt);
        }

        /*
         * Second case : a CDSection
         */
        /* 2.6.0 test was *cur not RAW */
        else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
            xmlParseCDSect(ctxt);
        }

        /*
         * Third case : a comment
         */
        else if ((*cur == '<') && (NXT(1) == '!') &&
                 (NXT(2) == '-') && (NXT(3) == '-')) {
            xmlParseComment(ctxt);
            ctxt->instate = XML_PARSER_CONTENT;
        }

        /*
         * Fourth case : a sub-element.
         */
        else if (*cur == '<') {
            xmlParseElement(ctxt);
        }

        /*
         * Fifth case : a reference. If it has not been resolved,
         * parsing returns its Name, create the node
         */

        else if (*cur == '&') {
            xmlParseReference(ctxt);
        }

        /*
         * Last case, text. Note that References are handled directly.
         */
        else {
            xmlParseCharData(ctxt, 0);
        }

        GROW;
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);
        SHRINK;

        /* Neither the consumed count nor the cursor moved: bail out. */
        if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                        "detected an error in element content\n");
            ctxt->instate = XML_PARSER_EOF;
            break;
        }
    }
}

/**
 * xmlParseElement:
 * @ctxt: an XML parser context
 *
 * parse an XML element, this is highly recursive
 *
 * [39] element ::= EmptyElemTag | STag content ETag
 *
 * [ WFC: Element Type Match ]
 * The Name in an element's end-tag must match the element type in the
 * start-tag.
 *
 * Nesting is limited: when ctxt->nameNr exceeds xmlParserMaxDepth and
 * XML_PARSE_HUGE is not set, an error is raised and the parser is stopped.
 * On every exit taken after a successful start tag, the name, space and
 * namespace stacks are brought back to their state at entry. When
 * ctxt->record_info is set, begin/end positions are registered with
 * xmlParserAddNodeInfo.
 */

void
xmlParseElement(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    const xmlChar *prefix = NULL;
    const xmlChar *URI = NULL;
    xmlParserNodeInfo node_info;
    int line, tlen;
    xmlNodePtr ret;
    /* Namespace stack height at entry; the difference is popped on exit. */
    int nsNr = ctxt->nsNr;

    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
                 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
                          xmlParserMaxDepth);
        ctxt->instate = XML_PARSER_EOF;
        return;
    }

    /* Capture start position */
    if (ctxt->record_info) {
        node_info.begin_pos = ctxt->input->consumed +
                          (CUR_PTR - ctxt->input->base);
        node_info.begin_line = ctxt->input->line;
    }

    /*
     * Push the space-handling value for this element: -1 when the stack
     * is empty or its top is -2, otherwise a copy of the current top.
     */
    if (ctxt->spaceNr == 0)
        spacePush(ctxt, -1);
    else if (*ctxt->space == -2)
        spacePush(ctxt, -1);
    else
        spacePush(ctxt, *ctxt->space);

    line = ctxt->input->line;
    /* Without SAX1 support compiled in, the SAX2 start tag parser is
     * always used; tlen is only set by that variant. */
#ifdef LIBXML_SAX1_ENABLED
    if (ctxt->sax2)
#endif /* LIBXML_SAX1_ENABLED */
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
#ifdef LIBXML_SAX1_ENABLED
    else
        name = xmlParseStartTag(ctxt);
#endif /* LIBXML_SAX1_ENABLED */
    /* NOTE(review): this exit leaves the pushed space entry in place. */
    if (ctxt->instate == XML_PARSER_EOF)
        return;
    if (name == NULL) {
        spacePop(ctxt);
        return;
    }
    namePush(ctxt, name);
    /* Node current after the start tag; reported in node_info below. */
    ret = ctxt->node;

#ifdef LIBXML_VALID_ENABLED
    /*
     * [ VC: Root Element Type ]
     * The Name in the document type declaration must match the element
     * type of the root element.
     */
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
#endif /* LIBXML_VALID_ENABLED */

    /*
     * Check for an Empty Element.
     */
    if ((RAW == '/') && (NXT(1) == '>')) {
        SKIP(2);
        /* No end tag will follow, so the end-element callback fires here. */
        if (ctxt->sax2) {
            if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
#ifdef LIBXML_SAX1_ENABLED
        } else {
            if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->endElement(ctxt->userData, name);
#endif /* LIBXML_SAX1_ENABLED */
        }
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);
        if ( ret != NULL && ctxt->record_info ) {
            node_info.end_pos = ctxt->input->consumed +
                                (CUR_PTR - ctxt->input->base);
            node_info.end_line = ctxt->input->line;
            node_info.node = ret;
            xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return;
    }
    if (RAW == '>') {
        NEXT1;
    } else {
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
                     "Couldn't find end of Start Tag %s line %d\n",
                                name, line, NULL);

        /*
         * end of parsing of this node.
         */
        nodePop(ctxt);
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);

        /*
         * Capture end position and add node
         */
        if ( ret != NULL && ctxt->record_info ) {
            node_info.end_pos = ctxt->input->consumed +
                                (CUR_PTR - ctxt->input->base);
            node_info.end_line = ctxt->input->line;
            node_info.node = ret;
            xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return;
    }

    /*
     * Parse the content of the element:
     */
    xmlParseContent(ctxt);
    if (ctxt->instate == XML_PARSER_EOF)
        return;
    if (!IS_BYTE_CHAR(RAW)) {
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
         "Premature end of data in tag %s line %d\n",
                                name, line, NULL);

        /*
         * end of parsing of this node.
         */
        nodePop(ctxt);
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);
        /* NOTE(review): unlike the other exits, no node info is recorded. */
        return;
    }

    /*
     * parse the end of tag: '</' should be here.
     */
    if (ctxt->sax2) {
        xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
        /* Only the SAX2 path pops the name here; presumably
         * xmlParseEndTag1 does so itself - confirm. */
        namePop(ctxt);
    }
#ifdef LIBXML_SAX1_ENABLED
    else
        xmlParseEndTag1(ctxt, line);
#endif /* LIBXML_SAX1_ENABLED */

    /*
     * Capture end position and add node
     */
    if ( ret != NULL && ctxt->record_info ) {
        node_info.end_pos = ctxt->input->consumed +
                            (CUR_PTR - ctxt->input->base);
        node_info.end_line = ctxt->input->line;
        node_info.node = ret;
        xmlParserAddNodeInfo(ctxt, &node_info);
    }
}

/**
 * xmlParseVersionNum:
 * @ctxt: an XML parser context
 *
 * parse the XML version value.
 *
 * [26] VersionNum ::= '1.'
[0-9]+
 *
 * In practice the accepted shape is one decimal digit, a '.', then any
 * run of decimal digits (the run may be empty).
 *
 * Returns a newly allocated, 0-terminated copy of the version number
 * that the caller must release, or NULL when the input does not start
 * with a digit followed by '.', or when memory runs out.
 */
xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
    int capacity = 10;
    int used = 0;
    xmlChar ch;
    xmlChar *out;

    out = (xmlChar *) xmlMallocAtomic(capacity * sizeof(xmlChar));
    if (out == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /* Major part: a single decimal digit is mandatory. */
    ch = CUR;
    if ((ch < '0') || (ch > '9'))
        goto reject;
    out[used++] = ch;
    NEXT;

    /* The separating dot is mandatory as well. */
    ch = CUR;
    if (ch != '.')
        goto reject;
    out[used++] = ch;
    NEXT;

    /* Minor part: collect digits, doubling the buffer when it fills up. */
    for (ch = CUR; (ch >= '0') && (ch <= '9'); ch = CUR) {
        if (used + 1 >= capacity) {
            xmlChar *bigger;

            capacity *= 2;
            bigger = (xmlChar *) xmlRealloc(out, capacity * sizeof(xmlChar));
            if (bigger == NULL) {
                xmlFree(out);
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            out = bigger;
        }
        out[used++] = ch;
        NEXT;
    }
    out[used] = 0;
    return(out);

reject:
    /* Malformed version number: drop the buffer, raise no error here. */
    xmlFree(out);
    return(NULL);
}

/**
 * xmlParseVersionInfo:
 * @ctxt: an XML parser context
 *
 * parse the XML version.
 *
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
 *
 * [25] Eq ::= S? '=' S?
 *
 * Returns the version string, e.g.
"1.0" 9769 */ 9770 9771 xmlChar * 9772 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 9773 xmlChar *version = NULL; 9774 9775 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 9776 SKIP(7); 9777 SKIP_BLANKS; 9778 if (RAW != '=') { 9779 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9780 return(NULL); 9781 } 9782 NEXT; 9783 SKIP_BLANKS; 9784 if (RAW == '"') { 9785 NEXT; 9786 version = xmlParseVersionNum(ctxt); 9787 if (RAW != '"') { 9788 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9789 } else 9790 NEXT; 9791 } else if (RAW == '\''){ 9792 NEXT; 9793 version = xmlParseVersionNum(ctxt); 9794 if (RAW != '\'') { 9795 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9796 } else 9797 NEXT; 9798 } else { 9799 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9800 } 9801 } 9802 return(version); 9803 } 9804 9805 /** 9806 * xmlParseEncName: 9807 * @ctxt: an XML parser context 9808 * 9809 * parse the XML encoding name 9810 * 9811 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 9812 * 9813 * Returns the encoding name value or NULL 9814 */ 9815 xmlChar * 9816 xmlParseEncName(xmlParserCtxtPtr ctxt) { 9817 xmlChar *buf = NULL; 9818 int len = 0; 9819 int size = 10; 9820 xmlChar cur; 9821 9822 cur = CUR; 9823 if (((cur >= 'a') && (cur <= 'z')) || 9824 ((cur >= 'A') && (cur <= 'Z'))) { 9825 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9826 if (buf == NULL) { 9827 xmlErrMemory(ctxt, NULL); 9828 return(NULL); 9829 } 9830 9831 buf[len++] = cur; 9832 NEXT; 9833 cur = CUR; 9834 while (((cur >= 'a') && (cur <= 'z')) || 9835 ((cur >= 'A') && (cur <= 'Z')) || 9836 ((cur >= '0') && (cur <= '9')) || 9837 (cur == '.') || (cur == '_') || 9838 (cur == '-')) { 9839 if (len + 1 >= size) { 9840 xmlChar *tmp; 9841 9842 size *= 2; 9843 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9844 if (tmp == NULL) { 9845 xmlErrMemory(ctxt, NULL); 9846 xmlFree(buf); 9847 return(NULL); 9848 } 9849 buf = tmp; 9850 } 9851 buf[len++] = cur; 9852 NEXT; 9853 cur = CUR; 
9854 if (cur == 0) { 9855 SHRINK; 9856 GROW; 9857 cur = CUR; 9858 } 9859 } 9860 buf[len] = 0; 9861 } else { 9862 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 9863 } 9864 return(buf); 9865 } 9866 9867 /** 9868 * xmlParseEncodingDecl: 9869 * @ctxt: an XML parser context 9870 * 9871 * parse the XML encoding declaration 9872 * 9873 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 9874 * 9875 * this setups the conversion filters. 9876 * 9877 * Returns the encoding value or NULL 9878 */ 9879 9880 const xmlChar * 9881 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 9882 xmlChar *encoding = NULL; 9883 9884 SKIP_BLANKS; 9885 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 9886 SKIP(8); 9887 SKIP_BLANKS; 9888 if (RAW != '=') { 9889 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9890 return(NULL); 9891 } 9892 NEXT; 9893 SKIP_BLANKS; 9894 if (RAW == '"') { 9895 NEXT; 9896 encoding = xmlParseEncName(ctxt); 9897 if (RAW != '"') { 9898 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9899 } else 9900 NEXT; 9901 } else if (RAW == '\''){ 9902 NEXT; 9903 encoding = xmlParseEncName(ctxt); 9904 if (RAW != '\'') { 9905 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9906 } else 9907 NEXT; 9908 } else { 9909 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9910 } 9911 /* 9912 * UTF-16 encoding stwich has already taken place at this stage, 9913 * more over the little-endian/big-endian selection is already done 9914 */ 9915 if ((encoding != NULL) && 9916 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 9917 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 9918 /* 9919 * If no encoding was passed to the parser, that we are 9920 * using UTF-16 and no decoder is present i.e. 
the 9921 * document is apparently UTF-8 compatible, then raise an 9922 * encoding mismatch fatal error 9923 */ 9924 if ((ctxt->encoding == NULL) && 9925 (ctxt->input->buf != NULL) && 9926 (ctxt->input->buf->encoder == NULL)) { 9927 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 9928 "Document labelled UTF-16 but has UTF-8 content\n"); 9929 } 9930 if (ctxt->encoding != NULL) 9931 xmlFree((xmlChar *) ctxt->encoding); 9932 ctxt->encoding = encoding; 9933 } 9934 /* 9935 * UTF-8 encoding is handled natively 9936 */ 9937 else if ((encoding != NULL) && 9938 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 9939 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 9940 if (ctxt->encoding != NULL) 9941 xmlFree((xmlChar *) ctxt->encoding); 9942 ctxt->encoding = encoding; 9943 } 9944 else if (encoding != NULL) { 9945 xmlCharEncodingHandlerPtr handler; 9946 9947 if (ctxt->input->encoding != NULL) 9948 xmlFree((xmlChar *) ctxt->input->encoding); 9949 ctxt->input->encoding = encoding; 9950 9951 handler = xmlFindCharEncodingHandler((const char *) encoding); 9952 if (handler != NULL) { 9953 xmlSwitchToEncoding(ctxt, handler); 9954 } else { 9955 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 9956 "Unsupported encoding %s\n", encoding); 9957 return(NULL); 9958 } 9959 } 9960 } 9961 return(encoding); 9962 } 9963 9964 /** 9965 * xmlParseSDDecl: 9966 * @ctxt: an XML parser context 9967 * 9968 * parse the XML standalone declaration 9969 * 9970 * [32] SDDecl ::= S 'standalone' Eq 9971 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 9972 * 9973 * [ VC: Standalone Document Declaration ] 9974 * TODO The standalone document declaration must have the value "no" 9975 * if any external markup declarations contain declarations of: 9976 * - attributes with default values, if elements to which these 9977 * attributes apply appear in the document without specifications 9978 * of values for these attributes, or 9979 * - entities (other than amp, lt, gt, apos, quot), if references 9980 * to 
those entities appear in the document, or 9981 * - attributes with values subject to normalization, where the 9982 * attribute appears in the document with a value which will change 9983 * as a result of normalization, or 9984 * - element types with element content, if white space occurs directly 9985 * within any instance of those types. 9986 * 9987 * Returns: 9988 * 1 if standalone="yes" 9989 * 0 if standalone="no" 9990 * -2 if standalone attribute is missing or invalid 9991 * (A standalone value of -2 means that the XML declaration was found, 9992 * but no value was specified for the standalone attribute). 9993 */ 9994 9995 int 9996 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 9997 int standalone = -2; 9998 9999 SKIP_BLANKS; 10000 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10001 SKIP(10); 10002 SKIP_BLANKS; 10003 if (RAW != '=') { 10004 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10005 return(standalone); 10006 } 10007 NEXT; 10008 SKIP_BLANKS; 10009 if (RAW == '\''){ 10010 NEXT; 10011 if ((RAW == 'n') && (NXT(1) == 'o')) { 10012 standalone = 0; 10013 SKIP(2); 10014 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10015 (NXT(2) == 's')) { 10016 standalone = 1; 10017 SKIP(3); 10018 } else { 10019 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10020 } 10021 if (RAW != '\'') { 10022 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10023 } else 10024 NEXT; 10025 } else if (RAW == '"'){ 10026 NEXT; 10027 if ((RAW == 'n') && (NXT(1) == 'o')) { 10028 standalone = 0; 10029 SKIP(2); 10030 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10031 (NXT(2) == 's')) { 10032 standalone = 1; 10033 SKIP(3); 10034 } else { 10035 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10036 } 10037 if (RAW != '"') { 10038 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10039 } else 10040 NEXT; 10041 } else { 10042 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10043 } 10044 } 10045 return(standalone); 10046 } 10047 10048 /** 10049 * xmlParseXMLDecl: 
10050 * @ctxt: an XML parser context 10051 * 10052 * parse an XML declaration header 10053 * 10054 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10055 */ 10056 10057 void 10058 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10059 xmlChar *version; 10060 10061 /* 10062 * This value for standalone indicates that the document has an 10063 * XML declaration but it does not have a standalone attribute. 10064 * It will be overwritten later if a standalone attribute is found. 10065 */ 10066 ctxt->input->standalone = -2; 10067 10068 /* 10069 * We know that '<?xml' is here. 10070 */ 10071 SKIP(5); 10072 10073 if (!IS_BLANK_CH(RAW)) { 10074 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10075 "Blank needed after '<?xml'\n"); 10076 } 10077 SKIP_BLANKS; 10078 10079 /* 10080 * We must have the VersionInfo here. 10081 */ 10082 version = xmlParseVersionInfo(ctxt); 10083 if (version == NULL) { 10084 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10085 } else { 10086 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10087 /* 10088 * Changed here for XML-1.0 5th edition 10089 */ 10090 if (ctxt->options & XML_PARSE_OLD10) { 10091 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10092 "Unsupported version '%s'\n", 10093 version); 10094 } else { 10095 if ((version[0] == '1') && ((version[1] == '.'))) { 10096 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10097 "Unsupported version '%s'\n", 10098 version, NULL); 10099 } else { 10100 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10101 "Unsupported version '%s'\n", 10102 version); 10103 } 10104 } 10105 } 10106 if (ctxt->version != NULL) 10107 xmlFree((void *) ctxt->version); 10108 ctxt->version = version; 10109 } 10110 10111 /* 10112 * We may have the encoding declaration 10113 */ 10114 if (!IS_BLANK_CH(RAW)) { 10115 if ((RAW == '?') && (NXT(1) == '>')) { 10116 SKIP(2); 10117 return; 10118 } 10119 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10120 } 10121 
xmlParseEncodingDecl(ctxt); 10122 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10123 /* 10124 * The XML REC instructs us to stop parsing right here 10125 */ 10126 return; 10127 } 10128 10129 /* 10130 * We may have the standalone status. 10131 */ 10132 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10133 if ((RAW == '?') && (NXT(1) == '>')) { 10134 SKIP(2); 10135 return; 10136 } 10137 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10138 } 10139 10140 /* 10141 * We can grow the input buffer freely at that point 10142 */ 10143 GROW; 10144 10145 SKIP_BLANKS; 10146 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10147 10148 SKIP_BLANKS; 10149 if ((RAW == '?') && (NXT(1) == '>')) { 10150 SKIP(2); 10151 } else if (RAW == '>') { 10152 /* Deprecated old WD ... */ 10153 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10154 NEXT; 10155 } else { 10156 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10157 MOVETO_ENDTAG(CUR_PTR); 10158 NEXT; 10159 } 10160 } 10161 10162 /** 10163 * xmlParseMisc: 10164 * @ctxt: an XML parser context 10165 * 10166 * parse an XML Misc* optional field. 10167 * 10168 * [27] Misc ::= Comment | PI | S 10169 */ 10170 10171 void 10172 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10173 while ((ctxt->instate != XML_PARSER_EOF) && 10174 (((RAW == '<') && (NXT(1) == '?')) || 10175 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10176 IS_BLANK_CH(CUR))) { 10177 if ((RAW == '<') && (NXT(1) == '?')) { 10178 xmlParsePI(ctxt); 10179 } else if (IS_BLANK_CH(CUR)) { 10180 NEXT; 10181 } else 10182 xmlParseComment(ctxt); 10183 } 10184 } 10185 10186 /** 10187 * xmlParseDocument: 10188 * @ctxt: an XML parser context 10189 * 10190 * parse an XML document (and build a tree if using the standard SAX 10191 * interface). 10192 * 10193 * [1] document ::= prolog element Misc* 10194 * 10195 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10196 * 10197 * Returns 0, -1 in case of error. 
the parser context is augmented 10198 * as a result of the parsing. 10199 */ 10200 10201 int 10202 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10203 xmlChar start[4]; 10204 xmlCharEncoding enc; 10205 10206 xmlInitParser(); 10207 10208 if ((ctxt == NULL) || (ctxt->input == NULL)) 10209 return(-1); 10210 10211 GROW; 10212 10213 /* 10214 * SAX: detecting the level. 10215 */ 10216 xmlDetectSAX2(ctxt); 10217 10218 /* 10219 * SAX: beginning of the document processing. 10220 */ 10221 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10222 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10223 if (ctxt->instate == XML_PARSER_EOF) 10224 return(-1); 10225 10226 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 10227 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10228 /* 10229 * Get the 4 first bytes and decode the charset 10230 * if enc != XML_CHAR_ENCODING_NONE 10231 * plug some encoding conversion routines. 10232 */ 10233 start[0] = RAW; 10234 start[1] = NXT(1); 10235 start[2] = NXT(2); 10236 start[3] = NXT(3); 10237 enc = xmlDetectCharEncoding(&start[0], 4); 10238 if (enc != XML_CHAR_ENCODING_NONE) { 10239 xmlSwitchEncoding(ctxt, enc); 10240 } 10241 } 10242 10243 10244 if (CUR == 0) { 10245 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10246 } 10247 10248 /* 10249 * Check for the XMLDecl in the Prolog. 10250 * do not GROW here to avoid the detected encoder to decode more 10251 * than just the first line, unless the amount of data is really 10252 * too small to hold "<?xml version="1.0" encoding="foo" 10253 */ 10254 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10255 GROW; 10256 } 10257 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10258 10259 /* 10260 * Note that we will switch encoding on the fly. 
10261 */ 10262 xmlParseXMLDecl(ctxt); 10263 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10264 /* 10265 * The XML REC instructs us to stop parsing right here 10266 */ 10267 return(-1); 10268 } 10269 ctxt->standalone = ctxt->input->standalone; 10270 SKIP_BLANKS; 10271 } else { 10272 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10273 } 10274 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10275 ctxt->sax->startDocument(ctxt->userData); 10276 if (ctxt->instate == XML_PARSER_EOF) 10277 return(-1); 10278 10279 /* 10280 * The Misc part of the Prolog 10281 */ 10282 GROW; 10283 xmlParseMisc(ctxt); 10284 10285 /* 10286 * Then possibly doc type declaration(s) and more Misc 10287 * (doctypedecl Misc*)? 10288 */ 10289 GROW; 10290 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10291 10292 ctxt->inSubset = 1; 10293 xmlParseDocTypeDecl(ctxt); 10294 if (RAW == '[') { 10295 ctxt->instate = XML_PARSER_DTD; 10296 xmlParseInternalSubset(ctxt); 10297 if (ctxt->instate == XML_PARSER_EOF) 10298 return(-1); 10299 } 10300 10301 /* 10302 * Create and update the external subset. 
10303 */ 10304 ctxt->inSubset = 2; 10305 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10306 (!ctxt->disableSAX)) 10307 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10308 ctxt->extSubSystem, ctxt->extSubURI); 10309 if (ctxt->instate == XML_PARSER_EOF) 10310 return(-1); 10311 ctxt->inSubset = 0; 10312 10313 xmlCleanSpecialAttr(ctxt); 10314 10315 ctxt->instate = XML_PARSER_PROLOG; 10316 xmlParseMisc(ctxt); 10317 } 10318 10319 /* 10320 * Time to start parsing the tree itself 10321 */ 10322 GROW; 10323 if (RAW != '<') { 10324 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10325 "Start tag expected, '<' not found\n"); 10326 } else { 10327 ctxt->instate = XML_PARSER_CONTENT; 10328 xmlParseElement(ctxt); 10329 ctxt->instate = XML_PARSER_EPILOG; 10330 10331 10332 /* 10333 * The Misc part at the end 10334 */ 10335 xmlParseMisc(ctxt); 10336 10337 if (RAW != 0) { 10338 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10339 } 10340 ctxt->instate = XML_PARSER_EOF; 10341 } 10342 10343 /* 10344 * SAX: end of the document processing. 10345 */ 10346 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10347 ctxt->sax->endDocument(ctxt->userData); 10348 10349 /* 10350 * Remove locally kept entity definitions if the tree was not built 10351 */ 10352 if ((ctxt->myDoc != NULL) && 10353 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10354 xmlFreeDoc(ctxt->myDoc); 10355 ctxt->myDoc = NULL; 10356 } 10357 10358 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10359 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10360 if (ctxt->valid) 10361 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10362 if (ctxt->nsWellFormed) 10363 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10364 if (ctxt->options & XML_PARSE_OLD10) 10365 ctxt->myDoc->properties |= XML_DOC_OLD10; 10366 } 10367 if (! 
ctxt->wellFormed) { 10368 ctxt->valid = 0; 10369 return(-1); 10370 } 10371 return(0); 10372 } 10373 10374 /** 10375 * xmlParseExtParsedEnt: 10376 * @ctxt: an XML parser context 10377 * 10378 * parse a general parsed entity 10379 * An external general parsed entity is well-formed if it matches the 10380 * production labeled extParsedEnt. 10381 * 10382 * [78] extParsedEnt ::= TextDecl? content 10383 * 10384 * Returns 0, -1 in case of error. the parser context is augmented 10385 * as a result of the parsing. 10386 */ 10387 10388 int 10389 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10390 xmlChar start[4]; 10391 xmlCharEncoding enc; 10392 10393 if ((ctxt == NULL) || (ctxt->input == NULL)) 10394 return(-1); 10395 10396 xmlDefaultSAXHandlerInit(); 10397 10398 xmlDetectSAX2(ctxt); 10399 10400 GROW; 10401 10402 /* 10403 * SAX: beginning of the document processing. 10404 */ 10405 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10406 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10407 10408 /* 10409 * Get the 4 first bytes and decode the charset 10410 * if enc != XML_CHAR_ENCODING_NONE 10411 * plug some encoding conversion routines. 10412 */ 10413 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10414 start[0] = RAW; 10415 start[1] = NXT(1); 10416 start[2] = NXT(2); 10417 start[3] = NXT(3); 10418 enc = xmlDetectCharEncoding(start, 4); 10419 if (enc != XML_CHAR_ENCODING_NONE) { 10420 xmlSwitchEncoding(ctxt, enc); 10421 } 10422 } 10423 10424 10425 if (CUR == 0) { 10426 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10427 } 10428 10429 /* 10430 * Check for the XMLDecl in the Prolog. 10431 */ 10432 GROW; 10433 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10434 10435 /* 10436 * Note that we will switch encoding on the fly. 
10437 */ 10438 xmlParseXMLDecl(ctxt); 10439 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10440 /* 10441 * The XML REC instructs us to stop parsing right here 10442 */ 10443 return(-1); 10444 } 10445 SKIP_BLANKS; 10446 } else { 10447 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10448 } 10449 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10450 ctxt->sax->startDocument(ctxt->userData); 10451 if (ctxt->instate == XML_PARSER_EOF) 10452 return(-1); 10453 10454 /* 10455 * Doing validity checking on chunk doesn't make sense 10456 */ 10457 ctxt->instate = XML_PARSER_CONTENT; 10458 ctxt->validate = 0; 10459 ctxt->loadsubset = 0; 10460 ctxt->depth = 0; 10461 10462 xmlParseContent(ctxt); 10463 if (ctxt->instate == XML_PARSER_EOF) 10464 return(-1); 10465 10466 if ((RAW == '<') && (NXT(1) == '/')) { 10467 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10468 } else if (RAW != 0) { 10469 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10470 } 10471 10472 /* 10473 * SAX: end of the document processing. 10474 */ 10475 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10476 ctxt->sax->endDocument(ctxt->userData); 10477 10478 if (! ctxt->wellFormed) return(-1); 10479 return(0); 10480 } 10481 10482 #ifdef LIBXML_PUSH_ENABLED 10483 /************************************************************************ 10484 * * 10485 * Progressive parsing interfaces * 10486 * * 10487 ************************************************************************/ 10488 10489 /** 10490 * xmlParseLookupSequence: 10491 * @ctxt: an XML parser context 10492 * @first: the first char to lookup 10493 * @next: the next char to lookup or zero 10494 * @third: the next char to lookup or zero 10495 * 10496 * Try to find if a sequence (first, next, third) or just (first next) or 10497 * (first) is available in the input stream. 
10498 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10499 * to avoid rescanning sequences of bytes, it DOES change the state of the 10500 * parser, do not use liberally. 10501 * 10502 * Returns the index to the current parsing point if the full sequence 10503 * is available, -1 otherwise. 10504 */ 10505 static int 10506 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 10507 xmlChar next, xmlChar third) { 10508 int base, len; 10509 xmlParserInputPtr in; 10510 const xmlChar *buf; 10511 10512 in = ctxt->input; 10513 if (in == NULL) return(-1); 10514 base = in->cur - in->base; 10515 if (base < 0) return(-1); 10516 if (ctxt->checkIndex > base) 10517 base = ctxt->checkIndex; 10518 if (in->buf == NULL) { 10519 buf = in->base; 10520 len = in->length; 10521 } else { 10522 buf = in->buf->buffer->content; 10523 len = in->buf->buffer->use; 10524 } 10525 /* take into account the sequence length */ 10526 if (third) len -= 2; 10527 else if (next) len --; 10528 for (;base < len;base++) { 10529 if (buf[base] == first) { 10530 if (third != 0) { 10531 if ((buf[base + 1] != next) || 10532 (buf[base + 2] != third)) continue; 10533 } else if (next != 0) { 10534 if (buf[base + 1] != next) continue; 10535 } 10536 ctxt->checkIndex = 0; 10537 #ifdef DEBUG_PUSH 10538 if (next == 0) 10539 xmlGenericError(xmlGenericErrorContext, 10540 "PP: lookup '%c' found at %d\n", 10541 first, base); 10542 else if (third == 0) 10543 xmlGenericError(xmlGenericErrorContext, 10544 "PP: lookup '%c%c' found at %d\n", 10545 first, next, base); 10546 else 10547 xmlGenericError(xmlGenericErrorContext, 10548 "PP: lookup '%c%c%c' found at %d\n", 10549 first, next, third, base); 10550 #endif 10551 return(base - (in->cur - in->base)); 10552 } 10553 } 10554 ctxt->checkIndex = base; 10555 #ifdef DEBUG_PUSH 10556 if (next == 0) 10557 xmlGenericError(xmlGenericErrorContext, 10558 "PP: lookup '%c' failed\n", first); 10559 else if (third == 0) 10560 
xmlGenericError(xmlGenericErrorContext, 10561 "PP: lookup '%c%c' failed\n", first, next); 10562 else 10563 xmlGenericError(xmlGenericErrorContext, 10564 "PP: lookup '%c%c%c' failed\n", first, next, third); 10565 #endif 10566 return(-1); 10567 } 10568 10569 /** 10570 * xmlParseGetLasts: 10571 * @ctxt: an XML parser context 10572 * @lastlt: pointer to store the last '<' from the input 10573 * @lastgt: pointer to store the last '>' from the input 10574 * 10575 * Lookup the last < and > in the current chunk 10576 */ 10577 static void 10578 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 10579 const xmlChar **lastgt) { 10580 const xmlChar *tmp; 10581 10582 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 10583 xmlGenericError(xmlGenericErrorContext, 10584 "Internal error: xmlParseGetLasts\n"); 10585 return; 10586 } 10587 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 10588 tmp = ctxt->input->end; 10589 tmp--; 10590 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 10591 if (tmp < ctxt->input->base) { 10592 *lastlt = NULL; 10593 *lastgt = NULL; 10594 } else { 10595 *lastlt = tmp; 10596 tmp++; 10597 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 10598 if (*tmp == '\'') { 10599 tmp++; 10600 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 10601 if (tmp < ctxt->input->end) tmp++; 10602 } else if (*tmp == '"') { 10603 tmp++; 10604 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 10605 if (tmp < ctxt->input->end) tmp++; 10606 } else 10607 tmp++; 10608 } 10609 if (tmp < ctxt->input->end) 10610 *lastgt = tmp; 10611 else { 10612 tmp = *lastlt; 10613 tmp--; 10614 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 10615 if (tmp >= ctxt->input->base) 10616 *lastgt = tmp; 10617 else 10618 *lastgt = NULL; 10619 } 10620 } 10621 } else { 10622 *lastlt = NULL; 10623 *lastgt = NULL; 10624 } 10625 } 10626 /** 10627 * xmlCheckCdataPush: 10628 * @cur: pointer to the bock of characters 10629 * @len: length of the 
block in bytes 10630 * 10631 * Check that the block of characters is okay as SCdata content [20] 10632 * 10633 * Returns the number of bytes to pass if okay, a negative index where an 10634 * UTF-8 error occured otherwise 10635 */ 10636 static int 10637 xmlCheckCdataPush(const xmlChar *utf, int len) { 10638 int ix; 10639 unsigned char c; 10640 int codepoint; 10641 10642 if ((utf == NULL) || (len <= 0)) 10643 return(0); 10644 10645 for (ix = 0; ix < len;) { /* string is 0-terminated */ 10646 c = utf[ix]; 10647 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 10648 if (c >= 0x20) 10649 ix++; 10650 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 10651 ix++; 10652 else 10653 return(-ix); 10654 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 10655 if (ix + 2 > len) return(ix); 10656 if ((utf[ix+1] & 0xc0 ) != 0x80) 10657 return(-ix); 10658 codepoint = (utf[ix] & 0x1f) << 6; 10659 codepoint |= utf[ix+1] & 0x3f; 10660 if (!xmlIsCharQ(codepoint)) 10661 return(-ix); 10662 ix += 2; 10663 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 10664 if (ix + 3 > len) return(ix); 10665 if (((utf[ix+1] & 0xc0) != 0x80) || 10666 ((utf[ix+2] & 0xc0) != 0x80)) 10667 return(-ix); 10668 codepoint = (utf[ix] & 0xf) << 12; 10669 codepoint |= (utf[ix+1] & 0x3f) << 6; 10670 codepoint |= utf[ix+2] & 0x3f; 10671 if (!xmlIsCharQ(codepoint)) 10672 return(-ix); 10673 ix += 3; 10674 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 10675 if (ix + 4 > len) return(ix); 10676 if (((utf[ix+1] & 0xc0) != 0x80) || 10677 ((utf[ix+2] & 0xc0) != 0x80) || 10678 ((utf[ix+3] & 0xc0) != 0x80)) 10679 return(-ix); 10680 codepoint = (utf[ix] & 0x7) << 18; 10681 codepoint |= (utf[ix+1] & 0x3f) << 12; 10682 codepoint |= (utf[ix+2] & 0x3f) << 6; 10683 codepoint |= utf[ix+3] & 0x3f; 10684 if (!xmlIsCharQ(codepoint)) 10685 return(-ix); 10686 ix += 4; 10687 } else /* unknown encoding */ 10688 return(-ix); 10689 } 10690 return(ix); 10691 } 10692 10693 
/** 10694 * xmlParseTryOrFinish: 10695 * @ctxt: an XML parser context 10696 * @terminate: last chunk indicator 10697 * 10698 * Try to progress on parsing 10699 * 10700 * Returns zero if no parsing was possible 10701 */ 10702 static int 10703 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 10704 int ret = 0; 10705 int avail, tlen; 10706 xmlChar cur, next; 10707 const xmlChar *lastlt, *lastgt; 10708 10709 if (ctxt->input == NULL) 10710 return(0); 10711 10712 #ifdef DEBUG_PUSH 10713 switch (ctxt->instate) { 10714 case XML_PARSER_EOF: 10715 xmlGenericError(xmlGenericErrorContext, 10716 "PP: try EOF\n"); break; 10717 case XML_PARSER_START: 10718 xmlGenericError(xmlGenericErrorContext, 10719 "PP: try START\n"); break; 10720 case XML_PARSER_MISC: 10721 xmlGenericError(xmlGenericErrorContext, 10722 "PP: try MISC\n");break; 10723 case XML_PARSER_COMMENT: 10724 xmlGenericError(xmlGenericErrorContext, 10725 "PP: try COMMENT\n");break; 10726 case XML_PARSER_PROLOG: 10727 xmlGenericError(xmlGenericErrorContext, 10728 "PP: try PROLOG\n");break; 10729 case XML_PARSER_START_TAG: 10730 xmlGenericError(xmlGenericErrorContext, 10731 "PP: try START_TAG\n");break; 10732 case XML_PARSER_CONTENT: 10733 xmlGenericError(xmlGenericErrorContext, 10734 "PP: try CONTENT\n");break; 10735 case XML_PARSER_CDATA_SECTION: 10736 xmlGenericError(xmlGenericErrorContext, 10737 "PP: try CDATA_SECTION\n");break; 10738 case XML_PARSER_END_TAG: 10739 xmlGenericError(xmlGenericErrorContext, 10740 "PP: try END_TAG\n");break; 10741 case XML_PARSER_ENTITY_DECL: 10742 xmlGenericError(xmlGenericErrorContext, 10743 "PP: try ENTITY_DECL\n");break; 10744 case XML_PARSER_ENTITY_VALUE: 10745 xmlGenericError(xmlGenericErrorContext, 10746 "PP: try ENTITY_VALUE\n");break; 10747 case XML_PARSER_ATTRIBUTE_VALUE: 10748 xmlGenericError(xmlGenericErrorContext, 10749 "PP: try ATTRIBUTE_VALUE\n");break; 10750 case XML_PARSER_DTD: 10751 xmlGenericError(xmlGenericErrorContext, 10752 "PP: try DTD\n");break; 10753 case 
XML_PARSER_EPILOG: 10754 xmlGenericError(xmlGenericErrorContext, 10755 "PP: try EPILOG\n");break; 10756 case XML_PARSER_PI: 10757 xmlGenericError(xmlGenericErrorContext, 10758 "PP: try PI\n");break; 10759 case XML_PARSER_IGNORE: 10760 xmlGenericError(xmlGenericErrorContext, 10761 "PP: try IGNORE\n");break; 10762 } 10763 #endif 10764 10765 if ((ctxt->input != NULL) && 10766 (ctxt->input->cur - ctxt->input->base > 4096)) { 10767 xmlSHRINK(ctxt); 10768 ctxt->checkIndex = 0; 10769 } 10770 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10771 10772 while (ctxt->instate != XML_PARSER_EOF) { 10773 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10774 return(0); 10775 10776 10777 /* 10778 * Pop-up of finished entities. 10779 */ 10780 while ((RAW == 0) && (ctxt->inputNr > 1)) 10781 xmlPopInput(ctxt); 10782 10783 if (ctxt->input == NULL) break; 10784 if (ctxt->input->buf == NULL) 10785 avail = ctxt->input->length - 10786 (ctxt->input->cur - ctxt->input->base); 10787 else { 10788 /* 10789 * If we are operating on converted input, try to flush 10790 * remainng chars to avoid them stalling in the non-converted 10791 * buffer. 10792 */ 10793 if ((ctxt->input->buf->raw != NULL) && 10794 (ctxt->input->buf->raw->use > 0)) { 10795 int base = ctxt->input->base - 10796 ctxt->input->buf->buffer->content; 10797 int current = ctxt->input->cur - ctxt->input->base; 10798 10799 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 10800 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10801 ctxt->input->cur = ctxt->input->base + current; 10802 ctxt->input->end = 10803 &ctxt->input->buf->buffer->content[ 10804 ctxt->input->buf->buffer->use]; 10805 } 10806 avail = ctxt->input->buf->buffer->use - 10807 (ctxt->input->cur - ctxt->input->base); 10808 } 10809 if (avail < 1) 10810 goto done; 10811 switch (ctxt->instate) { 10812 case XML_PARSER_EOF: 10813 /* 10814 * Document parsing is done ! 
10815 */ 10816 goto done; 10817 case XML_PARSER_START: 10818 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 10819 xmlChar start[4]; 10820 xmlCharEncoding enc; 10821 10822 /* 10823 * Very first chars read from the document flow. 10824 */ 10825 if (avail < 4) 10826 goto done; 10827 10828 /* 10829 * Get the 4 first bytes and decode the charset 10830 * if enc != XML_CHAR_ENCODING_NONE 10831 * plug some encoding conversion routines, 10832 * else xmlSwitchEncoding will set to (default) 10833 * UTF8. 10834 */ 10835 start[0] = RAW; 10836 start[1] = NXT(1); 10837 start[2] = NXT(2); 10838 start[3] = NXT(3); 10839 enc = xmlDetectCharEncoding(start, 4); 10840 xmlSwitchEncoding(ctxt, enc); 10841 break; 10842 } 10843 10844 if (avail < 2) 10845 goto done; 10846 cur = ctxt->input->cur[0]; 10847 next = ctxt->input->cur[1]; 10848 if (cur == 0) { 10849 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10850 ctxt->sax->setDocumentLocator(ctxt->userData, 10851 &xmlDefaultSAXLocator); 10852 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10853 ctxt->instate = XML_PARSER_EOF; 10854 #ifdef DEBUG_PUSH 10855 xmlGenericError(xmlGenericErrorContext, 10856 "PP: entering EOF\n"); 10857 #endif 10858 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10859 ctxt->sax->endDocument(ctxt->userData); 10860 goto done; 10861 } 10862 if ((cur == '<') && (next == '?')) { 10863 /* PI or XML decl */ 10864 if (avail < 5) return(ret); 10865 if ((!terminate) && 10866 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10867 return(ret); 10868 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10869 ctxt->sax->setDocumentLocator(ctxt->userData, 10870 &xmlDefaultSAXLocator); 10871 if ((ctxt->input->cur[2] == 'x') && 10872 (ctxt->input->cur[3] == 'm') && 10873 (ctxt->input->cur[4] == 'l') && 10874 (IS_BLANK_CH(ctxt->input->cur[5]))) { 10875 ret += 5; 10876 #ifdef DEBUG_PUSH 10877 xmlGenericError(xmlGenericErrorContext, 10878 "PP: Parsing XML Decl\n"); 10879 #endif 10880 xmlParseXMLDecl(ctxt); 10881 if 
(ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10882 /* 10883 * The XML REC instructs us to stop parsing right 10884 * here 10885 */ 10886 ctxt->instate = XML_PARSER_EOF; 10887 return(0); 10888 } 10889 ctxt->standalone = ctxt->input->standalone; 10890 if ((ctxt->encoding == NULL) && 10891 (ctxt->input->encoding != NULL)) 10892 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 10893 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10894 (!ctxt->disableSAX)) 10895 ctxt->sax->startDocument(ctxt->userData); 10896 ctxt->instate = XML_PARSER_MISC; 10897 #ifdef DEBUG_PUSH 10898 xmlGenericError(xmlGenericErrorContext, 10899 "PP: entering MISC\n"); 10900 #endif 10901 } else { 10902 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10903 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10904 (!ctxt->disableSAX)) 10905 ctxt->sax->startDocument(ctxt->userData); 10906 ctxt->instate = XML_PARSER_MISC; 10907 #ifdef DEBUG_PUSH 10908 xmlGenericError(xmlGenericErrorContext, 10909 "PP: entering MISC\n"); 10910 #endif 10911 } 10912 } else { 10913 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10914 ctxt->sax->setDocumentLocator(ctxt->userData, 10915 &xmlDefaultSAXLocator); 10916 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10917 if (ctxt->version == NULL) { 10918 xmlErrMemory(ctxt, NULL); 10919 break; 10920 } 10921 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10922 (!ctxt->disableSAX)) 10923 ctxt->sax->startDocument(ctxt->userData); 10924 ctxt->instate = XML_PARSER_MISC; 10925 #ifdef DEBUG_PUSH 10926 xmlGenericError(xmlGenericErrorContext, 10927 "PP: entering MISC\n"); 10928 #endif 10929 } 10930 break; 10931 case XML_PARSER_START_TAG: { 10932 const xmlChar *name; 10933 const xmlChar *prefix = NULL; 10934 const xmlChar *URI = NULL; 10935 int nsNr = ctxt->nsNr; 10936 10937 if ((avail < 2) && (ctxt->inputNr == 1)) 10938 goto done; 10939 cur = ctxt->input->cur[0]; 10940 if (cur != '<') { 10941 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10942 ctxt->instate = 
XML_PARSER_EOF; 10943 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10944 ctxt->sax->endDocument(ctxt->userData); 10945 goto done; 10946 } 10947 if (!terminate) { 10948 if (ctxt->progressive) { 10949 /* > can be found unescaped in attribute values */ 10950 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 10951 goto done; 10952 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 10953 goto done; 10954 } 10955 } 10956 if (ctxt->spaceNr == 0) 10957 spacePush(ctxt, -1); 10958 else if (*ctxt->space == -2) 10959 spacePush(ctxt, -1); 10960 else 10961 spacePush(ctxt, *ctxt->space); 10962 #ifdef LIBXML_SAX1_ENABLED 10963 if (ctxt->sax2) 10964 #endif /* LIBXML_SAX1_ENABLED */ 10965 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10966 #ifdef LIBXML_SAX1_ENABLED 10967 else 10968 name = xmlParseStartTag(ctxt); 10969 #endif /* LIBXML_SAX1_ENABLED */ 10970 if (ctxt->instate == XML_PARSER_EOF) 10971 goto done; 10972 if (name == NULL) { 10973 spacePop(ctxt); 10974 ctxt->instate = XML_PARSER_EOF; 10975 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10976 ctxt->sax->endDocument(ctxt->userData); 10977 goto done; 10978 } 10979 #ifdef LIBXML_VALID_ENABLED 10980 /* 10981 * [ VC: Root Element Type ] 10982 * The Name in the document type declaration must match 10983 * the element type of the root element. 10984 */ 10985 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10986 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10987 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10988 #endif /* LIBXML_VALID_ENABLED */ 10989 10990 /* 10991 * Check for an Empty Element. 
10992 */ 10993 if ((RAW == '/') && (NXT(1) == '>')) { 10994 SKIP(2); 10995 10996 if (ctxt->sax2) { 10997 if ((ctxt->sax != NULL) && 10998 (ctxt->sax->endElementNs != NULL) && 10999 (!ctxt->disableSAX)) 11000 ctxt->sax->endElementNs(ctxt->userData, name, 11001 prefix, URI); 11002 if (ctxt->nsNr - nsNr > 0) 11003 nsPop(ctxt, ctxt->nsNr - nsNr); 11004 #ifdef LIBXML_SAX1_ENABLED 11005 } else { 11006 if ((ctxt->sax != NULL) && 11007 (ctxt->sax->endElement != NULL) && 11008 (!ctxt->disableSAX)) 11009 ctxt->sax->endElement(ctxt->userData, name); 11010 #endif /* LIBXML_SAX1_ENABLED */ 11011 } 11012 if (ctxt->instate == XML_PARSER_EOF) 11013 goto done; 11014 spacePop(ctxt); 11015 if (ctxt->nameNr == 0) { 11016 ctxt->instate = XML_PARSER_EPILOG; 11017 } else { 11018 ctxt->instate = XML_PARSER_CONTENT; 11019 } 11020 break; 11021 } 11022 if (RAW == '>') { 11023 NEXT; 11024 } else { 11025 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11026 "Couldn't find end of Start Tag %s\n", 11027 name); 11028 nodePop(ctxt); 11029 spacePop(ctxt); 11030 } 11031 if (ctxt->sax2) 11032 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11033 #ifdef LIBXML_SAX1_ENABLED 11034 else 11035 namePush(ctxt, name); 11036 #endif /* LIBXML_SAX1_ENABLED */ 11037 11038 ctxt->instate = XML_PARSER_CONTENT; 11039 break; 11040 } 11041 case XML_PARSER_CONTENT: { 11042 const xmlChar *test; 11043 unsigned int cons; 11044 if ((avail < 2) && (ctxt->inputNr == 1)) 11045 goto done; 11046 cur = ctxt->input->cur[0]; 11047 next = ctxt->input->cur[1]; 11048 11049 test = CUR_PTR; 11050 cons = ctxt->input->consumed; 11051 if ((cur == '<') && (next == '/')) { 11052 ctxt->instate = XML_PARSER_END_TAG; 11053 break; 11054 } else if ((cur == '<') && (next == '?')) { 11055 if ((!terminate) && 11056 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11057 goto done; 11058 xmlParsePI(ctxt); 11059 } else if ((cur == '<') && (next != '!')) { 11060 ctxt->instate = XML_PARSER_START_TAG; 11061 break; 11062 } else if ((cur == '<') && 
(next == '!') && 11063 (ctxt->input->cur[2] == '-') && 11064 (ctxt->input->cur[3] == '-')) { 11065 int term; 11066 11067 if (avail < 4) 11068 goto done; 11069 ctxt->input->cur += 4; 11070 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11071 ctxt->input->cur -= 4; 11072 if ((!terminate) && (term < 0)) 11073 goto done; 11074 xmlParseComment(ctxt); 11075 ctxt->instate = XML_PARSER_CONTENT; 11076 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11077 (ctxt->input->cur[2] == '[') && 11078 (ctxt->input->cur[3] == 'C') && 11079 (ctxt->input->cur[4] == 'D') && 11080 (ctxt->input->cur[5] == 'A') && 11081 (ctxt->input->cur[6] == 'T') && 11082 (ctxt->input->cur[7] == 'A') && 11083 (ctxt->input->cur[8] == '[')) { 11084 SKIP(9); 11085 ctxt->instate = XML_PARSER_CDATA_SECTION; 11086 break; 11087 } else if ((cur == '<') && (next == '!') && 11088 (avail < 9)) { 11089 goto done; 11090 } else if (cur == '&') { 11091 if ((!terminate) && 11092 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11093 goto done; 11094 xmlParseReference(ctxt); 11095 } else { 11096 /* TODO Avoid the extra copy, handle directly !!! */ 11097 /* 11098 * Goal of the following test is: 11099 * - minimize calls to the SAX 'character' callback 11100 * when they are mergeable 11101 * - handle an problem for isBlank when we only parse 11102 * a sequence of blank chars and the next one is 11103 * not available to check against '<' presence. 11104 * - tries to homogenize the differences in SAX 11105 * callbacks between the push and pull versions 11106 * of the parser. 
11107 */ 11108 if ((ctxt->inputNr == 1) && 11109 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11110 if (!terminate) { 11111 if (ctxt->progressive) { 11112 if ((lastlt == NULL) || 11113 (ctxt->input->cur > lastlt)) 11114 goto done; 11115 } else if (xmlParseLookupSequence(ctxt, 11116 '<', 0, 0) < 0) { 11117 goto done; 11118 } 11119 } 11120 } 11121 ctxt->checkIndex = 0; 11122 xmlParseCharData(ctxt, 0); 11123 } 11124 /* 11125 * Pop-up of finished entities. 11126 */ 11127 while ((RAW == 0) && (ctxt->inputNr > 1)) 11128 xmlPopInput(ctxt); 11129 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11130 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11131 "detected an error in element content\n"); 11132 ctxt->instate = XML_PARSER_EOF; 11133 break; 11134 } 11135 break; 11136 } 11137 case XML_PARSER_END_TAG: 11138 if (avail < 2) 11139 goto done; 11140 if (!terminate) { 11141 if (ctxt->progressive) { 11142 /* > can be found unescaped in attribute values */ 11143 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11144 goto done; 11145 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11146 goto done; 11147 } 11148 } 11149 if (ctxt->sax2) { 11150 xmlParseEndTag2(ctxt, 11151 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11152 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11153 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11154 nameNsPop(ctxt); 11155 } 11156 #ifdef LIBXML_SAX1_ENABLED 11157 else 11158 xmlParseEndTag1(ctxt, 0); 11159 #endif /* LIBXML_SAX1_ENABLED */ 11160 if (ctxt->instate == XML_PARSER_EOF) { 11161 /* Nothing */ 11162 } else if (ctxt->nameNr == 0) { 11163 ctxt->instate = XML_PARSER_EPILOG; 11164 } else { 11165 ctxt->instate = XML_PARSER_CONTENT; 11166 } 11167 break; 11168 case XML_PARSER_CDATA_SECTION: { 11169 /* 11170 * The Push mode need to have the SAX callback for 11171 * cdataBlock merge back contiguous callbacks. 
11172 */ 11173 int base; 11174 11175 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11176 if (base < 0) { 11177 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11178 int tmp; 11179 11180 tmp = xmlCheckCdataPush(ctxt->input->cur, 11181 XML_PARSER_BIG_BUFFER_SIZE); 11182 if (tmp < 0) { 11183 tmp = -tmp; 11184 ctxt->input->cur += tmp; 11185 goto encoding_error; 11186 } 11187 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11188 if (ctxt->sax->cdataBlock != NULL) 11189 ctxt->sax->cdataBlock(ctxt->userData, 11190 ctxt->input->cur, tmp); 11191 else if (ctxt->sax->characters != NULL) 11192 ctxt->sax->characters(ctxt->userData, 11193 ctxt->input->cur, tmp); 11194 } 11195 if (ctxt->instate == XML_PARSER_EOF) 11196 goto done; 11197 SKIPL(tmp); 11198 ctxt->checkIndex = 0; 11199 } 11200 goto done; 11201 } else { 11202 int tmp; 11203 11204 tmp = xmlCheckCdataPush(ctxt->input->cur, base); 11205 if ((tmp < 0) || (tmp != base)) { 11206 tmp = -tmp; 11207 ctxt->input->cur += tmp; 11208 goto encoding_error; 11209 } 11210 if ((ctxt->sax != NULL) && (base == 0) && 11211 (ctxt->sax->cdataBlock != NULL) && 11212 (!ctxt->disableSAX)) { 11213 /* 11214 * Special case to provide identical behaviour 11215 * between pull and push parsers on enpty CDATA 11216 * sections 11217 */ 11218 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11219 (!strncmp((const char *)&ctxt->input->cur[-9], 11220 "<![CDATA[", 9))) 11221 ctxt->sax->cdataBlock(ctxt->userData, 11222 BAD_CAST "", 0); 11223 } else if ((ctxt->sax != NULL) && (base > 0) && 11224 (!ctxt->disableSAX)) { 11225 if (ctxt->sax->cdataBlock != NULL) 11226 ctxt->sax->cdataBlock(ctxt->userData, 11227 ctxt->input->cur, base); 11228 else if (ctxt->sax->characters != NULL) 11229 ctxt->sax->characters(ctxt->userData, 11230 ctxt->input->cur, base); 11231 } 11232 if (ctxt->instate == XML_PARSER_EOF) 11233 goto done; 11234 SKIPL(base + 3); 11235 ctxt->checkIndex = 0; 11236 ctxt->instate = XML_PARSER_CONTENT; 11237 #ifdef DEBUG_PUSH 11238 
xmlGenericError(xmlGenericErrorContext, 11239 "PP: entering CONTENT\n"); 11240 #endif 11241 } 11242 break; 11243 } 11244 case XML_PARSER_MISC: 11245 SKIP_BLANKS; 11246 if (ctxt->input->buf == NULL) 11247 avail = ctxt->input->length - 11248 (ctxt->input->cur - ctxt->input->base); 11249 else 11250 avail = ctxt->input->buf->buffer->use - 11251 (ctxt->input->cur - ctxt->input->base); 11252 if (avail < 2) 11253 goto done; 11254 cur = ctxt->input->cur[0]; 11255 next = ctxt->input->cur[1]; 11256 if ((cur == '<') && (next == '?')) { 11257 if ((!terminate) && 11258 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11259 goto done; 11260 #ifdef DEBUG_PUSH 11261 xmlGenericError(xmlGenericErrorContext, 11262 "PP: Parsing PI\n"); 11263 #endif 11264 xmlParsePI(ctxt); 11265 if (ctxt->instate == XML_PARSER_EOF) 11266 goto done; 11267 ctxt->checkIndex = 0; 11268 } else if ((cur == '<') && (next == '!') && 11269 (ctxt->input->cur[2] == '-') && 11270 (ctxt->input->cur[3] == '-')) { 11271 if ((!terminate) && 11272 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11273 goto done; 11274 #ifdef DEBUG_PUSH 11275 xmlGenericError(xmlGenericErrorContext, 11276 "PP: Parsing Comment\n"); 11277 #endif 11278 xmlParseComment(ctxt); 11279 if (ctxt->instate == XML_PARSER_EOF) 11280 goto done; 11281 ctxt->instate = XML_PARSER_MISC; 11282 ctxt->checkIndex = 0; 11283 } else if ((cur == '<') && (next == '!') && 11284 (ctxt->input->cur[2] == 'D') && 11285 (ctxt->input->cur[3] == 'O') && 11286 (ctxt->input->cur[4] == 'C') && 11287 (ctxt->input->cur[5] == 'T') && 11288 (ctxt->input->cur[6] == 'Y') && 11289 (ctxt->input->cur[7] == 'P') && 11290 (ctxt->input->cur[8] == 'E')) { 11291 if ((!terminate) && 11292 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 11293 goto done; 11294 #ifdef DEBUG_PUSH 11295 xmlGenericError(xmlGenericErrorContext, 11296 "PP: Parsing internal subset\n"); 11297 #endif 11298 ctxt->inSubset = 1; 11299 xmlParseDocTypeDecl(ctxt); 11300 if (ctxt->instate == XML_PARSER_EOF) 11301 goto 
done; 11302 if (RAW == '[') { 11303 ctxt->instate = XML_PARSER_DTD; 11304 #ifdef DEBUG_PUSH 11305 xmlGenericError(xmlGenericErrorContext, 11306 "PP: entering DTD\n"); 11307 #endif 11308 } else { 11309 /* 11310 * Create and update the external subset. 11311 */ 11312 ctxt->inSubset = 2; 11313 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11314 (ctxt->sax->externalSubset != NULL)) 11315 ctxt->sax->externalSubset(ctxt->userData, 11316 ctxt->intSubName, ctxt->extSubSystem, 11317 ctxt->extSubURI); 11318 ctxt->inSubset = 0; 11319 xmlCleanSpecialAttr(ctxt); 11320 ctxt->instate = XML_PARSER_PROLOG; 11321 #ifdef DEBUG_PUSH 11322 xmlGenericError(xmlGenericErrorContext, 11323 "PP: entering PROLOG\n"); 11324 #endif 11325 } 11326 } else if ((cur == '<') && (next == '!') && 11327 (avail < 9)) { 11328 goto done; 11329 } else { 11330 ctxt->instate = XML_PARSER_START_TAG; 11331 ctxt->progressive = 1; 11332 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11333 #ifdef DEBUG_PUSH 11334 xmlGenericError(xmlGenericErrorContext, 11335 "PP: entering START_TAG\n"); 11336 #endif 11337 } 11338 break; 11339 case XML_PARSER_PROLOG: 11340 SKIP_BLANKS; 11341 if (ctxt->input->buf == NULL) 11342 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11343 else 11344 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11345 if (avail < 2) 11346 goto done; 11347 cur = ctxt->input->cur[0]; 11348 next = ctxt->input->cur[1]; 11349 if ((cur == '<') && (next == '?')) { 11350 if ((!terminate) && 11351 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11352 goto done; 11353 #ifdef DEBUG_PUSH 11354 xmlGenericError(xmlGenericErrorContext, 11355 "PP: Parsing PI\n"); 11356 #endif 11357 xmlParsePI(ctxt); 11358 if (ctxt->instate == XML_PARSER_EOF) 11359 goto done; 11360 } else if ((cur == '<') && (next == '!') && 11361 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11362 if ((!terminate) && 11363 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11364 goto 
done; 11365 #ifdef DEBUG_PUSH 11366 xmlGenericError(xmlGenericErrorContext, 11367 "PP: Parsing Comment\n"); 11368 #endif 11369 xmlParseComment(ctxt); 11370 if (ctxt->instate == XML_PARSER_EOF) 11371 goto done; 11372 ctxt->instate = XML_PARSER_PROLOG; 11373 } else if ((cur == '<') && (next == '!') && 11374 (avail < 4)) { 11375 goto done; 11376 } else { 11377 ctxt->instate = XML_PARSER_START_TAG; 11378 if (ctxt->progressive == 0) 11379 ctxt->progressive = 1; 11380 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11381 #ifdef DEBUG_PUSH 11382 xmlGenericError(xmlGenericErrorContext, 11383 "PP: entering START_TAG\n"); 11384 #endif 11385 } 11386 break; 11387 case XML_PARSER_EPILOG: 11388 SKIP_BLANKS; 11389 if (ctxt->input->buf == NULL) 11390 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11391 else 11392 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11393 if (avail < 2) 11394 goto done; 11395 cur = ctxt->input->cur[0]; 11396 next = ctxt->input->cur[1]; 11397 if ((cur == '<') && (next == '?')) { 11398 if ((!terminate) && 11399 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11400 goto done; 11401 #ifdef DEBUG_PUSH 11402 xmlGenericError(xmlGenericErrorContext, 11403 "PP: Parsing PI\n"); 11404 #endif 11405 xmlParsePI(ctxt); 11406 if (ctxt->instate == XML_PARSER_EOF) 11407 goto done; 11408 ctxt->instate = XML_PARSER_EPILOG; 11409 } else if ((cur == '<') && (next == '!') && 11410 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11411 if ((!terminate) && 11412 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11413 goto done; 11414 #ifdef DEBUG_PUSH 11415 xmlGenericError(xmlGenericErrorContext, 11416 "PP: Parsing Comment\n"); 11417 #endif 11418 xmlParseComment(ctxt); 11419 if (ctxt->instate == XML_PARSER_EOF) 11420 goto done; 11421 ctxt->instate = XML_PARSER_EPILOG; 11422 } else if ((cur == '<') && (next == '!') && 11423 (avail < 4)) { 11424 goto done; 11425 } else { 11426 xmlFatalErr(ctxt, 
XML_ERR_DOCUMENT_END, NULL); 11427 ctxt->instate = XML_PARSER_EOF; 11428 #ifdef DEBUG_PUSH 11429 xmlGenericError(xmlGenericErrorContext, 11430 "PP: entering EOF\n"); 11431 #endif 11432 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11433 ctxt->sax->endDocument(ctxt->userData); 11434 goto done; 11435 } 11436 break; 11437 case XML_PARSER_DTD: { 11438 /* 11439 * Sorry but progressive parsing of the internal subset 11440 * is not expected to be supported. We first check that 11441 * the full content of the internal subset is available and 11442 * the parsing is launched only at that point. 11443 * Internal subset ends up with "']' S? '>'" in an unescaped 11444 * section and not in a ']]>' sequence which are conditional 11445 * sections (whoever argued to keep that crap in XML deserve 11446 * a place in hell !). 11447 */ 11448 int base, i; 11449 xmlChar *buf; 11450 xmlChar quote = 0; 11451 11452 base = ctxt->input->cur - ctxt->input->base; 11453 if (base < 0) return(0); 11454 if (ctxt->checkIndex > base) 11455 base = ctxt->checkIndex; 11456 buf = ctxt->input->buf->buffer->content; 11457 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 11458 base++) { 11459 if (quote != 0) { 11460 if (buf[base] == quote) 11461 quote = 0; 11462 continue; 11463 } 11464 if ((quote == 0) && (buf[base] == '<')) { 11465 int found = 0; 11466 /* special handling of comments */ 11467 if (((unsigned int) base + 4 < 11468 ctxt->input->buf->buffer->use) && 11469 (buf[base + 1] == '!') && 11470 (buf[base + 2] == '-') && 11471 (buf[base + 3] == '-')) { 11472 for (;(unsigned int) base + 3 < 11473 ctxt->input->buf->buffer->use; base++) { 11474 if ((buf[base] == '-') && 11475 (buf[base + 1] == '-') && 11476 (buf[base + 2] == '>')) { 11477 found = 1; 11478 base += 2; 11479 break; 11480 } 11481 } 11482 if (!found) { 11483 #if 0 11484 fprintf(stderr, "unfinished comment\n"); 11485 #endif 11486 break; /* for */ 11487 } 11488 continue; 11489 } 11490 } 11491 if (buf[base] == '"') { 11492 quote 
= '"'; 11493 continue; 11494 } 11495 if (buf[base] == '\'') { 11496 quote = '\''; 11497 continue; 11498 } 11499 if (buf[base] == ']') { 11500 #if 0 11501 fprintf(stderr, "%c%c%c%c: ", buf[base], 11502 buf[base + 1], buf[base + 2], buf[base + 3]); 11503 #endif 11504 if ((unsigned int) base +1 >= 11505 ctxt->input->buf->buffer->use) 11506 break; 11507 if (buf[base + 1] == ']') { 11508 /* conditional crap, skip both ']' ! */ 11509 base++; 11510 continue; 11511 } 11512 for (i = 1; 11513 (unsigned int) base + i < ctxt->input->buf->buffer->use; 11514 i++) { 11515 if (buf[base + i] == '>') { 11516 #if 0 11517 fprintf(stderr, "found\n"); 11518 #endif 11519 goto found_end_int_subset; 11520 } 11521 if (!IS_BLANK_CH(buf[base + i])) { 11522 #if 0 11523 fprintf(stderr, "not found\n"); 11524 #endif 11525 goto not_end_of_int_subset; 11526 } 11527 } 11528 #if 0 11529 fprintf(stderr, "end of stream\n"); 11530 #endif 11531 break; 11532 11533 } 11534 not_end_of_int_subset: 11535 continue; /* for */ 11536 } 11537 /* 11538 * We didn't found the end of the Internal subset 11539 */ 11540 #ifdef DEBUG_PUSH 11541 if (next == 0) 11542 xmlGenericError(xmlGenericErrorContext, 11543 "PP: lookup of int subset end filed\n"); 11544 #endif 11545 goto done; 11546 11547 found_end_int_subset: 11548 xmlParseInternalSubset(ctxt); 11549 if (ctxt->instate == XML_PARSER_EOF) 11550 goto done; 11551 ctxt->inSubset = 2; 11552 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11553 (ctxt->sax->externalSubset != NULL)) 11554 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11555 ctxt->extSubSystem, ctxt->extSubURI); 11556 ctxt->inSubset = 0; 11557 xmlCleanSpecialAttr(ctxt); 11558 if (ctxt->instate == XML_PARSER_EOF) 11559 goto done; 11560 ctxt->instate = XML_PARSER_PROLOG; 11561 ctxt->checkIndex = 0; 11562 #ifdef DEBUG_PUSH 11563 xmlGenericError(xmlGenericErrorContext, 11564 "PP: entering PROLOG\n"); 11565 #endif 11566 break; 11567 } 11568 case XML_PARSER_COMMENT: 11569 
xmlGenericError(xmlGenericErrorContext, 11570 "PP: internal error, state == COMMENT\n"); 11571 ctxt->instate = XML_PARSER_CONTENT; 11572 #ifdef DEBUG_PUSH 11573 xmlGenericError(xmlGenericErrorContext, 11574 "PP: entering CONTENT\n"); 11575 #endif 11576 break; 11577 case XML_PARSER_IGNORE: 11578 xmlGenericError(xmlGenericErrorContext, 11579 "PP: internal error, state == IGNORE"); 11580 ctxt->instate = XML_PARSER_DTD; 11581 #ifdef DEBUG_PUSH 11582 xmlGenericError(xmlGenericErrorContext, 11583 "PP: entering DTD\n"); 11584 #endif 11585 break; 11586 case XML_PARSER_PI: 11587 xmlGenericError(xmlGenericErrorContext, 11588 "PP: internal error, state == PI\n"); 11589 ctxt->instate = XML_PARSER_CONTENT; 11590 #ifdef DEBUG_PUSH 11591 xmlGenericError(xmlGenericErrorContext, 11592 "PP: entering CONTENT\n"); 11593 #endif 11594 break; 11595 case XML_PARSER_ENTITY_DECL: 11596 xmlGenericError(xmlGenericErrorContext, 11597 "PP: internal error, state == ENTITY_DECL\n"); 11598 ctxt->instate = XML_PARSER_DTD; 11599 #ifdef DEBUG_PUSH 11600 xmlGenericError(xmlGenericErrorContext, 11601 "PP: entering DTD\n"); 11602 #endif 11603 break; 11604 case XML_PARSER_ENTITY_VALUE: 11605 xmlGenericError(xmlGenericErrorContext, 11606 "PP: internal error, state == ENTITY_VALUE\n"); 11607 ctxt->instate = XML_PARSER_CONTENT; 11608 #ifdef DEBUG_PUSH 11609 xmlGenericError(xmlGenericErrorContext, 11610 "PP: entering DTD\n"); 11611 #endif 11612 break; 11613 case XML_PARSER_ATTRIBUTE_VALUE: 11614 xmlGenericError(xmlGenericErrorContext, 11615 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 11616 ctxt->instate = XML_PARSER_START_TAG; 11617 #ifdef DEBUG_PUSH 11618 xmlGenericError(xmlGenericErrorContext, 11619 "PP: entering START_TAG\n"); 11620 #endif 11621 break; 11622 case XML_PARSER_SYSTEM_LITERAL: 11623 xmlGenericError(xmlGenericErrorContext, 11624 "PP: internal error, state == SYSTEM_LITERAL\n"); 11625 ctxt->instate = XML_PARSER_START_TAG; 11626 #ifdef DEBUG_PUSH 11627 
xmlGenericError(xmlGenericErrorContext, 11628 "PP: entering START_TAG\n"); 11629 #endif 11630 break; 11631 case XML_PARSER_PUBLIC_LITERAL: 11632 xmlGenericError(xmlGenericErrorContext, 11633 "PP: internal error, state == PUBLIC_LITERAL\n"); 11634 ctxt->instate = XML_PARSER_START_TAG; 11635 #ifdef DEBUG_PUSH 11636 xmlGenericError(xmlGenericErrorContext, 11637 "PP: entering START_TAG\n"); 11638 #endif 11639 break; 11640 } 11641 } 11642 done: 11643 #ifdef DEBUG_PUSH 11644 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 11645 #endif 11646 return(ret); 11647 encoding_error: 11648 { 11649 char buffer[150]; 11650 11651 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 11652 ctxt->input->cur[0], ctxt->input->cur[1], 11653 ctxt->input->cur[2], ctxt->input->cur[3]); 11654 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 11655 "Input is not proper UTF-8, indicate encoding !\n%s", 11656 BAD_CAST buffer, NULL); 11657 } 11658 return(0); 11659 } 11660 11661 /** 11662 * xmlParseChunk: 11663 * @ctxt: an XML parser context 11664 * @chunk: an char array 11665 * @size: the size in byte of the chunk 11666 * @terminate: last chunk indicator 11667 * 11668 * Parse a Chunk of memory 11669 * 11670 * Returns zero if no error, the xmlParserErrors otherwise. 
11671 */ 11672 int 11673 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 11674 int terminate) { 11675 int end_in_lf = 0; 11676 int remain = 0; 11677 11678 if (ctxt == NULL) 11679 return(XML_ERR_INTERNAL_ERROR); 11680 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11681 return(ctxt->errNo); 11682 if (ctxt->instate == XML_PARSER_EOF) 11683 return(-1); 11684 if (ctxt->instate == XML_PARSER_START) 11685 xmlDetectSAX2(ctxt); 11686 if ((size > 0) && (chunk != NULL) && (!terminate) && 11687 (chunk[size - 1] == '\r')) { 11688 end_in_lf = 1; 11689 size--; 11690 } 11691 11692 xmldecl_done: 11693 11694 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 11695 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 11696 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11697 int cur = ctxt->input->cur - ctxt->input->base; 11698 int res; 11699 11700 /* 11701 * Specific handling if we autodetected an encoding, we should not 11702 * push more than the first line ... which depend on the encoding 11703 * And only push the rest once the final encoding was detected 11704 */ 11705 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 11706 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 11707 unsigned int len = 45; 11708 11709 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11710 BAD_CAST "UTF-16")) || 11711 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11712 BAD_CAST "UTF16"))) 11713 len = 90; 11714 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11715 BAD_CAST "UCS-4")) || 11716 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11717 BAD_CAST "UCS4"))) 11718 len = 180; 11719 11720 if (ctxt->input->buf->rawconsumed < len) 11721 len -= ctxt->input->buf->rawconsumed; 11722 11723 /* 11724 * Change size for reading the initial declaration only 11725 * if size is greater than len. 
Otherwise, memmove in xmlBufferAdd 11726 * will blindly copy extra bytes from memory. 11727 */ 11728 if (size > len) { 11729 remain = size - len; 11730 size = len; 11731 } else { 11732 remain = 0; 11733 } 11734 } 11735 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11736 if (res < 0) { 11737 ctxt->errNo = XML_PARSER_EOF; 11738 ctxt->disableSAX = 1; 11739 return (XML_PARSER_EOF); 11740 } 11741 ctxt->input->base = ctxt->input->buf->buffer->content + base; 11742 ctxt->input->cur = ctxt->input->base + cur; 11743 ctxt->input->end = 11744 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11745 #ifdef DEBUG_PUSH 11746 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11747 #endif 11748 11749 } else if (ctxt->instate != XML_PARSER_EOF) { 11750 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 11751 xmlParserInputBufferPtr in = ctxt->input->buf; 11752 if ((in->encoder != NULL) && (in->buffer != NULL) && 11753 (in->raw != NULL)) { 11754 int nbchars; 11755 11756 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 11757 if (nbchars < 0) { 11758 /* TODO 2.6.0 */ 11759 xmlGenericError(xmlGenericErrorContext, 11760 "xmlParseChunk: encoder error\n"); 11761 return(XML_ERR_INVALID_ENCODING); 11762 } 11763 } 11764 } 11765 } 11766 if (remain != 0) 11767 xmlParseTryOrFinish(ctxt, 0); 11768 else 11769 xmlParseTryOrFinish(ctxt, terminate); 11770 if (ctxt->instate == XML_PARSER_EOF) 11771 return(ctxt->errNo); 11772 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11773 return(ctxt->errNo); 11774 11775 if (remain != 0) { 11776 chunk += size; 11777 size = remain; 11778 remain = 0; 11779 goto xmldecl_done; 11780 } 11781 if ((end_in_lf == 1) && (ctxt->input != NULL) && 11782 (ctxt->input->buf != NULL)) { 11783 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 11784 } 11785 if (terminate) { 11786 /* 11787 * Check for termination 11788 */ 11789 int avail = 0; 11790 11791 if (ctxt->input != NULL) { 11792 if 
(ctxt->input->buf == NULL) 11793 avail = ctxt->input->length - 11794 (ctxt->input->cur - ctxt->input->base); 11795 else 11796 avail = ctxt->input->buf->buffer->use - 11797 (ctxt->input->cur - ctxt->input->base); 11798 } 11799 11800 if ((ctxt->instate != XML_PARSER_EOF) && 11801 (ctxt->instate != XML_PARSER_EPILOG)) { 11802 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11803 } 11804 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 11805 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11806 } 11807 if (ctxt->instate != XML_PARSER_EOF) { 11808 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11809 ctxt->sax->endDocument(ctxt->userData); 11810 } 11811 ctxt->instate = XML_PARSER_EOF; 11812 } 11813 return((xmlParserErrors) ctxt->errNo); 11814 } 11815 11816 /************************************************************************ 11817 * * 11818 * I/O front end functions to the parser * 11819 * * 11820 ************************************************************************/ 11821 11822 /** 11823 * xmlCreatePushParserCtxt: 11824 * @sax: a SAX handler 11825 * @user_data: The user data returned on SAX callbacks 11826 * @chunk: a pointer to an array of chars 11827 * @size: number of chars in the array 11828 * @filename: an optional file name or URI 11829 * 11830 * Create a parser context for using the XML parser in push mode. 11831 * If @buffer and @size are non-NULL, the data is used to detect 11832 * the encoding. The remaining characters will be parsed so they 11833 * don't need to be fed in again through xmlParseChunk. 11834 * To allow content encoding detection, @size should be >= 4 11835 * The value of @filename is used for fetching external entities 11836 * and error/warning reports. 
11837 * 11838 * Returns the new parser context or NULL 11839 */ 11840 11841 xmlParserCtxtPtr 11842 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11843 const char *chunk, int size, const char *filename) { 11844 xmlParserCtxtPtr ctxt; 11845 xmlParserInputPtr inputStream; 11846 xmlParserInputBufferPtr buf; 11847 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 11848 11849 /* 11850 * plug some encoding conversion routines 11851 */ 11852 if ((chunk != NULL) && (size >= 4)) 11853 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 11854 11855 buf = xmlAllocParserInputBuffer(enc); 11856 if (buf == NULL) return(NULL); 11857 11858 ctxt = xmlNewParserCtxt(); 11859 if (ctxt == NULL) { 11860 xmlErrMemory(NULL, "creating parser: out of memory\n"); 11861 xmlFreeParserInputBuffer(buf); 11862 return(NULL); 11863 } 11864 ctxt->dictNames = 1; 11865 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 11866 if (ctxt->pushTab == NULL) { 11867 xmlErrMemory(ctxt, NULL); 11868 xmlFreeParserInputBuffer(buf); 11869 xmlFreeParserCtxt(ctxt); 11870 return(NULL); 11871 } 11872 if (sax != NULL) { 11873 #ifdef LIBXML_SAX1_ENABLED 11874 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11875 #endif /* LIBXML_SAX1_ENABLED */ 11876 xmlFree(ctxt->sax); 11877 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11878 if (ctxt->sax == NULL) { 11879 xmlErrMemory(ctxt, NULL); 11880 xmlFreeParserInputBuffer(buf); 11881 xmlFreeParserCtxt(ctxt); 11882 return(NULL); 11883 } 11884 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11885 if (sax->initialized == XML_SAX2_MAGIC) 11886 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11887 else 11888 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11889 if (user_data != NULL) 11890 ctxt->userData = user_data; 11891 } 11892 if (filename == NULL) { 11893 ctxt->directory = NULL; 11894 } else { 11895 ctxt->directory = xmlParserGetDirectory(filename); 11896 } 11897 11898 inputStream = xmlNewInputStream(ctxt); 
11899 if (inputStream == NULL) { 11900 xmlFreeParserCtxt(ctxt); 11901 xmlFreeParserInputBuffer(buf); 11902 return(NULL); 11903 } 11904 11905 if (filename == NULL) 11906 inputStream->filename = NULL; 11907 else { 11908 inputStream->filename = (char *) 11909 xmlCanonicPath((const xmlChar *) filename); 11910 if (inputStream->filename == NULL) { 11911 xmlFreeParserCtxt(ctxt); 11912 xmlFreeParserInputBuffer(buf); 11913 return(NULL); 11914 } 11915 } 11916 inputStream->buf = buf; 11917 inputStream->base = inputStream->buf->buffer->content; 11918 inputStream->cur = inputStream->buf->buffer->content; 11919 inputStream->end = 11920 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 11921 11922 inputPush(ctxt, inputStream); 11923 11924 /* 11925 * If the caller didn't provide an initial 'chunk' for determining 11926 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 11927 * that it can be automatically determined later 11928 */ 11929 if ((size == 0) || (chunk == NULL)) { 11930 ctxt->charset = XML_CHAR_ENCODING_NONE; 11931 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 11932 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11933 int cur = ctxt->input->cur - ctxt->input->base; 11934 11935 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11936 11937 ctxt->input->base = ctxt->input->buf->buffer->content + base; 11938 ctxt->input->cur = ctxt->input->base + cur; 11939 ctxt->input->end = 11940 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11941 #ifdef DEBUG_PUSH 11942 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11943 #endif 11944 } 11945 11946 if (enc != XML_CHAR_ENCODING_NONE) { 11947 xmlSwitchEncoding(ctxt, enc); 11948 } 11949 11950 return(ctxt); 11951 } 11952 #endif /* LIBXML_PUSH_ENABLED */ 11953 11954 /** 11955 * xmlStopParser: 11956 * @ctxt: an XML parser context 11957 * 11958 * Blocks further parser processing 11959 */ 11960 void 11961 
xmlStopParser(xmlParserCtxtPtr ctxt) { 11962 if (ctxt == NULL) 11963 return; 11964 ctxt->instate = XML_PARSER_EOF; 11965 ctxt->errNo = XML_ERR_USER_STOP; 11966 ctxt->disableSAX = 1; 11967 if (ctxt->input != NULL) { 11968 ctxt->input->cur = BAD_CAST""; 11969 ctxt->input->base = ctxt->input->cur; 11970 } 11971 } 11972 11973 /** 11974 * xmlCreateIOParserCtxt: 11975 * @sax: a SAX handler 11976 * @user_data: The user data returned on SAX callbacks 11977 * @ioread: an I/O read function 11978 * @ioclose: an I/O close function 11979 * @ioctx: an I/O handler 11980 * @enc: the charset encoding if known 11981 * 11982 * Create a parser context for using the XML parser with an existing 11983 * I/O stream 11984 * 11985 * Returns the new parser context or NULL 11986 */ 11987 xmlParserCtxtPtr 11988 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11989 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 11990 void *ioctx, xmlCharEncoding enc) { 11991 xmlParserCtxtPtr ctxt; 11992 xmlParserInputPtr inputStream; 11993 xmlParserInputBufferPtr buf; 11994 11995 if (ioread == NULL) return(NULL); 11996 11997 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 11998 if (buf == NULL) return(NULL); 11999 12000 ctxt = xmlNewParserCtxt(); 12001 if (ctxt == NULL) { 12002 xmlFreeParserInputBuffer(buf); 12003 return(NULL); 12004 } 12005 if (sax != NULL) { 12006 #ifdef LIBXML_SAX1_ENABLED 12007 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12008 #endif /* LIBXML_SAX1_ENABLED */ 12009 xmlFree(ctxt->sax); 12010 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12011 if (ctxt->sax == NULL) { 12012 xmlErrMemory(ctxt, NULL); 12013 xmlFreeParserCtxt(ctxt); 12014 return(NULL); 12015 } 12016 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12017 if (sax->initialized == XML_SAX2_MAGIC) 12018 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12019 else 12020 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12021 if (user_data != NULL) 12022 
ctxt->userData = user_data; 12023 } 12024 12025 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12026 if (inputStream == NULL) { 12027 xmlFreeParserCtxt(ctxt); 12028 return(NULL); 12029 } 12030 inputPush(ctxt, inputStream); 12031 12032 return(ctxt); 12033 } 12034 12035 #ifdef LIBXML_VALID_ENABLED 12036 /************************************************************************ 12037 * * 12038 * Front ends when parsing a DTD * 12039 * * 12040 ************************************************************************/ 12041 12042 /** 12043 * xmlIOParseDTD: 12044 * @sax: the SAX handler block or NULL 12045 * @input: an Input Buffer 12046 * @enc: the charset encoding if known 12047 * 12048 * Load and parse a DTD 12049 * 12050 * Returns the resulting xmlDtdPtr or NULL in case of error. 12051 * @input will be freed by the function in any case. 12052 */ 12053 12054 xmlDtdPtr 12055 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12056 xmlCharEncoding enc) { 12057 xmlDtdPtr ret = NULL; 12058 xmlParserCtxtPtr ctxt; 12059 xmlParserInputPtr pinput = NULL; 12060 xmlChar start[4]; 12061 12062 if (input == NULL) 12063 return(NULL); 12064 12065 ctxt = xmlNewParserCtxt(); 12066 if (ctxt == NULL) { 12067 xmlFreeParserInputBuffer(input); 12068 return(NULL); 12069 } 12070 12071 /* 12072 * Set-up the SAX context 12073 */ 12074 if (sax != NULL) { 12075 if (ctxt->sax != NULL) 12076 xmlFree(ctxt->sax); 12077 ctxt->sax = sax; 12078 ctxt->userData = ctxt; 12079 } 12080 xmlDetectSAX2(ctxt); 12081 12082 /* 12083 * generate a parser input from the I/O handler 12084 */ 12085 12086 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12087 if (pinput == NULL) { 12088 if (sax != NULL) ctxt->sax = NULL; 12089 xmlFreeParserInputBuffer(input); 12090 xmlFreeParserCtxt(ctxt); 12091 return(NULL); 12092 } 12093 12094 /* 12095 * plug some encoding conversion routines here. 
12096 */ 12097 if (xmlPushInput(ctxt, pinput) < 0) { 12098 if (sax != NULL) ctxt->sax = NULL; 12099 xmlFreeParserCtxt(ctxt); 12100 return(NULL); 12101 } 12102 if (enc != XML_CHAR_ENCODING_NONE) { 12103 xmlSwitchEncoding(ctxt, enc); 12104 } 12105 12106 pinput->filename = NULL; 12107 pinput->line = 1; 12108 pinput->col = 1; 12109 pinput->base = ctxt->input->cur; 12110 pinput->cur = ctxt->input->cur; 12111 pinput->free = NULL; 12112 12113 /* 12114 * let's parse that entity knowing it's an external subset. 12115 */ 12116 ctxt->inSubset = 2; 12117 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12118 if (ctxt->myDoc == NULL) { 12119 xmlErrMemory(ctxt, "New Doc failed"); 12120 return(NULL); 12121 } 12122 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12123 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12124 BAD_CAST "none", BAD_CAST "none"); 12125 12126 if ((enc == XML_CHAR_ENCODING_NONE) && 12127 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12128 /* 12129 * Get the 4 first bytes and decode the charset 12130 * if enc != XML_CHAR_ENCODING_NONE 12131 * plug some encoding conversion routines. 
12132 */ 12133 start[0] = RAW; 12134 start[1] = NXT(1); 12135 start[2] = NXT(2); 12136 start[3] = NXT(3); 12137 enc = xmlDetectCharEncoding(start, 4); 12138 if (enc != XML_CHAR_ENCODING_NONE) { 12139 xmlSwitchEncoding(ctxt, enc); 12140 } 12141 } 12142 12143 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12144 12145 if (ctxt->myDoc != NULL) { 12146 if (ctxt->wellFormed) { 12147 ret = ctxt->myDoc->extSubset; 12148 ctxt->myDoc->extSubset = NULL; 12149 if (ret != NULL) { 12150 xmlNodePtr tmp; 12151 12152 ret->doc = NULL; 12153 tmp = ret->children; 12154 while (tmp != NULL) { 12155 tmp->doc = NULL; 12156 tmp = tmp->next; 12157 } 12158 } 12159 } else { 12160 ret = NULL; 12161 } 12162 xmlFreeDoc(ctxt->myDoc); 12163 ctxt->myDoc = NULL; 12164 } 12165 if (sax != NULL) ctxt->sax = NULL; 12166 xmlFreeParserCtxt(ctxt); 12167 12168 return(ret); 12169 } 12170 12171 /** 12172 * xmlSAXParseDTD: 12173 * @sax: the SAX handler block 12174 * @ExternalID: a NAME* containing the External ID of the DTD 12175 * @SystemID: a NAME* containing the URL to the DTD 12176 * 12177 * Load and parse an external subset. 12178 * 12179 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12180 */ 12181 12182 xmlDtdPtr 12183 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12184 const xmlChar *SystemID) { 12185 xmlDtdPtr ret = NULL; 12186 xmlParserCtxtPtr ctxt; 12187 xmlParserInputPtr input = NULL; 12188 xmlCharEncoding enc; 12189 xmlChar* systemIdCanonic; 12190 12191 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12192 12193 ctxt = xmlNewParserCtxt(); 12194 if (ctxt == NULL) { 12195 return(NULL); 12196 } 12197 12198 /* 12199 * Set-up the SAX context 12200 */ 12201 if (sax != NULL) { 12202 if (ctxt->sax != NULL) 12203 xmlFree(ctxt->sax); 12204 ctxt->sax = sax; 12205 ctxt->userData = ctxt; 12206 } 12207 12208 /* 12209 * Canonicalise the system ID 12210 */ 12211 systemIdCanonic = xmlCanonicPath(SystemID); 12212 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12213 xmlFreeParserCtxt(ctxt); 12214 return(NULL); 12215 } 12216 12217 /* 12218 * Ask the Entity resolver to load the damn thing 12219 */ 12220 12221 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12222 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12223 systemIdCanonic); 12224 if (input == NULL) { 12225 if (sax != NULL) ctxt->sax = NULL; 12226 xmlFreeParserCtxt(ctxt); 12227 if (systemIdCanonic != NULL) 12228 xmlFree(systemIdCanonic); 12229 return(NULL); 12230 } 12231 12232 /* 12233 * plug some encoding conversion routines here. 
12234 */ 12235 if (xmlPushInput(ctxt, input) < 0) { 12236 if (sax != NULL) ctxt->sax = NULL; 12237 xmlFreeParserCtxt(ctxt); 12238 if (systemIdCanonic != NULL) 12239 xmlFree(systemIdCanonic); 12240 return(NULL); 12241 } 12242 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12243 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12244 xmlSwitchEncoding(ctxt, enc); 12245 } 12246 12247 if (input->filename == NULL) 12248 input->filename = (char *) systemIdCanonic; 12249 else 12250 xmlFree(systemIdCanonic); 12251 input->line = 1; 12252 input->col = 1; 12253 input->base = ctxt->input->cur; 12254 input->cur = ctxt->input->cur; 12255 input->free = NULL; 12256 12257 /* 12258 * let's parse that entity knowing it's an external subset. 12259 */ 12260 ctxt->inSubset = 2; 12261 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12262 if (ctxt->myDoc == NULL) { 12263 xmlErrMemory(ctxt, "New Doc failed"); 12264 if (sax != NULL) ctxt->sax = NULL; 12265 xmlFreeParserCtxt(ctxt); 12266 return(NULL); 12267 } 12268 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12269 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12270 ExternalID, SystemID); 12271 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12272 12273 if (ctxt->myDoc != NULL) { 12274 if (ctxt->wellFormed) { 12275 ret = ctxt->myDoc->extSubset; 12276 ctxt->myDoc->extSubset = NULL; 12277 if (ret != NULL) { 12278 xmlNodePtr tmp; 12279 12280 ret->doc = NULL; 12281 tmp = ret->children; 12282 while (tmp != NULL) { 12283 tmp->doc = NULL; 12284 tmp = tmp->next; 12285 } 12286 } 12287 } else { 12288 ret = NULL; 12289 } 12290 xmlFreeDoc(ctxt->myDoc); 12291 ctxt->myDoc = NULL; 12292 } 12293 if (sax != NULL) ctxt->sax = NULL; 12294 xmlFreeParserCtxt(ctxt); 12295 12296 return(ret); 12297 } 12298 12299 12300 /** 12301 * xmlParseDTD: 12302 * @ExternalID: a NAME* containing the External ID of the DTD 12303 * @SystemID: a NAME* containing the URL to the DTD 12304 * 12305 * Load and parse an external subset. 
12306 * 12307 * Returns the resulting xmlDtdPtr or NULL in case of error. 12308 */ 12309 12310 xmlDtdPtr 12311 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12312 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12313 } 12314 #endif /* LIBXML_VALID_ENABLED */ 12315 12316 /************************************************************************ 12317 * * 12318 * Front ends when parsing an Entity * 12319 * * 12320 ************************************************************************/ 12321 12322 /** 12323 * xmlParseCtxtExternalEntity: 12324 * @ctx: the existing parsing context 12325 * @URL: the URL for the entity to load 12326 * @ID: the System ID for the entity to load 12327 * @lst: the return value for the set of parsed nodes 12328 * 12329 * Parse an external general entity within an existing parsing context 12330 * An external general parsed entity is well-formed if it matches the 12331 * production labeled extParsedEnt. 12332 * 12333 * [78] extParsedEnt ::= TextDecl? 
content 12334 * 12335 * Returns 0 if the entity is well formed, -1 in case of args problem and 12336 * the parser error code otherwise 12337 */ 12338 12339 int 12340 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12341 const xmlChar *ID, xmlNodePtr *lst) { 12342 xmlParserCtxtPtr ctxt; 12343 xmlDocPtr newDoc; 12344 xmlNodePtr newRoot; 12345 xmlSAXHandlerPtr oldsax = NULL; 12346 int ret = 0; 12347 xmlChar start[4]; 12348 xmlCharEncoding enc; 12349 12350 if (ctx == NULL) return(-1); 12351 12352 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12353 (ctx->depth > 1024)) { 12354 return(XML_ERR_ENTITY_LOOP); 12355 } 12356 12357 if (lst != NULL) 12358 *lst = NULL; 12359 if ((URL == NULL) && (ID == NULL)) 12360 return(-1); 12361 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12362 return(-1); 12363 12364 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12365 if (ctxt == NULL) { 12366 return(-1); 12367 } 12368 12369 oldsax = ctxt->sax; 12370 ctxt->sax = ctx->sax; 12371 xmlDetectSAX2(ctxt); 12372 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12373 if (newDoc == NULL) { 12374 xmlFreeParserCtxt(ctxt); 12375 return(-1); 12376 } 12377 newDoc->properties = XML_DOC_INTERNAL; 12378 if (ctx->myDoc->dict) { 12379 newDoc->dict = ctx->myDoc->dict; 12380 xmlDictReference(newDoc->dict); 12381 } 12382 if (ctx->myDoc != NULL) { 12383 newDoc->intSubset = ctx->myDoc->intSubset; 12384 newDoc->extSubset = ctx->myDoc->extSubset; 12385 } 12386 if (ctx->myDoc->URL != NULL) { 12387 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12388 } 12389 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12390 if (newRoot == NULL) { 12391 ctxt->sax = oldsax; 12392 xmlFreeParserCtxt(ctxt); 12393 newDoc->intSubset = NULL; 12394 newDoc->extSubset = NULL; 12395 xmlFreeDoc(newDoc); 12396 return(-1); 12397 } 12398 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12399 nodePush(ctxt, newDoc->children); 12400 if (ctx->myDoc == NULL) { 12401 
ctxt->myDoc = newDoc; 12402 } else { 12403 ctxt->myDoc = ctx->myDoc; 12404 newDoc->children->doc = ctx->myDoc; 12405 } 12406 12407 /* 12408 * Get the 4 first bytes and decode the charset 12409 * if enc != XML_CHAR_ENCODING_NONE 12410 * plug some encoding conversion routines. 12411 */ 12412 GROW 12413 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12414 start[0] = RAW; 12415 start[1] = NXT(1); 12416 start[2] = NXT(2); 12417 start[3] = NXT(3); 12418 enc = xmlDetectCharEncoding(start, 4); 12419 if (enc != XML_CHAR_ENCODING_NONE) { 12420 xmlSwitchEncoding(ctxt, enc); 12421 } 12422 } 12423 12424 /* 12425 * Parse a possible text declaration first 12426 */ 12427 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12428 xmlParseTextDecl(ctxt); 12429 /* 12430 * An XML-1.0 document can't reference an entity not XML-1.0 12431 */ 12432 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12433 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12434 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12435 "Version mismatch between document and entity\n"); 12436 } 12437 } 12438 12439 /* 12440 * Doing validity checking on chunk doesn't make sense 12441 */ 12442 ctxt->instate = XML_PARSER_CONTENT; 12443 ctxt->validate = ctx->validate; 12444 ctxt->valid = ctx->valid; 12445 ctxt->loadsubset = ctx->loadsubset; 12446 ctxt->depth = ctx->depth + 1; 12447 ctxt->replaceEntities = ctx->replaceEntities; 12448 if (ctxt->validate) { 12449 ctxt->vctxt.error = ctx->vctxt.error; 12450 ctxt->vctxt.warning = ctx->vctxt.warning; 12451 } else { 12452 ctxt->vctxt.error = NULL; 12453 ctxt->vctxt.warning = NULL; 12454 } 12455 ctxt->vctxt.nodeTab = NULL; 12456 ctxt->vctxt.nodeNr = 0; 12457 ctxt->vctxt.nodeMax = 0; 12458 ctxt->vctxt.node = NULL; 12459 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12460 ctxt->dict = ctx->dict; 12461 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12462 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12463 
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12464 ctxt->dictNames = ctx->dictNames; 12465 ctxt->attsDefault = ctx->attsDefault; 12466 ctxt->attsSpecial = ctx->attsSpecial; 12467 ctxt->linenumbers = ctx->linenumbers; 12468 12469 xmlParseContent(ctxt); 12470 12471 ctx->validate = ctxt->validate; 12472 ctx->valid = ctxt->valid; 12473 if ((RAW == '<') && (NXT(1) == '/')) { 12474 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12475 } else if (RAW != 0) { 12476 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12477 } 12478 if (ctxt->node != newDoc->children) { 12479 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12480 } 12481 12482 if (!ctxt->wellFormed) { 12483 if (ctxt->errNo == 0) 12484 ret = 1; 12485 else 12486 ret = ctxt->errNo; 12487 } else { 12488 if (lst != NULL) { 12489 xmlNodePtr cur; 12490 12491 /* 12492 * Return the newly created nodeset after unlinking it from 12493 * they pseudo parent. 12494 */ 12495 cur = newDoc->children->children; 12496 *lst = cur; 12497 while (cur != NULL) { 12498 cur->parent = NULL; 12499 cur = cur->next; 12500 } 12501 newDoc->children->children = NULL; 12502 } 12503 ret = 0; 12504 } 12505 ctxt->sax = oldsax; 12506 ctxt->dict = NULL; 12507 ctxt->attsDefault = NULL; 12508 ctxt->attsSpecial = NULL; 12509 xmlFreeParserCtxt(ctxt); 12510 newDoc->intSubset = NULL; 12511 newDoc->extSubset = NULL; 12512 xmlFreeDoc(newDoc); 12513 12514 return(ret); 12515 } 12516 12517 /** 12518 * xmlParseExternalEntityPrivate: 12519 * @doc: the document the chunk pertains to 12520 * @oldctxt: the previous parser context if available 12521 * @sax: the SAX handler bloc (possibly NULL) 12522 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12523 * @depth: Used for loop detection, use 0 12524 * @URL: the URL for the entity to load 12525 * @ID: the System ID for the entity to load 12526 * @list: the return value for the set of parsed nodes 12527 * 12528 * Private version of xmlParseExternalEntity() 12529 * 
12530 * Returns 0 if the entity is well formed, -1 in case of args problem and 12531 * the parser error code otherwise 12532 */ 12533 12534 static xmlParserErrors 12535 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 12536 xmlSAXHandlerPtr sax, 12537 void *user_data, int depth, const xmlChar *URL, 12538 const xmlChar *ID, xmlNodePtr *list) { 12539 xmlParserCtxtPtr ctxt; 12540 xmlDocPtr newDoc; 12541 xmlNodePtr newRoot; 12542 xmlSAXHandlerPtr oldsax = NULL; 12543 xmlParserErrors ret = XML_ERR_OK; 12544 xmlChar start[4]; 12545 xmlCharEncoding enc; 12546 12547 if (((depth > 40) && 12548 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 12549 (depth > 1024)) { 12550 return(XML_ERR_ENTITY_LOOP); 12551 } 12552 12553 if (list != NULL) 12554 *list = NULL; 12555 if ((URL == NULL) && (ID == NULL)) 12556 return(XML_ERR_INTERNAL_ERROR); 12557 if (doc == NULL) 12558 return(XML_ERR_INTERNAL_ERROR); 12559 12560 12561 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 12562 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12563 ctxt->userData = ctxt; 12564 if (oldctxt != NULL) { 12565 ctxt->_private = oldctxt->_private; 12566 ctxt->loadsubset = oldctxt->loadsubset; 12567 ctxt->validate = oldctxt->validate; 12568 ctxt->external = oldctxt->external; 12569 ctxt->record_info = oldctxt->record_info; 12570 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 12571 ctxt->node_seq.length = oldctxt->node_seq.length; 12572 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 12573 } else { 12574 /* 12575 * Doing validity checking on chunk without context 12576 * doesn't make sense 12577 */ 12578 ctxt->_private = NULL; 12579 ctxt->validate = 0; 12580 ctxt->external = 2; 12581 ctxt->loadsubset = 0; 12582 } 12583 if (sax != NULL) { 12584 oldsax = ctxt->sax; 12585 ctxt->sax = sax; 12586 if (user_data != NULL) 12587 ctxt->userData = user_data; 12588 } 12589 xmlDetectSAX2(ctxt); 12590 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12591 if (newDoc == 
NULL) { 12592 ctxt->node_seq.maximum = 0; 12593 ctxt->node_seq.length = 0; 12594 ctxt->node_seq.buffer = NULL; 12595 xmlFreeParserCtxt(ctxt); 12596 return(XML_ERR_INTERNAL_ERROR); 12597 } 12598 newDoc->properties = XML_DOC_INTERNAL; 12599 newDoc->intSubset = doc->intSubset; 12600 newDoc->extSubset = doc->extSubset; 12601 newDoc->dict = doc->dict; 12602 xmlDictReference(newDoc->dict); 12603 12604 if (doc->URL != NULL) { 12605 newDoc->URL = xmlStrdup(doc->URL); 12606 } 12607 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12608 if (newRoot == NULL) { 12609 if (sax != NULL) 12610 ctxt->sax = oldsax; 12611 ctxt->node_seq.maximum = 0; 12612 ctxt->node_seq.length = 0; 12613 ctxt->node_seq.buffer = NULL; 12614 xmlFreeParserCtxt(ctxt); 12615 newDoc->intSubset = NULL; 12616 newDoc->extSubset = NULL; 12617 xmlFreeDoc(newDoc); 12618 return(XML_ERR_INTERNAL_ERROR); 12619 } 12620 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12621 nodePush(ctxt, newDoc->children); 12622 ctxt->myDoc = doc; 12623 newRoot->doc = doc; 12624 12625 /* 12626 * Get the 4 first bytes and decode the charset 12627 * if enc != XML_CHAR_ENCODING_NONE 12628 * plug some encoding conversion routines. 
12629 */ 12630 GROW; 12631 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12632 start[0] = RAW; 12633 start[1] = NXT(1); 12634 start[2] = NXT(2); 12635 start[3] = NXT(3); 12636 enc = xmlDetectCharEncoding(start, 4); 12637 if (enc != XML_CHAR_ENCODING_NONE) { 12638 xmlSwitchEncoding(ctxt, enc); 12639 } 12640 } 12641 12642 /* 12643 * Parse a possible text declaration first 12644 */ 12645 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12646 xmlParseTextDecl(ctxt); 12647 } 12648 12649 ctxt->instate = XML_PARSER_CONTENT; 12650 ctxt->depth = depth; 12651 12652 xmlParseContent(ctxt); 12653 12654 if ((RAW == '<') && (NXT(1) == '/')) { 12655 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12656 } else if (RAW != 0) { 12657 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12658 } 12659 if (ctxt->node != newDoc->children) { 12660 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12661 } 12662 12663 if (!ctxt->wellFormed) { 12664 if (ctxt->errNo == 0) 12665 ret = XML_ERR_INTERNAL_ERROR; 12666 else 12667 ret = (xmlParserErrors)ctxt->errNo; 12668 } else { 12669 if (list != NULL) { 12670 xmlNodePtr cur; 12671 12672 /* 12673 * Return the newly created nodeset after unlinking it from 12674 * they pseudo parent. 12675 */ 12676 cur = newDoc->children->children; 12677 *list = cur; 12678 while (cur != NULL) { 12679 cur->parent = NULL; 12680 cur = cur->next; 12681 } 12682 newDoc->children->children = NULL; 12683 } 12684 ret = XML_ERR_OK; 12685 } 12686 12687 /* 12688 * Record in the parent context the number of entities replacement 12689 * done when parsing that reference. 
12690 */ 12691 if (oldctxt != NULL) 12692 oldctxt->nbentities += ctxt->nbentities; 12693 12694 /* 12695 * Also record the size of the entity parsed 12696 */ 12697 if (ctxt->input != NULL) { 12698 oldctxt->sizeentities += ctxt->input->consumed; 12699 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 12700 } 12701 /* 12702 * And record the last error if any 12703 */ 12704 if (ctxt->lastError.code != XML_ERR_OK) 12705 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12706 12707 if (sax != NULL) 12708 ctxt->sax = oldsax; 12709 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 12710 oldctxt->node_seq.length = ctxt->node_seq.length; 12711 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 12712 ctxt->node_seq.maximum = 0; 12713 ctxt->node_seq.length = 0; 12714 ctxt->node_seq.buffer = NULL; 12715 xmlFreeParserCtxt(ctxt); 12716 newDoc->intSubset = NULL; 12717 newDoc->extSubset = NULL; 12718 xmlFreeDoc(newDoc); 12719 12720 return(ret); 12721 } 12722 12723 #ifdef LIBXML_SAX1_ENABLED 12724 /** 12725 * xmlParseExternalEntity: 12726 * @doc: the document the chunk pertains to 12727 * @sax: the SAX handler bloc (possibly NULL) 12728 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12729 * @depth: Used for loop detection, use 0 12730 * @URL: the URL for the entity to load 12731 * @ID: the System ID for the entity to load 12732 * @lst: the return value for the set of parsed nodes 12733 * 12734 * Parse an external general entity 12735 * An external general parsed entity is well-formed if it matches the 12736 * production labeled extParsedEnt. 12737 * 12738 * [78] extParsedEnt ::= TextDecl? 
content 12739 * 12740 * Returns 0 if the entity is well formed, -1 in case of args problem and 12741 * the parser error code otherwise 12742 */ 12743 12744 int 12745 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 12746 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 12747 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 12748 ID, lst)); 12749 } 12750 12751 /** 12752 * xmlParseBalancedChunkMemory: 12753 * @doc: the document the chunk pertains to 12754 * @sax: the SAX handler bloc (possibly NULL) 12755 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12756 * @depth: Used for loop detection, use 0 12757 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12758 * @lst: the return value for the set of parsed nodes 12759 * 12760 * Parse a well-balanced chunk of an XML document 12761 * called by the parser 12762 * The allowed sequence for the Well Balanced Chunk is the one defined by 12763 * the content production in the XML grammar: 12764 * 12765 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12766 * 12767 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 12768 * the parser error code otherwise 12769 */ 12770 12771 int 12772 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 12773 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 12774 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 12775 depth, string, lst, 0 ); 12776 } 12777 #endif /* LIBXML_SAX1_ENABLED */ 12778 12779 /** 12780 * xmlParseBalancedChunkMemoryInternal: 12781 * @oldctxt: the existing parsing context 12782 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12783 * @user_data: the user data field for the parser context 12784 * @lst: the return value for the set of parsed nodes 12785 * 12786 * 12787 * Parse a well-balanced chunk of an XML document 12788 * called by the parser 
12789 * The allowed sequence for the Well Balanced Chunk is the one defined by 12790 * the content production in the XML grammar: 12791 * 12792 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12793 * 12794 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12795 * error code otherwise 12796 * 12797 * In case recover is set to 1, the nodelist will not be empty even if 12798 * the parsed chunk is not well balanced. 12799 */ 12800 static xmlParserErrors 12801 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 12802 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 12803 xmlParserCtxtPtr ctxt; 12804 xmlDocPtr newDoc = NULL; 12805 xmlNodePtr newRoot; 12806 xmlSAXHandlerPtr oldsax = NULL; 12807 xmlNodePtr content = NULL; 12808 xmlNodePtr last = NULL; 12809 int size; 12810 xmlParserErrors ret = XML_ERR_OK; 12811 #ifdef SAX2 12812 int i; 12813 #endif 12814 12815 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 12816 (oldctxt->depth > 1024)) { 12817 return(XML_ERR_ENTITY_LOOP); 12818 } 12819 12820 12821 if (lst != NULL) 12822 *lst = NULL; 12823 if (string == NULL) 12824 return(XML_ERR_INTERNAL_ERROR); 12825 12826 size = xmlStrlen(string); 12827 12828 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 12829 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12830 if (user_data != NULL) 12831 ctxt->userData = user_data; 12832 else 12833 ctxt->userData = ctxt; 12834 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12835 ctxt->dict = oldctxt->dict; 12836 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12837 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12838 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12839 12840 #ifdef SAX2 12841 /* propagate namespaces down the entity */ 12842 for (i = 0;i < oldctxt->nsNr;i += 2) { 12843 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 12844 } 12845 #endif 12846 12847 oldsax = 
ctxt->sax; 12848 ctxt->sax = oldctxt->sax; 12849 xmlDetectSAX2(ctxt); 12850 ctxt->replaceEntities = oldctxt->replaceEntities; 12851 ctxt->options = oldctxt->options; 12852 12853 ctxt->_private = oldctxt->_private; 12854 if (oldctxt->myDoc == NULL) { 12855 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12856 if (newDoc == NULL) { 12857 ctxt->sax = oldsax; 12858 ctxt->dict = NULL; 12859 xmlFreeParserCtxt(ctxt); 12860 return(XML_ERR_INTERNAL_ERROR); 12861 } 12862 newDoc->properties = XML_DOC_INTERNAL; 12863 newDoc->dict = ctxt->dict; 12864 xmlDictReference(newDoc->dict); 12865 ctxt->myDoc = newDoc; 12866 } else { 12867 ctxt->myDoc = oldctxt->myDoc; 12868 content = ctxt->myDoc->children; 12869 last = ctxt->myDoc->last; 12870 } 12871 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 12872 if (newRoot == NULL) { 12873 ctxt->sax = oldsax; 12874 ctxt->dict = NULL; 12875 xmlFreeParserCtxt(ctxt); 12876 if (newDoc != NULL) { 12877 xmlFreeDoc(newDoc); 12878 } 12879 return(XML_ERR_INTERNAL_ERROR); 12880 } 12881 ctxt->myDoc->children = NULL; 12882 ctxt->myDoc->last = NULL; 12883 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 12884 nodePush(ctxt, ctxt->myDoc->children); 12885 ctxt->instate = XML_PARSER_CONTENT; 12886 ctxt->depth = oldctxt->depth + 1; 12887 12888 ctxt->validate = 0; 12889 ctxt->loadsubset = oldctxt->loadsubset; 12890 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 12891 /* 12892 * ID/IDREF registration will be done in xmlValidateElement below 12893 */ 12894 ctxt->loadsubset |= XML_SKIP_IDS; 12895 } 12896 ctxt->dictNames = oldctxt->dictNames; 12897 ctxt->attsDefault = oldctxt->attsDefault; 12898 ctxt->attsSpecial = oldctxt->attsSpecial; 12899 12900 xmlParseContent(ctxt); 12901 if ((RAW == '<') && (NXT(1) == '/')) { 12902 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12903 } else if (RAW != 0) { 12904 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12905 } 12906 if (ctxt->node != ctxt->myDoc->children) { 12907 xmlFatalErr(ctxt, 
XML_ERR_NOT_WELL_BALANCED, NULL); 12908 } 12909 12910 if (!ctxt->wellFormed) { 12911 if (ctxt->errNo == 0) 12912 ret = XML_ERR_INTERNAL_ERROR; 12913 else 12914 ret = (xmlParserErrors)ctxt->errNo; 12915 } else { 12916 ret = XML_ERR_OK; 12917 } 12918 12919 if ((lst != NULL) && (ret == XML_ERR_OK)) { 12920 xmlNodePtr cur; 12921 12922 /* 12923 * Return the newly created nodeset after unlinking it from 12924 * they pseudo parent. 12925 */ 12926 cur = ctxt->myDoc->children->children; 12927 *lst = cur; 12928 while (cur != NULL) { 12929 #ifdef LIBXML_VALID_ENABLED 12930 if ((oldctxt->validate) && (oldctxt->wellFormed) && 12931 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 12932 (cur->type == XML_ELEMENT_NODE)) { 12933 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 12934 oldctxt->myDoc, cur); 12935 } 12936 #endif /* LIBXML_VALID_ENABLED */ 12937 cur->parent = NULL; 12938 cur = cur->next; 12939 } 12940 ctxt->myDoc->children->children = NULL; 12941 } 12942 if (ctxt->myDoc != NULL) { 12943 xmlFreeNode(ctxt->myDoc->children); 12944 ctxt->myDoc->children = content; 12945 ctxt->myDoc->last = last; 12946 } 12947 12948 /* 12949 * Record in the parent context the number of entities replacement 12950 * done when parsing that reference. 
12951 */ 12952 if (oldctxt != NULL) 12953 oldctxt->nbentities += ctxt->nbentities; 12954 12955 /* 12956 * Also record the last error if any 12957 */ 12958 if (ctxt->lastError.code != XML_ERR_OK) 12959 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12960 12961 ctxt->sax = oldsax; 12962 ctxt->dict = NULL; 12963 ctxt->attsDefault = NULL; 12964 ctxt->attsSpecial = NULL; 12965 xmlFreeParserCtxt(ctxt); 12966 if (newDoc != NULL) { 12967 xmlFreeDoc(newDoc); 12968 } 12969 12970 return(ret); 12971 } 12972 12973 /** 12974 * xmlParseInNodeContext: 12975 * @node: the context node 12976 * @data: the input string 12977 * @datalen: the input string length in bytes 12978 * @options: a combination of xmlParserOption 12979 * @lst: the return value for the set of parsed nodes 12980 * 12981 * Parse a well-balanced chunk of an XML document 12982 * within the context (DTD, namespaces, etc ...) of the given node. 12983 * 12984 * The allowed sequence for the data is a Well Balanced Chunk defined by 12985 * the content production in the XML grammar: 12986 * 12987 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12988 * 12989 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12990 * error code otherwise 12991 */ 12992 xmlParserErrors 12993 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 12994 int options, xmlNodePtr *lst) { 12995 #ifdef SAX2 12996 xmlParserCtxtPtr ctxt; 12997 xmlDocPtr doc = NULL; 12998 xmlNodePtr fake, cur; 12999 int nsnr = 0; 13000 13001 xmlParserErrors ret = XML_ERR_OK; 13002 13003 /* 13004 * check all input parameters, grab the document 13005 */ 13006 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13007 return(XML_ERR_INTERNAL_ERROR); 13008 switch (node->type) { 13009 case XML_ELEMENT_NODE: 13010 case XML_ATTRIBUTE_NODE: 13011 case XML_TEXT_NODE: 13012 case XML_CDATA_SECTION_NODE: 13013 case XML_ENTITY_REF_NODE: 13014 case XML_PI_NODE: 13015 case XML_COMMENT_NODE: 13016 case 
XML_DOCUMENT_NODE: 13017 case XML_HTML_DOCUMENT_NODE: 13018 break; 13019 default: 13020 return(XML_ERR_INTERNAL_ERROR); 13021 13022 } 13023 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13024 (node->type != XML_DOCUMENT_NODE) && 13025 (node->type != XML_HTML_DOCUMENT_NODE)) 13026 node = node->parent; 13027 if (node == NULL) 13028 return(XML_ERR_INTERNAL_ERROR); 13029 if (node->type == XML_ELEMENT_NODE) 13030 doc = node->doc; 13031 else 13032 doc = (xmlDocPtr) node; 13033 if (doc == NULL) 13034 return(XML_ERR_INTERNAL_ERROR); 13035 13036 /* 13037 * allocate a context and set-up everything not related to the 13038 * node position in the tree 13039 */ 13040 if (doc->type == XML_DOCUMENT_NODE) 13041 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13042 #ifdef LIBXML_HTML_ENABLED 13043 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13044 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13045 /* 13046 * When parsing in context, it makes no sense to add implied 13047 * elements like html/body/etc... 13048 */ 13049 options |= HTML_PARSE_NOIMPLIED; 13050 } 13051 #endif 13052 else 13053 return(XML_ERR_INTERNAL_ERROR); 13054 13055 if (ctxt == NULL) 13056 return(XML_ERR_NO_MEMORY); 13057 13058 /* 13059 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13060 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13061 * we must wait until the last moment to free the original one. 
13062 */ 13063 if (doc->dict != NULL) { 13064 if (ctxt->dict != NULL) 13065 xmlDictFree(ctxt->dict); 13066 ctxt->dict = doc->dict; 13067 } else 13068 options |= XML_PARSE_NODICT; 13069 13070 if (doc->encoding != NULL) { 13071 xmlCharEncodingHandlerPtr hdlr; 13072 13073 if (ctxt->encoding != NULL) 13074 xmlFree((xmlChar *) ctxt->encoding); 13075 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13076 13077 hdlr = xmlFindCharEncodingHandler(doc->encoding); 13078 if (hdlr != NULL) { 13079 xmlSwitchToEncoding(ctxt, hdlr); 13080 } else { 13081 return(XML_ERR_UNSUPPORTED_ENCODING); 13082 } 13083 } 13084 13085 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13086 xmlDetectSAX2(ctxt); 13087 ctxt->myDoc = doc; 13088 13089 fake = xmlNewComment(NULL); 13090 if (fake == NULL) { 13091 xmlFreeParserCtxt(ctxt); 13092 return(XML_ERR_NO_MEMORY); 13093 } 13094 xmlAddChild(node, fake); 13095 13096 if (node->type == XML_ELEMENT_NODE) { 13097 nodePush(ctxt, node); 13098 /* 13099 * initialize the SAX2 namespaces stack 13100 */ 13101 cur = node; 13102 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13103 xmlNsPtr ns = cur->nsDef; 13104 const xmlChar *iprefix, *ihref; 13105 13106 while (ns != NULL) { 13107 if (ctxt->dict) { 13108 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13109 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13110 } else { 13111 iprefix = ns->prefix; 13112 ihref = ns->href; 13113 } 13114 13115 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13116 nsPush(ctxt, iprefix, ihref); 13117 nsnr++; 13118 } 13119 ns = ns->next; 13120 } 13121 cur = cur->parent; 13122 } 13123 ctxt->instate = XML_PARSER_CONTENT; 13124 } 13125 13126 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13127 /* 13128 * ID/IDREF registration will be done in xmlValidateElement below 13129 */ 13130 ctxt->loadsubset |= XML_SKIP_IDS; 13131 } 13132 13133 #ifdef LIBXML_HTML_ENABLED 13134 if (doc->type == XML_HTML_DOCUMENT_NODE) 13135 __htmlParseContent(ctxt); 13136 else 13137 
#endif 13138 xmlParseContent(ctxt); 13139 13140 nsPop(ctxt, nsnr); 13141 if ((RAW == '<') && (NXT(1) == '/')) { 13142 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13143 } else if (RAW != 0) { 13144 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13145 } 13146 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13147 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13148 ctxt->wellFormed = 0; 13149 } 13150 13151 if (!ctxt->wellFormed) { 13152 if (ctxt->errNo == 0) 13153 ret = XML_ERR_INTERNAL_ERROR; 13154 else 13155 ret = (xmlParserErrors)ctxt->errNo; 13156 } else { 13157 ret = XML_ERR_OK; 13158 } 13159 13160 /* 13161 * Return the newly created nodeset after unlinking it from 13162 * the pseudo sibling. 13163 */ 13164 13165 cur = fake->next; 13166 fake->next = NULL; 13167 node->last = fake; 13168 13169 if (cur != NULL) { 13170 cur->prev = NULL; 13171 } 13172 13173 *lst = cur; 13174 13175 while (cur != NULL) { 13176 cur->parent = NULL; 13177 cur = cur->next; 13178 } 13179 13180 xmlUnlinkNode(fake); 13181 xmlFreeNode(fake); 13182 13183 13184 if (ret != XML_ERR_OK) { 13185 xmlFreeNodeList(*lst); 13186 *lst = NULL; 13187 } 13188 13189 if (doc->dict != NULL) 13190 ctxt->dict = NULL; 13191 xmlFreeParserCtxt(ctxt); 13192 13193 return(ret); 13194 #else /* !SAX2 */ 13195 return(XML_ERR_INTERNAL_ERROR); 13196 #endif 13197 } 13198 13199 #ifdef LIBXML_SAX1_ENABLED 13200 /** 13201 * xmlParseBalancedChunkMemoryRecover: 13202 * @doc: the document the chunk pertains to 13203 * @sax: the SAX handler bloc (possibly NULL) 13204 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13205 * @depth: Used for loop detection, use 0 13206 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13207 * @lst: the return value for the set of parsed nodes 13208 * @recover: return nodes even if the data is broken (use 0) 13209 * 13210 * 13211 * Parse a well-balanced chunk of an XML document 13212 * called by the parser 13213 * The allowed sequence for the 
Well Balanced Chunk is the one defined by 13214 * the content production in the XML grammar: 13215 * 13216 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13217 * 13218 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13219 * the parser error code otherwise 13220 * 13221 * In case recover is set to 1, the nodelist will not be empty even if 13222 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13223 * some extent. 13224 */ 13225 int 13226 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13227 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13228 int recover) { 13229 xmlParserCtxtPtr ctxt; 13230 xmlDocPtr newDoc; 13231 xmlSAXHandlerPtr oldsax = NULL; 13232 xmlNodePtr content, newRoot; 13233 int size; 13234 int ret = 0; 13235 13236 if (depth > 40) { 13237 return(XML_ERR_ENTITY_LOOP); 13238 } 13239 13240 13241 if (lst != NULL) 13242 *lst = NULL; 13243 if (string == NULL) 13244 return(-1); 13245 13246 size = xmlStrlen(string); 13247 13248 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13249 if (ctxt == NULL) return(-1); 13250 ctxt->userData = ctxt; 13251 if (sax != NULL) { 13252 oldsax = ctxt->sax; 13253 ctxt->sax = sax; 13254 if (user_data != NULL) 13255 ctxt->userData = user_data; 13256 } 13257 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13258 if (newDoc == NULL) { 13259 xmlFreeParserCtxt(ctxt); 13260 return(-1); 13261 } 13262 newDoc->properties = XML_DOC_INTERNAL; 13263 if ((doc != NULL) && (doc->dict != NULL)) { 13264 xmlDictFree(ctxt->dict); 13265 ctxt->dict = doc->dict; 13266 xmlDictReference(ctxt->dict); 13267 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13268 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13269 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13270 ctxt->dictNames = 1; 13271 } else { 13272 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13273 } 13274 if (doc 
!= NULL) { 13275 newDoc->intSubset = doc->intSubset; 13276 newDoc->extSubset = doc->extSubset; 13277 } 13278 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13279 if (newRoot == NULL) { 13280 if (sax != NULL) 13281 ctxt->sax = oldsax; 13282 xmlFreeParserCtxt(ctxt); 13283 newDoc->intSubset = NULL; 13284 newDoc->extSubset = NULL; 13285 xmlFreeDoc(newDoc); 13286 return(-1); 13287 } 13288 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13289 nodePush(ctxt, newRoot); 13290 if (doc == NULL) { 13291 ctxt->myDoc = newDoc; 13292 } else { 13293 ctxt->myDoc = newDoc; 13294 newDoc->children->doc = doc; 13295 /* Ensure that doc has XML spec namespace */ 13296 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13297 newDoc->oldNs = doc->oldNs; 13298 } 13299 ctxt->instate = XML_PARSER_CONTENT; 13300 ctxt->depth = depth; 13301 13302 /* 13303 * Doing validity checking on chunk doesn't make sense 13304 */ 13305 ctxt->validate = 0; 13306 ctxt->loadsubset = 0; 13307 xmlDetectSAX2(ctxt); 13308 13309 if ( doc != NULL ){ 13310 content = doc->children; 13311 doc->children = NULL; 13312 xmlParseContent(ctxt); 13313 doc->children = content; 13314 } 13315 else { 13316 xmlParseContent(ctxt); 13317 } 13318 if ((RAW == '<') && (NXT(1) == '/')) { 13319 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13320 } else if (RAW != 0) { 13321 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13322 } 13323 if (ctxt->node != newDoc->children) { 13324 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13325 } 13326 13327 if (!ctxt->wellFormed) { 13328 if (ctxt->errNo == 0) 13329 ret = 1; 13330 else 13331 ret = ctxt->errNo; 13332 } else { 13333 ret = 0; 13334 } 13335 13336 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 13337 xmlNodePtr cur; 13338 13339 /* 13340 * Return the newly created nodeset after unlinking it from 13341 * they pseudo parent. 
13342 */ 13343 cur = newDoc->children->children; 13344 *lst = cur; 13345 while (cur != NULL) { 13346 xmlSetTreeDoc(cur, doc); 13347 cur->parent = NULL; 13348 cur = cur->next; 13349 } 13350 newDoc->children->children = NULL; 13351 } 13352 13353 if (sax != NULL) 13354 ctxt->sax = oldsax; 13355 xmlFreeParserCtxt(ctxt); 13356 newDoc->intSubset = NULL; 13357 newDoc->extSubset = NULL; 13358 newDoc->oldNs = NULL; 13359 xmlFreeDoc(newDoc); 13360 13361 return(ret); 13362 } 13363 13364 /** 13365 * xmlSAXParseEntity: 13366 * @sax: the SAX handler block 13367 * @filename: the filename 13368 * 13369 * parse an XML external entity out of context and build a tree. 13370 * It use the given SAX function block to handle the parsing callback. 13371 * If sax is NULL, fallback to the default DOM tree building routines. 13372 * 13373 * [78] extParsedEnt ::= TextDecl? content 13374 * 13375 * This correspond to a "Well Balanced" chunk 13376 * 13377 * Returns the resulting document tree 13378 */ 13379 13380 xmlDocPtr 13381 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13382 xmlDocPtr ret; 13383 xmlParserCtxtPtr ctxt; 13384 13385 ctxt = xmlCreateFileParserCtxt(filename); 13386 if (ctxt == NULL) { 13387 return(NULL); 13388 } 13389 if (sax != NULL) { 13390 if (ctxt->sax != NULL) 13391 xmlFree(ctxt->sax); 13392 ctxt->sax = sax; 13393 ctxt->userData = NULL; 13394 } 13395 13396 xmlParseExtParsedEnt(ctxt); 13397 13398 if (ctxt->wellFormed) 13399 ret = ctxt->myDoc; 13400 else { 13401 ret = NULL; 13402 xmlFreeDoc(ctxt->myDoc); 13403 ctxt->myDoc = NULL; 13404 } 13405 if (sax != NULL) 13406 ctxt->sax = NULL; 13407 xmlFreeParserCtxt(ctxt); 13408 13409 return(ret); 13410 } 13411 13412 /** 13413 * xmlParseEntity: 13414 * @filename: the filename 13415 * 13416 * parse an XML external entity out of context and build a tree. 13417 * 13418 * [78] extParsedEnt ::= TextDecl? 
content 13419 * 13420 * This correspond to a "Well Balanced" chunk 13421 * 13422 * Returns the resulting document tree 13423 */ 13424 13425 xmlDocPtr 13426 xmlParseEntity(const char *filename) { 13427 return(xmlSAXParseEntity(NULL, filename)); 13428 } 13429 #endif /* LIBXML_SAX1_ENABLED */ 13430 13431 /** 13432 * xmlCreateEntityParserCtxtInternal: 13433 * @URL: the entity URL 13434 * @ID: the entity PUBLIC ID 13435 * @base: a possible base for the target URI 13436 * @pctx: parser context used to set options on new context 13437 * 13438 * Create a parser context for an external entity 13439 * Automatic support for ZLIB/Compress compressed document is provided 13440 * by default if found at compile-time. 13441 * 13442 * Returns the new parser context or NULL 13443 */ 13444 static xmlParserCtxtPtr 13445 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13446 const xmlChar *base, xmlParserCtxtPtr pctx) { 13447 xmlParserCtxtPtr ctxt; 13448 xmlParserInputPtr inputStream; 13449 char *directory = NULL; 13450 xmlChar *uri; 13451 13452 ctxt = xmlNewParserCtxt(); 13453 if (ctxt == NULL) { 13454 return(NULL); 13455 } 13456 13457 if (pctx != NULL) { 13458 ctxt->options = pctx->options; 13459 ctxt->_private = pctx->_private; 13460 } 13461 13462 uri = xmlBuildURI(URL, base); 13463 13464 if (uri == NULL) { 13465 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13466 if (inputStream == NULL) { 13467 xmlFreeParserCtxt(ctxt); 13468 return(NULL); 13469 } 13470 13471 inputPush(ctxt, inputStream); 13472 13473 if ((ctxt->directory == NULL) && (directory == NULL)) 13474 directory = xmlParserGetDirectory((char *)URL); 13475 if ((ctxt->directory == NULL) && (directory != NULL)) 13476 ctxt->directory = directory; 13477 } else { 13478 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 13479 if (inputStream == NULL) { 13480 xmlFree(uri); 13481 xmlFreeParserCtxt(ctxt); 13482 return(NULL); 13483 } 13484 13485 inputPush(ctxt, 
inputStream); 13486 13487 if ((ctxt->directory == NULL) && (directory == NULL)) 13488 directory = xmlParserGetDirectory((char *)uri); 13489 if ((ctxt->directory == NULL) && (directory != NULL)) 13490 ctxt->directory = directory; 13491 xmlFree(uri); 13492 } 13493 return(ctxt); 13494 } 13495 13496 /** 13497 * xmlCreateEntityParserCtxt: 13498 * @URL: the entity URL 13499 * @ID: the entity PUBLIC ID 13500 * @base: a possible base for the target URI 13501 * 13502 * Create a parser context for an external entity 13503 * Automatic support for ZLIB/Compress compressed document is provided 13504 * by default if found at compile-time. 13505 * 13506 * Returns the new parser context or NULL 13507 */ 13508 xmlParserCtxtPtr 13509 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 13510 const xmlChar *base) { 13511 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 13512 13513 } 13514 13515 /************************************************************************ 13516 * * 13517 * Front ends when parsing from a file * 13518 * * 13519 ************************************************************************/ 13520 13521 /** 13522 * xmlCreateURLParserCtxt: 13523 * @filename: the filename or URL 13524 * @options: a combination of xmlParserOption 13525 * 13526 * Create a parser context for a file or URL content. 
13527 * Automatic support for ZLIB/Compress compressed document is provided 13528 * by default if found at compile-time and for file accesses 13529 * 13530 * Returns the new parser context or NULL 13531 */ 13532 xmlParserCtxtPtr 13533 xmlCreateURLParserCtxt(const char *filename, int options) 13534 { 13535 xmlParserCtxtPtr ctxt; 13536 xmlParserInputPtr inputStream; 13537 char *directory = NULL; 13538 13539 ctxt = xmlNewParserCtxt(); 13540 if (ctxt == NULL) { 13541 xmlErrMemory(NULL, "cannot allocate parser context"); 13542 return(NULL); 13543 } 13544 13545 if (options) 13546 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13547 ctxt->linenumbers = 1; 13548 13549 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 13550 if (inputStream == NULL) { 13551 xmlFreeParserCtxt(ctxt); 13552 return(NULL); 13553 } 13554 13555 inputPush(ctxt, inputStream); 13556 if ((ctxt->directory == NULL) && (directory == NULL)) 13557 directory = xmlParserGetDirectory(filename); 13558 if ((ctxt->directory == NULL) && (directory != NULL)) 13559 ctxt->directory = directory; 13560 13561 return(ctxt); 13562 } 13563 13564 /** 13565 * xmlCreateFileParserCtxt: 13566 * @filename: the filename 13567 * 13568 * Create a parser context for a file content. 13569 * Automatic support for ZLIB/Compress compressed document is provided 13570 * by default if found at compile-time. 13571 * 13572 * Returns the new parser context or NULL 13573 */ 13574 xmlParserCtxtPtr 13575 xmlCreateFileParserCtxt(const char *filename) 13576 { 13577 return(xmlCreateURLParserCtxt(filename, 0)); 13578 } 13579 13580 #ifdef LIBXML_SAX1_ENABLED 13581 /** 13582 * xmlSAXParseFileWithData: 13583 * @sax: the SAX handler block 13584 * @filename: the filename 13585 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13586 * documents 13587 * @data: the userdata 13588 * 13589 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 13590 * compressed document is provided by default if found at compile-time. 13591 * It use the given SAX function block to handle the parsing callback. 13592 * If sax is NULL, fallback to the default DOM tree building routines. 13593 * 13594 * User data (void *) is stored within the parser context in the 13595 * context's _private member, so it is available nearly everywhere in libxml 13596 * 13597 * Returns the resulting document tree 13598 */ 13599 13600 xmlDocPtr 13601 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 13602 int recovery, void *data) { 13603 xmlDocPtr ret; 13604 xmlParserCtxtPtr ctxt; 13605 13606 xmlInitParser(); 13607 13608 ctxt = xmlCreateFileParserCtxt(filename); 13609 if (ctxt == NULL) { 13610 return(NULL); 13611 } 13612 if (sax != NULL) { 13613 if (ctxt->sax != NULL) 13614 xmlFree(ctxt->sax); 13615 ctxt->sax = sax; 13616 } 13617 xmlDetectSAX2(ctxt); 13618 if (data!=NULL) { 13619 ctxt->_private = data; 13620 } 13621 13622 if (ctxt->directory == NULL) 13623 ctxt->directory = xmlParserGetDirectory(filename); 13624 13625 ctxt->recovery = recovery; 13626 13627 xmlParseDocument(ctxt); 13628 13629 if ((ctxt->wellFormed) || recovery) { 13630 ret = ctxt->myDoc; 13631 if (ret != NULL) { 13632 if (ctxt->input->buf->compressed > 0) 13633 ret->compression = 9; 13634 else 13635 ret->compression = ctxt->input->buf->compressed; 13636 } 13637 } 13638 else { 13639 ret = NULL; 13640 xmlFreeDoc(ctxt->myDoc); 13641 ctxt->myDoc = NULL; 13642 } 13643 if (sax != NULL) 13644 ctxt->sax = NULL; 13645 xmlFreeParserCtxt(ctxt); 13646 13647 return(ret); 13648 } 13649 13650 /** 13651 * xmlSAXParseFile: 13652 * @sax: the SAX handler block 13653 * @filename: the filename 13654 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13655 * documents 13656 * 13657 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 13658 * compressed document is provided by default if found at compile-time. 13659 * It use the given SAX function block to handle the parsing callback. 13660 * If sax is NULL, fallback to the default DOM tree building routines. 13661 * 13662 * Returns the resulting document tree 13663 */ 13664 13665 xmlDocPtr 13666 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 13667 int recovery) { 13668 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 13669 } 13670 13671 /** 13672 * xmlRecoverDoc: 13673 * @cur: a pointer to an array of xmlChar 13674 * 13675 * parse an XML in-memory document and build a tree. 13676 * In the case the document is not Well Formed, a attempt to build a 13677 * tree is tried anyway 13678 * 13679 * Returns the resulting document tree or NULL in case of failure 13680 */ 13681 13682 xmlDocPtr 13683 xmlRecoverDoc(const xmlChar *cur) { 13684 return(xmlSAXParseDoc(NULL, cur, 1)); 13685 } 13686 13687 /** 13688 * xmlParseFile: 13689 * @filename: the filename 13690 * 13691 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13692 * compressed document is provided by default if found at compile-time. 13693 * 13694 * Returns the resulting document tree if the file was wellformed, 13695 * NULL otherwise. 13696 */ 13697 13698 xmlDocPtr 13699 xmlParseFile(const char *filename) { 13700 return(xmlSAXParseFile(NULL, filename, 0)); 13701 } 13702 13703 /** 13704 * xmlRecoverFile: 13705 * @filename: the filename 13706 * 13707 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13708 * compressed document is provided by default if found at compile-time. 
13709 * In the case the document is not Well Formed, it attempts to build 13710 * a tree anyway 13711 * 13712 * Returns the resulting document tree or NULL in case of failure 13713 */ 13714 13715 xmlDocPtr 13716 xmlRecoverFile(const char *filename) { 13717 return(xmlSAXParseFile(NULL, filename, 1)); 13718 } 13719 13720 13721 /** 13722 * xmlSetupParserForBuffer: 13723 * @ctxt: an XML parser context 13724 * @buffer: a xmlChar * buffer 13725 * @filename: a file name 13726 * 13727 * Setup the parser context to parse a new buffer; Clears any prior 13728 * contents from the parser context. The buffer parameter must not be 13729 * NULL, but the filename parameter can be 13730 */ 13731 void 13732 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 13733 const char* filename) 13734 { 13735 xmlParserInputPtr input; 13736 13737 if ((ctxt == NULL) || (buffer == NULL)) 13738 return; 13739 13740 input = xmlNewInputStream(ctxt); 13741 if (input == NULL) { 13742 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 13743 xmlClearParserCtxt(ctxt); 13744 return; 13745 } 13746 13747 xmlClearParserCtxt(ctxt); 13748 if (filename != NULL) 13749 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 13750 input->base = buffer; 13751 input->cur = buffer; 13752 input->end = &buffer[xmlStrlen(buffer)]; 13753 inputPush(ctxt, input); 13754 } 13755 13756 /** 13757 * xmlSAXUserParseFile: 13758 * @sax: a SAX handler 13759 * @user_data: The user data returned on SAX callbacks 13760 * @filename: a file name 13761 * 13762 * parse an XML file and call the given SAX handler routines. 
13763 * Automatic support for ZLIB/Compress compressed document is provided 13764 * 13765 * Returns 0 in case of success or a error number otherwise 13766 */ 13767 int 13768 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 13769 const char *filename) { 13770 int ret = 0; 13771 xmlParserCtxtPtr ctxt; 13772 13773 ctxt = xmlCreateFileParserCtxt(filename); 13774 if (ctxt == NULL) return -1; 13775 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13776 xmlFree(ctxt->sax); 13777 ctxt->sax = sax; 13778 xmlDetectSAX2(ctxt); 13779 13780 if (user_data != NULL) 13781 ctxt->userData = user_data; 13782 13783 xmlParseDocument(ctxt); 13784 13785 if (ctxt->wellFormed) 13786 ret = 0; 13787 else { 13788 if (ctxt->errNo != 0) 13789 ret = ctxt->errNo; 13790 else 13791 ret = -1; 13792 } 13793 if (sax != NULL) 13794 ctxt->sax = NULL; 13795 if (ctxt->myDoc != NULL) { 13796 xmlFreeDoc(ctxt->myDoc); 13797 ctxt->myDoc = NULL; 13798 } 13799 xmlFreeParserCtxt(ctxt); 13800 13801 return ret; 13802 } 13803 #endif /* LIBXML_SAX1_ENABLED */ 13804 13805 /************************************************************************ 13806 * * 13807 * Front ends when parsing from memory * 13808 * * 13809 ************************************************************************/ 13810 13811 /** 13812 * xmlCreateMemoryParserCtxt: 13813 * @buffer: a pointer to a char array 13814 * @size: the size of the array 13815 * 13816 * Create a parser context for an XML in-memory document. 
13817 * 13818 * Returns the new parser context or NULL 13819 */ 13820 xmlParserCtxtPtr 13821 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 13822 xmlParserCtxtPtr ctxt; 13823 xmlParserInputPtr input; 13824 xmlParserInputBufferPtr buf; 13825 13826 if (buffer == NULL) 13827 return(NULL); 13828 if (size <= 0) 13829 return(NULL); 13830 13831 ctxt = xmlNewParserCtxt(); 13832 if (ctxt == NULL) 13833 return(NULL); 13834 13835 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 13836 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 13837 if (buf == NULL) { 13838 xmlFreeParserCtxt(ctxt); 13839 return(NULL); 13840 } 13841 13842 input = xmlNewInputStream(ctxt); 13843 if (input == NULL) { 13844 xmlFreeParserInputBuffer(buf); 13845 xmlFreeParserCtxt(ctxt); 13846 return(NULL); 13847 } 13848 13849 input->filename = NULL; 13850 input->buf = buf; 13851 input->base = input->buf->buffer->content; 13852 input->cur = input->buf->buffer->content; 13853 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 13854 13855 inputPush(ctxt, input); 13856 return(ctxt); 13857 } 13858 13859 #ifdef LIBXML_SAX1_ENABLED 13860 /** 13861 * xmlSAXParseMemoryWithData: 13862 * @sax: the SAX handler block 13863 * @buffer: an pointer to a char array 13864 * @size: the size of the array 13865 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13866 * documents 13867 * @data: the userdata 13868 * 13869 * parse an XML in-memory block and use the given SAX function block 13870 * to handle the parsing callback. If sax is NULL, fallback to the default 13871 * DOM tree building routines. 
13872 * 13873 * User data (void *) is stored within the parser context in the 13874 * context's _private member, so it is available nearly everywhere in libxml 13875 * 13876 * Returns the resulting document tree 13877 */ 13878 13879 xmlDocPtr 13880 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 13881 int size, int recovery, void *data) { 13882 xmlDocPtr ret; 13883 xmlParserCtxtPtr ctxt; 13884 13885 xmlInitParser(); 13886 13887 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13888 if (ctxt == NULL) return(NULL); 13889 if (sax != NULL) { 13890 if (ctxt->sax != NULL) 13891 xmlFree(ctxt->sax); 13892 ctxt->sax = sax; 13893 } 13894 xmlDetectSAX2(ctxt); 13895 if (data!=NULL) { 13896 ctxt->_private=data; 13897 } 13898 13899 ctxt->recovery = recovery; 13900 13901 xmlParseDocument(ctxt); 13902 13903 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 13904 else { 13905 ret = NULL; 13906 xmlFreeDoc(ctxt->myDoc); 13907 ctxt->myDoc = NULL; 13908 } 13909 if (sax != NULL) 13910 ctxt->sax = NULL; 13911 xmlFreeParserCtxt(ctxt); 13912 13913 return(ret); 13914 } 13915 13916 /** 13917 * xmlSAXParseMemory: 13918 * @sax: the SAX handler block 13919 * @buffer: an pointer to a char array 13920 * @size: the size of the array 13921 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 13922 * documents 13923 * 13924 * parse an XML in-memory block and use the given SAX function block 13925 * to handle the parsing callback. If sax is NULL, fallback to the default 13926 * DOM tree building routines. 13927 * 13928 * Returns the resulting document tree 13929 */ 13930 xmlDocPtr 13931 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 13932 int size, int recovery) { 13933 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 13934 } 13935 13936 /** 13937 * xmlParseMemory: 13938 * @buffer: an pointer to a char array 13939 * @size: the size of the array 13940 * 13941 * parse an XML in-memory block and build a tree. 
13942 * 13943 * Returns the resulting document tree 13944 */ 13945 13946 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 13947 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 13948 } 13949 13950 /** 13951 * xmlRecoverMemory: 13952 * @buffer: an pointer to a char array 13953 * @size: the size of the array 13954 * 13955 * parse an XML in-memory block and build a tree. 13956 * In the case the document is not Well Formed, an attempt to 13957 * build a tree is tried anyway 13958 * 13959 * Returns the resulting document tree or NULL in case of error 13960 */ 13961 13962 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 13963 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 13964 } 13965 13966 /** 13967 * xmlSAXUserParseMemory: 13968 * @sax: a SAX handler 13969 * @user_data: The user data returned on SAX callbacks 13970 * @buffer: an in-memory XML document input 13971 * @size: the length of the XML document in bytes 13972 * 13973 * A better SAX parsing routine. 13974 * parse an XML in-memory buffer and call the given SAX handler routines. 
13975 * 13976 * Returns 0 in case of success or a error number otherwise 13977 */ 13978 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 13979 const char *buffer, int size) { 13980 int ret = 0; 13981 xmlParserCtxtPtr ctxt; 13982 13983 xmlInitParser(); 13984 13985 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13986 if (ctxt == NULL) return -1; 13987 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13988 xmlFree(ctxt->sax); 13989 ctxt->sax = sax; 13990 xmlDetectSAX2(ctxt); 13991 13992 if (user_data != NULL) 13993 ctxt->userData = user_data; 13994 13995 xmlParseDocument(ctxt); 13996 13997 if (ctxt->wellFormed) 13998 ret = 0; 13999 else { 14000 if (ctxt->errNo != 0) 14001 ret = ctxt->errNo; 14002 else 14003 ret = -1; 14004 } 14005 if (sax != NULL) 14006 ctxt->sax = NULL; 14007 if (ctxt->myDoc != NULL) { 14008 xmlFreeDoc(ctxt->myDoc); 14009 ctxt->myDoc = NULL; 14010 } 14011 xmlFreeParserCtxt(ctxt); 14012 14013 return ret; 14014 } 14015 #endif /* LIBXML_SAX1_ENABLED */ 14016 14017 /** 14018 * xmlCreateDocParserCtxt: 14019 * @cur: a pointer to an array of xmlChar 14020 * 14021 * Creates a parser context for an XML in-memory document. 14022 * 14023 * Returns the new parser context or NULL 14024 */ 14025 xmlParserCtxtPtr 14026 xmlCreateDocParserCtxt(const xmlChar *cur) { 14027 int len; 14028 14029 if (cur == NULL) 14030 return(NULL); 14031 len = xmlStrlen(cur); 14032 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14033 } 14034 14035 #ifdef LIBXML_SAX1_ENABLED 14036 /** 14037 * xmlSAXParseDoc: 14038 * @sax: the SAX handler block 14039 * @cur: a pointer to an array of xmlChar 14040 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14041 * documents 14042 * 14043 * parse an XML in-memory document and build a tree. 14044 * It use the given SAX function block to handle the parsing callback. 14045 * If sax is NULL, fallback to the default DOM tree building routines. 
14046 * 14047 * Returns the resulting document tree 14048 */ 14049 14050 xmlDocPtr 14051 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14052 xmlDocPtr ret; 14053 xmlParserCtxtPtr ctxt; 14054 xmlSAXHandlerPtr oldsax = NULL; 14055 14056 if (cur == NULL) return(NULL); 14057 14058 14059 ctxt = xmlCreateDocParserCtxt(cur); 14060 if (ctxt == NULL) return(NULL); 14061 if (sax != NULL) { 14062 oldsax = ctxt->sax; 14063 ctxt->sax = sax; 14064 ctxt->userData = NULL; 14065 } 14066 xmlDetectSAX2(ctxt); 14067 14068 xmlParseDocument(ctxt); 14069 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14070 else { 14071 ret = NULL; 14072 xmlFreeDoc(ctxt->myDoc); 14073 ctxt->myDoc = NULL; 14074 } 14075 if (sax != NULL) 14076 ctxt->sax = oldsax; 14077 xmlFreeParserCtxt(ctxt); 14078 14079 return(ret); 14080 } 14081 14082 /** 14083 * xmlParseDoc: 14084 * @cur: a pointer to an array of xmlChar 14085 * 14086 * parse an XML in-memory document and build a tree. 14087 * 14088 * Returns the resulting document tree 14089 */ 14090 14091 xmlDocPtr 14092 xmlParseDoc(const xmlChar *cur) { 14093 return(xmlSAXParseDoc(NULL, cur, 0)); 14094 } 14095 #endif /* LIBXML_SAX1_ENABLED */ 14096 14097 #ifdef LIBXML_LEGACY_ENABLED 14098 /************************************************************************ 14099 * * 14100 * Specific function to keep track of entities references * 14101 * and used by the XSLT debugger * 14102 * * 14103 ************************************************************************/ 14104 14105 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14106 14107 /** 14108 * xmlAddEntityReference: 14109 * @ent : A valid entity 14110 * @firstNode : A valid first node for children of entity 14111 * @lastNode : A valid last node of children entity 14112 * 14113 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14114 */ 14115 static void 14116 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14117 xmlNodePtr lastNode) 
14118 { 14119 if (xmlEntityRefFunc != NULL) { 14120 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14121 } 14122 } 14123 14124 14125 /** 14126 * xmlSetEntityReferenceFunc: 14127 * @func: A valid function 14128 * 14129 * Set the function to call call back when a xml reference has been made 14130 */ 14131 void 14132 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14133 { 14134 xmlEntityRefFunc = func; 14135 } 14136 #endif /* LIBXML_LEGACY_ENABLED */ 14137 14138 /************************************************************************ 14139 * * 14140 * Miscellaneous * 14141 * * 14142 ************************************************************************/ 14143 14144 #ifdef LIBXML_XPATH_ENABLED 14145 #include <libxml/xpath.h> 14146 #endif 14147 14148 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14149 static int xmlParserInitialized = 0; 14150 14151 /** 14152 * xmlInitParser: 14153 * 14154 * Initialization function for the XML parser. 14155 * This is not reentrant. Call once before processing in case of 14156 * use in multithreaded programs. 
14157 */ 14158 14159 void 14160 xmlInitParser(void) { 14161 if (xmlParserInitialized != 0) 14162 return; 14163 14164 #ifdef LIBXML_THREAD_ENABLED 14165 __xmlGlobalInitMutexLock(); 14166 if (xmlParserInitialized == 0) { 14167 #endif 14168 xmlInitGlobals(); 14169 xmlInitThreads(); 14170 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14171 (xmlGenericError == NULL)) 14172 initGenericErrorDefaultFunc(NULL); 14173 xmlInitMemory(); 14174 xmlInitCharEncodingHandlers(); 14175 xmlDefaultSAXHandlerInit(); 14176 xmlRegisterDefaultInputCallbacks(); 14177 #ifdef LIBXML_OUTPUT_ENABLED 14178 xmlRegisterDefaultOutputCallbacks(); 14179 #endif /* LIBXML_OUTPUT_ENABLED */ 14180 #ifdef LIBXML_HTML_ENABLED 14181 htmlInitAutoClose(); 14182 htmlDefaultSAXHandlerInit(); 14183 #endif 14184 #ifdef LIBXML_XPATH_ENABLED 14185 xmlXPathInit(); 14186 #endif 14187 xmlParserInitialized = 1; 14188 #ifdef LIBXML_THREAD_ENABLED 14189 } 14190 __xmlGlobalInitMutexUnlock(); 14191 #endif 14192 } 14193 14194 /** 14195 * xmlCleanupParser: 14196 * 14197 * This function name is somewhat misleading. It does not clean up 14198 * parser state, it cleans up memory allocated by the library itself. 14199 * It is a cleanup function for the XML library. It tries to reclaim all 14200 * related global memory allocated for the library processing. 14201 * It doesn't deallocate any document related memory. One should 14202 * call xmlCleanupParser() only when the process has finished using 14203 * the library and all XML/HTML documents built with it. 14204 * See also xmlInitParser() which has the opposite function of preparing 14205 * the library for operations. 14206 * 14207 * WARNING: if your application is multithreaded or has plugin support 14208 * calling this may crash the application if another thread or 14209 * a plugin is still using libxml2. It's sometimes very hard to 14210 * guess if libxml2 is in use in the application, some libraries 14211 * or plugins may use it without notice. 
In case of doubt abstain 14212 * from calling this function or do it just before calling exit() 14213 * to avoid leak reports from valgrind ! 14214 */ 14215 14216 void 14217 xmlCleanupParser(void) { 14218 if (!xmlParserInitialized) 14219 return; 14220 14221 xmlCleanupCharEncodingHandlers(); 14222 #ifdef LIBXML_CATALOG_ENABLED 14223 xmlCatalogCleanup(); 14224 #endif 14225 xmlDictCleanup(); 14226 xmlCleanupInputCallbacks(); 14227 #ifdef LIBXML_OUTPUT_ENABLED 14228 xmlCleanupOutputCallbacks(); 14229 #endif 14230 #ifdef LIBXML_SCHEMAS_ENABLED 14231 xmlSchemaCleanupTypes(); 14232 xmlRelaxNGCleanupTypes(); 14233 #endif 14234 xmlCleanupGlobals(); 14235 xmlResetLastError(); 14236 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14237 xmlCleanupMemory(); 14238 xmlParserInitialized = 0; 14239 } 14240 14241 /************************************************************************ 14242 * * 14243 * New set (2.6.0) of simpler and more flexible APIs * 14244 * * 14245 ************************************************************************/ 14246 14247 /** 14248 * DICT_FREE: 14249 * @str: a string 14250 * 14251 * Free a string if it is not owned by the "dict" dictionnary in the 14252 * current scope 14253 */ 14254 #define DICT_FREE(str) \ 14255 if ((str) && ((!dict) || \ 14256 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14257 xmlFree((char *)(str)); 14258 14259 /** 14260 * xmlCtxtReset: 14261 * @ctxt: an XML parser context 14262 * 14263 * Reset a parser context 14264 */ 14265 void 14266 xmlCtxtReset(xmlParserCtxtPtr ctxt) 14267 { 14268 xmlParserInputPtr input; 14269 xmlDictPtr dict; 14270 14271 if (ctxt == NULL) 14272 return; 14273 14274 dict = ctxt->dict; 14275 14276 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14277 xmlFreeInputStream(input); 14278 } 14279 ctxt->inputNr = 0; 14280 ctxt->input = NULL; 14281 14282 ctxt->spaceNr = 0; 14283 if (ctxt->spaceTab != NULL) { 14284 ctxt->spaceTab[0] = -1; 14285 ctxt->space = 
&ctxt->spaceTab[0]; 14286 } else { 14287 ctxt->space = NULL; 14288 } 14289 14290 14291 ctxt->nodeNr = 0; 14292 ctxt->node = NULL; 14293 14294 ctxt->nameNr = 0; 14295 ctxt->name = NULL; 14296 14297 DICT_FREE(ctxt->version); 14298 ctxt->version = NULL; 14299 DICT_FREE(ctxt->encoding); 14300 ctxt->encoding = NULL; 14301 DICT_FREE(ctxt->directory); 14302 ctxt->directory = NULL; 14303 DICT_FREE(ctxt->extSubURI); 14304 ctxt->extSubURI = NULL; 14305 DICT_FREE(ctxt->extSubSystem); 14306 ctxt->extSubSystem = NULL; 14307 if (ctxt->myDoc != NULL) 14308 xmlFreeDoc(ctxt->myDoc); 14309 ctxt->myDoc = NULL; 14310 14311 ctxt->standalone = -1; 14312 ctxt->hasExternalSubset = 0; 14313 ctxt->hasPErefs = 0; 14314 ctxt->html = 0; 14315 ctxt->external = 0; 14316 ctxt->instate = XML_PARSER_START; 14317 ctxt->token = 0; 14318 14319 ctxt->wellFormed = 1; 14320 ctxt->nsWellFormed = 1; 14321 ctxt->disableSAX = 0; 14322 ctxt->valid = 1; 14323 #if 0 14324 ctxt->vctxt.userData = ctxt; 14325 ctxt->vctxt.error = xmlParserValidityError; 14326 ctxt->vctxt.warning = xmlParserValidityWarning; 14327 #endif 14328 ctxt->record_info = 0; 14329 ctxt->nbChars = 0; 14330 ctxt->checkIndex = 0; 14331 ctxt->inSubset = 0; 14332 ctxt->errNo = XML_ERR_OK; 14333 ctxt->depth = 0; 14334 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14335 ctxt->catalogs = NULL; 14336 ctxt->nbentities = 0; 14337 ctxt->sizeentities = 0; 14338 xmlInitNodeInfoSeq(&ctxt->node_seq); 14339 14340 if (ctxt->attsDefault != NULL) { 14341 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 14342 ctxt->attsDefault = NULL; 14343 } 14344 if (ctxt->attsSpecial != NULL) { 14345 xmlHashFree(ctxt->attsSpecial, NULL); 14346 ctxt->attsSpecial = NULL; 14347 } 14348 14349 #ifdef LIBXML_CATALOG_ENABLED 14350 if (ctxt->catalogs != NULL) 14351 xmlCatalogFreeLocal(ctxt->catalogs); 14352 #endif 14353 if (ctxt->lastError.code != XML_ERR_OK) 14354 xmlResetError(&ctxt->lastError); 14355 } 14356 14357 /** 14358 * xmlCtxtResetPush: 14359 * @ctxt: an XML parser 
context 14360 * @chunk: a pointer to an array of chars 14361 * @size: number of chars in the array 14362 * @filename: an optional file name or URI 14363 * @encoding: the document encoding, or NULL 14364 * 14365 * Reset a push parser context 14366 * 14367 * Returns 0 in case of success and 1 in case of error 14368 */ 14369 int 14370 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14371 int size, const char *filename, const char *encoding) 14372 { 14373 xmlParserInputPtr inputStream; 14374 xmlParserInputBufferPtr buf; 14375 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14376 14377 if (ctxt == NULL) 14378 return(1); 14379 14380 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14381 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14382 14383 buf = xmlAllocParserInputBuffer(enc); 14384 if (buf == NULL) 14385 return(1); 14386 14387 if (ctxt == NULL) { 14388 xmlFreeParserInputBuffer(buf); 14389 return(1); 14390 } 14391 14392 xmlCtxtReset(ctxt); 14393 14394 if (ctxt->pushTab == NULL) { 14395 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14396 sizeof(xmlChar *)); 14397 if (ctxt->pushTab == NULL) { 14398 xmlErrMemory(ctxt, NULL); 14399 xmlFreeParserInputBuffer(buf); 14400 return(1); 14401 } 14402 } 14403 14404 if (filename == NULL) { 14405 ctxt->directory = NULL; 14406 } else { 14407 ctxt->directory = xmlParserGetDirectory(filename); 14408 } 14409 14410 inputStream = xmlNewInputStream(ctxt); 14411 if (inputStream == NULL) { 14412 xmlFreeParserInputBuffer(buf); 14413 return(1); 14414 } 14415 14416 if (filename == NULL) 14417 inputStream->filename = NULL; 14418 else 14419 inputStream->filename = (char *) 14420 xmlCanonicPath((const xmlChar *) filename); 14421 inputStream->buf = buf; 14422 inputStream->base = inputStream->buf->buffer->content; 14423 inputStream->cur = inputStream->buf->buffer->content; 14424 inputStream->end = 14425 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 14426 14427 inputPush(ctxt, 
inputStream); 14428 14429 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14430 (ctxt->input->buf != NULL)) { 14431 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 14432 int cur = ctxt->input->cur - ctxt->input->base; 14433 14434 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14435 14436 ctxt->input->base = ctxt->input->buf->buffer->content + base; 14437 ctxt->input->cur = ctxt->input->base + cur; 14438 ctxt->input->end = 14439 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> 14440 use]; 14441 #ifdef DEBUG_PUSH 14442 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14443 #endif 14444 } 14445 14446 if (encoding != NULL) { 14447 xmlCharEncodingHandlerPtr hdlr; 14448 14449 if (ctxt->encoding != NULL) 14450 xmlFree((xmlChar *) ctxt->encoding); 14451 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14452 14453 hdlr = xmlFindCharEncodingHandler(encoding); 14454 if (hdlr != NULL) { 14455 xmlSwitchToEncoding(ctxt, hdlr); 14456 } else { 14457 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14458 "Unsupported encoding %s\n", BAD_CAST encoding); 14459 } 14460 } else if (enc != XML_CHAR_ENCODING_NONE) { 14461 xmlSwitchEncoding(ctxt, enc); 14462 } 14463 14464 return(0); 14465 } 14466 14467 14468 /** 14469 * xmlCtxtUseOptionsInternal: 14470 * @ctxt: an XML parser context 14471 * @options: a combination of xmlParserOption 14472 * @encoding: the user provided encoding to use 14473 * 14474 * Applies the options to the parser context 14475 * 14476 * Returns 0 in case of success, the set of unknown or unimplemented options 14477 * in case of error. 
14478 */ 14479 static int 14480 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 14481 { 14482 if (ctxt == NULL) 14483 return(-1); 14484 if (encoding != NULL) { 14485 if (ctxt->encoding != NULL) 14486 xmlFree((xmlChar *) ctxt->encoding); 14487 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14488 } 14489 if (options & XML_PARSE_RECOVER) { 14490 ctxt->recovery = 1; 14491 options -= XML_PARSE_RECOVER; 14492 ctxt->options |= XML_PARSE_RECOVER; 14493 } else 14494 ctxt->recovery = 0; 14495 if (options & XML_PARSE_DTDLOAD) { 14496 ctxt->loadsubset = XML_DETECT_IDS; 14497 options -= XML_PARSE_DTDLOAD; 14498 ctxt->options |= XML_PARSE_DTDLOAD; 14499 } else 14500 ctxt->loadsubset = 0; 14501 if (options & XML_PARSE_DTDATTR) { 14502 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 14503 options -= XML_PARSE_DTDATTR; 14504 ctxt->options |= XML_PARSE_DTDATTR; 14505 } 14506 if (options & XML_PARSE_NOENT) { 14507 ctxt->replaceEntities = 1; 14508 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 14509 options -= XML_PARSE_NOENT; 14510 ctxt->options |= XML_PARSE_NOENT; 14511 } else 14512 ctxt->replaceEntities = 0; 14513 if (options & XML_PARSE_PEDANTIC) { 14514 ctxt->pedantic = 1; 14515 options -= XML_PARSE_PEDANTIC; 14516 ctxt->options |= XML_PARSE_PEDANTIC; 14517 } else 14518 ctxt->pedantic = 0; 14519 if (options & XML_PARSE_NOBLANKS) { 14520 ctxt->keepBlanks = 0; 14521 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 14522 options -= XML_PARSE_NOBLANKS; 14523 ctxt->options |= XML_PARSE_NOBLANKS; 14524 } else 14525 ctxt->keepBlanks = 1; 14526 if (options & XML_PARSE_DTDVALID) { 14527 ctxt->validate = 1; 14528 if (options & XML_PARSE_NOWARNING) 14529 ctxt->vctxt.warning = NULL; 14530 if (options & XML_PARSE_NOERROR) 14531 ctxt->vctxt.error = NULL; 14532 options -= XML_PARSE_DTDVALID; 14533 ctxt->options |= XML_PARSE_DTDVALID; 14534 } else 14535 ctxt->validate = 0; 14536 if (options & XML_PARSE_NOWARNING) { 14537 ctxt->sax->warning = NULL; 
14538 options -= XML_PARSE_NOWARNING; 14539 } 14540 if (options & XML_PARSE_NOERROR) { 14541 ctxt->sax->error = NULL; 14542 ctxt->sax->fatalError = NULL; 14543 options -= XML_PARSE_NOERROR; 14544 } 14545 #ifdef LIBXML_SAX1_ENABLED 14546 if (options & XML_PARSE_SAX1) { 14547 ctxt->sax->startElement = xmlSAX2StartElement; 14548 ctxt->sax->endElement = xmlSAX2EndElement; 14549 ctxt->sax->startElementNs = NULL; 14550 ctxt->sax->endElementNs = NULL; 14551 ctxt->sax->initialized = 1; 14552 options -= XML_PARSE_SAX1; 14553 ctxt->options |= XML_PARSE_SAX1; 14554 } 14555 #endif /* LIBXML_SAX1_ENABLED */ 14556 if (options & XML_PARSE_NODICT) { 14557 ctxt->dictNames = 0; 14558 options -= XML_PARSE_NODICT; 14559 ctxt->options |= XML_PARSE_NODICT; 14560 } else { 14561 ctxt->dictNames = 1; 14562 } 14563 if (options & XML_PARSE_NOCDATA) { 14564 ctxt->sax->cdataBlock = NULL; 14565 options -= XML_PARSE_NOCDATA; 14566 ctxt->options |= XML_PARSE_NOCDATA; 14567 } 14568 if (options & XML_PARSE_NSCLEAN) { 14569 ctxt->options |= XML_PARSE_NSCLEAN; 14570 options -= XML_PARSE_NSCLEAN; 14571 } 14572 if (options & XML_PARSE_NONET) { 14573 ctxt->options |= XML_PARSE_NONET; 14574 options -= XML_PARSE_NONET; 14575 } 14576 if (options & XML_PARSE_COMPACT) { 14577 ctxt->options |= XML_PARSE_COMPACT; 14578 options -= XML_PARSE_COMPACT; 14579 } 14580 if (options & XML_PARSE_OLD10) { 14581 ctxt->options |= XML_PARSE_OLD10; 14582 options -= XML_PARSE_OLD10; 14583 } 14584 if (options & XML_PARSE_NOBASEFIX) { 14585 ctxt->options |= XML_PARSE_NOBASEFIX; 14586 options -= XML_PARSE_NOBASEFIX; 14587 } 14588 if (options & XML_PARSE_HUGE) { 14589 ctxt->options |= XML_PARSE_HUGE; 14590 options -= XML_PARSE_HUGE; 14591 } 14592 if (options & XML_PARSE_OLDSAX) { 14593 ctxt->options |= XML_PARSE_OLDSAX; 14594 options -= XML_PARSE_OLDSAX; 14595 } 14596 ctxt->linenumbers = 1; 14597 return (options); 14598 } 14599 14600 /** 14601 * xmlCtxtUseOptions: 14602 * @ctxt: an XML parser context 14603 * @options: a 
combination of xmlParserOption 14604 * 14605 * Applies the options to the parser context 14606 * 14607 * Returns 0 in case of success, the set of unknown or unimplemented options 14608 * in case of error. 14609 */ 14610 int 14611 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 14612 { 14613 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 14614 } 14615 14616 /** 14617 * xmlDoRead: 14618 * @ctxt: an XML parser context 14619 * @URL: the base URL to use for the document 14620 * @encoding: the document encoding, or NULL 14621 * @options: a combination of xmlParserOption 14622 * @reuse: keep the context for reuse 14623 * 14624 * Common front-end for the xmlRead functions 14625 * 14626 * Returns the resulting document tree or NULL 14627 */ 14628 static xmlDocPtr 14629 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 14630 int options, int reuse) 14631 { 14632 xmlDocPtr ret; 14633 14634 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 14635 if (encoding != NULL) { 14636 xmlCharEncodingHandlerPtr hdlr; 14637 14638 hdlr = xmlFindCharEncodingHandler(encoding); 14639 if (hdlr != NULL) 14640 xmlSwitchToEncoding(ctxt, hdlr); 14641 } 14642 if ((URL != NULL) && (ctxt->input != NULL) && 14643 (ctxt->input->filename == NULL)) 14644 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 14645 xmlParseDocument(ctxt); 14646 if ((ctxt->wellFormed) || ctxt->recovery) 14647 ret = ctxt->myDoc; 14648 else { 14649 ret = NULL; 14650 if (ctxt->myDoc != NULL) { 14651 xmlFreeDoc(ctxt->myDoc); 14652 } 14653 } 14654 ctxt->myDoc = NULL; 14655 if (!reuse) { 14656 xmlFreeParserCtxt(ctxt); 14657 } 14658 14659 return (ret); 14660 } 14661 14662 /** 14663 * xmlReadDoc: 14664 * @cur: a pointer to a zero terminated string 14665 * @URL: the base URL to use for the document 14666 * @encoding: the document encoding, or NULL 14667 * @options: a combination of xmlParserOption 14668 * 14669 * parse an XML in-memory document and build a tree. 
14670 * 14671 * Returns the resulting document tree 14672 */ 14673 xmlDocPtr 14674 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 14675 { 14676 xmlParserCtxtPtr ctxt; 14677 14678 if (cur == NULL) 14679 return (NULL); 14680 14681 ctxt = xmlCreateDocParserCtxt(cur); 14682 if (ctxt == NULL) 14683 return (NULL); 14684 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14685 } 14686 14687 /** 14688 * xmlReadFile: 14689 * @filename: a file or URL 14690 * @encoding: the document encoding, or NULL 14691 * @options: a combination of xmlParserOption 14692 * 14693 * parse an XML file from the filesystem or the network. 14694 * 14695 * Returns the resulting document tree 14696 */ 14697 xmlDocPtr 14698 xmlReadFile(const char *filename, const char *encoding, int options) 14699 { 14700 xmlParserCtxtPtr ctxt; 14701 14702 ctxt = xmlCreateURLParserCtxt(filename, options); 14703 if (ctxt == NULL) 14704 return (NULL); 14705 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 14706 } 14707 14708 /** 14709 * xmlReadMemory: 14710 * @buffer: a pointer to a char array 14711 * @size: the size of the array 14712 * @URL: the base URL to use for the document 14713 * @encoding: the document encoding, or NULL 14714 * @options: a combination of xmlParserOption 14715 * 14716 * parse an XML in-memory document and build a tree. 
14717 * 14718 * Returns the resulting document tree 14719 */ 14720 xmlDocPtr 14721 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 14722 { 14723 xmlParserCtxtPtr ctxt; 14724 14725 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14726 if (ctxt == NULL) 14727 return (NULL); 14728 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14729 } 14730 14731 /** 14732 * xmlReadFd: 14733 * @fd: an open file descriptor 14734 * @URL: the base URL to use for the document 14735 * @encoding: the document encoding, or NULL 14736 * @options: a combination of xmlParserOption 14737 * 14738 * parse an XML from a file descriptor and build a tree. 14739 * NOTE that the file descriptor will not be closed when the 14740 * reader is closed or reset. 14741 * 14742 * Returns the resulting document tree 14743 */ 14744 xmlDocPtr 14745 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 14746 { 14747 xmlParserCtxtPtr ctxt; 14748 xmlParserInputBufferPtr input; 14749 xmlParserInputPtr stream; 14750 14751 if (fd < 0) 14752 return (NULL); 14753 14754 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14755 if (input == NULL) 14756 return (NULL); 14757 input->closecallback = NULL; 14758 ctxt = xmlNewParserCtxt(); 14759 if (ctxt == NULL) { 14760 xmlFreeParserInputBuffer(input); 14761 return (NULL); 14762 } 14763 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14764 if (stream == NULL) { 14765 xmlFreeParserInputBuffer(input); 14766 xmlFreeParserCtxt(ctxt); 14767 return (NULL); 14768 } 14769 inputPush(ctxt, stream); 14770 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14771 } 14772 14773 /** 14774 * xmlReadIO: 14775 * @ioread: an I/O read function 14776 * @ioclose: an I/O close function 14777 * @ioctx: an I/O handler 14778 * @URL: the base URL to use for the document 14779 * @encoding: the document encoding, or NULL 14780 * @options: a combination of xmlParserOption 14781 * 14782 * parse an XML 
document from I/O functions and source and build a tree. 14783 * 14784 * Returns the resulting document tree 14785 */ 14786 xmlDocPtr 14787 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 14788 void *ioctx, const char *URL, const char *encoding, int options) 14789 { 14790 xmlParserCtxtPtr ctxt; 14791 xmlParserInputBufferPtr input; 14792 xmlParserInputPtr stream; 14793 14794 if (ioread == NULL) 14795 return (NULL); 14796 14797 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 14798 XML_CHAR_ENCODING_NONE); 14799 if (input == NULL) 14800 return (NULL); 14801 ctxt = xmlNewParserCtxt(); 14802 if (ctxt == NULL) { 14803 xmlFreeParserInputBuffer(input); 14804 return (NULL); 14805 } 14806 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14807 if (stream == NULL) { 14808 xmlFreeParserInputBuffer(input); 14809 xmlFreeParserCtxt(ctxt); 14810 return (NULL); 14811 } 14812 inputPush(ctxt, stream); 14813 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14814 } 14815 14816 /** 14817 * xmlCtxtReadDoc: 14818 * @ctxt: an XML parser context 14819 * @cur: a pointer to a zero terminated string 14820 * @URL: the base URL to use for the document 14821 * @encoding: the document encoding, or NULL 14822 * @options: a combination of xmlParserOption 14823 * 14824 * parse an XML in-memory document and build a tree. 
14825 * This reuses the existing @ctxt parser context 14826 * 14827 * Returns the resulting document tree 14828 */ 14829 xmlDocPtr 14830 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 14831 const char *URL, const char *encoding, int options) 14832 { 14833 xmlParserInputPtr stream; 14834 14835 if (cur == NULL) 14836 return (NULL); 14837 if (ctxt == NULL) 14838 return (NULL); 14839 14840 xmlCtxtReset(ctxt); 14841 14842 stream = xmlNewStringInputStream(ctxt, cur); 14843 if (stream == NULL) { 14844 return (NULL); 14845 } 14846 inputPush(ctxt, stream); 14847 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14848 } 14849 14850 /** 14851 * xmlCtxtReadFile: 14852 * @ctxt: an XML parser context 14853 * @filename: a file or URL 14854 * @encoding: the document encoding, or NULL 14855 * @options: a combination of xmlParserOption 14856 * 14857 * parse an XML file from the filesystem or the network. 14858 * This reuses the existing @ctxt parser context 14859 * 14860 * Returns the resulting document tree 14861 */ 14862 xmlDocPtr 14863 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 14864 const char *encoding, int options) 14865 { 14866 xmlParserInputPtr stream; 14867 14868 if (filename == NULL) 14869 return (NULL); 14870 if (ctxt == NULL) 14871 return (NULL); 14872 14873 xmlCtxtReset(ctxt); 14874 14875 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 14876 if (stream == NULL) { 14877 return (NULL); 14878 } 14879 inputPush(ctxt, stream); 14880 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 14881 } 14882 14883 /** 14884 * xmlCtxtReadMemory: 14885 * @ctxt: an XML parser context 14886 * @buffer: a pointer to a char array 14887 * @size: the size of the array 14888 * @URL: the base URL to use for the document 14889 * @encoding: the document encoding, or NULL 14890 * @options: a combination of xmlParserOption 14891 * 14892 * parse an XML in-memory document and build a tree. 
14893 * This reuses the existing @ctxt parser context 14894 * 14895 * Returns the resulting document tree 14896 */ 14897 xmlDocPtr 14898 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 14899 const char *URL, const char *encoding, int options) 14900 { 14901 xmlParserInputBufferPtr input; 14902 xmlParserInputPtr stream; 14903 14904 if (ctxt == NULL) 14905 return (NULL); 14906 if (buffer == NULL) 14907 return (NULL); 14908 14909 xmlCtxtReset(ctxt); 14910 14911 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14912 if (input == NULL) { 14913 return(NULL); 14914 } 14915 14916 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14917 if (stream == NULL) { 14918 xmlFreeParserInputBuffer(input); 14919 return(NULL); 14920 } 14921 14922 inputPush(ctxt, stream); 14923 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14924 } 14925 14926 /** 14927 * xmlCtxtReadFd: 14928 * @ctxt: an XML parser context 14929 * @fd: an open file descriptor 14930 * @URL: the base URL to use for the document 14931 * @encoding: the document encoding, or NULL 14932 * @options: a combination of xmlParserOption 14933 * 14934 * parse an XML from a file descriptor and build a tree. 14935 * This reuses the existing @ctxt parser context 14936 * NOTE that the file descriptor will not be closed when the 14937 * reader is closed or reset. 
14938 * 14939 * Returns the resulting document tree 14940 */ 14941 xmlDocPtr 14942 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 14943 const char *URL, const char *encoding, int options) 14944 { 14945 xmlParserInputBufferPtr input; 14946 xmlParserInputPtr stream; 14947 14948 if (fd < 0) 14949 return (NULL); 14950 if (ctxt == NULL) 14951 return (NULL); 14952 14953 xmlCtxtReset(ctxt); 14954 14955 14956 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14957 if (input == NULL) 14958 return (NULL); 14959 input->closecallback = NULL; 14960 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14961 if (stream == NULL) { 14962 xmlFreeParserInputBuffer(input); 14963 return (NULL); 14964 } 14965 inputPush(ctxt, stream); 14966 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14967 } 14968 14969 /** 14970 * xmlCtxtReadIO: 14971 * @ctxt: an XML parser context 14972 * @ioread: an I/O read function 14973 * @ioclose: an I/O close function 14974 * @ioctx: an I/O handler 14975 * @URL: the base URL to use for the document 14976 * @encoding: the document encoding, or NULL 14977 * @options: a combination of xmlParserOption 14978 * 14979 * parse an XML document from I/O functions and source and build a tree. 
14980 * This reuses the existing @ctxt parser context 14981 * 14982 * Returns the resulting document tree 14983 */ 14984 xmlDocPtr 14985 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 14986 xmlInputCloseCallback ioclose, void *ioctx, 14987 const char *URL, 14988 const char *encoding, int options) 14989 { 14990 xmlParserInputBufferPtr input; 14991 xmlParserInputPtr stream; 14992 14993 if (ioread == NULL) 14994 return (NULL); 14995 if (ctxt == NULL) 14996 return (NULL); 14997 14998 xmlCtxtReset(ctxt); 14999 15000 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15001 XML_CHAR_ENCODING_NONE); 15002 if (input == NULL) 15003 return (NULL); 15004 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15005 if (stream == NULL) { 15006 xmlFreeParserInputBuffer(input); 15007 return (NULL); 15008 } 15009 inputPush(ctxt, stream); 15010 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15011 } 15012 15013 #define bottom_parser 15014 #include "elfgcchack.h" 15015