1 /* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel (at) veillard.com 31 */ 32 33 #define IN_LIBXML 34 #include "libxml.h" 35 36 #if defined(WIN32) && !defined (__CYGWIN__) 37 #define XML_DIR_SEP '\\' 38 #else 39 #define XML_DIR_SEP '/' 40 #endif 41 42 #include <stdlib.h> 43 #include <string.h> 44 #include <stdarg.h> 45 #include <libxml/xmlmemory.h> 46 #include <libxml/threads.h> 47 #include <libxml/globals.h> 48 #include <libxml/tree.h> 49 #include <libxml/parser.h> 50 #include <libxml/parserInternals.h> 51 #include <libxml/valid.h> 52 #include <libxml/entities.h> 53 #include <libxml/xmlerror.h> 54 #include <libxml/encoding.h> 55 #include <libxml/xmlIO.h> 56 #include <libxml/uri.h> 57 #ifdef LIBXML_CATALOG_ENABLED 58 #include <libxml/catalog.h> 59 #endif 60 #ifdef LIBXML_SCHEMAS_ENABLED 61 #include <libxml/xmlschemastypes.h> 62 #include <libxml/relaxng.h> 63 #endif 64 #ifdef HAVE_CTYPE_H 65 #include <ctype.h> 66 #endif 67 #ifdef HAVE_STDLIB_H 68 #include <stdlib.h> 69 #endif 70 #ifdef HAVE_SYS_STAT_H 71 #include <sys/stat.h> 72 #endif 73 #ifdef HAVE_FCNTL_H 74 #include <fcntl.h> 75 #endif 76 #ifdef HAVE_UNISTD_H 77 #include <unistd.h> 78 #endif 79 #ifdef HAVE_ZLIB_H 80 #include <zlib.h> 81 #endif 82 #ifdef HAVE_LZMA_H 83 #include <lzma.h> 84 #endif 85 86 static void 87 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 88 89 static xmlParserCtxtPtr 90 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 91 const xmlChar *base, xmlParserCtxtPtr pctx); 92 93 /************************************************************************ 94 * * 95 * Arbitrary limits set in the parser. See XML_PARSE_HUGE * 96 * * 97 ************************************************************************/ 98 99 #define XML_PARSER_BIG_ENTITY 1000 100 #define XML_PARSER_LOT_ENTITY 5000 101 102 /* 103 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 104 * replacement over the size in byte of the input indicates that you have 105 * and eponential behaviour. 
A value of 10 correspond to at least 3 entity 106 * replacement per byte of input. 107 */ 108 #define XML_PARSER_NON_LINEAR 10 109 110 /* 111 * xmlParserEntityCheck 112 * 113 * Function to check non-linear entity expansion behaviour 114 * This is here to detect and stop exponential linear entity expansion 115 * This is not a limitation of the parser but a safety 116 * boundary feature. It can be disabled with the XML_PARSE_HUGE 117 * parser option. 118 */ 119 static int 120 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, 121 xmlEntityPtr ent) 122 { 123 unsigned long consumed = 0; 124 125 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 126 return (0); 127 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 128 return (1); 129 if (size != 0) { 130 /* 131 * Do the check based on the replacement size of the entity 132 */ 133 if (size < XML_PARSER_BIG_ENTITY) 134 return(0); 135 136 /* 137 * A limit on the amount of text data reasonably used 138 */ 139 if (ctxt->input != NULL) { 140 consumed = ctxt->input->consumed + 141 (ctxt->input->cur - ctxt->input->base); 142 } 143 consumed += ctxt->sizeentities; 144 145 if ((size < XML_PARSER_NON_LINEAR * consumed) && 146 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 147 return (0); 148 } else if (ent != NULL) { 149 /* 150 * use the number of parsed entities in the replacement 151 */ 152 size = ent->checked; 153 154 /* 155 * The amount of data parsed counting entities size only once 156 */ 157 if (ctxt->input != NULL) { 158 consumed = ctxt->input->consumed + 159 (ctxt->input->cur - ctxt->input->base); 160 } 161 consumed += ctxt->sizeentities; 162 163 /* 164 * Check the density of entities for the amount of data 165 * knowing an entity reference will take at least 3 bytes 166 */ 167 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 168 return (0); 169 } else { 170 /* 171 * strange we got no data for checking just return 172 */ 173 return (0); 174 } 175 176 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, 
NULL); 177 return (1); 178 } 179 180 /** 181 * xmlParserMaxDepth: 182 * 183 * arbitrary depth limit for the XML documents that we allow to 184 * process. This is not a limitation of the parser but a safety 185 * boundary feature. It can be disabled with the XML_PARSE_HUGE 186 * parser option. 187 */ 188 unsigned int xmlParserMaxDepth = 256; 189 190 191 192 #define SAX2 1 193 #define XML_PARSER_BIG_BUFFER_SIZE 300 194 #define XML_PARSER_BUFFER_SIZE 100 195 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 196 197 /* 198 * List of XML prefixed PI allowed by W3C specs 199 */ 200 201 static const char *xmlW3CPIs[] = { 202 "xml-stylesheet", 203 "xml-model", 204 NULL 205 }; 206 207 208 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 209 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 210 const xmlChar **str); 211 212 static xmlParserErrors 213 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 214 xmlSAXHandlerPtr sax, 215 void *user_data, int depth, const xmlChar *URL, 216 const xmlChar *ID, xmlNodePtr *list); 217 218 static int 219 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 220 const char *encoding); 221 #ifdef LIBXML_LEGACY_ENABLED 222 static void 223 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 224 xmlNodePtr lastNode); 225 #endif /* LIBXML_LEGACY_ENABLED */ 226 227 static xmlParserErrors 228 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 229 const xmlChar *string, void *user_data, xmlNodePtr *lst); 230 231 static int 232 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 233 234 /************************************************************************ 235 * * 236 * Some factorized error routines * 237 * * 238 ************************************************************************/ 239 240 /** 241 * xmlErrAttributeDup: 242 * @ctxt: an XML parser context 243 * @prefix: the attribute prefix 244 * @localname: the attribute 
localname 245 * 246 * Handle a redefinition of attribute error 247 */ 248 static void 249 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 250 const xmlChar * localname) 251 { 252 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 253 (ctxt->instate == XML_PARSER_EOF)) 254 return; 255 if (ctxt != NULL) 256 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 257 258 if (prefix == NULL) 259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 260 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 261 (const char *) localname, NULL, NULL, 0, 0, 262 "Attribute %s redefined\n", localname); 263 else 264 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 265 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 266 (const char *) prefix, (const char *) localname, 267 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 268 localname); 269 if (ctxt != NULL) { 270 ctxt->wellFormed = 0; 271 if (ctxt->recovery == 0) 272 ctxt->disableSAX = 1; 273 } 274 } 275 276 /** 277 * xmlFatalErr: 278 * @ctxt: an XML parser context 279 * @error: the error number 280 * @extra: extra information string 281 * 282 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 283 */ 284 static void 285 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 286 { 287 const char *errmsg; 288 289 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 290 (ctxt->instate == XML_PARSER_EOF)) 291 return; 292 switch (error) { 293 case XML_ERR_INVALID_HEX_CHARREF: 294 errmsg = "CharRef: invalid hexadecimal value\n"; 295 break; 296 case XML_ERR_INVALID_DEC_CHARREF: 297 errmsg = "CharRef: invalid decimal value\n"; 298 break; 299 case XML_ERR_INVALID_CHARREF: 300 errmsg = "CharRef: invalid value\n"; 301 break; 302 case XML_ERR_INTERNAL_ERROR: 303 errmsg = "internal error"; 304 break; 305 case XML_ERR_PEREF_AT_EOF: 306 errmsg = "PEReference at end of document\n"; 307 break; 308 case XML_ERR_PEREF_IN_PROLOG: 309 errmsg = "PEReference in prolog\n"; 310 break; 311 case XML_ERR_PEREF_IN_EPILOG: 312 errmsg = "PEReference in epilog\n"; 313 break; 314 case XML_ERR_PEREF_NO_NAME: 315 errmsg = "PEReference: no name\n"; 316 break; 317 case XML_ERR_PEREF_SEMICOL_MISSING: 318 errmsg = "PEReference: expecting ';'\n"; 319 break; 320 case XML_ERR_ENTITY_LOOP: 321 errmsg = "Detected an entity reference loop\n"; 322 break; 323 case XML_ERR_ENTITY_NOT_STARTED: 324 errmsg = "EntityValue: \" or ' expected\n"; 325 break; 326 case XML_ERR_ENTITY_PE_INTERNAL: 327 errmsg = "PEReferences forbidden in internal subset\n"; 328 break; 329 case XML_ERR_ENTITY_NOT_FINISHED: 330 errmsg = "EntityValue: \" or ' expected\n"; 331 break; 332 case XML_ERR_ATTRIBUTE_NOT_STARTED: 333 errmsg = "AttValue: \" or ' expected\n"; 334 break; 335 case XML_ERR_LT_IN_ATTRIBUTE: 336 errmsg = "Unescaped '<' not allowed in attributes values\n"; 337 break; 338 case XML_ERR_LITERAL_NOT_STARTED: 339 errmsg = "SystemLiteral \" or ' expected\n"; 340 break; 341 case XML_ERR_LITERAL_NOT_FINISHED: 342 errmsg = "Unfinished System or Public ID \" or ' expected\n"; 343 break; 344 case XML_ERR_MISPLACED_CDATA_END: 345 errmsg = "Sequence ']]>' not allowed in 
content\n"; 346 break; 347 case XML_ERR_URI_REQUIRED: 348 errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 349 break; 350 case XML_ERR_PUBID_REQUIRED: 351 errmsg = "PUBLIC, the Public Identifier is missing\n"; 352 break; 353 case XML_ERR_HYPHEN_IN_COMMENT: 354 errmsg = "Comment must not contain '--' (double-hyphen)\n"; 355 break; 356 case XML_ERR_PI_NOT_STARTED: 357 errmsg = "xmlParsePI : no target name\n"; 358 break; 359 case XML_ERR_RESERVED_XML_NAME: 360 errmsg = "Invalid PI name\n"; 361 break; 362 case XML_ERR_NOTATION_NOT_STARTED: 363 errmsg = "NOTATION: Name expected here\n"; 364 break; 365 case XML_ERR_NOTATION_NOT_FINISHED: 366 errmsg = "'>' required to close NOTATION declaration\n"; 367 break; 368 case XML_ERR_VALUE_REQUIRED: 369 errmsg = "Entity value required\n"; 370 break; 371 case XML_ERR_URI_FRAGMENT: 372 errmsg = "Fragment not allowed"; 373 break; 374 case XML_ERR_ATTLIST_NOT_STARTED: 375 errmsg = "'(' required to start ATTLIST enumeration\n"; 376 break; 377 case XML_ERR_NMTOKEN_REQUIRED: 378 errmsg = "NmToken expected in ATTLIST enumeration\n"; 379 break; 380 case XML_ERR_ATTLIST_NOT_FINISHED: 381 errmsg = "')' required to finish ATTLIST enumeration\n"; 382 break; 383 case XML_ERR_MIXED_NOT_STARTED: 384 errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 385 break; 386 case XML_ERR_PCDATA_REQUIRED: 387 errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 388 break; 389 case XML_ERR_ELEMCONTENT_NOT_STARTED: 390 errmsg = "ContentDecl : Name or '(' expected\n"; 391 break; 392 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 393 errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 394 break; 395 case XML_ERR_PEREF_IN_INT_SUBSET: 396 errmsg = 397 "PEReference: forbidden within markup decl in internal subset\n"; 398 break; 399 case XML_ERR_GT_REQUIRED: 400 errmsg = "expected '>'\n"; 401 break; 402 case XML_ERR_CONDSEC_INVALID: 403 errmsg = "XML conditional section '[' expected\n"; 404 break; 405 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 406 errmsg = "Content error in 
the external subset\n"; 407 break; 408 case XML_ERR_CONDSEC_INVALID_KEYWORD: 409 errmsg = 410 "conditional section INCLUDE or IGNORE keyword expected\n"; 411 break; 412 case XML_ERR_CONDSEC_NOT_FINISHED: 413 errmsg = "XML conditional section not closed\n"; 414 break; 415 case XML_ERR_XMLDECL_NOT_STARTED: 416 errmsg = "Text declaration '<?xml' required\n"; 417 break; 418 case XML_ERR_XMLDECL_NOT_FINISHED: 419 errmsg = "parsing XML declaration: '?>' expected\n"; 420 break; 421 case XML_ERR_EXT_ENTITY_STANDALONE: 422 errmsg = "external parsed entities cannot be standalone\n"; 423 break; 424 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 425 errmsg = "EntityRef: expecting ';'\n"; 426 break; 427 case XML_ERR_DOCTYPE_NOT_FINISHED: 428 errmsg = "DOCTYPE improperly terminated\n"; 429 break; 430 case XML_ERR_LTSLASH_REQUIRED: 431 errmsg = "EndTag: '</' not found\n"; 432 break; 433 case XML_ERR_EQUAL_REQUIRED: 434 errmsg = "expected '='\n"; 435 break; 436 case XML_ERR_STRING_NOT_CLOSED: 437 errmsg = "String not closed expecting \" or '\n"; 438 break; 439 case XML_ERR_STRING_NOT_STARTED: 440 errmsg = "String not started expecting ' or \"\n"; 441 break; 442 case XML_ERR_ENCODING_NAME: 443 errmsg = "Invalid XML encoding name\n"; 444 break; 445 case XML_ERR_STANDALONE_VALUE: 446 errmsg = "standalone accepts only 'yes' or 'no'\n"; 447 break; 448 case XML_ERR_DOCUMENT_EMPTY: 449 errmsg = "Document is empty\n"; 450 break; 451 case XML_ERR_DOCUMENT_END: 452 errmsg = "Extra content at the end of the document\n"; 453 break; 454 case XML_ERR_NOT_WELL_BALANCED: 455 errmsg = "chunk is not well balanced\n"; 456 break; 457 case XML_ERR_EXTRA_CONTENT: 458 errmsg = "extra content at the end of well balanced chunk\n"; 459 break; 460 case XML_ERR_VERSION_MISSING: 461 errmsg = "Malformed declaration expecting version\n"; 462 break; 463 #if 0 464 case: 465 errmsg = "\n"; 466 break; 467 #endif 468 default: 469 errmsg = "Unregistered error message\n"; 470 } 471 if (ctxt != NULL) 472 ctxt->errNo = error; 
473 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 474 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, 475 info); 476 if (ctxt != NULL) { 477 ctxt->wellFormed = 0; 478 if (ctxt->recovery == 0) 479 ctxt->disableSAX = 1; 480 } 481 } 482 483 /** 484 * xmlFatalErrMsg: 485 * @ctxt: an XML parser context 486 * @error: the error number 487 * @msg: the error message 488 * 489 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 490 */ 491 static void 492 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 493 const char *msg) 494 { 495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 496 (ctxt->instate == XML_PARSER_EOF)) 497 return; 498 if (ctxt != NULL) 499 ctxt->errNo = error; 500 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 501 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 502 if (ctxt != NULL) { 503 ctxt->wellFormed = 0; 504 if (ctxt->recovery == 0) 505 ctxt->disableSAX = 1; 506 } 507 } 508 509 /** 510 * xmlWarningMsg: 511 * @ctxt: an XML parser context 512 * @error: the error number 513 * @msg: the error message 514 * @str1: extra data 515 * @str2: extra data 516 * 517 * Handle a warning. 518 */ 519 static void 520 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 521 const char *msg, const xmlChar *str1, const xmlChar *str2) 522 { 523 xmlStructuredErrorFunc schannel = NULL; 524 525 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 526 (ctxt->instate == XML_PARSER_EOF)) 527 return; 528 if ((ctxt != NULL) && (ctxt->sax != NULL) && 529 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 530 schannel = ctxt->sax->serror; 531 if (ctxt != NULL) { 532 __xmlRaiseError(schannel, 533 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 534 ctxt->userData, 535 ctxt, NULL, XML_FROM_PARSER, error, 536 XML_ERR_WARNING, NULL, 0, 537 (const char *) str1, (const char *) str2, NULL, 0, 0, 538 msg, (const char *) str1, (const char *) str2); 539 } else { 540 __xmlRaiseError(schannel, NULL, NULL, 541 ctxt, NULL, XML_FROM_PARSER, error, 542 XML_ERR_WARNING, NULL, 0, 543 (const char *) str1, (const char *) str2, NULL, 0, 0, 544 msg, (const char *) str1, (const char *) str2); 545 } 546 } 547 548 /** 549 * xmlValidityError: 550 * @ctxt: an XML parser context 551 * @error: the error number 552 * @msg: the error message 553 * @str1: extra data 554 * 555 * Handle a validity error. 556 */ 557 static void 558 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 559 const char *msg, const xmlChar *str1, const xmlChar *str2) 560 { 561 xmlStructuredErrorFunc schannel = NULL; 562 563 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 564 (ctxt->instate == XML_PARSER_EOF)) 565 return; 566 if (ctxt != NULL) { 567 ctxt->errNo = error; 568 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 569 schannel = ctxt->sax->serror; 570 } 571 if (ctxt != NULL) { 572 __xmlRaiseError(schannel, 573 ctxt->vctxt.error, ctxt->vctxt.userData, 574 ctxt, NULL, XML_FROM_DTD, error, 575 XML_ERR_ERROR, NULL, 0, (const char *) str1, 576 (const char *) str2, NULL, 0, 0, 577 msg, (const char *) str1, (const char *) str2); 578 ctxt->valid = 0; 579 } else { 580 __xmlRaiseError(schannel, NULL, NULL, 581 ctxt, NULL, XML_FROM_DTD, error, 582 XML_ERR_ERROR, NULL, 0, (const char *) str1, 583 (const char *) str2, NULL, 0, 0, 584 msg, (const char *) str1, (const char *) str2); 585 } 586 } 587 588 /** 589 * xmlFatalErrMsgInt: 590 * @ctxt: an XML parser context 591 * @error: the error number 592 * @msg: the error message 593 * @val: an integer value 594 * 595 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 596 */ 597 static void 598 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 599 const char *msg, int val) 600 { 601 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 602 (ctxt->instate == XML_PARSER_EOF)) 603 return; 604 if (ctxt != NULL) 605 ctxt->errNo = error; 606 __xmlRaiseError(NULL, NULL, NULL, 607 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 608 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 609 if (ctxt != NULL) { 610 ctxt->wellFormed = 0; 611 if (ctxt->recovery == 0) 612 ctxt->disableSAX = 1; 613 } 614 } 615 616 /** 617 * xmlFatalErrMsgStrIntStr: 618 * @ctxt: an XML parser context 619 * @error: the error number 620 * @msg: the error message 621 * @str1: an string info 622 * @val: an integer value 623 * @str2: an string info 624 * 625 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 626 */ 627 static void 628 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 629 const char *msg, const xmlChar *str1, int val, 630 const xmlChar *str2) 631 { 632 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 633 (ctxt->instate == XML_PARSER_EOF)) 634 return; 635 if (ctxt != NULL) 636 ctxt->errNo = error; 637 __xmlRaiseError(NULL, NULL, NULL, 638 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 639 NULL, 0, (const char *) str1, (const char *) str2, 640 NULL, val, 0, msg, str1, val, str2); 641 if (ctxt != NULL) { 642 ctxt->wellFormed = 0; 643 if (ctxt->recovery == 0) 644 ctxt->disableSAX = 1; 645 } 646 } 647 648 /** 649 * xmlFatalErrMsgStr: 650 * @ctxt: an XML parser context 651 * @error: the error number 652 * @msg: the error message 653 * @val: a string value 654 * 655 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 656 */ 657 static void 658 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 659 const char *msg, const xmlChar * val) 660 { 661 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 662 (ctxt->instate == XML_PARSER_EOF)) 663 return; 664 if (ctxt != NULL) 665 ctxt->errNo = error; 666 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 667 XML_FROM_PARSER, error, XML_ERR_FATAL, 668 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 669 val); 670 if (ctxt != NULL) { 671 ctxt->wellFormed = 0; 672 if (ctxt->recovery == 0) 673 ctxt->disableSAX = 1; 674 } 675 } 676 677 /** 678 * xmlErrMsgStr: 679 * @ctxt: an XML parser context 680 * @error: the error number 681 * @msg: the error message 682 * @val: a string value 683 * 684 * Handle a non fatal parser error 685 */ 686 static void 687 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 688 const char *msg, const xmlChar * val) 689 { 690 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 691 (ctxt->instate == XML_PARSER_EOF)) 692 return; 693 if (ctxt != NULL) 694 ctxt->errNo = error; 695 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 696 XML_FROM_PARSER, error, XML_ERR_ERROR, 697 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 698 val); 699 } 700 701 /** 702 * xmlNsErr: 703 * @ctxt: an XML parser context 704 * @error: the error number 705 * @msg: the message 706 * @info1: extra information string 707 * @info2: extra information string 708 * 709 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 710 */ 711 static void 712 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 713 const char *msg, 714 const xmlChar * info1, const xmlChar * info2, 715 const xmlChar * info3) 716 { 717 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 718 (ctxt->instate == XML_PARSER_EOF)) 719 return; 720 if (ctxt != NULL) 721 ctxt->errNo = error; 722 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 723 XML_ERR_ERROR, NULL, 0, (const char *) info1, 724 (const char *) info2, (const char *) info3, 0, 0, msg, 725 info1, info2, info3); 726 if (ctxt != NULL) 727 ctxt->nsWellFormed = 0; 728 } 729 730 /** 731 * xmlNsWarn 732 * @ctxt: an XML parser context 733 * @error: the error number 734 * @msg: the message 735 * @info1: extra information string 736 * @info2: extra information string 737 * 738 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 739 */ 740 static void 741 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 742 const char *msg, 743 const xmlChar * info1, const xmlChar * info2, 744 const xmlChar * info3) 745 { 746 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 747 (ctxt->instate == XML_PARSER_EOF)) 748 return; 749 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 750 XML_ERR_WARNING, NULL, 0, (const char *) info1, 751 (const char *) info2, (const char *) info3, 0, 0, msg, 752 info1, info2, info3); 753 } 754 755 /************************************************************************ 756 * * 757 * Library wide options * 758 * * 759 ************************************************************************/ 760 761 /** 762 * xmlHasFeature: 763 * @feature: the feature to be examined 764 * 765 * Examines if the library has been compiled with a given feature. 766 * 767 * Returns a non-zero value if the feature exist, otherwise zero. 768 * Returns zero (0) if the feature does not exist or an unknown 769 * unknown feature is requested, non-zero otherwise. 
770 */ 771 int 772 xmlHasFeature(xmlFeature feature) 773 { 774 switch (feature) { 775 case XML_WITH_THREAD: 776 #ifdef LIBXML_THREAD_ENABLED 777 return(1); 778 #else 779 return(0); 780 #endif 781 case XML_WITH_TREE: 782 #ifdef LIBXML_TREE_ENABLED 783 return(1); 784 #else 785 return(0); 786 #endif 787 case XML_WITH_OUTPUT: 788 #ifdef LIBXML_OUTPUT_ENABLED 789 return(1); 790 #else 791 return(0); 792 #endif 793 case XML_WITH_PUSH: 794 #ifdef LIBXML_PUSH_ENABLED 795 return(1); 796 #else 797 return(0); 798 #endif 799 case XML_WITH_READER: 800 #ifdef LIBXML_READER_ENABLED 801 return(1); 802 #else 803 return(0); 804 #endif 805 case XML_WITH_PATTERN: 806 #ifdef LIBXML_PATTERN_ENABLED 807 return(1); 808 #else 809 return(0); 810 #endif 811 case XML_WITH_WRITER: 812 #ifdef LIBXML_WRITER_ENABLED 813 return(1); 814 #else 815 return(0); 816 #endif 817 case XML_WITH_SAX1: 818 #ifdef LIBXML_SAX1_ENABLED 819 return(1); 820 #else 821 return(0); 822 #endif 823 case XML_WITH_FTP: 824 #ifdef LIBXML_FTP_ENABLED 825 return(1); 826 #else 827 return(0); 828 #endif 829 case XML_WITH_HTTP: 830 #ifdef LIBXML_HTTP_ENABLED 831 return(1); 832 #else 833 return(0); 834 #endif 835 case XML_WITH_VALID: 836 #ifdef LIBXML_VALID_ENABLED 837 return(1); 838 #else 839 return(0); 840 #endif 841 case XML_WITH_HTML: 842 #ifdef LIBXML_HTML_ENABLED 843 return(1); 844 #else 845 return(0); 846 #endif 847 case XML_WITH_LEGACY: 848 #ifdef LIBXML_LEGACY_ENABLED 849 return(1); 850 #else 851 return(0); 852 #endif 853 case XML_WITH_C14N: 854 #ifdef LIBXML_C14N_ENABLED 855 return(1); 856 #else 857 return(0); 858 #endif 859 case XML_WITH_CATALOG: 860 #ifdef LIBXML_CATALOG_ENABLED 861 return(1); 862 #else 863 return(0); 864 #endif 865 case XML_WITH_XPATH: 866 #ifdef LIBXML_XPATH_ENABLED 867 return(1); 868 #else 869 return(0); 870 #endif 871 case XML_WITH_XPTR: 872 #ifdef LIBXML_XPTR_ENABLED 873 return(1); 874 #else 875 return(0); 876 #endif 877 case XML_WITH_XINCLUDE: 878 #ifdef LIBXML_XINCLUDE_ENABLED 879 return(1); 880 
#else 881 return(0); 882 #endif 883 case XML_WITH_ICONV: 884 #ifdef LIBXML_ICONV_ENABLED 885 return(1); 886 #else 887 return(0); 888 #endif 889 case XML_WITH_ISO8859X: 890 #ifdef LIBXML_ISO8859X_ENABLED 891 return(1); 892 #else 893 return(0); 894 #endif 895 case XML_WITH_UNICODE: 896 #ifdef LIBXML_UNICODE_ENABLED 897 return(1); 898 #else 899 return(0); 900 #endif 901 case XML_WITH_REGEXP: 902 #ifdef LIBXML_REGEXP_ENABLED 903 return(1); 904 #else 905 return(0); 906 #endif 907 case XML_WITH_AUTOMATA: 908 #ifdef LIBXML_AUTOMATA_ENABLED 909 return(1); 910 #else 911 return(0); 912 #endif 913 case XML_WITH_EXPR: 914 #ifdef LIBXML_EXPR_ENABLED 915 return(1); 916 #else 917 return(0); 918 #endif 919 case XML_WITH_SCHEMAS: 920 #ifdef LIBXML_SCHEMAS_ENABLED 921 return(1); 922 #else 923 return(0); 924 #endif 925 case XML_WITH_SCHEMATRON: 926 #ifdef LIBXML_SCHEMATRON_ENABLED 927 return(1); 928 #else 929 return(0); 930 #endif 931 case XML_WITH_MODULES: 932 #ifdef LIBXML_MODULES_ENABLED 933 return(1); 934 #else 935 return(0); 936 #endif 937 case XML_WITH_DEBUG: 938 #ifdef LIBXML_DEBUG_ENABLED 939 return(1); 940 #else 941 return(0); 942 #endif 943 case XML_WITH_DEBUG_MEM: 944 #ifdef DEBUG_MEMORY_LOCATION 945 return(1); 946 #else 947 return(0); 948 #endif 949 case XML_WITH_DEBUG_RUN: 950 #ifdef LIBXML_DEBUG_RUNTIME 951 return(1); 952 #else 953 return(0); 954 #endif 955 case XML_WITH_ZLIB: 956 #ifdef LIBXML_ZLIB_ENABLED 957 return(1); 958 #else 959 return(0); 960 #endif 961 case XML_WITH_LZMA: 962 #ifdef LIBXML_LZMA_ENABLED 963 return(1); 964 #else 965 return(0); 966 #endif 967 case XML_WITH_ICU: 968 #ifdef LIBXML_ICU_ENABLED 969 return(1); 970 #else 971 return(0); 972 #endif 973 default: 974 break; 975 } 976 return(0); 977 } 978 979 /************************************************************************ 980 * * 981 * SAX2 defaulted attributes handling * 982 * * 983 ************************************************************************/ 984 985 /** 986 * xmlDetectSAX2: 987 * 
@ctxt: an XML parser context 988 * 989 * Do the SAX2 detection and specific intialization 990 */ 991 static void 992 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 993 if (ctxt == NULL) return; 994 #ifdef LIBXML_SAX1_ENABLED 995 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 996 ((ctxt->sax->startElementNs != NULL) || 997 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 998 #else 999 ctxt->sax2 = 1; 1000 #endif /* LIBXML_SAX1_ENABLED */ 1001 1002 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1003 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1004 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1005 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1006 (ctxt->str_xml_ns == NULL)) { 1007 xmlErrMemory(ctxt, NULL); 1008 } 1009 } 1010 1011 typedef struct _xmlDefAttrs xmlDefAttrs; 1012 typedef xmlDefAttrs *xmlDefAttrsPtr; 1013 struct _xmlDefAttrs { 1014 int nbAttrs; /* number of defaulted attributes on that element */ 1015 int maxAttrs; /* the size of the array */ 1016 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1017 }; 1018 1019 /** 1020 * xmlAttrNormalizeSpace: 1021 * @src: the source string 1022 * @dst: the target string 1023 * 1024 * Normalize the space in non CDATA attribute values: 1025 * If the attribute type is not CDATA, then the XML processor MUST further 1026 * process the normalized attribute value by discarding any leading and 1027 * trailing space (#x20) characters, and by replacing sequences of space 1028 * (#x20) characters by a single space (#x20) character. 1029 * Note that the size of dst need to be at least src, and if one doesn't need 1030 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1031 * passing src as dst is just fine. 1032 * 1033 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1034 * is needed. 
1035 */ 1036 static xmlChar * 1037 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1038 { 1039 if ((src == NULL) || (dst == NULL)) 1040 return(NULL); 1041 1042 while (*src == 0x20) src++; 1043 while (*src != 0) { 1044 if (*src == 0x20) { 1045 while (*src == 0x20) src++; 1046 if (*src != 0) 1047 *dst++ = 0x20; 1048 } else { 1049 *dst++ = *src++; 1050 } 1051 } 1052 *dst = 0; 1053 if (dst == src) 1054 return(NULL); 1055 return(dst); 1056 } 1057 1058 /** 1059 * xmlAttrNormalizeSpace2: 1060 * @src: the source string 1061 * 1062 * Normalize the space in non CDATA attribute values, a slightly more complex 1063 * front end to avoid allocation problems when running on attribute values 1064 * coming from the input. 1065 * 1066 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1067 * is needed. 1068 */ 1069 static const xmlChar * 1070 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1071 { 1072 int i; 1073 int remove_head = 0; 1074 int need_realloc = 0; 1075 const xmlChar *cur; 1076 1077 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1078 return(NULL); 1079 i = *len; 1080 if (i <= 0) 1081 return(NULL); 1082 1083 cur = src; 1084 while (*cur == 0x20) { 1085 cur++; 1086 remove_head++; 1087 } 1088 while (*cur != 0) { 1089 if (*cur == 0x20) { 1090 cur++; 1091 if ((*cur == 0x20) || (*cur == 0)) { 1092 need_realloc = 1; 1093 break; 1094 } 1095 } else 1096 cur++; 1097 } 1098 if (need_realloc) { 1099 xmlChar *ret; 1100 1101 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1102 if (ret == NULL) { 1103 xmlErrMemory(ctxt, NULL); 1104 return(NULL); 1105 } 1106 xmlAttrNormalizeSpace(ret, ret); 1107 *len = (int) strlen((const char *)ret); 1108 return(ret); 1109 } else if (remove_head) { 1110 *len -= remove_head; 1111 memmove(src, src + remove_head, 1 + *len); 1112 return(src); 1113 } 1114 return(NULL); 1115 } 1116 1117 /** 1118 * xmlAddDefAttrs: 1119 * @ctxt: an XML parser context 1120 * @fullname: the element fullname 
1121 * @fullattr: the attribute fullname 1122 * @value: the attribute value 1123 * 1124 * Add a defaulted attribute for an element 1125 */ 1126 static void 1127 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1128 const xmlChar *fullname, 1129 const xmlChar *fullattr, 1130 const xmlChar *value) { 1131 xmlDefAttrsPtr defaults; 1132 int len; 1133 const xmlChar *name; 1134 const xmlChar *prefix; 1135 1136 /* 1137 * Allows to detect attribute redefinitions 1138 */ 1139 if (ctxt->attsSpecial != NULL) { 1140 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1141 return; 1142 } 1143 1144 if (ctxt->attsDefault == NULL) { 1145 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1146 if (ctxt->attsDefault == NULL) 1147 goto mem_error; 1148 } 1149 1150 /* 1151 * split the element name into prefix:localname , the string found 1152 * are within the DTD and then not associated to namespace names. 1153 */ 1154 name = xmlSplitQName3(fullname, &len); 1155 if (name == NULL) { 1156 name = xmlDictLookup(ctxt->dict, fullname, -1); 1157 prefix = NULL; 1158 } else { 1159 name = xmlDictLookup(ctxt->dict, name, -1); 1160 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1161 } 1162 1163 /* 1164 * make sure there is some storage 1165 */ 1166 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1167 if (defaults == NULL) { 1168 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1169 (4 * 5) * sizeof(const xmlChar *)); 1170 if (defaults == NULL) 1171 goto mem_error; 1172 defaults->nbAttrs = 0; 1173 defaults->maxAttrs = 4; 1174 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1175 defaults, NULL) < 0) { 1176 xmlFree(defaults); 1177 goto mem_error; 1178 } 1179 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1180 xmlDefAttrsPtr temp; 1181 1182 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1183 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1184 if (temp == NULL) 1185 goto mem_error; 1186 defaults = temp; 1187 
defaults->maxAttrs *= 2; 1188 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1189 defaults, NULL) < 0) { 1190 xmlFree(defaults); 1191 goto mem_error; 1192 } 1193 } 1194 1195 /* 1196 * Split the element name into prefix:localname , the string found 1197 * are within the DTD and hen not associated to namespace names. 1198 */ 1199 name = xmlSplitQName3(fullattr, &len); 1200 if (name == NULL) { 1201 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1202 prefix = NULL; 1203 } else { 1204 name = xmlDictLookup(ctxt->dict, name, -1); 1205 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1206 } 1207 1208 defaults->values[5 * defaults->nbAttrs] = name; 1209 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1210 /* intern the string and precompute the end */ 1211 len = xmlStrlen(value); 1212 value = xmlDictLookup(ctxt->dict, value, len); 1213 defaults->values[5 * defaults->nbAttrs + 2] = value; 1214 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1215 if (ctxt->external) 1216 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1217 else 1218 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1219 defaults->nbAttrs++; 1220 1221 return; 1222 1223 mem_error: 1224 xmlErrMemory(ctxt, NULL); 1225 return; 1226 } 1227 1228 /** 1229 * xmlAddSpecialAttr: 1230 * @ctxt: an XML parser context 1231 * @fullname: the element fullname 1232 * @fullattr: the attribute fullname 1233 * @type: the attribute type 1234 * 1235 * Register this attribute type 1236 */ 1237 static void 1238 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1239 const xmlChar *fullname, 1240 const xmlChar *fullattr, 1241 int type) 1242 { 1243 if (ctxt->attsSpecial == NULL) { 1244 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1245 if (ctxt->attsSpecial == NULL) 1246 goto mem_error; 1247 } 1248 1249 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1250 return; 1251 1252 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1253 (void *) (long) type); 1254 return; 
1255 1256 mem_error: 1257 xmlErrMemory(ctxt, NULL); 1258 return; 1259 } 1260 1261 /** 1262 * xmlCleanSpecialAttrCallback: 1263 * 1264 * Removes CDATA attributes from the special attribute table 1265 */ 1266 static void 1267 xmlCleanSpecialAttrCallback(void *payload, void *data, 1268 const xmlChar *fullname, const xmlChar *fullattr, 1269 const xmlChar *unused ATTRIBUTE_UNUSED) { 1270 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1271 1272 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1273 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1274 } 1275 } 1276 1277 /** 1278 * xmlCleanSpecialAttr: 1279 * @ctxt: an XML parser context 1280 * 1281 * Trim the list of attributes defined to remove all those of type 1282 * CDATA as they are not special. This call should be done when finishing 1283 * to parse the DTD and before starting to parse the document root. 1284 */ 1285 static void 1286 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1287 { 1288 if (ctxt->attsSpecial == NULL) 1289 return; 1290 1291 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1292 1293 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1294 xmlHashFree(ctxt->attsSpecial, NULL); 1295 ctxt->attsSpecial = NULL; 1296 } 1297 return; 1298 } 1299 1300 /** 1301 * xmlCheckLanguageID: 1302 * @lang: pointer to the string value 1303 * 1304 * Checks that the value conforms to the LanguageID production: 1305 * 1306 * NOTE: this is somewhat deprecated, those productions were removed from 1307 * the XML Second edition. 
1308 * 1309 * [33] LanguageID ::= Langcode ('-' Subcode)* 1310 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1311 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1312 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1313 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1314 * [38] Subcode ::= ([a-z] | [A-Z])+ 1315 * 1316 * The current REC reference the sucessors of RFC 1766, currently 5646 1317 * 1318 * http://www.rfc-editor.org/rfc/rfc5646.txt 1319 * langtag = language 1320 * ["-" script] 1321 * ["-" region] 1322 * *("-" variant) 1323 * *("-" extension) 1324 * ["-" privateuse] 1325 * language = 2*3ALPHA ; shortest ISO 639 code 1326 * ["-" extlang] ; sometimes followed by 1327 * ; extended language subtags 1328 * / 4ALPHA ; or reserved for future use 1329 * / 5*8ALPHA ; or registered language subtag 1330 * 1331 * extlang = 3ALPHA ; selected ISO 639 codes 1332 * *2("-" 3ALPHA) ; permanently reserved 1333 * 1334 * script = 4ALPHA ; ISO 15924 code 1335 * 1336 * region = 2ALPHA ; ISO 3166-1 code 1337 * / 3DIGIT ; UN M.49 code 1338 * 1339 * variant = 5*8alphanum ; registered variants 1340 * / (DIGIT 3alphanum) 1341 * 1342 * extension = singleton 1*("-" (2*8alphanum)) 1343 * 1344 * ; Single alphanumerics 1345 * ; "x" reserved for private use 1346 * singleton = DIGIT ; 0 - 9 1347 * / %x41-57 ; A - W 1348 * / %x59-5A ; Y - Z 1349 * / %x61-77 ; a - w 1350 * / %x79-7A ; y - z 1351 * 1352 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1353 * The parser below doesn't try to cope with extension or privateuse 1354 * that could be added but that's not interoperable anyway 1355 * 1356 * Returns 1 if correct 0 otherwise 1357 **/ 1358 int 1359 xmlCheckLanguageID(const xmlChar * lang) 1360 { 1361 const xmlChar *cur = lang, *nxt; 1362 1363 if (cur == NULL) 1364 return (0); 1365 if (((cur[0] == 'i') && (cur[1] == '-')) || 1366 ((cur[0] == 'I') && (cur[1] == '-')) || 1367 ((cur[0] == 'x') && (cur[1] == '-')) || 1368 ((cur[0] == 'X') && 
(cur[1] == '-'))) { 1369 /* 1370 * Still allow IANA code and user code which were coming 1371 * from the previous version of the XML-1.0 specification 1372 * it's deprecated but we should not fail 1373 */ 1374 cur += 2; 1375 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1376 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1377 cur++; 1378 return(cur[0] == 0); 1379 } 1380 nxt = cur; 1381 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1382 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1383 nxt++; 1384 if (nxt - cur >= 4) { 1385 /* 1386 * Reserved 1387 */ 1388 if ((nxt - cur > 8) || (nxt[0] != 0)) 1389 return(0); 1390 return(1); 1391 } 1392 if (nxt - cur < 2) 1393 return(0); 1394 /* we got an ISO 639 code */ 1395 if (nxt[0] == 0) 1396 return(1); 1397 if (nxt[0] != '-') 1398 return(0); 1399 1400 nxt++; 1401 cur = nxt; 1402 /* now we can have extlang or script or region or variant */ 1403 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1404 goto region_m49; 1405 1406 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1407 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1408 nxt++; 1409 if (nxt - cur == 4) 1410 goto script; 1411 if (nxt - cur == 2) 1412 goto region; 1413 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1414 goto variant; 1415 if (nxt - cur != 3) 1416 return(0); 1417 /* we parsed an extlang */ 1418 if (nxt[0] == 0) 1419 return(1); 1420 if (nxt[0] != '-') 1421 return(0); 1422 1423 nxt++; 1424 cur = nxt; 1425 /* now we can have script or region or variant */ 1426 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1427 goto region_m49; 1428 1429 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1430 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1431 nxt++; 1432 if (nxt - cur == 2) 1433 goto region; 1434 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1435 goto variant; 1436 if (nxt - cur != 4) 1437 return(0); 1438 /* we parsed a script */ 1439 script: 1440 if (nxt[0] == 0) 1441 return(1); 1442 if (nxt[0] != '-') 1443 return(0); 1444 1445 nxt++; 1446 cur = nxt; 1447 /* now we can have region or variant */ 1448 if ((nxt[0] >= '0') 
&& (nxt[0] <= '9')) 1449 goto region_m49; 1450 1451 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1452 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1453 nxt++; 1454 1455 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1456 goto variant; 1457 if (nxt - cur != 2) 1458 return(0); 1459 /* we parsed a region */ 1460 region: 1461 if (nxt[0] == 0) 1462 return(1); 1463 if (nxt[0] != '-') 1464 return(0); 1465 1466 nxt++; 1467 cur = nxt; 1468 /* now we can just have a variant */ 1469 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1470 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1471 nxt++; 1472 1473 if ((nxt - cur < 5) || (nxt - cur > 8)) 1474 return(0); 1475 1476 /* we parsed a variant */ 1477 variant: 1478 if (nxt[0] == 0) 1479 return(1); 1480 if (nxt[0] != '-') 1481 return(0); 1482 /* extensions and private use subtags not checked */ 1483 return (1); 1484 1485 region_m49: 1486 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1487 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1488 nxt += 3; 1489 goto region; 1490 } 1491 return(0); 1492 } 1493 1494 /************************************************************************ 1495 * * 1496 * Parser stacks related functions and macros * 1497 * * 1498 ************************************************************************/ 1499 1500 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1501 const xmlChar ** str); 1502 1503 #ifdef SAX2 1504 /** 1505 * nsPush: 1506 * @ctxt: an XML parser context 1507 * @prefix: the namespace prefix or NULL 1508 * @URL: the namespace name 1509 * 1510 * Pushes a new parser namespace on top of the ns stack 1511 * 1512 * Returns -1 in case of error, -2 if the namespace should be discarded 1513 * and the index in the stack otherwise. 
1514 */ 1515 static int 1516 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1517 { 1518 if (ctxt->options & XML_PARSE_NSCLEAN) { 1519 int i; 1520 for (i = 0;i < ctxt->nsNr;i += 2) { 1521 if (ctxt->nsTab[i] == prefix) { 1522 /* in scope */ 1523 if (ctxt->nsTab[i + 1] == URL) 1524 return(-2); 1525 /* out of scope keep it */ 1526 break; 1527 } 1528 } 1529 } 1530 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1531 ctxt->nsMax = 10; 1532 ctxt->nsNr = 0; 1533 ctxt->nsTab = (const xmlChar **) 1534 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1535 if (ctxt->nsTab == NULL) { 1536 xmlErrMemory(ctxt, NULL); 1537 ctxt->nsMax = 0; 1538 return (-1); 1539 } 1540 } else if (ctxt->nsNr >= ctxt->nsMax) { 1541 const xmlChar ** tmp; 1542 ctxt->nsMax *= 2; 1543 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1544 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1545 if (tmp == NULL) { 1546 xmlErrMemory(ctxt, NULL); 1547 ctxt->nsMax /= 2; 1548 return (-1); 1549 } 1550 ctxt->nsTab = tmp; 1551 } 1552 ctxt->nsTab[ctxt->nsNr++] = prefix; 1553 ctxt->nsTab[ctxt->nsNr++] = URL; 1554 return (ctxt->nsNr); 1555 } 1556 /** 1557 * nsPop: 1558 * @ctxt: an XML parser context 1559 * @nr: the number to pop 1560 * 1561 * Pops the top @nr parser prefix/namespace from the ns stack 1562 * 1563 * Returns the number of namespaces removed 1564 */ 1565 static int 1566 nsPop(xmlParserCtxtPtr ctxt, int nr) 1567 { 1568 int i; 1569 1570 if (ctxt->nsTab == NULL) return(0); 1571 if (ctxt->nsNr < nr) { 1572 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1573 nr = ctxt->nsNr; 1574 } 1575 if (ctxt->nsNr <= 0) 1576 return (0); 1577 1578 for (i = 0;i < nr;i++) { 1579 ctxt->nsNr--; 1580 ctxt->nsTab[ctxt->nsNr] = NULL; 1581 } 1582 return(nr); 1583 } 1584 #endif 1585 1586 static int 1587 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1588 const xmlChar **atts; 1589 int *attallocs; 1590 int maxatts; 1591 1592 if (ctxt->atts == NULL) { 1593 maxatts = 55; /* allow for 10 
attrs by default */ 1594 atts = (const xmlChar **) 1595 xmlMalloc(maxatts * sizeof(xmlChar *)); 1596 if (atts == NULL) goto mem_error; 1597 ctxt->atts = atts; 1598 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1599 if (attallocs == NULL) goto mem_error; 1600 ctxt->attallocs = attallocs; 1601 ctxt->maxatts = maxatts; 1602 } else if (nr + 5 > ctxt->maxatts) { 1603 maxatts = (nr + 5) * 2; 1604 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1605 maxatts * sizeof(const xmlChar *)); 1606 if (atts == NULL) goto mem_error; 1607 ctxt->atts = atts; 1608 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1609 (maxatts / 5) * sizeof(int)); 1610 if (attallocs == NULL) goto mem_error; 1611 ctxt->attallocs = attallocs; 1612 ctxt->maxatts = maxatts; 1613 } 1614 return(ctxt->maxatts); 1615 mem_error: 1616 xmlErrMemory(ctxt, NULL); 1617 return(-1); 1618 } 1619 1620 /** 1621 * inputPush: 1622 * @ctxt: an XML parser context 1623 * @value: the parser input 1624 * 1625 * Pushes a new parser input on top of the input stack 1626 * 1627 * Returns -1 in case of error, the index in the stack otherwise 1628 */ 1629 int 1630 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1631 { 1632 if ((ctxt == NULL) || (value == NULL)) 1633 return(-1); 1634 if (ctxt->inputNr >= ctxt->inputMax) { 1635 ctxt->inputMax *= 2; 1636 ctxt->inputTab = 1637 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1638 ctxt->inputMax * 1639 sizeof(ctxt->inputTab[0])); 1640 if (ctxt->inputTab == NULL) { 1641 xmlErrMemory(ctxt, NULL); 1642 xmlFreeInputStream(value); 1643 ctxt->inputMax /= 2; 1644 value = NULL; 1645 return (-1); 1646 } 1647 } 1648 ctxt->inputTab[ctxt->inputNr] = value; 1649 ctxt->input = value; 1650 return (ctxt->inputNr++); 1651 } 1652 /** 1653 * inputPop: 1654 * @ctxt: an XML parser context 1655 * 1656 * Pops the top parser input from the input stack 1657 * 1658 * Returns the input just removed 1659 */ 1660 xmlParserInputPtr 1661 inputPop(xmlParserCtxtPtr ctxt) 1662 { 
1663 xmlParserInputPtr ret; 1664 1665 if (ctxt == NULL) 1666 return(NULL); 1667 if (ctxt->inputNr <= 0) 1668 return (NULL); 1669 ctxt->inputNr--; 1670 if (ctxt->inputNr > 0) 1671 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1672 else 1673 ctxt->input = NULL; 1674 ret = ctxt->inputTab[ctxt->inputNr]; 1675 ctxt->inputTab[ctxt->inputNr] = NULL; 1676 return (ret); 1677 } 1678 /** 1679 * nodePush: 1680 * @ctxt: an XML parser context 1681 * @value: the element node 1682 * 1683 * Pushes a new element node on top of the node stack 1684 * 1685 * Returns -1 in case of error, the index in the stack otherwise 1686 */ 1687 int 1688 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1689 { 1690 if (ctxt == NULL) return(0); 1691 if (ctxt->nodeNr >= ctxt->nodeMax) { 1692 xmlNodePtr *tmp; 1693 1694 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1695 ctxt->nodeMax * 2 * 1696 sizeof(ctxt->nodeTab[0])); 1697 if (tmp == NULL) { 1698 xmlErrMemory(ctxt, NULL); 1699 return (-1); 1700 } 1701 ctxt->nodeTab = tmp; 1702 ctxt->nodeMax *= 2; 1703 } 1704 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1705 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1706 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1707 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1708 xmlParserMaxDepth); 1709 ctxt->instate = XML_PARSER_EOF; 1710 return(-1); 1711 } 1712 ctxt->nodeTab[ctxt->nodeNr] = value; 1713 ctxt->node = value; 1714 return (ctxt->nodeNr++); 1715 } 1716 1717 /** 1718 * nodePop: 1719 * @ctxt: an XML parser context 1720 * 1721 * Pops the top element node from the node stack 1722 * 1723 * Returns the node just removed 1724 */ 1725 xmlNodePtr 1726 nodePop(xmlParserCtxtPtr ctxt) 1727 { 1728 xmlNodePtr ret; 1729 1730 if (ctxt == NULL) return(NULL); 1731 if (ctxt->nodeNr <= 0) 1732 return (NULL); 1733 ctxt->nodeNr--; 1734 if (ctxt->nodeNr > 0) 1735 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1736 else 1737 ctxt->node = NULL; 1738 ret = ctxt->nodeTab[ctxt->nodeNr]; 1739 
ctxt->nodeTab[ctxt->nodeNr] = NULL; 1740 return (ret); 1741 } 1742 1743 #ifdef LIBXML_PUSH_ENABLED 1744 /** 1745 * nameNsPush: 1746 * @ctxt: an XML parser context 1747 * @value: the element name 1748 * @prefix: the element prefix 1749 * @URI: the element namespace name 1750 * 1751 * Pushes a new element name/prefix/URL on top of the name stack 1752 * 1753 * Returns -1 in case of error, the index in the stack otherwise 1754 */ 1755 static int 1756 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1757 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1758 { 1759 if (ctxt->nameNr >= ctxt->nameMax) { 1760 const xmlChar * *tmp; 1761 void **tmp2; 1762 ctxt->nameMax *= 2; 1763 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1764 ctxt->nameMax * 1765 sizeof(ctxt->nameTab[0])); 1766 if (tmp == NULL) { 1767 ctxt->nameMax /= 2; 1768 goto mem_error; 1769 } 1770 ctxt->nameTab = tmp; 1771 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1772 ctxt->nameMax * 3 * 1773 sizeof(ctxt->pushTab[0])); 1774 if (tmp2 == NULL) { 1775 ctxt->nameMax /= 2; 1776 goto mem_error; 1777 } 1778 ctxt->pushTab = tmp2; 1779 } 1780 ctxt->nameTab[ctxt->nameNr] = value; 1781 ctxt->name = value; 1782 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1783 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1784 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1785 return (ctxt->nameNr++); 1786 mem_error: 1787 xmlErrMemory(ctxt, NULL); 1788 return (-1); 1789 } 1790 /** 1791 * nameNsPop: 1792 * @ctxt: an XML parser context 1793 * 1794 * Pops the top element/prefix/URI name from the name stack 1795 * 1796 * Returns the name just removed 1797 */ 1798 static const xmlChar * 1799 nameNsPop(xmlParserCtxtPtr ctxt) 1800 { 1801 const xmlChar *ret; 1802 1803 if (ctxt->nameNr <= 0) 1804 return (NULL); 1805 ctxt->nameNr--; 1806 if (ctxt->nameNr > 0) 1807 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1808 else 1809 ctxt->name = NULL; 1810 ret = ctxt->nameTab[ctxt->nameNr]; 1811 
ctxt->nameTab[ctxt->nameNr] = NULL; 1812 return (ret); 1813 } 1814 #endif /* LIBXML_PUSH_ENABLED */ 1815 1816 /** 1817 * namePush: 1818 * @ctxt: an XML parser context 1819 * @value: the element name 1820 * 1821 * Pushes a new element name on top of the name stack 1822 * 1823 * Returns -1 in case of error, the index in the stack otherwise 1824 */ 1825 int 1826 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1827 { 1828 if (ctxt == NULL) return (-1); 1829 1830 if (ctxt->nameNr >= ctxt->nameMax) { 1831 const xmlChar * *tmp; 1832 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1833 ctxt->nameMax * 2 * 1834 sizeof(ctxt->nameTab[0])); 1835 if (tmp == NULL) { 1836 goto mem_error; 1837 } 1838 ctxt->nameTab = tmp; 1839 ctxt->nameMax *= 2; 1840 } 1841 ctxt->nameTab[ctxt->nameNr] = value; 1842 ctxt->name = value; 1843 return (ctxt->nameNr++); 1844 mem_error: 1845 xmlErrMemory(ctxt, NULL); 1846 return (-1); 1847 } 1848 /** 1849 * namePop: 1850 * @ctxt: an XML parser context 1851 * 1852 * Pops the top element name from the name stack 1853 * 1854 * Returns the name just removed 1855 */ 1856 const xmlChar * 1857 namePop(xmlParserCtxtPtr ctxt) 1858 { 1859 const xmlChar *ret; 1860 1861 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1862 return (NULL); 1863 ctxt->nameNr--; 1864 if (ctxt->nameNr > 0) 1865 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1866 else 1867 ctxt->name = NULL; 1868 ret = ctxt->nameTab[ctxt->nameNr]; 1869 ctxt->nameTab[ctxt->nameNr] = NULL; 1870 return (ret); 1871 } 1872 1873 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1874 if (ctxt->spaceNr >= ctxt->spaceMax) { 1875 int *tmp; 1876 1877 ctxt->spaceMax *= 2; 1878 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1879 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1880 if (tmp == NULL) { 1881 xmlErrMemory(ctxt, NULL); 1882 ctxt->spaceMax /=2; 1883 return(-1); 1884 } 1885 ctxt->spaceTab = tmp; 1886 } 1887 ctxt->spaceTab[ctxt->spaceNr] = val; 1888 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 
1889 return(ctxt->spaceNr++); 1890 } 1891 1892 static int spacePop(xmlParserCtxtPtr ctxt) { 1893 int ret; 1894 if (ctxt->spaceNr <= 0) return(0); 1895 ctxt->spaceNr--; 1896 if (ctxt->spaceNr > 0) 1897 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1898 else 1899 ctxt->space = &ctxt->spaceTab[0]; 1900 ret = ctxt->spaceTab[ctxt->spaceNr]; 1901 ctxt->spaceTab[ctxt->spaceNr] = -1; 1902 return(ret); 1903 } 1904 1905 /* 1906 * Macros for accessing the content. Those should be used only by the parser, 1907 * and not exported. 1908 * 1909 * Dirty macros, i.e. one often need to make assumption on the context to 1910 * use them 1911 * 1912 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1913 * To be used with extreme caution since operations consuming 1914 * characters may move the input buffer to a different location ! 1915 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1916 * This should be used internally by the parser 1917 * only to compare to ASCII values otherwise it would break when 1918 * running with UTF-8 encoding. 1919 * RAW same as CUR but in the input buffer, bypass any token 1920 * extraction that may have been done 1921 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1922 * to compare on ASCII based substring. 1923 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1924 * strings without newlines within the parser. 1925 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1926 * defined char within the parser. 1927 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1928 * 1929 * NEXT Skip to the next character, this does the proper decoding 1930 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1931 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1932 * CUR_CHAR(l) returns the current unicode character (int), set l 1933 * to the number of xmlChars used for the encoding [0-5]. 
1934 * CUR_SCHAR same but operate on a string instead of the context 1935 * COPY_BUF copy the current unicode char to the target buffer, increment 1936 * the index 1937 * GROW, SHRINK handling of input buffers 1938 */ 1939 1940 #define RAW (*ctxt->input->cur) 1941 #define CUR (*ctxt->input->cur) 1942 #define NXT(val) ctxt->input->cur[(val)] 1943 #define CUR_PTR ctxt->input->cur 1944 1945 #define CMP4( s, c1, c2, c3, c4 ) \ 1946 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1947 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1948 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 1949 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1950 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1951 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1952 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1953 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1954 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1955 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1956 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1957 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1958 ((unsigned char *) s)[ 8 ] == c9 ) 1959 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1960 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1961 ((unsigned char *) s)[ 9 ] == c10 ) 1962 1963 #define SKIP(val) do { \ 1964 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 1965 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1966 if ((*ctxt->input->cur == 0) && \ 1967 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1968 xmlPopInput(ctxt); \ 1969 } while (0) 1970 1971 #define SKIPL(val) do { \ 1972 int skipl; \ 1973 for(skipl=0; skipl<val; skipl++) { \ 1974 if (*(ctxt->input->cur) == '\n') { \ 1975 ctxt->input->line++; ctxt->input->col = 1; \ 1976 } else ctxt->input->col++; \ 1977 ctxt->nbChars++; \ 1978 
ctxt->input->cur++; \ 1979 } \ 1980 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1981 if ((*ctxt->input->cur == 0) && \ 1982 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1983 xmlPopInput(ctxt); \ 1984 } while (0) 1985 1986 #define SHRINK if ((ctxt->progressive == 0) && \ 1987 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 1988 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 1989 xmlSHRINK (ctxt); 1990 1991 static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 1992 xmlParserInputShrink(ctxt->input); 1993 if ((*ctxt->input->cur == 0) && 1994 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1995 xmlPopInput(ctxt); 1996 } 1997 1998 #define GROW if ((ctxt->progressive == 0) && \ 1999 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2000 xmlGROW (ctxt); 2001 2002 static void xmlGROW (xmlParserCtxtPtr ctxt) { 2003 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2004 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 2005 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2006 xmlPopInput(ctxt); 2007 } 2008 2009 #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2010 2011 #define NEXT xmlNextChar(ctxt) 2012 2013 #define NEXT1 { \ 2014 ctxt->input->col++; \ 2015 ctxt->input->cur++; \ 2016 ctxt->nbChars++; \ 2017 if (*ctxt->input->cur == 0) \ 2018 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2019 } 2020 2021 #define NEXTL(l) do { \ 2022 if (*(ctxt->input->cur) == '\n') { \ 2023 ctxt->input->line++; ctxt->input->col = 1; \ 2024 } else ctxt->input->col++; \ 2025 ctxt->input->cur += l; \ 2026 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2027 } while (0) 2028 2029 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2030 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2031 2032 #define COPY_BUF(l,b,i,v) \ 2033 if (l == 1) b[i++] = (xmlChar) v; \ 2034 else i += xmlCopyCharMultiByte(&b[i],v) 2035 2036 /** 2037 * xmlSkipBlankChars: 2038 * @ctxt: the XML parser context 2039 * 2040 * skip all 
blanks character found at that point in the input streams. 2041 * It pops up finished entities in the process if allowable at that point. 2042 * 2043 * Returns the number of space chars skipped 2044 */ 2045 2046 int 2047 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2048 int res = 0; 2049 2050 /* 2051 * It's Okay to use CUR/NEXT here since all the blanks are on 2052 * the ASCII range. 2053 */ 2054 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2055 const xmlChar *cur; 2056 /* 2057 * if we are in the document content, go really fast 2058 */ 2059 cur = ctxt->input->cur; 2060 while (IS_BLANK_CH(*cur)) { 2061 if (*cur == '\n') { 2062 ctxt->input->line++; ctxt->input->col = 1; 2063 } 2064 cur++; 2065 res++; 2066 if (*cur == 0) { 2067 ctxt->input->cur = cur; 2068 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2069 cur = ctxt->input->cur; 2070 } 2071 } 2072 ctxt->input->cur = cur; 2073 } else { 2074 int cur; 2075 do { 2076 cur = CUR; 2077 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 2078 NEXT; 2079 cur = CUR; 2080 res++; 2081 } 2082 while ((cur == 0) && (ctxt->inputNr > 1) && 2083 (ctxt->instate != XML_PARSER_COMMENT)) { 2084 xmlPopInput(ctxt); 2085 cur = CUR; 2086 } 2087 /* 2088 * Need to handle support of entities branching here 2089 */ 2090 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 2091 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 2092 } 2093 return(res); 2094 } 2095 2096 /************************************************************************ 2097 * * 2098 * Commodity functions to handle entities * 2099 * * 2100 ************************************************************************/ 2101 2102 /** 2103 * xmlPopInput: 2104 * @ctxt: an XML parser context 2105 * 2106 * xmlPopInput: the current input pointed by ctxt->input came to an end 2107 * pop it and return the next char. 
2108 * 2109 * Returns the current xmlChar in the parser context 2110 */ 2111 xmlChar 2112 xmlPopInput(xmlParserCtxtPtr ctxt) { 2113 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2114 if (xmlParserDebugEntities) 2115 xmlGenericError(xmlGenericErrorContext, 2116 "Popping input %d\n", ctxt->inputNr); 2117 xmlFreeInputStream(inputPop(ctxt)); 2118 if ((*ctxt->input->cur == 0) && 2119 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2120 return(xmlPopInput(ctxt)); 2121 return(CUR); 2122 } 2123 2124 /** 2125 * xmlPushInput: 2126 * @ctxt: an XML parser context 2127 * @input: an XML parser input fragment (entity, XML fragment ...). 2128 * 2129 * xmlPushInput: switch to a new input stream which is stacked on top 2130 * of the previous one(s). 2131 * Returns -1 in case of error or the index in the input stack 2132 */ 2133 int 2134 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2135 int ret; 2136 if (input == NULL) return(-1); 2137 2138 if (xmlParserDebugEntities) { 2139 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2140 xmlGenericError(xmlGenericErrorContext, 2141 "%s(%d): ", ctxt->input->filename, 2142 ctxt->input->line); 2143 xmlGenericError(xmlGenericErrorContext, 2144 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2145 } 2146 ret = inputPush(ctxt, input); 2147 GROW; 2148 return(ret); 2149 } 2150 2151 /** 2152 * xmlParseCharRef: 2153 * @ctxt: an XML parser context 2154 * 2155 * parse Reference declarations 2156 * 2157 * [66] CharRef ::= '&#' [0-9]+ ';' | 2158 * '&#x' [0-9a-fA-F]+ ';' 2159 * 2160 * [ WFC: Legal Character ] 2161 * Characters referred to using character references must match the 2162 * production for Char. 
2163 * 2164 * Returns the value parsed (as an int), 0 in case of error 2165 */ 2166 int 2167 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2168 unsigned int val = 0; 2169 int count = 0; 2170 unsigned int outofrange = 0; 2171 2172 /* 2173 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2174 */ 2175 if ((RAW == '&') && (NXT(1) == '#') && 2176 (NXT(2) == 'x')) { 2177 SKIP(3); 2178 GROW; 2179 while (RAW != ';') { /* loop blocked by count */ 2180 if (count++ > 20) { 2181 count = 0; 2182 GROW; 2183 } 2184 if ((RAW >= '0') && (RAW <= '9')) 2185 val = val * 16 + (CUR - '0'); 2186 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2187 val = val * 16 + (CUR - 'a') + 10; 2188 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2189 val = val * 16 + (CUR - 'A') + 10; 2190 else { 2191 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2192 val = 0; 2193 break; 2194 } 2195 if (val > 0x10FFFF) 2196 outofrange = val; 2197 2198 NEXT; 2199 count++; 2200 } 2201 if (RAW == ';') { 2202 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2203 ctxt->input->col++; 2204 ctxt->nbChars ++; 2205 ctxt->input->cur++; 2206 } 2207 } else if ((RAW == '&') && (NXT(1) == '#')) { 2208 SKIP(2); 2209 GROW; 2210 while (RAW != ';') { /* loop blocked by count */ 2211 if (count++ > 20) { 2212 count = 0; 2213 GROW; 2214 } 2215 if ((RAW >= '0') && (RAW <= '9')) 2216 val = val * 10 + (CUR - '0'); 2217 else { 2218 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2219 val = 0; 2220 break; 2221 } 2222 if (val > 0x10FFFF) 2223 outofrange = val; 2224 2225 NEXT; 2226 count++; 2227 } 2228 if (RAW == ';') { 2229 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2230 ctxt->input->col++; 2231 ctxt->nbChars ++; 2232 ctxt->input->cur++; 2233 } 2234 } else { 2235 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2236 } 2237 2238 /* 2239 * [ WFC: Legal Character ] 2240 * Characters referred to using character references must match the 2241 * production for Char. 
2242 */ 2243 if ((IS_CHAR(val) && (outofrange == 0))) { 2244 return(val); 2245 } else { 2246 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2247 "xmlParseCharRef: invalid xmlChar value %d\n", 2248 val); 2249 } 2250 return(0); 2251 } 2252 2253 /** 2254 * xmlParseStringCharRef: 2255 * @ctxt: an XML parser context 2256 * @str: a pointer to an index in the string 2257 * 2258 * parse Reference declarations, variant parsing from a string rather 2259 * than an an input flow. 2260 * 2261 * [66] CharRef ::= '&#' [0-9]+ ';' | 2262 * '&#x' [0-9a-fA-F]+ ';' 2263 * 2264 * [ WFC: Legal Character ] 2265 * Characters referred to using character references must match the 2266 * production for Char. 2267 * 2268 * Returns the value parsed (as an int), 0 in case of error, str will be 2269 * updated to the current value of the index 2270 */ 2271 static int 2272 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2273 const xmlChar *ptr; 2274 xmlChar cur; 2275 unsigned int val = 0; 2276 unsigned int outofrange = 0; 2277 2278 if ((str == NULL) || (*str == NULL)) return(0); 2279 ptr = *str; 2280 cur = *ptr; 2281 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2282 ptr += 3; 2283 cur = *ptr; 2284 while (cur != ';') { /* Non input consuming loop */ 2285 if ((cur >= '0') && (cur <= '9')) 2286 val = val * 16 + (cur - '0'); 2287 else if ((cur >= 'a') && (cur <= 'f')) 2288 val = val * 16 + (cur - 'a') + 10; 2289 else if ((cur >= 'A') && (cur <= 'F')) 2290 val = val * 16 + (cur - 'A') + 10; 2291 else { 2292 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2293 val = 0; 2294 break; 2295 } 2296 if (val > 0x10FFFF) 2297 outofrange = val; 2298 2299 ptr++; 2300 cur = *ptr; 2301 } 2302 if (cur == ';') 2303 ptr++; 2304 } else if ((cur == '&') && (ptr[1] == '#')){ 2305 ptr += 2; 2306 cur = *ptr; 2307 while (cur != ';') { /* Non input consuming loops */ 2308 if ((cur >= '0') && (cur <= '9')) 2309 val = val * 10 + (cur - '0'); 2310 else { 2311 xmlFatalErr(ctxt, 
XML_ERR_INVALID_DEC_CHARREF, NULL); 2312 val = 0; 2313 break; 2314 } 2315 if (val > 0x10FFFF) 2316 outofrange = val; 2317 2318 ptr++; 2319 cur = *ptr; 2320 } 2321 if (cur == ';') 2322 ptr++; 2323 } else { 2324 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2325 return(0); 2326 } 2327 *str = ptr; 2328 2329 /* 2330 * [ WFC: Legal Character ] 2331 * Characters referred to using character references must match the 2332 * production for Char. 2333 */ 2334 if ((IS_CHAR(val) && (outofrange == 0))) { 2335 return(val); 2336 } else { 2337 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2338 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2339 val); 2340 } 2341 return(0); 2342 } 2343 2344 /** 2345 * xmlNewBlanksWrapperInputStream: 2346 * @ctxt: an XML parser context 2347 * @entity: an Entity pointer 2348 * 2349 * Create a new input stream for wrapping 2350 * blanks around a PEReference 2351 * 2352 * Returns the new input stream or NULL 2353 */ 2354 2355 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2356 2357 static xmlParserInputPtr 2358 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2359 xmlParserInputPtr input; 2360 xmlChar *buffer; 2361 size_t length; 2362 if (entity == NULL) { 2363 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2364 "xmlNewBlanksWrapperInputStream entity\n"); 2365 return(NULL); 2366 } 2367 if (xmlParserDebugEntities) 2368 xmlGenericError(xmlGenericErrorContext, 2369 "new blanks wrapper for entity: %s\n", entity->name); 2370 input = xmlNewInputStream(ctxt); 2371 if (input == NULL) { 2372 return(NULL); 2373 } 2374 length = xmlStrlen(entity->name) + 5; 2375 buffer = xmlMallocAtomic(length); 2376 if (buffer == NULL) { 2377 xmlErrMemory(ctxt, NULL); 2378 xmlFree(input); 2379 return(NULL); 2380 } 2381 buffer [0] = ' '; 2382 buffer [1] = '%'; 2383 buffer [length-3] = ';'; 2384 buffer [length-2] = ' '; 2385 buffer [length-1] = 0; 2386 memcpy(buffer + 2, entity->name, length - 5); 2387 input->free = 
deallocblankswrapper; 2388 input->base = buffer; 2389 input->cur = buffer; 2390 input->length = length; 2391 input->end = &buffer[length]; 2392 return(input); 2393 } 2394 2395 /** 2396 * xmlParserHandlePEReference: 2397 * @ctxt: the parser context 2398 * 2399 * [69] PEReference ::= '%' Name ';' 2400 * 2401 * [ WFC: No Recursion ] 2402 * A parsed entity must not contain a recursive 2403 * reference to itself, either directly or indirectly. 2404 * 2405 * [ WFC: Entity Declared ] 2406 * In a document without any DTD, a document with only an internal DTD 2407 * subset which contains no parameter entity references, or a document 2408 * with "standalone='yes'", ... ... The declaration of a parameter 2409 * entity must precede any reference to it... 2410 * 2411 * [ VC: Entity Declared ] 2412 * In a document with an external subset or external parameter entities 2413 * with "standalone='no'", ... ... The declaration of a parameter entity 2414 * must precede any reference to it... 2415 * 2416 * [ WFC: In DTD ] 2417 * Parameter-entity references may only appear in the DTD. 2418 * NOTE: misleading but this is handled. 2419 * 2420 * A PEReference may have been detected in the current input stream 2421 * the handling is done accordingly to 2422 * http://www.w3.org/TR/REC-xml#entproc 2423 * i.e. 
2424 * - Included in literal in entity values 2425 * - Included as Parameter Entity reference within DTDs 2426 */ 2427 void 2428 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2429 const xmlChar *name; 2430 xmlEntityPtr entity = NULL; 2431 xmlParserInputPtr input; 2432 2433 if (RAW != '%') return; 2434 switch(ctxt->instate) { 2435 case XML_PARSER_CDATA_SECTION: 2436 return; 2437 case XML_PARSER_COMMENT: 2438 return; 2439 case XML_PARSER_START_TAG: 2440 return; 2441 case XML_PARSER_END_TAG: 2442 return; 2443 case XML_PARSER_EOF: 2444 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2445 return; 2446 case XML_PARSER_PROLOG: 2447 case XML_PARSER_START: 2448 case XML_PARSER_MISC: 2449 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2450 return; 2451 case XML_PARSER_ENTITY_DECL: 2452 case XML_PARSER_CONTENT: 2453 case XML_PARSER_ATTRIBUTE_VALUE: 2454 case XML_PARSER_PI: 2455 case XML_PARSER_SYSTEM_LITERAL: 2456 case XML_PARSER_PUBLIC_LITERAL: 2457 /* we just ignore it there */ 2458 return; 2459 case XML_PARSER_EPILOG: 2460 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2461 return; 2462 case XML_PARSER_ENTITY_VALUE: 2463 /* 2464 * NOTE: in the case of entity values, we don't do the 2465 * substitution here since we need the literal 2466 * entity value to be able to save the internal 2467 * subset of the document. 2468 * This will be handled by xmlStringDecodeEntities 2469 */ 2470 return; 2471 case XML_PARSER_DTD: 2472 /* 2473 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2474 * In the internal DTD subset, parameter-entity references 2475 * can occur only where markup declarations can occur, not 2476 * within markup declarations. 
2477 * In that case this is handled in xmlParseMarkupDecl 2478 */ 2479 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2480 return; 2481 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2482 return; 2483 break; 2484 case XML_PARSER_IGNORE: 2485 return; 2486 } 2487 2488 NEXT; 2489 name = xmlParseName(ctxt); 2490 if (xmlParserDebugEntities) 2491 xmlGenericError(xmlGenericErrorContext, 2492 "PEReference: %s\n", name); 2493 if (name == NULL) { 2494 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2495 } else { 2496 if (RAW == ';') { 2497 NEXT; 2498 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2499 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2500 if (entity == NULL) { 2501 2502 /* 2503 * [ WFC: Entity Declared ] 2504 * In a document without any DTD, a document with only an 2505 * internal DTD subset which contains no parameter entity 2506 * references, or a document with "standalone='yes'", ... 2507 * ... The declaration of a parameter entity must precede 2508 * any reference to it... 2509 */ 2510 if ((ctxt->standalone == 1) || 2511 ((ctxt->hasExternalSubset == 0) && 2512 (ctxt->hasPErefs == 0))) { 2513 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2514 "PEReference: %%%s; not found\n", name); 2515 } else { 2516 /* 2517 * [ VC: Entity Declared ] 2518 * In a document with an external subset or external 2519 * parameter entities with "standalone='no'", ... 2520 * ... The declaration of a parameter entity must precede 2521 * any reference to it... 
2522 */ 2523 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2524 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2525 "PEReference: %%%s; not found\n", 2526 name, NULL); 2527 } else 2528 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2529 "PEReference: %%%s; not found\n", 2530 name, NULL); 2531 ctxt->valid = 0; 2532 } 2533 } else if (ctxt->input->free != deallocblankswrapper) { 2534 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2535 if (xmlPushInput(ctxt, input) < 0) 2536 return; 2537 } else { 2538 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2539 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2540 xmlChar start[4]; 2541 xmlCharEncoding enc; 2542 2543 /* 2544 * handle the extra spaces added before and after 2545 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2546 * this is done independently. 2547 */ 2548 input = xmlNewEntityInputStream(ctxt, entity); 2549 if (xmlPushInput(ctxt, input) < 0) 2550 return; 2551 2552 /* 2553 * Get the 4 first bytes and decode the charset 2554 * if enc != XML_CHAR_ENCODING_NONE 2555 * plug some encoding conversion routines. 2556 * Note that, since we may have some non-UTF8 2557 * encoding (like UTF16, bug 135229), the 'length' 2558 * is not known, but we can calculate based upon 2559 * the amount of data in the buffer. 
2560 */ 2561 GROW 2562 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2563 start[0] = RAW; 2564 start[1] = NXT(1); 2565 start[2] = NXT(2); 2566 start[3] = NXT(3); 2567 enc = xmlDetectCharEncoding(start, 4); 2568 if (enc != XML_CHAR_ENCODING_NONE) { 2569 xmlSwitchEncoding(ctxt, enc); 2570 } 2571 } 2572 2573 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2574 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2575 (IS_BLANK_CH(NXT(5)))) { 2576 xmlParseTextDecl(ctxt); 2577 } 2578 } else { 2579 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2580 "PEReference: %s is not a parameter entity\n", 2581 name); 2582 } 2583 } 2584 } else { 2585 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2586 } 2587 } 2588 } 2589 2590 /* 2591 * Macro used to grow the current buffer. 2592 */ 2593 #define growBuffer(buffer, n) { \ 2594 xmlChar *tmp; \ 2595 buffer##_size *= 2; \ 2596 buffer##_size += n; \ 2597 tmp = (xmlChar *) \ 2598 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 2599 if (tmp == NULL) goto mem_error; \ 2600 buffer = tmp; \ 2601 } 2602 2603 /** 2604 * xmlStringLenDecodeEntities: 2605 * @ctxt: the parser context 2606 * @str: the input string 2607 * @len: the string length 2608 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2609 * @end: an end marker xmlChar, 0 if none 2610 * @end2: an end marker xmlChar, 0 if none 2611 * @end3: an end marker xmlChar, 0 if none 2612 * 2613 * Takes a entity string content and process to do the adequate substitutions. 2614 * 2615 * [67] Reference ::= EntityRef | CharRef 2616 * 2617 * [69] PEReference ::= '%' Name ';' 2618 * 2619 * Returns A newly allocated string with the substitution done. The caller 2620 * must deallocate it ! 
 *      NULL is returned on invalid arguments, when the entity nesting
 *      depth limit is exceeded, on allocation failure, or when an entity
 *      loop / internal error was recorded while expanding a reference.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    int buffer_size = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;
    int nbchars = 0;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /*
     * Nesting limit: 40 levels unless XML_PARSE_HUGE is set, and 1024
     * levels in any case.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3)) {

        /* redundant with the loop condition, kept as is */
        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* character reference: always substituted, whatever @what */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val != 0) {
                /* l == 0 selects the xmlCopyCharMultiByte() path */
                COPY_BUF(0,buffer,nbchars,val);
            }
            /* keep XML_PARSER_BUFFER_SIZE bytes of slack after each write */
            if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            /* give up on the errors recorded in ctxt->lastError */
            if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
                (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
                goto int_error;
            if (ent != NULL)
                ctxt->nbentities += ent->checked;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* predefined entity: copy its first content character */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* expand the entity content recursively, one level deeper */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;

                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars >
                            buffer_size - XML_PARSER_BUFFER_SIZE) {
                            /* entity check runs only when a grow is needed */
                            if (xmlParserEntityCheck(ctxt, nbchars, ent))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            } else if (ent != NULL) {
                /* entity without content: emit the reference "&name;" back */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
                goto int_error;
            if (ent != NULL)
                ctxt->nbentities += ent->checked;
            if (ent != NULL) {
                /* content not available yet: try to load it first */
                if (ent->content == NULL) {
                    xmlLoadEntityContent(ctxt, ent);
                }
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars >
                            buffer_size - XML_PARSER_BUFFER_SIZE) {
                            if (xmlParserEntityCheck(ctxt, nbchars, ent))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            }
        } else {
            /* plain character: copy it and step over its @l bytes */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
              growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        /* never decode past the @len bytes handed in */
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

/* reached through growBuffer() or the initial allocation failure */
mem_error:
    xmlErrMemory(ctxt, NULL);
/* common cleanup: release the pending replacement text and the output */
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}

/**
 * xmlStringDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 *
@end3: an end marker xmlChar, 0 if none 2792 * 2793 * Takes a entity string content and process to do the adequate substitutions. 2794 * 2795 * [67] Reference ::= EntityRef | CharRef 2796 * 2797 * [69] PEReference ::= '%' Name ';' 2798 * 2799 * Returns A newly allocated string with the substitution done. The caller 2800 * must deallocate it ! 2801 */ 2802 xmlChar * 2803 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2804 xmlChar end, xmlChar end2, xmlChar end3) { 2805 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2806 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2807 end, end2, end3)); 2808 } 2809 2810 /************************************************************************ 2811 * * 2812 * Commodity functions, cleanup needed ? * 2813 * * 2814 ************************************************************************/ 2815 2816 /** 2817 * areBlanks: 2818 * @ctxt: an XML parser context 2819 * @str: a xmlChar * 2820 * @len: the size of @str 2821 * @blank_chars: we know the chars are blanks 2822 * 2823 * Is this a sequence of blank chars that one can ignore ? 2824 * 2825 * Returns 1 if ignorable 0 otherwise. 2826 */ 2827 2828 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2829 int blank_chars) { 2830 int i, ret; 2831 xmlNodePtr lastChild; 2832 2833 /* 2834 * Don't spend time trying to differentiate them, the same callback is 2835 * used ! 2836 */ 2837 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2838 return(0); 2839 2840 /* 2841 * Check for xml:space value. 
2842 */ 2843 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2844 (*(ctxt->space) == -2)) 2845 return(0); 2846 2847 /* 2848 * Check that the string is made of blanks 2849 */ 2850 if (blank_chars == 0) { 2851 for (i = 0;i < len;i++) 2852 if (!(IS_BLANK_CH(str[i]))) return(0); 2853 } 2854 2855 /* 2856 * Look if the element is mixed content in the DTD if available 2857 */ 2858 if (ctxt->node == NULL) return(0); 2859 if (ctxt->myDoc != NULL) { 2860 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2861 if (ret == 0) return(1); 2862 if (ret == 1) return(0); 2863 } 2864 2865 /* 2866 * Otherwise, heuristic :-\ 2867 */ 2868 if ((RAW != '<') && (RAW != 0xD)) return(0); 2869 if ((ctxt->node->children == NULL) && 2870 (RAW == '<') && (NXT(1) == '/')) return(0); 2871 2872 lastChild = xmlGetLastChild(ctxt->node); 2873 if (lastChild == NULL) { 2874 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2875 (ctxt->node->content != NULL)) return(0); 2876 } else if (xmlNodeIsText(lastChild)) 2877 return(0); 2878 else if ((ctxt->node->children != NULL) && 2879 (xmlNodeIsText(ctxt->node->children))) 2880 return(0); 2881 return(1); 2882 } 2883 2884 /************************************************************************ 2885 * * 2886 * Extra stuff for namespace support * 2887 * Relates to http://www.w3.org/TR/WD-xml-names * 2888 * * 2889 ************************************************************************/ 2890 2891 /** 2892 * xmlSplitQName: 2893 * @ctxt: an XML parser context 2894 * @name: an XML parser context 2895 * @prefix: a xmlChar ** 2896 * 2897 * parse an UTF8 encoded XML qualified name string 2898 * 2899 * [NS 5] QName ::= (Prefix ':')? LocalPart 2900 * 2901 * [NS 6] Prefix ::= NCName 2902 * 2903 * [NS 7] LocalPart ::= NCName 2904 * 2905 * Returns the local part, and prefix is updated 2906 * to get the Prefix if any. 
2907 */ 2908 2909 xmlChar * 2910 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2911 xmlChar buf[XML_MAX_NAMELEN + 5]; 2912 xmlChar *buffer = NULL; 2913 int len = 0; 2914 int max = XML_MAX_NAMELEN; 2915 xmlChar *ret = NULL; 2916 const xmlChar *cur = name; 2917 int c; 2918 2919 if (prefix == NULL) return(NULL); 2920 *prefix = NULL; 2921 2922 if (cur == NULL) return(NULL); 2923 2924 #ifndef XML_XML_NAMESPACE 2925 /* xml: prefix is not really a namespace */ 2926 if ((cur[0] == 'x') && (cur[1] == 'm') && 2927 (cur[2] == 'l') && (cur[3] == ':')) 2928 return(xmlStrdup(name)); 2929 #endif 2930 2931 /* nasty but well=formed */ 2932 if (cur[0] == ':') 2933 return(xmlStrdup(name)); 2934 2935 c = *cur++; 2936 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2937 buf[len++] = c; 2938 c = *cur++; 2939 } 2940 if (len >= max) { 2941 /* 2942 * Okay someone managed to make a huge name, so he's ready to pay 2943 * for the processing speed. 2944 */ 2945 max = len * 2; 2946 2947 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2948 if (buffer == NULL) { 2949 xmlErrMemory(ctxt, NULL); 2950 return(NULL); 2951 } 2952 memcpy(buffer, buf, len); 2953 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2954 if (len + 10 > max) { 2955 xmlChar *tmp; 2956 2957 max *= 2; 2958 tmp = (xmlChar *) xmlRealloc(buffer, 2959 max * sizeof(xmlChar)); 2960 if (tmp == NULL) { 2961 xmlFree(buffer); 2962 xmlErrMemory(ctxt, NULL); 2963 return(NULL); 2964 } 2965 buffer = tmp; 2966 } 2967 buffer[len++] = c; 2968 c = *cur++; 2969 } 2970 buffer[len] = 0; 2971 } 2972 2973 if ((c == ':') && (*cur == 0)) { 2974 if (buffer != NULL) 2975 xmlFree(buffer); 2976 *prefix = NULL; 2977 return(xmlStrdup(name)); 2978 } 2979 2980 if (buffer == NULL) 2981 ret = xmlStrndup(buf, len); 2982 else { 2983 ret = buffer; 2984 buffer = NULL; 2985 max = XML_MAX_NAMELEN; 2986 } 2987 2988 2989 if (c == ':') { 2990 c = *cur; 2991 *prefix = ret; 2992 if (c == 0) { 2993 
return(xmlStrndup(BAD_CAST "", 0)); 2994 } 2995 len = 0; 2996 2997 /* 2998 * Check that the first character is proper to start 2999 * a new name 3000 */ 3001 if (!(((c >= 0x61) && (c <= 0x7A)) || 3002 ((c >= 0x41) && (c <= 0x5A)) || 3003 (c == '_') || (c == ':'))) { 3004 int l; 3005 int first = CUR_SCHAR(cur, l); 3006 3007 if (!IS_LETTER(first) && (first != '_')) { 3008 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3009 "Name %s is not XML Namespace compliant\n", 3010 name); 3011 } 3012 } 3013 cur++; 3014 3015 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3016 buf[len++] = c; 3017 c = *cur++; 3018 } 3019 if (len >= max) { 3020 /* 3021 * Okay someone managed to make a huge name, so he's ready to pay 3022 * for the processing speed. 3023 */ 3024 max = len * 2; 3025 3026 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3027 if (buffer == NULL) { 3028 xmlErrMemory(ctxt, NULL); 3029 return(NULL); 3030 } 3031 memcpy(buffer, buf, len); 3032 while (c != 0) { /* tested bigname2.xml */ 3033 if (len + 10 > max) { 3034 xmlChar *tmp; 3035 3036 max *= 2; 3037 tmp = (xmlChar *) xmlRealloc(buffer, 3038 max * sizeof(xmlChar)); 3039 if (tmp == NULL) { 3040 xmlErrMemory(ctxt, NULL); 3041 xmlFree(buffer); 3042 return(NULL); 3043 } 3044 buffer = tmp; 3045 } 3046 buffer[len++] = c; 3047 c = *cur++; 3048 } 3049 buffer[len] = 0; 3050 } 3051 3052 if (buffer == NULL) 3053 ret = xmlStrndup(buf, len); 3054 else { 3055 ret = buffer; 3056 } 3057 } 3058 3059 return(ret); 3060 } 3061 3062 /************************************************************************ 3063 * * 3064 * The parser itself * 3065 * Relates to http://www.w3.org/TR/REC-xml * 3066 * * 3067 ************************************************************************/ 3068 3069 /************************************************************************ 3070 * * 3071 * Routines to parse Name, NCName and NmToken * 3072 * * 3073 ************************************************************************/ 3074 #ifdef 
DEBUG 3075 static unsigned long nbParseName = 0; 3076 static unsigned long nbParseNmToken = 0; 3077 static unsigned long nbParseNCName = 0; 3078 static unsigned long nbParseNCNameComplex = 0; 3079 static unsigned long nbParseNameComplex = 0; 3080 static unsigned long nbParseStringName = 0; 3081 #endif 3082 3083 /* 3084 * The two following functions are related to the change of accepted 3085 * characters for Name and NmToken in the Revision 5 of XML-1.0 3086 * They correspond to the modified production [4] and the new production [4a] 3087 * changes in that revision. Also note that the macros used for the 3088 * productions Letter, Digit, CombiningChar and Extender are not needed 3089 * anymore. 3090 * We still keep compatibility to pre-revision5 parsing semantic if the 3091 * new XML_PARSE_OLD10 option is given to the parser. 3092 */ 3093 static int 3094 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3095 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3096 /* 3097 * Use the new checks of production [4] [4a] amd [5] of the 3098 * Update 5 of XML-1.0 3099 */ 3100 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3101 (((c >= 'a') && (c <= 'z')) || 3102 ((c >= 'A') && (c <= 'Z')) || 3103 (c == '_') || (c == ':') || 3104 ((c >= 0xC0) && (c <= 0xD6)) || 3105 ((c >= 0xD8) && (c <= 0xF6)) || 3106 ((c >= 0xF8) && (c <= 0x2FF)) || 3107 ((c >= 0x370) && (c <= 0x37D)) || 3108 ((c >= 0x37F) && (c <= 0x1FFF)) || 3109 ((c >= 0x200C) && (c <= 0x200D)) || 3110 ((c >= 0x2070) && (c <= 0x218F)) || 3111 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3112 ((c >= 0x3001) && (c <= 0xD7FF)) || 3113 ((c >= 0xF900) && (c <= 0xFDCF)) || 3114 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3115 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3116 return(1); 3117 } else { 3118 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3119 return(1); 3120 } 3121 return(0); 3122 } 3123 3124 static int 3125 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3126 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3127 /* 3128 * Use 
the new checks of production [4] [4a] amd [5] of the
         * Update 5 of XML-1.0
         */
        if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
            (((c >= 'a') && (c <= 'z')) ||
             ((c >= 'A') && (c <= 'Z')) ||
             ((c >= '0') && (c <= '9')) || /* !start */
             (c == '_') || (c == ':') ||
             (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
             ((c >= 0xC0) && (c <= 0xD6)) ||
             ((c >= 0xD8) && (c <= 0xF6)) ||
             ((c >= 0xF8) && (c <= 0x2FF)) ||
             ((c >= 0x300) && (c <= 0x36F)) || /* !start */
             ((c >= 0x370) && (c <= 0x37D)) ||
             ((c >= 0x37F) && (c <= 0x1FFF)) ||
             ((c >= 0x200C) && (c <= 0x200D)) ||
             ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
             ((c >= 0x2070) && (c <= 0x218F)) ||
             ((c >= 0x2C00) && (c <= 0x2FEF)) ||
             ((c >= 0x3001) && (c <= 0xD7FF)) ||
             ((c >= 0xF900) && (c <= 0xFDCF)) ||
             ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
             ((c >= 0x10000) && (c <= 0xEFFFF))))
            return(1);
    } else {
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
            (c == '.') || (c == '-') ||
            (c == '_') || (c == ':') ||
            (IS_COMBINING(c)) ||
            (IS_EXTENDER(c)))
            return(1);
    }
    return(0);
}

/* Defined later in the file; declared here for xmlParseAttValue(). */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);

/*
 * xmlParseNameComplex:
 * @ctxt:  an XML parser context
 *
 * Character-by-character Name scanner; xmlParseName() falls back to it
 * when its ASCII-only fast path does not apply.  The first character
 * must be a name-start character and the following ones name
 * characters; which character ranges are accepted depends on whether
 * XML_PARSE_OLD10 is set in ctxt->options.
 *
 * Returns the value of xmlDictLookup() for the bytes scanned, or NULL
 * when the current character cannot start a Name.
 */
static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;     /* len: bytes consumed so far, l: bytes of current char */
    int c;              /* current decoded character */
    int count = 0;      /* characters seen since the last GROW */

#ifdef DEBUG
    nbParseNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    c = CUR_CHAR(l);
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
         * Use the new checks of production [4] [4a] and [5] of the
         * Update 5 of XML-1.0
         */
        /* first character: must be a NameStartChar */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!(((c >= 'a') && (c <= 'z')) ||
               ((c >= 'A') && (c <= 'Z')) ||
               (c == '_') || (c == ':') ||
               ((c >= 0xC0) && (c <= 0xD6)) ||
               ((c >= 0xD8) && (c <= 0xF6)) ||
               ((c >= 0xF8) && (c <= 0x2FF)) ||
               ((c >= 0x370) && (c <= 0x37D)) ||
               ((c >= 0x37F) && (c <= 0x1FFF)) ||
               ((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               ((c >= 0x3001) && (c <= 0xD7FF)) ||
               ((c >= 0xF900) && (c <= 0xFDCF)) ||
               ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
               ((c >= 0x10000) && (c <= 0xEFFFF))))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
        /* following characters: NameChar (the "!start" ranges are added) */
        while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
               (((c >= 'a') && (c <= 'z')) ||
                ((c >= 'A') && (c <= 'Z')) ||
                ((c >= '0') && (c <= '9')) || /* !start */
                (c == '_') || (c == ':') ||
                (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
                ((c >= 0xC0) && (c <= 0xD6)) ||
                ((c >= 0xD8) && (c <= 0xF6)) ||
                ((c >= 0xF8) && (c <= 0x2FF)) ||
                ((c >= 0x300) && (c <= 0x36F)) || /* !start */
                ((c >= 0x370) && (c <= 0x37D)) ||
                ((c >= 0x37F) && (c <= 0x1FFF)) ||
                ((c >= 0x200C) && (c <= 0x200D)) ||
                ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
                ((c >= 0x2070) && (c <= 0x218F)) ||
                ((c >= 0x2C00) && (c <= 0x2FEF)) ||
                ((c >= 0x3001) && (c <= 0xD7FF)) ||
                ((c >= 0xF900) && (c <= 0xFDCF)) ||
                ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
                ((c >= 0x10000) && (c <= 0xEFFFF))
                )) {
            /* ask for more input every 100 characters or so */
            if (count++ > 100) {
                count = 0;
                GROW;
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    } else {
        /* XML_PARSE_OLD10: use the older character classes */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!IS_LETTER(c) && (c != '_') &&
             (c != ':'))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);

        while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
               ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
                (c == '.') || (c == '-') ||
                (c == '_') || (c == ':') ||
                (IS_COMBINING(c)) ||
                (IS_EXTENDER(c)))) {
            if (count++ > 100) {
                count = 0;
                GROW;
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    }
    /*
     * NOTE(review): when the scan stops on '\n' directly preceded by
     * '\r', the name is looked up starting one byte further back.
     * Presumably this compensates for how the character reader treats
     * a CR/LF pair -- confirm against xmlCurrentChar().
     */
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}

/**
 * xmlParseName:
 * @ctxt:  an XML parser context
 *
 * parse an XML name.
 *
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
 *                  CombiningChar | Extender
 *
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
 *
 * [6] Names ::= Name (#x20 Name)*
 *
 * A pure-ASCII name terminated by a non-NUL ASCII byte is handled
 * inline; everything else is delegated to xmlParseNameComplex().
 *
 * Returns the Name parsed (as returned by xmlDictLookup()) or NULL
 */

const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt) {
    const xmlChar *in;
    const xmlChar *ret;
    int count = 0;

    GROW;

#ifdef DEBUG
    nbParseName++;
#endif

    /*
     * Accelerator for simple ASCII names
     */
    in = ctxt->input->cur;
    /* [a-z] | [A-Z] | '_' | ':' */
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
        ((*in >= 0x41) && (*in <= 0x5A)) ||
        (*in == '_') || (*in == ':')) {
        in++;
        /* [a-z] | [A-Z] | [0-9] | '_' | '-' | ':' | '.' */
        while (((*in >= 0x61) && (*in <= 0x7A)) ||
               ((*in >= 0x41) && (*in <= 0x5A)) ||
               ((*in >= 0x30) && (*in <= 0x39)) ||
               (*in == '_') || (*in == '-') ||
               (*in == ':') || (*in == '.'))
            in++;
        /*
         * Only commit when the byte that stopped the scan is a non-NUL
         * ASCII byte; on NUL or a byte >= 0x80 nothing has been consumed
         * yet and the slow path below re-scans from the start.
         */
        if ((*in > 0) && (*in < 0x80)) {
            count = in - ctxt->input->cur;
            ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
            ctxt->input->cur = in;
            ctxt->nbChars += count;
            ctxt->input->col += count;
            if (ret == NULL)
                xmlErrMemory(ctxt, NULL);
            return(ret);
        }
    }
    /* accelerator for special cases */
    return(xmlParseNameComplex(ctxt));
}

/*
 * xmlParseNCNameComplex:
 * @ctxt:  an XML parser context
 *
 * Character-by-character NCName scanner; xmlParseNCName() falls back
 * to it when its ASCII-only fast path does not apply.  Uses
 * xmlIsNameStartChar() / xmlIsNameChar() and additionally rejects ':'
 * both as first and as following character.
 *
 * Returns the value of xmlDictLookup() for the bytes scanned, or NULL
 * when the current character cannot start an NCName.
 */
static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;     /* len: bytes consumed so far, l: bytes of current char */
    int c;              /* current decoded character */
    int count = 0;      /* characters seen since the last GROW */

#ifdef DEBUG
    nbParseNCNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    c = CUR_CHAR(l);
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
        (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
        return(NULL);
    }

    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
           (xmlIsNameChar(ctxt, c) && (c != ':'))) {
        if (count++ > 100) {
            count = 0;
            GROW;
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
    }
    /* the name is the len bytes that end at the current input position */
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}

/**
 * xmlParseNCName:
 * @ctxt:  an XML parser context
 *
 * parse an XML name.
 *
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
 *                      CombiningChar | Extender
 *
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
 *
 * Returns the Name parsed or NULL
 */

static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt) {
    const xmlChar *in;
    const xmlChar *ret;
    int count = 0;

#ifdef DEBUG
    nbParseNCName++;
#endif

    /*
     * Accelerator for simple ASCII names
     */
    in = ctxt->input->cur;
    /* [a-z] | [A-Z] | '_'  (no ':' in an NCName) */
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
        ((*in >= 0x41) && (*in <= 0x5A)) ||
        (*in == '_')) {
        in++;
        /* [a-z] | [A-Z] | [0-9] | '_' | '-' | '.' */
        while (((*in >= 0x61) && (*in <= 0x7A)) ||
               ((*in >= 0x41) && (*in <= 0x5A)) ||
               ((*in >= 0x30) && (*in <= 0x39)) ||
               (*in == '_') || (*in == '-') ||
               (*in == '.'))
            in++;
        /* commit only if a non-NUL ASCII byte stopped the scan */
        if ((*in > 0) && (*in < 0x80)) {
            count = in - ctxt->input->cur;
            ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
            ctxt->input->cur = in;
            ctxt->nbChars += count;
            ctxt->input->col += count;
            if (ret == NULL) {
                xmlErrMemory(ctxt, NULL);
            }
            return(ret);
        }
    }
    return(xmlParseNCNameComplex(ctxt));
}

/**
 * xmlParseNameAndCompare:
 * @ctxt:  an XML parser context
 * @other:  the name to compare with
 *
 * parse an XML name and compares for match
 * (specialized for endtag
parsing) 3416 * 3417 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3418 * and the name for mismatch 3419 */ 3420 3421 static const xmlChar * 3422 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3423 register const xmlChar *cmp = other; 3424 register const xmlChar *in; 3425 const xmlChar *ret; 3426 3427 GROW; 3428 3429 in = ctxt->input->cur; 3430 while (*in != 0 && *in == *cmp) { 3431 ++in; 3432 ++cmp; 3433 ctxt->input->col++; 3434 } 3435 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3436 /* success */ 3437 ctxt->input->cur = in; 3438 return (const xmlChar*) 1; 3439 } 3440 /* failure (or end of input buffer), check with full function */ 3441 ret = xmlParseName (ctxt); 3442 /* strings coming from the dictionnary direct compare possible */ 3443 if (ret == other) { 3444 return (const xmlChar*) 1; 3445 } 3446 return ret; 3447 } 3448 3449 /** 3450 * xmlParseStringName: 3451 * @ctxt: an XML parser context 3452 * @str: a pointer to the string pointer (IN/OUT) 3453 * 3454 * parse an XML name. 3455 * 3456 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3457 * CombiningChar | Extender 3458 * 3459 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3460 * 3461 * [6] Names ::= Name (#x20 Name)* 3462 * 3463 * Returns the Name parsed or NULL. The @str pointer 3464 * is updated to the current location in the string. 
3465 */ 3466 3467 static xmlChar * 3468 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3469 xmlChar buf[XML_MAX_NAMELEN + 5]; 3470 const xmlChar *cur = *str; 3471 int len = 0, l; 3472 int c; 3473 3474 #ifdef DEBUG 3475 nbParseStringName++; 3476 #endif 3477 3478 c = CUR_SCHAR(cur, l); 3479 if (!xmlIsNameStartChar(ctxt, c)) { 3480 return(NULL); 3481 } 3482 3483 COPY_BUF(l,buf,len,c); 3484 cur += l; 3485 c = CUR_SCHAR(cur, l); 3486 while (xmlIsNameChar(ctxt, c)) { 3487 COPY_BUF(l,buf,len,c); 3488 cur += l; 3489 c = CUR_SCHAR(cur, l); 3490 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3491 /* 3492 * Okay someone managed to make a huge name, so he's ready to pay 3493 * for the processing speed. 3494 */ 3495 xmlChar *buffer; 3496 int max = len * 2; 3497 3498 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3499 if (buffer == NULL) { 3500 xmlErrMemory(ctxt, NULL); 3501 return(NULL); 3502 } 3503 memcpy(buffer, buf, len); 3504 while (xmlIsNameChar(ctxt, c)) { 3505 if (len + 10 > max) { 3506 xmlChar *tmp; 3507 max *= 2; 3508 tmp = (xmlChar *) xmlRealloc(buffer, 3509 max * sizeof(xmlChar)); 3510 if (tmp == NULL) { 3511 xmlErrMemory(ctxt, NULL); 3512 xmlFree(buffer); 3513 return(NULL); 3514 } 3515 buffer = tmp; 3516 } 3517 COPY_BUF(l,buffer,len,c); 3518 cur += l; 3519 c = CUR_SCHAR(cur, l); 3520 } 3521 buffer[len] = 0; 3522 *str = cur; 3523 return(buffer); 3524 } 3525 } 3526 *str = cur; 3527 return(xmlStrndup(buf, len)); 3528 } 3529 3530 /** 3531 * xmlParseNmtoken: 3532 * @ctxt: an XML parser context 3533 * 3534 * parse an XML Nmtoken. 
 *
 * [7] Nmtoken ::= (NameChar)+
 *
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
 *
 * Returns the Nmtoken parsed, in a newly allocated buffer, or NULL
 * when no name character is found or on allocation failure
 */

xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
    xmlChar buf[XML_MAX_NAMELEN + 5];   /* on-stack buffer for short tokens */
    int len = 0, l;     /* len: bytes copied, l: bytes of current char */
    int c;              /* current decoded character */
    int count = 0;      /* characters seen since the last GROW */

#ifdef DEBUG
    nbParseNmToken++;
#endif

    GROW;
    c = CUR_CHAR(l);

    while (xmlIsNameChar(ctxt, c)) {
        if (count++ > 100) {
            count = 0;
            GROW;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        c = CUR_CHAR(l);
        if (len >= XML_MAX_NAMELEN) {
            /*
             * Okay someone managed to make a huge token, so he's ready to pay
             * for the processing speed.
             */
            xmlChar *buffer;
            int max = len * 2;

            /* switch to a heap buffer and finish the token there */
            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (xmlIsNameChar(ctxt, c)) {
                if (count++ > 100) {
                    count = 0;
                    GROW;
                }
                /* keep at least 10 spare bytes before copying */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                 max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                COPY_BUF(l,buffer,len,c);
                NEXTL(l);
                c = CUR_CHAR(l);
            }
            buffer[len] = 0;
            return(buffer);
        }
    }
    if (len == 0)
        return(NULL);
    return(xmlStrndup(buf, len));
}

/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *                     "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The raw literal is first copied into a buffer, then checked for stray
 * '%' and '&', and finally passed to xmlStringDecodeEntities() with
 * XML_SUBSTITUTE_PEREF.  When @orig is non-NULL and the literal was
 * properly terminated, *@orig receives the raw buffer; it is not
 * written on the error paths.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;                   /* the quote that opened the literal */
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;        /* input the literal started in */

    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    input = ctxt->input;
    GROW;
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while ((IS_CHAR(c)) && ((c != stop) || /* checked */
           (ctxt->input != input))) {
        /* keep at least 5 spare bytes before copying */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        /* a 0 here may only mean the buffer ran dry: grow and retry once */
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;     /* remember which of '%' / '&' it was */

            cur++;
            /* a well-formed reference is: Name ';' */
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            if (name != NULL)
                xmlFree(name);
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     */
    if (c != stop) {
        /* the copy loop ended on something other than the closing quote */
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        NEXT;
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        /* hand the raw text to the caller if requested, else drop it */
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}

/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the resulting attribute length
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3761 */ 3762 static xmlChar * 3763 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3764 xmlChar limit = 0; 3765 xmlChar *buf = NULL; 3766 xmlChar *rep = NULL; 3767 int len = 0; 3768 int buf_size = 0; 3769 int c, l, in_space = 0; 3770 xmlChar *current = NULL; 3771 xmlEntityPtr ent; 3772 3773 if (NXT(0) == '"') { 3774 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3775 limit = '"'; 3776 NEXT; 3777 } else if (NXT(0) == '\'') { 3778 limit = '\''; 3779 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3780 NEXT; 3781 } else { 3782 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3783 return(NULL); 3784 } 3785 3786 /* 3787 * allocate a translation buffer. 3788 */ 3789 buf_size = XML_PARSER_BUFFER_SIZE; 3790 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 3791 if (buf == NULL) goto mem_error; 3792 3793 /* 3794 * OK loop until we reach one of the ending char or a size limit. 3795 */ 3796 c = CUR_CHAR(l); 3797 while ((NXT(0) != limit) && /* checked */ 3798 (IS_CHAR(c)) && (c != '<')) { 3799 if (c == 0) break; 3800 if (c == '&') { 3801 in_space = 0; 3802 if (NXT(1) == '#') { 3803 int val = xmlParseCharRef(ctxt); 3804 3805 if (val == '&') { 3806 if (ctxt->replaceEntities) { 3807 if (len > buf_size - 10) { 3808 growBuffer(buf, 10); 3809 } 3810 buf[len++] = '&'; 3811 } else { 3812 /* 3813 * The reparsing will be done in xmlStringGetNodeList() 3814 * called by the attribute() function in SAX.c 3815 */ 3816 if (len > buf_size - 10) { 3817 growBuffer(buf, 10); 3818 } 3819 buf[len++] = '&'; 3820 buf[len++] = '#'; 3821 buf[len++] = '3'; 3822 buf[len++] = '8'; 3823 buf[len++] = ';'; 3824 } 3825 } else if (val != 0) { 3826 if (len > buf_size - 10) { 3827 growBuffer(buf, 10); 3828 } 3829 len += xmlCopyChar(0, &buf[len], val); 3830 } 3831 } else { 3832 ent = xmlParseEntityRef(ctxt); 3833 ctxt->nbentities++; 3834 if (ent != NULL) 3835 ctxt->nbentities += ent->owner; 3836 if ((ent != NULL) && 3837 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 
3838 if (len > buf_size - 10) { 3839 growBuffer(buf, 10); 3840 } 3841 if ((ctxt->replaceEntities == 0) && 3842 (ent->content[0] == '&')) { 3843 buf[len++] = '&'; 3844 buf[len++] = '#'; 3845 buf[len++] = '3'; 3846 buf[len++] = '8'; 3847 buf[len++] = ';'; 3848 } else { 3849 buf[len++] = ent->content[0]; 3850 } 3851 } else if ((ent != NULL) && 3852 (ctxt->replaceEntities != 0)) { 3853 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3854 rep = xmlStringDecodeEntities(ctxt, ent->content, 3855 XML_SUBSTITUTE_REF, 3856 0, 0, 0); 3857 if (rep != NULL) { 3858 current = rep; 3859 while (*current != 0) { /* non input consuming */ 3860 if ((*current == 0xD) || (*current == 0xA) || 3861 (*current == 0x9)) { 3862 buf[len++] = 0x20; 3863 current++; 3864 } else 3865 buf[len++] = *current++; 3866 if (len > buf_size - 10) { 3867 growBuffer(buf, 10); 3868 } 3869 } 3870 xmlFree(rep); 3871 rep = NULL; 3872 } 3873 } else { 3874 if (len > buf_size - 10) { 3875 growBuffer(buf, 10); 3876 } 3877 if (ent->content != NULL) 3878 buf[len++] = ent->content[0]; 3879 } 3880 } else if (ent != NULL) { 3881 int i = xmlStrlen(ent->name); 3882 const xmlChar *cur = ent->name; 3883 3884 /* 3885 * This may look absurd but is needed to detect 3886 * entities problems 3887 */ 3888 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3889 (ent->content != NULL)) { 3890 rep = xmlStringDecodeEntities(ctxt, ent->content, 3891 XML_SUBSTITUTE_REF, 0, 0, 0); 3892 if (rep != NULL) { 3893 xmlFree(rep); 3894 rep = NULL; 3895 } 3896 } 3897 3898 /* 3899 * Just output the reference 3900 */ 3901 buf[len++] = '&'; 3902 while (len > buf_size - i - 10) { 3903 growBuffer(buf, i + 10); 3904 } 3905 for (;i > 0;i--) 3906 buf[len++] = *cur++; 3907 buf[len++] = ';'; 3908 } 3909 } 3910 } else { 3911 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 3912 if ((len != 0) || (!normalize)) { 3913 if ((!normalize) || (!in_space)) { 3914 COPY_BUF(l,buf,len,0x20); 3915 while (len > buf_size - 10) { 3916 growBuffer(buf, 
10); 3917 } 3918 } 3919 in_space = 1; 3920 } 3921 } else { 3922 in_space = 0; 3923 COPY_BUF(l,buf,len,c); 3924 if (len > buf_size - 10) { 3925 growBuffer(buf, 10); 3926 } 3927 } 3928 NEXTL(l); 3929 } 3930 GROW; 3931 c = CUR_CHAR(l); 3932 } 3933 if ((in_space) && (normalize)) { 3934 while (buf[len - 1] == 0x20) len--; 3935 } 3936 buf[len] = 0; 3937 if (RAW == '<') { 3938 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 3939 } else if (RAW != limit) { 3940 if ((c != 0) && (!IS_CHAR(c))) { 3941 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 3942 "invalid character in attribute value\n"); 3943 } else { 3944 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3945 "AttValue: ' expected\n"); 3946 } 3947 } else 3948 NEXT; 3949 if (attlen != NULL) *attlen = len; 3950 return(buf); 3951 3952 mem_error: 3953 xmlErrMemory(ctxt, NULL); 3954 if (buf != NULL) 3955 xmlFree(buf); 3956 if (rep != NULL) 3957 xmlFree(rep); 3958 return(NULL); 3959 } 3960 3961 /** 3962 * xmlParseAttValue: 3963 * @ctxt: an XML parser context 3964 * 3965 * parse a value for an attribute 3966 * Note: the parser won't do substitution of entities here, this 3967 * will be handled later in xmlStringGetNodeList 3968 * 3969 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 3970 * "'" ([^<&'] | Reference)* "'" 3971 * 3972 * 3.3.3 Attribute-Value Normalization: 3973 * Before the value of an attribute is passed to the application or 3974 * checked for validity, the XML processor must normalize it as follows: 3975 * - a character reference is processed by appending the referenced 3976 * character to the attribute value 3977 * - an entity reference is processed by recursively processing the 3978 * replacement text of the entity 3979 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 3980 * appending #x20 to the normalized value, except that only a single 3981 * #x20 is appended for a "#xD#xA" sequence that is part of an external 3982 * parsed entity or the literal entity value of an internal parsed 
entity
 * - other characters are processed by appending them to the normalized value
 * If the declared value is not CDATA, then the XML processor must further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * All attributes for which no declaration has been read should be treated
 * by a non-validating parser as if declared CDATA.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */


xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
    /* public entry point: guard against a missing context or input */
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
}

/**
 * xmlParseSystemLiteral:
 * @ctxt:  an XML parser context
 *
 * parse an XML Literal
 *
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
 *
 * ctxt->instate is set to XML_PARSER_SYSTEM_LITERAL while scanning and
 * put back to its previous value before returning.
 *
 * Returns the SystemLiteral parsed, in a newly allocated buffer, or NULL
 * if no opening quote is found or on allocation failure.  When the
 * literal is not terminated XML_ERR_LITERAL_NOT_FINISHED is raised and
 * the text collected so far is still returned.
 */

xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int cur, l;
    xmlChar stop;                   /* the quote that opened the literal */
    int state = ctxt->instate;      /* parser state to restore on exit */
    int count = 0;                  /* characters seen since the last GROW */

    SHRINK;
    if (RAW == '"') {
        NEXT;
        stop = '"';
    } else if (RAW == '\'') {
        NEXT;
        stop = '\'';
    } else {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
        return(NULL);
    }

    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
    cur = CUR_CHAR(l);
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
        /* keep at least 5 spare bytes before copying */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlFree(buf);
                xmlErrMemory(ctxt, NULL);
                ctxt->instate = (xmlParserInputState) state;
                return(NULL);
            }
            buf = tmp;
        }
        count++;
        if (count > 50) {
            GROW;
            count = 0;
        }
        COPY_BUF(l,buf,len,cur);
        NEXTL(l);
        cur = CUR_CHAR(l);
        /* a 0 here may only mean the buffer ran dry: refill and retry */
        if (cur == 0) {
            GROW;
            SHRINK;
            cur = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    ctxt->instate = (xmlParserInputState) state;
    if (!IS_CHAR(cur)) {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
    } else {
        /* skip the closing quote */
        NEXT;
    }
    return(buf);
}

/**
 * xmlParsePubidLiteral:
 * @ctxt:  an XML parser context
 *
 * parse an XML public literal
 *
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
 *
 * ctxt->instate is set to XML_PARSER_PUBLIC_LITERAL while scanning and
 * restored on the normal exit path.
 *
 * Returns the PubidLiteral parsed, in a newly allocated buffer, or NULL
 * if no opening quote is found or on allocation failure.  When the
 * literal is not terminated XML_ERR_LITERAL_NOT_FINISHED is raised and
 * the text collected so far is still returned.
 */

xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    xmlChar cur;                    /* PubidChar is single-byte: no decoding */
    xmlChar stop;                   /* the quote that opened the literal */
    int count = 0;                  /* characters seen since the last GROW */
    xmlParserInputState oldstate = ctxt->instate;

    SHRINK;
    if (RAW == '"') {
        NEXT;
        stop = '"';
    } else if (RAW == '\'') {
        NEXT;
        stop = '\'';
    } else {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
    cur = CUR;
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
        /* room for this byte plus the terminating 0 */
        if (len + 1 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                /*
                 * NOTE(review): unlike xmlParseSystemLiteral() this path
                 * does not put ctxt->instate back to oldstate -- confirm
                 * whether that difference is intended.
                 */
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        buf[len++] = cur;
        count++;
        if (count > 50) {
            GROW;
            count = 0;
        }
        NEXT;
        cur = CUR;
        /* a 0 here may only mean the buffer ran dry: refill and retry */
        if (cur == 0) {
            GROW;
            SHRINK;
            cur = CUR;
        }
    }
    buf[len] = 0;
    if (cur != stop) {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
    } else {
        /* skip the closing quote */
        NEXT;
    }
    ctxt->instate = oldstate;
    return(buf);
}

/* slow path of xmlParseCharData(), defined right after it */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value.  An entry is non-zero (equal to its own index)
 * for the bytes the fast loop of xmlParseCharData() may step over
 * without further thought: TAB and printable ASCII except '&', '<' and
 * ']'.  Every other byte, including LF, CR and all bytes >= 0x80, maps
 * to 0 and stops the loop.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 an accelerated loop hands runs of plain ASCII text
 * straight from the input buffer to the SAX callbacks; anything it
 * cannot handle is left to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position as of the last delivered segment, restored before fallback */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
            /* phase 1: a leading run made only of spaces and line feeds */
get_more_space:
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /* only blanks up to the next tag: deliver them and leave */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

            /* phase 2: plain ASCII text, using the test_char_data table */
get_more:
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver what has been scanned so far */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF: step over the CR only, so that the next
                     * segment starts at the LF, and go round again.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* fall back to the generic routine from the last delivered position */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 * xmlParseCharDataComplex:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.this is the fallback function
 * of xmlParseCharData() when the parsing requires handling
 * of non-ASCII characters.
4359 */ 4360 static void 4361 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4362 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4363 int nbchar = 0; 4364 int cur, l; 4365 int count = 0; 4366 4367 SHRINK; 4368 GROW; 4369 cur = CUR_CHAR(l); 4370 while ((cur != '<') && /* checked */ 4371 (cur != '&') && 4372 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4373 if ((cur == ']') && (NXT(1) == ']') && 4374 (NXT(2) == '>')) { 4375 if (cdata) break; 4376 else { 4377 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4378 } 4379 } 4380 COPY_BUF(l,buf,nbchar,cur); 4381 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4382 buf[nbchar] = 0; 4383 4384 /* 4385 * OK the segment is to be consumed as chars. 4386 */ 4387 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4388 if (areBlanks(ctxt, buf, nbchar, 0)) { 4389 if (ctxt->sax->ignorableWhitespace != NULL) 4390 ctxt->sax->ignorableWhitespace(ctxt->userData, 4391 buf, nbchar); 4392 } else { 4393 if (ctxt->sax->characters != NULL) 4394 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4395 if ((ctxt->sax->characters != 4396 ctxt->sax->ignorableWhitespace) && 4397 (*ctxt->space == -1)) 4398 *ctxt->space = -2; 4399 } 4400 } 4401 nbchar = 0; 4402 /* something really bad happened in the SAX callback */ 4403 if (ctxt->instate != XML_PARSER_CONTENT) 4404 return; 4405 } 4406 count++; 4407 if (count > 50) { 4408 GROW; 4409 count = 0; 4410 } 4411 NEXTL(l); 4412 cur = CUR_CHAR(l); 4413 } 4414 if (nbchar != 0) { 4415 buf[nbchar] = 0; 4416 /* 4417 * OK the segment is to be consumed as chars. 
4418 */ 4419 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4420 if (areBlanks(ctxt, buf, nbchar, 0)) { 4421 if (ctxt->sax->ignorableWhitespace != NULL) 4422 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4423 } else { 4424 if (ctxt->sax->characters != NULL) 4425 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4426 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4427 (*ctxt->space == -1)) 4428 *ctxt->space = -2; 4429 } 4430 } 4431 } 4432 if ((cur != 0) && (!IS_CHAR(cur))) { 4433 /* Generate the error and skip the offending character */ 4434 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4435 "PCDATA invalid Char value %d\n", 4436 cur); 4437 NEXTL(l); 4438 } 4439 } 4440 4441 /** 4442 * xmlParseExternalID: 4443 * @ctxt: an XML parser context 4444 * @publicID: a xmlChar** receiving PubidLiteral 4445 * @strict: indicate whether we should restrict parsing to only 4446 * production [75], see NOTE below 4447 * 4448 * Parse an External ID or a Public ID 4449 * 4450 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4451 * 'PUBLIC' S PubidLiteral S SystemLiteral 4452 * 4453 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4454 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4455 * 4456 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4457 * 4458 * Returns the function returns SystemLiteral and in the second 4459 * case publicID receives PubidLiteral, is strict is off 4460 * it is possible to return NULL and have publicID set. 
4461 */ 4462 4463 xmlChar * 4464 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4465 xmlChar *URI = NULL; 4466 4467 SHRINK; 4468 4469 *publicID = NULL; 4470 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4471 SKIP(6); 4472 if (!IS_BLANK_CH(CUR)) { 4473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4474 "Space required after 'SYSTEM'\n"); 4475 } 4476 SKIP_BLANKS; 4477 URI = xmlParseSystemLiteral(ctxt); 4478 if (URI == NULL) { 4479 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4480 } 4481 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4482 SKIP(6); 4483 if (!IS_BLANK_CH(CUR)) { 4484 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4485 "Space required after 'PUBLIC'\n"); 4486 } 4487 SKIP_BLANKS; 4488 *publicID = xmlParsePubidLiteral(ctxt); 4489 if (*publicID == NULL) { 4490 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4491 } 4492 if (strict) { 4493 /* 4494 * We don't handle [83] so "S SystemLiteral" is required. 4495 */ 4496 if (!IS_BLANK_CH(CUR)) { 4497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4498 "Space required after the Public Identifier\n"); 4499 } 4500 } else { 4501 /* 4502 * We handle [83] so we return immediately, if 4503 * "S SystemLiteral" is not detected. From a purely parsing 4504 * point of view that's a nice mess. 4505 */ 4506 const xmlChar *ptr; 4507 GROW; 4508 4509 ptr = CUR_PTR; 4510 if (!IS_BLANK_CH(*ptr)) return(NULL); 4511 4512 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4513 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4514 } 4515 SKIP_BLANKS; 4516 URI = xmlParseSystemLiteral(ctxt); 4517 if (URI == NULL) { 4518 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4519 } 4520 } 4521 return(URI); 4522 } 4523 4524 /** 4525 * xmlParseCommentComplex: 4526 * @ctxt: an XML parser context 4527 * @buf: the already parsed part of the buffer 4528 * @len: number of bytes filles in the buffer 4529 * @size: allocated size of the buffer 4530 * 4531 * Skip an XML (SGML) comment <!-- .... 
--> 4532 * The spec says that "For compatibility, the string "--" (double-hyphen) 4533 * must not occur within comments. " 4534 * This is the slow routine in case the accelerator for ascii didn't work 4535 * 4536 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4537 */ 4538 static void 4539 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { 4540 int q, ql; 4541 int r, rl; 4542 int cur, l; 4543 int count = 0; 4544 int inputid; 4545 4546 inputid = ctxt->input->id; 4547 4548 if (buf == NULL) { 4549 len = 0; 4550 size = XML_PARSER_BUFFER_SIZE; 4551 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4552 if (buf == NULL) { 4553 xmlErrMemory(ctxt, NULL); 4554 return; 4555 } 4556 } 4557 GROW; /* Assure there's enough input data */ 4558 q = CUR_CHAR(ql); 4559 if (q == 0) 4560 goto not_terminated; 4561 if (!IS_CHAR(q)) { 4562 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4563 "xmlParseComment: invalid xmlChar value %d\n", 4564 q); 4565 xmlFree (buf); 4566 return; 4567 } 4568 NEXTL(ql); 4569 r = CUR_CHAR(rl); 4570 if (r == 0) 4571 goto not_terminated; 4572 if (!IS_CHAR(r)) { 4573 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4574 "xmlParseComment: invalid xmlChar value %d\n", 4575 q); 4576 xmlFree (buf); 4577 return; 4578 } 4579 NEXTL(rl); 4580 cur = CUR_CHAR(l); 4581 if (cur == 0) 4582 goto not_terminated; 4583 while (IS_CHAR(cur) && /* checked */ 4584 ((cur != '>') || 4585 (r != '-') || (q != '-'))) { 4586 if ((r == '-') && (q == '-')) { 4587 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4588 } 4589 if (len + 5 >= size) { 4590 xmlChar *new_buf; 4591 size *= 2; 4592 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4593 if (new_buf == NULL) { 4594 xmlFree (buf); 4595 xmlErrMemory(ctxt, NULL); 4596 return; 4597 } 4598 buf = new_buf; 4599 } 4600 COPY_BUF(ql,buf,len,q); 4601 q = r; 4602 ql = rl; 4603 r = cur; 4604 rl = l; 4605 4606 count++; 4607 if (count > 50) { 4608 GROW; 4609 count = 0; 4610 } 4611 
NEXTL(l); 4612 cur = CUR_CHAR(l); 4613 if (cur == 0) { 4614 SHRINK; 4615 GROW; 4616 cur = CUR_CHAR(l); 4617 } 4618 } 4619 buf[len] = 0; 4620 if (cur == 0) { 4621 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4622 "Comment not terminated \n<!--%.50s\n", buf); 4623 } else if (!IS_CHAR(cur)) { 4624 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4625 "xmlParseComment: invalid xmlChar value %d\n", 4626 cur); 4627 } else { 4628 if (inputid != ctxt->input->id) { 4629 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4630 "Comment doesn't start and stop in the same entity\n"); 4631 } 4632 NEXT; 4633 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4634 (!ctxt->disableSAX)) 4635 ctxt->sax->comment(ctxt->userData, buf); 4636 } 4637 xmlFree(buf); 4638 return; 4639 not_terminated: 4640 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4641 "Comment not terminated\n", NULL); 4642 xmlFree(buf); 4643 return; 4644 } 4645 4646 /** 4647 * xmlParseComment: 4648 * @ctxt: an XML parser context 4649 * 4650 * Skip an XML (SGML) comment <!-- .... --> 4651 * The spec says that "For compatibility, the string "--" (double-hyphen) 4652 * must not occur within comments. " 4653 * 4654 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4655 */ 4656 void 4657 xmlParseComment(xmlParserCtxtPtr ctxt) { 4658 xmlChar *buf = NULL; 4659 int size = XML_PARSER_BUFFER_SIZE; 4660 int len = 0; 4661 xmlParserInputState state; 4662 const xmlChar *in; 4663 int nbchar = 0, ccol; 4664 int inputid; 4665 4666 /* 4667 * Check that there is a comment right here. 4668 */ 4669 if ((RAW != '<') || (NXT(1) != '!') || 4670 (NXT(2) != '-') || (NXT(3) != '-')) return; 4671 state = ctxt->instate; 4672 ctxt->instate = XML_PARSER_COMMENT; 4673 inputid = ctxt->input->id; 4674 SKIP(4); 4675 SHRINK; 4676 GROW; 4677 4678 /* 4679 * Accelerated common case where input don't need to be 4680 * modified before passing it to the handler. 
4681 */ 4682 in = ctxt->input->cur; 4683 do { 4684 if (*in == 0xA) { 4685 do { 4686 ctxt->input->line++; ctxt->input->col = 1; 4687 in++; 4688 } while (*in == 0xA); 4689 } 4690 get_more: 4691 ccol = ctxt->input->col; 4692 while (((*in > '-') && (*in <= 0x7F)) || 4693 ((*in >= 0x20) && (*in < '-')) || 4694 (*in == 0x09)) { 4695 in++; 4696 ccol++; 4697 } 4698 ctxt->input->col = ccol; 4699 if (*in == 0xA) { 4700 do { 4701 ctxt->input->line++; ctxt->input->col = 1; 4702 in++; 4703 } while (*in == 0xA); 4704 goto get_more; 4705 } 4706 nbchar = in - ctxt->input->cur; 4707 /* 4708 * save current set of data 4709 */ 4710 if (nbchar > 0) { 4711 if ((ctxt->sax != NULL) && 4712 (ctxt->sax->comment != NULL)) { 4713 if (buf == NULL) { 4714 if ((*in == '-') && (in[1] == '-')) 4715 size = nbchar + 1; 4716 else 4717 size = XML_PARSER_BUFFER_SIZE + nbchar; 4718 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4719 if (buf == NULL) { 4720 xmlErrMemory(ctxt, NULL); 4721 ctxt->instate = state; 4722 return; 4723 } 4724 len = 0; 4725 } else if (len + nbchar + 1 >= size) { 4726 xmlChar *new_buf; 4727 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4728 new_buf = (xmlChar *) xmlRealloc(buf, 4729 size * sizeof(xmlChar)); 4730 if (new_buf == NULL) { 4731 xmlFree (buf); 4732 xmlErrMemory(ctxt, NULL); 4733 ctxt->instate = state; 4734 return; 4735 } 4736 buf = new_buf; 4737 } 4738 memcpy(&buf[len], ctxt->input->cur, nbchar); 4739 len += nbchar; 4740 buf[len] = 0; 4741 } 4742 } 4743 ctxt->input->cur = in; 4744 if (*in == 0xA) { 4745 in++; 4746 ctxt->input->line++; ctxt->input->col = 1; 4747 } 4748 if (*in == 0xD) { 4749 in++; 4750 if (*in == 0xA) { 4751 ctxt->input->cur = in; 4752 in++; 4753 ctxt->input->line++; ctxt->input->col = 1; 4754 continue; /* while */ 4755 } 4756 in--; 4757 } 4758 SHRINK; 4759 GROW; 4760 in = ctxt->input->cur; 4761 if (*in == '-') { 4762 if (in[1] == '-') { 4763 if (in[2] == '>') { 4764 if (ctxt->input->id != inputid) { 4765 xmlFatalErrMsg(ctxt, 
XML_ERR_ENTITY_BOUNDARY, 4766 "comment doesn't start and stop in the same entity\n"); 4767 } 4768 SKIP(3); 4769 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4770 (!ctxt->disableSAX)) { 4771 if (buf != NULL) 4772 ctxt->sax->comment(ctxt->userData, buf); 4773 else 4774 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4775 } 4776 if (buf != NULL) 4777 xmlFree(buf); 4778 ctxt->instate = state; 4779 return; 4780 } 4781 if (buf != NULL) 4782 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4783 "Comment not terminated \n<!--%.50s\n", 4784 buf); 4785 else 4786 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4787 "Comment not terminated \n", NULL); 4788 in++; 4789 ctxt->input->col++; 4790 } 4791 in++; 4792 ctxt->input->col++; 4793 goto get_more; 4794 } 4795 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4796 xmlParseCommentComplex(ctxt, buf, len, size); 4797 ctxt->instate = state; 4798 return; 4799 } 4800 4801 4802 /** 4803 * xmlParsePITarget: 4804 * @ctxt: an XML parser context 4805 * 4806 * parse the name of a PI 4807 * 4808 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 4809 * 4810 * Returns the PITarget name or NULL 4811 */ 4812 4813 const xmlChar * 4814 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 4815 const xmlChar *name; 4816 4817 name = xmlParseName(ctxt); 4818 if ((name != NULL) && 4819 ((name[0] == 'x') || (name[0] == 'X')) && 4820 ((name[1] == 'm') || (name[1] == 'M')) && 4821 ((name[2] == 'l') || (name[2] == 'L'))) { 4822 int i; 4823 if ((name[0] == 'x') && (name[1] == 'm') && 4824 (name[2] == 'l') && (name[3] == 0)) { 4825 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4826 "XML declaration allowed only at the start of the document\n"); 4827 return(name); 4828 } else if (name[3] == 0) { 4829 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 4830 return(name); 4831 } 4832 for (i = 0;;i++) { 4833 if (xmlW3CPIs[i] == NULL) break; 4834 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 4835 return(name); 
4836 } 4837 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4838 "xmlParsePITarget: invalid name prefix 'xml'\n", 4839 NULL, NULL); 4840 } 4841 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 4842 xmlNsErr(ctxt, XML_NS_ERR_COLON, 4843 "colon are forbidden from PI names '%s'\n", name, NULL, NULL); 4844 } 4845 return(name); 4846 } 4847 4848 #ifdef LIBXML_CATALOG_ENABLED 4849 /** 4850 * xmlParseCatalogPI: 4851 * @ctxt: an XML parser context 4852 * @catalog: the PI value string 4853 * 4854 * parse an XML Catalog Processing Instruction. 4855 * 4856 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 4857 * 4858 * Occurs only if allowed by the user and if happening in the Misc 4859 * part of the document before any doctype informations 4860 * This will add the given catalog to the parsing context in order 4861 * to be used if there is a resolution need further down in the document 4862 */ 4863 4864 static void 4865 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 4866 xmlChar *URL = NULL; 4867 const xmlChar *tmp, *base; 4868 xmlChar marker; 4869 4870 tmp = catalog; 4871 while (IS_BLANK_CH(*tmp)) tmp++; 4872 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 4873 goto error; 4874 tmp += 7; 4875 while (IS_BLANK_CH(*tmp)) tmp++; 4876 if (*tmp != '=') { 4877 return; 4878 } 4879 tmp++; 4880 while (IS_BLANK_CH(*tmp)) tmp++; 4881 marker = *tmp; 4882 if ((marker != '\'') && (marker != '"')) 4883 goto error; 4884 tmp++; 4885 base = tmp; 4886 while ((*tmp != 0) && (*tmp != marker)) tmp++; 4887 if (*tmp == 0) 4888 goto error; 4889 URL = xmlStrndup(base, tmp - base); 4890 tmp++; 4891 while (IS_BLANK_CH(*tmp)) tmp++; 4892 if (*tmp != 0) 4893 goto error; 4894 4895 if (URL != NULL) { 4896 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 4897 xmlFree(URL); 4898 } 4899 return; 4900 4901 error: 4902 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 4903 "Catalog PI syntax error: %s\n", 4904 catalog, NULL); 4905 if (URL != NULL) 4906 xmlFree(URL); 4907 } 4908 
#endif 4909 4910 /** 4911 * xmlParsePI: 4912 * @ctxt: an XML parser context 4913 * 4914 * parse an XML Processing Instruction. 4915 * 4916 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 4917 * 4918 * The processing is transfered to SAX once parsed. 4919 */ 4920 4921 void 4922 xmlParsePI(xmlParserCtxtPtr ctxt) { 4923 xmlChar *buf = NULL; 4924 int len = 0; 4925 int size = XML_PARSER_BUFFER_SIZE; 4926 int cur, l; 4927 const xmlChar *target; 4928 xmlParserInputState state; 4929 int count = 0; 4930 4931 if ((RAW == '<') && (NXT(1) == '?')) { 4932 xmlParserInputPtr input = ctxt->input; 4933 state = ctxt->instate; 4934 ctxt->instate = XML_PARSER_PI; 4935 /* 4936 * this is a Processing Instruction. 4937 */ 4938 SKIP(2); 4939 SHRINK; 4940 4941 /* 4942 * Parse the target name and check for special support like 4943 * namespace. 4944 */ 4945 target = xmlParsePITarget(ctxt); 4946 if (target != NULL) { 4947 if ((RAW == '?') && (NXT(1) == '>')) { 4948 if (input != ctxt->input) { 4949 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4950 "PI declaration doesn't start and stop in the same entity\n"); 4951 } 4952 SKIP(2); 4953 4954 /* 4955 * SAX: PI detected. 
4956 */ 4957 if ((ctxt->sax) && (!ctxt->disableSAX) && 4958 (ctxt->sax->processingInstruction != NULL)) 4959 ctxt->sax->processingInstruction(ctxt->userData, 4960 target, NULL); 4961 if (ctxt->instate != XML_PARSER_EOF) 4962 ctxt->instate = state; 4963 return; 4964 } 4965 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4966 if (buf == NULL) { 4967 xmlErrMemory(ctxt, NULL); 4968 ctxt->instate = state; 4969 return; 4970 } 4971 cur = CUR; 4972 if (!IS_BLANK(cur)) { 4973 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 4974 "ParsePI: PI %s space expected\n", target); 4975 } 4976 SKIP_BLANKS; 4977 cur = CUR_CHAR(l); 4978 while (IS_CHAR(cur) && /* checked */ 4979 ((cur != '?') || (NXT(1) != '>'))) { 4980 if (len + 5 >= size) { 4981 xmlChar *tmp; 4982 4983 size *= 2; 4984 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4985 if (tmp == NULL) { 4986 xmlErrMemory(ctxt, NULL); 4987 xmlFree(buf); 4988 ctxt->instate = state; 4989 return; 4990 } 4991 buf = tmp; 4992 } 4993 count++; 4994 if (count > 50) { 4995 GROW; 4996 count = 0; 4997 } 4998 COPY_BUF(l,buf,len,cur); 4999 NEXTL(l); 5000 cur = CUR_CHAR(l); 5001 if (cur == 0) { 5002 SHRINK; 5003 GROW; 5004 cur = CUR_CHAR(l); 5005 } 5006 } 5007 buf[len] = 0; 5008 if (cur != '?') { 5009 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5010 "ParsePI: PI %s never end ...\n", target); 5011 } else { 5012 if (input != ctxt->input) { 5013 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5014 "PI declaration doesn't start and stop in the same entity\n"); 5015 } 5016 SKIP(2); 5017 5018 #ifdef LIBXML_CATALOG_ENABLED 5019 if (((state == XML_PARSER_MISC) || 5020 (state == XML_PARSER_START)) && 5021 (xmlStrEqual(target, XML_CATALOG_PI))) { 5022 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5023 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5024 (allow == XML_CATA_ALLOW_ALL)) 5025 xmlParseCatalogPI(ctxt, buf); 5026 } 5027 #endif 5028 5029 5030 /* 5031 * SAX: PI detected. 
5032 */ 5033 if ((ctxt->sax) && (!ctxt->disableSAX) && 5034 (ctxt->sax->processingInstruction != NULL)) 5035 ctxt->sax->processingInstruction(ctxt->userData, 5036 target, buf); 5037 } 5038 xmlFree(buf); 5039 } else { 5040 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5041 } 5042 if (ctxt->instate != XML_PARSER_EOF) 5043 ctxt->instate = state; 5044 } 5045 } 5046 5047 /** 5048 * xmlParseNotationDecl: 5049 * @ctxt: an XML parser context 5050 * 5051 * parse a notation declaration 5052 * 5053 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5054 * 5055 * Hence there is actually 3 choices: 5056 * 'PUBLIC' S PubidLiteral 5057 * 'PUBLIC' S PubidLiteral S SystemLiteral 5058 * and 'SYSTEM' S SystemLiteral 5059 * 5060 * See the NOTE on xmlParseExternalID(). 5061 */ 5062 5063 void 5064 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5065 const xmlChar *name; 5066 xmlChar *Pubid; 5067 xmlChar *Systemid; 5068 5069 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5070 xmlParserInputPtr input = ctxt->input; 5071 SHRINK; 5072 SKIP(10); 5073 if (!IS_BLANK_CH(CUR)) { 5074 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5075 "Space required after '<!NOTATION'\n"); 5076 return; 5077 } 5078 SKIP_BLANKS; 5079 5080 name = xmlParseName(ctxt); 5081 if (name == NULL) { 5082 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5083 return; 5084 } 5085 if (!IS_BLANK_CH(CUR)) { 5086 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5087 "Space required after the NOTATION name'\n"); 5088 return; 5089 } 5090 if (xmlStrchr(name, ':') != NULL) { 5091 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5092 "colon are forbidden from notation names '%s'\n", 5093 name, NULL, NULL); 5094 } 5095 SKIP_BLANKS; 5096 5097 /* 5098 * Parse the IDs. 
5099 */ 5100 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5101 SKIP_BLANKS; 5102 5103 if (RAW == '>') { 5104 if (input != ctxt->input) { 5105 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5106 "Notation declaration doesn't start and stop in the same entity\n"); 5107 } 5108 NEXT; 5109 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5110 (ctxt->sax->notationDecl != NULL)) 5111 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5112 } else { 5113 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5114 } 5115 if (Systemid != NULL) xmlFree(Systemid); 5116 if (Pubid != NULL) xmlFree(Pubid); 5117 } 5118 } 5119 5120 /** 5121 * xmlParseEntityDecl: 5122 * @ctxt: an XML parser context 5123 * 5124 * parse <!ENTITY declarations 5125 * 5126 * [70] EntityDecl ::= GEDecl | PEDecl 5127 * 5128 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5129 * 5130 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5131 * 5132 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5133 * 5134 * [74] PEDef ::= EntityValue | ExternalID 5135 * 5136 * [76] NDataDecl ::= S 'NDATA' S Name 5137 * 5138 * [ VC: Notation Declared ] 5139 * The Name must match the declared name of a notation. 
5140 */ 5141 5142 void 5143 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5144 const xmlChar *name = NULL; 5145 xmlChar *value = NULL; 5146 xmlChar *URI = NULL, *literal = NULL; 5147 const xmlChar *ndata = NULL; 5148 int isParameter = 0; 5149 xmlChar *orig = NULL; 5150 int skipped; 5151 5152 /* GROW; done in the caller */ 5153 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5154 xmlParserInputPtr input = ctxt->input; 5155 SHRINK; 5156 SKIP(8); 5157 skipped = SKIP_BLANKS; 5158 if (skipped == 0) { 5159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5160 "Space required after '<!ENTITY'\n"); 5161 } 5162 5163 if (RAW == '%') { 5164 NEXT; 5165 skipped = SKIP_BLANKS; 5166 if (skipped == 0) { 5167 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5168 "Space required after '%'\n"); 5169 } 5170 isParameter = 1; 5171 } 5172 5173 name = xmlParseName(ctxt); 5174 if (name == NULL) { 5175 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5176 "xmlParseEntityDecl: no name\n"); 5177 return; 5178 } 5179 if (xmlStrchr(name, ':') != NULL) { 5180 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5181 "colon are forbidden from entities names '%s'\n", 5182 name, NULL, NULL); 5183 } 5184 skipped = SKIP_BLANKS; 5185 if (skipped == 0) { 5186 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5187 "Space required after the entity name\n"); 5188 } 5189 5190 ctxt->instate = XML_PARSER_ENTITY_DECL; 5191 /* 5192 * handle the various case of definitions... 
5193 */ 5194 if (isParameter) { 5195 if ((RAW == '"') || (RAW == '\'')) { 5196 value = xmlParseEntityValue(ctxt, &orig); 5197 if (value) { 5198 if ((ctxt->sax != NULL) && 5199 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5200 ctxt->sax->entityDecl(ctxt->userData, name, 5201 XML_INTERNAL_PARAMETER_ENTITY, 5202 NULL, NULL, value); 5203 } 5204 } else { 5205 URI = xmlParseExternalID(ctxt, &literal, 1); 5206 if ((URI == NULL) && (literal == NULL)) { 5207 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5208 } 5209 if (URI) { 5210 xmlURIPtr uri; 5211 5212 uri = xmlParseURI((const char *) URI); 5213 if (uri == NULL) { 5214 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5215 "Invalid URI: %s\n", URI); 5216 /* 5217 * This really ought to be a well formedness error 5218 * but the XML Core WG decided otherwise c.f. issue 5219 * E26 of the XML erratas. 5220 */ 5221 } else { 5222 if (uri->fragment != NULL) { 5223 /* 5224 * Okay this is foolish to block those but not 5225 * invalid URIs. 5226 */ 5227 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5228 } else { 5229 if ((ctxt->sax != NULL) && 5230 (!ctxt->disableSAX) && 5231 (ctxt->sax->entityDecl != NULL)) 5232 ctxt->sax->entityDecl(ctxt->userData, name, 5233 XML_EXTERNAL_PARAMETER_ENTITY, 5234 literal, URI, NULL); 5235 } 5236 xmlFreeURI(uri); 5237 } 5238 } 5239 } 5240 } else { 5241 if ((RAW == '"') || (RAW == '\'')) { 5242 value = xmlParseEntityValue(ctxt, &orig); 5243 if ((ctxt->sax != NULL) && 5244 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5245 ctxt->sax->entityDecl(ctxt->userData, name, 5246 XML_INTERNAL_GENERAL_ENTITY, 5247 NULL, NULL, value); 5248 /* 5249 * For expat compatibility in SAX mode. 
5250 */ 5251 if ((ctxt->myDoc == NULL) || 5252 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5253 if (ctxt->myDoc == NULL) { 5254 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5255 if (ctxt->myDoc == NULL) { 5256 xmlErrMemory(ctxt, "New Doc failed"); 5257 return; 5258 } 5259 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5260 } 5261 if (ctxt->myDoc->intSubset == NULL) 5262 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5263 BAD_CAST "fake", NULL, NULL); 5264 5265 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5266 NULL, NULL, value); 5267 } 5268 } else { 5269 URI = xmlParseExternalID(ctxt, &literal, 1); 5270 if ((URI == NULL) && (literal == NULL)) { 5271 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5272 } 5273 if (URI) { 5274 xmlURIPtr uri; 5275 5276 uri = xmlParseURI((const char *)URI); 5277 if (uri == NULL) { 5278 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5279 "Invalid URI: %s\n", URI); 5280 /* 5281 * This really ought to be a well formedness error 5282 * but the XML Core WG decided otherwise c.f. issue 5283 * E26 of the XML erratas. 5284 */ 5285 } else { 5286 if (uri->fragment != NULL) { 5287 /* 5288 * Okay this is foolish to block those but not 5289 * invalid URIs. 
5290 */ 5291 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5292 } 5293 xmlFreeURI(uri); 5294 } 5295 } 5296 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5298 "Space required before 'NDATA'\n"); 5299 } 5300 SKIP_BLANKS; 5301 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5302 SKIP(5); 5303 if (!IS_BLANK_CH(CUR)) { 5304 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5305 "Space required after 'NDATA'\n"); 5306 } 5307 SKIP_BLANKS; 5308 ndata = xmlParseName(ctxt); 5309 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5310 (ctxt->sax->unparsedEntityDecl != NULL)) 5311 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5312 literal, URI, ndata); 5313 } else { 5314 if ((ctxt->sax != NULL) && 5315 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5316 ctxt->sax->entityDecl(ctxt->userData, name, 5317 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5318 literal, URI, NULL); 5319 /* 5320 * For expat compatibility in SAX mode. 5321 * assuming the entity repalcement was asked for 5322 */ 5323 if ((ctxt->replaceEntities != 0) && 5324 ((ctxt->myDoc == NULL) || 5325 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5326 if (ctxt->myDoc == NULL) { 5327 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5328 if (ctxt->myDoc == NULL) { 5329 xmlErrMemory(ctxt, "New Doc failed"); 5330 return; 5331 } 5332 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5333 } 5334 5335 if (ctxt->myDoc->intSubset == NULL) 5336 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5337 BAD_CAST "fake", NULL, NULL); 5338 xmlSAX2EntityDecl(ctxt, name, 5339 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5340 literal, URI, NULL); 5341 } 5342 } 5343 } 5344 } 5345 SKIP_BLANKS; 5346 if (RAW != '>') { 5347 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5348 "xmlParseEntityDecl: entity %s not terminated\n", name); 5349 } else { 5350 if (input != ctxt->input) { 5351 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5352 "Entity declaration doesn't start and stop in the same entity\n"); 5353 } 
5354 NEXT; 5355 } 5356 if (orig != NULL) { 5357 /* 5358 * Ugly mechanism to save the raw entity value. 5359 */ 5360 xmlEntityPtr cur = NULL; 5361 5362 if (isParameter) { 5363 if ((ctxt->sax != NULL) && 5364 (ctxt->sax->getParameterEntity != NULL)) 5365 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5366 } else { 5367 if ((ctxt->sax != NULL) && 5368 (ctxt->sax->getEntity != NULL)) 5369 cur = ctxt->sax->getEntity(ctxt->userData, name); 5370 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5371 cur = xmlSAX2GetEntity(ctxt, name); 5372 } 5373 } 5374 if (cur != NULL) { 5375 if (cur->orig != NULL) 5376 xmlFree(orig); 5377 else 5378 cur->orig = orig; 5379 } else 5380 xmlFree(orig); 5381 } 5382 if (value != NULL) xmlFree(value); 5383 if (URI != NULL) xmlFree(URI); 5384 if (literal != NULL) xmlFree(literal); 5385 } 5386 } 5387 5388 /** 5389 * xmlParseDefaultDecl: 5390 * @ctxt: an XML parser context 5391 * @value: Receive a possible fixed default value for the attribute 5392 * 5393 * Parse an attribute default declaration 5394 * 5395 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5396 * 5397 * [ VC: Required Attribute ] 5398 * if the default declaration is the keyword #REQUIRED, then the 5399 * attribute must be specified for all elements of the type in the 5400 * attribute-list declaration. 5401 * 5402 * [ VC: Attribute Default Legal ] 5403 * The declared default value must meet the lexical constraints of 5404 * the declared attribute type c.f. xmlValidateAttributeDecl() 5405 * 5406 * [ VC: Fixed Attribute Default ] 5407 * if an attribute has a default value declared with the #FIXED 5408 * keyword, instances of that attribute must match the default value. 5409 * 5410 * [ WFC: No < in Attribute Values ] 5411 * handled in xmlParseAttValue() 5412 * 5413 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5414 * or XML_ATTRIBUTE_FIXED. 
5415 */ 5416 5417 int 5418 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5419 int val; 5420 xmlChar *ret; 5421 5422 *value = NULL; 5423 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5424 SKIP(9); 5425 return(XML_ATTRIBUTE_REQUIRED); 5426 } 5427 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5428 SKIP(8); 5429 return(XML_ATTRIBUTE_IMPLIED); 5430 } 5431 val = XML_ATTRIBUTE_NONE; 5432 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5433 SKIP(6); 5434 val = XML_ATTRIBUTE_FIXED; 5435 if (!IS_BLANK_CH(CUR)) { 5436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5437 "Space required after '#FIXED'\n"); 5438 } 5439 SKIP_BLANKS; 5440 } 5441 ret = xmlParseAttValue(ctxt); 5442 ctxt->instate = XML_PARSER_DTD; 5443 if (ret == NULL) { 5444 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5445 "Attribute default value declaration error\n"); 5446 } else 5447 *value = ret; 5448 return(val); 5449 } 5450 5451 /** 5452 * xmlParseNotationType: 5453 * @ctxt: an XML parser context 5454 * 5455 * parse an Notation attribute type. 5456 * 5457 * Note: the leading 'NOTATION' S part has already being parsed... 5458 * 5459 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5460 * 5461 * [ VC: Notation Attributes ] 5462 * Values of this type must match one of the notation names included 5463 * in the declaration; all notation names in the declaration must be declared. 
5464 * 5465 * Returns: the notation attribute tree built while parsing 5466 */ 5467 5468 xmlEnumerationPtr 5469 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5470 const xmlChar *name; 5471 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5472 5473 if (RAW != '(') { 5474 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5475 return(NULL); 5476 } 5477 SHRINK; 5478 do { 5479 NEXT; 5480 SKIP_BLANKS; 5481 name = xmlParseName(ctxt); 5482 if (name == NULL) { 5483 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5484 "Name expected in NOTATION declaration\n"); 5485 xmlFreeEnumeration(ret); 5486 return(NULL); 5487 } 5488 tmp = ret; 5489 while (tmp != NULL) { 5490 if (xmlStrEqual(name, tmp->name)) { 5491 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5492 "standalone: attribute notation value token %s duplicated\n", 5493 name, NULL); 5494 if (!xmlDictOwns(ctxt->dict, name)) 5495 xmlFree((xmlChar *) name); 5496 break; 5497 } 5498 tmp = tmp->next; 5499 } 5500 if (tmp == NULL) { 5501 cur = xmlCreateEnumeration(name); 5502 if (cur == NULL) { 5503 xmlFreeEnumeration(ret); 5504 return(NULL); 5505 } 5506 if (last == NULL) ret = last = cur; 5507 else { 5508 last->next = cur; 5509 last = cur; 5510 } 5511 } 5512 SKIP_BLANKS; 5513 } while (RAW == '|'); 5514 if (RAW != ')') { 5515 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5516 xmlFreeEnumeration(ret); 5517 return(NULL); 5518 } 5519 NEXT; 5520 return(ret); 5521 } 5522 5523 /** 5524 * xmlParseEnumerationType: 5525 * @ctxt: an XML parser context 5526 * 5527 * parse an Enumeration attribute type. 5528 * 5529 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5530 * 5531 * [ VC: Enumeration ] 5532 * Values of this type must match one of the Nmtoken tokens in 5533 * the declaration 5534 * 5535 * Returns: the enumeration attribute tree built while parsing 5536 */ 5537 5538 xmlEnumerationPtr 5539 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5540 xmlChar *name; 5541 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5542 5543 if (RAW != '(') { 5544 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5545 return(NULL); 5546 } 5547 SHRINK; 5548 do { 5549 NEXT; 5550 SKIP_BLANKS; 5551 name = xmlParseNmtoken(ctxt); 5552 if (name == NULL) { 5553 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5554 return(ret); 5555 } 5556 tmp = ret; 5557 while (tmp != NULL) { 5558 if (xmlStrEqual(name, tmp->name)) { 5559 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5560 "standalone: attribute enumeration value token %s duplicated\n", 5561 name, NULL); 5562 if (!xmlDictOwns(ctxt->dict, name)) 5563 xmlFree(name); 5564 break; 5565 } 5566 tmp = tmp->next; 5567 } 5568 if (tmp == NULL) { 5569 cur = xmlCreateEnumeration(name); 5570 if (!xmlDictOwns(ctxt->dict, name)) 5571 xmlFree(name); 5572 if (cur == NULL) { 5573 xmlFreeEnumeration(ret); 5574 return(NULL); 5575 } 5576 if (last == NULL) ret = last = cur; 5577 else { 5578 last->next = cur; 5579 last = cur; 5580 } 5581 } 5582 SKIP_BLANKS; 5583 } while (RAW == '|'); 5584 if (RAW != ')') { 5585 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5586 return(ret); 5587 } 5588 NEXT; 5589 return(ret); 5590 } 5591 5592 /** 5593 * xmlParseEnumeratedType: 5594 * @ctxt: an XML parser context 5595 * @tree: the enumeration tree built while parsing 5596 * 5597 * parse an Enumerated attribute type. 5598 * 5599 * [57] EnumeratedType ::= NotationType | Enumeration 5600 * 5601 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5602 * 5603 * 5604 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5605 */ 5606 5607 int 5608 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5609 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5610 SKIP(8); 5611 if (!IS_BLANK_CH(CUR)) { 5612 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5613 "Space required after 'NOTATION'\n"); 5614 return(0); 5615 } 5616 SKIP_BLANKS; 5617 *tree = xmlParseNotationType(ctxt); 5618 if (*tree == NULL) return(0); 5619 return(XML_ATTRIBUTE_NOTATION); 5620 } 5621 *tree = xmlParseEnumerationType(ctxt); 5622 if (*tree == NULL) return(0); 5623 return(XML_ATTRIBUTE_ENUMERATION); 5624 } 5625 5626 /** 5627 * xmlParseAttributeType: 5628 * @ctxt: an XML parser context 5629 * @tree: the enumeration tree built while parsing 5630 * 5631 * parse the Attribute list def for an element 5632 * 5633 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5634 * 5635 * [55] StringType ::= 'CDATA' 5636 * 5637 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5638 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5639 * 5640 * Validity constraints for attribute values syntax are checked in 5641 * xmlValidateAttributeValue() 5642 * 5643 * [ VC: ID ] 5644 * Values of type ID must match the Name production. A name must not 5645 * appear more than once in an XML document as a value of this type; 5646 * i.e., ID values must uniquely identify the elements which bear them. 5647 * 5648 * [ VC: One ID per Element Type ] 5649 * No element type may have more than one ID attribute specified. 5650 * 5651 * [ VC: ID Attribute Default ] 5652 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5653 * 5654 * [ VC: IDREF ] 5655 * Values of type IDREF must match the Name production, and values 5656 * of type IDREFS must match Names; each IDREF Name must match the value 5657 * of an ID attribute on some element in the XML document; i.e. 
IDREF 5658 * values must match the value of some ID attribute. 5659 * 5660 * [ VC: Entity Name ] 5661 * Values of type ENTITY must match the Name production, values 5662 * of type ENTITIES must match Names; each Entity Name must match the 5663 * name of an unparsed entity declared in the DTD. 5664 * 5665 * [ VC: Name Token ] 5666 * Values of type NMTOKEN must match the Nmtoken production; values 5667 * of type NMTOKENS must match Nmtokens. 5668 * 5669 * Returns the attribute type 5670 */ 5671 int 5672 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5673 SHRINK; 5674 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5675 SKIP(5); 5676 return(XML_ATTRIBUTE_CDATA); 5677 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5678 SKIP(6); 5679 return(XML_ATTRIBUTE_IDREFS); 5680 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5681 SKIP(5); 5682 return(XML_ATTRIBUTE_IDREF); 5683 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5684 SKIP(2); 5685 return(XML_ATTRIBUTE_ID); 5686 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5687 SKIP(6); 5688 return(XML_ATTRIBUTE_ENTITY); 5689 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5690 SKIP(8); 5691 return(XML_ATTRIBUTE_ENTITIES); 5692 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5693 SKIP(8); 5694 return(XML_ATTRIBUTE_NMTOKENS); 5695 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5696 SKIP(7); 5697 return(XML_ATTRIBUTE_NMTOKEN); 5698 } 5699 return(xmlParseEnumeratedType(ctxt, tree)); 5700 } 5701 5702 /** 5703 * xmlParseAttributeListDecl: 5704 * @ctxt: an XML parser context 5705 * 5706 * : parse the Attribute list def for an element 5707 * 5708 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>'
 *
 * [53] AttDef ::= S Name S AttType S DefaultDecl
 *
 */
void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *elemName;
    const xmlChar *attrName;
    xmlEnumerationPtr tree;
    xmlParserInputPtr startInput;

    /* Nothing to do unless we are sitting on '<!ATTLIST'. */
    if (!CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T'))
        return;

    /* Remembered to check that the declaration ends in the same input. */
    startInput = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after '<!ATTLIST'\n");
    }
    SKIP_BLANKS;
    elemName = xmlParseName(ctxt);
    if (elemName == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "ATTLIST: no name for Element\n");
        return;
    }
    SKIP_BLANKS;
    GROW;

    /* One iteration per AttDef, until the closing '>' or an error. */
    while (RAW != '>') {
        const xmlChar *iterStart = CUR_PTR;
        xmlChar *defaultValue = NULL;
        int type;
        int def;
        int failed = 0;

        GROW;
        tree = NULL;
        attrName = xmlParseName(ctxt);
        if (attrName == NULL) {
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                           "ATTLIST: no name for Attribute\n");
            break;
        }
        GROW;
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                    "Space required after the attribute name\n");
            break;
        }
        SKIP_BLANKS;

        type = xmlParseAttributeType(ctxt, &tree);
        if (type <= 0) {
            break;
        }

        GROW;
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after the attribute type\n");
            if (tree != NULL)
                xmlFreeEnumeration(tree);
            break;
        }
        SKIP_BLANKS;

        def = xmlParseDefaultDecl(ctxt, &defaultValue);
        if (def <= 0) {
            if (defaultValue != NULL)
                xmlFree(defaultValue);
            if (tree != NULL)
                xmlFreeEnumeration(tree);
            break;
        }
        /* Every type except CDATA gets its default value normalized. */
        if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
            xmlAttrNormalizeSpace(defaultValue, defaultValue);

        GROW;
        /* A blank must follow unless the declaration ends here. */
        if (RAW != '>') {
            if (IS_BLANK_CH(CUR)) {
                SKIP_BLANKS;
            } else {
                xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                        "Space required after the attribute default value\n");
                failed = 1;
            }
        }
        /* Guard against an iteration that consumed no input at all. */
        if ((!failed) && (iterStart == CUR_PTR)) {
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                        "in xmlParseAttributeListDecl\n");
            failed = 1;
        }
        if (failed) {
            if (defaultValue != NULL)
                xmlFree(defaultValue);
            if (tree != NULL)
                xmlFreeEnumeration(tree);
            break;
        }

        /* The tree goes to the SAX callback when there is one to call. */
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
            (ctxt->sax->attributeDecl != NULL)) {
            ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
                                     type, def, defaultValue, tree);
        } else if (tree != NULL) {
            xmlFreeEnumeration(tree);
        }

        if ((ctxt->sax2) && (defaultValue != NULL) &&
            (def != XML_ATTRIBUTE_IMPLIED) &&
            (def != XML_ATTRIBUTE_REQUIRED)) {
            xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
        }
        if (ctxt->sax2) {
            xmlAddSpecialAttr(ctxt, elemName, attrName, type);
        }
        if (defaultValue != NULL)
            xmlFree(defaultValue);
        GROW;
    }

    if (RAW == '>') {
        if (startInput != ctxt->input) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Attribute list declaration doesn't start and stop in the same entity\n",
                             NULL, NULL);
        }
        NEXT;
    }
}

/**
 * xmlParseElementMixedContentDecl:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 *
 * parse the declaration for a Mixed Element content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
 *                '(' S? '#PCDATA' S?
')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
 *
 * [ VC: No Duplicate Types ]
 * The same name must not appear more than once in a single
 * mixed-content declaration.
 *
 * On every error path the partially built tree is released as a whole
 * (from its root) before NULL is returned.
 *
 * returns: the list of the xmlElementContentPtr describing the element choices,
 *          or NULL in case of error
 */
xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
    xmlElementContentPtr ret = NULL, cur = NULL, n;
    const xmlChar *elem = NULL;

    GROW;
    if (!CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
        xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
        return(NULL);
    }
    SKIP(7);
    SKIP_BLANKS;
    SHRINK;

    /* Simple form: '(' S? '#PCDATA' S? ')' with an optional '*'. */
    if (RAW == ')') {
        if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                             NULL, NULL);
        }
        NEXT;
        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
        if (ret == NULL)
            return(NULL);
        if (RAW == '*') {
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
            NEXT;
        }
        return(ret);
    }

    if ((RAW == '(') || (RAW == '|')) {
        ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
        if (ret == NULL)
            return(NULL);
    }

    /*
     * Each iteration consumes one '|' and one Name. The element node for
     * a name is only created on the following iteration (or after the
     * loop), once it is known whether another alternative follows.
     */
    while (RAW == '|') {
        NEXT;
        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
        if (n == NULL) {
            /* Release what was built so far instead of leaking it. */
            xmlFreeDocElementContent(ctxt->myDoc, ret);
            return(NULL);
        }
        if (elem == NULL) {
            /* First alternative: the OR node becomes the new root. */
            n->c1 = cur;
            if (cur != NULL)
                cur->parent = n;
            ret = cur = n;
        } else {
            n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
            if (n->c1 != NULL)
                n->c1->parent = n;
            cur->c2 = n;
            n->parent = cur;
            cur = n;
        }
        SKIP_BLANKS;
        elem = xmlParseName(ctxt);
        if (elem == NULL) {
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                    "xmlParseElementMixedContentDecl : Name expected\n");
            /*
             * Free from the root: cur may be an inner node, and freeing
             * only that subtree would leak everything above it.
             */
            xmlFreeDocElementContent(ctxt->myDoc, ret);
            return(NULL);
        }
        SKIP_BLANKS;
        GROW;
    }

    if ((RAW == ')') && (NXT(1) == '*')) {
        if (elem != NULL) {
            /* Attach the last parsed name as the right-most leaf. */
            cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                              XML_ELEMENT_CONTENT_ELEMENT);
            if (cur->c2 != NULL)
                cur->c2->parent = cur;
        }
        if (ret != NULL)
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
        if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                             NULL, NULL);
        }
        SKIP(2);
    } else {
        xmlFreeDocElementContent(ctxt->myDoc, ret);
        xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
        return(NULL);
    }

    return(ret);
}

/**
 * xmlParseElementChildrenContentDeclPriv:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 * @depth: the level of recursion
 *
 * parse the declaration for an Element children content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups.
That is to say, if either of the 5964 * opening or closing parentheses in a choice, seq, or Mixed 5965 * construct is contained in the replacement text for a parameter 5966 * entity, both must be contained in the same replacement text. For 5967 * interoperability, if a parameter-entity reference appears in a 5968 * choice, seq, or Mixed construct, its replacement text should not 5969 * be empty, and neither the first nor last non-blank character of 5970 * the replacement text should be a connector (| or ,). 5971 * 5972 * Returns the tree of xmlElementContentPtr describing the element 5973 * hierarchy. 5974 */ 5975 static xmlElementContentPtr 5976 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 5977 int depth) { 5978 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 5979 const xmlChar *elem; 5980 xmlChar type = 0; 5981 5982 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 5983 (depth > 2048)) { 5984 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 5985 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 5986 depth); 5987 return(NULL); 5988 } 5989 SKIP_BLANKS; 5990 GROW; 5991 if (RAW == '(') { 5992 int inputid = ctxt->input->id; 5993 5994 /* Recurse on first child */ 5995 NEXT; 5996 SKIP_BLANKS; 5997 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 5998 depth + 1); 5999 SKIP_BLANKS; 6000 GROW; 6001 } else { 6002 elem = xmlParseName(ctxt); 6003 if (elem == NULL) { 6004 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6005 return(NULL); 6006 } 6007 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6008 if (cur == NULL) { 6009 xmlErrMemory(ctxt, NULL); 6010 return(NULL); 6011 } 6012 GROW; 6013 if (RAW == '?') { 6014 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6015 NEXT; 6016 } else if (RAW == '*') { 6017 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6018 NEXT; 6019 } else if (RAW == '+') { 6020 cur->ocur = 
XML_ELEMENT_CONTENT_PLUS; 6021 NEXT; 6022 } else { 6023 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6024 } 6025 GROW; 6026 } 6027 SKIP_BLANKS; 6028 SHRINK; 6029 while (RAW != ')') { 6030 /* 6031 * Each loop we parse one separator and one element. 6032 */ 6033 if (RAW == ',') { 6034 if (type == 0) type = CUR; 6035 6036 /* 6037 * Detect "Name | Name , Name" error 6038 */ 6039 else if (type != CUR) { 6040 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6041 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6042 type); 6043 if ((last != NULL) && (last != ret)) 6044 xmlFreeDocElementContent(ctxt->myDoc, last); 6045 if (ret != NULL) 6046 xmlFreeDocElementContent(ctxt->myDoc, ret); 6047 return(NULL); 6048 } 6049 NEXT; 6050 6051 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6052 if (op == NULL) { 6053 if ((last != NULL) && (last != ret)) 6054 xmlFreeDocElementContent(ctxt->myDoc, last); 6055 xmlFreeDocElementContent(ctxt->myDoc, ret); 6056 return(NULL); 6057 } 6058 if (last == NULL) { 6059 op->c1 = ret; 6060 if (ret != NULL) 6061 ret->parent = op; 6062 ret = cur = op; 6063 } else { 6064 cur->c2 = op; 6065 if (op != NULL) 6066 op->parent = cur; 6067 op->c1 = last; 6068 if (last != NULL) 6069 last->parent = op; 6070 cur =op; 6071 last = NULL; 6072 } 6073 } else if (RAW == '|') { 6074 if (type == 0) type = CUR; 6075 6076 /* 6077 * Detect "Name , Name | Name" error 6078 */ 6079 else if (type != CUR) { 6080 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6081 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6082 type); 6083 if ((last != NULL) && (last != ret)) 6084 xmlFreeDocElementContent(ctxt->myDoc, last); 6085 if (ret != NULL) 6086 xmlFreeDocElementContent(ctxt->myDoc, ret); 6087 return(NULL); 6088 } 6089 NEXT; 6090 6091 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6092 if (op == NULL) { 6093 if ((last != NULL) && (last != ret)) 6094 xmlFreeDocElementContent(ctxt->myDoc, last); 6095 if (ret != NULL) 6096 
xmlFreeDocElementContent(ctxt->myDoc, ret); 6097 return(NULL); 6098 } 6099 if (last == NULL) { 6100 op->c1 = ret; 6101 if (ret != NULL) 6102 ret->parent = op; 6103 ret = cur = op; 6104 } else { 6105 cur->c2 = op; 6106 if (op != NULL) 6107 op->parent = cur; 6108 op->c1 = last; 6109 if (last != NULL) 6110 last->parent = op; 6111 cur =op; 6112 last = NULL; 6113 } 6114 } else { 6115 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6116 if ((last != NULL) && (last != ret)) 6117 xmlFreeDocElementContent(ctxt->myDoc, last); 6118 if (ret != NULL) 6119 xmlFreeDocElementContent(ctxt->myDoc, ret); 6120 return(NULL); 6121 } 6122 GROW; 6123 SKIP_BLANKS; 6124 GROW; 6125 if (RAW == '(') { 6126 int inputid = ctxt->input->id; 6127 /* Recurse on second child */ 6128 NEXT; 6129 SKIP_BLANKS; 6130 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6131 depth + 1); 6132 SKIP_BLANKS; 6133 } else { 6134 elem = xmlParseName(ctxt); 6135 if (elem == NULL) { 6136 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6137 if (ret != NULL) 6138 xmlFreeDocElementContent(ctxt->myDoc, ret); 6139 return(NULL); 6140 } 6141 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6142 if (last == NULL) { 6143 if (ret != NULL) 6144 xmlFreeDocElementContent(ctxt->myDoc, ret); 6145 return(NULL); 6146 } 6147 if (RAW == '?') { 6148 last->ocur = XML_ELEMENT_CONTENT_OPT; 6149 NEXT; 6150 } else if (RAW == '*') { 6151 last->ocur = XML_ELEMENT_CONTENT_MULT; 6152 NEXT; 6153 } else if (RAW == '+') { 6154 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6155 NEXT; 6156 } else { 6157 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6158 } 6159 } 6160 SKIP_BLANKS; 6161 GROW; 6162 } 6163 if ((cur != NULL) && (last != NULL)) { 6164 cur->c2 = last; 6165 if (last != NULL) 6166 last->parent = cur; 6167 } 6168 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6169 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6170 "Element content declaration doesn't start and stop in the same 
entity\n", 6171 NULL, NULL); 6172 } 6173 NEXT; 6174 if (RAW == '?') { 6175 if (ret != NULL) { 6176 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6177 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6178 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6179 else 6180 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6181 } 6182 NEXT; 6183 } else if (RAW == '*') { 6184 if (ret != NULL) { 6185 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6186 cur = ret; 6187 /* 6188 * Some normalization: 6189 * (a | b* | c?)* == (a | b | c)* 6190 */ 6191 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6192 if ((cur->c1 != NULL) && 6193 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6194 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6195 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6196 if ((cur->c2 != NULL) && 6197 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6198 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6199 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6200 cur = cur->c2; 6201 } 6202 } 6203 NEXT; 6204 } else if (RAW == '+') { 6205 if (ret != NULL) { 6206 int found = 0; 6207 6208 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6209 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6210 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6211 else 6212 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6213 /* 6214 * Some normalization: 6215 * (a | b*)+ == (a | b)* 6216 * (a | b?)+ == (a | b)* 6217 */ 6218 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6219 if ((cur->c1 != NULL) && 6220 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6221 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6222 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6223 found = 1; 6224 } 6225 if ((cur->c2 != NULL) && 6226 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6227 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6228 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6229 found = 1; 6230 } 6231 cur = cur->c2; 6232 } 6233 if (found) 6234 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6235 } 6236 NEXT; 6237 } 6238 return(ret); 6239 } 6240 6241 /** 6242 * 
xmlParseElementChildrenContentDecl: 6243 * @ctxt: an XML parser context 6244 * @inputchk: the input used for the current entity, needed for boundary checks 6245 * 6246 * parse the declaration for a Mixed Element content 6247 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6248 * 6249 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6250 * 6251 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6252 * 6253 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6254 * 6255 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6256 * 6257 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6258 * TODO Parameter-entity replacement text must be properly nested 6259 * with parenthesized groups. That is to say, if either of the 6260 * opening or closing parentheses in a choice, seq, or Mixed 6261 * construct is contained in the replacement text for a parameter 6262 * entity, both must be contained in the same replacement text. For 6263 * interoperability, if a parameter-entity reference appears in a 6264 * choice, seq, or Mixed construct, its replacement text should not 6265 * be empty, and neither the first nor last non-blank character of 6266 * the replacement text should be a connector (| or ,). 6267 * 6268 * Returns the tree of xmlElementContentPtr describing the element 6269 * hierarchy. 6270 */ 6271 xmlElementContentPtr 6272 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6273 /* stub left for API/ABI compat */ 6274 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6275 } 6276 6277 /** 6278 * xmlParseElementContentDecl: 6279 * @ctxt: an XML parser context 6280 * @name: the name of the element being defined. 
6281 * @result: the Element Content pointer will be stored here if any 6282 * 6283 * parse the declaration for an Element content either Mixed or Children, 6284 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6285 * 6286 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6287 * 6288 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6289 */ 6290 6291 int 6292 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6293 xmlElementContentPtr *result) { 6294 6295 xmlElementContentPtr tree = NULL; 6296 int inputid = ctxt->input->id; 6297 int res; 6298 6299 *result = NULL; 6300 6301 if (RAW != '(') { 6302 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6303 "xmlParseElementContentDecl : %s '(' expected\n", name); 6304 return(-1); 6305 } 6306 NEXT; 6307 GROW; 6308 SKIP_BLANKS; 6309 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6310 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6311 res = XML_ELEMENT_TYPE_MIXED; 6312 } else { 6313 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6314 res = XML_ELEMENT_TYPE_ELEMENT; 6315 } 6316 SKIP_BLANKS; 6317 *result = tree; 6318 return(res); 6319 } 6320 6321 /** 6322 * xmlParseElementDecl: 6323 * @ctxt: an XML parser context 6324 * 6325 * parse an Element declaration. 6326 * 6327 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    xmlElementContentPtr content = NULL;
    xmlParserInputPtr startInput;
    const xmlChar *name;
    int ret = -1;

    /* GROW; done in the caller */
    if (!CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T'))
        return(ret);

    /* Remembered to check that the declaration ends in the same input. */
    startInput = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'ELEMENT'\n");
    }
    SKIP_BLANKS;
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseElementDecl: no name for Element\n");
        return(-1);
    }
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after the element name\n");
    }
    SKIP_BLANKS;

    if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
        /*
         * Element must always be empty.
         */
        SKIP(5);
        ret = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') &&
               (NXT(2) == 'Y')) {
        /*
         * Element is a generic container.
         */
        SKIP(3);
        ret = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        ret = xmlParseElementContentDecl(ctxt, name, &content);
    } else {
        /*
         * [ WFC: PEs in Internal Subset ] error handling.
         */
        if ((RAW == '%') && (ctxt->external == 0) &&
            (ctxt->inputNr == 1)) {
            xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
      "PEReference: forbidden within markup decl in internal subset\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        }
        return(-1);
    }

    SKIP_BLANKS;
    /*
     * Pop-up of finished entities.
     */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    SKIP_BLANKS;

    if (RAW != '>') {
        /* The content model is dropped; the parsed type is still returned. */
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
        if (content != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
        return(ret);
    }

    if (startInput != ctxt->input) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Element declaration doesn't start and stop in the same entity\n");
    }
    NEXT;

    if ((ctxt->sax == NULL) || (ctxt->disableSAX) ||
        (ctxt->sax->elementDecl == NULL)) {
        /* Nobody to hand the content model to. */
        if (content != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
        return(ret);
    }

    if (content != NULL)
        content->parent = NULL;
    ctxt->sax->elementDecl(ctxt->userData, name, ret, content);
    if ((content != NULL) && (content->parent == NULL)) {
        /*
         * this is a trick: if xmlAddElementDecl is called,
         * instead of copying the full tree it is plugged directly
         * if called from the parser. Avoid duplicating the
         * interfaces or change the API/ABI
         */
        xmlFreeDocElementContent(ctxt->myDoc, content);
    }
    return(ret);
}

/**
 * xmlParseConditionalSections
 * @ctxt:  an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S?
'[' ignoreSectContents* ']]>' 6443 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6444 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6445 */ 6446 6447 static void 6448 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6449 int id = ctxt->input->id; 6450 6451 SKIP(3); 6452 SKIP_BLANKS; 6453 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6454 SKIP(7); 6455 SKIP_BLANKS; 6456 if (RAW != '[') { 6457 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6458 } else { 6459 if (ctxt->input->id != id) { 6460 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6461 "All markup of the conditional section is not in the same entity\n", 6462 NULL, NULL); 6463 } 6464 NEXT; 6465 } 6466 if (xmlParserDebugEntities) { 6467 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6468 xmlGenericError(xmlGenericErrorContext, 6469 "%s(%d): ", ctxt->input->filename, 6470 ctxt->input->line); 6471 xmlGenericError(xmlGenericErrorContext, 6472 "Entering INCLUDE Conditional Section\n"); 6473 } 6474 6475 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6476 (NXT(2) != '>'))) { 6477 const xmlChar *check = CUR_PTR; 6478 unsigned int cons = ctxt->input->consumed; 6479 6480 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6481 xmlParseConditionalSections(ctxt); 6482 } else if (IS_BLANK_CH(CUR)) { 6483 NEXT; 6484 } else if (RAW == '%') { 6485 xmlParsePEReference(ctxt); 6486 } else 6487 xmlParseMarkupDecl(ctxt); 6488 6489 /* 6490 * Pop-up of finished entities. 
6491 */ 6492 while ((RAW == 0) && (ctxt->inputNr > 1)) 6493 xmlPopInput(ctxt); 6494 6495 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6496 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6497 break; 6498 } 6499 } 6500 if (xmlParserDebugEntities) { 6501 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6502 xmlGenericError(xmlGenericErrorContext, 6503 "%s(%d): ", ctxt->input->filename, 6504 ctxt->input->line); 6505 xmlGenericError(xmlGenericErrorContext, 6506 "Leaving INCLUDE Conditional Section\n"); 6507 } 6508 6509 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6510 int state; 6511 xmlParserInputState instate; 6512 int depth = 0; 6513 6514 SKIP(6); 6515 SKIP_BLANKS; 6516 if (RAW != '[') { 6517 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6518 } else { 6519 if (ctxt->input->id != id) { 6520 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6521 "All markup of the conditional section is not in the same entity\n", 6522 NULL, NULL); 6523 } 6524 NEXT; 6525 } 6526 if (xmlParserDebugEntities) { 6527 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6528 xmlGenericError(xmlGenericErrorContext, 6529 "%s(%d): ", ctxt->input->filename, 6530 ctxt->input->line); 6531 xmlGenericError(xmlGenericErrorContext, 6532 "Entering IGNORE Conditional Section\n"); 6533 } 6534 6535 /* 6536 * Parse up to the end of the conditional section 6537 * But disable SAX event generating DTD building in the meantime 6538 */ 6539 state = ctxt->disableSAX; 6540 instate = ctxt->instate; 6541 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6542 ctxt->instate = XML_PARSER_IGNORE; 6543 6544 while ((depth >= 0) && (RAW != 0)) { 6545 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6546 depth++; 6547 SKIP(3); 6548 continue; 6549 } 6550 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6551 if (--depth >= 0) SKIP(3); 6552 continue; 6553 } 6554 NEXT; 6555 continue; 6556 } 6557 6558 ctxt->disableSAX = state; 6559 ctxt->instate = instate; 6560 6561 
if (xmlParserDebugEntities) { 6562 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6563 xmlGenericError(xmlGenericErrorContext, 6564 "%s(%d): ", ctxt->input->filename, 6565 ctxt->input->line); 6566 xmlGenericError(xmlGenericErrorContext, 6567 "Leaving IGNORE Conditional Section\n"); 6568 } 6569 6570 } else { 6571 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6572 } 6573 6574 if (RAW == 0) 6575 SHRINK; 6576 6577 if (RAW == 0) { 6578 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6579 } else { 6580 if (ctxt->input->id != id) { 6581 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6582 "All markup of the conditional section is not in the same entity\n", 6583 NULL, NULL); 6584 } 6585 SKIP(3); 6586 } 6587 } 6588 6589 /** 6590 * xmlParseMarkupDecl: 6591 * @ctxt: an XML parser context 6592 * 6593 * parse Markup declarations 6594 * 6595 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6596 * NotationDecl | PI | Comment 6597 * 6598 * [ VC: Proper Declaration/PE Nesting ] 6599 * Parameter-entity replacement text must be properly nested with 6600 * markup declarations. That is to say, if either the first character 6601 * or the last character of a markup declaration (markupdecl above) is 6602 * contained in the replacement text for a parameter-entity reference, 6603 * both must be contained in the same replacement text. 6604 * 6605 * [ WFC: PEs in Internal Subset ] 6606 * In the internal DTD subset, parameter-entity references can occur 6607 * only where markup declarations can occur, not within markup declarations. 6608 * (This does not apply to references that occur in external parameter 6609 * entities or to the external subset.) 
6610 */ 6611 void 6612 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6613 GROW; 6614 if (CUR == '<') { 6615 if (NXT(1) == '!') { 6616 switch (NXT(2)) { 6617 case 'E': 6618 if (NXT(3) == 'L') 6619 xmlParseElementDecl(ctxt); 6620 else if (NXT(3) == 'N') 6621 xmlParseEntityDecl(ctxt); 6622 break; 6623 case 'A': 6624 xmlParseAttributeListDecl(ctxt); 6625 break; 6626 case 'N': 6627 xmlParseNotationDecl(ctxt); 6628 break; 6629 case '-': 6630 xmlParseComment(ctxt); 6631 break; 6632 default: 6633 /* there is an error but it will be detected later */ 6634 break; 6635 } 6636 } else if (NXT(1) == '?') { 6637 xmlParsePI(ctxt); 6638 } 6639 } 6640 /* 6641 * This is only for internal subset. On external entities, 6642 * the replacement is done before parsing stage 6643 */ 6644 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6645 xmlParsePEReference(ctxt); 6646 6647 /* 6648 * Conditional sections are allowed from entities included 6649 * by PE References in the internal subset. 6650 */ 6651 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6652 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6653 xmlParseConditionalSections(ctxt); 6654 } 6655 } 6656 6657 ctxt->instate = XML_PARSER_DTD; 6658 } 6659 6660 /** 6661 * xmlParseTextDecl: 6662 * @ctxt: an XML parser context 6663 * 6664 * parse an XML declaration header for external entities 6665 * 6666 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6667 */ 6668 6669 void 6670 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6671 xmlChar *version; 6672 const xmlChar *encoding; 6673 6674 /* 6675 * We know that '<?xml' is here. 6676 */ 6677 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6678 SKIP(5); 6679 } else { 6680 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6681 return; 6682 } 6683 6684 if (!IS_BLANK_CH(CUR)) { 6685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6686 "Space needed after '<?xml'\n"); 6687 } 6688 SKIP_BLANKS; 6689 6690 /* 6691 * We may have the VersionInfo here. 
6692 */ 6693 version = xmlParseVersionInfo(ctxt); 6694 if (version == NULL) 6695 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6696 else { 6697 if (!IS_BLANK_CH(CUR)) { 6698 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6699 "Space needed here\n"); 6700 } 6701 } 6702 ctxt->input->version = version; 6703 6704 /* 6705 * We must have the encoding declaration 6706 */ 6707 encoding = xmlParseEncodingDecl(ctxt); 6708 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6709 /* 6710 * The XML REC instructs us to stop parsing right here 6711 */ 6712 return; 6713 } 6714 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6715 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6716 "Missing encoding in text declaration\n"); 6717 } 6718 6719 SKIP_BLANKS; 6720 if ((RAW == '?') && (NXT(1) == '>')) { 6721 SKIP(2); 6722 } else if (RAW == '>') { 6723 /* Deprecated old WD ... */ 6724 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6725 NEXT; 6726 } else { 6727 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6728 MOVETO_ENDTAG(CUR_PTR); 6729 NEXT; 6730 } 6731 } 6732 6733 /** 6734 * xmlParseExternalSubset: 6735 * @ctxt: an XML parser context 6736 * @ExternalID: the external identifier 6737 * @SystemID: the system identifier (or URL) 6738 * 6739 * parse Markup declarations from an external subset 6740 * 6741 * [30] extSubset ::= textDecl? 
extSubsetDecl 6742 * 6743 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6744 */ 6745 void 6746 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6747 const xmlChar *SystemID) { 6748 xmlDetectSAX2(ctxt); 6749 GROW; 6750 6751 if ((ctxt->encoding == NULL) && 6752 (ctxt->input->end - ctxt->input->cur >= 4)) { 6753 xmlChar start[4]; 6754 xmlCharEncoding enc; 6755 6756 start[0] = RAW; 6757 start[1] = NXT(1); 6758 start[2] = NXT(2); 6759 start[3] = NXT(3); 6760 enc = xmlDetectCharEncoding(start, 4); 6761 if (enc != XML_CHAR_ENCODING_NONE) 6762 xmlSwitchEncoding(ctxt, enc); 6763 } 6764 6765 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6766 xmlParseTextDecl(ctxt); 6767 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6768 /* 6769 * The XML REC instructs us to stop parsing right here 6770 */ 6771 ctxt->instate = XML_PARSER_EOF; 6772 return; 6773 } 6774 } 6775 if (ctxt->myDoc == NULL) { 6776 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6777 if (ctxt->myDoc == NULL) { 6778 xmlErrMemory(ctxt, "New Doc failed"); 6779 return; 6780 } 6781 ctxt->myDoc->properties = XML_DOC_INTERNAL; 6782 } 6783 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6784 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6785 6786 ctxt->instate = XML_PARSER_DTD; 6787 ctxt->external = 1; 6788 while (((RAW == '<') && (NXT(1) == '?')) || 6789 ((RAW == '<') && (NXT(1) == '!')) || 6790 (RAW == '%') || IS_BLANK_CH(CUR)) { 6791 const xmlChar *check = CUR_PTR; 6792 unsigned int cons = ctxt->input->consumed; 6793 6794 GROW; 6795 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6796 xmlParseConditionalSections(ctxt); 6797 } else if (IS_BLANK_CH(CUR)) { 6798 NEXT; 6799 } else if (RAW == '%') { 6800 xmlParsePEReference(ctxt); 6801 } else 6802 xmlParseMarkupDecl(ctxt); 6803 6804 /* 6805 * Pop-up of finished entities. 
6806 */ 6807 while ((RAW == 0) && (ctxt->inputNr > 1)) 6808 xmlPopInput(ctxt); 6809 6810 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6811 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6812 break; 6813 } 6814 } 6815 6816 if (RAW != 0) { 6817 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6818 } 6819 6820 } 6821 6822 /** 6823 * xmlParseReference: 6824 * @ctxt: an XML parser context 6825 * 6826 * parse and handle entity references in content, depending on the SAX 6827 * interface, this may end-up in a call to character() if this is a 6828 * CharRef, a predefined entity, if there is no reference() callback. 6829 * or if the parser was asked to switch to that mode. 6830 * 6831 * [67] Reference ::= EntityRef | CharRef 6832 */ 6833 void 6834 xmlParseReference(xmlParserCtxtPtr ctxt) { 6835 xmlEntityPtr ent; 6836 xmlChar *val; 6837 int was_checked; 6838 xmlNodePtr list = NULL; 6839 xmlParserErrors ret = XML_ERR_OK; 6840 6841 6842 if (RAW != '&') 6843 return; 6844 6845 /* 6846 * Simple case of a CharRef 6847 */ 6848 if (NXT(1) == '#') { 6849 int i = 0; 6850 xmlChar out[10]; 6851 int hex = NXT(2); 6852 int value = xmlParseCharRef(ctxt); 6853 6854 if (value == 0) 6855 return; 6856 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 6857 /* 6858 * So we are using non-UTF-8 buffers 6859 * Check that the char fit on 8bits, if not 6860 * generate a CharRef. 
6861 */ 6862 if (value <= 0xFF) { 6863 out[0] = value; 6864 out[1] = 0; 6865 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6866 (!ctxt->disableSAX)) 6867 ctxt->sax->characters(ctxt->userData, out, 1); 6868 } else { 6869 if ((hex == 'x') || (hex == 'X')) 6870 snprintf((char *)out, sizeof(out), "#x%X", value); 6871 else 6872 snprintf((char *)out, sizeof(out), "#%d", value); 6873 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6874 (!ctxt->disableSAX)) 6875 ctxt->sax->reference(ctxt->userData, out); 6876 } 6877 } else { 6878 /* 6879 * Just encode the value in UTF-8 6880 */ 6881 COPY_BUF(0 ,out, i, value); 6882 out[i] = 0; 6883 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6884 (!ctxt->disableSAX)) 6885 ctxt->sax->characters(ctxt->userData, out, i); 6886 } 6887 return; 6888 } 6889 6890 /* 6891 * We are seeing an entity reference 6892 */ 6893 ent = xmlParseEntityRef(ctxt); 6894 if (ent == NULL) return; 6895 if (!ctxt->wellFormed) 6896 return; 6897 was_checked = ent->checked; 6898 6899 /* special case of predefined entities */ 6900 if ((ent->name == NULL) || 6901 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 6902 val = ent->content; 6903 if (val == NULL) return; 6904 /* 6905 * inline the entity. 6906 */ 6907 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6908 (!ctxt->disableSAX)) 6909 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 6910 return; 6911 } 6912 6913 /* 6914 * The first reference to the entity trigger a parsing phase 6915 * where the ent->children is filled with the result from 6916 * the parsing. 6917 */ 6918 if (ent->checked == 0) { 6919 unsigned long oldnbent = ctxt->nbentities; 6920 6921 /* 6922 * This is a bit hackish but this seems the best 6923 * way to make sure both SAX and DOM entity support 6924 * behaves okay. 
6925 */ 6926 void *user_data; 6927 if (ctxt->userData == ctxt) 6928 user_data = NULL; 6929 else 6930 user_data = ctxt->userData; 6931 6932 /* 6933 * Check that this entity is well formed 6934 * 4.3.2: An internal general parsed entity is well-formed 6935 * if its replacement text matches the production labeled 6936 * content. 6937 */ 6938 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6939 ctxt->depth++; 6940 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 6941 user_data, &list); 6942 ctxt->depth--; 6943 6944 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6945 ctxt->depth++; 6946 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 6947 user_data, ctxt->depth, ent->URI, 6948 ent->ExternalID, &list); 6949 ctxt->depth--; 6950 } else { 6951 ret = XML_ERR_ENTITY_PE_INTERNAL; 6952 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6953 "invalid entity type found\n", NULL); 6954 } 6955 6956 /* 6957 * Store the number of entities needing parsing for this entity 6958 * content and do checkings 6959 */ 6960 ent->checked = ctxt->nbentities - oldnbent; 6961 if (ret == XML_ERR_ENTITY_LOOP) { 6962 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6963 xmlFreeNodeList(list); 6964 return; 6965 } 6966 if (xmlParserEntityCheck(ctxt, 0, ent)) { 6967 xmlFreeNodeList(list); 6968 return; 6969 } 6970 6971 if ((ret == XML_ERR_OK) && (list != NULL)) { 6972 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 6973 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 6974 (ent->children == NULL)) { 6975 ent->children = list; 6976 if (ctxt->replaceEntities) { 6977 /* 6978 * Prune it directly in the generated document 6979 * except for single text nodes. 
6980 */ 6981 if (((list->type == XML_TEXT_NODE) && 6982 (list->next == NULL)) || 6983 (ctxt->parseMode == XML_PARSE_READER)) { 6984 list->parent = (xmlNodePtr) ent; 6985 list = NULL; 6986 ent->owner = 1; 6987 } else { 6988 ent->owner = 0; 6989 while (list != NULL) { 6990 list->parent = (xmlNodePtr) ctxt->node; 6991 list->doc = ctxt->myDoc; 6992 if (list->next == NULL) 6993 ent->last = list; 6994 list = list->next; 6995 } 6996 list = ent->children; 6997 #ifdef LIBXML_LEGACY_ENABLED 6998 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6999 xmlAddEntityReference(ent, list, NULL); 7000 #endif /* LIBXML_LEGACY_ENABLED */ 7001 } 7002 } else { 7003 ent->owner = 1; 7004 while (list != NULL) { 7005 list->parent = (xmlNodePtr) ent; 7006 xmlSetTreeDoc(list, ent->doc); 7007 if (list->next == NULL) 7008 ent->last = list; 7009 list = list->next; 7010 } 7011 } 7012 } else { 7013 xmlFreeNodeList(list); 7014 list = NULL; 7015 } 7016 } else if ((ret != XML_ERR_OK) && 7017 (ret != XML_WAR_UNDECLARED_ENTITY)) { 7018 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7019 "Entity '%s' failed to parse\n", ent->name); 7020 } else if (list != NULL) { 7021 xmlFreeNodeList(list); 7022 list = NULL; 7023 } 7024 if (ent->checked == 0) 7025 ent->checked = 1; 7026 } else if (ent->checked != 1) { 7027 ctxt->nbentities += ent->checked; 7028 } 7029 7030 /* 7031 * Now that the entity content has been gathered 7032 * provide it to the application, this can take different forms based 7033 * on the parsing modes. 7034 */ 7035 if (ent->children == NULL) { 7036 /* 7037 * Probably running in SAX mode and the callbacks don't 7038 * build the entity content. So unless we already went 7039 * though parsing for first checking go though the entity 7040 * content to generate callbacks associated to the entity 7041 */ 7042 if (was_checked != 0) { 7043 void *user_data; 7044 /* 7045 * This is a bit hackish but this seems the best 7046 * way to make sure both SAX and DOM entity support 7047 * behaves okay. 
7048 */ 7049 if (ctxt->userData == ctxt) 7050 user_data = NULL; 7051 else 7052 user_data = ctxt->userData; 7053 7054 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7055 ctxt->depth++; 7056 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7057 ent->content, user_data, NULL); 7058 ctxt->depth--; 7059 } else if (ent->etype == 7060 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7061 ctxt->depth++; 7062 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7063 ctxt->sax, user_data, ctxt->depth, 7064 ent->URI, ent->ExternalID, NULL); 7065 ctxt->depth--; 7066 } else { 7067 ret = XML_ERR_ENTITY_PE_INTERNAL; 7068 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7069 "invalid entity type found\n", NULL); 7070 } 7071 if (ret == XML_ERR_ENTITY_LOOP) { 7072 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7073 return; 7074 } 7075 } 7076 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7077 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7078 /* 7079 * Entity reference callback comes second, it's somewhat 7080 * superfluous but a compatibility to historical behaviour 7081 */ 7082 ctxt->sax->reference(ctxt->userData, ent->name); 7083 } 7084 return; 7085 } 7086 7087 /* 7088 * If we didn't get any children for the entity being built 7089 */ 7090 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7091 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7092 /* 7093 * Create a node. 7094 */ 7095 ctxt->sax->reference(ctxt->userData, ent->name); 7096 return; 7097 } 7098 7099 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7100 /* 7101 * There is a problem on the handling of _private for entities 7102 * (bug 155816): Should we copy the content of the field from 7103 * the entity (possibly overwriting some value set by the user 7104 * when a copy is created), should we leave it alone, or should 7105 * we try to take care of different situations? The problem 7106 * is exacerbated by the usage of this field by the xmlReader. 
7107 * To fix this bug, we look at _private on the created node 7108 * and, if it's NULL, we copy in whatever was in the entity. 7109 * If it's not NULL we leave it alone. This is somewhat of a 7110 * hack - maybe we should have further tests to determine 7111 * what to do. 7112 */ 7113 if ((ctxt->node != NULL) && (ent->children != NULL)) { 7114 /* 7115 * Seems we are generating the DOM content, do 7116 * a simple tree copy for all references except the first 7117 * In the first occurrence list contains the replacement. 7118 * progressive == 2 means we are operating on the Reader 7119 * and since nodes are discarded we must copy all the time. 7120 */ 7121 if (((list == NULL) && (ent->owner == 0)) || 7122 (ctxt->parseMode == XML_PARSE_READER)) { 7123 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7124 7125 /* 7126 * when operating on a reader, the entities definitions 7127 * are always owning the entities subtree. 7128 if (ctxt->parseMode == XML_PARSE_READER) 7129 ent->owner = 1; 7130 */ 7131 7132 cur = ent->children; 7133 while (cur != NULL) { 7134 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7135 if (nw != NULL) { 7136 if (nw->_private == NULL) 7137 nw->_private = cur->_private; 7138 if (firstChild == NULL){ 7139 firstChild = nw; 7140 } 7141 nw = xmlAddChild(ctxt->node, nw); 7142 } 7143 if (cur == ent->last) { 7144 /* 7145 * needed to detect some strange empty 7146 * node cases in the reader tests 7147 */ 7148 if ((ctxt->parseMode == XML_PARSE_READER) && 7149 (nw != NULL) && 7150 (nw->type == XML_ELEMENT_NODE) && 7151 (nw->children == NULL)) 7152 nw->extra = 1; 7153 7154 break; 7155 } 7156 cur = cur->next; 7157 } 7158 #ifdef LIBXML_LEGACY_ENABLED 7159 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7160 xmlAddEntityReference(ent, firstChild, nw); 7161 #endif /* LIBXML_LEGACY_ENABLED */ 7162 } else if (list == NULL) { 7163 xmlNodePtr nw = NULL, cur, next, last, 7164 firstChild = NULL; 7165 /* 7166 * Copy the entity child list and make it the new 7167 * entity 
child list. The goal is to make sure any 7168 * ID or REF referenced will be the one from the 7169 * document content and not the entity copy. 7170 */ 7171 cur = ent->children; 7172 ent->children = NULL; 7173 last = ent->last; 7174 ent->last = NULL; 7175 while (cur != NULL) { 7176 next = cur->next; 7177 cur->next = NULL; 7178 cur->parent = NULL; 7179 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7180 if (nw != NULL) { 7181 if (nw->_private == NULL) 7182 nw->_private = cur->_private; 7183 if (firstChild == NULL){ 7184 firstChild = cur; 7185 } 7186 xmlAddChild((xmlNodePtr) ent, nw); 7187 xmlAddChild(ctxt->node, cur); 7188 } 7189 if (cur == last) 7190 break; 7191 cur = next; 7192 } 7193 if (ent->owner == 0) 7194 ent->owner = 1; 7195 #ifdef LIBXML_LEGACY_ENABLED 7196 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7197 xmlAddEntityReference(ent, firstChild, nw); 7198 #endif /* LIBXML_LEGACY_ENABLED */ 7199 } else { 7200 const xmlChar *nbktext; 7201 7202 /* 7203 * the name change is to avoid coalescing of the 7204 * node with a possible previous text one which 7205 * would make ent->children a dangling pointer 7206 */ 7207 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7208 -1); 7209 if (ent->children->type == XML_TEXT_NODE) 7210 ent->children->name = nbktext; 7211 if ((ent->last != ent->children) && 7212 (ent->last->type == XML_TEXT_NODE)) 7213 ent->last->name = nbktext; 7214 xmlAddChildList(ctxt->node, ent->children); 7215 } 7216 7217 /* 7218 * This is to avoid a nasty side effect, see 7219 * characters() in SAX.c 7220 */ 7221 ctxt->nodemem = 0; 7222 ctxt->nodelen = 0; 7223 return; 7224 } 7225 } 7226 } 7227 7228 /** 7229 * xmlParseEntityRef: 7230 * @ctxt: an XML parser context 7231 * 7232 * parse ENTITY references declarations 7233 * 7234 * [68] EntityRef ::= '&' Name ';' 7235 * 7236 * [ WFC: Entity Declared ] 7237 * In a document without any DTD, a document with only an internal DTD 7238 * subset which contains no parameter entity references, or a document 
7239 * with "standalone='yes'", the Name given in the entity reference 7240 * must match that in an entity declaration, except that well-formed 7241 * documents need not declare any of the following entities: amp, lt, 7242 * gt, apos, quot. The declaration of a parameter entity must precede 7243 * any reference to it. Similarly, the declaration of a general entity 7244 * must precede any reference to it which appears in a default value in an 7245 * attribute-list declaration. Note that if entities are declared in the 7246 * external subset or in external parameter entities, a non-validating 7247 * processor is not obligated to read and process their declarations; 7248 * for such documents, the rule that an entity must be declared is a 7249 * well-formedness constraint only if standalone='yes'. 7250 * 7251 * [ WFC: Parsed Entity ] 7252 * An entity reference must not contain the name of an unparsed entity 7253 * 7254 * Returns the xmlEntityPtr if found, or NULL otherwise. 7255 */ 7256 xmlEntityPtr 7257 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7258 const xmlChar *name; 7259 xmlEntityPtr ent = NULL; 7260 7261 GROW; 7262 7263 if (RAW != '&') 7264 return(NULL); 7265 NEXT; 7266 name = xmlParseName(ctxt); 7267 if (name == NULL) { 7268 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7269 "xmlParseEntityRef: no name\n"); 7270 return(NULL); 7271 } 7272 if (RAW != ';') { 7273 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7274 return(NULL); 7275 } 7276 NEXT; 7277 7278 /* 7279 * Predefined entites override any extra definition 7280 */ 7281 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7282 ent = xmlGetPredefinedEntity(name); 7283 if (ent != NULL) 7284 return(ent); 7285 } 7286 7287 /* 7288 * Increate the number of entity references parsed 7289 */ 7290 ctxt->nbentities++; 7291 7292 /* 7293 * Ask first SAX for entity resolution, otherwise try the 7294 * entities which may have stored in the parser context. 
7295 */ 7296 if (ctxt->sax != NULL) { 7297 if (ctxt->sax->getEntity != NULL) 7298 ent = ctxt->sax->getEntity(ctxt->userData, name); 7299 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7300 (ctxt->options & XML_PARSE_OLDSAX)) 7301 ent = xmlGetPredefinedEntity(name); 7302 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7303 (ctxt->userData==ctxt)) { 7304 ent = xmlSAX2GetEntity(ctxt, name); 7305 } 7306 } 7307 /* 7308 * [ WFC: Entity Declared ] 7309 * In a document without any DTD, a document with only an 7310 * internal DTD subset which contains no parameter entity 7311 * references, or a document with "standalone='yes'", the 7312 * Name given in the entity reference must match that in an 7313 * entity declaration, except that well-formed documents 7314 * need not declare any of the following entities: amp, lt, 7315 * gt, apos, quot. 7316 * The declaration of a parameter entity must precede any 7317 * reference to it. 7318 * Similarly, the declaration of a general entity must 7319 * precede any reference to it which appears in a default 7320 * value in an attribute-list declaration. Note that if 7321 * entities are declared in the external subset or in 7322 * external parameter entities, a non-validating processor 7323 * is not obligated to read and process their declarations; 7324 * for such documents, the rule that an entity must be 7325 * declared is a well-formedness constraint only if 7326 * standalone='yes'. 
7327 */ 7328 if (ent == NULL) { 7329 if ((ctxt->standalone == 1) || 7330 ((ctxt->hasExternalSubset == 0) && 7331 (ctxt->hasPErefs == 0))) { 7332 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7333 "Entity '%s' not defined\n", name); 7334 } else { 7335 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7336 "Entity '%s' not defined\n", name); 7337 if ((ctxt->inSubset == 0) && 7338 (ctxt->sax != NULL) && 7339 (ctxt->sax->reference != NULL)) { 7340 ctxt->sax->reference(ctxt->userData, name); 7341 } 7342 } 7343 ctxt->valid = 0; 7344 } 7345 7346 /* 7347 * [ WFC: Parsed Entity ] 7348 * An entity reference must not contain the name of an 7349 * unparsed entity 7350 */ 7351 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7352 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7353 "Entity reference to unparsed entity %s\n", name); 7354 } 7355 7356 /* 7357 * [ WFC: No External Entity References ] 7358 * Attribute values cannot contain direct or indirect 7359 * entity references to external entities. 7360 */ 7361 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7362 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7363 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7364 "Attribute references external entity '%s'\n", name); 7365 } 7366 /* 7367 * [ WFC: No < in Attribute Values ] 7368 * The replacement text of any entity referred to directly or 7369 * indirectly in an attribute value (other than "<") must 7370 * not contain a <. 7371 */ 7372 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7373 (ent != NULL) && (ent->content != NULL) && 7374 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7375 (xmlStrchr(ent->content, '<'))) { 7376 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7377 "'<' in entity '%s' is not allowed in attributes values\n", name); 7378 } 7379 7380 /* 7381 * Internal check, no parameter entities here ... 
7382 */ 7383 else { 7384 switch (ent->etype) { 7385 case XML_INTERNAL_PARAMETER_ENTITY: 7386 case XML_EXTERNAL_PARAMETER_ENTITY: 7387 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7388 "Attempt to reference the parameter entity '%s'\n", 7389 name); 7390 break; 7391 default: 7392 break; 7393 } 7394 } 7395 7396 /* 7397 * [ WFC: No Recursion ] 7398 * A parsed entity must not contain a recursive reference 7399 * to itself, either directly or indirectly. 7400 * Done somewhere else 7401 */ 7402 return(ent); 7403 } 7404 7405 /** 7406 * xmlParseStringEntityRef: 7407 * @ctxt: an XML parser context 7408 * @str: a pointer to an index in the string 7409 * 7410 * parse ENTITY references declarations, but this version parses it from 7411 * a string value. 7412 * 7413 * [68] EntityRef ::= '&' Name ';' 7414 * 7415 * [ WFC: Entity Declared ] 7416 * In a document without any DTD, a document with only an internal DTD 7417 * subset which contains no parameter entity references, or a document 7418 * with "standalone='yes'", the Name given in the entity reference 7419 * must match that in an entity declaration, except that well-formed 7420 * documents need not declare any of the following entities: amp, lt, 7421 * gt, apos, quot. The declaration of a parameter entity must precede 7422 * any reference to it. Similarly, the declaration of a general entity 7423 * must precede any reference to it which appears in a default value in an 7424 * attribute-list declaration. Note that if entities are declared in the 7425 * external subset or in external parameter entities, a non-validating 7426 * processor is not obligated to read and process their declarations; 7427 * for such documents, the rule that an entity must be declared is a 7428 * well-formedness constraint only if standalone='yes'. 7429 * 7430 * [ WFC: Parsed Entity ] 7431 * An entity reference must not contain the name of an unparsed entity 7432 * 7433 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7434 * is updated to the current location in the string. 7435 */ 7436 static xmlEntityPtr 7437 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7438 xmlChar *name; 7439 const xmlChar *ptr; 7440 xmlChar cur; 7441 xmlEntityPtr ent = NULL; 7442 7443 if ((str == NULL) || (*str == NULL)) 7444 return(NULL); 7445 ptr = *str; 7446 cur = *ptr; 7447 if (cur != '&') 7448 return(NULL); 7449 7450 ptr++; 7451 name = xmlParseStringName(ctxt, &ptr); 7452 if (name == NULL) { 7453 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7454 "xmlParseStringEntityRef: no name\n"); 7455 *str = ptr; 7456 return(NULL); 7457 } 7458 if (*ptr != ';') { 7459 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7460 xmlFree(name); 7461 *str = ptr; 7462 return(NULL); 7463 } 7464 ptr++; 7465 7466 7467 /* 7468 * Predefined entites override any extra definition 7469 */ 7470 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7471 ent = xmlGetPredefinedEntity(name); 7472 if (ent != NULL) { 7473 xmlFree(name); 7474 *str = ptr; 7475 return(ent); 7476 } 7477 } 7478 7479 /* 7480 * Increate the number of entity references parsed 7481 */ 7482 ctxt->nbentities++; 7483 7484 /* 7485 * Ask first SAX for entity resolution, otherwise try the 7486 * entities which may have stored in the parser context. 
7487 */ 7488 if (ctxt->sax != NULL) { 7489 if (ctxt->sax->getEntity != NULL) 7490 ent = ctxt->sax->getEntity(ctxt->userData, name); 7491 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7492 ent = xmlGetPredefinedEntity(name); 7493 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7494 ent = xmlSAX2GetEntity(ctxt, name); 7495 } 7496 } 7497 7498 /* 7499 * [ WFC: Entity Declared ] 7500 * In a document without any DTD, a document with only an 7501 * internal DTD subset which contains no parameter entity 7502 * references, or a document with "standalone='yes'", the 7503 * Name given in the entity reference must match that in an 7504 * entity declaration, except that well-formed documents 7505 * need not declare any of the following entities: amp, lt, 7506 * gt, apos, quot. 7507 * The declaration of a parameter entity must precede any 7508 * reference to it. 7509 * Similarly, the declaration of a general entity must 7510 * precede any reference to it which appears in a default 7511 * value in an attribute-list declaration. Note that if 7512 * entities are declared in the external subset or in 7513 * external parameter entities, a non-validating processor 7514 * is not obligated to read and process their declarations; 7515 * for such documents, the rule that an entity must be 7516 * declared is a well-formedness constraint only if 7517 * standalone='yes'. 7518 */ 7519 if (ent == NULL) { 7520 if ((ctxt->standalone == 1) || 7521 ((ctxt->hasExternalSubset == 0) && 7522 (ctxt->hasPErefs == 0))) { 7523 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7524 "Entity '%s' not defined\n", name); 7525 } else { 7526 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7527 "Entity '%s' not defined\n", 7528 name); 7529 } 7530 /* TODO ? 
check regressions ctxt->valid = 0; */ 7531 } 7532 7533 /* 7534 * [ WFC: Parsed Entity ] 7535 * An entity reference must not contain the name of an 7536 * unparsed entity 7537 */ 7538 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7539 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7540 "Entity reference to unparsed entity %s\n", name); 7541 } 7542 7543 /* 7544 * [ WFC: No External Entity References ] 7545 * Attribute values cannot contain direct or indirect 7546 * entity references to external entities. 7547 */ 7548 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7549 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7550 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7551 "Attribute references external entity '%s'\n", name); 7552 } 7553 /* 7554 * [ WFC: No < in Attribute Values ] 7555 * The replacement text of any entity referred to directly or 7556 * indirectly in an attribute value (other than "<") must 7557 * not contain a <. 7558 */ 7559 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7560 (ent != NULL) && (ent->content != NULL) && 7561 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7562 (xmlStrchr(ent->content, '<'))) { 7563 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7564 "'<' in entity '%s' is not allowed in attributes values\n", 7565 name); 7566 } 7567 7568 /* 7569 * Internal check, no parameter entities here ... 7570 */ 7571 else { 7572 switch (ent->etype) { 7573 case XML_INTERNAL_PARAMETER_ENTITY: 7574 case XML_EXTERNAL_PARAMETER_ENTITY: 7575 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7576 "Attempt to reference the parameter entity '%s'\n", 7577 name); 7578 break; 7579 default: 7580 break; 7581 } 7582 } 7583 7584 /* 7585 * [ WFC: No Recursion ] 7586 * A parsed entity must not contain a recursive reference 7587 * to itself, either directly or indirectly. 
7588 * Done somewhere else 7589 */ 7590 7591 xmlFree(name); 7592 *str = ptr; 7593 return(ent); 7594 } 7595 7596 /** 7597 * xmlParsePEReference: 7598 * @ctxt: an XML parser context 7599 * 7600 * parse PEReference declarations 7601 * The entity content is handled directly by pushing it's content as 7602 * a new input stream. 7603 * 7604 * [69] PEReference ::= '%' Name ';' 7605 * 7606 * [ WFC: No Recursion ] 7607 * A parsed entity must not contain a recursive 7608 * reference to itself, either directly or indirectly. 7609 * 7610 * [ WFC: Entity Declared ] 7611 * In a document without any DTD, a document with only an internal DTD 7612 * subset which contains no parameter entity references, or a document 7613 * with "standalone='yes'", ... ... The declaration of a parameter 7614 * entity must precede any reference to it... 7615 * 7616 * [ VC: Entity Declared ] 7617 * In a document with an external subset or external parameter entities 7618 * with "standalone='no'", ... ... The declaration of a parameter entity 7619 * must precede any reference to it... 7620 * 7621 * [ WFC: In DTD ] 7622 * Parameter-entity references may only appear in the DTD. 7623 * NOTE: misleading but this is handled. 
7624 */ 7625 void 7626 xmlParsePEReference(xmlParserCtxtPtr ctxt) 7627 { 7628 const xmlChar *name; 7629 xmlEntityPtr entity = NULL; 7630 xmlParserInputPtr input; 7631 7632 if (RAW != '%') 7633 return; 7634 NEXT; 7635 name = xmlParseName(ctxt); 7636 if (name == NULL) { 7637 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7638 "xmlParsePEReference: no name\n"); 7639 return; 7640 } 7641 if (RAW != ';') { 7642 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7643 return; 7644 } 7645 7646 NEXT; 7647 7648 /* 7649 * Increate the number of entity references parsed 7650 */ 7651 ctxt->nbentities++; 7652 7653 /* 7654 * Request the entity from SAX 7655 */ 7656 if ((ctxt->sax != NULL) && 7657 (ctxt->sax->getParameterEntity != NULL)) 7658 entity = ctxt->sax->getParameterEntity(ctxt->userData, 7659 name); 7660 if (entity == NULL) { 7661 /* 7662 * [ WFC: Entity Declared ] 7663 * In a document without any DTD, a document with only an 7664 * internal DTD subset which contains no parameter entity 7665 * references, or a document with "standalone='yes'", ... 7666 * ... The declaration of a parameter entity must precede 7667 * any reference to it... 7668 */ 7669 if ((ctxt->standalone == 1) || 7670 ((ctxt->hasExternalSubset == 0) && 7671 (ctxt->hasPErefs == 0))) { 7672 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7673 "PEReference: %%%s; not found\n", 7674 name); 7675 } else { 7676 /* 7677 * [ VC: Entity Declared ] 7678 * In a document with an external subset or external 7679 * parameter entities with "standalone='no'", ... 7680 * ... The declaration of a parameter entity must 7681 * precede any reference to it... 
7682 */ 7683 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7684 "PEReference: %%%s; not found\n", 7685 name, NULL); 7686 ctxt->valid = 0; 7687 } 7688 } else { 7689 /* 7690 * Internal checking in case the entity quest barfed 7691 */ 7692 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7693 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7694 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7695 "Internal: %%%s; is not a parameter entity\n", 7696 name, NULL); 7697 } else if (ctxt->input->free != deallocblankswrapper) { 7698 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 7699 if (xmlPushInput(ctxt, input) < 0) 7700 return; 7701 } else { 7702 /* 7703 * TODO !!! 7704 * handle the extra spaces added before and after 7705 * c.f. http://www.w3.org/TR/REC-xml#as-PE 7706 */ 7707 input = xmlNewEntityInputStream(ctxt, entity); 7708 if (xmlPushInput(ctxt, input) < 0) 7709 return; 7710 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7711 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 7712 (IS_BLANK_CH(NXT(5)))) { 7713 xmlParseTextDecl(ctxt); 7714 if (ctxt->errNo == 7715 XML_ERR_UNSUPPORTED_ENCODING) { 7716 /* 7717 * The XML REC instructs us to stop parsing 7718 * right here 7719 */ 7720 ctxt->instate = XML_PARSER_EOF; 7721 return; 7722 } 7723 } 7724 } 7725 } 7726 ctxt->hasPErefs = 1; 7727 } 7728 7729 /** 7730 * xmlLoadEntityContent: 7731 * @ctxt: an XML parser context 7732 * @entity: an unloaded system entity 7733 * 7734 * Load the original content of the given system entity from the 7735 * ExternalID/SystemID given. 
This is to be used for Included in Literal 7736 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7737 * 7738 * Returns 0 in case of success and -1 in case of failure 7739 */ 7740 static int 7741 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7742 xmlParserInputPtr input; 7743 xmlBufferPtr buf; 7744 int l, c; 7745 int count = 0; 7746 7747 if ((ctxt == NULL) || (entity == NULL) || 7748 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7749 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7750 (entity->content != NULL)) { 7751 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7752 "xmlLoadEntityContent parameter error"); 7753 return(-1); 7754 } 7755 7756 if (xmlParserDebugEntities) 7757 xmlGenericError(xmlGenericErrorContext, 7758 "Reading %s entity content input\n", entity->name); 7759 7760 buf = xmlBufferCreate(); 7761 if (buf == NULL) { 7762 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7763 "xmlLoadEntityContent parameter error"); 7764 return(-1); 7765 } 7766 7767 input = xmlNewEntityInputStream(ctxt, entity); 7768 if (input == NULL) { 7769 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7770 "xmlLoadEntityContent input error"); 7771 xmlBufferFree(buf); 7772 return(-1); 7773 } 7774 7775 /* 7776 * Push the entity as the current input, read char by char 7777 * saving to the buffer until the end of the entity or an error 7778 */ 7779 if (xmlPushInput(ctxt, input) < 0) { 7780 xmlBufferFree(buf); 7781 return(-1); 7782 } 7783 7784 GROW; 7785 c = CUR_CHAR(l); 7786 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 7787 (IS_CHAR(c))) { 7788 xmlBufferAdd(buf, ctxt->input->cur, l); 7789 if (count++ > 100) { 7790 count = 0; 7791 GROW; 7792 } 7793 NEXTL(l); 7794 c = CUR_CHAR(l); 7795 } 7796 7797 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 7798 xmlPopInput(ctxt); 7799 } else if (!IS_CHAR(c)) { 7800 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 7801 "xmlLoadEntityContent: invalid char 
value %d\n", 7802 c); 7803 xmlBufferFree(buf); 7804 return(-1); 7805 } 7806 entity->content = buf->content; 7807 buf->content = NULL; 7808 xmlBufferFree(buf); 7809 7810 return(0); 7811 } 7812 7813 /** 7814 * xmlParseStringPEReference: 7815 * @ctxt: an XML parser context 7816 * @str: a pointer to an index in the string 7817 * 7818 * parse PEReference declarations 7819 * 7820 * [69] PEReference ::= '%' Name ';' 7821 * 7822 * [ WFC: No Recursion ] 7823 * A parsed entity must not contain a recursive 7824 * reference to itself, either directly or indirectly. 7825 * 7826 * [ WFC: Entity Declared ] 7827 * In a document without any DTD, a document with only an internal DTD 7828 * subset which contains no parameter entity references, or a document 7829 * with "standalone='yes'", ... ... The declaration of a parameter 7830 * entity must precede any reference to it... 7831 * 7832 * [ VC: Entity Declared ] 7833 * In a document with an external subset or external parameter entities 7834 * with "standalone='no'", ... ... The declaration of a parameter entity 7835 * must precede any reference to it... 7836 * 7837 * [ WFC: In DTD ] 7838 * Parameter-entity references may only appear in the DTD. 7839 * NOTE: misleading but this is handled. 7840 * 7841 * Returns the string of the entity content. 
7842 * str is updated to the current value of the index 7843 */ 7844 static xmlEntityPtr 7845 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 7846 const xmlChar *ptr; 7847 xmlChar cur; 7848 xmlChar *name; 7849 xmlEntityPtr entity = NULL; 7850 7851 if ((str == NULL) || (*str == NULL)) return(NULL); 7852 ptr = *str; 7853 cur = *ptr; 7854 if (cur != '%') 7855 return(NULL); 7856 ptr++; 7857 name = xmlParseStringName(ctxt, &ptr); 7858 if (name == NULL) { 7859 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7860 "xmlParseStringPEReference: no name\n"); 7861 *str = ptr; 7862 return(NULL); 7863 } 7864 cur = *ptr; 7865 if (cur != ';') { 7866 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7867 xmlFree(name); 7868 *str = ptr; 7869 return(NULL); 7870 } 7871 ptr++; 7872 7873 /* 7874 * Increate the number of entity references parsed 7875 */ 7876 ctxt->nbentities++; 7877 7878 /* 7879 * Request the entity from SAX 7880 */ 7881 if ((ctxt->sax != NULL) && 7882 (ctxt->sax->getParameterEntity != NULL)) 7883 entity = ctxt->sax->getParameterEntity(ctxt->userData, 7884 name); 7885 if (entity == NULL) { 7886 /* 7887 * [ WFC: Entity Declared ] 7888 * In a document without any DTD, a document with only an 7889 * internal DTD subset which contains no parameter entity 7890 * references, or a document with "standalone='yes'", ... 7891 * ... The declaration of a parameter entity must precede 7892 * any reference to it... 7893 */ 7894 if ((ctxt->standalone == 1) || 7895 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 7896 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7897 "PEReference: %%%s; not found\n", name); 7898 } else { 7899 /* 7900 * [ VC: Entity Declared ] 7901 * In a document with an external subset or external 7902 * parameter entities with "standalone='no'", ... 7903 * ... The declaration of a parameter entity must 7904 * precede any reference to it... 
7905 */ 7906 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7907 "PEReference: %%%s; not found\n", 7908 name, NULL); 7909 ctxt->valid = 0; 7910 } 7911 } else { 7912 /* 7913 * Internal checking in case the entity quest barfed 7914 */ 7915 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7916 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7917 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7918 "%%%s; is not a parameter entity\n", 7919 name, NULL); 7920 } 7921 } 7922 ctxt->hasPErefs = 1; 7923 xmlFree(name); 7924 *str = ptr; 7925 return(entity); 7926 } 7927 7928 /** 7929 * xmlParseDocTypeDecl: 7930 * @ctxt: an XML parser context 7931 * 7932 * parse a DOCTYPE declaration 7933 * 7934 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 7935 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7936 * 7937 * [ VC: Root Element Type ] 7938 * The Name in the document type declaration must match the element 7939 * type of the root element. 7940 */ 7941 7942 void 7943 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 7944 const xmlChar *name = NULL; 7945 xmlChar *ExternalID = NULL; 7946 xmlChar *URI = NULL; 7947 7948 /* 7949 * We know that '<!DOCTYPE' has been detected. 7950 */ 7951 SKIP(9); 7952 7953 SKIP_BLANKS; 7954 7955 /* 7956 * Parse the DOCTYPE name. 7957 */ 7958 name = xmlParseName(ctxt); 7959 if (name == NULL) { 7960 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7961 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 7962 } 7963 ctxt->intSubName = name; 7964 7965 SKIP_BLANKS; 7966 7967 /* 7968 * Check for SystemID and ExternalID 7969 */ 7970 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 7971 7972 if ((URI != NULL) || (ExternalID != NULL)) { 7973 ctxt->hasExternalSubset = 1; 7974 } 7975 ctxt->extSubURI = URI; 7976 ctxt->extSubSystem = ExternalID; 7977 7978 SKIP_BLANKS; 7979 7980 /* 7981 * Create and update the internal subset. 
7982 */ 7983 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 7984 (!ctxt->disableSAX)) 7985 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 7986 7987 /* 7988 * Is there any internal subset declarations ? 7989 * they are handled separately in xmlParseInternalSubset() 7990 */ 7991 if (RAW == '[') 7992 return; 7993 7994 /* 7995 * We should be at the end of the DOCTYPE declaration. 7996 */ 7997 if (RAW != '>') { 7998 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 7999 } 8000 NEXT; 8001 } 8002 8003 /** 8004 * xmlParseInternalSubset: 8005 * @ctxt: an XML parser context 8006 * 8007 * parse the internal subset declaration 8008 * 8009 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8010 */ 8011 8012 static void 8013 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8014 /* 8015 * Is there any DTD definition ? 8016 */ 8017 if (RAW == '[') { 8018 ctxt->instate = XML_PARSER_DTD; 8019 NEXT; 8020 /* 8021 * Parse the succession of Markup declarations and 8022 * PEReferences. 8023 * Subsequence (markupdecl | PEReference | S)* 8024 */ 8025 while (RAW != ']') { 8026 const xmlChar *check = CUR_PTR; 8027 unsigned int cons = ctxt->input->consumed; 8028 8029 SKIP_BLANKS; 8030 xmlParseMarkupDecl(ctxt); 8031 xmlParsePEReference(ctxt); 8032 8033 /* 8034 * Pop-up of finished entities. 8035 */ 8036 while ((RAW == 0) && (ctxt->inputNr > 1)) 8037 xmlPopInput(ctxt); 8038 8039 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8040 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8041 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8042 break; 8043 } 8044 } 8045 if (RAW == ']') { 8046 NEXT; 8047 SKIP_BLANKS; 8048 } 8049 } 8050 8051 /* 8052 * We should be at the end of the DOCTYPE declaration. 
8053 */ 8054 if (RAW != '>') { 8055 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8056 } 8057 NEXT; 8058 } 8059 8060 #ifdef LIBXML_SAX1_ENABLED 8061 /** 8062 * xmlParseAttribute: 8063 * @ctxt: an XML parser context 8064 * @value: a xmlChar ** used to store the value of the attribute 8065 * 8066 * parse an attribute 8067 * 8068 * [41] Attribute ::= Name Eq AttValue 8069 * 8070 * [ WFC: No External Entity References ] 8071 * Attribute values cannot contain direct or indirect entity references 8072 * to external entities. 8073 * 8074 * [ WFC: No < in Attribute Values ] 8075 * The replacement text of any entity referred to directly or indirectly in 8076 * an attribute value (other than "<") must not contain a <. 8077 * 8078 * [ VC: Attribute Value Type ] 8079 * The attribute must have been declared; the value must be of the type 8080 * declared for it. 8081 * 8082 * [25] Eq ::= S? '=' S? 8083 * 8084 * With namespace: 8085 * 8086 * [NS 11] Attribute ::= QName Eq AttValue 8087 * 8088 * Also the case QName == xmlns:??? is handled independently as a namespace 8089 * definition. 8090 * 8091 * Returns the attribute name, and the value in *value. 
8092 */ 8093 8094 const xmlChar * 8095 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8096 const xmlChar *name; 8097 xmlChar *val; 8098 8099 *value = NULL; 8100 GROW; 8101 name = xmlParseName(ctxt); 8102 if (name == NULL) { 8103 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8104 "error parsing attribute name\n"); 8105 return(NULL); 8106 } 8107 8108 /* 8109 * read the value 8110 */ 8111 SKIP_BLANKS; 8112 if (RAW == '=') { 8113 NEXT; 8114 SKIP_BLANKS; 8115 val = xmlParseAttValue(ctxt); 8116 ctxt->instate = XML_PARSER_CONTENT; 8117 } else { 8118 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8119 "Specification mandate value for attribute %s\n", name); 8120 return(NULL); 8121 } 8122 8123 /* 8124 * Check that xml:lang conforms to the specification 8125 * No more registered as an error, just generate a warning now 8126 * since this was deprecated in XML second edition 8127 */ 8128 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8129 if (!xmlCheckLanguageID(val)) { 8130 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8131 "Malformed value for xml:lang : %s\n", 8132 val, NULL); 8133 } 8134 } 8135 8136 /* 8137 * Check that xml:space conforms to the specification 8138 */ 8139 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8140 if (xmlStrEqual(val, BAD_CAST "default")) 8141 *(ctxt->space) = 0; 8142 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8143 *(ctxt->space) = 1; 8144 else { 8145 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8146 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8147 val, NULL); 8148 } 8149 } 8150 8151 *value = val; 8152 return(name); 8153 } 8154 8155 /** 8156 * xmlParseStartTag: 8157 * @ctxt: an XML parser context 8158 * 8159 * parse a start of tag either for rule element or 8160 * EmptyElement. In both case we don't parse the tag closing chars. 8161 * 8162 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8163 * 8164 * [ WFC: Unique Att Spec ] 8165 * No attribute name may appear more than once in the same start-tag or 8166 * empty-element tag. 8167 * 8168 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8169 * 8170 * [ WFC: Unique Att Spec ] 8171 * No attribute name may appear more than once in the same start-tag or 8172 * empty-element tag. 8173 * 8174 * With namespace: 8175 * 8176 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8177 * 8178 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8179 * 8180 * Returns the element name parsed 8181 */ 8182 8183 const xmlChar * 8184 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8185 const xmlChar *name; 8186 const xmlChar *attname; 8187 xmlChar *attvalue; 8188 const xmlChar **atts = ctxt->atts; 8189 int nbatts = 0; 8190 int maxatts = ctxt->maxatts; 8191 int i; 8192 8193 if (RAW != '<') return(NULL); 8194 NEXT1; 8195 8196 name = xmlParseName(ctxt); 8197 if (name == NULL) { 8198 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8199 "xmlParseStartTag: invalid element name\n"); 8200 return(NULL); 8201 } 8202 8203 /* 8204 * Now parse the attributes, it ends up with the ending 8205 * 8206 * (S Attribute)* S? 8207 */ 8208 SKIP_BLANKS; 8209 GROW; 8210 8211 while ((RAW != '>') && 8212 ((RAW != '/') || (NXT(1) != '>')) && 8213 (IS_BYTE_CHAR(RAW))) { 8214 const xmlChar *q = CUR_PTR; 8215 unsigned int cons = ctxt->input->consumed; 8216 8217 attname = xmlParseAttribute(ctxt, &attvalue); 8218 if ((attname != NULL) && (attvalue != NULL)) { 8219 /* 8220 * [ WFC: Unique Att Spec ] 8221 * No attribute name may appear more than once in the same 8222 * start-tag or empty-element tag. 
8223 */ 8224 for (i = 0; i < nbatts;i += 2) { 8225 if (xmlStrEqual(atts[i], attname)) { 8226 xmlErrAttributeDup(ctxt, NULL, attname); 8227 xmlFree(attvalue); 8228 goto failed; 8229 } 8230 } 8231 /* 8232 * Add the pair to atts 8233 */ 8234 if (atts == NULL) { 8235 maxatts = 22; /* allow for 10 attrs by default */ 8236 atts = (const xmlChar **) 8237 xmlMalloc(maxatts * sizeof(xmlChar *)); 8238 if (atts == NULL) { 8239 xmlErrMemory(ctxt, NULL); 8240 if (attvalue != NULL) 8241 xmlFree(attvalue); 8242 goto failed; 8243 } 8244 ctxt->atts = atts; 8245 ctxt->maxatts = maxatts; 8246 } else if (nbatts + 4 > maxatts) { 8247 const xmlChar **n; 8248 8249 maxatts *= 2; 8250 n = (const xmlChar **) xmlRealloc((void *) atts, 8251 maxatts * sizeof(const xmlChar *)); 8252 if (n == NULL) { 8253 xmlErrMemory(ctxt, NULL); 8254 if (attvalue != NULL) 8255 xmlFree(attvalue); 8256 goto failed; 8257 } 8258 atts = n; 8259 ctxt->atts = atts; 8260 ctxt->maxatts = maxatts; 8261 } 8262 atts[nbatts++] = attname; 8263 atts[nbatts++] = attvalue; 8264 atts[nbatts] = NULL; 8265 atts[nbatts + 1] = NULL; 8266 } else { 8267 if (attvalue != NULL) 8268 xmlFree(attvalue); 8269 } 8270 8271 failed: 8272 8273 GROW 8274 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8275 break; 8276 if (!IS_BLANK_CH(RAW)) { 8277 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8278 "attributes construct error\n"); 8279 } 8280 SKIP_BLANKS; 8281 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8282 (attname == NULL) && (attvalue == NULL)) { 8283 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8284 "xmlParseStartTag: problem parsing attributes\n"); 8285 break; 8286 } 8287 SHRINK; 8288 GROW; 8289 } 8290 8291 /* 8292 * SAX: Start of Element ! 
8293 */ 8294 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8295 (!ctxt->disableSAX)) { 8296 if (nbatts > 0) 8297 ctxt->sax->startElement(ctxt->userData, name, atts); 8298 else 8299 ctxt->sax->startElement(ctxt->userData, name, NULL); 8300 } 8301 8302 if (atts != NULL) { 8303 /* Free only the content strings */ 8304 for (i = 1;i < nbatts;i+=2) 8305 if (atts[i] != NULL) 8306 xmlFree((xmlChar *) atts[i]); 8307 } 8308 return(name); 8309 } 8310 8311 /** 8312 * xmlParseEndTag1: 8313 * @ctxt: an XML parser context 8314 * @line: line of the start tag 8315 * @nsNr: number of namespaces on the start tag 8316 * 8317 * parse an end of tag 8318 * 8319 * [42] ETag ::= '</' Name S? '>' 8320 * 8321 * With namespace 8322 * 8323 * [NS 9] ETag ::= '</' QName S? '>' 8324 */ 8325 8326 static void 8327 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8328 const xmlChar *name; 8329 8330 GROW; 8331 if ((RAW != '<') || (NXT(1) != '/')) { 8332 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8333 "xmlParseEndTag: '</' not found\n"); 8334 return; 8335 } 8336 SKIP(2); 8337 8338 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8339 8340 /* 8341 * We should definitely be at the ending "S? '>'" part 8342 */ 8343 GROW; 8344 SKIP_BLANKS; 8345 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8346 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8347 } else 8348 NEXT1; 8349 8350 /* 8351 * [ WFC: Element Type Match ] 8352 * The Name in an element's end-tag must match the element type in the 8353 * start-tag. 
8354 * 8355 */ 8356 if (name != (xmlChar*)1) { 8357 if (name == NULL) name = BAD_CAST "unparseable"; 8358 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8359 "Opening and ending tag mismatch: %s line %d and %s\n", 8360 ctxt->name, line, name); 8361 } 8362 8363 /* 8364 * SAX: End of Tag 8365 */ 8366 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8367 (!ctxt->disableSAX)) 8368 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8369 8370 namePop(ctxt); 8371 spacePop(ctxt); 8372 return; 8373 } 8374 8375 /** 8376 * xmlParseEndTag: 8377 * @ctxt: an XML parser context 8378 * 8379 * parse an end of tag 8380 * 8381 * [42] ETag ::= '</' Name S? '>' 8382 * 8383 * With namespace 8384 * 8385 * [NS 9] ETag ::= '</' QName S? '>' 8386 */ 8387 8388 void 8389 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8390 xmlParseEndTag1(ctxt, 0); 8391 } 8392 #endif /* LIBXML_SAX1_ENABLED */ 8393 8394 /************************************************************************ 8395 * * 8396 * SAX 2 specific operations * 8397 * * 8398 ************************************************************************/ 8399 8400 /* 8401 * xmlGetNamespace: 8402 * @ctxt: an XML parser context 8403 * @prefix: the prefix to lookup 8404 * 8405 * Lookup the namespace name for the @prefix (which ca be NULL) 8406 * The prefix must come from the @ctxt->dict dictionnary 8407 * 8408 * Returns the namespace name or NULL if not bound 8409 */ 8410 static const xmlChar * 8411 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8412 int i; 8413 8414 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8415 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8416 if (ctxt->nsTab[i] == prefix) { 8417 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8418 return(NULL); 8419 return(ctxt->nsTab[i + 1]); 8420 } 8421 return(NULL); 8422 } 8423 8424 /** 8425 * xmlParseQName: 8426 * @ctxt: an XML parser context 8427 * @prefix: pointer to store the prefix part 8428 * 8429 * parse an XML Namespace QName 8430 * 8431 * 
[6] QName ::= (Prefix ':')? LocalPart 8432 * [7] Prefix ::= NCName 8433 * [8] LocalPart ::= NCName 8434 * 8435 * Returns the Name parsed or NULL 8436 */ 8437 8438 static const xmlChar * 8439 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8440 const xmlChar *l, *p; 8441 8442 GROW; 8443 8444 l = xmlParseNCName(ctxt); 8445 if (l == NULL) { 8446 if (CUR == ':') { 8447 l = xmlParseName(ctxt); 8448 if (l != NULL) { 8449 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8450 "Failed to parse QName '%s'\n", l, NULL, NULL); 8451 *prefix = NULL; 8452 return(l); 8453 } 8454 } 8455 return(NULL); 8456 } 8457 if (CUR == ':') { 8458 NEXT; 8459 p = l; 8460 l = xmlParseNCName(ctxt); 8461 if (l == NULL) { 8462 xmlChar *tmp; 8463 8464 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8465 "Failed to parse QName '%s:'\n", p, NULL, NULL); 8466 l = xmlParseNmtoken(ctxt); 8467 if (l == NULL) 8468 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8469 else { 8470 tmp = xmlBuildQName(l, p, NULL, 0); 8471 xmlFree((char *)l); 8472 } 8473 p = xmlDictLookup(ctxt->dict, tmp, -1); 8474 if (tmp != NULL) xmlFree(tmp); 8475 *prefix = NULL; 8476 return(p); 8477 } 8478 if (CUR == ':') { 8479 xmlChar *tmp; 8480 8481 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8482 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8483 NEXT; 8484 tmp = (xmlChar *) xmlParseName(ctxt); 8485 if (tmp != NULL) { 8486 tmp = xmlBuildQName(tmp, l, NULL, 0); 8487 l = xmlDictLookup(ctxt->dict, tmp, -1); 8488 if (tmp != NULL) xmlFree(tmp); 8489 *prefix = p; 8490 return(l); 8491 } 8492 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8493 l = xmlDictLookup(ctxt->dict, tmp, -1); 8494 if (tmp != NULL) xmlFree(tmp); 8495 *prefix = p; 8496 return(l); 8497 } 8498 *prefix = p; 8499 } else 8500 *prefix = NULL; 8501 return(l); 8502 } 8503 8504 /** 8505 * xmlParseQNameAndCompare: 8506 * @ctxt: an XML parser context 8507 * @name: the localname 8508 * @prefix: the prefix, if any. 
8509 * 8510 * parse an XML name and compares for match 8511 * (specialized for endtag parsing) 8512 * 8513 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8514 * and the name for mismatch 8515 */ 8516 8517 static const xmlChar * 8518 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8519 xmlChar const *prefix) { 8520 const xmlChar *cmp; 8521 const xmlChar *in; 8522 const xmlChar *ret; 8523 const xmlChar *prefix2; 8524 8525 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8526 8527 GROW; 8528 in = ctxt->input->cur; 8529 8530 cmp = prefix; 8531 while (*in != 0 && *in == *cmp) { 8532 ++in; 8533 ++cmp; 8534 } 8535 if ((*cmp == 0) && (*in == ':')) { 8536 in++; 8537 cmp = name; 8538 while (*in != 0 && *in == *cmp) { 8539 ++in; 8540 ++cmp; 8541 } 8542 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8543 /* success */ 8544 ctxt->input->cur = in; 8545 return((const xmlChar*) 1); 8546 } 8547 } 8548 /* 8549 * all strings coms from the dictionary, equality can be done directly 8550 */ 8551 ret = xmlParseQName (ctxt, &prefix2); 8552 if ((ret == name) && (prefix == prefix2)) 8553 return((const xmlChar*) 1); 8554 return ret; 8555 } 8556 8557 /** 8558 * xmlParseAttValueInternal: 8559 * @ctxt: an XML parser context 8560 * @len: attribute len result 8561 * @alloc: whether the attribute was reallocated as a new string 8562 * @normalize: if 1 then further non-CDATA normalization must be done 8563 * 8564 * parse a value for an attribute. 8565 * NOTE: if no normalization is needed, the routine will return pointers 8566 * directly from the data buffer. 
8567 * 8568 * 3.3.3 Attribute-Value Normalization: 8569 * Before the value of an attribute is passed to the application or 8570 * checked for validity, the XML processor must normalize it as follows: 8571 * - a character reference is processed by appending the referenced 8572 * character to the attribute value 8573 * - an entity reference is processed by recursively processing the 8574 * replacement text of the entity 8575 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8576 * appending #x20 to the normalized value, except that only a single 8577 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8578 * parsed entity or the literal entity value of an internal parsed entity 8579 * - other characters are processed by appending them to the normalized value 8580 * If the declared value is not CDATA, then the XML processor must further 8581 * process the normalized attribute value by discarding any leading and 8582 * trailing space (#x20) characters, and by replacing sequences of space 8583 * (#x20) characters by a single space (#x20) character. 8584 * All attributes for which no declaration has been read should be treated 8585 * by a non-validating parser as if declared CDATA. 8586 * 8587 * Returns the AttValue parsed or NULL. The value has to be freed by the 8588 * caller if it was copied, this can be detected by val[*len] == 0. 
8589 */ 8590 8591 static xmlChar * 8592 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8593 int normalize) 8594 { 8595 xmlChar limit = 0; 8596 const xmlChar *in = NULL, *start, *end, *last; 8597 xmlChar *ret = NULL; 8598 8599 GROW; 8600 in = (xmlChar *) CUR_PTR; 8601 if (*in != '"' && *in != '\'') { 8602 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8603 return (NULL); 8604 } 8605 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8606 8607 /* 8608 * try to handle in this routine the most common case where no 8609 * allocation of a new string is required and where content is 8610 * pure ASCII. 8611 */ 8612 limit = *in++; 8613 end = ctxt->input->end; 8614 start = in; 8615 if (in >= end) { 8616 const xmlChar *oldbase = ctxt->input->base; 8617 GROW; 8618 if (oldbase != ctxt->input->base) { 8619 long delta = ctxt->input->base - oldbase; 8620 start = start + delta; 8621 in = in + delta; 8622 } 8623 end = ctxt->input->end; 8624 } 8625 if (normalize) { 8626 /* 8627 * Skip any leading spaces 8628 */ 8629 while ((in < end) && (*in != limit) && 8630 ((*in == 0x20) || (*in == 0x9) || 8631 (*in == 0xA) || (*in == 0xD))) { 8632 in++; 8633 start = in; 8634 if (in >= end) { 8635 const xmlChar *oldbase = ctxt->input->base; 8636 GROW; 8637 if (oldbase != ctxt->input->base) { 8638 long delta = ctxt->input->base - oldbase; 8639 start = start + delta; 8640 in = in + delta; 8641 } 8642 end = ctxt->input->end; 8643 } 8644 } 8645 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8646 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8647 if ((*in++ == 0x20) && (*in == 0x20)) break; 8648 if (in >= end) { 8649 const xmlChar *oldbase = ctxt->input->base; 8650 GROW; 8651 if (oldbase != ctxt->input->base) { 8652 long delta = ctxt->input->base - oldbase; 8653 start = start + delta; 8654 in = in + delta; 8655 } 8656 end = ctxt->input->end; 8657 } 8658 } 8659 last = in; 8660 /* 8661 * skip the trailing blanks 8662 */ 8663 while ((last[-1] == 0x20) && (last > 
start)) last--; 8664 while ((in < end) && (*in != limit) && 8665 ((*in == 0x20) || (*in == 0x9) || 8666 (*in == 0xA) || (*in == 0xD))) { 8667 in++; 8668 if (in >= end) { 8669 const xmlChar *oldbase = ctxt->input->base; 8670 GROW; 8671 if (oldbase != ctxt->input->base) { 8672 long delta = ctxt->input->base - oldbase; 8673 start = start + delta; 8674 in = in + delta; 8675 last = last + delta; 8676 } 8677 end = ctxt->input->end; 8678 } 8679 } 8680 if (*in != limit) goto need_complex; 8681 } else { 8682 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8683 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8684 in++; 8685 if (in >= end) { 8686 const xmlChar *oldbase = ctxt->input->base; 8687 GROW; 8688 if (oldbase != ctxt->input->base) { 8689 long delta = ctxt->input->base - oldbase; 8690 start = start + delta; 8691 in = in + delta; 8692 } 8693 end = ctxt->input->end; 8694 } 8695 } 8696 last = in; 8697 if (*in != limit) goto need_complex; 8698 } 8699 in++; 8700 if (len != NULL) { 8701 *len = last - start; 8702 ret = (xmlChar *) start; 8703 } else { 8704 if (alloc) *alloc = 1; 8705 ret = xmlStrndup(start, last - start); 8706 } 8707 CUR_PTR = in; 8708 if (alloc) *alloc = 0; 8709 return ret; 8710 need_complex: 8711 if (alloc) *alloc = 1; 8712 return xmlParseAttValueComplex(ctxt, len, normalize); 8713 } 8714 8715 /** 8716 * xmlParseAttribute2: 8717 * @ctxt: an XML parser context 8718 * @pref: the element prefix 8719 * @elem: the element name 8720 * @prefix: a xmlChar ** used to store the value of the attribute prefix 8721 * @value: a xmlChar ** used to store the value of the attribute 8722 * @len: an int * to save the length of the attribute 8723 * @alloc: an int * to indicate if the attribute was allocated 8724 * 8725 * parse an attribute in the new SAX2 framework. 8726 * 8727 * Returns the attribute name, and the value in *value, . 
8728 */ 8729 8730 static const xmlChar * 8731 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 8732 const xmlChar * pref, const xmlChar * elem, 8733 const xmlChar ** prefix, xmlChar ** value, 8734 int *len, int *alloc) 8735 { 8736 const xmlChar *name; 8737 xmlChar *val, *internal_val = NULL; 8738 int normalize = 0; 8739 8740 *value = NULL; 8741 GROW; 8742 name = xmlParseQName(ctxt, prefix); 8743 if (name == NULL) { 8744 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8745 "error parsing attribute name\n"); 8746 return (NULL); 8747 } 8748 8749 /* 8750 * get the type if needed 8751 */ 8752 if (ctxt->attsSpecial != NULL) { 8753 int type; 8754 8755 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 8756 pref, elem, *prefix, name); 8757 if (type != 0) 8758 normalize = 1; 8759 } 8760 8761 /* 8762 * read the value 8763 */ 8764 SKIP_BLANKS; 8765 if (RAW == '=') { 8766 NEXT; 8767 SKIP_BLANKS; 8768 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 8769 if (normalize) { 8770 /* 8771 * Sometimes a second normalisation pass for spaces is needed 8772 * but that only happens if charrefs or entities refernces 8773 * have been used in the attribute value, i.e. the attribute 8774 * value have been extracted in an allocated string already. 
8775 */ 8776 if (*alloc) { 8777 const xmlChar *val2; 8778 8779 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 8780 if ((val2 != NULL) && (val2 != val)) { 8781 xmlFree(val); 8782 val = (xmlChar *) val2; 8783 } 8784 } 8785 } 8786 ctxt->instate = XML_PARSER_CONTENT; 8787 } else { 8788 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8789 "Specification mandate value for attribute %s\n", 8790 name); 8791 return (NULL); 8792 } 8793 8794 if (*prefix == ctxt->str_xml) { 8795 /* 8796 * Check that xml:lang conforms to the specification 8797 * No more registered as an error, just generate a warning now 8798 * since this was deprecated in XML second edition 8799 */ 8800 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 8801 internal_val = xmlStrndup(val, *len); 8802 if (!xmlCheckLanguageID(internal_val)) { 8803 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8804 "Malformed value for xml:lang : %s\n", 8805 internal_val, NULL); 8806 } 8807 } 8808 8809 /* 8810 * Check that xml:space conforms to the specification 8811 */ 8812 if (xmlStrEqual(name, BAD_CAST "space")) { 8813 internal_val = xmlStrndup(val, *len); 8814 if (xmlStrEqual(internal_val, BAD_CAST "default")) 8815 *(ctxt->space) = 0; 8816 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 8817 *(ctxt->space) = 1; 8818 else { 8819 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8820 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8821 internal_val, NULL); 8822 } 8823 } 8824 if (internal_val) { 8825 xmlFree(internal_val); 8826 } 8827 } 8828 8829 *value = val; 8830 return (name); 8831 } 8832 /** 8833 * xmlParseStartTag2: 8834 * @ctxt: an XML parser context 8835 * 8836 * parse a start of tag either for rule element or 8837 * EmptyElement. In both case we don't parse the tag closing chars. 8838 * This routine is called when running SAX2 parsing 8839 * 8840 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8841 * 8842 * [ WFC: Unique Att Spec ] 8843 * No attribute name may appear more than once in the same start-tag or 8844 * empty-element tag. 8845 * 8846 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8847 * 8848 * [ WFC: Unique Att Spec ] 8849 * No attribute name may appear more than once in the same start-tag or 8850 * empty-element tag. 8851 * 8852 * With namespace: 8853 * 8854 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8855 * 8856 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8857 * 8858 * Returns the element name parsed 8859 */ 8860 8861 static const xmlChar * 8862 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 8863 const xmlChar **URI, int *tlen) { 8864 const xmlChar *localname; 8865 const xmlChar *prefix; 8866 const xmlChar *attname; 8867 const xmlChar *aprefix; 8868 const xmlChar *nsname; 8869 xmlChar *attvalue; 8870 const xmlChar **atts = ctxt->atts; 8871 int maxatts = ctxt->maxatts; 8872 int nratts, nbatts, nbdef; 8873 int i, j, nbNs, attval, oldline, oldcol; 8874 const xmlChar *base; 8875 unsigned long cur; 8876 int nsNr = ctxt->nsNr; 8877 8878 if (RAW != '<') return(NULL); 8879 NEXT1; 8880 8881 /* 8882 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 8883 * point since the attribute values may be stored as pointers to 8884 * the buffer and calling SHRINK would destroy them ! 8885 * The Shrinking is only possible once the full set of attribute 8886 * callbacks have been done. 8887 */ 8888 reparse: 8889 SHRINK; 8890 base = ctxt->input->base; 8891 cur = ctxt->input->cur - ctxt->input->base; 8892 oldline = ctxt->input->line; 8893 oldcol = ctxt->input->col; 8894 nbatts = 0; 8895 nratts = 0; 8896 nbdef = 0; 8897 nbNs = 0; 8898 attval = 0; 8899 /* Forget any namespaces added during an earlier parse of this element. 
*/ 8900 ctxt->nsNr = nsNr; 8901 8902 localname = xmlParseQName(ctxt, &prefix); 8903 if (localname == NULL) { 8904 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8905 "StartTag: invalid element name\n"); 8906 return(NULL); 8907 } 8908 *tlen = ctxt->input->cur - ctxt->input->base - cur; 8909 8910 /* 8911 * Now parse the attributes, it ends up with the ending 8912 * 8913 * (S Attribute)* S? 8914 */ 8915 SKIP_BLANKS; 8916 GROW; 8917 if (ctxt->input->base != base) goto base_changed; 8918 8919 while ((RAW != '>') && 8920 ((RAW != '/') || (NXT(1) != '>')) && 8921 (IS_BYTE_CHAR(RAW))) { 8922 const xmlChar *q = CUR_PTR; 8923 unsigned int cons = ctxt->input->consumed; 8924 int len = -1, alloc = 0; 8925 8926 attname = xmlParseAttribute2(ctxt, prefix, localname, 8927 &aprefix, &attvalue, &len, &alloc); 8928 if (ctxt->input->base != base) { 8929 if ((attvalue != NULL) && (alloc != 0)) 8930 xmlFree(attvalue); 8931 attvalue = NULL; 8932 goto base_changed; 8933 } 8934 if ((attname != NULL) && (attvalue != NULL)) { 8935 if (len < 0) len = xmlStrlen(attvalue); 8936 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8937 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8938 xmlURIPtr uri; 8939 8940 if (*URL != 0) { 8941 uri = xmlParseURI((const char *) URL); 8942 if (uri == NULL) { 8943 xmlNsErr(ctxt, XML_WAR_NS_URI, 8944 "xmlns: '%s' is not a valid URI\n", 8945 URL, NULL, NULL); 8946 } else { 8947 if (uri->scheme == NULL) { 8948 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 8949 "xmlns: URI %s is not absolute\n", 8950 URL, NULL, NULL); 8951 } 8952 xmlFreeURI(uri); 8953 } 8954 if (URL == ctxt->str_xml_ns) { 8955 if (attname != ctxt->str_xml) { 8956 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8957 "xml namespace URI cannot be the default namespace\n", 8958 NULL, NULL, NULL); 8959 } 8960 goto skip_default_ns; 8961 } 8962 if ((len == 29) && 8963 (xmlStrEqual(URL, 8964 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 8965 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8966 "reuse of the 
xmlns namespace name is forbidden\n", 8967 NULL, NULL, NULL); 8968 goto skip_default_ns; 8969 } 8970 } 8971 /* 8972 * check that it's not a defined namespace 8973 */ 8974 for (j = 1;j <= nbNs;j++) 8975 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8976 break; 8977 if (j <= nbNs) 8978 xmlErrAttributeDup(ctxt, NULL, attname); 8979 else 8980 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 8981 skip_default_ns: 8982 if (alloc != 0) xmlFree(attvalue); 8983 SKIP_BLANKS; 8984 continue; 8985 } 8986 if (aprefix == ctxt->str_xmlns) { 8987 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8988 xmlURIPtr uri; 8989 8990 if (attname == ctxt->str_xml) { 8991 if (URL != ctxt->str_xml_ns) { 8992 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8993 "xml namespace prefix mapped to wrong URI\n", 8994 NULL, NULL, NULL); 8995 } 8996 /* 8997 * Do not keep a namespace definition node 8998 */ 8999 goto skip_ns; 9000 } 9001 if (URL == ctxt->str_xml_ns) { 9002 if (attname != ctxt->str_xml) { 9003 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9004 "xml namespace URI mapped to wrong prefix\n", 9005 NULL, NULL, NULL); 9006 } 9007 goto skip_ns; 9008 } 9009 if (attname == ctxt->str_xmlns) { 9010 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9011 "redefinition of the xmlns prefix is forbidden\n", 9012 NULL, NULL, NULL); 9013 goto skip_ns; 9014 } 9015 if ((len == 29) && 9016 (xmlStrEqual(URL, 9017 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9018 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9019 "reuse of the xmlns namespace name is forbidden\n", 9020 NULL, NULL, NULL); 9021 goto skip_ns; 9022 } 9023 if ((URL == NULL) || (URL[0] == 0)) { 9024 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9025 "xmlns:%s: Empty XML namespace is not allowed\n", 9026 attname, NULL, NULL); 9027 goto skip_ns; 9028 } else { 9029 uri = xmlParseURI((const char *) URL); 9030 if (uri == NULL) { 9031 xmlNsErr(ctxt, XML_WAR_NS_URI, 9032 "xmlns:%s: '%s' is not a valid URI\n", 9033 attname, URL, NULL); 9034 } else { 9035 if ((ctxt->pedantic) && 
(uri->scheme == NULL)) { 9036 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9037 "xmlns:%s: URI %s is not absolute\n", 9038 attname, URL, NULL); 9039 } 9040 xmlFreeURI(uri); 9041 } 9042 } 9043 9044 /* 9045 * check that it's not a defined namespace 9046 */ 9047 for (j = 1;j <= nbNs;j++) 9048 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9049 break; 9050 if (j <= nbNs) 9051 xmlErrAttributeDup(ctxt, aprefix, attname); 9052 else 9053 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9054 skip_ns: 9055 if (alloc != 0) xmlFree(attvalue); 9056 SKIP_BLANKS; 9057 if (ctxt->input->base != base) goto base_changed; 9058 continue; 9059 } 9060 9061 /* 9062 * Add the pair to atts 9063 */ 9064 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9065 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9066 if (attvalue[len] == 0) 9067 xmlFree(attvalue); 9068 goto failed; 9069 } 9070 maxatts = ctxt->maxatts; 9071 atts = ctxt->atts; 9072 } 9073 ctxt->attallocs[nratts++] = alloc; 9074 atts[nbatts++] = attname; 9075 atts[nbatts++] = aprefix; 9076 atts[nbatts++] = NULL; /* the URI will be fetched later */ 9077 atts[nbatts++] = attvalue; 9078 attvalue += len; 9079 atts[nbatts++] = attvalue; 9080 /* 9081 * tag if some deallocation is needed 9082 */ 9083 if (alloc != 0) attval = 1; 9084 } else { 9085 if ((attvalue != NULL) && (attvalue[len] == 0)) 9086 xmlFree(attvalue); 9087 } 9088 9089 failed: 9090 9091 GROW 9092 if (ctxt->input->base != base) goto base_changed; 9093 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9094 break; 9095 if (!IS_BLANK_CH(RAW)) { 9096 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9097 "attributes construct error\n"); 9098 break; 9099 } 9100 SKIP_BLANKS; 9101 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9102 (attname == NULL) && (attvalue == NULL)) { 9103 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9104 "xmlParseStartTag: problem parsing attributes\n"); 9105 break; 9106 } 9107 GROW; 9108 if (ctxt->input->base != base) goto base_changed; 9109 } 9110 9111 /* 9112 * 
The attributes defaulting 9113 */ 9114 if (ctxt->attsDefault != NULL) { 9115 xmlDefAttrsPtr defaults; 9116 9117 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9118 if (defaults != NULL) { 9119 for (i = 0;i < defaults->nbAttrs;i++) { 9120 attname = defaults->values[5 * i]; 9121 aprefix = defaults->values[5 * i + 1]; 9122 9123 /* 9124 * special work for namespaces defaulted defs 9125 */ 9126 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9127 /* 9128 * check that it's not a defined namespace 9129 */ 9130 for (j = 1;j <= nbNs;j++) 9131 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9132 break; 9133 if (j <= nbNs) continue; 9134 9135 nsname = xmlGetNamespace(ctxt, NULL); 9136 if (nsname != defaults->values[5 * i + 2]) { 9137 if (nsPush(ctxt, NULL, 9138 defaults->values[5 * i + 2]) > 0) 9139 nbNs++; 9140 } 9141 } else if (aprefix == ctxt->str_xmlns) { 9142 /* 9143 * check that it's not a defined namespace 9144 */ 9145 for (j = 1;j <= nbNs;j++) 9146 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9147 break; 9148 if (j <= nbNs) continue; 9149 9150 nsname = xmlGetNamespace(ctxt, attname); 9151 if (nsname != defaults->values[2]) { 9152 if (nsPush(ctxt, attname, 9153 defaults->values[5 * i + 2]) > 0) 9154 nbNs++; 9155 } 9156 } else { 9157 /* 9158 * check that it's not a defined attribute 9159 */ 9160 for (j = 0;j < nbatts;j+=5) { 9161 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9162 break; 9163 } 9164 if (j < nbatts) continue; 9165 9166 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9167 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9168 return(NULL); 9169 } 9170 maxatts = ctxt->maxatts; 9171 atts = ctxt->atts; 9172 } 9173 atts[nbatts++] = attname; 9174 atts[nbatts++] = aprefix; 9175 if (aprefix == NULL) 9176 atts[nbatts++] = NULL; 9177 else 9178 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9179 atts[nbatts++] = defaults->values[5 * i + 2]; 9180 atts[nbatts++] = defaults->values[5 * i + 3]; 9181 if ((ctxt->standalone == 1) && 9182 
(defaults->values[5 * i + 4] != NULL)) { 9183 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9184 "standalone: attribute %s on %s defaulted from external subset\n", 9185 attname, localname); 9186 } 9187 nbdef++; 9188 } 9189 } 9190 } 9191 } 9192 9193 /* 9194 * The attributes checkings 9195 */ 9196 for (i = 0; i < nbatts;i += 5) { 9197 /* 9198 * The default namespace does not apply to attribute names. 9199 */ 9200 if (atts[i + 1] != NULL) { 9201 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9202 if (nsname == NULL) { 9203 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9204 "Namespace prefix %s for %s on %s is not defined\n", 9205 atts[i + 1], atts[i], localname); 9206 } 9207 atts[i + 2] = nsname; 9208 } else 9209 nsname = NULL; 9210 /* 9211 * [ WFC: Unique Att Spec ] 9212 * No attribute name may appear more than once in the same 9213 * start-tag or empty-element tag. 9214 * As extended by the Namespace in XML REC. 9215 */ 9216 for (j = 0; j < i;j += 5) { 9217 if (atts[i] == atts[j]) { 9218 if (atts[i+1] == atts[j+1]) { 9219 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9220 break; 9221 } 9222 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9223 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9224 "Namespaced Attribute %s in '%s' redefined\n", 9225 atts[i], nsname, NULL); 9226 break; 9227 } 9228 } 9229 } 9230 } 9231 9232 nsname = xmlGetNamespace(ctxt, prefix); 9233 if ((prefix != NULL) && (nsname == NULL)) { 9234 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9235 "Namespace prefix %s on %s is not defined\n", 9236 prefix, localname, NULL); 9237 } 9238 *pref = prefix; 9239 *URI = nsname; 9240 9241 /* 9242 * SAX: Start of Element ! 
9243 */ 9244 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9245 (!ctxt->disableSAX)) { 9246 if (nbNs > 0) 9247 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9248 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9249 nbatts / 5, nbdef, atts); 9250 else 9251 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9252 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9253 } 9254 9255 /* 9256 * Free up attribute allocated strings if needed 9257 */ 9258 if (attval != 0) { 9259 for (i = 3,j = 0; j < nratts;i += 5,j++) 9260 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9261 xmlFree((xmlChar *) atts[i]); 9262 } 9263 9264 return(localname); 9265 9266 base_changed: 9267 /* 9268 * the attribute strings are valid iif the base didn't changed 9269 */ 9270 if (attval != 0) { 9271 for (i = 3,j = 0; j < nratts;i += 5,j++) 9272 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9273 xmlFree((xmlChar *) atts[i]); 9274 } 9275 ctxt->input->cur = ctxt->input->base + cur; 9276 ctxt->input->line = oldline; 9277 ctxt->input->col = oldcol; 9278 if (ctxt->wellFormed == 1) { 9279 goto reparse; 9280 } 9281 return(NULL); 9282 } 9283 9284 /** 9285 * xmlParseEndTag2: 9286 * @ctxt: an XML parser context 9287 * @line: line of the start tag 9288 * @nsNr: number of namespaces on the start tag 9289 * 9290 * parse an end of tag 9291 * 9292 * [42] ETag ::= '</' Name S? '>' 9293 * 9294 * With namespace 9295 * 9296 * [NS 9] ETag ::= '</' QName S? 
'>' 9297 */ 9298 9299 static void 9300 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9301 const xmlChar *URI, int line, int nsNr, int tlen) { 9302 const xmlChar *name; 9303 9304 GROW; 9305 if ((RAW != '<') || (NXT(1) != '/')) { 9306 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9307 return; 9308 } 9309 SKIP(2); 9310 9311 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9312 if (ctxt->input->cur[tlen] == '>') { 9313 ctxt->input->cur += tlen + 1; 9314 goto done; 9315 } 9316 ctxt->input->cur += tlen; 9317 name = (xmlChar*)1; 9318 } else { 9319 if (prefix == NULL) 9320 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9321 else 9322 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9323 } 9324 9325 /* 9326 * We should definitely be at the ending "S? '>'" part 9327 */ 9328 GROW; 9329 SKIP_BLANKS; 9330 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9331 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9332 } else 9333 NEXT1; 9334 9335 /* 9336 * [ WFC: Element Type Match ] 9337 * The Name in an element's end-tag must match the element type in the 9338 * start-tag. 9339 * 9340 */ 9341 if (name != (xmlChar*)1) { 9342 if (name == NULL) name = BAD_CAST "unparseable"; 9343 if ((line == 0) && (ctxt->node != NULL)) 9344 line = ctxt->node->line; 9345 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9346 "Opening and ending tag mismatch: %s line %d and %s\n", 9347 ctxt->name, line, name); 9348 } 9349 9350 /* 9351 * SAX: End of Tag 9352 */ 9353 done: 9354 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9355 (!ctxt->disableSAX)) 9356 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9357 9358 spacePop(ctxt); 9359 if (nsNr != 0) 9360 nsPop(ctxt, nsNr); 9361 return; 9362 } 9363 9364 /** 9365 * xmlParseCDSect: 9366 * @ctxt: an XML parser context 9367 * 9368 * Parse escaped pure raw content. 
9369 * 9370 * [18] CDSect ::= CDStart CData CDEnd 9371 * 9372 * [19] CDStart ::= '<![CDATA[' 9373 * 9374 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9375 * 9376 * [21] CDEnd ::= ']]>' 9377 */ 9378 void 9379 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9380 xmlChar *buf = NULL; 9381 int len = 0; 9382 int size = XML_PARSER_BUFFER_SIZE; 9383 int r, rl; 9384 int s, sl; 9385 int cur, l; 9386 int count = 0; 9387 9388 /* Check 2.6.0 was NXT(0) not RAW */ 9389 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9390 SKIP(9); 9391 } else 9392 return; 9393 9394 ctxt->instate = XML_PARSER_CDATA_SECTION; 9395 r = CUR_CHAR(rl); 9396 if (!IS_CHAR(r)) { 9397 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9398 ctxt->instate = XML_PARSER_CONTENT; 9399 return; 9400 } 9401 NEXTL(rl); 9402 s = CUR_CHAR(sl); 9403 if (!IS_CHAR(s)) { 9404 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9405 ctxt->instate = XML_PARSER_CONTENT; 9406 return; 9407 } 9408 NEXTL(sl); 9409 cur = CUR_CHAR(l); 9410 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9411 if (buf == NULL) { 9412 xmlErrMemory(ctxt, NULL); 9413 return; 9414 } 9415 while (IS_CHAR(cur) && 9416 ((r != ']') || (s != ']') || (cur != '>'))) { 9417 if (len + 5 >= size) { 9418 xmlChar *tmp; 9419 9420 size *= 2; 9421 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9422 if (tmp == NULL) { 9423 xmlFree(buf); 9424 xmlErrMemory(ctxt, NULL); 9425 return; 9426 } 9427 buf = tmp; 9428 } 9429 COPY_BUF(rl,buf,len,r); 9430 r = s; 9431 rl = sl; 9432 s = cur; 9433 sl = l; 9434 count++; 9435 if (count > 50) { 9436 GROW; 9437 count = 0; 9438 } 9439 NEXTL(l); 9440 cur = CUR_CHAR(l); 9441 } 9442 buf[len] = 0; 9443 ctxt->instate = XML_PARSER_CONTENT; 9444 if (cur != '>') { 9445 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9446 "CData section not finished\n%.50s\n", buf); 9447 xmlFree(buf); 9448 return; 9449 } 9450 NEXTL(l); 9451 9452 /* 9453 * OK the buffer is to be consumed as cdata. 
9454 */ 9455 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9456 if (ctxt->sax->cdataBlock != NULL) 9457 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9458 else if (ctxt->sax->characters != NULL) 9459 ctxt->sax->characters(ctxt->userData, buf, len); 9460 } 9461 xmlFree(buf); 9462 } 9463 9464 /** 9465 * xmlParseContent: 9466 * @ctxt: an XML parser context 9467 * 9468 * Parse a content: 9469 * 9470 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9471 */ 9472 9473 void 9474 xmlParseContent(xmlParserCtxtPtr ctxt) { 9475 GROW; 9476 while ((RAW != 0) && 9477 ((RAW != '<') || (NXT(1) != '/')) && 9478 (ctxt->instate != XML_PARSER_EOF)) { 9479 const xmlChar *test = CUR_PTR; 9480 unsigned int cons = ctxt->input->consumed; 9481 const xmlChar *cur = ctxt->input->cur; 9482 9483 /* 9484 * First case : a Processing Instruction. 9485 */ 9486 if ((*cur == '<') && (cur[1] == '?')) { 9487 xmlParsePI(ctxt); 9488 } 9489 9490 /* 9491 * Second case : a CDSection 9492 */ 9493 /* 2.6.0 test was *cur not RAW */ 9494 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9495 xmlParseCDSect(ctxt); 9496 } 9497 9498 /* 9499 * Third case : a comment 9500 */ 9501 else if ((*cur == '<') && (NXT(1) == '!') && 9502 (NXT(2) == '-') && (NXT(3) == '-')) { 9503 xmlParseComment(ctxt); 9504 ctxt->instate = XML_PARSER_CONTENT; 9505 } 9506 9507 /* 9508 * Fourth case : a sub-element. 9509 */ 9510 else if (*cur == '<') { 9511 xmlParseElement(ctxt); 9512 } 9513 9514 /* 9515 * Fifth case : a reference. If if has not been resolved, 9516 * parsing returns it's Name, create the node 9517 */ 9518 9519 else if (*cur == '&') { 9520 xmlParseReference(ctxt); 9521 } 9522 9523 /* 9524 * Last case, text. Note that References are handled directly. 9525 */ 9526 else { 9527 xmlParseCharData(ctxt, 0); 9528 } 9529 9530 GROW; 9531 /* 9532 * Pop-up of finished entities. 
9533 */ 9534 while ((RAW == 0) && (ctxt->inputNr > 1)) 9535 xmlPopInput(ctxt); 9536 SHRINK; 9537 9538 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9539 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9540 "detected an error in element content\n"); 9541 ctxt->instate = XML_PARSER_EOF; 9542 break; 9543 } 9544 } 9545 } 9546 9547 /** 9548 * xmlParseElement: 9549 * @ctxt: an XML parser context 9550 * 9551 * parse an XML element, this is highly recursive 9552 * 9553 * [39] element ::= EmptyElemTag | STag content ETag 9554 * 9555 * [ WFC: Element Type Match ] 9556 * The Name in an element's end-tag must match the element type in the 9557 * start-tag. 9558 * 9559 */ 9560 9561 void 9562 xmlParseElement(xmlParserCtxtPtr ctxt) { 9563 const xmlChar *name; 9564 const xmlChar *prefix = NULL; 9565 const xmlChar *URI = NULL; 9566 xmlParserNodeInfo node_info; 9567 int line, tlen; 9568 xmlNodePtr ret; 9569 int nsNr = ctxt->nsNr; 9570 9571 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 9572 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9573 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 9574 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 9575 xmlParserMaxDepth); 9576 ctxt->instate = XML_PARSER_EOF; 9577 return; 9578 } 9579 9580 /* Capture start position */ 9581 if (ctxt->record_info) { 9582 node_info.begin_pos = ctxt->input->consumed + 9583 (CUR_PTR - ctxt->input->base); 9584 node_info.begin_line = ctxt->input->line; 9585 } 9586 9587 if (ctxt->spaceNr == 0) 9588 spacePush(ctxt, -1); 9589 else if (*ctxt->space == -2) 9590 spacePush(ctxt, -1); 9591 else 9592 spacePush(ctxt, *ctxt->space); 9593 9594 line = ctxt->input->line; 9595 #ifdef LIBXML_SAX1_ENABLED 9596 if (ctxt->sax2) 9597 #endif /* LIBXML_SAX1_ENABLED */ 9598 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9599 #ifdef LIBXML_SAX1_ENABLED 9600 else 9601 name = xmlParseStartTag(ctxt); 9602 #endif /* LIBXML_SAX1_ENABLED */ 9603 if (ctxt->instate == XML_PARSER_EOF) 9604 return; 9605 if (name 
== NULL) { 9606 spacePop(ctxt); 9607 return; 9608 } 9609 namePush(ctxt, name); 9610 ret = ctxt->node; 9611 9612 #ifdef LIBXML_VALID_ENABLED 9613 /* 9614 * [ VC: Root Element Type ] 9615 * The Name in the document type declaration must match the element 9616 * type of the root element. 9617 */ 9618 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9619 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9620 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9621 #endif /* LIBXML_VALID_ENABLED */ 9622 9623 /* 9624 * Check for an Empty Element. 9625 */ 9626 if ((RAW == '/') && (NXT(1) == '>')) { 9627 SKIP(2); 9628 if (ctxt->sax2) { 9629 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9630 (!ctxt->disableSAX)) 9631 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 9632 #ifdef LIBXML_SAX1_ENABLED 9633 } else { 9634 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 9635 (!ctxt->disableSAX)) 9636 ctxt->sax->endElement(ctxt->userData, name); 9637 #endif /* LIBXML_SAX1_ENABLED */ 9638 } 9639 namePop(ctxt); 9640 spacePop(ctxt); 9641 if (nsNr != ctxt->nsNr) 9642 nsPop(ctxt, ctxt->nsNr - nsNr); 9643 if ( ret != NULL && ctxt->record_info ) { 9644 node_info.end_pos = ctxt->input->consumed + 9645 (CUR_PTR - ctxt->input->base); 9646 node_info.end_line = ctxt->input->line; 9647 node_info.node = ret; 9648 xmlParserAddNodeInfo(ctxt, &node_info); 9649 } 9650 return; 9651 } 9652 if (RAW == '>') { 9653 NEXT1; 9654 } else { 9655 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 9656 "Couldn't find end of Start Tag %s line %d\n", 9657 name, line, NULL); 9658 9659 /* 9660 * end of parsing of this node. 
9661 */ 9662 nodePop(ctxt); 9663 namePop(ctxt); 9664 spacePop(ctxt); 9665 if (nsNr != ctxt->nsNr) 9666 nsPop(ctxt, ctxt->nsNr - nsNr); 9667 9668 /* 9669 * Capture end position and add node 9670 */ 9671 if ( ret != NULL && ctxt->record_info ) { 9672 node_info.end_pos = ctxt->input->consumed + 9673 (CUR_PTR - ctxt->input->base); 9674 node_info.end_line = ctxt->input->line; 9675 node_info.node = ret; 9676 xmlParserAddNodeInfo(ctxt, &node_info); 9677 } 9678 return; 9679 } 9680 9681 /* 9682 * Parse the content of the element: 9683 */ 9684 xmlParseContent(ctxt); 9685 if (!IS_BYTE_CHAR(RAW)) { 9686 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9687 "Premature end of data in tag %s line %d\n", 9688 name, line, NULL); 9689 9690 /* 9691 * end of parsing of this node. 9692 */ 9693 nodePop(ctxt); 9694 namePop(ctxt); 9695 spacePop(ctxt); 9696 if (nsNr != ctxt->nsNr) 9697 nsPop(ctxt, ctxt->nsNr - nsNr); 9698 return; 9699 } 9700 9701 /* 9702 * parse the end of tag: '</' should be here. 9703 */ 9704 if (ctxt->sax2) { 9705 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 9706 namePop(ctxt); 9707 } 9708 #ifdef LIBXML_SAX1_ENABLED 9709 else 9710 xmlParseEndTag1(ctxt, line); 9711 #endif /* LIBXML_SAX1_ENABLED */ 9712 9713 /* 9714 * Capture end position and add node 9715 */ 9716 if ( ret != NULL && ctxt->record_info ) { 9717 node_info.end_pos = ctxt->input->consumed + 9718 (CUR_PTR - ctxt->input->base); 9719 node_info.end_line = ctxt->input->line; 9720 node_info.node = ret; 9721 xmlParserAddNodeInfo(ctxt, &node_info); 9722 } 9723 } 9724 9725 /** 9726 * xmlParseVersionNum: 9727 * @ctxt: an XML parser context 9728 * 9729 * parse the XML version value. 9730 * 9731 * [26] VersionNum ::= '1.' 
[0-9]+ 9732 * 9733 * In practice allow [0-9].[0-9]+ at that level 9734 * 9735 * Returns the string giving the XML version number, or NULL 9736 */ 9737 xmlChar * 9738 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 9739 xmlChar *buf = NULL; 9740 int len = 0; 9741 int size = 10; 9742 xmlChar cur; 9743 9744 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9745 if (buf == NULL) { 9746 xmlErrMemory(ctxt, NULL); 9747 return(NULL); 9748 } 9749 cur = CUR; 9750 if (!((cur >= '0') && (cur <= '9'))) { 9751 xmlFree(buf); 9752 return(NULL); 9753 } 9754 buf[len++] = cur; 9755 NEXT; 9756 cur=CUR; 9757 if (cur != '.') { 9758 xmlFree(buf); 9759 return(NULL); 9760 } 9761 buf[len++] = cur; 9762 NEXT; 9763 cur=CUR; 9764 while ((cur >= '0') && (cur <= '9')) { 9765 if (len + 1 >= size) { 9766 xmlChar *tmp; 9767 9768 size *= 2; 9769 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9770 if (tmp == NULL) { 9771 xmlFree(buf); 9772 xmlErrMemory(ctxt, NULL); 9773 return(NULL); 9774 } 9775 buf = tmp; 9776 } 9777 buf[len++] = cur; 9778 NEXT; 9779 cur=CUR; 9780 } 9781 buf[len] = 0; 9782 return(buf); 9783 } 9784 9785 /** 9786 * xmlParseVersionInfo: 9787 * @ctxt: an XML parser context 9788 * 9789 * parse the XML version. 9790 * 9791 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 9792 * 9793 * [25] Eq ::= S? '=' S? 9794 * 9795 * Returns the version string, e.g. 
"1.0" 9796 */ 9797 9798 xmlChar * 9799 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 9800 xmlChar *version = NULL; 9801 9802 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 9803 SKIP(7); 9804 SKIP_BLANKS; 9805 if (RAW != '=') { 9806 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9807 return(NULL); 9808 } 9809 NEXT; 9810 SKIP_BLANKS; 9811 if (RAW == '"') { 9812 NEXT; 9813 version = xmlParseVersionNum(ctxt); 9814 if (RAW != '"') { 9815 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9816 } else 9817 NEXT; 9818 } else if (RAW == '\''){ 9819 NEXT; 9820 version = xmlParseVersionNum(ctxt); 9821 if (RAW != '\'') { 9822 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9823 } else 9824 NEXT; 9825 } else { 9826 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9827 } 9828 } 9829 return(version); 9830 } 9831 9832 /** 9833 * xmlParseEncName: 9834 * @ctxt: an XML parser context 9835 * 9836 * parse the XML encoding name 9837 * 9838 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 9839 * 9840 * Returns the encoding name value or NULL 9841 */ 9842 xmlChar * 9843 xmlParseEncName(xmlParserCtxtPtr ctxt) { 9844 xmlChar *buf = NULL; 9845 int len = 0; 9846 int size = 10; 9847 xmlChar cur; 9848 9849 cur = CUR; 9850 if (((cur >= 'a') && (cur <= 'z')) || 9851 ((cur >= 'A') && (cur <= 'Z'))) { 9852 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9853 if (buf == NULL) { 9854 xmlErrMemory(ctxt, NULL); 9855 return(NULL); 9856 } 9857 9858 buf[len++] = cur; 9859 NEXT; 9860 cur = CUR; 9861 while (((cur >= 'a') && (cur <= 'z')) || 9862 ((cur >= 'A') && (cur <= 'Z')) || 9863 ((cur >= '0') && (cur <= '9')) || 9864 (cur == '.') || (cur == '_') || 9865 (cur == '-')) { 9866 if (len + 1 >= size) { 9867 xmlChar *tmp; 9868 9869 size *= 2; 9870 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9871 if (tmp == NULL) { 9872 xmlErrMemory(ctxt, NULL); 9873 xmlFree(buf); 9874 return(NULL); 9875 } 9876 buf = tmp; 9877 } 9878 buf[len++] = cur; 9879 NEXT; 9880 cur = CUR; 
9881 if (cur == 0) { 9882 SHRINK; 9883 GROW; 9884 cur = CUR; 9885 } 9886 } 9887 buf[len] = 0; 9888 } else { 9889 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 9890 } 9891 return(buf); 9892 } 9893 9894 /** 9895 * xmlParseEncodingDecl: 9896 * @ctxt: an XML parser context 9897 * 9898 * parse the XML encoding declaration 9899 * 9900 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 9901 * 9902 * this setups the conversion filters. 9903 * 9904 * Returns the encoding value or NULL 9905 */ 9906 9907 const xmlChar * 9908 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 9909 xmlChar *encoding = NULL; 9910 9911 SKIP_BLANKS; 9912 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 9913 SKIP(8); 9914 SKIP_BLANKS; 9915 if (RAW != '=') { 9916 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9917 return(NULL); 9918 } 9919 NEXT; 9920 SKIP_BLANKS; 9921 if (RAW == '"') { 9922 NEXT; 9923 encoding = xmlParseEncName(ctxt); 9924 if (RAW != '"') { 9925 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9926 } else 9927 NEXT; 9928 } else if (RAW == '\''){ 9929 NEXT; 9930 encoding = xmlParseEncName(ctxt); 9931 if (RAW != '\'') { 9932 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9933 } else 9934 NEXT; 9935 } else { 9936 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9937 } 9938 9939 /* 9940 * Non standard parsing, allowing the user to ignore encoding 9941 */ 9942 if (ctxt->options & XML_PARSE_IGNORE_ENC) 9943 return(encoding); 9944 9945 /* 9946 * UTF-16 encoding stwich has already taken place at this stage, 9947 * more over the little-endian/big-endian selection is already done 9948 */ 9949 if ((encoding != NULL) && 9950 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 9951 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 9952 /* 9953 * If no encoding was passed to the parser, that we are 9954 * using UTF-16 and no decoder is present i.e. 
the 9955 * document is apparently UTF-8 compatible, then raise an 9956 * encoding mismatch fatal error 9957 */ 9958 if ((ctxt->encoding == NULL) && 9959 (ctxt->input->buf != NULL) && 9960 (ctxt->input->buf->encoder == NULL)) { 9961 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 9962 "Document labelled UTF-16 but has UTF-8 content\n"); 9963 } 9964 if (ctxt->encoding != NULL) 9965 xmlFree((xmlChar *) ctxt->encoding); 9966 ctxt->encoding = encoding; 9967 } 9968 /* 9969 * UTF-8 encoding is handled natively 9970 */ 9971 else if ((encoding != NULL) && 9972 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 9973 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 9974 if (ctxt->encoding != NULL) 9975 xmlFree((xmlChar *) ctxt->encoding); 9976 ctxt->encoding = encoding; 9977 } 9978 else if (encoding != NULL) { 9979 xmlCharEncodingHandlerPtr handler; 9980 9981 if (ctxt->input->encoding != NULL) 9982 xmlFree((xmlChar *) ctxt->input->encoding); 9983 ctxt->input->encoding = encoding; 9984 9985 handler = xmlFindCharEncodingHandler((const char *) encoding); 9986 if (handler != NULL) { 9987 xmlSwitchToEncoding(ctxt, handler); 9988 } else { 9989 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 9990 "Unsupported encoding %s\n", encoding); 9991 return(NULL); 9992 } 9993 } 9994 } 9995 return(encoding); 9996 } 9997 9998 /** 9999 * xmlParseSDDecl: 10000 * @ctxt: an XML parser context 10001 * 10002 * parse the XML standalone declaration 10003 * 10004 * [32] SDDecl ::= S 'standalone' Eq 10005 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10006 * 10007 * [ VC: Standalone Document Declaration ] 10008 * TODO The standalone document declaration must have the value "no" 10009 * if any external markup declarations contain declarations of: 10010 * - attributes with default values, if elements to which these 10011 * attributes apply appear in the document without specifications 10012 * of values for these attributes, or 10013 * - entities (other than amp, lt, gt, apos, quot), if 
references 10014 * to those entities appear in the document, or 10015 * - attributes with values subject to normalization, where the 10016 * attribute appears in the document with a value which will change 10017 * as a result of normalization, or 10018 * - element types with element content, if white space occurs directly 10019 * within any instance of those types. 10020 * 10021 * Returns: 10022 * 1 if standalone="yes" 10023 * 0 if standalone="no" 10024 * -2 if standalone attribute is missing or invalid 10025 * (A standalone value of -2 means that the XML declaration was found, 10026 * but no value was specified for the standalone attribute). 10027 */ 10028 10029 int 10030 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10031 int standalone = -2; 10032 10033 SKIP_BLANKS; 10034 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10035 SKIP(10); 10036 SKIP_BLANKS; 10037 if (RAW != '=') { 10038 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10039 return(standalone); 10040 } 10041 NEXT; 10042 SKIP_BLANKS; 10043 if (RAW == '\''){ 10044 NEXT; 10045 if ((RAW == 'n') && (NXT(1) == 'o')) { 10046 standalone = 0; 10047 SKIP(2); 10048 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10049 (NXT(2) == 's')) { 10050 standalone = 1; 10051 SKIP(3); 10052 } else { 10053 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10054 } 10055 if (RAW != '\'') { 10056 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10057 } else 10058 NEXT; 10059 } else if (RAW == '"'){ 10060 NEXT; 10061 if ((RAW == 'n') && (NXT(1) == 'o')) { 10062 standalone = 0; 10063 SKIP(2); 10064 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10065 (NXT(2) == 's')) { 10066 standalone = 1; 10067 SKIP(3); 10068 } else { 10069 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10070 } 10071 if (RAW != '"') { 10072 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10073 } else 10074 NEXT; 10075 } else { 10076 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10077 } 10078 } 10079 return(standalone); 10080 } 
10081 10082 /** 10083 * xmlParseXMLDecl: 10084 * @ctxt: an XML parser context 10085 * 10086 * parse an XML declaration header 10087 * 10088 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10089 */ 10090 10091 void 10092 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10093 xmlChar *version; 10094 10095 /* 10096 * This value for standalone indicates that the document has an 10097 * XML declaration but it does not have a standalone attribute. 10098 * It will be overwritten later if a standalone attribute is found. 10099 */ 10100 ctxt->input->standalone = -2; 10101 10102 /* 10103 * We know that '<?xml' is here. 10104 */ 10105 SKIP(5); 10106 10107 if (!IS_BLANK_CH(RAW)) { 10108 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10109 "Blank needed after '<?xml'\n"); 10110 } 10111 SKIP_BLANKS; 10112 10113 /* 10114 * We must have the VersionInfo here. 10115 */ 10116 version = xmlParseVersionInfo(ctxt); 10117 if (version == NULL) { 10118 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10119 } else { 10120 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10121 /* 10122 * Changed here for XML-1.0 5th edition 10123 */ 10124 if (ctxt->options & XML_PARSE_OLD10) { 10125 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10126 "Unsupported version '%s'\n", 10127 version); 10128 } else { 10129 if ((version[0] == '1') && ((version[1] == '.'))) { 10130 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10131 "Unsupported version '%s'\n", 10132 version, NULL); 10133 } else { 10134 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10135 "Unsupported version '%s'\n", 10136 version); 10137 } 10138 } 10139 } 10140 if (ctxt->version != NULL) 10141 xmlFree((void *) ctxt->version); 10142 ctxt->version = version; 10143 } 10144 10145 /* 10146 * We may have the encoding declaration 10147 */ 10148 if (!IS_BLANK_CH(RAW)) { 10149 if ((RAW == '?') && (NXT(1) == '>')) { 10150 SKIP(2); 10151 return; 10152 } 10153 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed 
here\n"); 10154 } 10155 xmlParseEncodingDecl(ctxt); 10156 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10157 /* 10158 * The XML REC instructs us to stop parsing right here 10159 */ 10160 return; 10161 } 10162 10163 /* 10164 * We may have the standalone status. 10165 */ 10166 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10167 if ((RAW == '?') && (NXT(1) == '>')) { 10168 SKIP(2); 10169 return; 10170 } 10171 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10172 } 10173 10174 /* 10175 * We can grow the input buffer freely at that point 10176 */ 10177 GROW; 10178 10179 SKIP_BLANKS; 10180 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10181 10182 SKIP_BLANKS; 10183 if ((RAW == '?') && (NXT(1) == '>')) { 10184 SKIP(2); 10185 } else if (RAW == '>') { 10186 /* Deprecated old WD ... */ 10187 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10188 NEXT; 10189 } else { 10190 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10191 MOVETO_ENDTAG(CUR_PTR); 10192 NEXT; 10193 } 10194 } 10195 10196 /** 10197 * xmlParseMisc: 10198 * @ctxt: an XML parser context 10199 * 10200 * parse an XML Misc* optional field. 10201 * 10202 * [27] Misc ::= Comment | PI | S 10203 */ 10204 10205 void 10206 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10207 while (((RAW == '<') && (NXT(1) == '?')) || 10208 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10209 IS_BLANK_CH(CUR)) { 10210 if ((RAW == '<') && (NXT(1) == '?')) { 10211 xmlParsePI(ctxt); 10212 } else if (IS_BLANK_CH(CUR)) { 10213 NEXT; 10214 } else 10215 xmlParseComment(ctxt); 10216 } 10217 } 10218 10219 /** 10220 * xmlParseDocument: 10221 * @ctxt: an XML parser context 10222 * 10223 * parse an XML document (and build a tree if using the standard SAX 10224 * interface). 10225 * 10226 * [1] document ::= prolog element Misc* 10227 * 10228 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10229 * 10230 * Returns 0, -1 in case of error. the parser context is augmented 10231 * as a result of the parsing. 
10232 */ 10233 10234 int 10235 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10236 xmlChar start[4]; 10237 xmlCharEncoding enc; 10238 10239 xmlInitParser(); 10240 10241 if ((ctxt == NULL) || (ctxt->input == NULL)) 10242 return(-1); 10243 10244 GROW; 10245 10246 /* 10247 * SAX: detecting the level. 10248 */ 10249 xmlDetectSAX2(ctxt); 10250 10251 /* 10252 * SAX: beginning of the document processing. 10253 */ 10254 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10255 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10256 10257 if ((ctxt->encoding == NULL) && 10258 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10259 /* 10260 * Get the 4 first bytes and decode the charset 10261 * if enc != XML_CHAR_ENCODING_NONE 10262 * plug some encoding conversion routines. 10263 */ 10264 start[0] = RAW; 10265 start[1] = NXT(1); 10266 start[2] = NXT(2); 10267 start[3] = NXT(3); 10268 enc = xmlDetectCharEncoding(&start[0], 4); 10269 if (enc != XML_CHAR_ENCODING_NONE) { 10270 xmlSwitchEncoding(ctxt, enc); 10271 } 10272 } 10273 10274 10275 if (CUR == 0) { 10276 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10277 } 10278 10279 /* 10280 * Check for the XMLDecl in the Prolog. 10281 * do not GROW here to avoid the detected encoder to decode more 10282 * than just the first line, unless the amount of data is really 10283 * too small to hold "<?xml version="1.0" encoding="foo" 10284 */ 10285 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10286 GROW; 10287 } 10288 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10289 10290 /* 10291 * Note that we will switch encoding on the fly. 
10292 */ 10293 xmlParseXMLDecl(ctxt); 10294 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10295 /* 10296 * The XML REC instructs us to stop parsing right here 10297 */ 10298 return(-1); 10299 } 10300 ctxt->standalone = ctxt->input->standalone; 10301 SKIP_BLANKS; 10302 } else { 10303 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10304 } 10305 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10306 ctxt->sax->startDocument(ctxt->userData); 10307 10308 /* 10309 * The Misc part of the Prolog 10310 */ 10311 GROW; 10312 xmlParseMisc(ctxt); 10313 10314 /* 10315 * Then possibly doc type declaration(s) and more Misc 10316 * (doctypedecl Misc*)? 10317 */ 10318 GROW; 10319 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10320 10321 ctxt->inSubset = 1; 10322 xmlParseDocTypeDecl(ctxt); 10323 if (RAW == '[') { 10324 ctxt->instate = XML_PARSER_DTD; 10325 xmlParseInternalSubset(ctxt); 10326 } 10327 10328 /* 10329 * Create and update the external subset. 10330 */ 10331 ctxt->inSubset = 2; 10332 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10333 (!ctxt->disableSAX)) 10334 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10335 ctxt->extSubSystem, ctxt->extSubURI); 10336 ctxt->inSubset = 0; 10337 10338 xmlCleanSpecialAttr(ctxt); 10339 10340 ctxt->instate = XML_PARSER_PROLOG; 10341 xmlParseMisc(ctxt); 10342 } 10343 10344 /* 10345 * Time to start parsing the tree itself 10346 */ 10347 GROW; 10348 if (RAW != '<') { 10349 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10350 "Start tag expected, '<' not found\n"); 10351 } else { 10352 ctxt->instate = XML_PARSER_CONTENT; 10353 xmlParseElement(ctxt); 10354 ctxt->instate = XML_PARSER_EPILOG; 10355 10356 10357 /* 10358 * The Misc part at the end 10359 */ 10360 xmlParseMisc(ctxt); 10361 10362 if (RAW != 0) { 10363 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10364 } 10365 ctxt->instate = XML_PARSER_EOF; 10366 } 10367 10368 /* 10369 * SAX: end of the document 
processing. 10370 */ 10371 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10372 ctxt->sax->endDocument(ctxt->userData); 10373 10374 /* 10375 * Remove locally kept entity definitions if the tree was not built 10376 */ 10377 if ((ctxt->myDoc != NULL) && 10378 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10379 xmlFreeDoc(ctxt->myDoc); 10380 ctxt->myDoc = NULL; 10381 } 10382 10383 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10384 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10385 if (ctxt->valid) 10386 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10387 if (ctxt->nsWellFormed) 10388 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10389 if (ctxt->options & XML_PARSE_OLD10) 10390 ctxt->myDoc->properties |= XML_DOC_OLD10; 10391 } 10392 if (! ctxt->wellFormed) { 10393 ctxt->valid = 0; 10394 return(-1); 10395 } 10396 return(0); 10397 } 10398 10399 /** 10400 * xmlParseExtParsedEnt: 10401 * @ctxt: an XML parser context 10402 * 10403 * parse a general parsed entity 10404 * An external general parsed entity is well-formed if it matches the 10405 * production labeled extParsedEnt. 10406 * 10407 * [78] extParsedEnt ::= TextDecl? content 10408 * 10409 * Returns 0, -1 in case of error. the parser context is augmented 10410 * as a result of the parsing. 10411 */ 10412 10413 int 10414 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10415 xmlChar start[4]; 10416 xmlCharEncoding enc; 10417 10418 if ((ctxt == NULL) || (ctxt->input == NULL)) 10419 return(-1); 10420 10421 xmlDefaultSAXHandlerInit(); 10422 10423 xmlDetectSAX2(ctxt); 10424 10425 GROW; 10426 10427 /* 10428 * SAX: beginning of the document processing. 10429 */ 10430 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10431 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10432 10433 /* 10434 * Get the 4 first bytes and decode the charset 10435 * if enc != XML_CHAR_ENCODING_NONE 10436 * plug some encoding conversion routines. 
10437 */ 10438 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10439 start[0] = RAW; 10440 start[1] = NXT(1); 10441 start[2] = NXT(2); 10442 start[3] = NXT(3); 10443 enc = xmlDetectCharEncoding(start, 4); 10444 if (enc != XML_CHAR_ENCODING_NONE) { 10445 xmlSwitchEncoding(ctxt, enc); 10446 } 10447 } 10448 10449 10450 if (CUR == 0) { 10451 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10452 } 10453 10454 /* 10455 * Check for the XMLDecl in the Prolog. 10456 */ 10457 GROW; 10458 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10459 10460 /* 10461 * Note that we will switch encoding on the fly. 10462 */ 10463 xmlParseXMLDecl(ctxt); 10464 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10465 /* 10466 * The XML REC instructs us to stop parsing right here 10467 */ 10468 return(-1); 10469 } 10470 SKIP_BLANKS; 10471 } else { 10472 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10473 } 10474 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10475 ctxt->sax->startDocument(ctxt->userData); 10476 10477 /* 10478 * Doing validity checking on chunk doesn't make sense 10479 */ 10480 ctxt->instate = XML_PARSER_CONTENT; 10481 ctxt->validate = 0; 10482 ctxt->loadsubset = 0; 10483 ctxt->depth = 0; 10484 10485 xmlParseContent(ctxt); 10486 10487 if ((RAW == '<') && (NXT(1) == '/')) { 10488 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10489 } else if (RAW != 0) { 10490 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10491 } 10492 10493 /* 10494 * SAX: end of the document processing. 10495 */ 10496 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10497 ctxt->sax->endDocument(ctxt->userData); 10498 10499 if (! 
ctxt->wellFormed) return(-1); 10500 return(0); 10501 } 10502 10503 #ifdef LIBXML_PUSH_ENABLED 10504 /************************************************************************ 10505 * * 10506 * Progressive parsing interfaces * 10507 * * 10508 ************************************************************************/ 10509 10510 /** 10511 * xmlParseLookupSequence: 10512 * @ctxt: an XML parser context 10513 * @first: the first char to lookup 10514 * @next: the next char to lookup or zero 10515 * @third: the next char to lookup or zero 10516 * 10517 * Try to find if a sequence (first, next, third) or just (first next) or 10518 * (first) is available in the input stream. 10519 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10520 * to avoid rescanning sequences of bytes, it DOES change the state of the 10521 * parser, do not use liberally. 10522 * 10523 * Returns the index to the current parsing point if the full sequence 10524 * is available, -1 otherwise. 10525 */ 10526 static int 10527 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 10528 xmlChar next, xmlChar third) { 10529 int base, len; 10530 xmlParserInputPtr in; 10531 const xmlChar *buf; 10532 10533 in = ctxt->input; 10534 if (in == NULL) return(-1); 10535 base = in->cur - in->base; 10536 if (base < 0) return(-1); 10537 if (ctxt->checkIndex > base) 10538 base = ctxt->checkIndex; 10539 if (in->buf == NULL) { 10540 buf = in->base; 10541 len = in->length; 10542 } else { 10543 buf = in->buf->buffer->content; 10544 len = in->buf->buffer->use; 10545 } 10546 /* take into account the sequence length */ 10547 if (third) len -= 2; 10548 else if (next) len --; 10549 for (;base < len;base++) { 10550 if (buf[base] == first) { 10551 if (third != 0) { 10552 if ((buf[base + 1] != next) || 10553 (buf[base + 2] != third)) continue; 10554 } else if (next != 0) { 10555 if (buf[base + 1] != next) continue; 10556 } 10557 ctxt->checkIndex = 0; 10558 #ifdef DEBUG_PUSH 10559 if (next == 0) 10560 
xmlGenericError(xmlGenericErrorContext, 10561 "PP: lookup '%c' found at %d\n", 10562 first, base); 10563 else if (third == 0) 10564 xmlGenericError(xmlGenericErrorContext, 10565 "PP: lookup '%c%c' found at %d\n", 10566 first, next, base); 10567 else 10568 xmlGenericError(xmlGenericErrorContext, 10569 "PP: lookup '%c%c%c' found at %d\n", 10570 first, next, third, base); 10571 #endif 10572 return(base - (in->cur - in->base)); 10573 } 10574 } 10575 ctxt->checkIndex = base; 10576 #ifdef DEBUG_PUSH 10577 if (next == 0) 10578 xmlGenericError(xmlGenericErrorContext, 10579 "PP: lookup '%c' failed\n", first); 10580 else if (third == 0) 10581 xmlGenericError(xmlGenericErrorContext, 10582 "PP: lookup '%c%c' failed\n", first, next); 10583 else 10584 xmlGenericError(xmlGenericErrorContext, 10585 "PP: lookup '%c%c%c' failed\n", first, next, third); 10586 #endif 10587 return(-1); 10588 } 10589 10590 /** 10591 * xmlParseGetLasts: 10592 * @ctxt: an XML parser context 10593 * @lastlt: pointer to store the last '<' from the input 10594 * @lastgt: pointer to store the last '>' from the input 10595 * 10596 * Lookup the last < and > in the current chunk 10597 */ 10598 static void 10599 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 10600 const xmlChar **lastgt) { 10601 const xmlChar *tmp; 10602 10603 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 10604 xmlGenericError(xmlGenericErrorContext, 10605 "Internal error: xmlParseGetLasts\n"); 10606 return; 10607 } 10608 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 10609 tmp = ctxt->input->end; 10610 tmp--; 10611 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 10612 if (tmp < ctxt->input->base) { 10613 *lastlt = NULL; 10614 *lastgt = NULL; 10615 } else { 10616 *lastlt = tmp; 10617 tmp++; 10618 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 10619 if (*tmp == '\'') { 10620 tmp++; 10621 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 10622 if (tmp < ctxt->input->end) tmp++; 10623 
} else if (*tmp == '"') { 10624 tmp++; 10625 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 10626 if (tmp < ctxt->input->end) tmp++; 10627 } else 10628 tmp++; 10629 } 10630 if (tmp < ctxt->input->end) 10631 *lastgt = tmp; 10632 else { 10633 tmp = *lastlt; 10634 tmp--; 10635 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 10636 if (tmp >= ctxt->input->base) 10637 *lastgt = tmp; 10638 else 10639 *lastgt = NULL; 10640 } 10641 } 10642 } else { 10643 *lastlt = NULL; 10644 *lastgt = NULL; 10645 } 10646 } 10647 /** 10648 * xmlCheckCdataPush: 10649 * @cur: pointer to the bock of characters 10650 * @len: length of the block in bytes 10651 * 10652 * Check that the block of characters is okay as SCdata content [20] 10653 * 10654 * Returns the number of bytes to pass if okay, a negative index where an 10655 * UTF-8 error occured otherwise 10656 */ 10657 static int 10658 xmlCheckCdataPush(const xmlChar *utf, int len) { 10659 int ix; 10660 unsigned char c; 10661 int codepoint; 10662 10663 if ((utf == NULL) || (len <= 0)) 10664 return(0); 10665 10666 for (ix = 0; ix < len;) { /* string is 0-terminated */ 10667 c = utf[ix]; 10668 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 10669 if (c >= 0x20) 10670 ix++; 10671 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 10672 ix++; 10673 else 10674 return(-ix); 10675 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 10676 if (ix + 2 > len) return(ix); 10677 if ((utf[ix+1] & 0xc0 ) != 0x80) 10678 return(-ix); 10679 codepoint = (utf[ix] & 0x1f) << 6; 10680 codepoint |= utf[ix+1] & 0x3f; 10681 if (!xmlIsCharQ(codepoint)) 10682 return(-ix); 10683 ix += 2; 10684 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 10685 if (ix + 3 > len) return(ix); 10686 if (((utf[ix+1] & 0xc0) != 0x80) || 10687 ((utf[ix+2] & 0xc0) != 0x80)) 10688 return(-ix); 10689 codepoint = (utf[ix] & 0xf) << 12; 10690 codepoint |= (utf[ix+1] & 0x3f) << 6; 10691 codepoint |= utf[ix+2] & 0x3f; 10692 if 
(!xmlIsCharQ(codepoint)) 10693 return(-ix); 10694 ix += 3; 10695 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 10696 if (ix + 4 > len) return(ix); 10697 if (((utf[ix+1] & 0xc0) != 0x80) || 10698 ((utf[ix+2] & 0xc0) != 0x80) || 10699 ((utf[ix+3] & 0xc0) != 0x80)) 10700 return(-ix); 10701 codepoint = (utf[ix] & 0x7) << 18; 10702 codepoint |= (utf[ix+1] & 0x3f) << 12; 10703 codepoint |= (utf[ix+2] & 0x3f) << 6; 10704 codepoint |= utf[ix+3] & 0x3f; 10705 if (!xmlIsCharQ(codepoint)) 10706 return(-ix); 10707 ix += 4; 10708 } else /* unknown encoding */ 10709 return(-ix); 10710 } 10711 return(ix); 10712 } 10713 10714 /** 10715 * xmlParseTryOrFinish: 10716 * @ctxt: an XML parser context 10717 * @terminate: last chunk indicator 10718 * 10719 * Try to progress on parsing 10720 * 10721 * Returns zero if no parsing was possible 10722 */ 10723 static int 10724 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 10725 int ret = 0; 10726 int avail, tlen; 10727 xmlChar cur, next; 10728 const xmlChar *lastlt, *lastgt; 10729 10730 if (ctxt->input == NULL) 10731 return(0); 10732 10733 #ifdef DEBUG_PUSH 10734 switch (ctxt->instate) { 10735 case XML_PARSER_EOF: 10736 xmlGenericError(xmlGenericErrorContext, 10737 "PP: try EOF\n"); break; 10738 case XML_PARSER_START: 10739 xmlGenericError(xmlGenericErrorContext, 10740 "PP: try START\n"); break; 10741 case XML_PARSER_MISC: 10742 xmlGenericError(xmlGenericErrorContext, 10743 "PP: try MISC\n");break; 10744 case XML_PARSER_COMMENT: 10745 xmlGenericError(xmlGenericErrorContext, 10746 "PP: try COMMENT\n");break; 10747 case XML_PARSER_PROLOG: 10748 xmlGenericError(xmlGenericErrorContext, 10749 "PP: try PROLOG\n");break; 10750 case XML_PARSER_START_TAG: 10751 xmlGenericError(xmlGenericErrorContext, 10752 "PP: try START_TAG\n");break; 10753 case XML_PARSER_CONTENT: 10754 xmlGenericError(xmlGenericErrorContext, 10755 "PP: try CONTENT\n");break; 10756 case XML_PARSER_CDATA_SECTION: 10757 
xmlGenericError(xmlGenericErrorContext, 10758 "PP: try CDATA_SECTION\n");break; 10759 case XML_PARSER_END_TAG: 10760 xmlGenericError(xmlGenericErrorContext, 10761 "PP: try END_TAG\n");break; 10762 case XML_PARSER_ENTITY_DECL: 10763 xmlGenericError(xmlGenericErrorContext, 10764 "PP: try ENTITY_DECL\n");break; 10765 case XML_PARSER_ENTITY_VALUE: 10766 xmlGenericError(xmlGenericErrorContext, 10767 "PP: try ENTITY_VALUE\n");break; 10768 case XML_PARSER_ATTRIBUTE_VALUE: 10769 xmlGenericError(xmlGenericErrorContext, 10770 "PP: try ATTRIBUTE_VALUE\n");break; 10771 case XML_PARSER_DTD: 10772 xmlGenericError(xmlGenericErrorContext, 10773 "PP: try DTD\n");break; 10774 case XML_PARSER_EPILOG: 10775 xmlGenericError(xmlGenericErrorContext, 10776 "PP: try EPILOG\n");break; 10777 case XML_PARSER_PI: 10778 xmlGenericError(xmlGenericErrorContext, 10779 "PP: try PI\n");break; 10780 case XML_PARSER_IGNORE: 10781 xmlGenericError(xmlGenericErrorContext, 10782 "PP: try IGNORE\n");break; 10783 } 10784 #endif 10785 10786 if ((ctxt->input != NULL) && 10787 (ctxt->input->cur - ctxt->input->base > 4096)) { 10788 xmlSHRINK(ctxt); 10789 ctxt->checkIndex = 0; 10790 } 10791 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10792 10793 while (1) { 10794 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10795 return(0); 10796 10797 10798 /* 10799 * Pop-up of finished entities. 10800 */ 10801 while ((RAW == 0) && (ctxt->inputNr > 1)) 10802 xmlPopInput(ctxt); 10803 10804 if (ctxt->input == NULL) break; 10805 if (ctxt->input->buf == NULL) 10806 avail = ctxt->input->length - 10807 (ctxt->input->cur - ctxt->input->base); 10808 else { 10809 /* 10810 * If we are operating on converted input, try to flush 10811 * remainng chars to avoid them stalling in the non-converted 10812 * buffer. 
10813 */ 10814 if ((ctxt->input->buf->raw != NULL) && 10815 (ctxt->input->buf->raw->use > 0)) { 10816 int base = ctxt->input->base - 10817 ctxt->input->buf->buffer->content; 10818 int current = ctxt->input->cur - ctxt->input->base; 10819 10820 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 10821 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10822 ctxt->input->cur = ctxt->input->base + current; 10823 ctxt->input->end = 10824 &ctxt->input->buf->buffer->content[ 10825 ctxt->input->buf->buffer->use]; 10826 } 10827 avail = ctxt->input->buf->buffer->use - 10828 (ctxt->input->cur - ctxt->input->base); 10829 } 10830 if (avail < 1) 10831 goto done; 10832 switch (ctxt->instate) { 10833 case XML_PARSER_EOF: 10834 /* 10835 * Document parsing is done ! 10836 */ 10837 goto done; 10838 case XML_PARSER_START: 10839 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 10840 xmlChar start[4]; 10841 xmlCharEncoding enc; 10842 10843 /* 10844 * Very first chars read from the document flow. 10845 */ 10846 if (avail < 4) 10847 goto done; 10848 10849 /* 10850 * Get the 4 first bytes and decode the charset 10851 * if enc != XML_CHAR_ENCODING_NONE 10852 * plug some encoding conversion routines, 10853 * else xmlSwitchEncoding will set to (default) 10854 * UTF8. 
10855 */ 10856 start[0] = RAW; 10857 start[1] = NXT(1); 10858 start[2] = NXT(2); 10859 start[3] = NXT(3); 10860 enc = xmlDetectCharEncoding(start, 4); 10861 xmlSwitchEncoding(ctxt, enc); 10862 break; 10863 } 10864 10865 if (avail < 2) 10866 goto done; 10867 cur = ctxt->input->cur[0]; 10868 next = ctxt->input->cur[1]; 10869 if (cur == 0) { 10870 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10871 ctxt->sax->setDocumentLocator(ctxt->userData, 10872 &xmlDefaultSAXLocator); 10873 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10874 ctxt->instate = XML_PARSER_EOF; 10875 #ifdef DEBUG_PUSH 10876 xmlGenericError(xmlGenericErrorContext, 10877 "PP: entering EOF\n"); 10878 #endif 10879 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10880 ctxt->sax->endDocument(ctxt->userData); 10881 goto done; 10882 } 10883 if ((cur == '<') && (next == '?')) { 10884 /* PI or XML decl */ 10885 if (avail < 5) return(ret); 10886 if ((!terminate) && 10887 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10888 return(ret); 10889 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10890 ctxt->sax->setDocumentLocator(ctxt->userData, 10891 &xmlDefaultSAXLocator); 10892 if ((ctxt->input->cur[2] == 'x') && 10893 (ctxt->input->cur[3] == 'm') && 10894 (ctxt->input->cur[4] == 'l') && 10895 (IS_BLANK_CH(ctxt->input->cur[5]))) { 10896 ret += 5; 10897 #ifdef DEBUG_PUSH 10898 xmlGenericError(xmlGenericErrorContext, 10899 "PP: Parsing XML Decl\n"); 10900 #endif 10901 xmlParseXMLDecl(ctxt); 10902 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10903 /* 10904 * The XML REC instructs us to stop parsing right 10905 * here 10906 */ 10907 ctxt->instate = XML_PARSER_EOF; 10908 return(0); 10909 } 10910 ctxt->standalone = ctxt->input->standalone; 10911 if ((ctxt->encoding == NULL) && 10912 (ctxt->input->encoding != NULL)) 10913 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 10914 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10915 (!ctxt->disableSAX)) 10916 
ctxt->sax->startDocument(ctxt->userData); 10917 ctxt->instate = XML_PARSER_MISC; 10918 #ifdef DEBUG_PUSH 10919 xmlGenericError(xmlGenericErrorContext, 10920 "PP: entering MISC\n"); 10921 #endif 10922 } else { 10923 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10924 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10925 (!ctxt->disableSAX)) 10926 ctxt->sax->startDocument(ctxt->userData); 10927 ctxt->instate = XML_PARSER_MISC; 10928 #ifdef DEBUG_PUSH 10929 xmlGenericError(xmlGenericErrorContext, 10930 "PP: entering MISC\n"); 10931 #endif 10932 } 10933 } else { 10934 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10935 ctxt->sax->setDocumentLocator(ctxt->userData, 10936 &xmlDefaultSAXLocator); 10937 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10938 if (ctxt->version == NULL) { 10939 xmlErrMemory(ctxt, NULL); 10940 break; 10941 } 10942 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10943 (!ctxt->disableSAX)) 10944 ctxt->sax->startDocument(ctxt->userData); 10945 ctxt->instate = XML_PARSER_MISC; 10946 #ifdef DEBUG_PUSH 10947 xmlGenericError(xmlGenericErrorContext, 10948 "PP: entering MISC\n"); 10949 #endif 10950 } 10951 break; 10952 case XML_PARSER_START_TAG: { 10953 const xmlChar *name; 10954 const xmlChar *prefix = NULL; 10955 const xmlChar *URI = NULL; 10956 int nsNr = ctxt->nsNr; 10957 10958 if ((avail < 2) && (ctxt->inputNr == 1)) 10959 goto done; 10960 cur = ctxt->input->cur[0]; 10961 if (cur != '<') { 10962 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10963 ctxt->instate = XML_PARSER_EOF; 10964 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10965 ctxt->sax->endDocument(ctxt->userData); 10966 goto done; 10967 } 10968 if (!terminate) { 10969 if (ctxt->progressive) { 10970 /* > can be found unescaped in attribute values */ 10971 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 10972 goto done; 10973 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 10974 goto done; 10975 } 10976 } 10977 if (ctxt->spaceNr == 0) 10978 
spacePush(ctxt, -1); 10979 else if (*ctxt->space == -2) 10980 spacePush(ctxt, -1); 10981 else 10982 spacePush(ctxt, *ctxt->space); 10983 #ifdef LIBXML_SAX1_ENABLED 10984 if (ctxt->sax2) 10985 #endif /* LIBXML_SAX1_ENABLED */ 10986 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10987 #ifdef LIBXML_SAX1_ENABLED 10988 else 10989 name = xmlParseStartTag(ctxt); 10990 #endif /* LIBXML_SAX1_ENABLED */ 10991 if (ctxt->instate == XML_PARSER_EOF) 10992 goto done; 10993 if (name == NULL) { 10994 spacePop(ctxt); 10995 ctxt->instate = XML_PARSER_EOF; 10996 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10997 ctxt->sax->endDocument(ctxt->userData); 10998 goto done; 10999 } 11000 #ifdef LIBXML_VALID_ENABLED 11001 /* 11002 * [ VC: Root Element Type ] 11003 * The Name in the document type declaration must match 11004 * the element type of the root element. 11005 */ 11006 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11007 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11008 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11009 #endif /* LIBXML_VALID_ENABLED */ 11010 11011 /* 11012 * Check for an Empty Element. 
11013 */ 11014 if ((RAW == '/') && (NXT(1) == '>')) { 11015 SKIP(2); 11016 11017 if (ctxt->sax2) { 11018 if ((ctxt->sax != NULL) && 11019 (ctxt->sax->endElementNs != NULL) && 11020 (!ctxt->disableSAX)) 11021 ctxt->sax->endElementNs(ctxt->userData, name, 11022 prefix, URI); 11023 if (ctxt->nsNr - nsNr > 0) 11024 nsPop(ctxt, ctxt->nsNr - nsNr); 11025 #ifdef LIBXML_SAX1_ENABLED 11026 } else { 11027 if ((ctxt->sax != NULL) && 11028 (ctxt->sax->endElement != NULL) && 11029 (!ctxt->disableSAX)) 11030 ctxt->sax->endElement(ctxt->userData, name); 11031 #endif /* LIBXML_SAX1_ENABLED */ 11032 } 11033 spacePop(ctxt); 11034 if (ctxt->nameNr == 0) { 11035 ctxt->instate = XML_PARSER_EPILOG; 11036 } else { 11037 ctxt->instate = XML_PARSER_CONTENT; 11038 } 11039 break; 11040 } 11041 if (RAW == '>') { 11042 NEXT; 11043 } else { 11044 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11045 "Couldn't find end of Start Tag %s\n", 11046 name); 11047 nodePop(ctxt); 11048 spacePop(ctxt); 11049 } 11050 if (ctxt->sax2) 11051 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11052 #ifdef LIBXML_SAX1_ENABLED 11053 else 11054 namePush(ctxt, name); 11055 #endif /* LIBXML_SAX1_ENABLED */ 11056 11057 ctxt->instate = XML_PARSER_CONTENT; 11058 break; 11059 } 11060 case XML_PARSER_CONTENT: { 11061 const xmlChar *test; 11062 unsigned int cons; 11063 if ((avail < 2) && (ctxt->inputNr == 1)) 11064 goto done; 11065 cur = ctxt->input->cur[0]; 11066 next = ctxt->input->cur[1]; 11067 11068 test = CUR_PTR; 11069 cons = ctxt->input->consumed; 11070 if ((cur == '<') && (next == '/')) { 11071 ctxt->instate = XML_PARSER_END_TAG; 11072 break; 11073 } else if ((cur == '<') && (next == '?')) { 11074 if ((!terminate) && 11075 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11076 goto done; 11077 xmlParsePI(ctxt); 11078 } else if ((cur == '<') && (next != '!')) { 11079 ctxt->instate = XML_PARSER_START_TAG; 11080 break; 11081 } else if ((cur == '<') && (next == '!') && 11082 (ctxt->input->cur[2] == '-') && 11083 
(ctxt->input->cur[3] == '-')) { 11084 int term; 11085 11086 if (avail < 4) 11087 goto done; 11088 ctxt->input->cur += 4; 11089 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11090 ctxt->input->cur -= 4; 11091 if ((!terminate) && (term < 0)) 11092 goto done; 11093 xmlParseComment(ctxt); 11094 ctxt->instate = XML_PARSER_CONTENT; 11095 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11096 (ctxt->input->cur[2] == '[') && 11097 (ctxt->input->cur[3] == 'C') && 11098 (ctxt->input->cur[4] == 'D') && 11099 (ctxt->input->cur[5] == 'A') && 11100 (ctxt->input->cur[6] == 'T') && 11101 (ctxt->input->cur[7] == 'A') && 11102 (ctxt->input->cur[8] == '[')) { 11103 SKIP(9); 11104 ctxt->instate = XML_PARSER_CDATA_SECTION; 11105 break; 11106 } else if ((cur == '<') && (next == '!') && 11107 (avail < 9)) { 11108 goto done; 11109 } else if (cur == '&') { 11110 if ((!terminate) && 11111 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11112 goto done; 11113 xmlParseReference(ctxt); 11114 } else { 11115 /* TODO Avoid the extra copy, handle directly !!! */ 11116 /* 11117 * Goal of the following test is: 11118 * - minimize calls to the SAX 'character' callback 11119 * when they are mergeable 11120 * - handle an problem for isBlank when we only parse 11121 * a sequence of blank chars and the next one is 11122 * not available to check against '<' presence. 11123 * - tries to homogenize the differences in SAX 11124 * callbacks between the push and pull versions 11125 * of the parser. 11126 */ 11127 if ((ctxt->inputNr == 1) && 11128 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11129 if (!terminate) { 11130 if (ctxt->progressive) { 11131 if ((lastlt == NULL) || 11132 (ctxt->input->cur > lastlt)) 11133 goto done; 11134 } else if (xmlParseLookupSequence(ctxt, 11135 '<', 0, 0) < 0) { 11136 goto done; 11137 } 11138 } 11139 } 11140 ctxt->checkIndex = 0; 11141 xmlParseCharData(ctxt, 0); 11142 } 11143 /* 11144 * Pop-up of finished entities. 
11145 */ 11146 while ((RAW == 0) && (ctxt->inputNr > 1)) 11147 xmlPopInput(ctxt); 11148 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11149 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11150 "detected an error in element content\n"); 11151 ctxt->instate = XML_PARSER_EOF; 11152 break; 11153 } 11154 break; 11155 } 11156 case XML_PARSER_END_TAG: 11157 if (avail < 2) 11158 goto done; 11159 if (!terminate) { 11160 if (ctxt->progressive) { 11161 /* > can be found unescaped in attribute values */ 11162 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11163 goto done; 11164 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11165 goto done; 11166 } 11167 } 11168 if (ctxt->sax2) { 11169 xmlParseEndTag2(ctxt, 11170 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11171 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11172 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11173 nameNsPop(ctxt); 11174 } 11175 #ifdef LIBXML_SAX1_ENABLED 11176 else 11177 xmlParseEndTag1(ctxt, 0); 11178 #endif /* LIBXML_SAX1_ENABLED */ 11179 if (ctxt->instate == XML_PARSER_EOF) { 11180 /* Nothing */ 11181 } else if (ctxt->nameNr == 0) { 11182 ctxt->instate = XML_PARSER_EPILOG; 11183 } else { 11184 ctxt->instate = XML_PARSER_CONTENT; 11185 } 11186 break; 11187 case XML_PARSER_CDATA_SECTION: { 11188 /* 11189 * The Push mode need to have the SAX callback for 11190 * cdataBlock merge back contiguous callbacks. 
11191 */ 11192 int base; 11193 11194 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11195 if (base < 0) { 11196 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11197 int tmp; 11198 11199 tmp = xmlCheckCdataPush(ctxt->input->cur, 11200 XML_PARSER_BIG_BUFFER_SIZE); 11201 if (tmp < 0) { 11202 tmp = -tmp; 11203 ctxt->input->cur += tmp; 11204 goto encoding_error; 11205 } 11206 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11207 if (ctxt->sax->cdataBlock != NULL) 11208 ctxt->sax->cdataBlock(ctxt->userData, 11209 ctxt->input->cur, tmp); 11210 else if (ctxt->sax->characters != NULL) 11211 ctxt->sax->characters(ctxt->userData, 11212 ctxt->input->cur, tmp); 11213 } 11214 SKIPL(tmp); 11215 ctxt->checkIndex = 0; 11216 } 11217 goto done; 11218 } else { 11219 int tmp; 11220 11221 tmp = xmlCheckCdataPush(ctxt->input->cur, base); 11222 if ((tmp < 0) || (tmp != base)) { 11223 tmp = -tmp; 11224 ctxt->input->cur += tmp; 11225 goto encoding_error; 11226 } 11227 if ((ctxt->sax != NULL) && (base == 0) && 11228 (ctxt->sax->cdataBlock != NULL) && 11229 (!ctxt->disableSAX)) { 11230 /* 11231 * Special case to provide identical behaviour 11232 * between pull and push parsers on enpty CDATA 11233 * sections 11234 */ 11235 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11236 (!strncmp((const char *)&ctxt->input->cur[-9], 11237 "<![CDATA[", 9))) 11238 ctxt->sax->cdataBlock(ctxt->userData, 11239 BAD_CAST "", 0); 11240 } else if ((ctxt->sax != NULL) && (base > 0) && 11241 (!ctxt->disableSAX)) { 11242 if (ctxt->sax->cdataBlock != NULL) 11243 ctxt->sax->cdataBlock(ctxt->userData, 11244 ctxt->input->cur, base); 11245 else if (ctxt->sax->characters != NULL) 11246 ctxt->sax->characters(ctxt->userData, 11247 ctxt->input->cur, base); 11248 } 11249 SKIPL(base + 3); 11250 ctxt->checkIndex = 0; 11251 ctxt->instate = XML_PARSER_CONTENT; 11252 #ifdef DEBUG_PUSH 11253 xmlGenericError(xmlGenericErrorContext, 11254 "PP: entering CONTENT\n"); 11255 #endif 11256 } 11257 break; 11258 } 11259 case 
XML_PARSER_MISC: 11260 SKIP_BLANKS; 11261 if (ctxt->input->buf == NULL) 11262 avail = ctxt->input->length - 11263 (ctxt->input->cur - ctxt->input->base); 11264 else 11265 avail = ctxt->input->buf->buffer->use - 11266 (ctxt->input->cur - ctxt->input->base); 11267 if (avail < 2) 11268 goto done; 11269 cur = ctxt->input->cur[0]; 11270 next = ctxt->input->cur[1]; 11271 if ((cur == '<') && (next == '?')) { 11272 if ((!terminate) && 11273 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11274 goto done; 11275 #ifdef DEBUG_PUSH 11276 xmlGenericError(xmlGenericErrorContext, 11277 "PP: Parsing PI\n"); 11278 #endif 11279 xmlParsePI(ctxt); 11280 ctxt->checkIndex = 0; 11281 } else if ((cur == '<') && (next == '!') && 11282 (ctxt->input->cur[2] == '-') && 11283 (ctxt->input->cur[3] == '-')) { 11284 if ((!terminate) && 11285 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11286 goto done; 11287 #ifdef DEBUG_PUSH 11288 xmlGenericError(xmlGenericErrorContext, 11289 "PP: Parsing Comment\n"); 11290 #endif 11291 xmlParseComment(ctxt); 11292 ctxt->instate = XML_PARSER_MISC; 11293 ctxt->checkIndex = 0; 11294 } else if ((cur == '<') && (next == '!') && 11295 (ctxt->input->cur[2] == 'D') && 11296 (ctxt->input->cur[3] == 'O') && 11297 (ctxt->input->cur[4] == 'C') && 11298 (ctxt->input->cur[5] == 'T') && 11299 (ctxt->input->cur[6] == 'Y') && 11300 (ctxt->input->cur[7] == 'P') && 11301 (ctxt->input->cur[8] == 'E')) { 11302 if ((!terminate) && 11303 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 11304 goto done; 11305 #ifdef DEBUG_PUSH 11306 xmlGenericError(xmlGenericErrorContext, 11307 "PP: Parsing internal subset\n"); 11308 #endif 11309 ctxt->inSubset = 1; 11310 xmlParseDocTypeDecl(ctxt); 11311 if (RAW == '[') { 11312 ctxt->instate = XML_PARSER_DTD; 11313 #ifdef DEBUG_PUSH 11314 xmlGenericError(xmlGenericErrorContext, 11315 "PP: entering DTD\n"); 11316 #endif 11317 } else { 11318 /* 11319 * Create and update the external subset. 
11320 */ 11321 ctxt->inSubset = 2; 11322 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11323 (ctxt->sax->externalSubset != NULL)) 11324 ctxt->sax->externalSubset(ctxt->userData, 11325 ctxt->intSubName, ctxt->extSubSystem, 11326 ctxt->extSubURI); 11327 ctxt->inSubset = 0; 11328 xmlCleanSpecialAttr(ctxt); 11329 ctxt->instate = XML_PARSER_PROLOG; 11330 #ifdef DEBUG_PUSH 11331 xmlGenericError(xmlGenericErrorContext, 11332 "PP: entering PROLOG\n"); 11333 #endif 11334 } 11335 } else if ((cur == '<') && (next == '!') && 11336 (avail < 9)) { 11337 goto done; 11338 } else { 11339 ctxt->instate = XML_PARSER_START_TAG; 11340 ctxt->progressive = 1; 11341 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11342 #ifdef DEBUG_PUSH 11343 xmlGenericError(xmlGenericErrorContext, 11344 "PP: entering START_TAG\n"); 11345 #endif 11346 } 11347 break; 11348 case XML_PARSER_PROLOG: 11349 SKIP_BLANKS; 11350 if (ctxt->input->buf == NULL) 11351 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11352 else 11353 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11354 if (avail < 2) 11355 goto done; 11356 cur = ctxt->input->cur[0]; 11357 next = ctxt->input->cur[1]; 11358 if ((cur == '<') && (next == '?')) { 11359 if ((!terminate) && 11360 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11361 goto done; 11362 #ifdef DEBUG_PUSH 11363 xmlGenericError(xmlGenericErrorContext, 11364 "PP: Parsing PI\n"); 11365 #endif 11366 xmlParsePI(ctxt); 11367 } else if ((cur == '<') && (next == '!') && 11368 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11369 if ((!terminate) && 11370 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11371 goto done; 11372 #ifdef DEBUG_PUSH 11373 xmlGenericError(xmlGenericErrorContext, 11374 "PP: Parsing Comment\n"); 11375 #endif 11376 xmlParseComment(ctxt); 11377 ctxt->instate = XML_PARSER_PROLOG; 11378 } else if ((cur == '<') && (next == '!') && 11379 (avail < 4)) { 11380 goto done; 11381 } else { 11382 
ctxt->instate = XML_PARSER_START_TAG; 11383 if (ctxt->progressive == 0) 11384 ctxt->progressive = 1; 11385 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11386 #ifdef DEBUG_PUSH 11387 xmlGenericError(xmlGenericErrorContext, 11388 "PP: entering START_TAG\n"); 11389 #endif 11390 } 11391 break; 11392 case XML_PARSER_EPILOG: 11393 SKIP_BLANKS; 11394 if (ctxt->input->buf == NULL) 11395 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11396 else 11397 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11398 if (avail < 2) 11399 goto done; 11400 cur = ctxt->input->cur[0]; 11401 next = ctxt->input->cur[1]; 11402 if ((cur == '<') && (next == '?')) { 11403 if ((!terminate) && 11404 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11405 goto done; 11406 #ifdef DEBUG_PUSH 11407 xmlGenericError(xmlGenericErrorContext, 11408 "PP: Parsing PI\n"); 11409 #endif 11410 xmlParsePI(ctxt); 11411 ctxt->instate = XML_PARSER_EPILOG; 11412 } else if ((cur == '<') && (next == '!') && 11413 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11414 if ((!terminate) && 11415 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11416 goto done; 11417 #ifdef DEBUG_PUSH 11418 xmlGenericError(xmlGenericErrorContext, 11419 "PP: Parsing Comment\n"); 11420 #endif 11421 xmlParseComment(ctxt); 11422 ctxt->instate = XML_PARSER_EPILOG; 11423 } else if ((cur == '<') && (next == '!') && 11424 (avail < 4)) { 11425 goto done; 11426 } else { 11427 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11428 ctxt->instate = XML_PARSER_EOF; 11429 #ifdef DEBUG_PUSH 11430 xmlGenericError(xmlGenericErrorContext, 11431 "PP: entering EOF\n"); 11432 #endif 11433 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11434 ctxt->sax->endDocument(ctxt->userData); 11435 goto done; 11436 } 11437 break; 11438 case XML_PARSER_DTD: { 11439 /* 11440 * Sorry but progressive parsing of the internal subset 11441 * is not expected to be supported. 
We first check that 11442 * the full content of the internal subset is available and 11443 * the parsing is launched only at that point. 11444 * Internal subset ends up with "']' S? '>'" in an unescaped 11445 * section and not in a ']]>' sequence which are conditional 11446 * sections (whoever argued to keep that crap in XML deserve 11447 * a place in hell !). 11448 */ 11449 int base, i; 11450 xmlChar *buf; 11451 xmlChar quote = 0; 11452 11453 base = ctxt->input->cur - ctxt->input->base; 11454 if (base < 0) return(0); 11455 if (ctxt->checkIndex > base) 11456 base = ctxt->checkIndex; 11457 buf = ctxt->input->buf->buffer->content; 11458 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 11459 base++) { 11460 if (quote != 0) { 11461 if (buf[base] == quote) 11462 quote = 0; 11463 continue; 11464 } 11465 if ((quote == 0) && (buf[base] == '<')) { 11466 int found = 0; 11467 /* special handling of comments */ 11468 if (((unsigned int) base + 4 < 11469 ctxt->input->buf->buffer->use) && 11470 (buf[base + 1] == '!') && 11471 (buf[base + 2] == '-') && 11472 (buf[base + 3] == '-')) { 11473 for (;(unsigned int) base + 3 < 11474 ctxt->input->buf->buffer->use; base++) { 11475 if ((buf[base] == '-') && 11476 (buf[base + 1] == '-') && 11477 (buf[base + 2] == '>')) { 11478 found = 1; 11479 base += 2; 11480 break; 11481 } 11482 } 11483 if (!found) { 11484 #if 0 11485 fprintf(stderr, "unfinished comment\n"); 11486 #endif 11487 break; /* for */ 11488 } 11489 continue; 11490 } 11491 } 11492 if (buf[base] == '"') { 11493 quote = '"'; 11494 continue; 11495 } 11496 if (buf[base] == '\'') { 11497 quote = '\''; 11498 continue; 11499 } 11500 if (buf[base] == ']') { 11501 #if 0 11502 fprintf(stderr, "%c%c%c%c: ", buf[base], 11503 buf[base + 1], buf[base + 2], buf[base + 3]); 11504 #endif 11505 if ((unsigned int) base +1 >= 11506 ctxt->input->buf->buffer->use) 11507 break; 11508 if (buf[base + 1] == ']') { 11509 /* conditional crap, skip both ']' ! 
*/ 11510 base++; 11511 continue; 11512 } 11513 for (i = 1; 11514 (unsigned int) base + i < ctxt->input->buf->buffer->use; 11515 i++) { 11516 if (buf[base + i] == '>') { 11517 #if 0 11518 fprintf(stderr, "found\n"); 11519 #endif 11520 goto found_end_int_subset; 11521 } 11522 if (!IS_BLANK_CH(buf[base + i])) { 11523 #if 0 11524 fprintf(stderr, "not found\n"); 11525 #endif 11526 goto not_end_of_int_subset; 11527 } 11528 } 11529 #if 0 11530 fprintf(stderr, "end of stream\n"); 11531 #endif 11532 break; 11533 11534 } 11535 not_end_of_int_subset: 11536 continue; /* for */ 11537 } 11538 /* 11539 * We didn't found the end of the Internal subset 11540 */ 11541 #ifdef DEBUG_PUSH 11542 if (next == 0) 11543 xmlGenericError(xmlGenericErrorContext, 11544 "PP: lookup of int subset end filed\n"); 11545 #endif 11546 goto done; 11547 11548 found_end_int_subset: 11549 xmlParseInternalSubset(ctxt); 11550 ctxt->inSubset = 2; 11551 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11552 (ctxt->sax->externalSubset != NULL)) 11553 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11554 ctxt->extSubSystem, ctxt->extSubURI); 11555 ctxt->inSubset = 0; 11556 xmlCleanSpecialAttr(ctxt); 11557 ctxt->instate = XML_PARSER_PROLOG; 11558 ctxt->checkIndex = 0; 11559 #ifdef DEBUG_PUSH 11560 xmlGenericError(xmlGenericErrorContext, 11561 "PP: entering PROLOG\n"); 11562 #endif 11563 break; 11564 } 11565 case XML_PARSER_COMMENT: 11566 xmlGenericError(xmlGenericErrorContext, 11567 "PP: internal error, state == COMMENT\n"); 11568 ctxt->instate = XML_PARSER_CONTENT; 11569 #ifdef DEBUG_PUSH 11570 xmlGenericError(xmlGenericErrorContext, 11571 "PP: entering CONTENT\n"); 11572 #endif 11573 break; 11574 case XML_PARSER_IGNORE: 11575 xmlGenericError(xmlGenericErrorContext, 11576 "PP: internal error, state == IGNORE"); 11577 ctxt->instate = XML_PARSER_DTD; 11578 #ifdef DEBUG_PUSH 11579 xmlGenericError(xmlGenericErrorContext, 11580 "PP: entering DTD\n"); 11581 #endif 11582 break; 11583 case XML_PARSER_PI: 
11584 xmlGenericError(xmlGenericErrorContext, 11585 "PP: internal error, state == PI\n"); 11586 ctxt->instate = XML_PARSER_CONTENT; 11587 #ifdef DEBUG_PUSH 11588 xmlGenericError(xmlGenericErrorContext, 11589 "PP: entering CONTENT\n"); 11590 #endif 11591 break; 11592 case XML_PARSER_ENTITY_DECL: 11593 xmlGenericError(xmlGenericErrorContext, 11594 "PP: internal error, state == ENTITY_DECL\n"); 11595 ctxt->instate = XML_PARSER_DTD; 11596 #ifdef DEBUG_PUSH 11597 xmlGenericError(xmlGenericErrorContext, 11598 "PP: entering DTD\n"); 11599 #endif 11600 break; 11601 case XML_PARSER_ENTITY_VALUE: 11602 xmlGenericError(xmlGenericErrorContext, 11603 "PP: internal error, state == ENTITY_VALUE\n"); 11604 ctxt->instate = XML_PARSER_CONTENT; 11605 #ifdef DEBUG_PUSH 11606 xmlGenericError(xmlGenericErrorContext, 11607 "PP: entering DTD\n"); 11608 #endif 11609 break; 11610 case XML_PARSER_ATTRIBUTE_VALUE: 11611 xmlGenericError(xmlGenericErrorContext, 11612 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 11613 ctxt->instate = XML_PARSER_START_TAG; 11614 #ifdef DEBUG_PUSH 11615 xmlGenericError(xmlGenericErrorContext, 11616 "PP: entering START_TAG\n"); 11617 #endif 11618 break; 11619 case XML_PARSER_SYSTEM_LITERAL: 11620 xmlGenericError(xmlGenericErrorContext, 11621 "PP: internal error, state == SYSTEM_LITERAL\n"); 11622 ctxt->instate = XML_PARSER_START_TAG; 11623 #ifdef DEBUG_PUSH 11624 xmlGenericError(xmlGenericErrorContext, 11625 "PP: entering START_TAG\n"); 11626 #endif 11627 break; 11628 case XML_PARSER_PUBLIC_LITERAL: 11629 xmlGenericError(xmlGenericErrorContext, 11630 "PP: internal error, state == PUBLIC_LITERAL\n"); 11631 ctxt->instate = XML_PARSER_START_TAG; 11632 #ifdef DEBUG_PUSH 11633 xmlGenericError(xmlGenericErrorContext, 11634 "PP: entering START_TAG\n"); 11635 #endif 11636 break; 11637 } 11638 } 11639 done: 11640 #ifdef DEBUG_PUSH 11641 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 11642 #endif 11643 return(ret); 11644 encoding_error: 11645 { 11646 
char buffer[150]; 11647 11648 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 11649 ctxt->input->cur[0], ctxt->input->cur[1], 11650 ctxt->input->cur[2], ctxt->input->cur[3]); 11651 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 11652 "Input is not proper UTF-8, indicate encoding !\n%s", 11653 BAD_CAST buffer, NULL); 11654 } 11655 return(0); 11656 } 11657 11658 /** 11659 * xmlParseChunk: 11660 * @ctxt: an XML parser context 11661 * @chunk: an char array 11662 * @size: the size in byte of the chunk 11663 * @terminate: last chunk indicator 11664 * 11665 * Parse a Chunk of memory 11666 * 11667 * Returns zero if no error, the xmlParserErrors otherwise. 11668 */ 11669 int 11670 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 11671 int terminate) { 11672 int end_in_lf = 0; 11673 int remain = 0; 11674 11675 if (ctxt == NULL) 11676 return(XML_ERR_INTERNAL_ERROR); 11677 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11678 return(ctxt->errNo); 11679 if (ctxt->instate == XML_PARSER_START) 11680 xmlDetectSAX2(ctxt); 11681 if ((size > 0) && (chunk != NULL) && (!terminate) && 11682 (chunk[size - 1] == '\r')) { 11683 end_in_lf = 1; 11684 size--; 11685 } 11686 11687 xmldecl_done: 11688 11689 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 11690 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 11691 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11692 int cur = ctxt->input->cur - ctxt->input->base; 11693 int res; 11694 11695 /* 11696 * Specific handling if we autodetected an encoding, we should not 11697 * push more than the first line ... 
which depend on the encoding 11698 * And only push the rest once the final encoding was detected 11699 */ 11700 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 11701 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 11702 unsigned int len = 45; 11703 11704 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11705 BAD_CAST "UTF-16")) || 11706 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11707 BAD_CAST "UTF16"))) 11708 len = 90; 11709 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11710 BAD_CAST "UCS-4")) || 11711 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11712 BAD_CAST "UCS4"))) 11713 len = 180; 11714 11715 if (ctxt->input->buf->rawconsumed < len) 11716 len -= ctxt->input->buf->rawconsumed; 11717 11718 /* 11719 * Change size for reading the initial declaration only 11720 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 11721 * will blindly copy extra bytes from memory. 11722 */ 11723 if ((unsigned int) size > len) { 11724 remain = size - len; 11725 size = len; 11726 } else { 11727 remain = 0; 11728 } 11729 } 11730 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11731 if (res < 0) { 11732 ctxt->errNo = XML_PARSER_EOF; 11733 ctxt->disableSAX = 1; 11734 return (XML_PARSER_EOF); 11735 } 11736 ctxt->input->base = ctxt->input->buf->buffer->content + base; 11737 ctxt->input->cur = ctxt->input->base + cur; 11738 ctxt->input->end = 11739 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11740 #ifdef DEBUG_PUSH 11741 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11742 #endif 11743 11744 } else if (ctxt->instate != XML_PARSER_EOF) { 11745 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 11746 xmlParserInputBufferPtr in = ctxt->input->buf; 11747 if ((in->encoder != NULL) && (in->buffer != NULL) && 11748 (in->raw != NULL)) { 11749 int nbchars; 11750 11751 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, 
in->raw); 11752 if (nbchars < 0) { 11753 /* TODO 2.6.0 */ 11754 xmlGenericError(xmlGenericErrorContext, 11755 "xmlParseChunk: encoder error\n"); 11756 return(XML_ERR_INVALID_ENCODING); 11757 } 11758 } 11759 } 11760 } 11761 if (remain != 0) 11762 xmlParseTryOrFinish(ctxt, 0); 11763 else 11764 xmlParseTryOrFinish(ctxt, terminate); 11765 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11766 return(ctxt->errNo); 11767 11768 if (remain != 0) { 11769 chunk += size; 11770 size = remain; 11771 remain = 0; 11772 goto xmldecl_done; 11773 } 11774 if ((end_in_lf == 1) && (ctxt->input != NULL) && 11775 (ctxt->input->buf != NULL)) { 11776 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 11777 } 11778 if (terminate) { 11779 /* 11780 * Check for termination 11781 */ 11782 int avail = 0; 11783 11784 if (ctxt->input != NULL) { 11785 if (ctxt->input->buf == NULL) 11786 avail = ctxt->input->length - 11787 (ctxt->input->cur - ctxt->input->base); 11788 else 11789 avail = ctxt->input->buf->buffer->use - 11790 (ctxt->input->cur - ctxt->input->base); 11791 } 11792 11793 if ((ctxt->instate != XML_PARSER_EOF) && 11794 (ctxt->instate != XML_PARSER_EPILOG)) { 11795 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11796 } 11797 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 11798 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11799 } 11800 if (ctxt->instate != XML_PARSER_EOF) { 11801 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11802 ctxt->sax->endDocument(ctxt->userData); 11803 } 11804 ctxt->instate = XML_PARSER_EOF; 11805 } 11806 return((xmlParserErrors) ctxt->errNo); 11807 } 11808 11809 /************************************************************************ 11810 * * 11811 * I/O front end functions to the parser * 11812 * * 11813 ************************************************************************/ 11814 11815 /** 11816 * xmlCreatePushParserCtxt: 11817 * @sax: a SAX handler 11818 * @user_data: The user data returned on SAX callbacks 11819 * @chunk: a 
pointer to an array of chars 11820 * @size: number of chars in the array 11821 * @filename: an optional file name or URI 11822 * 11823 * Create a parser context for using the XML parser in push mode. 11824 * If @buffer and @size are non-NULL, the data is used to detect 11825 * the encoding. The remaining characters will be parsed so they 11826 * don't need to be fed in again through xmlParseChunk. 11827 * To allow content encoding detection, @size should be >= 4 11828 * The value of @filename is used for fetching external entities 11829 * and error/warning reports. 11830 * 11831 * Returns the new parser context or NULL 11832 */ 11833 11834 xmlParserCtxtPtr 11835 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11836 const char *chunk, int size, const char *filename) { 11837 xmlParserCtxtPtr ctxt; 11838 xmlParserInputPtr inputStream; 11839 xmlParserInputBufferPtr buf; 11840 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 11841 11842 /* 11843 * plug some encoding conversion routines 11844 */ 11845 if ((chunk != NULL) && (size >= 4)) 11846 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 11847 11848 buf = xmlAllocParserInputBuffer(enc); 11849 if (buf == NULL) return(NULL); 11850 11851 ctxt = xmlNewParserCtxt(); 11852 if (ctxt == NULL) { 11853 xmlErrMemory(NULL, "creating parser: out of memory\n"); 11854 xmlFreeParserInputBuffer(buf); 11855 return(NULL); 11856 } 11857 ctxt->dictNames = 1; 11858 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 11859 if (ctxt->pushTab == NULL) { 11860 xmlErrMemory(ctxt, NULL); 11861 xmlFreeParserInputBuffer(buf); 11862 xmlFreeParserCtxt(ctxt); 11863 return(NULL); 11864 } 11865 if (sax != NULL) { 11866 #ifdef LIBXML_SAX1_ENABLED 11867 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11868 #endif /* LIBXML_SAX1_ENABLED */ 11869 xmlFree(ctxt->sax); 11870 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11871 if (ctxt->sax == NULL) { 11872 xmlErrMemory(ctxt, NULL); 
11873 xmlFreeParserInputBuffer(buf); 11874 xmlFreeParserCtxt(ctxt); 11875 return(NULL); 11876 } 11877 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11878 if (sax->initialized == XML_SAX2_MAGIC) 11879 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11880 else 11881 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11882 if (user_data != NULL) 11883 ctxt->userData = user_data; 11884 } 11885 if (filename == NULL) { 11886 ctxt->directory = NULL; 11887 } else { 11888 ctxt->directory = xmlParserGetDirectory(filename); 11889 } 11890 11891 inputStream = xmlNewInputStream(ctxt); 11892 if (inputStream == NULL) { 11893 xmlFreeParserCtxt(ctxt); 11894 xmlFreeParserInputBuffer(buf); 11895 return(NULL); 11896 } 11897 11898 if (filename == NULL) 11899 inputStream->filename = NULL; 11900 else { 11901 inputStream->filename = (char *) 11902 xmlCanonicPath((const xmlChar *) filename); 11903 if (inputStream->filename == NULL) { 11904 xmlFreeParserCtxt(ctxt); 11905 xmlFreeParserInputBuffer(buf); 11906 return(NULL); 11907 } 11908 } 11909 inputStream->buf = buf; 11910 inputStream->base = inputStream->buf->buffer->content; 11911 inputStream->cur = inputStream->buf->buffer->content; 11912 inputStream->end = 11913 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 11914 11915 inputPush(ctxt, inputStream); 11916 11917 /* 11918 * If the caller didn't provide an initial 'chunk' for determining 11919 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 11920 * that it can be automatically determined later 11921 */ 11922 if ((size == 0) || (chunk == NULL)) { 11923 ctxt->charset = XML_CHAR_ENCODING_NONE; 11924 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 11925 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11926 int cur = ctxt->input->cur - ctxt->input->base; 11927 11928 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11929 11930 ctxt->input->base = ctxt->input->buf->buffer->content + base; 11931 ctxt->input->cur = 
ctxt->input->base + cur; 11932 ctxt->input->end = 11933 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11934 #ifdef DEBUG_PUSH 11935 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11936 #endif 11937 } 11938 11939 if (enc != XML_CHAR_ENCODING_NONE) { 11940 xmlSwitchEncoding(ctxt, enc); 11941 } 11942 11943 return(ctxt); 11944 } 11945 #endif /* LIBXML_PUSH_ENABLED */ 11946 11947 /** 11948 * xmlStopParser: 11949 * @ctxt: an XML parser context 11950 * 11951 * Blocks further parser processing 11952 */ 11953 void 11954 xmlStopParser(xmlParserCtxtPtr ctxt) { 11955 if (ctxt == NULL) 11956 return; 11957 ctxt->instate = XML_PARSER_EOF; 11958 ctxt->disableSAX = 1; 11959 if (ctxt->input != NULL) { 11960 ctxt->input->cur = BAD_CAST""; 11961 ctxt->input->base = ctxt->input->cur; 11962 } 11963 } 11964 11965 /** 11966 * xmlCreateIOParserCtxt: 11967 * @sax: a SAX handler 11968 * @user_data: The user data returned on SAX callbacks 11969 * @ioread: an I/O read function 11970 * @ioclose: an I/O close function 11971 * @ioctx: an I/O handler 11972 * @enc: the charset encoding if known 11973 * 11974 * Create a parser context for using the XML parser with an existing 11975 * I/O stream 11976 * 11977 * Returns the new parser context or NULL 11978 */ 11979 xmlParserCtxtPtr 11980 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11981 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 11982 void *ioctx, xmlCharEncoding enc) { 11983 xmlParserCtxtPtr ctxt; 11984 xmlParserInputPtr inputStream; 11985 xmlParserInputBufferPtr buf; 11986 11987 if (ioread == NULL) return(NULL); 11988 11989 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 11990 if (buf == NULL) return(NULL); 11991 11992 ctxt = xmlNewParserCtxt(); 11993 if (ctxt == NULL) { 11994 xmlFreeParserInputBuffer(buf); 11995 return(NULL); 11996 } 11997 if (sax != NULL) { 11998 #ifdef LIBXML_SAX1_ENABLED 11999 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 
12000 #endif /* LIBXML_SAX1_ENABLED */ 12001 xmlFree(ctxt->sax); 12002 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12003 if (ctxt->sax == NULL) { 12004 xmlErrMemory(ctxt, NULL); 12005 xmlFreeParserCtxt(ctxt); 12006 return(NULL); 12007 } 12008 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12009 if (sax->initialized == XML_SAX2_MAGIC) 12010 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12011 else 12012 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12013 if (user_data != NULL) 12014 ctxt->userData = user_data; 12015 } 12016 12017 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12018 if (inputStream == NULL) { 12019 xmlFreeParserCtxt(ctxt); 12020 return(NULL); 12021 } 12022 inputPush(ctxt, inputStream); 12023 12024 return(ctxt); 12025 } 12026 12027 #ifdef LIBXML_VALID_ENABLED 12028 /************************************************************************ 12029 * * 12030 * Front ends when parsing a DTD * 12031 * * 12032 ************************************************************************/ 12033 12034 /** 12035 * xmlIOParseDTD: 12036 * @sax: the SAX handler block or NULL 12037 * @input: an Input Buffer 12038 * @enc: the charset encoding if known 12039 * 12040 * Load and parse a DTD 12041 * 12042 * Returns the resulting xmlDtdPtr or NULL in case of error. 12043 * @input will be freed by the function in any case. 
12044 */ 12045 12046 xmlDtdPtr 12047 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12048 xmlCharEncoding enc) { 12049 xmlDtdPtr ret = NULL; 12050 xmlParserCtxtPtr ctxt; 12051 xmlParserInputPtr pinput = NULL; 12052 xmlChar start[4]; 12053 12054 if (input == NULL) 12055 return(NULL); 12056 12057 ctxt = xmlNewParserCtxt(); 12058 if (ctxt == NULL) { 12059 xmlFreeParserInputBuffer(input); 12060 return(NULL); 12061 } 12062 12063 /* 12064 * Set-up the SAX context 12065 */ 12066 if (sax != NULL) { 12067 if (ctxt->sax != NULL) 12068 xmlFree(ctxt->sax); 12069 ctxt->sax = sax; 12070 ctxt->userData = ctxt; 12071 } 12072 xmlDetectSAX2(ctxt); 12073 12074 /* 12075 * generate a parser input from the I/O handler 12076 */ 12077 12078 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12079 if (pinput == NULL) { 12080 if (sax != NULL) ctxt->sax = NULL; 12081 xmlFreeParserInputBuffer(input); 12082 xmlFreeParserCtxt(ctxt); 12083 return(NULL); 12084 } 12085 12086 /* 12087 * plug some encoding conversion routines here. 12088 */ 12089 if (xmlPushInput(ctxt, pinput) < 0) { 12090 if (sax != NULL) ctxt->sax = NULL; 12091 xmlFreeParserCtxt(ctxt); 12092 return(NULL); 12093 } 12094 if (enc != XML_CHAR_ENCODING_NONE) { 12095 xmlSwitchEncoding(ctxt, enc); 12096 } 12097 12098 pinput->filename = NULL; 12099 pinput->line = 1; 12100 pinput->col = 1; 12101 pinput->base = ctxt->input->cur; 12102 pinput->cur = ctxt->input->cur; 12103 pinput->free = NULL; 12104 12105 /* 12106 * let's parse that entity knowing it's an external subset. 
12107 */ 12108 ctxt->inSubset = 2; 12109 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12110 if (ctxt->myDoc == NULL) { 12111 xmlErrMemory(ctxt, "New Doc failed"); 12112 return(NULL); 12113 } 12114 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12115 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12116 BAD_CAST "none", BAD_CAST "none"); 12117 12118 if ((enc == XML_CHAR_ENCODING_NONE) && 12119 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12120 /* 12121 * Get the 4 first bytes and decode the charset 12122 * if enc != XML_CHAR_ENCODING_NONE 12123 * plug some encoding conversion routines. 12124 */ 12125 start[0] = RAW; 12126 start[1] = NXT(1); 12127 start[2] = NXT(2); 12128 start[3] = NXT(3); 12129 enc = xmlDetectCharEncoding(start, 4); 12130 if (enc != XML_CHAR_ENCODING_NONE) { 12131 xmlSwitchEncoding(ctxt, enc); 12132 } 12133 } 12134 12135 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12136 12137 if (ctxt->myDoc != NULL) { 12138 if (ctxt->wellFormed) { 12139 ret = ctxt->myDoc->extSubset; 12140 ctxt->myDoc->extSubset = NULL; 12141 if (ret != NULL) { 12142 xmlNodePtr tmp; 12143 12144 ret->doc = NULL; 12145 tmp = ret->children; 12146 while (tmp != NULL) { 12147 tmp->doc = NULL; 12148 tmp = tmp->next; 12149 } 12150 } 12151 } else { 12152 ret = NULL; 12153 } 12154 xmlFreeDoc(ctxt->myDoc); 12155 ctxt->myDoc = NULL; 12156 } 12157 if (sax != NULL) ctxt->sax = NULL; 12158 xmlFreeParserCtxt(ctxt); 12159 12160 return(ret); 12161 } 12162 12163 /** 12164 * xmlSAXParseDTD: 12165 * @sax: the SAX handler block 12166 * @ExternalID: a NAME* containing the External ID of the DTD 12167 * @SystemID: a NAME* containing the URL to the DTD 12168 * 12169 * Load and parse an external subset. 12170 * 12171 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12172 */ 12173 12174 xmlDtdPtr 12175 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12176 const xmlChar *SystemID) { 12177 xmlDtdPtr ret = NULL; 12178 xmlParserCtxtPtr ctxt; 12179 xmlParserInputPtr input = NULL; 12180 xmlCharEncoding enc; 12181 xmlChar* systemIdCanonic; 12182 12183 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12184 12185 ctxt = xmlNewParserCtxt(); 12186 if (ctxt == NULL) { 12187 return(NULL); 12188 } 12189 12190 /* 12191 * Set-up the SAX context 12192 */ 12193 if (sax != NULL) { 12194 if (ctxt->sax != NULL) 12195 xmlFree(ctxt->sax); 12196 ctxt->sax = sax; 12197 ctxt->userData = ctxt; 12198 } 12199 12200 /* 12201 * Canonicalise the system ID 12202 */ 12203 systemIdCanonic = xmlCanonicPath(SystemID); 12204 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12205 xmlFreeParserCtxt(ctxt); 12206 return(NULL); 12207 } 12208 12209 /* 12210 * Ask the Entity resolver to load the damn thing 12211 */ 12212 12213 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12214 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12215 systemIdCanonic); 12216 if (input == NULL) { 12217 if (sax != NULL) ctxt->sax = NULL; 12218 xmlFreeParserCtxt(ctxt); 12219 if (systemIdCanonic != NULL) 12220 xmlFree(systemIdCanonic); 12221 return(NULL); 12222 } 12223 12224 /* 12225 * plug some encoding conversion routines here. 
12226 */ 12227 if (xmlPushInput(ctxt, input) < 0) { 12228 if (sax != NULL) ctxt->sax = NULL; 12229 xmlFreeParserCtxt(ctxt); 12230 if (systemIdCanonic != NULL) 12231 xmlFree(systemIdCanonic); 12232 return(NULL); 12233 } 12234 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12235 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12236 xmlSwitchEncoding(ctxt, enc); 12237 } 12238 12239 if (input->filename == NULL) 12240 input->filename = (char *) systemIdCanonic; 12241 else 12242 xmlFree(systemIdCanonic); 12243 input->line = 1; 12244 input->col = 1; 12245 input->base = ctxt->input->cur; 12246 input->cur = ctxt->input->cur; 12247 input->free = NULL; 12248 12249 /* 12250 * let's parse that entity knowing it's an external subset. 12251 */ 12252 ctxt->inSubset = 2; 12253 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12254 if (ctxt->myDoc == NULL) { 12255 xmlErrMemory(ctxt, "New Doc failed"); 12256 if (sax != NULL) ctxt->sax = NULL; 12257 xmlFreeParserCtxt(ctxt); 12258 return(NULL); 12259 } 12260 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12261 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12262 ExternalID, SystemID); 12263 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12264 12265 if (ctxt->myDoc != NULL) { 12266 if (ctxt->wellFormed) { 12267 ret = ctxt->myDoc->extSubset; 12268 ctxt->myDoc->extSubset = NULL; 12269 if (ret != NULL) { 12270 xmlNodePtr tmp; 12271 12272 ret->doc = NULL; 12273 tmp = ret->children; 12274 while (tmp != NULL) { 12275 tmp->doc = NULL; 12276 tmp = tmp->next; 12277 } 12278 } 12279 } else { 12280 ret = NULL; 12281 } 12282 xmlFreeDoc(ctxt->myDoc); 12283 ctxt->myDoc = NULL; 12284 } 12285 if (sax != NULL) ctxt->sax = NULL; 12286 xmlFreeParserCtxt(ctxt); 12287 12288 return(ret); 12289 } 12290 12291 12292 /** 12293 * xmlParseDTD: 12294 * @ExternalID: a NAME* containing the External ID of the DTD 12295 * @SystemID: a NAME* containing the URL to the DTD 12296 * 12297 * Load and parse an external subset. 
12298 * 12299 * Returns the resulting xmlDtdPtr or NULL in case of error. 12300 */ 12301 12302 xmlDtdPtr 12303 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12304 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12305 } 12306 #endif /* LIBXML_VALID_ENABLED */ 12307 12308 /************************************************************************ 12309 * * 12310 * Front ends when parsing an Entity * 12311 * * 12312 ************************************************************************/ 12313 12314 /** 12315 * xmlParseCtxtExternalEntity: 12316 * @ctx: the existing parsing context 12317 * @URL: the URL for the entity to load 12318 * @ID: the System ID for the entity to load 12319 * @lst: the return value for the set of parsed nodes 12320 * 12321 * Parse an external general entity within an existing parsing context 12322 * An external general parsed entity is well-formed if it matches the 12323 * production labeled extParsedEnt. 12324 * 12325 * [78] extParsedEnt ::= TextDecl? 
content 12326 * 12327 * Returns 0 if the entity is well formed, -1 in case of args problem and 12328 * the parser error code otherwise 12329 */ 12330 12331 int 12332 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12333 const xmlChar *ID, xmlNodePtr *lst) { 12334 xmlParserCtxtPtr ctxt; 12335 xmlDocPtr newDoc; 12336 xmlNodePtr newRoot; 12337 xmlSAXHandlerPtr oldsax = NULL; 12338 int ret = 0; 12339 xmlChar start[4]; 12340 xmlCharEncoding enc; 12341 12342 if (ctx == NULL) return(-1); 12343 12344 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12345 (ctx->depth > 1024)) { 12346 return(XML_ERR_ENTITY_LOOP); 12347 } 12348 12349 if (lst != NULL) 12350 *lst = NULL; 12351 if ((URL == NULL) && (ID == NULL)) 12352 return(-1); 12353 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12354 return(-1); 12355 12356 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12357 if (ctxt == NULL) { 12358 return(-1); 12359 } 12360 12361 oldsax = ctxt->sax; 12362 ctxt->sax = ctx->sax; 12363 xmlDetectSAX2(ctxt); 12364 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12365 if (newDoc == NULL) { 12366 xmlFreeParserCtxt(ctxt); 12367 return(-1); 12368 } 12369 newDoc->properties = XML_DOC_INTERNAL; 12370 if (ctx->myDoc->dict) { 12371 newDoc->dict = ctx->myDoc->dict; 12372 xmlDictReference(newDoc->dict); 12373 } 12374 if (ctx->myDoc != NULL) { 12375 newDoc->intSubset = ctx->myDoc->intSubset; 12376 newDoc->extSubset = ctx->myDoc->extSubset; 12377 } 12378 if (ctx->myDoc->URL != NULL) { 12379 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12380 } 12381 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12382 if (newRoot == NULL) { 12383 ctxt->sax = oldsax; 12384 xmlFreeParserCtxt(ctxt); 12385 newDoc->intSubset = NULL; 12386 newDoc->extSubset = NULL; 12387 xmlFreeDoc(newDoc); 12388 return(-1); 12389 } 12390 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12391 nodePush(ctxt, newDoc->children); 12392 if (ctx->myDoc == NULL) { 12393 
ctxt->myDoc = newDoc; 12394 } else { 12395 ctxt->myDoc = ctx->myDoc; 12396 newDoc->children->doc = ctx->myDoc; 12397 } 12398 12399 /* 12400 * Get the 4 first bytes and decode the charset 12401 * if enc != XML_CHAR_ENCODING_NONE 12402 * plug some encoding conversion routines. 12403 */ 12404 GROW 12405 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12406 start[0] = RAW; 12407 start[1] = NXT(1); 12408 start[2] = NXT(2); 12409 start[3] = NXT(3); 12410 enc = xmlDetectCharEncoding(start, 4); 12411 if (enc != XML_CHAR_ENCODING_NONE) { 12412 xmlSwitchEncoding(ctxt, enc); 12413 } 12414 } 12415 12416 /* 12417 * Parse a possible text declaration first 12418 */ 12419 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12420 xmlParseTextDecl(ctxt); 12421 /* 12422 * An XML-1.0 document can't reference an entity not XML-1.0 12423 */ 12424 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12425 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12426 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12427 "Version mismatch between document and entity\n"); 12428 } 12429 } 12430 12431 /* 12432 * Doing validity checking on chunk doesn't make sense 12433 */ 12434 ctxt->instate = XML_PARSER_CONTENT; 12435 ctxt->validate = ctx->validate; 12436 ctxt->valid = ctx->valid; 12437 ctxt->loadsubset = ctx->loadsubset; 12438 ctxt->depth = ctx->depth + 1; 12439 ctxt->replaceEntities = ctx->replaceEntities; 12440 if (ctxt->validate) { 12441 ctxt->vctxt.error = ctx->vctxt.error; 12442 ctxt->vctxt.warning = ctx->vctxt.warning; 12443 } else { 12444 ctxt->vctxt.error = NULL; 12445 ctxt->vctxt.warning = NULL; 12446 } 12447 ctxt->vctxt.nodeTab = NULL; 12448 ctxt->vctxt.nodeNr = 0; 12449 ctxt->vctxt.nodeMax = 0; 12450 ctxt->vctxt.node = NULL; 12451 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12452 ctxt->dict = ctx->dict; 12453 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12454 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12455 
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12456 ctxt->dictNames = ctx->dictNames; 12457 ctxt->attsDefault = ctx->attsDefault; 12458 ctxt->attsSpecial = ctx->attsSpecial; 12459 ctxt->linenumbers = ctx->linenumbers; 12460 12461 xmlParseContent(ctxt); 12462 12463 ctx->validate = ctxt->validate; 12464 ctx->valid = ctxt->valid; 12465 if ((RAW == '<') && (NXT(1) == '/')) { 12466 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12467 } else if (RAW != 0) { 12468 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12469 } 12470 if (ctxt->node != newDoc->children) { 12471 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12472 } 12473 12474 if (!ctxt->wellFormed) { 12475 if (ctxt->errNo == 0) 12476 ret = 1; 12477 else 12478 ret = ctxt->errNo; 12479 } else { 12480 if (lst != NULL) { 12481 xmlNodePtr cur; 12482 12483 /* 12484 * Return the newly created nodeset after unlinking it from 12485 * they pseudo parent. 12486 */ 12487 cur = newDoc->children->children; 12488 *lst = cur; 12489 while (cur != NULL) { 12490 cur->parent = NULL; 12491 cur = cur->next; 12492 } 12493 newDoc->children->children = NULL; 12494 } 12495 ret = 0; 12496 } 12497 ctxt->sax = oldsax; 12498 ctxt->dict = NULL; 12499 ctxt->attsDefault = NULL; 12500 ctxt->attsSpecial = NULL; 12501 xmlFreeParserCtxt(ctxt); 12502 newDoc->intSubset = NULL; 12503 newDoc->extSubset = NULL; 12504 xmlFreeDoc(newDoc); 12505 12506 return(ret); 12507 } 12508 12509 /** 12510 * xmlParseExternalEntityPrivate: 12511 * @doc: the document the chunk pertains to 12512 * @oldctxt: the previous parser context if available 12513 * @sax: the SAX handler bloc (possibly NULL) 12514 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12515 * @depth: Used for loop detection, use 0 12516 * @URL: the URL for the entity to load 12517 * @ID: the System ID for the entity to load 12518 * @list: the return value for the set of parsed nodes 12519 * 12520 * Private version of xmlParseExternalEntity() 12521 * 
12522 * Returns 0 if the entity is well formed, -1 in case of args problem and 12523 * the parser error code otherwise 12524 */ 12525 12526 static xmlParserErrors 12527 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 12528 xmlSAXHandlerPtr sax, 12529 void *user_data, int depth, const xmlChar *URL, 12530 const xmlChar *ID, xmlNodePtr *list) { 12531 xmlParserCtxtPtr ctxt; 12532 xmlDocPtr newDoc; 12533 xmlNodePtr newRoot; 12534 xmlSAXHandlerPtr oldsax = NULL; 12535 xmlParserErrors ret = XML_ERR_OK; 12536 xmlChar start[4]; 12537 xmlCharEncoding enc; 12538 12539 if (((depth > 40) && 12540 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 12541 (depth > 1024)) { 12542 return(XML_ERR_ENTITY_LOOP); 12543 } 12544 12545 if (list != NULL) 12546 *list = NULL; 12547 if ((URL == NULL) && (ID == NULL)) 12548 return(XML_ERR_INTERNAL_ERROR); 12549 if (doc == NULL) 12550 return(XML_ERR_INTERNAL_ERROR); 12551 12552 12553 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 12554 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12555 ctxt->userData = ctxt; 12556 if (oldctxt != NULL) { 12557 ctxt->_private = oldctxt->_private; 12558 ctxt->loadsubset = oldctxt->loadsubset; 12559 ctxt->validate = oldctxt->validate; 12560 ctxt->external = oldctxt->external; 12561 ctxt->record_info = oldctxt->record_info; 12562 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 12563 ctxt->node_seq.length = oldctxt->node_seq.length; 12564 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 12565 } else { 12566 /* 12567 * Doing validity checking on chunk without context 12568 * doesn't make sense 12569 */ 12570 ctxt->_private = NULL; 12571 ctxt->validate = 0; 12572 ctxt->external = 2; 12573 ctxt->loadsubset = 0; 12574 } 12575 if (sax != NULL) { 12576 oldsax = ctxt->sax; 12577 ctxt->sax = sax; 12578 if (user_data != NULL) 12579 ctxt->userData = user_data; 12580 } 12581 xmlDetectSAX2(ctxt); 12582 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12583 if (newDoc == 
NULL) { 12584 ctxt->node_seq.maximum = 0; 12585 ctxt->node_seq.length = 0; 12586 ctxt->node_seq.buffer = NULL; 12587 xmlFreeParserCtxt(ctxt); 12588 return(XML_ERR_INTERNAL_ERROR); 12589 } 12590 newDoc->properties = XML_DOC_INTERNAL; 12591 newDoc->intSubset = doc->intSubset; 12592 newDoc->extSubset = doc->extSubset; 12593 newDoc->dict = doc->dict; 12594 xmlDictReference(newDoc->dict); 12595 12596 if (doc->URL != NULL) { 12597 newDoc->URL = xmlStrdup(doc->URL); 12598 } 12599 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12600 if (newRoot == NULL) { 12601 if (sax != NULL) 12602 ctxt->sax = oldsax; 12603 ctxt->node_seq.maximum = 0; 12604 ctxt->node_seq.length = 0; 12605 ctxt->node_seq.buffer = NULL; 12606 xmlFreeParserCtxt(ctxt); 12607 newDoc->intSubset = NULL; 12608 newDoc->extSubset = NULL; 12609 xmlFreeDoc(newDoc); 12610 return(XML_ERR_INTERNAL_ERROR); 12611 } 12612 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12613 nodePush(ctxt, newDoc->children); 12614 ctxt->myDoc = doc; 12615 newRoot->doc = doc; 12616 12617 /* 12618 * Get the 4 first bytes and decode the charset 12619 * if enc != XML_CHAR_ENCODING_NONE 12620 * plug some encoding conversion routines. 
12621 */ 12622 GROW; 12623 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12624 start[0] = RAW; 12625 start[1] = NXT(1); 12626 start[2] = NXT(2); 12627 start[3] = NXT(3); 12628 enc = xmlDetectCharEncoding(start, 4); 12629 if (enc != XML_CHAR_ENCODING_NONE) { 12630 xmlSwitchEncoding(ctxt, enc); 12631 } 12632 } 12633 12634 /* 12635 * Parse a possible text declaration first 12636 */ 12637 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12638 xmlParseTextDecl(ctxt); 12639 } 12640 12641 ctxt->instate = XML_PARSER_CONTENT; 12642 ctxt->depth = depth; 12643 12644 xmlParseContent(ctxt); 12645 12646 if ((RAW == '<') && (NXT(1) == '/')) { 12647 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12648 } else if (RAW != 0) { 12649 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12650 } 12651 if (ctxt->node != newDoc->children) { 12652 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12653 } 12654 12655 if (!ctxt->wellFormed) { 12656 if (ctxt->errNo == 0) 12657 ret = XML_ERR_INTERNAL_ERROR; 12658 else 12659 ret = (xmlParserErrors)ctxt->errNo; 12660 } else { 12661 if (list != NULL) { 12662 xmlNodePtr cur; 12663 12664 /* 12665 * Return the newly created nodeset after unlinking it from 12666 * they pseudo parent. 12667 */ 12668 cur = newDoc->children->children; 12669 *list = cur; 12670 while (cur != NULL) { 12671 cur->parent = NULL; 12672 cur = cur->next; 12673 } 12674 newDoc->children->children = NULL; 12675 } 12676 ret = XML_ERR_OK; 12677 } 12678 12679 /* 12680 * Record in the parent context the number of entities replacement 12681 * done when parsing that reference. 
12682 */ 12683 if (oldctxt != NULL) 12684 oldctxt->nbentities += ctxt->nbentities; 12685 12686 /* 12687 * Also record the size of the entity parsed 12688 */ 12689 if (ctxt->input != NULL) { 12690 oldctxt->sizeentities += ctxt->input->consumed; 12691 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 12692 } 12693 /* 12694 * And record the last error if any 12695 */ 12696 if (ctxt->lastError.code != XML_ERR_OK) 12697 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12698 12699 if (sax != NULL) 12700 ctxt->sax = oldsax; 12701 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 12702 oldctxt->node_seq.length = ctxt->node_seq.length; 12703 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 12704 ctxt->node_seq.maximum = 0; 12705 ctxt->node_seq.length = 0; 12706 ctxt->node_seq.buffer = NULL; 12707 xmlFreeParserCtxt(ctxt); 12708 newDoc->intSubset = NULL; 12709 newDoc->extSubset = NULL; 12710 xmlFreeDoc(newDoc); 12711 12712 return(ret); 12713 } 12714 12715 #ifdef LIBXML_SAX1_ENABLED 12716 /** 12717 * xmlParseExternalEntity: 12718 * @doc: the document the chunk pertains to 12719 * @sax: the SAX handler bloc (possibly NULL) 12720 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12721 * @depth: Used for loop detection, use 0 12722 * @URL: the URL for the entity to load 12723 * @ID: the System ID for the entity to load 12724 * @lst: the return value for the set of parsed nodes 12725 * 12726 * Parse an external general entity 12727 * An external general parsed entity is well-formed if it matches the 12728 * production labeled extParsedEnt. 12729 * 12730 * [78] extParsedEnt ::= TextDecl? 
content 12731 * 12732 * Returns 0 if the entity is well formed, -1 in case of args problem and 12733 * the parser error code otherwise 12734 */ 12735 12736 int 12737 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 12738 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 12739 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 12740 ID, lst)); 12741 } 12742 12743 /** 12744 * xmlParseBalancedChunkMemory: 12745 * @doc: the document the chunk pertains to 12746 * @sax: the SAX handler bloc (possibly NULL) 12747 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12748 * @depth: Used for loop detection, use 0 12749 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12750 * @lst: the return value for the set of parsed nodes 12751 * 12752 * Parse a well-balanced chunk of an XML document 12753 * called by the parser 12754 * The allowed sequence for the Well Balanced Chunk is the one defined by 12755 * the content production in the XML grammar: 12756 * 12757 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12758 * 12759 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 12760 * the parser error code otherwise 12761 */ 12762 12763 int 12764 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 12765 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 12766 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 12767 depth, string, lst, 0 ); 12768 } 12769 #endif /* LIBXML_SAX1_ENABLED */ 12770 12771 /** 12772 * xmlParseBalancedChunkMemoryInternal: 12773 * @oldctxt: the existing parsing context 12774 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12775 * @user_data: the user data field for the parser context 12776 * @lst: the return value for the set of parsed nodes 12777 * 12778 * 12779 * Parse a well-balanced chunk of an XML document 12780 * called by the parser 
12781 * The allowed sequence for the Well Balanced Chunk is the one defined by 12782 * the content production in the XML grammar: 12783 * 12784 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12785 * 12786 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12787 * error code otherwise 12788 * 12789 * In case recover is set to 1, the nodelist will not be empty even if 12790 * the parsed chunk is not well balanced. 12791 */ 12792 static xmlParserErrors 12793 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 12794 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 12795 xmlParserCtxtPtr ctxt; 12796 xmlDocPtr newDoc = NULL; 12797 xmlNodePtr newRoot; 12798 xmlSAXHandlerPtr oldsax = NULL; 12799 xmlNodePtr content = NULL; 12800 xmlNodePtr last = NULL; 12801 int size; 12802 xmlParserErrors ret = XML_ERR_OK; 12803 #ifdef SAX2 12804 int i; 12805 #endif 12806 12807 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 12808 (oldctxt->depth > 1024)) { 12809 return(XML_ERR_ENTITY_LOOP); 12810 } 12811 12812 12813 if (lst != NULL) 12814 *lst = NULL; 12815 if (string == NULL) 12816 return(XML_ERR_INTERNAL_ERROR); 12817 12818 size = xmlStrlen(string); 12819 12820 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 12821 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12822 if (user_data != NULL) 12823 ctxt->userData = user_data; 12824 else 12825 ctxt->userData = ctxt; 12826 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12827 ctxt->dict = oldctxt->dict; 12828 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12829 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12830 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12831 12832 #ifdef SAX2 12833 /* propagate namespaces down the entity */ 12834 for (i = 0;i < oldctxt->nsNr;i += 2) { 12835 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 12836 } 12837 #endif 12838 12839 oldsax = 
ctxt->sax; 12840 ctxt->sax = oldctxt->sax; 12841 xmlDetectSAX2(ctxt); 12842 ctxt->replaceEntities = oldctxt->replaceEntities; 12843 ctxt->options = oldctxt->options; 12844 12845 ctxt->_private = oldctxt->_private; 12846 if (oldctxt->myDoc == NULL) { 12847 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12848 if (newDoc == NULL) { 12849 ctxt->sax = oldsax; 12850 ctxt->dict = NULL; 12851 xmlFreeParserCtxt(ctxt); 12852 return(XML_ERR_INTERNAL_ERROR); 12853 } 12854 newDoc->properties = XML_DOC_INTERNAL; 12855 newDoc->dict = ctxt->dict; 12856 xmlDictReference(newDoc->dict); 12857 ctxt->myDoc = newDoc; 12858 } else { 12859 ctxt->myDoc = oldctxt->myDoc; 12860 content = ctxt->myDoc->children; 12861 last = ctxt->myDoc->last; 12862 } 12863 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 12864 if (newRoot == NULL) { 12865 ctxt->sax = oldsax; 12866 ctxt->dict = NULL; 12867 xmlFreeParserCtxt(ctxt); 12868 if (newDoc != NULL) { 12869 xmlFreeDoc(newDoc); 12870 } 12871 return(XML_ERR_INTERNAL_ERROR); 12872 } 12873 ctxt->myDoc->children = NULL; 12874 ctxt->myDoc->last = NULL; 12875 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 12876 nodePush(ctxt, ctxt->myDoc->children); 12877 ctxt->instate = XML_PARSER_CONTENT; 12878 ctxt->depth = oldctxt->depth + 1; 12879 12880 ctxt->validate = 0; 12881 ctxt->loadsubset = oldctxt->loadsubset; 12882 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 12883 /* 12884 * ID/IDREF registration will be done in xmlValidateElement below 12885 */ 12886 ctxt->loadsubset |= XML_SKIP_IDS; 12887 } 12888 ctxt->dictNames = oldctxt->dictNames; 12889 ctxt->attsDefault = oldctxt->attsDefault; 12890 ctxt->attsSpecial = oldctxt->attsSpecial; 12891 12892 xmlParseContent(ctxt); 12893 if ((RAW == '<') && (NXT(1) == '/')) { 12894 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12895 } else if (RAW != 0) { 12896 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12897 } 12898 if (ctxt->node != ctxt->myDoc->children) { 12899 xmlFatalErr(ctxt, 
XML_ERR_NOT_WELL_BALANCED, NULL); 12900 } 12901 12902 if (!ctxt->wellFormed) { 12903 if (ctxt->errNo == 0) 12904 ret = XML_ERR_INTERNAL_ERROR; 12905 else 12906 ret = (xmlParserErrors)ctxt->errNo; 12907 } else { 12908 ret = XML_ERR_OK; 12909 } 12910 12911 if ((lst != NULL) && (ret == XML_ERR_OK)) { 12912 xmlNodePtr cur; 12913 12914 /* 12915 * Return the newly created nodeset after unlinking it from 12916 * they pseudo parent. 12917 */ 12918 cur = ctxt->myDoc->children->children; 12919 *lst = cur; 12920 while (cur != NULL) { 12921 #ifdef LIBXML_VALID_ENABLED 12922 if ((oldctxt->validate) && (oldctxt->wellFormed) && 12923 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 12924 (cur->type == XML_ELEMENT_NODE)) { 12925 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 12926 oldctxt->myDoc, cur); 12927 } 12928 #endif /* LIBXML_VALID_ENABLED */ 12929 cur->parent = NULL; 12930 cur = cur->next; 12931 } 12932 ctxt->myDoc->children->children = NULL; 12933 } 12934 if (ctxt->myDoc != NULL) { 12935 xmlFreeNode(ctxt->myDoc->children); 12936 ctxt->myDoc->children = content; 12937 ctxt->myDoc->last = last; 12938 } 12939 12940 /* 12941 * Record in the parent context the number of entities replacement 12942 * done when parsing that reference. 
12943 */ 12944 if (oldctxt != NULL) 12945 oldctxt->nbentities += ctxt->nbentities; 12946 12947 /* 12948 * Also record the last error if any 12949 */ 12950 if (ctxt->lastError.code != XML_ERR_OK) 12951 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12952 12953 ctxt->sax = oldsax; 12954 ctxt->dict = NULL; 12955 ctxt->attsDefault = NULL; 12956 ctxt->attsSpecial = NULL; 12957 xmlFreeParserCtxt(ctxt); 12958 if (newDoc != NULL) { 12959 xmlFreeDoc(newDoc); 12960 } 12961 12962 return(ret); 12963 } 12964 12965 /** 12966 * xmlParseInNodeContext: 12967 * @node: the context node 12968 * @data: the input string 12969 * @datalen: the input string length in bytes 12970 * @options: a combination of xmlParserOption 12971 * @lst: the return value for the set of parsed nodes 12972 * 12973 * Parse a well-balanced chunk of an XML document 12974 * within the context (DTD, namespaces, etc ...) of the given node. 12975 * 12976 * The allowed sequence for the data is a Well Balanced Chunk defined by 12977 * the content production in the XML grammar: 12978 * 12979 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12980 * 12981 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12982 * error code otherwise 12983 */ 12984 xmlParserErrors 12985 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 12986 int options, xmlNodePtr *lst) { 12987 #ifdef SAX2 12988 xmlParserCtxtPtr ctxt; 12989 xmlDocPtr doc = NULL; 12990 xmlNodePtr fake, cur; 12991 int nsnr = 0; 12992 12993 xmlParserErrors ret = XML_ERR_OK; 12994 12995 /* 12996 * check all input parameters, grab the document 12997 */ 12998 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 12999 return(XML_ERR_INTERNAL_ERROR); 13000 switch (node->type) { 13001 case XML_ELEMENT_NODE: 13002 case XML_ATTRIBUTE_NODE: 13003 case XML_TEXT_NODE: 13004 case XML_CDATA_SECTION_NODE: 13005 case XML_ENTITY_REF_NODE: 13006 case XML_PI_NODE: 13007 case XML_COMMENT_NODE: 13008 case 
XML_DOCUMENT_NODE: 13009 case XML_HTML_DOCUMENT_NODE: 13010 break; 13011 default: 13012 return(XML_ERR_INTERNAL_ERROR); 13013 13014 } 13015 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13016 (node->type != XML_DOCUMENT_NODE) && 13017 (node->type != XML_HTML_DOCUMENT_NODE)) 13018 node = node->parent; 13019 if (node == NULL) 13020 return(XML_ERR_INTERNAL_ERROR); 13021 if (node->type == XML_ELEMENT_NODE) 13022 doc = node->doc; 13023 else 13024 doc = (xmlDocPtr) node; 13025 if (doc == NULL) 13026 return(XML_ERR_INTERNAL_ERROR); 13027 13028 /* 13029 * allocate a context and set-up everything not related to the 13030 * node position in the tree 13031 */ 13032 if (doc->type == XML_DOCUMENT_NODE) 13033 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13034 #ifdef LIBXML_HTML_ENABLED 13035 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13036 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13037 /* 13038 * When parsing in context, it makes no sense to add implied 13039 * elements like html/body/etc... 13040 */ 13041 options |= HTML_PARSE_NOIMPLIED; 13042 } 13043 #endif 13044 else 13045 return(XML_ERR_INTERNAL_ERROR); 13046 13047 if (ctxt == NULL) 13048 return(XML_ERR_NO_MEMORY); 13049 13050 /* 13051 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13052 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13053 * we must wait until the last moment to free the original one. 
13054 */ 13055 if (doc->dict != NULL) { 13056 if (ctxt->dict != NULL) 13057 xmlDictFree(ctxt->dict); 13058 ctxt->dict = doc->dict; 13059 } else 13060 options |= XML_PARSE_NODICT; 13061 13062 if (doc->encoding != NULL) { 13063 xmlCharEncodingHandlerPtr hdlr; 13064 13065 if (ctxt->encoding != NULL) 13066 xmlFree((xmlChar *) ctxt->encoding); 13067 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13068 13069 hdlr = xmlFindCharEncodingHandler(doc->encoding); 13070 if (hdlr != NULL) { 13071 xmlSwitchToEncoding(ctxt, hdlr); 13072 } else { 13073 return(XML_ERR_UNSUPPORTED_ENCODING); 13074 } 13075 } 13076 13077 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13078 xmlDetectSAX2(ctxt); 13079 ctxt->myDoc = doc; 13080 13081 fake = xmlNewComment(NULL); 13082 if (fake == NULL) { 13083 xmlFreeParserCtxt(ctxt); 13084 return(XML_ERR_NO_MEMORY); 13085 } 13086 xmlAddChild(node, fake); 13087 13088 if (node->type == XML_ELEMENT_NODE) { 13089 nodePush(ctxt, node); 13090 /* 13091 * initialize the SAX2 namespaces stack 13092 */ 13093 cur = node; 13094 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13095 xmlNsPtr ns = cur->nsDef; 13096 const xmlChar *iprefix, *ihref; 13097 13098 while (ns != NULL) { 13099 if (ctxt->dict) { 13100 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13101 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13102 } else { 13103 iprefix = ns->prefix; 13104 ihref = ns->href; 13105 } 13106 13107 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13108 nsPush(ctxt, iprefix, ihref); 13109 nsnr++; 13110 } 13111 ns = ns->next; 13112 } 13113 cur = cur->parent; 13114 } 13115 ctxt->instate = XML_PARSER_CONTENT; 13116 } 13117 13118 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13119 /* 13120 * ID/IDREF registration will be done in xmlValidateElement below 13121 */ 13122 ctxt->loadsubset |= XML_SKIP_IDS; 13123 } 13124 13125 #ifdef LIBXML_HTML_ENABLED 13126 if (doc->type == XML_HTML_DOCUMENT_NODE) 13127 __htmlParseContent(ctxt); 13128 else 13129 
#endif 13130 xmlParseContent(ctxt); 13131 13132 nsPop(ctxt, nsnr); 13133 if ((RAW == '<') && (NXT(1) == '/')) { 13134 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13135 } else if (RAW != 0) { 13136 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13137 } 13138 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13139 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13140 ctxt->wellFormed = 0; 13141 } 13142 13143 if (!ctxt->wellFormed) { 13144 if (ctxt->errNo == 0) 13145 ret = XML_ERR_INTERNAL_ERROR; 13146 else 13147 ret = (xmlParserErrors)ctxt->errNo; 13148 } else { 13149 ret = XML_ERR_OK; 13150 } 13151 13152 /* 13153 * Return the newly created nodeset after unlinking it from 13154 * the pseudo sibling. 13155 */ 13156 13157 cur = fake->next; 13158 fake->next = NULL; 13159 node->last = fake; 13160 13161 if (cur != NULL) { 13162 cur->prev = NULL; 13163 } 13164 13165 *lst = cur; 13166 13167 while (cur != NULL) { 13168 cur->parent = NULL; 13169 cur = cur->next; 13170 } 13171 13172 xmlUnlinkNode(fake); 13173 xmlFreeNode(fake); 13174 13175 13176 if (ret != XML_ERR_OK) { 13177 xmlFreeNodeList(*lst); 13178 *lst = NULL; 13179 } 13180 13181 if (doc->dict != NULL) 13182 ctxt->dict = NULL; 13183 xmlFreeParserCtxt(ctxt); 13184 13185 return(ret); 13186 #else /* !SAX2 */ 13187 return(XML_ERR_INTERNAL_ERROR); 13188 #endif 13189 } 13190 13191 #ifdef LIBXML_SAX1_ENABLED 13192 /** 13193 * xmlParseBalancedChunkMemoryRecover: 13194 * @doc: the document the chunk pertains to 13195 * @sax: the SAX handler bloc (possibly NULL) 13196 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13197 * @depth: Used for loop detection, use 0 13198 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13199 * @lst: the return value for the set of parsed nodes 13200 * @recover: return nodes even if the data is broken (use 0) 13201 * 13202 * 13203 * Parse a well-balanced chunk of an XML document 13204 * called by the parser 13205 * The allowed sequence for the 
Well Balanced Chunk is the one defined by 13206 * the content production in the XML grammar: 13207 * 13208 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13209 * 13210 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13211 * the parser error code otherwise 13212 * 13213 * In case recover is set to 1, the nodelist will not be empty even if 13214 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13215 * some extent. 13216 */ 13217 int 13218 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13219 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13220 int recover) { 13221 xmlParserCtxtPtr ctxt; 13222 xmlDocPtr newDoc; 13223 xmlSAXHandlerPtr oldsax = NULL; 13224 xmlNodePtr content, newRoot; 13225 int size; 13226 int ret = 0; 13227 13228 if (depth > 40) { 13229 return(XML_ERR_ENTITY_LOOP); 13230 } 13231 13232 13233 if (lst != NULL) 13234 *lst = NULL; 13235 if (string == NULL) 13236 return(-1); 13237 13238 size = xmlStrlen(string); 13239 13240 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13241 if (ctxt == NULL) return(-1); 13242 ctxt->userData = ctxt; 13243 if (sax != NULL) { 13244 oldsax = ctxt->sax; 13245 ctxt->sax = sax; 13246 if (user_data != NULL) 13247 ctxt->userData = user_data; 13248 } 13249 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13250 if (newDoc == NULL) { 13251 xmlFreeParserCtxt(ctxt); 13252 return(-1); 13253 } 13254 newDoc->properties = XML_DOC_INTERNAL; 13255 if ((doc != NULL) && (doc->dict != NULL)) { 13256 xmlDictFree(ctxt->dict); 13257 ctxt->dict = doc->dict; 13258 xmlDictReference(ctxt->dict); 13259 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13260 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13261 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13262 ctxt->dictNames = 1; 13263 } else { 13264 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13265 } 13266 if (doc 
!= NULL) { 13267 newDoc->intSubset = doc->intSubset; 13268 newDoc->extSubset = doc->extSubset; 13269 } 13270 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13271 if (newRoot == NULL) { 13272 if (sax != NULL) 13273 ctxt->sax = oldsax; 13274 xmlFreeParserCtxt(ctxt); 13275 newDoc->intSubset = NULL; 13276 newDoc->extSubset = NULL; 13277 xmlFreeDoc(newDoc); 13278 return(-1); 13279 } 13280 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13281 nodePush(ctxt, newRoot); 13282 if (doc == NULL) { 13283 ctxt->myDoc = newDoc; 13284 } else { 13285 ctxt->myDoc = newDoc; 13286 newDoc->children->doc = doc; 13287 /* Ensure that doc has XML spec namespace */ 13288 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13289 newDoc->oldNs = doc->oldNs; 13290 } 13291 ctxt->instate = XML_PARSER_CONTENT; 13292 ctxt->depth = depth; 13293 13294 /* 13295 * Doing validity checking on chunk doesn't make sense 13296 */ 13297 ctxt->validate = 0; 13298 ctxt->loadsubset = 0; 13299 xmlDetectSAX2(ctxt); 13300 13301 if ( doc != NULL ){ 13302 content = doc->children; 13303 doc->children = NULL; 13304 xmlParseContent(ctxt); 13305 doc->children = content; 13306 } 13307 else { 13308 xmlParseContent(ctxt); 13309 } 13310 if ((RAW == '<') && (NXT(1) == '/')) { 13311 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13312 } else if (RAW != 0) { 13313 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13314 } 13315 if (ctxt->node != newDoc->children) { 13316 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13317 } 13318 13319 if (!ctxt->wellFormed) { 13320 if (ctxt->errNo == 0) 13321 ret = 1; 13322 else 13323 ret = ctxt->errNo; 13324 } else { 13325 ret = 0; 13326 } 13327 13328 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 13329 xmlNodePtr cur; 13330 13331 /* 13332 * Return the newly created nodeset after unlinking it from 13333 * they pseudo parent. 
13334 */ 13335 cur = newDoc->children->children; 13336 *lst = cur; 13337 while (cur != NULL) { 13338 xmlSetTreeDoc(cur, doc); 13339 cur->parent = NULL; 13340 cur = cur->next; 13341 } 13342 newDoc->children->children = NULL; 13343 } 13344 13345 if (sax != NULL) 13346 ctxt->sax = oldsax; 13347 xmlFreeParserCtxt(ctxt); 13348 newDoc->intSubset = NULL; 13349 newDoc->extSubset = NULL; 13350 newDoc->oldNs = NULL; 13351 xmlFreeDoc(newDoc); 13352 13353 return(ret); 13354 } 13355 13356 /** 13357 * xmlSAXParseEntity: 13358 * @sax: the SAX handler block 13359 * @filename: the filename 13360 * 13361 * parse an XML external entity out of context and build a tree. 13362 * It use the given SAX function block to handle the parsing callback. 13363 * If sax is NULL, fallback to the default DOM tree building routines. 13364 * 13365 * [78] extParsedEnt ::= TextDecl? content 13366 * 13367 * This correspond to a "Well Balanced" chunk 13368 * 13369 * Returns the resulting document tree 13370 */ 13371 13372 xmlDocPtr 13373 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13374 xmlDocPtr ret; 13375 xmlParserCtxtPtr ctxt; 13376 13377 ctxt = xmlCreateFileParserCtxt(filename); 13378 if (ctxt == NULL) { 13379 return(NULL); 13380 } 13381 if (sax != NULL) { 13382 if (ctxt->sax != NULL) 13383 xmlFree(ctxt->sax); 13384 ctxt->sax = sax; 13385 ctxt->userData = NULL; 13386 } 13387 13388 xmlParseExtParsedEnt(ctxt); 13389 13390 if (ctxt->wellFormed) 13391 ret = ctxt->myDoc; 13392 else { 13393 ret = NULL; 13394 xmlFreeDoc(ctxt->myDoc); 13395 ctxt->myDoc = NULL; 13396 } 13397 if (sax != NULL) 13398 ctxt->sax = NULL; 13399 xmlFreeParserCtxt(ctxt); 13400 13401 return(ret); 13402 } 13403 13404 /** 13405 * xmlParseEntity: 13406 * @filename: the filename 13407 * 13408 * parse an XML external entity out of context and build a tree. 13409 * 13410 * [78] extParsedEnt ::= TextDecl? 
content 13411 * 13412 * This correspond to a "Well Balanced" chunk 13413 * 13414 * Returns the resulting document tree 13415 */ 13416 13417 xmlDocPtr 13418 xmlParseEntity(const char *filename) { 13419 return(xmlSAXParseEntity(NULL, filename)); 13420 } 13421 #endif /* LIBXML_SAX1_ENABLED */ 13422 13423 /** 13424 * xmlCreateEntityParserCtxtInternal: 13425 * @URL: the entity URL 13426 * @ID: the entity PUBLIC ID 13427 * @base: a possible base for the target URI 13428 * @pctx: parser context used to set options on new context 13429 * 13430 * Create a parser context for an external entity 13431 * Automatic support for ZLIB/Compress compressed document is provided 13432 * by default if found at compile-time. 13433 * 13434 * Returns the new parser context or NULL 13435 */ 13436 static xmlParserCtxtPtr 13437 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13438 const xmlChar *base, xmlParserCtxtPtr pctx) { 13439 xmlParserCtxtPtr ctxt; 13440 xmlParserInputPtr inputStream; 13441 char *directory = NULL; 13442 xmlChar *uri; 13443 13444 ctxt = xmlNewParserCtxt(); 13445 if (ctxt == NULL) { 13446 return(NULL); 13447 } 13448 13449 if (pctx != NULL) { 13450 ctxt->options = pctx->options; 13451 ctxt->_private = pctx->_private; 13452 } 13453 13454 uri = xmlBuildURI(URL, base); 13455 13456 if (uri == NULL) { 13457 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13458 if (inputStream == NULL) { 13459 xmlFreeParserCtxt(ctxt); 13460 return(NULL); 13461 } 13462 13463 inputPush(ctxt, inputStream); 13464 13465 if ((ctxt->directory == NULL) && (directory == NULL)) 13466 directory = xmlParserGetDirectory((char *)URL); 13467 if ((ctxt->directory == NULL) && (directory != NULL)) 13468 ctxt->directory = directory; 13469 } else { 13470 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 13471 if (inputStream == NULL) { 13472 xmlFree(uri); 13473 xmlFreeParserCtxt(ctxt); 13474 return(NULL); 13475 } 13476 13477 inputPush(ctxt, 
inputStream); 13478 13479 if ((ctxt->directory == NULL) && (directory == NULL)) 13480 directory = xmlParserGetDirectory((char *)uri); 13481 if ((ctxt->directory == NULL) && (directory != NULL)) 13482 ctxt->directory = directory; 13483 xmlFree(uri); 13484 } 13485 return(ctxt); 13486 } 13487 13488 /** 13489 * xmlCreateEntityParserCtxt: 13490 * @URL: the entity URL 13491 * @ID: the entity PUBLIC ID 13492 * @base: a possible base for the target URI 13493 * 13494 * Create a parser context for an external entity 13495 * Automatic support for ZLIB/Compress compressed document is provided 13496 * by default if found at compile-time. 13497 * 13498 * Returns the new parser context or NULL 13499 */ 13500 xmlParserCtxtPtr 13501 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 13502 const xmlChar *base) { 13503 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 13504 13505 } 13506 13507 /************************************************************************ 13508 * * 13509 * Front ends when parsing from a file * 13510 * * 13511 ************************************************************************/ 13512 13513 /** 13514 * xmlCreateURLParserCtxt: 13515 * @filename: the filename or URL 13516 * @options: a combination of xmlParserOption 13517 * 13518 * Create a parser context for a file or URL content. 
13519 * Automatic support for ZLIB/Compress compressed document is provided 13520 * by default if found at compile-time and for file accesses 13521 * 13522 * Returns the new parser context or NULL 13523 */ 13524 xmlParserCtxtPtr 13525 xmlCreateURLParserCtxt(const char *filename, int options) 13526 { 13527 xmlParserCtxtPtr ctxt; 13528 xmlParserInputPtr inputStream; 13529 char *directory = NULL; 13530 13531 ctxt = xmlNewParserCtxt(); 13532 if (ctxt == NULL) { 13533 xmlErrMemory(NULL, "cannot allocate parser context"); 13534 return(NULL); 13535 } 13536 13537 if (options) 13538 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13539 ctxt->linenumbers = 1; 13540 13541 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 13542 if (inputStream == NULL) { 13543 xmlFreeParserCtxt(ctxt); 13544 return(NULL); 13545 } 13546 13547 inputPush(ctxt, inputStream); 13548 if ((ctxt->directory == NULL) && (directory == NULL)) 13549 directory = xmlParserGetDirectory(filename); 13550 if ((ctxt->directory == NULL) && (directory != NULL)) 13551 ctxt->directory = directory; 13552 13553 return(ctxt); 13554 } 13555 13556 /** 13557 * xmlCreateFileParserCtxt: 13558 * @filename: the filename 13559 * 13560 * Create a parser context for a file content. 13561 * Automatic support for ZLIB/Compress compressed document is provided 13562 * by default if found at compile-time. 13563 * 13564 * Returns the new parser context or NULL 13565 */ 13566 xmlParserCtxtPtr 13567 xmlCreateFileParserCtxt(const char *filename) 13568 { 13569 return(xmlCreateURLParserCtxt(filename, 0)); 13570 } 13571 13572 #ifdef LIBXML_SAX1_ENABLED 13573 /** 13574 * xmlSAXParseFileWithData: 13575 * @sax: the SAX handler block 13576 * @filename: the filename 13577 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13578 * documents 13579 * @data: the userdata 13580 * 13581 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 13582 * compressed document is provided by default if found at compile-time. 13583 * It use the given SAX function block to handle the parsing callback. 13584 * If sax is NULL, fallback to the default DOM tree building routines. 13585 * 13586 * User data (void *) is stored within the parser context in the 13587 * context's _private member, so it is available nearly everywhere in libxml 13588 * 13589 * Returns the resulting document tree 13590 */ 13591 13592 xmlDocPtr 13593 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 13594 int recovery, void *data) { 13595 xmlDocPtr ret; 13596 xmlParserCtxtPtr ctxt; 13597 13598 xmlInitParser(); 13599 13600 ctxt = xmlCreateFileParserCtxt(filename); 13601 if (ctxt == NULL) { 13602 return(NULL); 13603 } 13604 if (sax != NULL) { 13605 if (ctxt->sax != NULL) 13606 xmlFree(ctxt->sax); 13607 ctxt->sax = sax; 13608 } 13609 xmlDetectSAX2(ctxt); 13610 if (data!=NULL) { 13611 ctxt->_private = data; 13612 } 13613 13614 if (ctxt->directory == NULL) 13615 ctxt->directory = xmlParserGetDirectory(filename); 13616 13617 ctxt->recovery = recovery; 13618 13619 xmlParseDocument(ctxt); 13620 13621 if ((ctxt->wellFormed) || recovery) { 13622 ret = ctxt->myDoc; 13623 if (ret != NULL) { 13624 if (ctxt->input->buf->compressed > 0) 13625 ret->compression = 9; 13626 else 13627 ret->compression = ctxt->input->buf->compressed; 13628 } 13629 } 13630 else { 13631 ret = NULL; 13632 xmlFreeDoc(ctxt->myDoc); 13633 ctxt->myDoc = NULL; 13634 } 13635 if (sax != NULL) 13636 ctxt->sax = NULL; 13637 xmlFreeParserCtxt(ctxt); 13638 13639 return(ret); 13640 } 13641 13642 /** 13643 * xmlSAXParseFile: 13644 * @sax: the SAX handler block 13645 * @filename: the filename 13646 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13647 * documents 13648 * 13649 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 13650 * compressed document is provided by default if found at compile-time. 13651 * It use the given SAX function block to handle the parsing callback. 13652 * If sax is NULL, fallback to the default DOM tree building routines. 13653 * 13654 * Returns the resulting document tree 13655 */ 13656 13657 xmlDocPtr 13658 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 13659 int recovery) { 13660 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 13661 } 13662 13663 /** 13664 * xmlRecoverDoc: 13665 * @cur: a pointer to an array of xmlChar 13666 * 13667 * parse an XML in-memory document and build a tree. 13668 * In the case the document is not Well Formed, a attempt to build a 13669 * tree is tried anyway 13670 * 13671 * Returns the resulting document tree or NULL in case of failure 13672 */ 13673 13674 xmlDocPtr 13675 xmlRecoverDoc(const xmlChar *cur) { 13676 return(xmlSAXParseDoc(NULL, cur, 1)); 13677 } 13678 13679 /** 13680 * xmlParseFile: 13681 * @filename: the filename 13682 * 13683 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13684 * compressed document is provided by default if found at compile-time. 13685 * 13686 * Returns the resulting document tree if the file was wellformed, 13687 * NULL otherwise. 13688 */ 13689 13690 xmlDocPtr 13691 xmlParseFile(const char *filename) { 13692 return(xmlSAXParseFile(NULL, filename, 0)); 13693 } 13694 13695 /** 13696 * xmlRecoverFile: 13697 * @filename: the filename 13698 * 13699 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13700 * compressed document is provided by default if found at compile-time. 
13701 * In the case the document is not Well Formed, it attempts to build 13702 * a tree anyway 13703 * 13704 * Returns the resulting document tree or NULL in case of failure 13705 */ 13706 13707 xmlDocPtr 13708 xmlRecoverFile(const char *filename) { 13709 return(xmlSAXParseFile(NULL, filename, 1)); 13710 } 13711 13712 13713 /** 13714 * xmlSetupParserForBuffer: 13715 * @ctxt: an XML parser context 13716 * @buffer: a xmlChar * buffer 13717 * @filename: a file name 13718 * 13719 * Setup the parser context to parse a new buffer; Clears any prior 13720 * contents from the parser context. The buffer parameter must not be 13721 * NULL, but the filename parameter can be 13722 */ 13723 void 13724 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 13725 const char* filename) 13726 { 13727 xmlParserInputPtr input; 13728 13729 if ((ctxt == NULL) || (buffer == NULL)) 13730 return; 13731 13732 input = xmlNewInputStream(ctxt); 13733 if (input == NULL) { 13734 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 13735 xmlClearParserCtxt(ctxt); 13736 return; 13737 } 13738 13739 xmlClearParserCtxt(ctxt); 13740 if (filename != NULL) 13741 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 13742 input->base = buffer; 13743 input->cur = buffer; 13744 input->end = &buffer[xmlStrlen(buffer)]; 13745 inputPush(ctxt, input); 13746 } 13747 13748 /** 13749 * xmlSAXUserParseFile: 13750 * @sax: a SAX handler 13751 * @user_data: The user data returned on SAX callbacks 13752 * @filename: a file name 13753 * 13754 * parse an XML file and call the given SAX handler routines. 
13755 * Automatic support for ZLIB/Compress compressed document is provided 13756 * 13757 * Returns 0 in case of success or a error number otherwise 13758 */ 13759 int 13760 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 13761 const char *filename) { 13762 int ret = 0; 13763 xmlParserCtxtPtr ctxt; 13764 13765 ctxt = xmlCreateFileParserCtxt(filename); 13766 if (ctxt == NULL) return -1; 13767 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13768 xmlFree(ctxt->sax); 13769 ctxt->sax = sax; 13770 xmlDetectSAX2(ctxt); 13771 13772 if (user_data != NULL) 13773 ctxt->userData = user_data; 13774 13775 xmlParseDocument(ctxt); 13776 13777 if (ctxt->wellFormed) 13778 ret = 0; 13779 else { 13780 if (ctxt->errNo != 0) 13781 ret = ctxt->errNo; 13782 else 13783 ret = -1; 13784 } 13785 if (sax != NULL) 13786 ctxt->sax = NULL; 13787 if (ctxt->myDoc != NULL) { 13788 xmlFreeDoc(ctxt->myDoc); 13789 ctxt->myDoc = NULL; 13790 } 13791 xmlFreeParserCtxt(ctxt); 13792 13793 return ret; 13794 } 13795 #endif /* LIBXML_SAX1_ENABLED */ 13796 13797 /************************************************************************ 13798 * * 13799 * Front ends when parsing from memory * 13800 * * 13801 ************************************************************************/ 13802 13803 /** 13804 * xmlCreateMemoryParserCtxt: 13805 * @buffer: a pointer to a char array 13806 * @size: the size of the array 13807 * 13808 * Create a parser context for an XML in-memory document. 
13809 * 13810 * Returns the new parser context or NULL 13811 */ 13812 xmlParserCtxtPtr 13813 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 13814 xmlParserCtxtPtr ctxt; 13815 xmlParserInputPtr input; 13816 xmlParserInputBufferPtr buf; 13817 13818 if (buffer == NULL) 13819 return(NULL); 13820 if (size <= 0) 13821 return(NULL); 13822 13823 ctxt = xmlNewParserCtxt(); 13824 if (ctxt == NULL) 13825 return(NULL); 13826 13827 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 13828 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 13829 if (buf == NULL) { 13830 xmlFreeParserCtxt(ctxt); 13831 return(NULL); 13832 } 13833 13834 input = xmlNewInputStream(ctxt); 13835 if (input == NULL) { 13836 xmlFreeParserInputBuffer(buf); 13837 xmlFreeParserCtxt(ctxt); 13838 return(NULL); 13839 } 13840 13841 input->filename = NULL; 13842 input->buf = buf; 13843 input->base = input->buf->buffer->content; 13844 input->cur = input->buf->buffer->content; 13845 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 13846 13847 inputPush(ctxt, input); 13848 return(ctxt); 13849 } 13850 13851 #ifdef LIBXML_SAX1_ENABLED 13852 /** 13853 * xmlSAXParseMemoryWithData: 13854 * @sax: the SAX handler block 13855 * @buffer: an pointer to a char array 13856 * @size: the size of the array 13857 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13858 * documents 13859 * @data: the userdata 13860 * 13861 * parse an XML in-memory block and use the given SAX function block 13862 * to handle the parsing callback. If sax is NULL, fallback to the default 13863 * DOM tree building routines. 
13864 * 13865 * User data (void *) is stored within the parser context in the 13866 * context's _private member, so it is available nearly everywhere in libxml 13867 * 13868 * Returns the resulting document tree 13869 */ 13870 13871 xmlDocPtr 13872 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 13873 int size, int recovery, void *data) { 13874 xmlDocPtr ret; 13875 xmlParserCtxtPtr ctxt; 13876 13877 xmlInitParser(); 13878 13879 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13880 if (ctxt == NULL) return(NULL); 13881 if (sax != NULL) { 13882 if (ctxt->sax != NULL) 13883 xmlFree(ctxt->sax); 13884 ctxt->sax = sax; 13885 } 13886 xmlDetectSAX2(ctxt); 13887 if (data!=NULL) { 13888 ctxt->_private=data; 13889 } 13890 13891 ctxt->recovery = recovery; 13892 13893 xmlParseDocument(ctxt); 13894 13895 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 13896 else { 13897 ret = NULL; 13898 xmlFreeDoc(ctxt->myDoc); 13899 ctxt->myDoc = NULL; 13900 } 13901 if (sax != NULL) 13902 ctxt->sax = NULL; 13903 xmlFreeParserCtxt(ctxt); 13904 13905 return(ret); 13906 } 13907 13908 /** 13909 * xmlSAXParseMemory: 13910 * @sax: the SAX handler block 13911 * @buffer: an pointer to a char array 13912 * @size: the size of the array 13913 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 13914 * documents 13915 * 13916 * parse an XML in-memory block and use the given SAX function block 13917 * to handle the parsing callback. If sax is NULL, fallback to the default 13918 * DOM tree building routines. 13919 * 13920 * Returns the resulting document tree 13921 */ 13922 xmlDocPtr 13923 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 13924 int size, int recovery) { 13925 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 13926 } 13927 13928 /** 13929 * xmlParseMemory: 13930 * @buffer: an pointer to a char array 13931 * @size: the size of the array 13932 * 13933 * parse an XML in-memory block and build a tree. 
13934 * 13935 * Returns the resulting document tree 13936 */ 13937 13938 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 13939 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 13940 } 13941 13942 /** 13943 * xmlRecoverMemory: 13944 * @buffer: an pointer to a char array 13945 * @size: the size of the array 13946 * 13947 * parse an XML in-memory block and build a tree. 13948 * In the case the document is not Well Formed, an attempt to 13949 * build a tree is tried anyway 13950 * 13951 * Returns the resulting document tree or NULL in case of error 13952 */ 13953 13954 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 13955 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 13956 } 13957 13958 /** 13959 * xmlSAXUserParseMemory: 13960 * @sax: a SAX handler 13961 * @user_data: The user data returned on SAX callbacks 13962 * @buffer: an in-memory XML document input 13963 * @size: the length of the XML document in bytes 13964 * 13965 * A better SAX parsing routine. 13966 * parse an XML in-memory buffer and call the given SAX handler routines. 
13967 * 13968 * Returns 0 in case of success or a error number otherwise 13969 */ 13970 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 13971 const char *buffer, int size) { 13972 int ret = 0; 13973 xmlParserCtxtPtr ctxt; 13974 13975 xmlInitParser(); 13976 13977 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13978 if (ctxt == NULL) return -1; 13979 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13980 xmlFree(ctxt->sax); 13981 ctxt->sax = sax; 13982 xmlDetectSAX2(ctxt); 13983 13984 if (user_data != NULL) 13985 ctxt->userData = user_data; 13986 13987 xmlParseDocument(ctxt); 13988 13989 if (ctxt->wellFormed) 13990 ret = 0; 13991 else { 13992 if (ctxt->errNo != 0) 13993 ret = ctxt->errNo; 13994 else 13995 ret = -1; 13996 } 13997 if (sax != NULL) 13998 ctxt->sax = NULL; 13999 if (ctxt->myDoc != NULL) { 14000 xmlFreeDoc(ctxt->myDoc); 14001 ctxt->myDoc = NULL; 14002 } 14003 xmlFreeParserCtxt(ctxt); 14004 14005 return ret; 14006 } 14007 #endif /* LIBXML_SAX1_ENABLED */ 14008 14009 /** 14010 * xmlCreateDocParserCtxt: 14011 * @cur: a pointer to an array of xmlChar 14012 * 14013 * Creates a parser context for an XML in-memory document. 14014 * 14015 * Returns the new parser context or NULL 14016 */ 14017 xmlParserCtxtPtr 14018 xmlCreateDocParserCtxt(const xmlChar *cur) { 14019 int len; 14020 14021 if (cur == NULL) 14022 return(NULL); 14023 len = xmlStrlen(cur); 14024 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14025 } 14026 14027 #ifdef LIBXML_SAX1_ENABLED 14028 /** 14029 * xmlSAXParseDoc: 14030 * @sax: the SAX handler block 14031 * @cur: a pointer to an array of xmlChar 14032 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14033 * documents 14034 * 14035 * parse an XML in-memory document and build a tree. 14036 * It use the given SAX function block to handle the parsing callback. 14037 * If sax is NULL, fallback to the default DOM tree building routines. 
14038 * 14039 * Returns the resulting document tree 14040 */ 14041 14042 xmlDocPtr 14043 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14044 xmlDocPtr ret; 14045 xmlParserCtxtPtr ctxt; 14046 xmlSAXHandlerPtr oldsax = NULL; 14047 14048 if (cur == NULL) return(NULL); 14049 14050 14051 ctxt = xmlCreateDocParserCtxt(cur); 14052 if (ctxt == NULL) return(NULL); 14053 if (sax != NULL) { 14054 oldsax = ctxt->sax; 14055 ctxt->sax = sax; 14056 ctxt->userData = NULL; 14057 } 14058 xmlDetectSAX2(ctxt); 14059 14060 xmlParseDocument(ctxt); 14061 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14062 else { 14063 ret = NULL; 14064 xmlFreeDoc(ctxt->myDoc); 14065 ctxt->myDoc = NULL; 14066 } 14067 if (sax != NULL) 14068 ctxt->sax = oldsax; 14069 xmlFreeParserCtxt(ctxt); 14070 14071 return(ret); 14072 } 14073 14074 /** 14075 * xmlParseDoc: 14076 * @cur: a pointer to an array of xmlChar 14077 * 14078 * parse an XML in-memory document and build a tree. 14079 * 14080 * Returns the resulting document tree 14081 */ 14082 14083 xmlDocPtr 14084 xmlParseDoc(const xmlChar *cur) { 14085 return(xmlSAXParseDoc(NULL, cur, 0)); 14086 } 14087 #endif /* LIBXML_SAX1_ENABLED */ 14088 14089 #ifdef LIBXML_LEGACY_ENABLED 14090 /************************************************************************ 14091 * * 14092 * Specific function to keep track of entities references * 14093 * and used by the XSLT debugger * 14094 * * 14095 ************************************************************************/ 14096 14097 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14098 14099 /** 14100 * xmlAddEntityReference: 14101 * @ent : A valid entity 14102 * @firstNode : A valid first node for children of entity 14103 * @lastNode : A valid last node of children entity 14104 * 14105 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14106 */ 14107 static void 14108 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14109 xmlNodePtr lastNode) 
14110 { 14111 if (xmlEntityRefFunc != NULL) { 14112 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14113 } 14114 } 14115 14116 14117 /** 14118 * xmlSetEntityReferenceFunc: 14119 * @func: A valid function 14120 * 14121 * Set the function to call call back when a xml reference has been made 14122 */ 14123 void 14124 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14125 { 14126 xmlEntityRefFunc = func; 14127 } 14128 #endif /* LIBXML_LEGACY_ENABLED */ 14129 14130 /************************************************************************ 14131 * * 14132 * Miscellaneous * 14133 * * 14134 ************************************************************************/ 14135 14136 #ifdef LIBXML_XPATH_ENABLED 14137 #include <libxml/xpath.h> 14138 #endif 14139 14140 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14141 static int xmlParserInitialized = 0; 14142 14143 /** 14144 * xmlInitParser: 14145 * 14146 * Initialization function for the XML parser. 14147 * This is not reentrant. Call once before processing in case of 14148 * use in multithreaded programs. 
14149 */ 14150 14151 void 14152 xmlInitParser(void) { 14153 if (xmlParserInitialized != 0) 14154 return; 14155 14156 #ifdef LIBXML_THREAD_ENABLED 14157 __xmlGlobalInitMutexLock(); 14158 if (xmlParserInitialized == 0) { 14159 #endif 14160 xmlInitThreads(); 14161 xmlInitGlobals(); 14162 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14163 (xmlGenericError == NULL)) 14164 initGenericErrorDefaultFunc(NULL); 14165 xmlInitMemory(); 14166 xmlInitCharEncodingHandlers(); 14167 xmlDefaultSAXHandlerInit(); 14168 xmlRegisterDefaultInputCallbacks(); 14169 #ifdef LIBXML_OUTPUT_ENABLED 14170 xmlRegisterDefaultOutputCallbacks(); 14171 #endif /* LIBXML_OUTPUT_ENABLED */ 14172 #ifdef LIBXML_HTML_ENABLED 14173 htmlInitAutoClose(); 14174 htmlDefaultSAXHandlerInit(); 14175 #endif 14176 #ifdef LIBXML_XPATH_ENABLED 14177 xmlXPathInit(); 14178 #endif 14179 xmlParserInitialized = 1; 14180 #ifdef LIBXML_THREAD_ENABLED 14181 } 14182 __xmlGlobalInitMutexUnlock(); 14183 #endif 14184 } 14185 14186 /** 14187 * xmlCleanupParser: 14188 * 14189 * This function name is somewhat misleading. It does not clean up 14190 * parser state, it cleans up memory allocated by the library itself. 14191 * It is a cleanup function for the XML library. It tries to reclaim all 14192 * related global memory allocated for the library processing. 14193 * It doesn't deallocate any document related memory. One should 14194 * call xmlCleanupParser() only when the process has finished using 14195 * the library and all XML/HTML documents built with it. 14196 * See also xmlInitParser() which has the opposite function of preparing 14197 * the library for operations. 14198 * 14199 * WARNING: if your application is multithreaded or has plugin support 14200 * calling this may crash the application if another thread or 14201 * a plugin is still using libxml2. It's sometimes very hard to 14202 * guess if libxml2 is in use in the application, some libraries 14203 * or plugins may use it without notice. 
In case of doubt abstain 14204 * from calling this function or do it just before calling exit() 14205 * to avoid leak reports from valgrind ! 14206 */ 14207 14208 void 14209 xmlCleanupParser(void) { 14210 if (!xmlParserInitialized) 14211 return; 14212 14213 xmlCleanupCharEncodingHandlers(); 14214 #ifdef LIBXML_CATALOG_ENABLED 14215 xmlCatalogCleanup(); 14216 #endif 14217 xmlDictCleanup(); 14218 xmlCleanupInputCallbacks(); 14219 #ifdef LIBXML_OUTPUT_ENABLED 14220 xmlCleanupOutputCallbacks(); 14221 #endif 14222 #ifdef LIBXML_SCHEMAS_ENABLED 14223 xmlSchemaCleanupTypes(); 14224 xmlRelaxNGCleanupTypes(); 14225 #endif 14226 xmlCleanupGlobals(); 14227 xmlResetLastError(); 14228 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14229 xmlCleanupMemory(); 14230 xmlParserInitialized = 0; 14231 } 14232 14233 /************************************************************************ 14234 * * 14235 * New set (2.6.0) of simpler and more flexible APIs * 14236 * * 14237 ************************************************************************/ 14238 14239 /** 14240 * DICT_FREE: 14241 * @str: a string 14242 * 14243 * Free a string if it is not owned by the "dict" dictionnary in the 14244 * current scope 14245 */ 14246 #define DICT_FREE(str) \ 14247 if ((str) && ((!dict) || \ 14248 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14249 xmlFree((char *)(str)); 14250 14251 /** 14252 * xmlCtxtReset: 14253 * @ctxt: an XML parser context 14254 * 14255 * Reset a parser context 14256 */ 14257 void 14258 xmlCtxtReset(xmlParserCtxtPtr ctxt) 14259 { 14260 xmlParserInputPtr input; 14261 xmlDictPtr dict; 14262 14263 if (ctxt == NULL) 14264 return; 14265 14266 dict = ctxt->dict; 14267 14268 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14269 xmlFreeInputStream(input); 14270 } 14271 ctxt->inputNr = 0; 14272 ctxt->input = NULL; 14273 14274 ctxt->spaceNr = 0; 14275 if (ctxt->spaceTab != NULL) { 14276 ctxt->spaceTab[0] = -1; 14277 ctxt->space = 
&ctxt->spaceTab[0]; 14278 } else { 14279 ctxt->space = NULL; 14280 } 14281 14282 14283 ctxt->nodeNr = 0; 14284 ctxt->node = NULL; 14285 14286 ctxt->nameNr = 0; 14287 ctxt->name = NULL; 14288 14289 DICT_FREE(ctxt->version); 14290 ctxt->version = NULL; 14291 DICT_FREE(ctxt->encoding); 14292 ctxt->encoding = NULL; 14293 DICT_FREE(ctxt->directory); 14294 ctxt->directory = NULL; 14295 DICT_FREE(ctxt->extSubURI); 14296 ctxt->extSubURI = NULL; 14297 DICT_FREE(ctxt->extSubSystem); 14298 ctxt->extSubSystem = NULL; 14299 if (ctxt->myDoc != NULL) 14300 xmlFreeDoc(ctxt->myDoc); 14301 ctxt->myDoc = NULL; 14302 14303 ctxt->standalone = -1; 14304 ctxt->hasExternalSubset = 0; 14305 ctxt->hasPErefs = 0; 14306 ctxt->html = 0; 14307 ctxt->external = 0; 14308 ctxt->instate = XML_PARSER_START; 14309 ctxt->token = 0; 14310 14311 ctxt->wellFormed = 1; 14312 ctxt->nsWellFormed = 1; 14313 ctxt->disableSAX = 0; 14314 ctxt->valid = 1; 14315 #if 0 14316 ctxt->vctxt.userData = ctxt; 14317 ctxt->vctxt.error = xmlParserValidityError; 14318 ctxt->vctxt.warning = xmlParserValidityWarning; 14319 #endif 14320 ctxt->record_info = 0; 14321 ctxt->nbChars = 0; 14322 ctxt->checkIndex = 0; 14323 ctxt->inSubset = 0; 14324 ctxt->errNo = XML_ERR_OK; 14325 ctxt->depth = 0; 14326 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14327 ctxt->catalogs = NULL; 14328 ctxt->nbentities = 0; 14329 ctxt->sizeentities = 0; 14330 xmlInitNodeInfoSeq(&ctxt->node_seq); 14331 14332 if (ctxt->attsDefault != NULL) { 14333 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 14334 ctxt->attsDefault = NULL; 14335 } 14336 if (ctxt->attsSpecial != NULL) { 14337 xmlHashFree(ctxt->attsSpecial, NULL); 14338 ctxt->attsSpecial = NULL; 14339 } 14340 14341 #ifdef LIBXML_CATALOG_ENABLED 14342 if (ctxt->catalogs != NULL) 14343 xmlCatalogFreeLocal(ctxt->catalogs); 14344 #endif 14345 if (ctxt->lastError.code != XML_ERR_OK) 14346 xmlResetError(&ctxt->lastError); 14347 } 14348 14349 /** 14350 * xmlCtxtResetPush: 14351 * @ctxt: an XML parser 
context 14352 * @chunk: a pointer to an array of chars 14353 * @size: number of chars in the array 14354 * @filename: an optional file name or URI 14355 * @encoding: the document encoding, or NULL 14356 * 14357 * Reset a push parser context 14358 * 14359 * Returns 0 in case of success and 1 in case of error 14360 */ 14361 int 14362 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14363 int size, const char *filename, const char *encoding) 14364 { 14365 xmlParserInputPtr inputStream; 14366 xmlParserInputBufferPtr buf; 14367 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14368 14369 if (ctxt == NULL) 14370 return(1); 14371 14372 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14373 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14374 14375 buf = xmlAllocParserInputBuffer(enc); 14376 if (buf == NULL) 14377 return(1); 14378 14379 if (ctxt == NULL) { 14380 xmlFreeParserInputBuffer(buf); 14381 return(1); 14382 } 14383 14384 xmlCtxtReset(ctxt); 14385 14386 if (ctxt->pushTab == NULL) { 14387 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14388 sizeof(xmlChar *)); 14389 if (ctxt->pushTab == NULL) { 14390 xmlErrMemory(ctxt, NULL); 14391 xmlFreeParserInputBuffer(buf); 14392 return(1); 14393 } 14394 } 14395 14396 if (filename == NULL) { 14397 ctxt->directory = NULL; 14398 } else { 14399 ctxt->directory = xmlParserGetDirectory(filename); 14400 } 14401 14402 inputStream = xmlNewInputStream(ctxt); 14403 if (inputStream == NULL) { 14404 xmlFreeParserInputBuffer(buf); 14405 return(1); 14406 } 14407 14408 if (filename == NULL) 14409 inputStream->filename = NULL; 14410 else 14411 inputStream->filename = (char *) 14412 xmlCanonicPath((const xmlChar *) filename); 14413 inputStream->buf = buf; 14414 inputStream->base = inputStream->buf->buffer->content; 14415 inputStream->cur = inputStream->buf->buffer->content; 14416 inputStream->end = 14417 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 14418 14419 inputPush(ctxt, 
inputStream); 14420 14421 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14422 (ctxt->input->buf != NULL)) { 14423 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 14424 int cur = ctxt->input->cur - ctxt->input->base; 14425 14426 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14427 14428 ctxt->input->base = ctxt->input->buf->buffer->content + base; 14429 ctxt->input->cur = ctxt->input->base + cur; 14430 ctxt->input->end = 14431 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> 14432 use]; 14433 #ifdef DEBUG_PUSH 14434 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14435 #endif 14436 } 14437 14438 if (encoding != NULL) { 14439 xmlCharEncodingHandlerPtr hdlr; 14440 14441 if (ctxt->encoding != NULL) 14442 xmlFree((xmlChar *) ctxt->encoding); 14443 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14444 14445 hdlr = xmlFindCharEncodingHandler(encoding); 14446 if (hdlr != NULL) { 14447 xmlSwitchToEncoding(ctxt, hdlr); 14448 } else { 14449 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14450 "Unsupported encoding %s\n", BAD_CAST encoding); 14451 } 14452 } else if (enc != XML_CHAR_ENCODING_NONE) { 14453 xmlSwitchEncoding(ctxt, enc); 14454 } 14455 14456 return(0); 14457 } 14458 14459 14460 /** 14461 * xmlCtxtUseOptionsInternal: 14462 * @ctxt: an XML parser context 14463 * @options: a combination of xmlParserOption 14464 * @encoding: the user provided encoding to use 14465 * 14466 * Applies the options to the parser context 14467 * 14468 * Returns 0 in case of success, the set of unknown or unimplemented options 14469 * in case of error. 
14470 */ 14471 static int 14472 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 14473 { 14474 if (ctxt == NULL) 14475 return(-1); 14476 if (encoding != NULL) { 14477 if (ctxt->encoding != NULL) 14478 xmlFree((xmlChar *) ctxt->encoding); 14479 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14480 } 14481 if (options & XML_PARSE_RECOVER) { 14482 ctxt->recovery = 1; 14483 options -= XML_PARSE_RECOVER; 14484 ctxt->options |= XML_PARSE_RECOVER; 14485 } else 14486 ctxt->recovery = 0; 14487 if (options & XML_PARSE_DTDLOAD) { 14488 ctxt->loadsubset = XML_DETECT_IDS; 14489 options -= XML_PARSE_DTDLOAD; 14490 ctxt->options |= XML_PARSE_DTDLOAD; 14491 } else 14492 ctxt->loadsubset = 0; 14493 if (options & XML_PARSE_DTDATTR) { 14494 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 14495 options -= XML_PARSE_DTDATTR; 14496 ctxt->options |= XML_PARSE_DTDATTR; 14497 } 14498 if (options & XML_PARSE_NOENT) { 14499 ctxt->replaceEntities = 1; 14500 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 14501 options -= XML_PARSE_NOENT; 14502 ctxt->options |= XML_PARSE_NOENT; 14503 } else 14504 ctxt->replaceEntities = 0; 14505 if (options & XML_PARSE_PEDANTIC) { 14506 ctxt->pedantic = 1; 14507 options -= XML_PARSE_PEDANTIC; 14508 ctxt->options |= XML_PARSE_PEDANTIC; 14509 } else 14510 ctxt->pedantic = 0; 14511 if (options & XML_PARSE_NOBLANKS) { 14512 ctxt->keepBlanks = 0; 14513 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 14514 options -= XML_PARSE_NOBLANKS; 14515 ctxt->options |= XML_PARSE_NOBLANKS; 14516 } else 14517 ctxt->keepBlanks = 1; 14518 if (options & XML_PARSE_DTDVALID) { 14519 ctxt->validate = 1; 14520 if (options & XML_PARSE_NOWARNING) 14521 ctxt->vctxt.warning = NULL; 14522 if (options & XML_PARSE_NOERROR) 14523 ctxt->vctxt.error = NULL; 14524 options -= XML_PARSE_DTDVALID; 14525 ctxt->options |= XML_PARSE_DTDVALID; 14526 } else 14527 ctxt->validate = 0; 14528 if (options & XML_PARSE_NOWARNING) { 14529 ctxt->sax->warning = NULL; 
14530 options -= XML_PARSE_NOWARNING; 14531 } 14532 if (options & XML_PARSE_NOERROR) { 14533 ctxt->sax->error = NULL; 14534 ctxt->sax->fatalError = NULL; 14535 options -= XML_PARSE_NOERROR; 14536 } 14537 #ifdef LIBXML_SAX1_ENABLED 14538 if (options & XML_PARSE_SAX1) { 14539 ctxt->sax->startElement = xmlSAX2StartElement; 14540 ctxt->sax->endElement = xmlSAX2EndElement; 14541 ctxt->sax->startElementNs = NULL; 14542 ctxt->sax->endElementNs = NULL; 14543 ctxt->sax->initialized = 1; 14544 options -= XML_PARSE_SAX1; 14545 ctxt->options |= XML_PARSE_SAX1; 14546 } 14547 #endif /* LIBXML_SAX1_ENABLED */ 14548 if (options & XML_PARSE_NODICT) { 14549 ctxt->dictNames = 0; 14550 options -= XML_PARSE_NODICT; 14551 ctxt->options |= XML_PARSE_NODICT; 14552 } else { 14553 ctxt->dictNames = 1; 14554 } 14555 if (options & XML_PARSE_NOCDATA) { 14556 ctxt->sax->cdataBlock = NULL; 14557 options -= XML_PARSE_NOCDATA; 14558 ctxt->options |= XML_PARSE_NOCDATA; 14559 } 14560 if (options & XML_PARSE_NSCLEAN) { 14561 ctxt->options |= XML_PARSE_NSCLEAN; 14562 options -= XML_PARSE_NSCLEAN; 14563 } 14564 if (options & XML_PARSE_NONET) { 14565 ctxt->options |= XML_PARSE_NONET; 14566 options -= XML_PARSE_NONET; 14567 } 14568 if (options & XML_PARSE_COMPACT) { 14569 ctxt->options |= XML_PARSE_COMPACT; 14570 options -= XML_PARSE_COMPACT; 14571 } 14572 if (options & XML_PARSE_OLD10) { 14573 ctxt->options |= XML_PARSE_OLD10; 14574 options -= XML_PARSE_OLD10; 14575 } 14576 if (options & XML_PARSE_NOBASEFIX) { 14577 ctxt->options |= XML_PARSE_NOBASEFIX; 14578 options -= XML_PARSE_NOBASEFIX; 14579 } 14580 if (options & XML_PARSE_HUGE) { 14581 ctxt->options |= XML_PARSE_HUGE; 14582 options -= XML_PARSE_HUGE; 14583 } 14584 if (options & XML_PARSE_OLDSAX) { 14585 ctxt->options |= XML_PARSE_OLDSAX; 14586 options -= XML_PARSE_OLDSAX; 14587 } 14588 if (options & XML_PARSE_IGNORE_ENC) { 14589 ctxt->options |= XML_PARSE_IGNORE_ENC; 14590 options -= XML_PARSE_IGNORE_ENC; 14591 } 14592 ctxt->linenumbers = 1; 14593 
return (options); 14594 } 14595 14596 /** 14597 * xmlCtxtUseOptions: 14598 * @ctxt: an XML parser context 14599 * @options: a combination of xmlParserOption 14600 * 14601 * Applies the options to the parser context 14602 * 14603 * Returns 0 in case of success, the set of unknown or unimplemented options 14604 * in case of error. 14605 */ 14606 int 14607 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 14608 { 14609 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 14610 } 14611 14612 /** 14613 * xmlDoRead: 14614 * @ctxt: an XML parser context 14615 * @URL: the base URL to use for the document 14616 * @encoding: the document encoding, or NULL 14617 * @options: a combination of xmlParserOption 14618 * @reuse: keep the context for reuse 14619 * 14620 * Common front-end for the xmlRead functions 14621 * 14622 * Returns the resulting document tree or NULL 14623 */ 14624 static xmlDocPtr 14625 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 14626 int options, int reuse) 14627 { 14628 xmlDocPtr ret; 14629 14630 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 14631 if (encoding != NULL) { 14632 xmlCharEncodingHandlerPtr hdlr; 14633 14634 hdlr = xmlFindCharEncodingHandler(encoding); 14635 if (hdlr != NULL) 14636 xmlSwitchToEncoding(ctxt, hdlr); 14637 } 14638 if ((URL != NULL) && (ctxt->input != NULL) && 14639 (ctxt->input->filename == NULL)) 14640 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 14641 xmlParseDocument(ctxt); 14642 if ((ctxt->wellFormed) || ctxt->recovery) 14643 ret = ctxt->myDoc; 14644 else { 14645 ret = NULL; 14646 if (ctxt->myDoc != NULL) { 14647 xmlFreeDoc(ctxt->myDoc); 14648 } 14649 } 14650 ctxt->myDoc = NULL; 14651 if (!reuse) { 14652 xmlFreeParserCtxt(ctxt); 14653 } 14654 14655 return (ret); 14656 } 14657 14658 /** 14659 * xmlReadDoc: 14660 * @cur: a pointer to a zero terminated string 14661 * @URL: the base URL to use for the document 14662 * @encoding: the document encoding, or NULL 14663 * 
@options: a combination of xmlParserOption 14664 * 14665 * parse an XML in-memory document and build a tree. 14666 * 14667 * Returns the resulting document tree 14668 */ 14669 xmlDocPtr 14670 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 14671 { 14672 xmlParserCtxtPtr ctxt; 14673 14674 if (cur == NULL) 14675 return (NULL); 14676 14677 ctxt = xmlCreateDocParserCtxt(cur); 14678 if (ctxt == NULL) 14679 return (NULL); 14680 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14681 } 14682 14683 /** 14684 * xmlReadFile: 14685 * @filename: a file or URL 14686 * @encoding: the document encoding, or NULL 14687 * @options: a combination of xmlParserOption 14688 * 14689 * parse an XML file from the filesystem or the network. 14690 * 14691 * Returns the resulting document tree 14692 */ 14693 xmlDocPtr 14694 xmlReadFile(const char *filename, const char *encoding, int options) 14695 { 14696 xmlParserCtxtPtr ctxt; 14697 14698 ctxt = xmlCreateURLParserCtxt(filename, options); 14699 if (ctxt == NULL) 14700 return (NULL); 14701 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 14702 } 14703 14704 /** 14705 * xmlReadMemory: 14706 * @buffer: a pointer to a char array 14707 * @size: the size of the array 14708 * @URL: the base URL to use for the document 14709 * @encoding: the document encoding, or NULL 14710 * @options: a combination of xmlParserOption 14711 * 14712 * parse an XML in-memory document and build a tree. 
14713 * 14714 * Returns the resulting document tree 14715 */ 14716 xmlDocPtr 14717 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 14718 { 14719 xmlParserCtxtPtr ctxt; 14720 14721 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14722 if (ctxt == NULL) 14723 return (NULL); 14724 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14725 } 14726 14727 /** 14728 * xmlReadFd: 14729 * @fd: an open file descriptor 14730 * @URL: the base URL to use for the document 14731 * @encoding: the document encoding, or NULL 14732 * @options: a combination of xmlParserOption 14733 * 14734 * parse an XML from a file descriptor and build a tree. 14735 * NOTE that the file descriptor will not be closed when the 14736 * reader is closed or reset. 14737 * 14738 * Returns the resulting document tree 14739 */ 14740 xmlDocPtr 14741 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 14742 { 14743 xmlParserCtxtPtr ctxt; 14744 xmlParserInputBufferPtr input; 14745 xmlParserInputPtr stream; 14746 14747 if (fd < 0) 14748 return (NULL); 14749 14750 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14751 if (input == NULL) 14752 return (NULL); 14753 input->closecallback = NULL; 14754 ctxt = xmlNewParserCtxt(); 14755 if (ctxt == NULL) { 14756 xmlFreeParserInputBuffer(input); 14757 return (NULL); 14758 } 14759 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14760 if (stream == NULL) { 14761 xmlFreeParserInputBuffer(input); 14762 xmlFreeParserCtxt(ctxt); 14763 return (NULL); 14764 } 14765 inputPush(ctxt, stream); 14766 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14767 } 14768 14769 /** 14770 * xmlReadIO: 14771 * @ioread: an I/O read function 14772 * @ioclose: an I/O close function 14773 * @ioctx: an I/O handler 14774 * @URL: the base URL to use for the document 14775 * @encoding: the document encoding, or NULL 14776 * @options: a combination of xmlParserOption 14777 * 14778 * parse an XML 
document from I/O functions and source and build a tree. 14779 * 14780 * Returns the resulting document tree 14781 */ 14782 xmlDocPtr 14783 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 14784 void *ioctx, const char *URL, const char *encoding, int options) 14785 { 14786 xmlParserCtxtPtr ctxt; 14787 xmlParserInputBufferPtr input; 14788 xmlParserInputPtr stream; 14789 14790 if (ioread == NULL) 14791 return (NULL); 14792 14793 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 14794 XML_CHAR_ENCODING_NONE); 14795 if (input == NULL) 14796 return (NULL); 14797 ctxt = xmlNewParserCtxt(); 14798 if (ctxt == NULL) { 14799 xmlFreeParserInputBuffer(input); 14800 return (NULL); 14801 } 14802 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14803 if (stream == NULL) { 14804 xmlFreeParserInputBuffer(input); 14805 xmlFreeParserCtxt(ctxt); 14806 return (NULL); 14807 } 14808 inputPush(ctxt, stream); 14809 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14810 } 14811 14812 /** 14813 * xmlCtxtReadDoc: 14814 * @ctxt: an XML parser context 14815 * @cur: a pointer to a zero terminated string 14816 * @URL: the base URL to use for the document 14817 * @encoding: the document encoding, or NULL 14818 * @options: a combination of xmlParserOption 14819 * 14820 * parse an XML in-memory document and build a tree. 
14821 * This reuses the existing @ctxt parser context 14822 * 14823 * Returns the resulting document tree 14824 */ 14825 xmlDocPtr 14826 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 14827 const char *URL, const char *encoding, int options) 14828 { 14829 xmlParserInputPtr stream; 14830 14831 if (cur == NULL) 14832 return (NULL); 14833 if (ctxt == NULL) 14834 return (NULL); 14835 14836 xmlCtxtReset(ctxt); 14837 14838 stream = xmlNewStringInputStream(ctxt, cur); 14839 if (stream == NULL) { 14840 return (NULL); 14841 } 14842 inputPush(ctxt, stream); 14843 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14844 } 14845 14846 /** 14847 * xmlCtxtReadFile: 14848 * @ctxt: an XML parser context 14849 * @filename: a file or URL 14850 * @encoding: the document encoding, or NULL 14851 * @options: a combination of xmlParserOption 14852 * 14853 * parse an XML file from the filesystem or the network. 14854 * This reuses the existing @ctxt parser context 14855 * 14856 * Returns the resulting document tree 14857 */ 14858 xmlDocPtr 14859 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 14860 const char *encoding, int options) 14861 { 14862 xmlParserInputPtr stream; 14863 14864 if (filename == NULL) 14865 return (NULL); 14866 if (ctxt == NULL) 14867 return (NULL); 14868 14869 xmlCtxtReset(ctxt); 14870 14871 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 14872 if (stream == NULL) { 14873 return (NULL); 14874 } 14875 inputPush(ctxt, stream); 14876 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 14877 } 14878 14879 /** 14880 * xmlCtxtReadMemory: 14881 * @ctxt: an XML parser context 14882 * @buffer: a pointer to a char array 14883 * @size: the size of the array 14884 * @URL: the base URL to use for the document 14885 * @encoding: the document encoding, or NULL 14886 * @options: a combination of xmlParserOption 14887 * 14888 * parse an XML in-memory document and build a tree. 
14889 * This reuses the existing @ctxt parser context 14890 * 14891 * Returns the resulting document tree 14892 */ 14893 xmlDocPtr 14894 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 14895 const char *URL, const char *encoding, int options) 14896 { 14897 xmlParserInputBufferPtr input; 14898 xmlParserInputPtr stream; 14899 14900 if (ctxt == NULL) 14901 return (NULL); 14902 if (buffer == NULL) 14903 return (NULL); 14904 14905 xmlCtxtReset(ctxt); 14906 14907 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14908 if (input == NULL) { 14909 return(NULL); 14910 } 14911 14912 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14913 if (stream == NULL) { 14914 xmlFreeParserInputBuffer(input); 14915 return(NULL); 14916 } 14917 14918 inputPush(ctxt, stream); 14919 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14920 } 14921 14922 /** 14923 * xmlCtxtReadFd: 14924 * @ctxt: an XML parser context 14925 * @fd: an open file descriptor 14926 * @URL: the base URL to use for the document 14927 * @encoding: the document encoding, or NULL 14928 * @options: a combination of xmlParserOption 14929 * 14930 * parse an XML from a file descriptor and build a tree. 14931 * This reuses the existing @ctxt parser context 14932 * NOTE that the file descriptor will not be closed when the 14933 * reader is closed or reset. 
14934 * 14935 * Returns the resulting document tree 14936 */ 14937 xmlDocPtr 14938 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 14939 const char *URL, const char *encoding, int options) 14940 { 14941 xmlParserInputBufferPtr input; 14942 xmlParserInputPtr stream; 14943 14944 if (fd < 0) 14945 return (NULL); 14946 if (ctxt == NULL) 14947 return (NULL); 14948 14949 xmlCtxtReset(ctxt); 14950 14951 14952 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14953 if (input == NULL) 14954 return (NULL); 14955 input->closecallback = NULL; 14956 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14957 if (stream == NULL) { 14958 xmlFreeParserInputBuffer(input); 14959 return (NULL); 14960 } 14961 inputPush(ctxt, stream); 14962 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14963 } 14964 14965 /** 14966 * xmlCtxtReadIO: 14967 * @ctxt: an XML parser context 14968 * @ioread: an I/O read function 14969 * @ioclose: an I/O close function 14970 * @ioctx: an I/O handler 14971 * @URL: the base URL to use for the document 14972 * @encoding: the document encoding, or NULL 14973 * @options: a combination of xmlParserOption 14974 * 14975 * parse an XML document from I/O functions and source and build a tree. 
14976 * This reuses the existing @ctxt parser context 14977 * 14978 * Returns the resulting document tree 14979 */ 14980 xmlDocPtr 14981 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 14982 xmlInputCloseCallback ioclose, void *ioctx, 14983 const char *URL, 14984 const char *encoding, int options) 14985 { 14986 xmlParserInputBufferPtr input; 14987 xmlParserInputPtr stream; 14988 14989 if (ioread == NULL) 14990 return (NULL); 14991 if (ctxt == NULL) 14992 return (NULL); 14993 14994 xmlCtxtReset(ctxt); 14995 14996 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 14997 XML_CHAR_ENCODING_NONE); 14998 if (input == NULL) 14999 return (NULL); 15000 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15001 if (stream == NULL) { 15002 xmlFreeParserInputBuffer(input); 15003 return (NULL); 15004 } 15005 inputPush(ctxt, stream); 15006 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15007 } 15008 15009 #define bottom_parser 15010 #include "elfgcchack.h" 15011