/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * daniel (at) veillard.com 31 */ 32 33 #define IN_LIBXML 34 #include "libxml.h" 35 36 #if defined(WIN32) && !defined (__CYGWIN__) 37 #define XML_DIR_SEP '\\' 38 #else 39 #define XML_DIR_SEP '/' 40 #endif 41 42 #include <stdlib.h> 43 #include <limits.h> 44 #include <string.h> 45 #include <stdarg.h> 46 #include <libxml/xmlmemory.h> 47 #include <libxml/threads.h> 48 #include <libxml/globals.h> 49 #include <libxml/tree.h> 50 #include <libxml/parser.h> 51 #include <libxml/parserInternals.h> 52 #include <libxml/valid.h> 53 #include <libxml/entities.h> 54 #include <libxml/xmlerror.h> 55 #include <libxml/encoding.h> 56 #include <libxml/xmlIO.h> 57 #include <libxml/uri.h> 58 #ifdef LIBXML_CATALOG_ENABLED 59 #include <libxml/catalog.h> 60 #endif 61 #ifdef LIBXML_SCHEMAS_ENABLED 62 #include <libxml/xmlschemastypes.h> 63 #include <libxml/relaxng.h> 64 #endif 65 #ifdef HAVE_CTYPE_H 66 #include <ctype.h> 67 #endif 68 #ifdef HAVE_STDLIB_H 69 #include <stdlib.h> 70 #endif 71 #ifdef HAVE_SYS_STAT_H 72 #include <sys/stat.h> 73 #endif 74 #ifdef HAVE_FCNTL_H 75 #include <fcntl.h> 76 #endif 77 #ifdef HAVE_UNISTD_H 78 #include <unistd.h> 79 #endif 80 #ifdef HAVE_ZLIB_H 81 #include <zlib.h> 82 #endif 83 #ifdef HAVE_LZMA_H 84 #include <lzma.h> 85 #endif 86 87 #include "buf.h" 88 #include "enc.h" 89 90 static void 91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 92 93 static xmlParserCtxtPtr 94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 95 const xmlChar *base, xmlParserCtxtPtr pctx); 96 97 /************************************************************************ 98 * * 99 * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 100 * * 101 ************************************************************************/ 102 103 #define XML_PARSER_BIG_ENTITY 1000 104 #define XML_PARSER_LOT_ENTITY 5000 105 106 /* 107 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 108 * replacement over the size in byte of the input indicates that you have 109 * and eponential behaviour. A value of 10 correspond to at least 3 entity 110 * replacement per byte of input. 111 */ 112 #define XML_PARSER_NON_LINEAR 10 113 114 /* 115 * xmlParserEntityCheck 116 * 117 * Function to check non-linear entity expansion behaviour 118 * This is here to detect and stop exponential linear entity expansion 119 * This is not a limitation of the parser but a safety 120 * boundary feature. It can be disabled with the XML_PARSE_HUGE 121 * parser option. 122 */ 123 static int 124 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 125 xmlEntityPtr ent, size_t replacement) 126 { 127 size_t consumed = 0; 128 129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 130 return (0); 131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 132 return (1); 133 if (replacement != 0) { 134 if (replacement < XML_MAX_TEXT_LENGTH) 135 return(0); 136 137 /* 138 * If the volume of entity copy reaches 10 times the 139 * amount of parsed data and over the large text threshold 140 * then that's very likely to be an abuse. 
141 */ 142 if (ctxt->input != NULL) { 143 consumed = ctxt->input->consumed + 144 (ctxt->input->cur - ctxt->input->base); 145 } 146 consumed += ctxt->sizeentities; 147 148 if (replacement < XML_PARSER_NON_LINEAR * consumed) 149 return(0); 150 } else if (size != 0) { 151 /* 152 * Do the check based on the replacement size of the entity 153 */ 154 if (size < XML_PARSER_BIG_ENTITY) 155 return(0); 156 157 /* 158 * A limit on the amount of text data reasonably used 159 */ 160 if (ctxt->input != NULL) { 161 consumed = ctxt->input->consumed + 162 (ctxt->input->cur - ctxt->input->base); 163 } 164 consumed += ctxt->sizeentities; 165 166 if ((size < XML_PARSER_NON_LINEAR * consumed) && 167 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 168 return (0); 169 } else if (ent != NULL) { 170 /* 171 * use the number of parsed entities in the replacement 172 */ 173 size = ent->checked / 2; 174 175 /* 176 * The amount of data parsed counting entities size only once 177 */ 178 if (ctxt->input != NULL) { 179 consumed = ctxt->input->consumed + 180 (ctxt->input->cur - ctxt->input->base); 181 } 182 consumed += ctxt->sizeentities; 183 184 /* 185 * Check the density of entities for the amount of data 186 * knowing an entity reference will take at least 3 bytes 187 */ 188 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 189 return (0); 190 } else { 191 /* 192 * strange we got no data for checking just return 193 */ 194 return (0); 195 } 196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 197 return (1); 198 } 199 200 /** 201 * xmlParserMaxDepth: 202 * 203 * arbitrary depth limit for the XML documents that we allow to 204 * process. This is not a limitation of the parser but a safety 205 * boundary feature. It can be disabled with the XML_PARSE_HUGE 206 * parser option. 
207 */ 208 unsigned int xmlParserMaxDepth = 256; 209 210 211 212 #define SAX2 1 213 #define XML_PARSER_BIG_BUFFER_SIZE 300 214 #define XML_PARSER_BUFFER_SIZE 100 215 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 216 217 /** 218 * XML_PARSER_CHUNK_SIZE 219 * 220 * When calling GROW that's the minimal amount of data 221 * the parser expected to have received. It is not a hard 222 * limit but an optimization when reading strings like Names 223 * It is not strictly needed as long as inputs available characters 224 * are followed by 0, which should be provided by the I/O level 225 */ 226 #define XML_PARSER_CHUNK_SIZE 100 227 228 /* 229 * List of XML prefixed PI allowed by W3C specs 230 */ 231 232 static const char *xmlW3CPIs[] = { 233 "xml-stylesheet", 234 "xml-model", 235 NULL 236 }; 237 238 239 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 240 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 241 const xmlChar **str); 242 243 static xmlParserErrors 244 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 245 xmlSAXHandlerPtr sax, 246 void *user_data, int depth, const xmlChar *URL, 247 const xmlChar *ID, xmlNodePtr *list); 248 249 static int 250 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 251 const char *encoding); 252 #ifdef LIBXML_LEGACY_ENABLED 253 static void 254 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 255 xmlNodePtr lastNode); 256 #endif /* LIBXML_LEGACY_ENABLED */ 257 258 static xmlParserErrors 259 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 260 const xmlChar *string, void *user_data, xmlNodePtr *lst); 261 262 static int 263 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 264 265 /************************************************************************ 266 * * 267 * Some factorized error routines * 268 * * 269 ************************************************************************/ 270 271 /** 272 * 
xmlErrAttributeDup: 273 * @ctxt: an XML parser context 274 * @prefix: the attribute prefix 275 * @localname: the attribute localname 276 * 277 * Handle a redefinition of attribute error 278 */ 279 static void 280 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 281 const xmlChar * localname) 282 { 283 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 284 (ctxt->instate == XML_PARSER_EOF)) 285 return; 286 if (ctxt != NULL) 287 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 288 289 if (prefix == NULL) 290 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 291 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 292 (const char *) localname, NULL, NULL, 0, 0, 293 "Attribute %s redefined\n", localname); 294 else 295 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 296 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 297 (const char *) prefix, (const char *) localname, 298 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 299 localname); 300 if (ctxt != NULL) { 301 ctxt->wellFormed = 0; 302 if (ctxt->recovery == 0) 303 ctxt->disableSAX = 1; 304 } 305 } 306 307 /** 308 * xmlFatalErr: 309 * @ctxt: an XML parser context 310 * @error: the error number 311 * @extra: extra information string 312 * 313 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 314 */ 315 static void 316 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 317 { 318 const char *errmsg; 319 char errstr[129] = ""; 320 321 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 322 (ctxt->instate == XML_PARSER_EOF)) 323 return; 324 switch (error) { 325 case XML_ERR_INVALID_HEX_CHARREF: 326 errmsg = "CharRef: invalid hexadecimal value"; 327 break; 328 case XML_ERR_INVALID_DEC_CHARREF: 329 errmsg = "CharRef: invalid decimal value"; 330 break; 331 case XML_ERR_INVALID_CHARREF: 332 errmsg = "CharRef: invalid value"; 333 break; 334 case XML_ERR_INTERNAL_ERROR: 335 errmsg = "internal error"; 336 break; 337 case XML_ERR_PEREF_AT_EOF: 338 errmsg = "PEReference at end of document"; 339 break; 340 case XML_ERR_PEREF_IN_PROLOG: 341 errmsg = "PEReference in prolog"; 342 break; 343 case XML_ERR_PEREF_IN_EPILOG: 344 errmsg = "PEReference in epilog"; 345 break; 346 case XML_ERR_PEREF_NO_NAME: 347 errmsg = "PEReference: no name"; 348 break; 349 case XML_ERR_PEREF_SEMICOL_MISSING: 350 errmsg = "PEReference: expecting ';'"; 351 break; 352 case XML_ERR_ENTITY_LOOP: 353 errmsg = "Detected an entity reference loop"; 354 break; 355 case XML_ERR_ENTITY_NOT_STARTED: 356 errmsg = "EntityValue: \" or ' expected"; 357 break; 358 case XML_ERR_ENTITY_PE_INTERNAL: 359 errmsg = "PEReferences forbidden in internal subset"; 360 break; 361 case XML_ERR_ENTITY_NOT_FINISHED: 362 errmsg = "EntityValue: \" or ' expected"; 363 break; 364 case XML_ERR_ATTRIBUTE_NOT_STARTED: 365 errmsg = "AttValue: \" or ' expected"; 366 break; 367 case XML_ERR_LT_IN_ATTRIBUTE: 368 errmsg = "Unescaped '<' not allowed in attributes values"; 369 break; 370 case XML_ERR_LITERAL_NOT_STARTED: 371 errmsg = "SystemLiteral \" or ' expected"; 372 break; 373 case XML_ERR_LITERAL_NOT_FINISHED: 374 errmsg = "Unfinished System or Public ID \" or ' expected"; 375 break; 376 case XML_ERR_MISPLACED_CDATA_END: 377 errmsg = "Sequence ']]>' not allowed in 
content"; 378 break; 379 case XML_ERR_URI_REQUIRED: 380 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 381 break; 382 case XML_ERR_PUBID_REQUIRED: 383 errmsg = "PUBLIC, the Public Identifier is missing"; 384 break; 385 case XML_ERR_HYPHEN_IN_COMMENT: 386 errmsg = "Comment must not contain '--' (double-hyphen)"; 387 break; 388 case XML_ERR_PI_NOT_STARTED: 389 errmsg = "xmlParsePI : no target name"; 390 break; 391 case XML_ERR_RESERVED_XML_NAME: 392 errmsg = "Invalid PI name"; 393 break; 394 case XML_ERR_NOTATION_NOT_STARTED: 395 errmsg = "NOTATION: Name expected here"; 396 break; 397 case XML_ERR_NOTATION_NOT_FINISHED: 398 errmsg = "'>' required to close NOTATION declaration"; 399 break; 400 case XML_ERR_VALUE_REQUIRED: 401 errmsg = "Entity value required"; 402 break; 403 case XML_ERR_URI_FRAGMENT: 404 errmsg = "Fragment not allowed"; 405 break; 406 case XML_ERR_ATTLIST_NOT_STARTED: 407 errmsg = "'(' required to start ATTLIST enumeration"; 408 break; 409 case XML_ERR_NMTOKEN_REQUIRED: 410 errmsg = "NmToken expected in ATTLIST enumeration"; 411 break; 412 case XML_ERR_ATTLIST_NOT_FINISHED: 413 errmsg = "')' required to finish ATTLIST enumeration"; 414 break; 415 case XML_ERR_MIXED_NOT_STARTED: 416 errmsg = "MixedContentDecl : '|' or ')*' expected"; 417 break; 418 case XML_ERR_PCDATA_REQUIRED: 419 errmsg = "MixedContentDecl : '#PCDATA' expected"; 420 break; 421 case XML_ERR_ELEMCONTENT_NOT_STARTED: 422 errmsg = "ContentDecl : Name or '(' expected"; 423 break; 424 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 425 errmsg = "ContentDecl : ',' '|' or ')' expected"; 426 break; 427 case XML_ERR_PEREF_IN_INT_SUBSET: 428 errmsg = 429 "PEReference: forbidden within markup decl in internal subset"; 430 break; 431 case XML_ERR_GT_REQUIRED: 432 errmsg = "expected '>'"; 433 break; 434 case XML_ERR_CONDSEC_INVALID: 435 errmsg = "XML conditional section '[' expected"; 436 break; 437 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 438 errmsg = "Content error in the external subset"; 439 break; 440 
case XML_ERR_CONDSEC_INVALID_KEYWORD: 441 errmsg = 442 "conditional section INCLUDE or IGNORE keyword expected"; 443 break; 444 case XML_ERR_CONDSEC_NOT_FINISHED: 445 errmsg = "XML conditional section not closed"; 446 break; 447 case XML_ERR_XMLDECL_NOT_STARTED: 448 errmsg = "Text declaration '<?xml' required"; 449 break; 450 case XML_ERR_XMLDECL_NOT_FINISHED: 451 errmsg = "parsing XML declaration: '?>' expected"; 452 break; 453 case XML_ERR_EXT_ENTITY_STANDALONE: 454 errmsg = "external parsed entities cannot be standalone"; 455 break; 456 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 457 errmsg = "EntityRef: expecting ';'"; 458 break; 459 case XML_ERR_DOCTYPE_NOT_FINISHED: 460 errmsg = "DOCTYPE improperly terminated"; 461 break; 462 case XML_ERR_LTSLASH_REQUIRED: 463 errmsg = "EndTag: '</' not found"; 464 break; 465 case XML_ERR_EQUAL_REQUIRED: 466 errmsg = "expected '='"; 467 break; 468 case XML_ERR_STRING_NOT_CLOSED: 469 errmsg = "String not closed expecting \" or '"; 470 break; 471 case XML_ERR_STRING_NOT_STARTED: 472 errmsg = "String not started expecting ' or \""; 473 break; 474 case XML_ERR_ENCODING_NAME: 475 errmsg = "Invalid XML encoding name"; 476 break; 477 case XML_ERR_STANDALONE_VALUE: 478 errmsg = "standalone accepts only 'yes' or 'no'"; 479 break; 480 case XML_ERR_DOCUMENT_EMPTY: 481 errmsg = "Document is empty"; 482 break; 483 case XML_ERR_DOCUMENT_END: 484 errmsg = "Extra content at the end of the document"; 485 break; 486 case XML_ERR_NOT_WELL_BALANCED: 487 errmsg = "chunk is not well balanced"; 488 break; 489 case XML_ERR_EXTRA_CONTENT: 490 errmsg = "extra content at the end of well balanced chunk"; 491 break; 492 case XML_ERR_VERSION_MISSING: 493 errmsg = "Malformed declaration expecting version"; 494 break; 495 case XML_ERR_NAME_TOO_LONG: 496 errmsg = "Name too long use XML_PARSE_HUGE option"; 497 break; 498 #if 0 499 case: 500 errmsg = ""; 501 break; 502 #endif 503 default: 504 errmsg = "Unregistered error message"; 505 } 506 if (info == NULL) 507 
snprintf(errstr, 128, "%s\n", errmsg); 508 else 509 snprintf(errstr, 128, "%s: %%s\n", errmsg); 510 if (ctxt != NULL) 511 ctxt->errNo = error; 512 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 513 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0], 514 info); 515 if (ctxt != NULL) { 516 ctxt->wellFormed = 0; 517 if (ctxt->recovery == 0) 518 ctxt->disableSAX = 1; 519 } 520 } 521 522 /** 523 * xmlFatalErrMsg: 524 * @ctxt: an XML parser context 525 * @error: the error number 526 * @msg: the error message 527 * 528 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 529 */ 530 static void 531 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 532 const char *msg) 533 { 534 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 535 (ctxt->instate == XML_PARSER_EOF)) 536 return; 537 if (ctxt != NULL) 538 ctxt->errNo = error; 539 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 540 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 541 if (ctxt != NULL) { 542 ctxt->wellFormed = 0; 543 if (ctxt->recovery == 0) 544 ctxt->disableSAX = 1; 545 } 546 } 547 548 /** 549 * xmlWarningMsg: 550 * @ctxt: an XML parser context 551 * @error: the error number 552 * @msg: the error message 553 * @str1: extra data 554 * @str2: extra data 555 * 556 * Handle a warning. 557 */ 558 static void 559 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 560 const char *msg, const xmlChar *str1, const xmlChar *str2) 561 { 562 xmlStructuredErrorFunc schannel = NULL; 563 564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 565 (ctxt->instate == XML_PARSER_EOF)) 566 return; 567 if ((ctxt != NULL) && (ctxt->sax != NULL) && 568 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 569 schannel = ctxt->sax->serror; 570 if (ctxt != NULL) { 571 __xmlRaiseError(schannel, 572 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 573 ctxt->userData, 574 ctxt, NULL, XML_FROM_PARSER, error, 575 XML_ERR_WARNING, NULL, 0, 576 (const char *) str1, (const char *) str2, NULL, 0, 0, 577 msg, (const char *) str1, (const char *) str2); 578 } else { 579 __xmlRaiseError(schannel, NULL, NULL, 580 ctxt, NULL, XML_FROM_PARSER, error, 581 XML_ERR_WARNING, NULL, 0, 582 (const char *) str1, (const char *) str2, NULL, 0, 0, 583 msg, (const char *) str1, (const char *) str2); 584 } 585 } 586 587 /** 588 * xmlValidityError: 589 * @ctxt: an XML parser context 590 * @error: the error number 591 * @msg: the error message 592 * @str1: extra data 593 * 594 * Handle a validity error. 595 */ 596 static void 597 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 598 const char *msg, const xmlChar *str1, const xmlChar *str2) 599 { 600 xmlStructuredErrorFunc schannel = NULL; 601 602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 603 (ctxt->instate == XML_PARSER_EOF)) 604 return; 605 if (ctxt != NULL) { 606 ctxt->errNo = error; 607 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 608 schannel = ctxt->sax->serror; 609 } 610 if (ctxt != NULL) { 611 __xmlRaiseError(schannel, 612 ctxt->vctxt.error, ctxt->vctxt.userData, 613 ctxt, NULL, XML_FROM_DTD, error, 614 XML_ERR_ERROR, NULL, 0, (const char *) str1, 615 (const char *) str2, NULL, 0, 0, 616 msg, (const char *) str1, (const char *) str2); 617 ctxt->valid = 0; 618 } else { 619 __xmlRaiseError(schannel, NULL, NULL, 620 ctxt, NULL, XML_FROM_DTD, error, 621 XML_ERR_ERROR, NULL, 0, (const char *) str1, 622 (const char *) str2, NULL, 0, 0, 623 msg, (const char *) str1, (const char *) str2); 624 } 625 } 626 627 /** 628 * xmlFatalErrMsgInt: 629 * @ctxt: an XML parser context 630 * @error: the error number 631 * @msg: the error message 632 * @val: an integer value 633 * 634 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 635 */ 636 static void 637 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 638 const char *msg, int val) 639 { 640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 641 (ctxt->instate == XML_PARSER_EOF)) 642 return; 643 if (ctxt != NULL) 644 ctxt->errNo = error; 645 __xmlRaiseError(NULL, NULL, NULL, 646 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 647 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 648 if (ctxt != NULL) { 649 ctxt->wellFormed = 0; 650 if (ctxt->recovery == 0) 651 ctxt->disableSAX = 1; 652 } 653 } 654 655 /** 656 * xmlFatalErrMsgStrIntStr: 657 * @ctxt: an XML parser context 658 * @error: the error number 659 * @msg: the error message 660 * @str1: an string info 661 * @val: an integer value 662 * @str2: an string info 663 * 664 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 665 */ 666 static void 667 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 668 const char *msg, const xmlChar *str1, int val, 669 const xmlChar *str2) 670 { 671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 672 (ctxt->instate == XML_PARSER_EOF)) 673 return; 674 if (ctxt != NULL) 675 ctxt->errNo = error; 676 __xmlRaiseError(NULL, NULL, NULL, 677 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 678 NULL, 0, (const char *) str1, (const char *) str2, 679 NULL, val, 0, msg, str1, val, str2); 680 if (ctxt != NULL) { 681 ctxt->wellFormed = 0; 682 if (ctxt->recovery == 0) 683 ctxt->disableSAX = 1; 684 } 685 } 686 687 /** 688 * xmlFatalErrMsgStr: 689 * @ctxt: an XML parser context 690 * @error: the error number 691 * @msg: the error message 692 * @val: a string value 693 * 694 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 695 */ 696 static void 697 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 698 const char *msg, const xmlChar * val) 699 { 700 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 701 (ctxt->instate == XML_PARSER_EOF)) 702 return; 703 if (ctxt != NULL) 704 ctxt->errNo = error; 705 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 706 XML_FROM_PARSER, error, XML_ERR_FATAL, 707 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 708 val); 709 if (ctxt != NULL) { 710 ctxt->wellFormed = 0; 711 if (ctxt->recovery == 0) 712 ctxt->disableSAX = 1; 713 } 714 } 715 716 /** 717 * xmlErrMsgStr: 718 * @ctxt: an XML parser context 719 * @error: the error number 720 * @msg: the error message 721 * @val: a string value 722 * 723 * Handle a non fatal parser error 724 */ 725 static void 726 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 727 const char *msg, const xmlChar * val) 728 { 729 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 730 (ctxt->instate == XML_PARSER_EOF)) 731 return; 732 if (ctxt != NULL) 733 ctxt->errNo = error; 734 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 735 XML_FROM_PARSER, error, XML_ERR_ERROR, 736 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 737 val); 738 } 739 740 /** 741 * xmlNsErr: 742 * @ctxt: an XML parser context 743 * @error: the error number 744 * @msg: the message 745 * @info1: extra information string 746 * @info2: extra information string 747 * 748 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 749 */ 750 static void 751 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 752 const char *msg, 753 const xmlChar * info1, const xmlChar * info2, 754 const xmlChar * info3) 755 { 756 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 757 (ctxt->instate == XML_PARSER_EOF)) 758 return; 759 if (ctxt != NULL) 760 ctxt->errNo = error; 761 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 762 XML_ERR_ERROR, NULL, 0, (const char *) info1, 763 (const char *) info2, (const char *) info3, 0, 0, msg, 764 info1, info2, info3); 765 if (ctxt != NULL) 766 ctxt->nsWellFormed = 0; 767 } 768 769 /** 770 * xmlNsWarn 771 * @ctxt: an XML parser context 772 * @error: the error number 773 * @msg: the message 774 * @info1: extra information string 775 * @info2: extra information string 776 * 777 * Handle a namespace warning error 778 */ 779 static void 780 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 781 const char *msg, 782 const xmlChar * info1, const xmlChar * info2, 783 const xmlChar * info3) 784 { 785 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 786 (ctxt->instate == XML_PARSER_EOF)) 787 return; 788 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 789 XML_ERR_WARNING, NULL, 0, (const char *) info1, 790 (const char *) info2, (const char *) info3, 0, 0, msg, 791 info1, info2, info3); 792 } 793 794 /************************************************************************ 795 * * 796 * Library wide options * 797 * * 798 ************************************************************************/ 799 800 /** 801 * xmlHasFeature: 802 * @feature: the feature to be examined 803 * 804 * Examines if the library has been compiled with a given feature. 805 * 806 * Returns a non-zero value if the feature exist, otherwise zero. 807 * Returns zero (0) if the feature does not exist or an unknown 808 * unknown feature is requested, non-zero otherwise. 
809 */ 810 int 811 xmlHasFeature(xmlFeature feature) 812 { 813 switch (feature) { 814 case XML_WITH_THREAD: 815 #ifdef LIBXML_THREAD_ENABLED 816 return(1); 817 #else 818 return(0); 819 #endif 820 case XML_WITH_TREE: 821 #ifdef LIBXML_TREE_ENABLED 822 return(1); 823 #else 824 return(0); 825 #endif 826 case XML_WITH_OUTPUT: 827 #ifdef LIBXML_OUTPUT_ENABLED 828 return(1); 829 #else 830 return(0); 831 #endif 832 case XML_WITH_PUSH: 833 #ifdef LIBXML_PUSH_ENABLED 834 return(1); 835 #else 836 return(0); 837 #endif 838 case XML_WITH_READER: 839 #ifdef LIBXML_READER_ENABLED 840 return(1); 841 #else 842 return(0); 843 #endif 844 case XML_WITH_PATTERN: 845 #ifdef LIBXML_PATTERN_ENABLED 846 return(1); 847 #else 848 return(0); 849 #endif 850 case XML_WITH_WRITER: 851 #ifdef LIBXML_WRITER_ENABLED 852 return(1); 853 #else 854 return(0); 855 #endif 856 case XML_WITH_SAX1: 857 #ifdef LIBXML_SAX1_ENABLED 858 return(1); 859 #else 860 return(0); 861 #endif 862 case XML_WITH_FTP: 863 #ifdef LIBXML_FTP_ENABLED 864 return(1); 865 #else 866 return(0); 867 #endif 868 case XML_WITH_HTTP: 869 #ifdef LIBXML_HTTP_ENABLED 870 return(1); 871 #else 872 return(0); 873 #endif 874 case XML_WITH_VALID: 875 #ifdef LIBXML_VALID_ENABLED 876 return(1); 877 #else 878 return(0); 879 #endif 880 case XML_WITH_HTML: 881 #ifdef LIBXML_HTML_ENABLED 882 return(1); 883 #else 884 return(0); 885 #endif 886 case XML_WITH_LEGACY: 887 #ifdef LIBXML_LEGACY_ENABLED 888 return(1); 889 #else 890 return(0); 891 #endif 892 case XML_WITH_C14N: 893 #ifdef LIBXML_C14N_ENABLED 894 return(1); 895 #else 896 return(0); 897 #endif 898 case XML_WITH_CATALOG: 899 #ifdef LIBXML_CATALOG_ENABLED 900 return(1); 901 #else 902 return(0); 903 #endif 904 case XML_WITH_XPATH: 905 #ifdef LIBXML_XPATH_ENABLED 906 return(1); 907 #else 908 return(0); 909 #endif 910 case XML_WITH_XPTR: 911 #ifdef LIBXML_XPTR_ENABLED 912 return(1); 913 #else 914 return(0); 915 #endif 916 case XML_WITH_XINCLUDE: 917 #ifdef LIBXML_XINCLUDE_ENABLED 918 return(1); 919 
#else 920 return(0); 921 #endif 922 case XML_WITH_ICONV: 923 #ifdef LIBXML_ICONV_ENABLED 924 return(1); 925 #else 926 return(0); 927 #endif 928 case XML_WITH_ISO8859X: 929 #ifdef LIBXML_ISO8859X_ENABLED 930 return(1); 931 #else 932 return(0); 933 #endif 934 case XML_WITH_UNICODE: 935 #ifdef LIBXML_UNICODE_ENABLED 936 return(1); 937 #else 938 return(0); 939 #endif 940 case XML_WITH_REGEXP: 941 #ifdef LIBXML_REGEXP_ENABLED 942 return(1); 943 #else 944 return(0); 945 #endif 946 case XML_WITH_AUTOMATA: 947 #ifdef LIBXML_AUTOMATA_ENABLED 948 return(1); 949 #else 950 return(0); 951 #endif 952 case XML_WITH_EXPR: 953 #ifdef LIBXML_EXPR_ENABLED 954 return(1); 955 #else 956 return(0); 957 #endif 958 case XML_WITH_SCHEMAS: 959 #ifdef LIBXML_SCHEMAS_ENABLED 960 return(1); 961 #else 962 return(0); 963 #endif 964 case XML_WITH_SCHEMATRON: 965 #ifdef LIBXML_SCHEMATRON_ENABLED 966 return(1); 967 #else 968 return(0); 969 #endif 970 case XML_WITH_MODULES: 971 #ifdef LIBXML_MODULES_ENABLED 972 return(1); 973 #else 974 return(0); 975 #endif 976 case XML_WITH_DEBUG: 977 #ifdef LIBXML_DEBUG_ENABLED 978 return(1); 979 #else 980 return(0); 981 #endif 982 case XML_WITH_DEBUG_MEM: 983 #ifdef DEBUG_MEMORY_LOCATION 984 return(1); 985 #else 986 return(0); 987 #endif 988 case XML_WITH_DEBUG_RUN: 989 #ifdef LIBXML_DEBUG_RUNTIME 990 return(1); 991 #else 992 return(0); 993 #endif 994 case XML_WITH_ZLIB: 995 #ifdef LIBXML_ZLIB_ENABLED 996 return(1); 997 #else 998 return(0); 999 #endif 1000 case XML_WITH_LZMA: 1001 #ifdef LIBXML_LZMA_ENABLED 1002 return(1); 1003 #else 1004 return(0); 1005 #endif 1006 case XML_WITH_ICU: 1007 #ifdef LIBXML_ICU_ENABLED 1008 return(1); 1009 #else 1010 return(0); 1011 #endif 1012 default: 1013 break; 1014 } 1015 return(0); 1016 } 1017 1018 /************************************************************************ 1019 * * 1020 * SAX2 defaulted attributes handling * 1021 * * 1022 ************************************************************************/ 1023 1024 /** 1025 
* xmlDetectSAX2: 1026 * @ctxt: an XML parser context 1027 * 1028 * Do the SAX2 detection and specific intialization 1029 */ 1030 static void 1031 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1032 if (ctxt == NULL) return; 1033 #ifdef LIBXML_SAX1_ENABLED 1034 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1035 ((ctxt->sax->startElementNs != NULL) || 1036 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1037 #else 1038 ctxt->sax2 = 1; 1039 #endif /* LIBXML_SAX1_ENABLED */ 1040 1041 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1042 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1043 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1044 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1045 (ctxt->str_xml_ns == NULL)) { 1046 xmlErrMemory(ctxt, NULL); 1047 } 1048 } 1049 1050 typedef struct _xmlDefAttrs xmlDefAttrs; 1051 typedef xmlDefAttrs *xmlDefAttrsPtr; 1052 struct _xmlDefAttrs { 1053 int nbAttrs; /* number of defaulted attributes on that element */ 1054 int maxAttrs; /* the size of the array */ 1055 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1056 }; 1057 1058 /** 1059 * xmlAttrNormalizeSpace: 1060 * @src: the source string 1061 * @dst: the target string 1062 * 1063 * Normalize the space in non CDATA attribute values: 1064 * If the attribute type is not CDATA, then the XML processor MUST further 1065 * process the normalized attribute value by discarding any leading and 1066 * trailing space (#x20) characters, and by replacing sequences of space 1067 * (#x20) characters by a single space (#x20) character. 1068 * Note that the size of dst need to be at least src, and if one doesn't need 1069 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1070 * passing src as dst is just fine. 1071 * 1072 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1073 * is needed. 
1074 */ 1075 static xmlChar * 1076 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1077 { 1078 if ((src == NULL) || (dst == NULL)) 1079 return(NULL); 1080 1081 while (*src == 0x20) src++; 1082 while (*src != 0) { 1083 if (*src == 0x20) { 1084 while (*src == 0x20) src++; 1085 if (*src != 0) 1086 *dst++ = 0x20; 1087 } else { 1088 *dst++ = *src++; 1089 } 1090 } 1091 *dst = 0; 1092 if (dst == src) 1093 return(NULL); 1094 return(dst); 1095 } 1096 1097 /** 1098 * xmlAttrNormalizeSpace2: 1099 * @src: the source string 1100 * 1101 * Normalize the space in non CDATA attribute values, a slightly more complex 1102 * front end to avoid allocation problems when running on attribute values 1103 * coming from the input. 1104 * 1105 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1106 * is needed. 1107 */ 1108 static const xmlChar * 1109 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1110 { 1111 int i; 1112 int remove_head = 0; 1113 int need_realloc = 0; 1114 const xmlChar *cur; 1115 1116 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1117 return(NULL); 1118 i = *len; 1119 if (i <= 0) 1120 return(NULL); 1121 1122 cur = src; 1123 while (*cur == 0x20) { 1124 cur++; 1125 remove_head++; 1126 } 1127 while (*cur != 0) { 1128 if (*cur == 0x20) { 1129 cur++; 1130 if ((*cur == 0x20) || (*cur == 0)) { 1131 need_realloc = 1; 1132 break; 1133 } 1134 } else 1135 cur++; 1136 } 1137 if (need_realloc) { 1138 xmlChar *ret; 1139 1140 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1141 if (ret == NULL) { 1142 xmlErrMemory(ctxt, NULL); 1143 return(NULL); 1144 } 1145 xmlAttrNormalizeSpace(ret, ret); 1146 *len = (int) strlen((const char *)ret); 1147 return(ret); 1148 } else if (remove_head) { 1149 *len -= remove_head; 1150 memmove(src, src + remove_head, 1 + *len); 1151 return(src); 1152 } 1153 return(NULL); 1154 } 1155 1156 /** 1157 * xmlAddDefAttrs: 1158 * @ctxt: an XML parser context 1159 * @fullname: the element fullname 
1160 * @fullattr: the attribute fullname 1161 * @value: the attribute value 1162 * 1163 * Add a defaulted attribute for an element 1164 */ 1165 static void 1166 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1167 const xmlChar *fullname, 1168 const xmlChar *fullattr, 1169 const xmlChar *value) { 1170 xmlDefAttrsPtr defaults; 1171 int len; 1172 const xmlChar *name; 1173 const xmlChar *prefix; 1174 1175 /* 1176 * Allows to detect attribute redefinitions 1177 */ 1178 if (ctxt->attsSpecial != NULL) { 1179 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1180 return; 1181 } 1182 1183 if (ctxt->attsDefault == NULL) { 1184 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1185 if (ctxt->attsDefault == NULL) 1186 goto mem_error; 1187 } 1188 1189 /* 1190 * split the element name into prefix:localname , the string found 1191 * are within the DTD and then not associated to namespace names. 1192 */ 1193 name = xmlSplitQName3(fullname, &len); 1194 if (name == NULL) { 1195 name = xmlDictLookup(ctxt->dict, fullname, -1); 1196 prefix = NULL; 1197 } else { 1198 name = xmlDictLookup(ctxt->dict, name, -1); 1199 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1200 } 1201 1202 /* 1203 * make sure there is some storage 1204 */ 1205 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1206 if (defaults == NULL) { 1207 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1208 (4 * 5) * sizeof(const xmlChar *)); 1209 if (defaults == NULL) 1210 goto mem_error; 1211 defaults->nbAttrs = 0; 1212 defaults->maxAttrs = 4; 1213 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1214 defaults, NULL) < 0) { 1215 xmlFree(defaults); 1216 goto mem_error; 1217 } 1218 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1219 xmlDefAttrsPtr temp; 1220 1221 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1222 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1223 if (temp == NULL) 1224 goto mem_error; 1225 defaults = temp; 1226 
defaults->maxAttrs *= 2; 1227 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1228 defaults, NULL) < 0) { 1229 xmlFree(defaults); 1230 goto mem_error; 1231 } 1232 } 1233 1234 /* 1235 * Split the element name into prefix:localname , the string found 1236 * are within the DTD and hen not associated to namespace names. 1237 */ 1238 name = xmlSplitQName3(fullattr, &len); 1239 if (name == NULL) { 1240 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1241 prefix = NULL; 1242 } else { 1243 name = xmlDictLookup(ctxt->dict, name, -1); 1244 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1245 } 1246 1247 defaults->values[5 * defaults->nbAttrs] = name; 1248 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1249 /* intern the string and precompute the end */ 1250 len = xmlStrlen(value); 1251 value = xmlDictLookup(ctxt->dict, value, len); 1252 defaults->values[5 * defaults->nbAttrs + 2] = value; 1253 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1254 if (ctxt->external) 1255 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1256 else 1257 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1258 defaults->nbAttrs++; 1259 1260 return; 1261 1262 mem_error: 1263 xmlErrMemory(ctxt, NULL); 1264 return; 1265 } 1266 1267 /** 1268 * xmlAddSpecialAttr: 1269 * @ctxt: an XML parser context 1270 * @fullname: the element fullname 1271 * @fullattr: the attribute fullname 1272 * @type: the attribute type 1273 * 1274 * Register this attribute type 1275 */ 1276 static void 1277 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1278 const xmlChar *fullname, 1279 const xmlChar *fullattr, 1280 int type) 1281 { 1282 if (ctxt->attsSpecial == NULL) { 1283 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1284 if (ctxt->attsSpecial == NULL) 1285 goto mem_error; 1286 } 1287 1288 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1289 return; 1290 1291 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1292 (void *) (long) type); 1293 return; 
1294 1295 mem_error: 1296 xmlErrMemory(ctxt, NULL); 1297 return; 1298 } 1299 1300 /** 1301 * xmlCleanSpecialAttrCallback: 1302 * 1303 * Removes CDATA attributes from the special attribute table 1304 */ 1305 static void 1306 xmlCleanSpecialAttrCallback(void *payload, void *data, 1307 const xmlChar *fullname, const xmlChar *fullattr, 1308 const xmlChar *unused ATTRIBUTE_UNUSED) { 1309 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1310 1311 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1312 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1313 } 1314 } 1315 1316 /** 1317 * xmlCleanSpecialAttr: 1318 * @ctxt: an XML parser context 1319 * 1320 * Trim the list of attributes defined to remove all those of type 1321 * CDATA as they are not special. This call should be done when finishing 1322 * to parse the DTD and before starting to parse the document root. 1323 */ 1324 static void 1325 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1326 { 1327 if (ctxt->attsSpecial == NULL) 1328 return; 1329 1330 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1331 1332 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1333 xmlHashFree(ctxt->attsSpecial, NULL); 1334 ctxt->attsSpecial = NULL; 1335 } 1336 return; 1337 } 1338 1339 /** 1340 * xmlCheckLanguageID: 1341 * @lang: pointer to the string value 1342 * 1343 * Checks that the value conforms to the LanguageID production: 1344 * 1345 * NOTE: this is somewhat deprecated, those productions were removed from 1346 * the XML Second edition. 
1347 * 1348 * [33] LanguageID ::= Langcode ('-' Subcode)* 1349 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1350 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1351 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1352 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1353 * [38] Subcode ::= ([a-z] | [A-Z])+ 1354 * 1355 * The current REC reference the sucessors of RFC 1766, currently 5646 1356 * 1357 * http://www.rfc-editor.org/rfc/rfc5646.txt 1358 * langtag = language 1359 * ["-" script] 1360 * ["-" region] 1361 * *("-" variant) 1362 * *("-" extension) 1363 * ["-" privateuse] 1364 * language = 2*3ALPHA ; shortest ISO 639 code 1365 * ["-" extlang] ; sometimes followed by 1366 * ; extended language subtags 1367 * / 4ALPHA ; or reserved for future use 1368 * / 5*8ALPHA ; or registered language subtag 1369 * 1370 * extlang = 3ALPHA ; selected ISO 639 codes 1371 * *2("-" 3ALPHA) ; permanently reserved 1372 * 1373 * script = 4ALPHA ; ISO 15924 code 1374 * 1375 * region = 2ALPHA ; ISO 3166-1 code 1376 * / 3DIGIT ; UN M.49 code 1377 * 1378 * variant = 5*8alphanum ; registered variants 1379 * / (DIGIT 3alphanum) 1380 * 1381 * extension = singleton 1*("-" (2*8alphanum)) 1382 * 1383 * ; Single alphanumerics 1384 * ; "x" reserved for private use 1385 * singleton = DIGIT ; 0 - 9 1386 * / %x41-57 ; A - W 1387 * / %x59-5A ; Y - Z 1388 * / %x61-77 ; a - w 1389 * / %x79-7A ; y - z 1390 * 1391 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1392 * The parser below doesn't try to cope with extension or privateuse 1393 * that could be added but that's not interoperable anyway 1394 * 1395 * Returns 1 if correct 0 otherwise 1396 **/ 1397 int 1398 xmlCheckLanguageID(const xmlChar * lang) 1399 { 1400 const xmlChar *cur = lang, *nxt; 1401 1402 if (cur == NULL) 1403 return (0); 1404 if (((cur[0] == 'i') && (cur[1] == '-')) || 1405 ((cur[0] == 'I') && (cur[1] == '-')) || 1406 ((cur[0] == 'x') && (cur[1] == '-')) || 1407 ((cur[0] == 'X') && 
(cur[1] == '-'))) { 1408 /* 1409 * Still allow IANA code and user code which were coming 1410 * from the previous version of the XML-1.0 specification 1411 * it's deprecated but we should not fail 1412 */ 1413 cur += 2; 1414 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1415 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1416 cur++; 1417 return(cur[0] == 0); 1418 } 1419 nxt = cur; 1420 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1421 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1422 nxt++; 1423 if (nxt - cur >= 4) { 1424 /* 1425 * Reserved 1426 */ 1427 if ((nxt - cur > 8) || (nxt[0] != 0)) 1428 return(0); 1429 return(1); 1430 } 1431 if (nxt - cur < 2) 1432 return(0); 1433 /* we got an ISO 639 code */ 1434 if (nxt[0] == 0) 1435 return(1); 1436 if (nxt[0] != '-') 1437 return(0); 1438 1439 nxt++; 1440 cur = nxt; 1441 /* now we can have extlang or script or region or variant */ 1442 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1443 goto region_m49; 1444 1445 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1446 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1447 nxt++; 1448 if (nxt - cur == 4) 1449 goto script; 1450 if (nxt - cur == 2) 1451 goto region; 1452 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1453 goto variant; 1454 if (nxt - cur != 3) 1455 return(0); 1456 /* we parsed an extlang */ 1457 if (nxt[0] == 0) 1458 return(1); 1459 if (nxt[0] != '-') 1460 return(0); 1461 1462 nxt++; 1463 cur = nxt; 1464 /* now we can have script or region or variant */ 1465 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1466 goto region_m49; 1467 1468 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1469 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1470 nxt++; 1471 if (nxt - cur == 2) 1472 goto region; 1473 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1474 goto variant; 1475 if (nxt - cur != 4) 1476 return(0); 1477 /* we parsed a script */ 1478 script: 1479 if (nxt[0] == 0) 1480 return(1); 1481 if (nxt[0] != '-') 1482 return(0); 1483 1484 nxt++; 1485 cur = nxt; 1486 /* now we can have region or variant */ 1487 if ((nxt[0] >= '0') 
&& (nxt[0] <= '9')) 1488 goto region_m49; 1489 1490 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1491 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1492 nxt++; 1493 1494 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1495 goto variant; 1496 if (nxt - cur != 2) 1497 return(0); 1498 /* we parsed a region */ 1499 region: 1500 if (nxt[0] == 0) 1501 return(1); 1502 if (nxt[0] != '-') 1503 return(0); 1504 1505 nxt++; 1506 cur = nxt; 1507 /* now we can just have a variant */ 1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1510 nxt++; 1511 1512 if ((nxt - cur < 5) || (nxt - cur > 8)) 1513 return(0); 1514 1515 /* we parsed a variant */ 1516 variant: 1517 if (nxt[0] == 0) 1518 return(1); 1519 if (nxt[0] != '-') 1520 return(0); 1521 /* extensions and private use subtags not checked */ 1522 return (1); 1523 1524 region_m49: 1525 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1526 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1527 nxt += 3; 1528 goto region; 1529 } 1530 return(0); 1531 } 1532 1533 /************************************************************************ 1534 * * 1535 * Parser stacks related functions and macros * 1536 * * 1537 ************************************************************************/ 1538 1539 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1540 const xmlChar ** str); 1541 1542 #ifdef SAX2 1543 /** 1544 * nsPush: 1545 * @ctxt: an XML parser context 1546 * @prefix: the namespace prefix or NULL 1547 * @URL: the namespace name 1548 * 1549 * Pushes a new parser namespace on top of the ns stack 1550 * 1551 * Returns -1 in case of error, -2 if the namespace should be discarded 1552 * and the index in the stack otherwise. 
1553 */ 1554 static int 1555 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1556 { 1557 if (ctxt->options & XML_PARSE_NSCLEAN) { 1558 int i; 1559 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1560 if (ctxt->nsTab[i] == prefix) { 1561 /* in scope */ 1562 if (ctxt->nsTab[i + 1] == URL) 1563 return(-2); 1564 /* out of scope keep it */ 1565 break; 1566 } 1567 } 1568 } 1569 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1570 ctxt->nsMax = 10; 1571 ctxt->nsNr = 0; 1572 ctxt->nsTab = (const xmlChar **) 1573 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1574 if (ctxt->nsTab == NULL) { 1575 xmlErrMemory(ctxt, NULL); 1576 ctxt->nsMax = 0; 1577 return (-1); 1578 } 1579 } else if (ctxt->nsNr >= ctxt->nsMax) { 1580 const xmlChar ** tmp; 1581 ctxt->nsMax *= 2; 1582 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1583 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1584 if (tmp == NULL) { 1585 xmlErrMemory(ctxt, NULL); 1586 ctxt->nsMax /= 2; 1587 return (-1); 1588 } 1589 ctxt->nsTab = tmp; 1590 } 1591 ctxt->nsTab[ctxt->nsNr++] = prefix; 1592 ctxt->nsTab[ctxt->nsNr++] = URL; 1593 return (ctxt->nsNr); 1594 } 1595 /** 1596 * nsPop: 1597 * @ctxt: an XML parser context 1598 * @nr: the number to pop 1599 * 1600 * Pops the top @nr parser prefix/namespace from the ns stack 1601 * 1602 * Returns the number of namespaces removed 1603 */ 1604 static int 1605 nsPop(xmlParserCtxtPtr ctxt, int nr) 1606 { 1607 int i; 1608 1609 if (ctxt->nsTab == NULL) return(0); 1610 if (ctxt->nsNr < nr) { 1611 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1612 nr = ctxt->nsNr; 1613 } 1614 if (ctxt->nsNr <= 0) 1615 return (0); 1616 1617 for (i = 0;i < nr;i++) { 1618 ctxt->nsNr--; 1619 ctxt->nsTab[ctxt->nsNr] = NULL; 1620 } 1621 return(nr); 1622 } 1623 #endif 1624 1625 static int 1626 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1627 const xmlChar **atts; 1628 int *attallocs; 1629 int maxatts; 1630 1631 if (ctxt->atts == NULL) { 1632 maxatts = 55; /* allow for 
10 attrs by default */ 1633 atts = (const xmlChar **) 1634 xmlMalloc(maxatts * sizeof(xmlChar *)); 1635 if (atts == NULL) goto mem_error; 1636 ctxt->atts = atts; 1637 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1638 if (attallocs == NULL) goto mem_error; 1639 ctxt->attallocs = attallocs; 1640 ctxt->maxatts = maxatts; 1641 } else if (nr + 5 > ctxt->maxatts) { 1642 maxatts = (nr + 5) * 2; 1643 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1644 maxatts * sizeof(const xmlChar *)); 1645 if (atts == NULL) goto mem_error; 1646 ctxt->atts = atts; 1647 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1648 (maxatts / 5) * sizeof(int)); 1649 if (attallocs == NULL) goto mem_error; 1650 ctxt->attallocs = attallocs; 1651 ctxt->maxatts = maxatts; 1652 } 1653 return(ctxt->maxatts); 1654 mem_error: 1655 xmlErrMemory(ctxt, NULL); 1656 return(-1); 1657 } 1658 1659 /** 1660 * inputPush: 1661 * @ctxt: an XML parser context 1662 * @value: the parser input 1663 * 1664 * Pushes a new parser input on top of the input stack 1665 * 1666 * Returns -1 in case of error, the index in the stack otherwise 1667 */ 1668 int 1669 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1670 { 1671 if ((ctxt == NULL) || (value == NULL)) 1672 return(-1); 1673 if (ctxt->inputNr >= ctxt->inputMax) { 1674 ctxt->inputMax *= 2; 1675 ctxt->inputTab = 1676 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1677 ctxt->inputMax * 1678 sizeof(ctxt->inputTab[0])); 1679 if (ctxt->inputTab == NULL) { 1680 xmlErrMemory(ctxt, NULL); 1681 xmlFreeInputStream(value); 1682 ctxt->inputMax /= 2; 1683 value = NULL; 1684 return (-1); 1685 } 1686 } 1687 ctxt->inputTab[ctxt->inputNr] = value; 1688 ctxt->input = value; 1689 return (ctxt->inputNr++); 1690 } 1691 /** 1692 * inputPop: 1693 * @ctxt: an XML parser context 1694 * 1695 * Pops the top parser input from the input stack 1696 * 1697 * Returns the input just removed 1698 */ 1699 xmlParserInputPtr 1700 inputPop(xmlParserCtxtPtr ctxt) 1701 
{ 1702 xmlParserInputPtr ret; 1703 1704 if (ctxt == NULL) 1705 return(NULL); 1706 if (ctxt->inputNr <= 0) 1707 return (NULL); 1708 ctxt->inputNr--; 1709 if (ctxt->inputNr > 0) 1710 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1711 else 1712 ctxt->input = NULL; 1713 ret = ctxt->inputTab[ctxt->inputNr]; 1714 ctxt->inputTab[ctxt->inputNr] = NULL; 1715 return (ret); 1716 } 1717 /** 1718 * nodePush: 1719 * @ctxt: an XML parser context 1720 * @value: the element node 1721 * 1722 * Pushes a new element node on top of the node stack 1723 * 1724 * Returns -1 in case of error, the index in the stack otherwise 1725 */ 1726 int 1727 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1728 { 1729 if (ctxt == NULL) return(0); 1730 if (ctxt->nodeNr >= ctxt->nodeMax) { 1731 xmlNodePtr *tmp; 1732 1733 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1734 ctxt->nodeMax * 2 * 1735 sizeof(ctxt->nodeTab[0])); 1736 if (tmp == NULL) { 1737 xmlErrMemory(ctxt, NULL); 1738 return (-1); 1739 } 1740 ctxt->nodeTab = tmp; 1741 ctxt->nodeMax *= 2; 1742 } 1743 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1744 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1745 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1746 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1747 xmlParserMaxDepth); 1748 ctxt->instate = XML_PARSER_EOF; 1749 return(-1); 1750 } 1751 ctxt->nodeTab[ctxt->nodeNr] = value; 1752 ctxt->node = value; 1753 return (ctxt->nodeNr++); 1754 } 1755 1756 /** 1757 * nodePop: 1758 * @ctxt: an XML parser context 1759 * 1760 * Pops the top element node from the node stack 1761 * 1762 * Returns the node just removed 1763 */ 1764 xmlNodePtr 1765 nodePop(xmlParserCtxtPtr ctxt) 1766 { 1767 xmlNodePtr ret; 1768 1769 if (ctxt == NULL) return(NULL); 1770 if (ctxt->nodeNr <= 0) 1771 return (NULL); 1772 ctxt->nodeNr--; 1773 if (ctxt->nodeNr > 0) 1774 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1775 else 1776 ctxt->node = NULL; 1777 ret = ctxt->nodeTab[ctxt->nodeNr]; 1778 
ctxt->nodeTab[ctxt->nodeNr] = NULL; 1779 return (ret); 1780 } 1781 1782 #ifdef LIBXML_PUSH_ENABLED 1783 /** 1784 * nameNsPush: 1785 * @ctxt: an XML parser context 1786 * @value: the element name 1787 * @prefix: the element prefix 1788 * @URI: the element namespace name 1789 * 1790 * Pushes a new element name/prefix/URL on top of the name stack 1791 * 1792 * Returns -1 in case of error, the index in the stack otherwise 1793 */ 1794 static int 1795 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1796 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1797 { 1798 if (ctxt->nameNr >= ctxt->nameMax) { 1799 const xmlChar * *tmp; 1800 void **tmp2; 1801 ctxt->nameMax *= 2; 1802 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1803 ctxt->nameMax * 1804 sizeof(ctxt->nameTab[0])); 1805 if (tmp == NULL) { 1806 ctxt->nameMax /= 2; 1807 goto mem_error; 1808 } 1809 ctxt->nameTab = tmp; 1810 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1811 ctxt->nameMax * 3 * 1812 sizeof(ctxt->pushTab[0])); 1813 if (tmp2 == NULL) { 1814 ctxt->nameMax /= 2; 1815 goto mem_error; 1816 } 1817 ctxt->pushTab = tmp2; 1818 } 1819 ctxt->nameTab[ctxt->nameNr] = value; 1820 ctxt->name = value; 1821 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1822 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1823 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1824 return (ctxt->nameNr++); 1825 mem_error: 1826 xmlErrMemory(ctxt, NULL); 1827 return (-1); 1828 } 1829 /** 1830 * nameNsPop: 1831 * @ctxt: an XML parser context 1832 * 1833 * Pops the top element/prefix/URI name from the name stack 1834 * 1835 * Returns the name just removed 1836 */ 1837 static const xmlChar * 1838 nameNsPop(xmlParserCtxtPtr ctxt) 1839 { 1840 const xmlChar *ret; 1841 1842 if (ctxt->nameNr <= 0) 1843 return (NULL); 1844 ctxt->nameNr--; 1845 if (ctxt->nameNr > 0) 1846 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1847 else 1848 ctxt->name = NULL; 1849 ret = ctxt->nameTab[ctxt->nameNr]; 1850 
ctxt->nameTab[ctxt->nameNr] = NULL; 1851 return (ret); 1852 } 1853 #endif /* LIBXML_PUSH_ENABLED */ 1854 1855 /** 1856 * namePush: 1857 * @ctxt: an XML parser context 1858 * @value: the element name 1859 * 1860 * Pushes a new element name on top of the name stack 1861 * 1862 * Returns -1 in case of error, the index in the stack otherwise 1863 */ 1864 int 1865 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1866 { 1867 if (ctxt == NULL) return (-1); 1868 1869 if (ctxt->nameNr >= ctxt->nameMax) { 1870 const xmlChar * *tmp; 1871 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1872 ctxt->nameMax * 2 * 1873 sizeof(ctxt->nameTab[0])); 1874 if (tmp == NULL) { 1875 goto mem_error; 1876 } 1877 ctxt->nameTab = tmp; 1878 ctxt->nameMax *= 2; 1879 } 1880 ctxt->nameTab[ctxt->nameNr] = value; 1881 ctxt->name = value; 1882 return (ctxt->nameNr++); 1883 mem_error: 1884 xmlErrMemory(ctxt, NULL); 1885 return (-1); 1886 } 1887 /** 1888 * namePop: 1889 * @ctxt: an XML parser context 1890 * 1891 * Pops the top element name from the name stack 1892 * 1893 * Returns the name just removed 1894 */ 1895 const xmlChar * 1896 namePop(xmlParserCtxtPtr ctxt) 1897 { 1898 const xmlChar *ret; 1899 1900 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1901 return (NULL); 1902 ctxt->nameNr--; 1903 if (ctxt->nameNr > 0) 1904 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1905 else 1906 ctxt->name = NULL; 1907 ret = ctxt->nameTab[ctxt->nameNr]; 1908 ctxt->nameTab[ctxt->nameNr] = NULL; 1909 return (ret); 1910 } 1911 1912 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1913 if (ctxt->spaceNr >= ctxt->spaceMax) { 1914 int *tmp; 1915 1916 ctxt->spaceMax *= 2; 1917 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1918 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1919 if (tmp == NULL) { 1920 xmlErrMemory(ctxt, NULL); 1921 ctxt->spaceMax /=2; 1922 return(-1); 1923 } 1924 ctxt->spaceTab = tmp; 1925 } 1926 ctxt->spaceTab[ctxt->spaceNr] = val; 1927 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 
1928 return(ctxt->spaceNr++); 1929 } 1930 1931 static int spacePop(xmlParserCtxtPtr ctxt) { 1932 int ret; 1933 if (ctxt->spaceNr <= 0) return(0); 1934 ctxt->spaceNr--; 1935 if (ctxt->spaceNr > 0) 1936 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1937 else 1938 ctxt->space = &ctxt->spaceTab[0]; 1939 ret = ctxt->spaceTab[ctxt->spaceNr]; 1940 ctxt->spaceTab[ctxt->spaceNr] = -1; 1941 return(ret); 1942 } 1943 1944 /* 1945 * Macros for accessing the content. Those should be used only by the parser, 1946 * and not exported. 1947 * 1948 * Dirty macros, i.e. one often need to make assumption on the context to 1949 * use them 1950 * 1951 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1952 * To be used with extreme caution since operations consuming 1953 * characters may move the input buffer to a different location ! 1954 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1955 * This should be used internally by the parser 1956 * only to compare to ASCII values otherwise it would break when 1957 * running with UTF-8 encoding. 1958 * RAW same as CUR but in the input buffer, bypass any token 1959 * extraction that may have been done 1960 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1961 * to compare on ASCII based substring. 1962 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1963 * strings without newlines within the parser. 1964 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1965 * defined char within the parser. 1966 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1967 * 1968 * NEXT Skip to the next character, this does the proper decoding 1969 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1970 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1971 * CUR_CHAR(l) returns the current unicode character (int), set l 1972 * to the number of xmlChars used for the encoding [0-5]. 
1973 * CUR_SCHAR same but operate on a string instead of the context 1974 * COPY_BUF copy the current unicode char to the target buffer, increment 1975 * the index 1976 * GROW, SHRINK handling of input buffers 1977 */ 1978 1979 #define RAW (*ctxt->input->cur) 1980 #define CUR (*ctxt->input->cur) 1981 #define NXT(val) ctxt->input->cur[(val)] 1982 #define CUR_PTR ctxt->input->cur 1983 1984 #define CMP4( s, c1, c2, c3, c4 ) \ 1985 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1986 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1987 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 1988 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1989 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1990 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1991 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1992 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1993 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1994 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1995 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1996 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1997 ((unsigned char *) s)[ 8 ] == c9 ) 1998 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1999 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2000 ((unsigned char *) s)[ 9 ] == c10 ) 2001 2002 #define SKIP(val) do { \ 2003 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 2004 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2005 if ((*ctxt->input->cur == 0) && \ 2006 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2007 xmlPopInput(ctxt); \ 2008 } while (0) 2009 2010 #define SKIPL(val) do { \ 2011 int skipl; \ 2012 for(skipl=0; skipl<val; skipl++) { \ 2013 if (*(ctxt->input->cur) == '\n') { \ 2014 ctxt->input->line++; ctxt->input->col = 1; \ 2015 } else ctxt->input->col++; \ 2016 ctxt->nbChars++; \ 2017 
ctxt->input->cur++; \ 2018 } \ 2019 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2020 if ((*ctxt->input->cur == 0) && \ 2021 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2022 xmlPopInput(ctxt); \ 2023 } while (0) 2024 2025 #define SHRINK if ((ctxt->progressive == 0) && \ 2026 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 2027 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 2028 xmlSHRINK (ctxt); 2029 2030 static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 2031 xmlParserInputShrink(ctxt->input); 2032 if ((*ctxt->input->cur == 0) && 2033 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2034 xmlPopInput(ctxt); 2035 } 2036 2037 #define GROW if ((ctxt->progressive == 0) && \ 2038 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2039 xmlGROW (ctxt); 2040 2041 static void xmlGROW (xmlParserCtxtPtr ctxt) { 2042 if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 2043 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 2044 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) && 2045 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 2046 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 2047 ctxt->instate = XML_PARSER_EOF; 2048 } 2049 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2050 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 2051 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2052 xmlPopInput(ctxt); 2053 } 2054 2055 #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2056 2057 #define NEXT xmlNextChar(ctxt) 2058 2059 #define NEXT1 { \ 2060 ctxt->input->col++; \ 2061 ctxt->input->cur++; \ 2062 ctxt->nbChars++; \ 2063 if (*ctxt->input->cur == 0) \ 2064 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2065 } 2066 2067 #define NEXTL(l) do { \ 2068 if (*(ctxt->input->cur) == '\n') { \ 2069 ctxt->input->line++; ctxt->input->col = 1; \ 2070 } else ctxt->input->col++; \ 2071 ctxt->input->cur += l; \ 2072 if 
(*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2073 } while (0) 2074 2075 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2076 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2077 2078 #define COPY_BUF(l,b,i,v) \ 2079 if (l == 1) b[i++] = (xmlChar) v; \ 2080 else i += xmlCopyCharMultiByte(&b[i],v) 2081 2082 /** 2083 * xmlSkipBlankChars: 2084 * @ctxt: the XML parser context 2085 * 2086 * skip all blanks character found at that point in the input streams. 2087 * It pops up finished entities in the process if allowable at that point. 2088 * 2089 * Returns the number of space chars skipped 2090 */ 2091 2092 int 2093 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2094 int res = 0; 2095 2096 /* 2097 * It's Okay to use CUR/NEXT here since all the blanks are on 2098 * the ASCII range. 2099 */ 2100 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2101 const xmlChar *cur; 2102 /* 2103 * if we are in the document content, go really fast 2104 */ 2105 cur = ctxt->input->cur; 2106 while (IS_BLANK_CH(*cur)) { 2107 if (*cur == '\n') { 2108 ctxt->input->line++; ctxt->input->col = 1; 2109 } 2110 cur++; 2111 res++; 2112 if (*cur == 0) { 2113 ctxt->input->cur = cur; 2114 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2115 cur = ctxt->input->cur; 2116 } 2117 } 2118 ctxt->input->cur = cur; 2119 } else { 2120 int cur; 2121 do { 2122 cur = CUR; 2123 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 2124 NEXT; 2125 cur = CUR; 2126 res++; 2127 } 2128 while ((cur == 0) && (ctxt->inputNr > 1) && 2129 (ctxt->instate != XML_PARSER_COMMENT)) { 2130 xmlPopInput(ctxt); 2131 cur = CUR; 2132 } 2133 /* 2134 * Need to handle support of entities branching here 2135 */ 2136 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 2137 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 2138 } 2139 return(res); 2140 } 2141 2142 /************************************************************************ 2143 * * 2144 * Commodity functions to handle entities * 
2145 * * 2146 ************************************************************************/ 2147 2148 /** 2149 * xmlPopInput: 2150 * @ctxt: an XML parser context 2151 * 2152 * xmlPopInput: the current input pointed by ctxt->input came to an end 2153 * pop it and return the next char. 2154 * 2155 * Returns the current xmlChar in the parser context 2156 */ 2157 xmlChar 2158 xmlPopInput(xmlParserCtxtPtr ctxt) { 2159 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2160 if (xmlParserDebugEntities) 2161 xmlGenericError(xmlGenericErrorContext, 2162 "Popping input %d\n", ctxt->inputNr); 2163 xmlFreeInputStream(inputPop(ctxt)); 2164 if ((*ctxt->input->cur == 0) && 2165 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2166 return(xmlPopInput(ctxt)); 2167 return(CUR); 2168 } 2169 2170 /** 2171 * xmlPushInput: 2172 * @ctxt: an XML parser context 2173 * @input: an XML parser input fragment (entity, XML fragment ...). 2174 * 2175 * xmlPushInput: switch to a new input stream which is stacked on top 2176 * of the previous one(s). 
2177 * Returns -1 in case of error or the index in the input stack 2178 */ 2179 int 2180 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2181 int ret; 2182 if (input == NULL) return(-1); 2183 2184 if (xmlParserDebugEntities) { 2185 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2186 xmlGenericError(xmlGenericErrorContext, 2187 "%s(%d): ", ctxt->input->filename, 2188 ctxt->input->line); 2189 xmlGenericError(xmlGenericErrorContext, 2190 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2191 } 2192 ret = inputPush(ctxt, input); 2193 if (ctxt->instate == XML_PARSER_EOF) 2194 return(-1); 2195 GROW; 2196 return(ret); 2197 } 2198 2199 /** 2200 * xmlParseCharRef: 2201 * @ctxt: an XML parser context 2202 * 2203 * parse Reference declarations 2204 * 2205 * [66] CharRef ::= '&#' [0-9]+ ';' | 2206 * '&#x' [0-9a-fA-F]+ ';' 2207 * 2208 * [ WFC: Legal Character ] 2209 * Characters referred to using character references must match the 2210 * production for Char. 2211 * 2212 * Returns the value parsed (as an int), 0 in case of error 2213 */ 2214 int 2215 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2216 unsigned int val = 0; 2217 int count = 0; 2218 unsigned int outofrange = 0; 2219 2220 /* 2221 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2222 */ 2223 if ((RAW == '&') && (NXT(1) == '#') && 2224 (NXT(2) == 'x')) { 2225 SKIP(3); 2226 GROW; 2227 while (RAW != ';') { /* loop blocked by count */ 2228 if (count++ > 20) { 2229 count = 0; 2230 GROW; 2231 if (ctxt->instate == XML_PARSER_EOF) 2232 return(0); 2233 } 2234 if ((RAW >= '0') && (RAW <= '9')) 2235 val = val * 16 + (CUR - '0'); 2236 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2237 val = val * 16 + (CUR - 'a') + 10; 2238 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2239 val = val * 16 + (CUR - 'A') + 10; 2240 else { 2241 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2242 val = 0; 2243 break; 2244 } 2245 if (val > 0x10FFFF) 2246 outofrange = val; 2247 
2248 NEXT; 2249 count++; 2250 } 2251 if (RAW == ';') { 2252 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2253 ctxt->input->col++; 2254 ctxt->nbChars ++; 2255 ctxt->input->cur++; 2256 } 2257 } else if ((RAW == '&') && (NXT(1) == '#')) { 2258 SKIP(2); 2259 GROW; 2260 while (RAW != ';') { /* loop blocked by count */ 2261 if (count++ > 20) { 2262 count = 0; 2263 GROW; 2264 if (ctxt->instate == XML_PARSER_EOF) 2265 return(0); 2266 } 2267 if ((RAW >= '0') && (RAW <= '9')) 2268 val = val * 10 + (CUR - '0'); 2269 else { 2270 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2271 val = 0; 2272 break; 2273 } 2274 if (val > 0x10FFFF) 2275 outofrange = val; 2276 2277 NEXT; 2278 count++; 2279 } 2280 if (RAW == ';') { 2281 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2282 ctxt->input->col++; 2283 ctxt->nbChars ++; 2284 ctxt->input->cur++; 2285 } 2286 } else { 2287 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2288 } 2289 2290 /* 2291 * [ WFC: Legal Character ] 2292 * Characters referred to using character references must match the 2293 * production for Char. 2294 */ 2295 if ((IS_CHAR(val) && (outofrange == 0))) { 2296 return(val); 2297 } else { 2298 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2299 "xmlParseCharRef: invalid xmlChar value %d\n", 2300 val); 2301 } 2302 return(0); 2303 } 2304 2305 /** 2306 * xmlParseStringCharRef: 2307 * @ctxt: an XML parser context 2308 * @str: a pointer to an index in the string 2309 * 2310 * parse Reference declarations, variant parsing from a string rather 2311 * than an an input flow. 2312 * 2313 * [66] CharRef ::= '&#' [0-9]+ ';' | 2314 * '&#x' [0-9a-fA-F]+ ';' 2315 * 2316 * [ WFC: Legal Character ] 2317 * Characters referred to using character references must match the 2318 * production for Char. 
2319 * 2320 * Returns the value parsed (as an int), 0 in case of error, str will be 2321 * updated to the current value of the index 2322 */ 2323 static int 2324 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2325 const xmlChar *ptr; 2326 xmlChar cur; 2327 unsigned int val = 0; 2328 unsigned int outofrange = 0; 2329 2330 if ((str == NULL) || (*str == NULL)) return(0); 2331 ptr = *str; 2332 cur = *ptr; 2333 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2334 ptr += 3; 2335 cur = *ptr; 2336 while (cur != ';') { /* Non input consuming loop */ 2337 if ((cur >= '0') && (cur <= '9')) 2338 val = val * 16 + (cur - '0'); 2339 else if ((cur >= 'a') && (cur <= 'f')) 2340 val = val * 16 + (cur - 'a') + 10; 2341 else if ((cur >= 'A') && (cur <= 'F')) 2342 val = val * 16 + (cur - 'A') + 10; 2343 else { 2344 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2345 val = 0; 2346 break; 2347 } 2348 if (val > 0x10FFFF) 2349 outofrange = val; 2350 2351 ptr++; 2352 cur = *ptr; 2353 } 2354 if (cur == ';') 2355 ptr++; 2356 } else if ((cur == '&') && (ptr[1] == '#')){ 2357 ptr += 2; 2358 cur = *ptr; 2359 while (cur != ';') { /* Non input consuming loops */ 2360 if ((cur >= '0') && (cur <= '9')) 2361 val = val * 10 + (cur - '0'); 2362 else { 2363 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2364 val = 0; 2365 break; 2366 } 2367 if (val > 0x10FFFF) 2368 outofrange = val; 2369 2370 ptr++; 2371 cur = *ptr; 2372 } 2373 if (cur == ';') 2374 ptr++; 2375 } else { 2376 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2377 return(0); 2378 } 2379 *str = ptr; 2380 2381 /* 2382 * [ WFC: Legal Character ] 2383 * Characters referred to using character references must match the 2384 * production for Char. 
2385 */ 2386 if ((IS_CHAR(val) && (outofrange == 0))) { 2387 return(val); 2388 } else { 2389 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2390 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2391 val); 2392 } 2393 return(0); 2394 } 2395 2396 /** 2397 * xmlNewBlanksWrapperInputStream: 2398 * @ctxt: an XML parser context 2399 * @entity: an Entity pointer 2400 * 2401 * Create a new input stream for wrapping 2402 * blanks around a PEReference 2403 * 2404 * Returns the new input stream or NULL 2405 */ 2406 2407 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2408 2409 static xmlParserInputPtr 2410 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2411 xmlParserInputPtr input; 2412 xmlChar *buffer; 2413 size_t length; 2414 if (entity == NULL) { 2415 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2416 "xmlNewBlanksWrapperInputStream entity\n"); 2417 return(NULL); 2418 } 2419 if (xmlParserDebugEntities) 2420 xmlGenericError(xmlGenericErrorContext, 2421 "new blanks wrapper for entity: %s\n", entity->name); 2422 input = xmlNewInputStream(ctxt); 2423 if (input == NULL) { 2424 return(NULL); 2425 } 2426 length = xmlStrlen(entity->name) + 5; 2427 buffer = xmlMallocAtomic(length); 2428 if (buffer == NULL) { 2429 xmlErrMemory(ctxt, NULL); 2430 xmlFree(input); 2431 return(NULL); 2432 } 2433 buffer [0] = ' '; 2434 buffer [1] = '%'; 2435 buffer [length-3] = ';'; 2436 buffer [length-2] = ' '; 2437 buffer [length-1] = 0; 2438 memcpy(buffer + 2, entity->name, length - 5); 2439 input->free = deallocblankswrapper; 2440 input->base = buffer; 2441 input->cur = buffer; 2442 input->length = length; 2443 input->end = &buffer[length]; 2444 return(input); 2445 } 2446 2447 /** 2448 * xmlParserHandlePEReference: 2449 * @ctxt: the parser context 2450 * 2451 * [69] PEReference ::= '%' Name ';' 2452 * 2453 * [ WFC: No Recursion ] 2454 * A parsed entity must not contain a recursive 2455 * reference to itself, either directly or indirectly. 
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * A PEReference may have been detected in the current input stream
 * the handling is done accordingly to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 *
 * The function does nothing unless the current character is '%' and the
 * parser state allows a substitution at that point.
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlEntityPtr entity = NULL;
    xmlParserInputPtr input;

    if (RAW != '%') return;
    /*
     * Decide from the parser state whether the '%' is a reference to
     * expand here, an error, or plain data to leave alone. Only the
     * XML_PARSER_DTD case can fall out of the switch.
     */
    switch(ctxt->instate) {
        case XML_PARSER_CDATA_SECTION:
            return;
        case XML_PARSER_COMMENT:
            return;
        case XML_PARSER_START_TAG:
            return;
        case XML_PARSER_END_TAG:
            return;
        case XML_PARSER_EOF:
            xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
            return;
        case XML_PARSER_PROLOG:
        case XML_PARSER_START:
        case XML_PARSER_MISC:
            xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
            return;
        case XML_PARSER_ENTITY_DECL:
        case XML_PARSER_CONTENT:
        case XML_PARSER_ATTRIBUTE_VALUE:
        case XML_PARSER_PI:
        case XML_PARSER_SYSTEM_LITERAL:
        case XML_PARSER_PUBLIC_LITERAL:
            /* we just ignore it there */
            return;
        case XML_PARSER_EPILOG:
            xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
            return;
        case XML_PARSER_ENTITY_VALUE:
            /*
             * NOTE: in the case of entity values, we don't do the
             *       substitution here since we need the literal
             *       entity value to be able to save the internal
             *       subset of the document.
             *       This will be handled by xmlStringDecodeEntities
             */
            return;
        case XML_PARSER_DTD:
            /*
             * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
             * In the internal DTD subset, parameter-entity references
             * can occur only where markup declarations can occur, not
             * within markup declarations.
             * In that case this is handled in xmlParseMarkupDecl
             */
            if ((ctxt->external == 0) && (ctxt->inputNr == 1))
                return;
            /* a '%' followed by a blank or the end of data is not a reference */
            if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
                return;
            break;
        case XML_PARSER_IGNORE:
            return;
    }

    /* skip the '%' and read the entity name */
    NEXT;
    name = xmlParseName(ctxt);
    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                "PEReference: %s\n", name);
    if (name == NULL) {
        xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
    } else {
        if (RAW == ';') {
            NEXT;
            /* the entity is looked up through the SAX callback, if any */
            if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
                entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            if (entity == NULL) {

                /*
                 * [ WFC: Entity Declared ]
                 * In a document without any DTD, a document with only an
                 * internal DTD subset which contains no parameter entity
                 * references, or a document with "standalone='yes'", ...
                 * ... The declaration of a parameter entity must precede
                 * any reference to it...
                 */
                if ((ctxt->standalone == 1) ||
                    ((ctxt->hasExternalSubset == 0) &&
                     (ctxt->hasPErefs == 0))) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                         "PEReference: %%%s; not found\n", name);
                } else {
                    /*
                     * [ VC: Entity Declared ]
                     * In a document with an external subset or external
                     * parameter entities with "standalone='no'", ...
                     * ... The declaration of a parameter entity must precede
                     * any reference to it...
                     */
                    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
                        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
                                         "PEReference: %%%s; not found\n",
                                         name, NULL);
                    } else
                        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
                                      "PEReference: %%%s; not found\n",
                                      name, NULL);
                    ctxt->valid = 0;
                }
            } else if (ctxt->input->free != deallocblankswrapper) {
                    /*
                     * The reference was not read from a blanks wrapper:
                     * push one (" %name; ") instead of the entity itself.
                     * The '%' inside the wrapper is handled by a later
                     * call, which then takes the branch below.
                     */
                    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
                    if (xmlPushInput(ctxt, input) < 0)
                        return;
            } else {
                /* the reference comes from a blanks wrapper: expand it */
                if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
                    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
                    xmlChar start[4];
                    xmlCharEncoding enc;

                    /*
                     * handle the extra spaces added before and after
                     * c.f. http://www.w3.org/TR/REC-xml#as-PE
                     * this is done independently.
                     */
                    input = xmlNewEntityInputStream(ctxt, entity);
                    if (xmlPushInput(ctxt, input) < 0)
                        return;

                    /*
                     * Get the 4 first bytes and decode the charset
                     * if enc != XML_CHAR_ENCODING_NONE
                     * plug some encoding conversion routines.
                     * Note that, since we may have some non-UTF8
                     * encoding (like UTF16, bug 135229), the 'length'
                     * is not known, but we can calculate based upon
                     * the amount of data in the buffer.
                     */
                    /* the GROW macro already ends with a ';' */
                    GROW
                    if (ctxt->instate == XML_PARSER_EOF)
                        return;
                    if ((ctxt->input->end - ctxt->input->cur)>=4) {
                        start[0] = RAW;
                        start[1] = NXT(1);
                        start[2] = NXT(2);
                        start[3] = NXT(3);
                        enc = xmlDetectCharEncoding(start, 4);
                        if (enc != XML_CHAR_ENCODING_NONE) {
                            xmlSwitchEncoding(ctxt, enc);
                        }
                    }

                    /* an external entity may start with a text declaration */
                    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
                        (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
                        (IS_BLANK_CH(NXT(5)))) {
                        xmlParseTextDecl(ctxt);
                    }
                } else {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
                             "PEReference: %s is not a parameter entity\n",
                                      name);
                }
            }
        } else {
            xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
        }
    }
}

/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the old one plus n; a wrap-around of that
 * computation is treated like an allocation failure. On failure the
 * original buffer is left untouched for the mem_error handler.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + n;                            \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}

/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it !
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;     /* output being built */
    size_t buffer_size = 0;     /* allocated size of buffer */
    size_t nbchars = 0;         /* bytes written to buffer so far */

    xmlChar *current = NULL;
    xmlChar *rep = NULL;        /* expansion of the entity being copied */
    const xmlChar *last;        /* first byte past the input */
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /* limit the nesting of entity expansions */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     *
     * Every branch keeps at least XML_PARSER_BUFFER_SIZE free bytes in
     * the buffer after writing, so the next write and the final
     * terminator always fit.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3)) {

        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* character reference: copy the referenced character */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val != 0) {
                COPY_BUF(0,buffer,nbchars,val);
            }
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            /* general entity reference */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
                (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
                goto int_error;
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* predefined entity: copy its first content character */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* entity with content: expand it recursively, then copy */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;

                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                            if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            } else if (ent != NULL) {
                /* entity without content: emit the reference "&name;" */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            /* parameter entity reference */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
                goto int_error;
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if (ent != NULL) {
                if (ent->content == NULL) {
                    xmlLoadEntityContent(ctxt, ent);
                }
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                            if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            }
        } else {
            /* anything else is copied verbatim */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

mem_error:
    /* target of growBuffer and of the initial allocation failure */
    xmlErrMemory(ctxt, NULL);
int_error:
    /* an error was already reported: release everything, return NULL */
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}

/**
 * xmlStringDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none 2848 * 2849 * Takes a entity string content and process to do the adequate substitutions. 2850 * 2851 * [67] Reference ::= EntityRef | CharRef 2852 * 2853 * [69] PEReference ::= '%' Name ';' 2854 * 2855 * Returns A newly allocated string with the substitution done. The caller 2856 * must deallocate it ! 2857 */ 2858 xmlChar * 2859 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2860 xmlChar end, xmlChar end2, xmlChar end3) { 2861 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2862 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2863 end, end2, end3)); 2864 } 2865 2866 /************************************************************************ 2867 * * 2868 * Commodity functions, cleanup needed ? * 2869 * * 2870 ************************************************************************/ 2871 2872 /** 2873 * areBlanks: 2874 * @ctxt: an XML parser context 2875 * @str: a xmlChar * 2876 * @len: the size of @str 2877 * @blank_chars: we know the chars are blanks 2878 * 2879 * Is this a sequence of blank chars that one can ignore ? 2880 * 2881 * Returns 1 if ignorable 0 otherwise. 2882 */ 2883 2884 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2885 int blank_chars) { 2886 int i, ret; 2887 xmlNodePtr lastChild; 2888 2889 /* 2890 * Don't spend time trying to differentiate them, the same callback is 2891 * used ! 2892 */ 2893 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2894 return(0); 2895 2896 /* 2897 * Check for xml:space value. 
2898 */ 2899 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2900 (*(ctxt->space) == -2)) 2901 return(0); 2902 2903 /* 2904 * Check that the string is made of blanks 2905 */ 2906 if (blank_chars == 0) { 2907 for (i = 0;i < len;i++) 2908 if (!(IS_BLANK_CH(str[i]))) return(0); 2909 } 2910 2911 /* 2912 * Look if the element is mixed content in the DTD if available 2913 */ 2914 if (ctxt->node == NULL) return(0); 2915 if (ctxt->myDoc != NULL) { 2916 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2917 if (ret == 0) return(1); 2918 if (ret == 1) return(0); 2919 } 2920 2921 /* 2922 * Otherwise, heuristic :-\ 2923 */ 2924 if ((RAW != '<') && (RAW != 0xD)) return(0); 2925 if ((ctxt->node->children == NULL) && 2926 (RAW == '<') && (NXT(1) == '/')) return(0); 2927 2928 lastChild = xmlGetLastChild(ctxt->node); 2929 if (lastChild == NULL) { 2930 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2931 (ctxt->node->content != NULL)) return(0); 2932 } else if (xmlNodeIsText(lastChild)) 2933 return(0); 2934 else if ((ctxt->node->children != NULL) && 2935 (xmlNodeIsText(ctxt->node->children))) 2936 return(0); 2937 return(1); 2938 } 2939 2940 /************************************************************************ 2941 * * 2942 * Extra stuff for namespace support * 2943 * Relates to http://www.w3.org/TR/WD-xml-names * 2944 * * 2945 ************************************************************************/ 2946 2947 /** 2948 * xmlSplitQName: 2949 * @ctxt: an XML parser context 2950 * @name: an XML parser context 2951 * @prefix: a xmlChar ** 2952 * 2953 * parse an UTF8 encoded XML qualified name string 2954 * 2955 * [NS 5] QName ::= (Prefix ':')? LocalPart 2956 * 2957 * [NS 6] Prefix ::= NCName 2958 * 2959 * [NS 7] LocalPart ::= NCName 2960 * 2961 * Returns the local part, and prefix is updated 2962 * to get the Prefix if any. 
2963 */ 2964 2965 xmlChar * 2966 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2967 xmlChar buf[XML_MAX_NAMELEN + 5]; 2968 xmlChar *buffer = NULL; 2969 int len = 0; 2970 int max = XML_MAX_NAMELEN; 2971 xmlChar *ret = NULL; 2972 const xmlChar *cur = name; 2973 int c; 2974 2975 if (prefix == NULL) return(NULL); 2976 *prefix = NULL; 2977 2978 if (cur == NULL) return(NULL); 2979 2980 #ifndef XML_XML_NAMESPACE 2981 /* xml: prefix is not really a namespace */ 2982 if ((cur[0] == 'x') && (cur[1] == 'm') && 2983 (cur[2] == 'l') && (cur[3] == ':')) 2984 return(xmlStrdup(name)); 2985 #endif 2986 2987 /* nasty but well=formed */ 2988 if (cur[0] == ':') 2989 return(xmlStrdup(name)); 2990 2991 c = *cur++; 2992 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2993 buf[len++] = c; 2994 c = *cur++; 2995 } 2996 if (len >= max) { 2997 /* 2998 * Okay someone managed to make a huge name, so he's ready to pay 2999 * for the processing speed. 3000 */ 3001 max = len * 2; 3002 3003 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3004 if (buffer == NULL) { 3005 xmlErrMemory(ctxt, NULL); 3006 return(NULL); 3007 } 3008 memcpy(buffer, buf, len); 3009 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 3010 if (len + 10 > max) { 3011 xmlChar *tmp; 3012 3013 max *= 2; 3014 tmp = (xmlChar *) xmlRealloc(buffer, 3015 max * sizeof(xmlChar)); 3016 if (tmp == NULL) { 3017 xmlFree(buffer); 3018 xmlErrMemory(ctxt, NULL); 3019 return(NULL); 3020 } 3021 buffer = tmp; 3022 } 3023 buffer[len++] = c; 3024 c = *cur++; 3025 } 3026 buffer[len] = 0; 3027 } 3028 3029 if ((c == ':') && (*cur == 0)) { 3030 if (buffer != NULL) 3031 xmlFree(buffer); 3032 *prefix = NULL; 3033 return(xmlStrdup(name)); 3034 } 3035 3036 if (buffer == NULL) 3037 ret = xmlStrndup(buf, len); 3038 else { 3039 ret = buffer; 3040 buffer = NULL; 3041 max = XML_MAX_NAMELEN; 3042 } 3043 3044 3045 if (c == ':') { 3046 c = *cur; 3047 *prefix = ret; 3048 if (c == 0) { 3049 
return(xmlStrndup(BAD_CAST "", 0)); 3050 } 3051 len = 0; 3052 3053 /* 3054 * Check that the first character is proper to start 3055 * a new name 3056 */ 3057 if (!(((c >= 0x61) && (c <= 0x7A)) || 3058 ((c >= 0x41) && (c <= 0x5A)) || 3059 (c == '_') || (c == ':'))) { 3060 int l; 3061 int first = CUR_SCHAR(cur, l); 3062 3063 if (!IS_LETTER(first) && (first != '_')) { 3064 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3065 "Name %s is not XML Namespace compliant\n", 3066 name); 3067 } 3068 } 3069 cur++; 3070 3071 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3072 buf[len++] = c; 3073 c = *cur++; 3074 } 3075 if (len >= max) { 3076 /* 3077 * Okay someone managed to make a huge name, so he's ready to pay 3078 * for the processing speed. 3079 */ 3080 max = len * 2; 3081 3082 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3083 if (buffer == NULL) { 3084 xmlErrMemory(ctxt, NULL); 3085 return(NULL); 3086 } 3087 memcpy(buffer, buf, len); 3088 while (c != 0) { /* tested bigname2.xml */ 3089 if (len + 10 > max) { 3090 xmlChar *tmp; 3091 3092 max *= 2; 3093 tmp = (xmlChar *) xmlRealloc(buffer, 3094 max * sizeof(xmlChar)); 3095 if (tmp == NULL) { 3096 xmlErrMemory(ctxt, NULL); 3097 xmlFree(buffer); 3098 return(NULL); 3099 } 3100 buffer = tmp; 3101 } 3102 buffer[len++] = c; 3103 c = *cur++; 3104 } 3105 buffer[len] = 0; 3106 } 3107 3108 if (buffer == NULL) 3109 ret = xmlStrndup(buf, len); 3110 else { 3111 ret = buffer; 3112 } 3113 } 3114 3115 return(ret); 3116 } 3117 3118 /************************************************************************ 3119 * * 3120 * The parser itself * 3121 * Relates to http://www.w3.org/TR/REC-xml * 3122 * * 3123 ************************************************************************/ 3124 3125 /************************************************************************ 3126 * * 3127 * Routines to parse Name, NCName and NmToken * 3128 * * 3129 ************************************************************************/ 3130 #ifdef 
DEBUG 3131 static unsigned long nbParseName = 0; 3132 static unsigned long nbParseNmToken = 0; 3133 static unsigned long nbParseNCName = 0; 3134 static unsigned long nbParseNCNameComplex = 0; 3135 static unsigned long nbParseNameComplex = 0; 3136 static unsigned long nbParseStringName = 0; 3137 #endif 3138 3139 /* 3140 * The two following functions are related to the change of accepted 3141 * characters for Name and NmToken in the Revision 5 of XML-1.0 3142 * They correspond to the modified production [4] and the new production [4a] 3143 * changes in that revision. Also note that the macros used for the 3144 * productions Letter, Digit, CombiningChar and Extender are not needed 3145 * anymore. 3146 * We still keep compatibility to pre-revision5 parsing semantic if the 3147 * new XML_PARSE_OLD10 option is given to the parser. 3148 */ 3149 static int 3150 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3151 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3152 /* 3153 * Use the new checks of production [4] [4a] amd [5] of the 3154 * Update 5 of XML-1.0 3155 */ 3156 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3157 (((c >= 'a') && (c <= 'z')) || 3158 ((c >= 'A') && (c <= 'Z')) || 3159 (c == '_') || (c == ':') || 3160 ((c >= 0xC0) && (c <= 0xD6)) || 3161 ((c >= 0xD8) && (c <= 0xF6)) || 3162 ((c >= 0xF8) && (c <= 0x2FF)) || 3163 ((c >= 0x370) && (c <= 0x37D)) || 3164 ((c >= 0x37F) && (c <= 0x1FFF)) || 3165 ((c >= 0x200C) && (c <= 0x200D)) || 3166 ((c >= 0x2070) && (c <= 0x218F)) || 3167 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3168 ((c >= 0x3001) && (c <= 0xD7FF)) || 3169 ((c >= 0xF900) && (c <= 0xFDCF)) || 3170 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3171 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3172 return(1); 3173 } else { 3174 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3175 return(1); 3176 } 3177 return(0); 3178 } 3179 3180 static int 3181 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3182 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3183 /* 3184 * Use 
the new checks of production [4] [4a] amd [5] of the 3185 * Update 5 of XML-1.0 3186 */ 3187 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3188 (((c >= 'a') && (c <= 'z')) || 3189 ((c >= 'A') && (c <= 'Z')) || 3190 ((c >= '0') && (c <= '9')) || /* !start */ 3191 (c == '_') || (c == ':') || 3192 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3193 ((c >= 0xC0) && (c <= 0xD6)) || 3194 ((c >= 0xD8) && (c <= 0xF6)) || 3195 ((c >= 0xF8) && (c <= 0x2FF)) || 3196 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3197 ((c >= 0x370) && (c <= 0x37D)) || 3198 ((c >= 0x37F) && (c <= 0x1FFF)) || 3199 ((c >= 0x200C) && (c <= 0x200D)) || 3200 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3201 ((c >= 0x2070) && (c <= 0x218F)) || 3202 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3203 ((c >= 0x3001) && (c <= 0xD7FF)) || 3204 ((c >= 0xF900) && (c <= 0xFDCF)) || 3205 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3206 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3207 return(1); 3208 } else { 3209 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3210 (c == '.') || (c == '-') || 3211 (c == '_') || (c == ':') || 3212 (IS_COMBINING(c)) || 3213 (IS_EXTENDER(c))) 3214 return(1); 3215 } 3216 return(0); 3217 } 3218 3219 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3220 int *len, int *alloc, int normalize); 3221 3222 static const xmlChar * 3223 xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3224 int len = 0, l; 3225 int c; 3226 int count = 0; 3227 3228 #ifdef DEBUG 3229 nbParseNameComplex++; 3230 #endif 3231 3232 /* 3233 * Handler for more complex cases 3234 */ 3235 GROW; 3236 if (ctxt->instate == XML_PARSER_EOF) 3237 return(NULL); 3238 c = CUR_CHAR(l); 3239 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3240 /* 3241 * Use the new checks of production [4] [4a] amd [5] of the 3242 * Update 5 of XML-1.0 3243 */ 3244 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3245 (!(((c >= 'a') && (c <= 'z')) || 3246 ((c >= 'A') && (c <= 'Z')) || 3247 (c == '_') || (c == ':') || 3248 
((c >= 0xC0) && (c <= 0xD6)) || 3249 ((c >= 0xD8) && (c <= 0xF6)) || 3250 ((c >= 0xF8) && (c <= 0x2FF)) || 3251 ((c >= 0x370) && (c <= 0x37D)) || 3252 ((c >= 0x37F) && (c <= 0x1FFF)) || 3253 ((c >= 0x200C) && (c <= 0x200D)) || 3254 ((c >= 0x2070) && (c <= 0x218F)) || 3255 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3256 ((c >= 0x3001) && (c <= 0xD7FF)) || 3257 ((c >= 0xF900) && (c <= 0xFDCF)) || 3258 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3259 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3260 return(NULL); 3261 } 3262 len += l; 3263 NEXTL(l); 3264 c = CUR_CHAR(l); 3265 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3266 (((c >= 'a') && (c <= 'z')) || 3267 ((c >= 'A') && (c <= 'Z')) || 3268 ((c >= '0') && (c <= '9')) || /* !start */ 3269 (c == '_') || (c == ':') || 3270 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3271 ((c >= 0xC0) && (c <= 0xD6)) || 3272 ((c >= 0xD8) && (c <= 0xF6)) || 3273 ((c >= 0xF8) && (c <= 0x2FF)) || 3274 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3275 ((c >= 0x370) && (c <= 0x37D)) || 3276 ((c >= 0x37F) && (c <= 0x1FFF)) || 3277 ((c >= 0x200C) && (c <= 0x200D)) || 3278 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3279 ((c >= 0x2070) && (c <= 0x218F)) || 3280 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3281 ((c >= 0x3001) && (c <= 0xD7FF)) || 3282 ((c >= 0xF900) && (c <= 0xFDCF)) || 3283 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3284 ((c >= 0x10000) && (c <= 0xEFFFF)) 3285 )) { 3286 if (count++ > XML_PARSER_CHUNK_SIZE) { 3287 count = 0; 3288 GROW; 3289 if (ctxt->instate == XML_PARSER_EOF) 3290 return(NULL); 3291 } 3292 len += l; 3293 NEXTL(l); 3294 c = CUR_CHAR(l); 3295 } 3296 } else { 3297 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3298 (!IS_LETTER(c) && (c != '_') && 3299 (c != ':'))) { 3300 return(NULL); 3301 } 3302 len += l; 3303 NEXTL(l); 3304 c = CUR_CHAR(l); 3305 3306 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3307 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3308 (c == '.') || (c == 
'-') || 3309 (c == '_') || (c == ':') || 3310 (IS_COMBINING(c)) || 3311 (IS_EXTENDER(c)))) { 3312 if (count++ > XML_PARSER_CHUNK_SIZE) { 3313 count = 0; 3314 GROW; 3315 if (ctxt->instate == XML_PARSER_EOF) 3316 return(NULL); 3317 } 3318 len += l; 3319 NEXTL(l); 3320 c = CUR_CHAR(l); 3321 if (c == 0) { 3322 count = 0; 3323 GROW; 3324 if (ctxt->instate == XML_PARSER_EOF) 3325 return(NULL); 3326 c = CUR_CHAR(l); 3327 } 3328 } 3329 } 3330 if ((len > XML_MAX_NAME_LENGTH) && 3331 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3332 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3333 return(NULL); 3334 } 3335 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3336 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3337 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3338 } 3339 3340 /** 3341 * xmlParseName: 3342 * @ctxt: an XML parser context 3343 * 3344 * parse an XML name. 3345 * 3346 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3347 * CombiningChar | Extender 3348 * 3349 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3350 * 3351 * [6] Names ::= Name (#x20 Name)* 3352 * 3353 * Returns the Name parsed or NULL 3354 */ 3355 3356 const xmlChar * 3357 xmlParseName(xmlParserCtxtPtr ctxt) { 3358 const xmlChar *in; 3359 const xmlChar *ret; 3360 int count = 0; 3361 3362 GROW; 3363 3364 #ifdef DEBUG 3365 nbParseName++; 3366 #endif 3367 3368 /* 3369 * Accelerator for simple ASCII names 3370 */ 3371 in = ctxt->input->cur; 3372 if (((*in >= 0x61) && (*in <= 0x7A)) || 3373 ((*in >= 0x41) && (*in <= 0x5A)) || 3374 (*in == '_') || (*in == ':')) { 3375 in++; 3376 while (((*in >= 0x61) && (*in <= 0x7A)) || 3377 ((*in >= 0x41) && (*in <= 0x5A)) || 3378 ((*in >= 0x30) && (*in <= 0x39)) || 3379 (*in == '_') || (*in == '-') || 3380 (*in == ':') || (*in == '.')) 3381 in++; 3382 if ((*in > 0) && (*in < 0x80)) { 3383 count = in - ctxt->input->cur; 3384 if ((count > XML_MAX_NAME_LENGTH) && 3385 ((ctxt->options & XML_PARSE_HUGE) == 
0)) { 3386 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3387 return(NULL); 3388 } 3389 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3390 ctxt->input->cur = in; 3391 ctxt->nbChars += count; 3392 ctxt->input->col += count; 3393 if (ret == NULL) 3394 xmlErrMemory(ctxt, NULL); 3395 return(ret); 3396 } 3397 } 3398 /* accelerator for special cases */ 3399 return(xmlParseNameComplex(ctxt)); 3400 } 3401 3402 static const xmlChar * 3403 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3404 int len = 0, l; 3405 int c; 3406 int count = 0; 3407 3408 #ifdef DEBUG 3409 nbParseNCNameComplex++; 3410 #endif 3411 3412 /* 3413 * Handler for more complex cases 3414 */ 3415 GROW; 3416 c = CUR_CHAR(l); 3417 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3418 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3419 return(NULL); 3420 } 3421 3422 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3423 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3424 if (count++ > XML_PARSER_CHUNK_SIZE) { 3425 if ((len > XML_MAX_NAME_LENGTH) && 3426 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3427 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3428 return(NULL); 3429 } 3430 count = 0; 3431 GROW; 3432 if (ctxt->instate == XML_PARSER_EOF) 3433 return(NULL); 3434 } 3435 len += l; 3436 NEXTL(l); 3437 c = CUR_CHAR(l); 3438 if (c == 0) { 3439 count = 0; 3440 GROW; 3441 if (ctxt->instate == XML_PARSER_EOF) 3442 return(NULL); 3443 c = CUR_CHAR(l); 3444 } 3445 } 3446 if ((len > XML_MAX_NAME_LENGTH) && 3447 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3448 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3449 return(NULL); 3450 } 3451 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3452 } 3453 3454 /** 3455 * xmlParseNCName: 3456 * @ctxt: an XML parser context 3457 * @len: length of the string parsed 3458 * 3459 * parse an XML name. 3460 * 3461 * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | 3462 * CombiningChar | Extender 3463 * 3464 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3465 * 3466 * Returns the Name parsed or NULL 3467 */ 3468 3469 static const xmlChar * 3470 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3471 const xmlChar *in; 3472 const xmlChar *ret; 3473 int count = 0; 3474 3475 #ifdef DEBUG 3476 nbParseNCName++; 3477 #endif 3478 3479 /* 3480 * Accelerator for simple ASCII names 3481 */ 3482 in = ctxt->input->cur; 3483 if (((*in >= 0x61) && (*in <= 0x7A)) || 3484 ((*in >= 0x41) && (*in <= 0x5A)) || 3485 (*in == '_')) { 3486 in++; 3487 while (((*in >= 0x61) && (*in <= 0x7A)) || 3488 ((*in >= 0x41) && (*in <= 0x5A)) || 3489 ((*in >= 0x30) && (*in <= 0x39)) || 3490 (*in == '_') || (*in == '-') || 3491 (*in == '.')) 3492 in++; 3493 if ((*in > 0) && (*in < 0x80)) { 3494 count = in - ctxt->input->cur; 3495 if ((count > XML_MAX_NAME_LENGTH) && 3496 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3497 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3498 return(NULL); 3499 } 3500 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3501 ctxt->input->cur = in; 3502 ctxt->nbChars += count; 3503 ctxt->input->col += count; 3504 if (ret == NULL) { 3505 xmlErrMemory(ctxt, NULL); 3506 } 3507 return(ret); 3508 } 3509 } 3510 return(xmlParseNCNameComplex(ctxt)); 3511 } 3512 3513 /** 3514 * xmlParseNameAndCompare: 3515 * @ctxt: an XML parser context 3516 * 3517 * parse an XML name and compares for match 3518 * (specialized for endtag parsing) 3519 * 3520 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3521 * and the name for mismatch 3522 */ 3523 3524 static const xmlChar * 3525 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3526 register const xmlChar *cmp = other; 3527 register const xmlChar *in; 3528 const xmlChar *ret; 3529 3530 GROW; 3531 if (ctxt->instate == XML_PARSER_EOF) 3532 return(NULL); 3533 3534 in = ctxt->input->cur; 3535 while (*in != 0 && *in == *cmp) { 3536 ++in; 3537 ++cmp; 3538 
ctxt->input->col++; 3539 } 3540 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3541 /* success */ 3542 ctxt->input->cur = in; 3543 return (const xmlChar*) 1; 3544 } 3545 /* failure (or end of input buffer), check with full function */ 3546 ret = xmlParseName (ctxt); 3547 /* strings coming from the dictionnary direct compare possible */ 3548 if (ret == other) { 3549 return (const xmlChar*) 1; 3550 } 3551 return ret; 3552 } 3553 3554 /** 3555 * xmlParseStringName: 3556 * @ctxt: an XML parser context 3557 * @str: a pointer to the string pointer (IN/OUT) 3558 * 3559 * parse an XML name. 3560 * 3561 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3562 * CombiningChar | Extender 3563 * 3564 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3565 * 3566 * [6] Names ::= Name (#x20 Name)* 3567 * 3568 * Returns the Name parsed or NULL. The @str pointer 3569 * is updated to the current location in the string. 3570 */ 3571 3572 static xmlChar * 3573 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3574 xmlChar buf[XML_MAX_NAMELEN + 5]; 3575 const xmlChar *cur = *str; 3576 int len = 0, l; 3577 int c; 3578 3579 #ifdef DEBUG 3580 nbParseStringName++; 3581 #endif 3582 3583 c = CUR_SCHAR(cur, l); 3584 if (!xmlIsNameStartChar(ctxt, c)) { 3585 return(NULL); 3586 } 3587 3588 COPY_BUF(l,buf,len,c); 3589 cur += l; 3590 c = CUR_SCHAR(cur, l); 3591 while (xmlIsNameChar(ctxt, c)) { 3592 COPY_BUF(l,buf,len,c); 3593 cur += l; 3594 c = CUR_SCHAR(cur, l); 3595 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3596 /* 3597 * Okay someone managed to make a huge name, so he's ready to pay 3598 * for the processing speed. 
3599 */ 3600 xmlChar *buffer; 3601 int max = len * 2; 3602 3603 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3604 if (buffer == NULL) { 3605 xmlErrMemory(ctxt, NULL); 3606 return(NULL); 3607 } 3608 memcpy(buffer, buf, len); 3609 while (xmlIsNameChar(ctxt, c)) { 3610 if (len + 10 > max) { 3611 xmlChar *tmp; 3612 3613 if ((len > XML_MAX_NAME_LENGTH) && 3614 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3615 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3616 xmlFree(buffer); 3617 return(NULL); 3618 } 3619 max *= 2; 3620 tmp = (xmlChar *) xmlRealloc(buffer, 3621 max * sizeof(xmlChar)); 3622 if (tmp == NULL) { 3623 xmlErrMemory(ctxt, NULL); 3624 xmlFree(buffer); 3625 return(NULL); 3626 } 3627 buffer = tmp; 3628 } 3629 COPY_BUF(l,buffer,len,c); 3630 cur += l; 3631 c = CUR_SCHAR(cur, l); 3632 } 3633 buffer[len] = 0; 3634 *str = cur; 3635 return(buffer); 3636 } 3637 } 3638 if ((len > XML_MAX_NAME_LENGTH) && 3639 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3640 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3641 return(NULL); 3642 } 3643 *str = cur; 3644 return(xmlStrndup(buf, len)); 3645 } 3646 3647 /** 3648 * xmlParseNmtoken: 3649 * @ctxt: an XML parser context 3650 * 3651 * parse an XML Nmtoken. 
3652 * 3653 * [7] Nmtoken ::= (NameChar)+ 3654 * 3655 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3656 * 3657 * Returns the Nmtoken parsed or NULL 3658 */ 3659 3660 xmlChar * 3661 xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3662 xmlChar buf[XML_MAX_NAMELEN + 5]; 3663 int len = 0, l; 3664 int c; 3665 int count = 0; 3666 3667 #ifdef DEBUG 3668 nbParseNmToken++; 3669 #endif 3670 3671 GROW; 3672 if (ctxt->instate == XML_PARSER_EOF) 3673 return(NULL); 3674 c = CUR_CHAR(l); 3675 3676 while (xmlIsNameChar(ctxt, c)) { 3677 if (count++ > XML_PARSER_CHUNK_SIZE) { 3678 count = 0; 3679 GROW; 3680 } 3681 COPY_BUF(l,buf,len,c); 3682 NEXTL(l); 3683 c = CUR_CHAR(l); 3684 if (c == 0) { 3685 count = 0; 3686 GROW; 3687 if (ctxt->instate == XML_PARSER_EOF) 3688 return(NULL); 3689 c = CUR_CHAR(l); 3690 } 3691 if (len >= XML_MAX_NAMELEN) { 3692 /* 3693 * Okay someone managed to make a huge token, so he's ready to pay 3694 * for the processing speed. 3695 */ 3696 xmlChar *buffer; 3697 int max = len * 2; 3698 3699 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3700 if (buffer == NULL) { 3701 xmlErrMemory(ctxt, NULL); 3702 return(NULL); 3703 } 3704 memcpy(buffer, buf, len); 3705 while (xmlIsNameChar(ctxt, c)) { 3706 if (count++ > XML_PARSER_CHUNK_SIZE) { 3707 count = 0; 3708 GROW; 3709 if (ctxt->instate == XML_PARSER_EOF) { 3710 xmlFree(buffer); 3711 return(NULL); 3712 } 3713 } 3714 if (len + 10 > max) { 3715 xmlChar *tmp; 3716 3717 if ((max > XML_MAX_NAME_LENGTH) && 3718 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3719 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3720 xmlFree(buffer); 3721 return(NULL); 3722 } 3723 max *= 2; 3724 tmp = (xmlChar *) xmlRealloc(buffer, 3725 max * sizeof(xmlChar)); 3726 if (tmp == NULL) { 3727 xmlErrMemory(ctxt, NULL); 3728 xmlFree(buffer); 3729 return(NULL); 3730 } 3731 buffer = tmp; 3732 } 3733 COPY_BUF(l,buffer,len,c); 3734 NEXTL(l); 3735 c = CUR_CHAR(l); 3736 } 3737 buffer[len] = 0; 3738 return(buffer); 3739 } 3740 } 3741 if (len == 0) 
3742 return(NULL); 3743 if ((len > XML_MAX_NAME_LENGTH) && 3744 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3745 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3746 return(NULL); 3747 } 3748 return(xmlStrndup(buf, len)); 3749 } 3750 3751 /** 3752 * xmlParseEntityValue: 3753 * @ctxt: an XML parser context 3754 * @orig: if non-NULL store a copy of the original entity value 3755 * 3756 * parse a value for ENTITY declarations 3757 * 3758 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3759 * "'" ([^%&'] | PEReference | Reference)* "'" 3760 * 3761 * Returns the EntityValue parsed with reference substituted or NULL 3762 */ 3763 3764 xmlChar * 3765 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3766 xmlChar *buf = NULL; 3767 int len = 0; 3768 int size = XML_PARSER_BUFFER_SIZE; 3769 int c, l; 3770 xmlChar stop; 3771 xmlChar *ret = NULL; 3772 const xmlChar *cur = NULL; 3773 xmlParserInputPtr input; 3774 3775 if (RAW == '"') stop = '"'; 3776 else if (RAW == '\'') stop = '\''; 3777 else { 3778 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3779 return(NULL); 3780 } 3781 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3782 if (buf == NULL) { 3783 xmlErrMemory(ctxt, NULL); 3784 return(NULL); 3785 } 3786 3787 /* 3788 * The content of the entity definition is copied in a buffer. 3789 */ 3790 3791 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3792 input = ctxt->input; 3793 GROW; 3794 if (ctxt->instate == XML_PARSER_EOF) { 3795 xmlFree(buf); 3796 return(NULL); 3797 } 3798 NEXT; 3799 c = CUR_CHAR(l); 3800 /* 3801 * NOTE: 4.4.5 Included in Literal 3802 * When a parameter entity reference appears in a literal entity 3803 * value, ... a single or double quote character in the replacement 3804 * text is always treated as a normal data character and will not 3805 * terminate the literal. 
3806 * In practice it means we stop the loop only when back at parsing 3807 * the initial entity and the quote is found 3808 */ 3809 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3810 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3811 if (len + 5 >= size) { 3812 xmlChar *tmp; 3813 3814 size *= 2; 3815 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3816 if (tmp == NULL) { 3817 xmlErrMemory(ctxt, NULL); 3818 xmlFree(buf); 3819 return(NULL); 3820 } 3821 buf = tmp; 3822 } 3823 COPY_BUF(l,buf,len,c); 3824 NEXTL(l); 3825 /* 3826 * Pop-up of finished entities. 3827 */ 3828 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3829 xmlPopInput(ctxt); 3830 3831 GROW; 3832 c = CUR_CHAR(l); 3833 if (c == 0) { 3834 GROW; 3835 c = CUR_CHAR(l); 3836 } 3837 } 3838 buf[len] = 0; 3839 if (ctxt->instate == XML_PARSER_EOF) { 3840 xmlFree(buf); 3841 return(NULL); 3842 } 3843 3844 /* 3845 * Raise problem w.r.t. '&' and '%' being used in non-entities 3846 * reference constructs. Note Charref will be handled in 3847 * xmlStringDecodeEntities() 3848 */ 3849 cur = buf; 3850 while (*cur != 0) { /* non input consuming */ 3851 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3852 xmlChar *name; 3853 xmlChar tmp = *cur; 3854 3855 cur++; 3856 name = xmlParseStringName(ctxt, &cur); 3857 if ((name == NULL) || (*cur != ';')) { 3858 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3859 "EntityValue: '%c' forbidden except for entities references\n", 3860 tmp); 3861 } 3862 if ((tmp == '%') && (ctxt->inSubset == 1) && 3863 (ctxt->inputNr == 1)) { 3864 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3865 } 3866 if (name != NULL) 3867 xmlFree(name); 3868 if (*cur == 0) 3869 break; 3870 } 3871 cur++; 3872 } 3873 3874 /* 3875 * Then PEReference entities are substituted. 
/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the length in bytes of the value
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * The value is built in a buffer which is enlarged with growBuffer()
 * each time fewer than 10 spare bytes are left. With @normalize set,
 * leading blanks are dropped, runs of blanks are reduced to a single
 * space and trailing spaces are removed.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;          /* the quote which opened the value */
    xmlChar *buf = NULL;        /* the value being built */
    xmlChar *rep = NULL;        /* replacement text of an entity, if any */
    size_t len = 0;             /* number of bytes used in buf */
    size_t buf_size = 0;        /* number of bytes allocated for buf */
    int c, l, in_space = 0;     /* in_space: the last char seen was a blank */
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    /* the value has to start with a quote, remember which one */
    if (NXT(0) == '"') {
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
        limit = '\'';
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
         * special option is given
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue length too long\n");
            /*
             * NOTE(review): leaving through mem_error also calls
             * xmlErrMemory() although no allocation failed here.
             */
            goto mem_error;
        }
        if (c == 0) break;
        if (c == '&') {
            in_space = 0;
            if (NXT(1) == '#') {
                /* character reference */
                int val = xmlParseCharRef(ctxt);

                if (val == '&') {
                    if (ctxt->replaceEntities) {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                    } else {
                        /*
                         * The reparsing will be done in xmlStringGetNodeList()
                         * called by the attribute() function in SAX.c
                         */
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        /* keep the ampersand escaped as "&#38;" */
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    }
                } else if (val != 0) {
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    len += xmlCopyChar(0, &buf[len], val);
                }
            } else {
                /* general entity reference */
                ent = xmlParseEntityRef(ctxt);
                ctxt->nbentities++;
                if (ent != NULL)
                    ctxt->nbentities += ent->owner;
                if ((ent != NULL) &&
                    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                    /* predefined entity: a single character is emitted */
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    if ((ctxt->replaceEntities == 0) &&
                        (ent->content[0] == '&')) {
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    } else {
                        buf[len++] = ent->content[0];
                    }
                } else if ((ent != NULL) &&
                           (ctxt->replaceEntities != 0)) {
                    /* substitution requested: append the decoded content */
                    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                      XML_SUBSTITUTE_REF,
                                                      0, 0, 0);
                        if (rep != NULL) {
                            current = rep;
                            while (*current != 0) { /* non input consuming */
                                /* blanks of the replacement become spaces */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
                                if (len + 10 > buf_size) {
                                    growBuffer(buf, 10);
                                }
                            }
                            xmlFree(rep);
                            rep = NULL;
                        }
                    } else {
                        /*
                         * NOTE(review): predefined entities are already
                         * handled by the branch above, this one looks
                         * unreachable.
                         */
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        if (ent->content != NULL)
                            buf[len++] = ent->content[0];
                    }
                } else if (ent != NULL) {
                    /* no substitution: the reference is copied as is */
                    int i = xmlStrlen(ent->name);
                    const xmlChar *cur = ent->name;

                    /*
                     * This may look absurd but is needed to detect
                     * entities problems
                     */
                    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
                        (ent->content != NULL) && (ent->checked == 0)) {
                        unsigned long oldnbent = ctxt->nbentities;

                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                  XML_SUBSTITUTE_REF, 0, 0, 0);

                        /*
                         * the upper bits record how much nbentities grew
                         * during the expansion, bit 0 is set below when
                         * the expansion contains a '<'
                         */
                        ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
                        if (rep != NULL) {
                            if (xmlStrchr(rep, '<'))
                                ent->checked |= 1;
                            xmlFree(rep);
                            rep = NULL;
                        }
                    }

                    /*
                     * Just output the reference
                     */
                    buf[len++] = '&';
                    while (len + i + 10 > buf_size) {
                        growBuffer(buf, i + 10);
                    }
                    for (;i > 0;i--)
                        buf[len++] = *cur++;
                    buf[len++] = ';';
                }
            }
        } else {
            if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
                /*
                 * Blank: always stored as a space. When normalizing,
                 * leading blanks and repeated blanks are skipped.
                 */
                if ((len != 0) || (!normalize)) {
                    if ((!normalize) || (!in_space)) {
                        COPY_BUF(l,buf,len,0x20);
                        while (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                    }
                    in_space = 1;
                }
            } else {
                in_space = 0;
                COPY_BUF(l,buf,len,c);
                if (len + 10 > buf_size) {
                    growBuffer(buf, 10);
                }
            }
            NEXTL(l);
        }
        GROW;
        c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* normalization: drop the trailing spaces */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    /*
     * Report why the loop stopped. In the error cases the value built
     * so far is still returned.
     */
    if (RAW == '<') {
        xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
        if ((c != 0) && (!IS_CHAR(c))) {
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                           "invalid character in attribute value\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue: ' expected\n");
        }
    } else
        NEXT;   /* skip the closing quote */

    /*
     * There we potentially risk an overflow, don't allow attribute value of
     * length more than INT_MAX it is a very reasonable assumption !
     */
    if (len >= INT_MAX) {
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                       "AttValue length too long\n");
        goto mem_error;
    }

    if (attlen != NULL) *attlen = (int) len;
    return(buf);

    /*
     * growBuffer() is defined outside of this function and its result
     * is never tested here, presumably it jumps to mem_error by itself
     * when the buffer cannot be enlarged - TODO confirm.
     */
mem_error:
    xmlErrMemory(ctxt, NULL);
error:
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}
parsed entity or the literal entity value of an internal parsed entity 4164 * - other characters are processed by appending them to the normalized value 4165 * If the declared value is not CDATA, then the XML processor must further 4166 * process the normalized attribute value by discarding any leading and 4167 * trailing space (#x20) characters, and by replacing sequences of space 4168 * (#x20) characters by a single space (#x20) character. 4169 * All attributes for which no declaration has been read should be treated 4170 * by a non-validating parser as if declared CDATA. 4171 * 4172 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4173 */ 4174 4175 4176 xmlChar * 4177 xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4178 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4179 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4180 } 4181 4182 /** 4183 * xmlParseSystemLiteral: 4184 * @ctxt: an XML parser context 4185 * 4186 * parse an XML Literal 4187 * 4188 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4189 * 4190 * Returns the SystemLiteral parsed or NULL 4191 */ 4192 4193 xmlChar * 4194 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4195 xmlChar *buf = NULL; 4196 int len = 0; 4197 int size = XML_PARSER_BUFFER_SIZE; 4198 int cur, l; 4199 xmlChar stop; 4200 int state = ctxt->instate; 4201 int count = 0; 4202 4203 SHRINK; 4204 if (RAW == '"') { 4205 NEXT; 4206 stop = '"'; 4207 } else if (RAW == '\'') { 4208 NEXT; 4209 stop = '\''; 4210 } else { 4211 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4212 return(NULL); 4213 } 4214 4215 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4216 if (buf == NULL) { 4217 xmlErrMemory(ctxt, NULL); 4218 return(NULL); 4219 } 4220 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4221 cur = CUR_CHAR(l); 4222 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4223 if (len + 5 >= size) { 4224 xmlChar *tmp; 4225 4226 if ((size > XML_MAX_NAME_LENGTH) && 4227 
((ctxt->options & XML_PARSE_HUGE) == 0)) { 4228 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4229 xmlFree(buf); 4230 ctxt->instate = (xmlParserInputState) state; 4231 return(NULL); 4232 } 4233 size *= 2; 4234 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4235 if (tmp == NULL) { 4236 xmlFree(buf); 4237 xmlErrMemory(ctxt, NULL); 4238 ctxt->instate = (xmlParserInputState) state; 4239 return(NULL); 4240 } 4241 buf = tmp; 4242 } 4243 count++; 4244 if (count > 50) { 4245 GROW; 4246 count = 0; 4247 if (ctxt->instate == XML_PARSER_EOF) { 4248 xmlFree(buf); 4249 return(NULL); 4250 } 4251 } 4252 COPY_BUF(l,buf,len,cur); 4253 NEXTL(l); 4254 cur = CUR_CHAR(l); 4255 if (cur == 0) { 4256 GROW; 4257 SHRINK; 4258 cur = CUR_CHAR(l); 4259 } 4260 } 4261 buf[len] = 0; 4262 ctxt->instate = (xmlParserInputState) state; 4263 if (!IS_CHAR(cur)) { 4264 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4265 } else { 4266 NEXT; 4267 } 4268 return(buf); 4269 } 4270 4271 /** 4272 * xmlParsePubidLiteral: 4273 * @ctxt: an XML parser context 4274 * 4275 * parse an XML public literal 4276 * 4277 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4278 * 4279 * Returns the PubidLiteral parsed or NULL. 
4280 */ 4281 4282 xmlChar * 4283 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4284 xmlChar *buf = NULL; 4285 int len = 0; 4286 int size = XML_PARSER_BUFFER_SIZE; 4287 xmlChar cur; 4288 xmlChar stop; 4289 int count = 0; 4290 xmlParserInputState oldstate = ctxt->instate; 4291 4292 SHRINK; 4293 if (RAW == '"') { 4294 NEXT; 4295 stop = '"'; 4296 } else if (RAW == '\'') { 4297 NEXT; 4298 stop = '\''; 4299 } else { 4300 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4301 return(NULL); 4302 } 4303 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4304 if (buf == NULL) { 4305 xmlErrMemory(ctxt, NULL); 4306 return(NULL); 4307 } 4308 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4309 cur = CUR; 4310 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4311 if (len + 1 >= size) { 4312 xmlChar *tmp; 4313 4314 if ((size > XML_MAX_NAME_LENGTH) && 4315 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4316 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4317 xmlFree(buf); 4318 return(NULL); 4319 } 4320 size *= 2; 4321 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4322 if (tmp == NULL) { 4323 xmlErrMemory(ctxt, NULL); 4324 xmlFree(buf); 4325 return(NULL); 4326 } 4327 buf = tmp; 4328 } 4329 buf[len++] = cur; 4330 count++; 4331 if (count > 50) { 4332 GROW; 4333 count = 0; 4334 if (ctxt->instate == XML_PARSER_EOF) { 4335 xmlFree(buf); 4336 return(NULL); 4337 } 4338 } 4339 NEXT; 4340 cur = CUR; 4341 if (cur == 0) { 4342 GROW; 4343 SHRINK; 4344 cur = CUR; 4345 } 4346 } 4347 buf[len] = 0; 4348 if (cur != stop) { 4349 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4350 } else { 4351 NEXT; 4352 } 4353 ctxt->instate = oldstate; 4354 return(buf); 4355 } 4356 4357 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4358 4359 /* 4360 * used for the test in the inner loop of the char data testing 4361 */ 4362 static const unsigned char test_char_data[256] = { 4363 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4364 0x00, 0x09, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4365 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4366 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4367 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4368 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4369 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4370 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4371 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4372 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4373 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4374 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4375 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4376 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4377 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4378 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4379 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4380 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4381 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4382 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4383 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4384 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4385 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4386 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4387 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4388 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4389 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4390 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4391 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4392 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4393 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4394 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4395 }; 4396 4397 /** 4398 * xmlParseCharData: 4399 * @ctxt: an XML parser context 4400 * @cdata: int indicating whether we are within a CDATA section 4401 * 4402 * parse a CharData section. 4403 * if we are within a CDATA section ']]>' marks an end of section. 
4404 * 4405 * The right angle bracket (>) may be represented using the string ">", 4406 * and must, for compatibility, be escaped using ">" or a character 4407 * reference when it appears in the string "]]>" in content, when that 4408 * string is not marking the end of a CDATA section. 4409 * 4410 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 4411 */ 4412 4413 void 4414 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 4415 const xmlChar *in; 4416 int nbchar = 0; 4417 int line = ctxt->input->line; 4418 int col = ctxt->input->col; 4419 int ccol; 4420 4421 SHRINK; 4422 GROW; 4423 /* 4424 * Accelerated common case where input don't need to be 4425 * modified before passing it to the handler. 4426 */ 4427 if (!cdata) { 4428 in = ctxt->input->cur; 4429 do { 4430 get_more_space: 4431 while (*in == 0x20) { in++; ctxt->input->col++; } 4432 if (*in == 0xA) { 4433 do { 4434 ctxt->input->line++; ctxt->input->col = 1; 4435 in++; 4436 } while (*in == 0xA); 4437 goto get_more_space; 4438 } 4439 if (*in == '<') { 4440 nbchar = in - ctxt->input->cur; 4441 if (nbchar > 0) { 4442 const xmlChar *tmp = ctxt->input->cur; 4443 ctxt->input->cur = in; 4444 4445 if ((ctxt->sax != NULL) && 4446 (ctxt->sax->ignorableWhitespace != 4447 ctxt->sax->characters)) { 4448 if (areBlanks(ctxt, tmp, nbchar, 1)) { 4449 if (ctxt->sax->ignorableWhitespace != NULL) 4450 ctxt->sax->ignorableWhitespace(ctxt->userData, 4451 tmp, nbchar); 4452 } else { 4453 if (ctxt->sax->characters != NULL) 4454 ctxt->sax->characters(ctxt->userData, 4455 tmp, nbchar); 4456 if (*ctxt->space == -1) 4457 *ctxt->space = -2; 4458 } 4459 } else if ((ctxt->sax != NULL) && 4460 (ctxt->sax->characters != NULL)) { 4461 ctxt->sax->characters(ctxt->userData, 4462 tmp, nbchar); 4463 } 4464 } 4465 return; 4466 } 4467 4468 get_more: 4469 ccol = ctxt->input->col; 4470 while (test_char_data[*in]) { 4471 in++; 4472 ccol++; 4473 } 4474 ctxt->input->col = ccol; 4475 if (*in == 0xA) { 4476 do { 4477 ctxt->input->line++; ctxt->input->col 
= 1; 4478 in++; 4479 } while (*in == 0xA); 4480 goto get_more; 4481 } 4482 if (*in == ']') { 4483 if ((in[1] == ']') && (in[2] == '>')) { 4484 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4485 ctxt->input->cur = in; 4486 return; 4487 } 4488 in++; 4489 ctxt->input->col++; 4490 goto get_more; 4491 } 4492 nbchar = in - ctxt->input->cur; 4493 if (nbchar > 0) { 4494 if ((ctxt->sax != NULL) && 4495 (ctxt->sax->ignorableWhitespace != 4496 ctxt->sax->characters) && 4497 (IS_BLANK_CH(*ctxt->input->cur))) { 4498 const xmlChar *tmp = ctxt->input->cur; 4499 ctxt->input->cur = in; 4500 4501 if (areBlanks(ctxt, tmp, nbchar, 0)) { 4502 if (ctxt->sax->ignorableWhitespace != NULL) 4503 ctxt->sax->ignorableWhitespace(ctxt->userData, 4504 tmp, nbchar); 4505 } else { 4506 if (ctxt->sax->characters != NULL) 4507 ctxt->sax->characters(ctxt->userData, 4508 tmp, nbchar); 4509 if (*ctxt->space == -1) 4510 *ctxt->space = -2; 4511 } 4512 line = ctxt->input->line; 4513 col = ctxt->input->col; 4514 } else if (ctxt->sax != NULL) { 4515 if (ctxt->sax->characters != NULL) 4516 ctxt->sax->characters(ctxt->userData, 4517 ctxt->input->cur, nbchar); 4518 line = ctxt->input->line; 4519 col = ctxt->input->col; 4520 } 4521 /* something really bad happened in the SAX callback */ 4522 if (ctxt->instate != XML_PARSER_CONTENT) 4523 return; 4524 } 4525 ctxt->input->cur = in; 4526 if (*in == 0xD) { 4527 in++; 4528 if (*in == 0xA) { 4529 ctxt->input->cur = in; 4530 in++; 4531 ctxt->input->line++; ctxt->input->col = 1; 4532 continue; /* while */ 4533 } 4534 in--; 4535 } 4536 if (*in == '<') { 4537 return; 4538 } 4539 if (*in == '&') { 4540 return; 4541 } 4542 SHRINK; 4543 GROW; 4544 if (ctxt->instate == XML_PARSER_EOF) 4545 return; 4546 in = ctxt->input->cur; 4547 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4548 nbchar = 0; 4549 } 4550 ctxt->input->line = line; 4551 ctxt->input->col = col; 4552 xmlParseCharDataComplex(ctxt, cdata); 4553 } 4554 4555 /** 4556 * xmlParseCharDataComplex: 
4557 * @ctxt: an XML parser context 4558 * @cdata: int indicating whether we are within a CDATA section 4559 * 4560 * parse a CharData section.this is the fallback function 4561 * of xmlParseCharData() when the parsing requires handling 4562 * of non-ASCII characters. 4563 */ 4564 static void 4565 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4566 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4567 int nbchar = 0; 4568 int cur, l; 4569 int count = 0; 4570 4571 SHRINK; 4572 GROW; 4573 cur = CUR_CHAR(l); 4574 while ((cur != '<') && /* checked */ 4575 (cur != '&') && 4576 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4577 if ((cur == ']') && (NXT(1) == ']') && 4578 (NXT(2) == '>')) { 4579 if (cdata) break; 4580 else { 4581 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4582 } 4583 } 4584 COPY_BUF(l,buf,nbchar,cur); 4585 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4586 buf[nbchar] = 0; 4587 4588 /* 4589 * OK the segment is to be consumed as chars. 4590 */ 4591 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4592 if (areBlanks(ctxt, buf, nbchar, 0)) { 4593 if (ctxt->sax->ignorableWhitespace != NULL) 4594 ctxt->sax->ignorableWhitespace(ctxt->userData, 4595 buf, nbchar); 4596 } else { 4597 if (ctxt->sax->characters != NULL) 4598 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4599 if ((ctxt->sax->characters != 4600 ctxt->sax->ignorableWhitespace) && 4601 (*ctxt->space == -1)) 4602 *ctxt->space = -2; 4603 } 4604 } 4605 nbchar = 0; 4606 /* something really bad happened in the SAX callback */ 4607 if (ctxt->instate != XML_PARSER_CONTENT) 4608 return; 4609 } 4610 count++; 4611 if (count > 50) { 4612 GROW; 4613 count = 0; 4614 if (ctxt->instate == XML_PARSER_EOF) 4615 return; 4616 } 4617 NEXTL(l); 4618 cur = CUR_CHAR(l); 4619 } 4620 if (nbchar != 0) { 4621 buf[nbchar] = 0; 4622 /* 4623 * OK the segment is to be consumed as chars. 
4624 */ 4625 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4626 if (areBlanks(ctxt, buf, nbchar, 0)) { 4627 if (ctxt->sax->ignorableWhitespace != NULL) 4628 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4629 } else { 4630 if (ctxt->sax->characters != NULL) 4631 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4632 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4633 (*ctxt->space == -1)) 4634 *ctxt->space = -2; 4635 } 4636 } 4637 } 4638 if ((cur != 0) && (!IS_CHAR(cur))) { 4639 /* Generate the error and skip the offending character */ 4640 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4641 "PCDATA invalid Char value %d\n", 4642 cur); 4643 NEXTL(l); 4644 } 4645 } 4646 4647 /** 4648 * xmlParseExternalID: 4649 * @ctxt: an XML parser context 4650 * @publicID: a xmlChar** receiving PubidLiteral 4651 * @strict: indicate whether we should restrict parsing to only 4652 * production [75], see NOTE below 4653 * 4654 * Parse an External ID or a Public ID 4655 * 4656 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4657 * 'PUBLIC' S PubidLiteral S SystemLiteral 4658 * 4659 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4660 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4661 * 4662 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4663 * 4664 * Returns the function returns SystemLiteral and in the second 4665 * case publicID receives PubidLiteral, is strict is off 4666 * it is possible to return NULL and have publicID set. 
4667 */ 4668 4669 xmlChar * 4670 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4671 xmlChar *URI = NULL; 4672 4673 SHRINK; 4674 4675 *publicID = NULL; 4676 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4677 SKIP(6); 4678 if (!IS_BLANK_CH(CUR)) { 4679 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4680 "Space required after 'SYSTEM'\n"); 4681 } 4682 SKIP_BLANKS; 4683 URI = xmlParseSystemLiteral(ctxt); 4684 if (URI == NULL) { 4685 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4686 } 4687 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4688 SKIP(6); 4689 if (!IS_BLANK_CH(CUR)) { 4690 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4691 "Space required after 'PUBLIC'\n"); 4692 } 4693 SKIP_BLANKS; 4694 *publicID = xmlParsePubidLiteral(ctxt); 4695 if (*publicID == NULL) { 4696 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4697 } 4698 if (strict) { 4699 /* 4700 * We don't handle [83] so "S SystemLiteral" is required. 4701 */ 4702 if (!IS_BLANK_CH(CUR)) { 4703 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4704 "Space required after the Public Identifier\n"); 4705 } 4706 } else { 4707 /* 4708 * We handle [83] so we return immediately, if 4709 * "S SystemLiteral" is not detected. From a purely parsing 4710 * point of view that's a nice mess. 4711 */ 4712 const xmlChar *ptr; 4713 GROW; 4714 4715 ptr = CUR_PTR; 4716 if (!IS_BLANK_CH(*ptr)) return(NULL); 4717 4718 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4719 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4720 } 4721 SKIP_BLANKS; 4722 URI = xmlParseSystemLiteral(ctxt); 4723 if (URI == NULL) { 4724 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4725 } 4726 } 4727 return(URI); 4728 } 4729 4730 /** 4731 * xmlParseCommentComplex: 4732 * @ctxt: an XML parser context 4733 * @buf: the already parsed part of the buffer 4734 * @len: number of bytes filles in the buffer 4735 * @size: allocated size of the buffer 4736 * 4737 * Skip an XML (SGML) comment <!-- .... 
--> 4738 * The spec says that "For compatibility, the string "--" (double-hyphen) 4739 * must not occur within comments. " 4740 * This is the slow routine in case the accelerator for ascii didn't work 4741 * 4742 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4743 */ 4744 static void 4745 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4746 size_t len, size_t size) { 4747 int q, ql; 4748 int r, rl; 4749 int cur, l; 4750 size_t count = 0; 4751 int inputid; 4752 4753 inputid = ctxt->input->id; 4754 4755 if (buf == NULL) { 4756 len = 0; 4757 size = XML_PARSER_BUFFER_SIZE; 4758 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4759 if (buf == NULL) { 4760 xmlErrMemory(ctxt, NULL); 4761 return; 4762 } 4763 } 4764 GROW; /* Assure there's enough input data */ 4765 q = CUR_CHAR(ql); 4766 if (q == 0) 4767 goto not_terminated; 4768 if (!IS_CHAR(q)) { 4769 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4770 "xmlParseComment: invalid xmlChar value %d\n", 4771 q); 4772 xmlFree (buf); 4773 return; 4774 } 4775 NEXTL(ql); 4776 r = CUR_CHAR(rl); 4777 if (r == 0) 4778 goto not_terminated; 4779 if (!IS_CHAR(r)) { 4780 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4781 "xmlParseComment: invalid xmlChar value %d\n", 4782 q); 4783 xmlFree (buf); 4784 return; 4785 } 4786 NEXTL(rl); 4787 cur = CUR_CHAR(l); 4788 if (cur == 0) 4789 goto not_terminated; 4790 while (IS_CHAR(cur) && /* checked */ 4791 ((cur != '>') || 4792 (r != '-') || (q != '-'))) { 4793 if ((r == '-') && (q == '-')) { 4794 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4795 } 4796 if ((len > XML_MAX_TEXT_LENGTH) && 4797 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4798 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4799 "Comment too big found", NULL); 4800 xmlFree (buf); 4801 return; 4802 } 4803 if (len + 5 >= size) { 4804 xmlChar *new_buf; 4805 size_t new_size; 4806 4807 new_size = size * 2; 4808 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4809 if (new_buf == NULL) { 
4810 xmlFree (buf); 4811 xmlErrMemory(ctxt, NULL); 4812 return; 4813 } 4814 buf = new_buf; 4815 size = new_size; 4816 } 4817 COPY_BUF(ql,buf,len,q); 4818 q = r; 4819 ql = rl; 4820 r = cur; 4821 rl = l; 4822 4823 count++; 4824 if (count > 50) { 4825 GROW; 4826 count = 0; 4827 if (ctxt->instate == XML_PARSER_EOF) { 4828 xmlFree(buf); 4829 return; 4830 } 4831 } 4832 NEXTL(l); 4833 cur = CUR_CHAR(l); 4834 if (cur == 0) { 4835 SHRINK; 4836 GROW; 4837 cur = CUR_CHAR(l); 4838 } 4839 } 4840 buf[len] = 0; 4841 if (cur == 0) { 4842 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4843 "Comment not terminated \n<!--%.50s\n", buf); 4844 } else if (!IS_CHAR(cur)) { 4845 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4846 "xmlParseComment: invalid xmlChar value %d\n", 4847 cur); 4848 } else { 4849 if (inputid != ctxt->input->id) { 4850 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4851 "Comment doesn't start and stop in the same entity\n"); 4852 } 4853 NEXT; 4854 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4855 (!ctxt->disableSAX)) 4856 ctxt->sax->comment(ctxt->userData, buf); 4857 } 4858 xmlFree(buf); 4859 return; 4860 not_terminated: 4861 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4862 "Comment not terminated\n", NULL); 4863 xmlFree(buf); 4864 return; 4865 } 4866 4867 /** 4868 * xmlParseComment: 4869 * @ctxt: an XML parser context 4870 * 4871 * Skip an XML (SGML) comment <!-- .... --> 4872 * The spec says that "For compatibility, the string "--" (double-hyphen) 4873 * must not occur within comments. " 4874 * 4875 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4876 */ 4877 void 4878 xmlParseComment(xmlParserCtxtPtr ctxt) { 4879 xmlChar *buf = NULL; 4880 size_t size = XML_PARSER_BUFFER_SIZE; 4881 size_t len = 0; 4882 xmlParserInputState state; 4883 const xmlChar *in; 4884 size_t nbchar = 0; 4885 int ccol; 4886 int inputid; 4887 4888 /* 4889 * Check that there is a comment right here. 
4890 */ 4891 if ((RAW != '<') || (NXT(1) != '!') || 4892 (NXT(2) != '-') || (NXT(3) != '-')) return; 4893 state = ctxt->instate; 4894 ctxt->instate = XML_PARSER_COMMENT; 4895 inputid = ctxt->input->id; 4896 SKIP(4); 4897 SHRINK; 4898 GROW; 4899 4900 /* 4901 * Accelerated common case where input don't need to be 4902 * modified before passing it to the handler. 4903 */ 4904 in = ctxt->input->cur; 4905 do { 4906 if (*in == 0xA) { 4907 do { 4908 ctxt->input->line++; ctxt->input->col = 1; 4909 in++; 4910 } while (*in == 0xA); 4911 } 4912 get_more: 4913 ccol = ctxt->input->col; 4914 while (((*in > '-') && (*in <= 0x7F)) || 4915 ((*in >= 0x20) && (*in < '-')) || 4916 (*in == 0x09)) { 4917 in++; 4918 ccol++; 4919 } 4920 ctxt->input->col = ccol; 4921 if (*in == 0xA) { 4922 do { 4923 ctxt->input->line++; ctxt->input->col = 1; 4924 in++; 4925 } while (*in == 0xA); 4926 goto get_more; 4927 } 4928 nbchar = in - ctxt->input->cur; 4929 /* 4930 * save current set of data 4931 */ 4932 if (nbchar > 0) { 4933 if ((ctxt->sax != NULL) && 4934 (ctxt->sax->comment != NULL)) { 4935 if (buf == NULL) { 4936 if ((*in == '-') && (in[1] == '-')) 4937 size = nbchar + 1; 4938 else 4939 size = XML_PARSER_BUFFER_SIZE + nbchar; 4940 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4941 if (buf == NULL) { 4942 xmlErrMemory(ctxt, NULL); 4943 ctxt->instate = state; 4944 return; 4945 } 4946 len = 0; 4947 } else if (len + nbchar + 1 >= size) { 4948 xmlChar *new_buf; 4949 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4950 new_buf = (xmlChar *) xmlRealloc(buf, 4951 size * sizeof(xmlChar)); 4952 if (new_buf == NULL) { 4953 xmlFree (buf); 4954 xmlErrMemory(ctxt, NULL); 4955 ctxt->instate = state; 4956 return; 4957 } 4958 buf = new_buf; 4959 } 4960 memcpy(&buf[len], ctxt->input->cur, nbchar); 4961 len += nbchar; 4962 buf[len] = 0; 4963 } 4964 } 4965 if ((len > XML_MAX_TEXT_LENGTH) && 4966 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4967 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4968 
"Comment too big found", NULL); 4969 xmlFree (buf); 4970 return; 4971 } 4972 ctxt->input->cur = in; 4973 if (*in == 0xA) { 4974 in++; 4975 ctxt->input->line++; ctxt->input->col = 1; 4976 } 4977 if (*in == 0xD) { 4978 in++; 4979 if (*in == 0xA) { 4980 ctxt->input->cur = in; 4981 in++; 4982 ctxt->input->line++; ctxt->input->col = 1; 4983 continue; /* while */ 4984 } 4985 in--; 4986 } 4987 SHRINK; 4988 GROW; 4989 if (ctxt->instate == XML_PARSER_EOF) { 4990 xmlFree(buf); 4991 return; 4992 } 4993 in = ctxt->input->cur; 4994 if (*in == '-') { 4995 if (in[1] == '-') { 4996 if (in[2] == '>') { 4997 if (ctxt->input->id != inputid) { 4998 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4999 "comment doesn't start and stop in the same entity\n"); 5000 } 5001 SKIP(3); 5002 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5003 (!ctxt->disableSAX)) { 5004 if (buf != NULL) 5005 ctxt->sax->comment(ctxt->userData, buf); 5006 else 5007 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 5008 } 5009 if (buf != NULL) 5010 xmlFree(buf); 5011 if (ctxt->instate != XML_PARSER_EOF) 5012 ctxt->instate = state; 5013 return; 5014 } 5015 if (buf != NULL) { 5016 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5017 "Double hyphen within comment: " 5018 "<!--%.50s\n", 5019 buf); 5020 } else 5021 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5022 "Double hyphen within comment\n", NULL); 5023 in++; 5024 ctxt->input->col++; 5025 } 5026 in++; 5027 ctxt->input->col++; 5028 goto get_more; 5029 } 5030 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 5031 xmlParseCommentComplex(ctxt, buf, len, size); 5032 ctxt->instate = state; 5033 return; 5034 } 5035 5036 5037 /** 5038 * xmlParsePITarget: 5039 * @ctxt: an XML parser context 5040 * 5041 * parse the name of a PI 5042 * 5043 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 5044 * 5045 * Returns the PITarget name or NULL 5046 */ 5047 5048 const xmlChar * 5049 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5050 const 
xmlChar *name; 5051 5052 name = xmlParseName(ctxt); 5053 if ((name != NULL) && 5054 ((name[0] == 'x') || (name[0] == 'X')) && 5055 ((name[1] == 'm') || (name[1] == 'M')) && 5056 ((name[2] == 'l') || (name[2] == 'L'))) { 5057 int i; 5058 if ((name[0] == 'x') && (name[1] == 'm') && 5059 (name[2] == 'l') && (name[3] == 0)) { 5060 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5061 "XML declaration allowed only at the start of the document\n"); 5062 return(name); 5063 } else if (name[3] == 0) { 5064 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5065 return(name); 5066 } 5067 for (i = 0;;i++) { 5068 if (xmlW3CPIs[i] == NULL) break; 5069 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5070 return(name); 5071 } 5072 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5073 "xmlParsePITarget: invalid name prefix 'xml'\n", 5074 NULL, NULL); 5075 } 5076 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5077 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5078 "colon are forbidden from PI names '%s'\n", name, NULL, NULL); 5079 } 5080 return(name); 5081 } 5082 5083 #ifdef LIBXML_CATALOG_ENABLED 5084 /** 5085 * xmlParseCatalogPI: 5086 * @ctxt: an XML parser context 5087 * @catalog: the PI value string 5088 * 5089 * parse an XML Catalog Processing Instruction. 
5090 * 5091 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5092 * 5093 * Occurs only if allowed by the user and if happening in the Misc 5094 * part of the document before any doctype informations 5095 * This will add the given catalog to the parsing context in order 5096 * to be used if there is a resolution need further down in the document 5097 */ 5098 5099 static void 5100 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5101 xmlChar *URL = NULL; 5102 const xmlChar *tmp, *base; 5103 xmlChar marker; 5104 5105 tmp = catalog; 5106 while (IS_BLANK_CH(*tmp)) tmp++; 5107 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5108 goto error; 5109 tmp += 7; 5110 while (IS_BLANK_CH(*tmp)) tmp++; 5111 if (*tmp != '=') { 5112 return; 5113 } 5114 tmp++; 5115 while (IS_BLANK_CH(*tmp)) tmp++; 5116 marker = *tmp; 5117 if ((marker != '\'') && (marker != '"')) 5118 goto error; 5119 tmp++; 5120 base = tmp; 5121 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5122 if (*tmp == 0) 5123 goto error; 5124 URL = xmlStrndup(base, tmp - base); 5125 tmp++; 5126 while (IS_BLANK_CH(*tmp)) tmp++; 5127 if (*tmp != 0) 5128 goto error; 5129 5130 if (URL != NULL) { 5131 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5132 xmlFree(URL); 5133 } 5134 return; 5135 5136 error: 5137 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5138 "Catalog PI syntax error: %s\n", 5139 catalog, NULL); 5140 if (URL != NULL) 5141 xmlFree(URL); 5142 } 5143 #endif 5144 5145 /** 5146 * xmlParsePI: 5147 * @ctxt: an XML parser context 5148 * 5149 * parse an XML Processing Instruction. 5150 * 5151 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5152 * 5153 * The processing is transfered to SAX once parsed. 
5154 */ 5155 5156 void 5157 xmlParsePI(xmlParserCtxtPtr ctxt) { 5158 xmlChar *buf = NULL; 5159 size_t len = 0; 5160 size_t size = XML_PARSER_BUFFER_SIZE; 5161 int cur, l; 5162 const xmlChar *target; 5163 xmlParserInputState state; 5164 int count = 0; 5165 5166 if ((RAW == '<') && (NXT(1) == '?')) { 5167 xmlParserInputPtr input = ctxt->input; 5168 state = ctxt->instate; 5169 ctxt->instate = XML_PARSER_PI; 5170 /* 5171 * this is a Processing Instruction. 5172 */ 5173 SKIP(2); 5174 SHRINK; 5175 5176 /* 5177 * Parse the target name and check for special support like 5178 * namespace. 5179 */ 5180 target = xmlParsePITarget(ctxt); 5181 if (target != NULL) { 5182 if ((RAW == '?') && (NXT(1) == '>')) { 5183 if (input != ctxt->input) { 5184 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5185 "PI declaration doesn't start and stop in the same entity\n"); 5186 } 5187 SKIP(2); 5188 5189 /* 5190 * SAX: PI detected. 5191 */ 5192 if ((ctxt->sax) && (!ctxt->disableSAX) && 5193 (ctxt->sax->processingInstruction != NULL)) 5194 ctxt->sax->processingInstruction(ctxt->userData, 5195 target, NULL); 5196 if (ctxt->instate != XML_PARSER_EOF) 5197 ctxt->instate = state; 5198 return; 5199 } 5200 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5201 if (buf == NULL) { 5202 xmlErrMemory(ctxt, NULL); 5203 ctxt->instate = state; 5204 return; 5205 } 5206 cur = CUR; 5207 if (!IS_BLANK(cur)) { 5208 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5209 "ParsePI: PI %s space expected\n", target); 5210 } 5211 SKIP_BLANKS; 5212 cur = CUR_CHAR(l); 5213 while (IS_CHAR(cur) && /* checked */ 5214 ((cur != '?') || (NXT(1) != '>'))) { 5215 if (len + 5 >= size) { 5216 xmlChar *tmp; 5217 size_t new_size = size * 2; 5218 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5219 if (tmp == NULL) { 5220 xmlErrMemory(ctxt, NULL); 5221 xmlFree(buf); 5222 ctxt->instate = state; 5223 return; 5224 } 5225 buf = tmp; 5226 size = new_size; 5227 } 5228 count++; 5229 if (count > 50) { 5230 GROW; 5231 if (ctxt->instate == 
XML_PARSER_EOF) { 5232 xmlFree(buf); 5233 return; 5234 } 5235 count = 0; 5236 if ((len > XML_MAX_TEXT_LENGTH) && 5237 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5238 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5239 "PI %s too big found", target); 5240 xmlFree(buf); 5241 ctxt->instate = state; 5242 return; 5243 } 5244 } 5245 COPY_BUF(l,buf,len,cur); 5246 NEXTL(l); 5247 cur = CUR_CHAR(l); 5248 if (cur == 0) { 5249 SHRINK; 5250 GROW; 5251 cur = CUR_CHAR(l); 5252 } 5253 } 5254 if ((len > XML_MAX_TEXT_LENGTH) && 5255 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5256 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5257 "PI %s too big found", target); 5258 xmlFree(buf); 5259 ctxt->instate = state; 5260 return; 5261 } 5262 buf[len] = 0; 5263 if (cur != '?') { 5264 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5265 "ParsePI: PI %s never end ...\n", target); 5266 } else { 5267 if (input != ctxt->input) { 5268 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5269 "PI declaration doesn't start and stop in the same entity\n"); 5270 } 5271 SKIP(2); 5272 5273 #ifdef LIBXML_CATALOG_ENABLED 5274 if (((state == XML_PARSER_MISC) || 5275 (state == XML_PARSER_START)) && 5276 (xmlStrEqual(target, XML_CATALOG_PI))) { 5277 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5278 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5279 (allow == XML_CATA_ALLOW_ALL)) 5280 xmlParseCatalogPI(ctxt, buf); 5281 } 5282 #endif 5283 5284 5285 /* 5286 * SAX: PI detected. 
5287 */ 5288 if ((ctxt->sax) && (!ctxt->disableSAX) && 5289 (ctxt->sax->processingInstruction != NULL)) 5290 ctxt->sax->processingInstruction(ctxt->userData, 5291 target, buf); 5292 } 5293 xmlFree(buf); 5294 } else { 5295 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5296 } 5297 if (ctxt->instate != XML_PARSER_EOF) 5298 ctxt->instate = state; 5299 } 5300 } 5301 5302 /** 5303 * xmlParseNotationDecl: 5304 * @ctxt: an XML parser context 5305 * 5306 * parse a notation declaration 5307 * 5308 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5309 * 5310 * Hence there is actually 3 choices: 5311 * 'PUBLIC' S PubidLiteral 5312 * 'PUBLIC' S PubidLiteral S SystemLiteral 5313 * and 'SYSTEM' S SystemLiteral 5314 * 5315 * See the NOTE on xmlParseExternalID(). 5316 */ 5317 5318 void 5319 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5320 const xmlChar *name; 5321 xmlChar *Pubid; 5322 xmlChar *Systemid; 5323 5324 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5325 xmlParserInputPtr input = ctxt->input; 5326 SHRINK; 5327 SKIP(10); 5328 if (!IS_BLANK_CH(CUR)) { 5329 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5330 "Space required after '<!NOTATION'\n"); 5331 return; 5332 } 5333 SKIP_BLANKS; 5334 5335 name = xmlParseName(ctxt); 5336 if (name == NULL) { 5337 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5338 return; 5339 } 5340 if (!IS_BLANK_CH(CUR)) { 5341 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5342 "Space required after the NOTATION name'\n"); 5343 return; 5344 } 5345 if (xmlStrchr(name, ':') != NULL) { 5346 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5347 "colon are forbidden from notation names '%s'\n", 5348 name, NULL, NULL); 5349 } 5350 SKIP_BLANKS; 5351 5352 /* 5353 * Parse the IDs. 
5354 */ 5355 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5356 SKIP_BLANKS; 5357 5358 if (RAW == '>') { 5359 if (input != ctxt->input) { 5360 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5361 "Notation declaration doesn't start and stop in the same entity\n"); 5362 } 5363 NEXT; 5364 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5365 (ctxt->sax->notationDecl != NULL)) 5366 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5367 } else { 5368 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5369 } 5370 if (Systemid != NULL) xmlFree(Systemid); 5371 if (Pubid != NULL) xmlFree(Pubid); 5372 } 5373 } 5374 5375 /** 5376 * xmlParseEntityDecl: 5377 * @ctxt: an XML parser context 5378 * 5379 * parse <!ENTITY declarations 5380 * 5381 * [70] EntityDecl ::= GEDecl | PEDecl 5382 * 5383 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5384 * 5385 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5386 * 5387 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5388 * 5389 * [74] PEDef ::= EntityValue | ExternalID 5390 * 5391 * [76] NDataDecl ::= S 'NDATA' S Name 5392 * 5393 * [ VC: Notation Declared ] 5394 * The Name must match the declared name of a notation. 
5395 */ 5396 5397 void 5398 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5399 const xmlChar *name = NULL; 5400 xmlChar *value = NULL; 5401 xmlChar *URI = NULL, *literal = NULL; 5402 const xmlChar *ndata = NULL; 5403 int isParameter = 0; 5404 xmlChar *orig = NULL; 5405 int skipped; 5406 5407 /* GROW; done in the caller */ 5408 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5409 xmlParserInputPtr input = ctxt->input; 5410 SHRINK; 5411 SKIP(8); 5412 skipped = SKIP_BLANKS; 5413 if (skipped == 0) { 5414 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5415 "Space required after '<!ENTITY'\n"); 5416 } 5417 5418 if (RAW == '%') { 5419 NEXT; 5420 skipped = SKIP_BLANKS; 5421 if (skipped == 0) { 5422 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5423 "Space required after '%'\n"); 5424 } 5425 isParameter = 1; 5426 } 5427 5428 name = xmlParseName(ctxt); 5429 if (name == NULL) { 5430 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5431 "xmlParseEntityDecl: no name\n"); 5432 return; 5433 } 5434 if (xmlStrchr(name, ':') != NULL) { 5435 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5436 "colon are forbidden from entities names '%s'\n", 5437 name, NULL, NULL); 5438 } 5439 skipped = SKIP_BLANKS; 5440 if (skipped == 0) { 5441 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5442 "Space required after the entity name\n"); 5443 } 5444 5445 ctxt->instate = XML_PARSER_ENTITY_DECL; 5446 /* 5447 * handle the various case of definitions... 
5448 */ 5449 if (isParameter) { 5450 if ((RAW == '"') || (RAW == '\'')) { 5451 value = xmlParseEntityValue(ctxt, &orig); 5452 if (value) { 5453 if ((ctxt->sax != NULL) && 5454 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5455 ctxt->sax->entityDecl(ctxt->userData, name, 5456 XML_INTERNAL_PARAMETER_ENTITY, 5457 NULL, NULL, value); 5458 } 5459 } else { 5460 URI = xmlParseExternalID(ctxt, &literal, 1); 5461 if ((URI == NULL) && (literal == NULL)) { 5462 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5463 } 5464 if (URI) { 5465 xmlURIPtr uri; 5466 5467 uri = xmlParseURI((const char *) URI); 5468 if (uri == NULL) { 5469 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5470 "Invalid URI: %s\n", URI); 5471 /* 5472 * This really ought to be a well formedness error 5473 * but the XML Core WG decided otherwise c.f. issue 5474 * E26 of the XML erratas. 5475 */ 5476 } else { 5477 if (uri->fragment != NULL) { 5478 /* 5479 * Okay this is foolish to block those but not 5480 * invalid URIs. 5481 */ 5482 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5483 } else { 5484 if ((ctxt->sax != NULL) && 5485 (!ctxt->disableSAX) && 5486 (ctxt->sax->entityDecl != NULL)) 5487 ctxt->sax->entityDecl(ctxt->userData, name, 5488 XML_EXTERNAL_PARAMETER_ENTITY, 5489 literal, URI, NULL); 5490 } 5491 xmlFreeURI(uri); 5492 } 5493 } 5494 } 5495 } else { 5496 if ((RAW == '"') || (RAW == '\'')) { 5497 value = xmlParseEntityValue(ctxt, &orig); 5498 if ((ctxt->sax != NULL) && 5499 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5500 ctxt->sax->entityDecl(ctxt->userData, name, 5501 XML_INTERNAL_GENERAL_ENTITY, 5502 NULL, NULL, value); 5503 /* 5504 * For expat compatibility in SAX mode. 
5505 */ 5506 if ((ctxt->myDoc == NULL) || 5507 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5508 if (ctxt->myDoc == NULL) { 5509 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5510 if (ctxt->myDoc == NULL) { 5511 xmlErrMemory(ctxt, "New Doc failed"); 5512 return; 5513 } 5514 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5515 } 5516 if (ctxt->myDoc->intSubset == NULL) 5517 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5518 BAD_CAST "fake", NULL, NULL); 5519 5520 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5521 NULL, NULL, value); 5522 } 5523 } else { 5524 URI = xmlParseExternalID(ctxt, &literal, 1); 5525 if ((URI == NULL) && (literal == NULL)) { 5526 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5527 } 5528 if (URI) { 5529 xmlURIPtr uri; 5530 5531 uri = xmlParseURI((const char *)URI); 5532 if (uri == NULL) { 5533 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5534 "Invalid URI: %s\n", URI); 5535 /* 5536 * This really ought to be a well formedness error 5537 * but the XML Core WG decided otherwise c.f. issue 5538 * E26 of the XML erratas. 5539 */ 5540 } else { 5541 if (uri->fragment != NULL) { 5542 /* 5543 * Okay this is foolish to block those but not 5544 * invalid URIs. 
5545 */ 5546 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5547 } 5548 xmlFreeURI(uri); 5549 } 5550 } 5551 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5552 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5553 "Space required before 'NDATA'\n"); 5554 } 5555 SKIP_BLANKS; 5556 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5557 SKIP(5); 5558 if (!IS_BLANK_CH(CUR)) { 5559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5560 "Space required after 'NDATA'\n"); 5561 } 5562 SKIP_BLANKS; 5563 ndata = xmlParseName(ctxt); 5564 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5565 (ctxt->sax->unparsedEntityDecl != NULL)) 5566 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5567 literal, URI, ndata); 5568 } else { 5569 if ((ctxt->sax != NULL) && 5570 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5571 ctxt->sax->entityDecl(ctxt->userData, name, 5572 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5573 literal, URI, NULL); 5574 /* 5575 * For expat compatibility in SAX mode. 5576 * assuming the entity repalcement was asked for 5577 */ 5578 if ((ctxt->replaceEntities != 0) && 5579 ((ctxt->myDoc == NULL) || 5580 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5581 if (ctxt->myDoc == NULL) { 5582 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5583 if (ctxt->myDoc == NULL) { 5584 xmlErrMemory(ctxt, "New Doc failed"); 5585 return; 5586 } 5587 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5588 } 5589 5590 if (ctxt->myDoc->intSubset == NULL) 5591 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5592 BAD_CAST "fake", NULL, NULL); 5593 xmlSAX2EntityDecl(ctxt, name, 5594 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5595 literal, URI, NULL); 5596 } 5597 } 5598 } 5599 } 5600 if (ctxt->instate == XML_PARSER_EOF) 5601 return; 5602 SKIP_BLANKS; 5603 if (RAW != '>') { 5604 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5605 "xmlParseEntityDecl: entity %s not terminated\n", name); 5606 } else { 5607 if (input != ctxt->input) { 5608 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5609 "Entity declaration 
doesn't start and stop in the same entity\n"); 5610 } 5611 NEXT; 5612 } 5613 if (orig != NULL) { 5614 /* 5615 * Ugly mechanism to save the raw entity value. 5616 */ 5617 xmlEntityPtr cur = NULL; 5618 5619 if (isParameter) { 5620 if ((ctxt->sax != NULL) && 5621 (ctxt->sax->getParameterEntity != NULL)) 5622 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5623 } else { 5624 if ((ctxt->sax != NULL) && 5625 (ctxt->sax->getEntity != NULL)) 5626 cur = ctxt->sax->getEntity(ctxt->userData, name); 5627 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5628 cur = xmlSAX2GetEntity(ctxt, name); 5629 } 5630 } 5631 if (cur != NULL) { 5632 if (cur->orig != NULL) 5633 xmlFree(orig); 5634 else 5635 cur->orig = orig; 5636 } else 5637 xmlFree(orig); 5638 } 5639 if (value != NULL) xmlFree(value); 5640 if (URI != NULL) xmlFree(URI); 5641 if (literal != NULL) xmlFree(literal); 5642 } 5643 } 5644 5645 /** 5646 * xmlParseDefaultDecl: 5647 * @ctxt: an XML parser context 5648 * @value: Receive a possible fixed default value for the attribute 5649 * 5650 * Parse an attribute default declaration 5651 * 5652 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5653 * 5654 * [ VC: Required Attribute ] 5655 * if the default declaration is the keyword #REQUIRED, then the 5656 * attribute must be specified for all elements of the type in the 5657 * attribute-list declaration. 5658 * 5659 * [ VC: Attribute Default Legal ] 5660 * The declared default value must meet the lexical constraints of 5661 * the declared attribute type c.f. xmlValidateAttributeDecl() 5662 * 5663 * [ VC: Fixed Attribute Default ] 5664 * if an attribute has a default value declared with the #FIXED 5665 * keyword, instances of that attribute must match the default value. 5666 * 5667 * [ WFC: No < in Attribute Values ] 5668 * handled in xmlParseAttValue() 5669 * 5670 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5671 * or XML_ATTRIBUTE_FIXED. 
5672 */ 5673 5674 int 5675 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5676 int val; 5677 xmlChar *ret; 5678 5679 *value = NULL; 5680 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5681 SKIP(9); 5682 return(XML_ATTRIBUTE_REQUIRED); 5683 } 5684 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5685 SKIP(8); 5686 return(XML_ATTRIBUTE_IMPLIED); 5687 } 5688 val = XML_ATTRIBUTE_NONE; 5689 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5690 SKIP(6); 5691 val = XML_ATTRIBUTE_FIXED; 5692 if (!IS_BLANK_CH(CUR)) { 5693 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5694 "Space required after '#FIXED'\n"); 5695 } 5696 SKIP_BLANKS; 5697 } 5698 ret = xmlParseAttValue(ctxt); 5699 ctxt->instate = XML_PARSER_DTD; 5700 if (ret == NULL) { 5701 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5702 "Attribute default value declaration error\n"); 5703 } else 5704 *value = ret; 5705 return(val); 5706 } 5707 5708 /** 5709 * xmlParseNotationType: 5710 * @ctxt: an XML parser context 5711 * 5712 * parse an Notation attribute type. 5713 * 5714 * Note: the leading 'NOTATION' S part has already being parsed... 5715 * 5716 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5717 * 5718 * [ VC: Notation Attributes ] 5719 * Values of this type must match one of the notation names included 5720 * in the declaration; all notation names in the declaration must be declared. 
5721 * 5722 * Returns: the notation attribute tree built while parsing 5723 */ 5724 5725 xmlEnumerationPtr 5726 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5727 const xmlChar *name; 5728 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5729 5730 if (RAW != '(') { 5731 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5732 return(NULL); 5733 } 5734 SHRINK; 5735 do { 5736 NEXT; 5737 SKIP_BLANKS; 5738 name = xmlParseName(ctxt); 5739 if (name == NULL) { 5740 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5741 "Name expected in NOTATION declaration\n"); 5742 xmlFreeEnumeration(ret); 5743 return(NULL); 5744 } 5745 tmp = ret; 5746 while (tmp != NULL) { 5747 if (xmlStrEqual(name, tmp->name)) { 5748 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5749 "standalone: attribute notation value token %s duplicated\n", 5750 name, NULL); 5751 if (!xmlDictOwns(ctxt->dict, name)) 5752 xmlFree((xmlChar *) name); 5753 break; 5754 } 5755 tmp = tmp->next; 5756 } 5757 if (tmp == NULL) { 5758 cur = xmlCreateEnumeration(name); 5759 if (cur == NULL) { 5760 xmlFreeEnumeration(ret); 5761 return(NULL); 5762 } 5763 if (last == NULL) ret = last = cur; 5764 else { 5765 last->next = cur; 5766 last = cur; 5767 } 5768 } 5769 SKIP_BLANKS; 5770 } while (RAW == '|'); 5771 if (RAW != ')') { 5772 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5773 xmlFreeEnumeration(ret); 5774 return(NULL); 5775 } 5776 NEXT; 5777 return(ret); 5778 } 5779 5780 /** 5781 * xmlParseEnumerationType: 5782 * @ctxt: an XML parser context 5783 * 5784 * parse an Enumeration attribute type. 5785 * 5786 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5787 * 5788 * [ VC: Enumeration ] 5789 * Values of this type must match one of the Nmtoken tokens in 5790 * the declaration 5791 * 5792 * Returns: the enumeration attribute tree built while parsing 5793 */ 5794 5795 xmlEnumerationPtr 5796 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5797 xmlChar *name; 5798 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5799 5800 if (RAW != '(') { 5801 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5802 return(NULL); 5803 } 5804 SHRINK; 5805 do { 5806 NEXT; 5807 SKIP_BLANKS; 5808 name = xmlParseNmtoken(ctxt); 5809 if (name == NULL) { 5810 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5811 return(ret); 5812 } 5813 tmp = ret; 5814 while (tmp != NULL) { 5815 if (xmlStrEqual(name, tmp->name)) { 5816 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5817 "standalone: attribute enumeration value token %s duplicated\n", 5818 name, NULL); 5819 if (!xmlDictOwns(ctxt->dict, name)) 5820 xmlFree(name); 5821 break; 5822 } 5823 tmp = tmp->next; 5824 } 5825 if (tmp == NULL) { 5826 cur = xmlCreateEnumeration(name); 5827 if (!xmlDictOwns(ctxt->dict, name)) 5828 xmlFree(name); 5829 if (cur == NULL) { 5830 xmlFreeEnumeration(ret); 5831 return(NULL); 5832 } 5833 if (last == NULL) ret = last = cur; 5834 else { 5835 last->next = cur; 5836 last = cur; 5837 } 5838 } 5839 SKIP_BLANKS; 5840 } while (RAW == '|'); 5841 if (RAW != ')') { 5842 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5843 return(ret); 5844 } 5845 NEXT; 5846 return(ret); 5847 } 5848 5849 /** 5850 * xmlParseEnumeratedType: 5851 * @ctxt: an XML parser context 5852 * @tree: the enumeration tree built while parsing 5853 * 5854 * parse an Enumerated attribute type. 5855 * 5856 * [57] EnumeratedType ::= NotationType | Enumeration 5857 * 5858 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')'
 *
 *
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION, or 0 when
 *          no enumeration tree could be produced
 */

int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
    int attType;

    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
        SKIP(8);
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after 'NOTATION'\n");
            return(0);
        }
        SKIP_BLANKS;
        *tree = xmlParseNotationType(ctxt);
        attType = XML_ATTRIBUTE_NOTATION;
    } else {
        *tree = xmlParseEnumerationType(ctxt);
        attType = XML_ATTRIBUTE_ENUMERATION;
    }

    /* An empty tree is reported as a failure, whichever form was tried. */
    if (*tree == NULL)
        return(0);
    return(attType);
}

/**
 * xmlParseAttributeType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse the Attribute list def for an element
 *
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
 *
 * [55] StringType ::= 'CDATA'
 *
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
 *
 * Validity constraints for attribute values syntax are checked in
 * xmlValidateAttributeValue()
 *
 * [ VC: ID ]
 * Values of type ID must match the Name production. A name must not
 * appear more than once in an XML document as a value of this type;
 * i.e., ID values must uniquely identify the elements which bear them.
 *
 * [ VC: One ID per Element Type ]
 * No element type may have more than one ID attribute specified.
 *
 * [ VC: ID Attribute Default ]
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
 *
 * [ VC: IDREF ]
 * Values of type IDREF must match the Name production, and values
 * of type IDREFS must match Names; each IDREF Name must match the value
 * of an ID attribute on some element in the XML document; i.e.
IDREF 5915 * values must match the value of some ID attribute. 5916 * 5917 * [ VC: Entity Name ] 5918 * Values of type ENTITY must match the Name production, values 5919 * of type ENTITIES must match Names; each Entity Name must match the 5920 * name of an unparsed entity declared in the DTD. 5921 * 5922 * [ VC: Name Token ] 5923 * Values of type NMTOKEN must match the Nmtoken production; values 5924 * of type NMTOKENS must match Nmtokens. 5925 * 5926 * Returns the attribute type 5927 */ 5928 int 5929 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5930 SHRINK; 5931 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5932 SKIP(5); 5933 return(XML_ATTRIBUTE_CDATA); 5934 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5935 SKIP(6); 5936 return(XML_ATTRIBUTE_IDREFS); 5937 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5938 SKIP(5); 5939 return(XML_ATTRIBUTE_IDREF); 5940 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5941 SKIP(2); 5942 return(XML_ATTRIBUTE_ID); 5943 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5944 SKIP(6); 5945 return(XML_ATTRIBUTE_ENTITY); 5946 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5947 SKIP(8); 5948 return(XML_ATTRIBUTE_ENTITIES); 5949 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5950 SKIP(8); 5951 return(XML_ATTRIBUTE_NMTOKENS); 5952 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5953 SKIP(7); 5954 return(XML_ATTRIBUTE_NMTOKEN); 5955 } 5956 return(xmlParseEnumeratedType(ctxt, tree)); 5957 } 5958 5959 /** 5960 * xmlParseAttributeListDecl: 5961 * @ctxt: an XML parser context 5962 * 5963 * : parse the Attribute list def for an element 5964 * 5965 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>'
 *
 * [53] AttDef ::= S Name S AttType S DefaultDecl
 *
 */
void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *elemName;
    const xmlChar *attrName;
    xmlEnumerationPtr tree;
    xmlParserInputPtr startInput;

    /* Nothing to do unless we are sitting on an '<!ATTLIST' keyword. */
    if (!CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T'))
        return;

    startInput = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after '<!ATTLIST'\n");
    }
    SKIP_BLANKS;
    elemName = xmlParseName(ctxt);
    if (elemName == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "ATTLIST: no name for Element\n");
        return;
    }
    SKIP_BLANKS;
    GROW;

    /* One AttDef (name, type, default declaration) per iteration. */
    while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
        const xmlChar *startPos = CUR_PTR;
        xmlChar *defaultValue = NULL;
        int attType;
        int defKind;

        GROW;
        tree = NULL;
        attrName = xmlParseName(ctxt);
        if (attrName == NULL) {
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                           "ATTLIST: no name for Attribute\n");
            break;
        }
        GROW;
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                    "Space required after the attribute name\n");
            break;
        }
        SKIP_BLANKS;

        attType = xmlParseAttributeType(ctxt, &tree);
        if (attType <= 0) {
            break;
        }

        GROW;
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after the attribute type\n");
            goto discard_attdef;
        }
        SKIP_BLANKS;

        defKind = xmlParseDefaultDecl(ctxt, &defaultValue);
        if (defKind <= 0) {
            goto discard_attdef;
        }
        /* Default values of non-CDATA attributes get space-normalized. */
        if ((attType != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
            xmlAttrNormalizeSpace(defaultValue, defaultValue);

        GROW;
        if (RAW != '>') {
            if (!IS_BLANK_CH(CUR)) {
                xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                    "Space required after the attribute default value\n");
                goto discard_attdef;
            }
            SKIP_BLANKS;
        }
        /* No input consumed during this iteration: bail out. */
        if (startPos == CUR_PTR) {
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                        "in xmlParseAttributeListDecl\n");
            goto discard_attdef;
        }

        /*
         * The enumeration tree is handed to the SAX callback when one
         * is invoked; otherwise it is released here.
         */
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
            (ctxt->sax->attributeDecl != NULL))
            ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
                                     attType, defKind, defaultValue, tree);
        else if (tree != NULL)
            xmlFreeEnumeration(tree);

        if ((ctxt->sax2) && (defaultValue != NULL) &&
            (defKind != XML_ATTRIBUTE_IMPLIED) &&
            (defKind != XML_ATTRIBUTE_REQUIRED)) {
            xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
        }
        if (ctxt->sax2) {
            xmlAddSpecialAttr(ctxt, elemName, attrName, attType);
        }
        if (defaultValue != NULL)
            xmlFree(defaultValue);
        GROW;
        continue;

discard_attdef:
        /* Error after the type was parsed: drop what was built, stop. */
        if (defaultValue != NULL)
            xmlFree(defaultValue);
        if (tree != NULL)
            xmlFreeEnumeration(tree);
        break;
    }

    if (RAW == '>') {
        if (startInput != ctxt->input) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Attribute list declaration doesn't start and stop in the same entity\n",
                             NULL, NULL);
        }
        NEXT;
    }
}

/**
 * xmlParseElementMixedContentDecl:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 *
 * parse the declaration for a Mixed Element content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
 *                '(' S? '#PCDATA' S?
')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
 *
 * [ VC: No Duplicate Types ]
 * The same name must not appear more than once in a single
 * mixed-content declaration.
 *
 * On every error path that returns NULL the partially built content
 * tree is released in full before returning.
 *
 * returns: the list of the xmlElementContentPtr describing the element choices
 */
xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
    xmlElementContentPtr ret = NULL, cur = NULL, n;
    const xmlChar *elem = NULL;

    GROW;
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
        SKIP(7);
        SKIP_BLANKS;
        SHRINK;

        /* Simple form: '(' S? '#PCDATA' S? ')' with an optional '*'. */
        if (RAW == ')') {
            if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                                 NULL, NULL);
            }
            NEXT;
            ret = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                          XML_ELEMENT_CONTENT_PCDATA);
            if (ret == NULL)
                return(NULL);
            if (RAW == '*') {
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
                NEXT;
            }
            return(ret);
        }

        if ((RAW == '(') || (RAW == '|')) {
            ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                                XML_ELEMENT_CONTENT_PCDATA);
            if (ret == NULL)
                return(NULL);
        }

        /*
         * Each iteration consumes one '|' and one Name. The element node
         * for a Name is only created on the next iteration (or after the
         * loop), once we know whether another alternative follows.
         */
        while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
            NEXT;
            if (elem == NULL) {
                /* First '|': the PCDATA node becomes c1 of a new root. */
                ret = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                              XML_ELEMENT_CONTENT_OR);
                if (ret == NULL) {
                    /* cur still holds the PCDATA node: do not leak it. */
                    xmlFreeDocElementContent(ctxt->myDoc, cur);
                    return(NULL);
                }
                ret->c1 = cur;
                if (cur != NULL)
                    cur->parent = ret;
                cur = ret;
            } else {
                n = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                            XML_ELEMENT_CONTENT_OR);
                if (n == NULL) {
                    /* Release everything built so far, from the root. */
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                    return(NULL);
                }
                n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                                XML_ELEMENT_CONTENT_ELEMENT);
                if (n->c1 != NULL)
                    n->c1->parent = n;
                cur->c2 = n;
                n->parent = cur;
                cur = n;
            }
            SKIP_BLANKS;
            elem = xmlParseName(ctxt);
            if (elem == NULL) {
                xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                        "xmlParseElementMixedContentDecl : Name expected\n");
                /*
                 * Free from the root: cur may be an inner node already
                 * linked below ret, and freeing only cur would leak the
                 * upper part of the tree.
                 */
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            SKIP_BLANKS;
            GROW;
        }

        if ((RAW == ')') && (NXT(1) == '*')) {
            /* Attach the last pending Name as the final alternative. */
            if (elem != NULL) {
                cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                               XML_ELEMENT_CONTENT_ELEMENT);
                if (cur->c2 != NULL)
                    cur->c2->parent = cur;
            }
            if (ret != NULL)
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                                 NULL, NULL);
            }
            SKIP(2);
        } else {
            xmlFreeDocElementContent(ctxt->myDoc, ret);
            xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
            return(NULL);
        }

    } else {
        xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
    }
    return(ret);
}

/**
 * xmlParseElementChildrenContentDeclPriv:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 * @depth: the level of recursion
 *
 * parse the declaration for an element children content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups.
That is to say, if either of the 6221 * opening or closing parentheses in a choice, seq, or Mixed 6222 * construct is contained in the replacement text for a parameter 6223 * entity, both must be contained in the same replacement text. For 6224 * interoperability, if a parameter-entity reference appears in a 6225 * choice, seq, or Mixed construct, its replacement text should not 6226 * be empty, and neither the first nor last non-blank character of 6227 * the replacement text should be a connector (| or ,). 6228 * 6229 * Returns the tree of xmlElementContentPtr describing the element 6230 * hierarchy. 6231 */ 6232 static xmlElementContentPtr 6233 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6234 int depth) { 6235 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6236 const xmlChar *elem; 6237 xmlChar type = 0; 6238 6239 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6240 (depth > 2048)) { 6241 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6242 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6243 depth); 6244 return(NULL); 6245 } 6246 SKIP_BLANKS; 6247 GROW; 6248 if (RAW == '(') { 6249 int inputid = ctxt->input->id; 6250 6251 /* Recurse on first child */ 6252 NEXT; 6253 SKIP_BLANKS; 6254 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6255 depth + 1); 6256 SKIP_BLANKS; 6257 GROW; 6258 } else { 6259 elem = xmlParseName(ctxt); 6260 if (elem == NULL) { 6261 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6262 return(NULL); 6263 } 6264 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6265 if (cur == NULL) { 6266 xmlErrMemory(ctxt, NULL); 6267 return(NULL); 6268 } 6269 GROW; 6270 if (RAW == '?') { 6271 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6272 NEXT; 6273 } else if (RAW == '*') { 6274 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6275 NEXT; 6276 } else if (RAW == '+') { 6277 cur->ocur = 
XML_ELEMENT_CONTENT_PLUS; 6278 NEXT; 6279 } else { 6280 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6281 } 6282 GROW; 6283 } 6284 SKIP_BLANKS; 6285 SHRINK; 6286 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6287 /* 6288 * Each loop we parse one separator and one element. 6289 */ 6290 if (RAW == ',') { 6291 if (type == 0) type = CUR; 6292 6293 /* 6294 * Detect "Name | Name , Name" error 6295 */ 6296 else if (type != CUR) { 6297 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6298 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6299 type); 6300 if ((last != NULL) && (last != ret)) 6301 xmlFreeDocElementContent(ctxt->myDoc, last); 6302 if (ret != NULL) 6303 xmlFreeDocElementContent(ctxt->myDoc, ret); 6304 return(NULL); 6305 } 6306 NEXT; 6307 6308 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6309 if (op == NULL) { 6310 if ((last != NULL) && (last != ret)) 6311 xmlFreeDocElementContent(ctxt->myDoc, last); 6312 xmlFreeDocElementContent(ctxt->myDoc, ret); 6313 return(NULL); 6314 } 6315 if (last == NULL) { 6316 op->c1 = ret; 6317 if (ret != NULL) 6318 ret->parent = op; 6319 ret = cur = op; 6320 } else { 6321 cur->c2 = op; 6322 if (op != NULL) 6323 op->parent = cur; 6324 op->c1 = last; 6325 if (last != NULL) 6326 last->parent = op; 6327 cur =op; 6328 last = NULL; 6329 } 6330 } else if (RAW == '|') { 6331 if (type == 0) type = CUR; 6332 6333 /* 6334 * Detect "Name , Name | Name" error 6335 */ 6336 else if (type != CUR) { 6337 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6338 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6339 type); 6340 if ((last != NULL) && (last != ret)) 6341 xmlFreeDocElementContent(ctxt->myDoc, last); 6342 if (ret != NULL) 6343 xmlFreeDocElementContent(ctxt->myDoc, ret); 6344 return(NULL); 6345 } 6346 NEXT; 6347 6348 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6349 if (op == NULL) { 6350 if ((last != NULL) && (last != ret)) 6351 
xmlFreeDocElementContent(ctxt->myDoc, last); 6352 if (ret != NULL) 6353 xmlFreeDocElementContent(ctxt->myDoc, ret); 6354 return(NULL); 6355 } 6356 if (last == NULL) { 6357 op->c1 = ret; 6358 if (ret != NULL) 6359 ret->parent = op; 6360 ret = cur = op; 6361 } else { 6362 cur->c2 = op; 6363 if (op != NULL) 6364 op->parent = cur; 6365 op->c1 = last; 6366 if (last != NULL) 6367 last->parent = op; 6368 cur =op; 6369 last = NULL; 6370 } 6371 } else { 6372 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6373 if ((last != NULL) && (last != ret)) 6374 xmlFreeDocElementContent(ctxt->myDoc, last); 6375 if (ret != NULL) 6376 xmlFreeDocElementContent(ctxt->myDoc, ret); 6377 return(NULL); 6378 } 6379 GROW; 6380 SKIP_BLANKS; 6381 GROW; 6382 if (RAW == '(') { 6383 int inputid = ctxt->input->id; 6384 /* Recurse on second child */ 6385 NEXT; 6386 SKIP_BLANKS; 6387 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6388 depth + 1); 6389 SKIP_BLANKS; 6390 } else { 6391 elem = xmlParseName(ctxt); 6392 if (elem == NULL) { 6393 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6394 if (ret != NULL) 6395 xmlFreeDocElementContent(ctxt->myDoc, ret); 6396 return(NULL); 6397 } 6398 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6399 if (last == NULL) { 6400 if (ret != NULL) 6401 xmlFreeDocElementContent(ctxt->myDoc, ret); 6402 return(NULL); 6403 } 6404 if (RAW == '?') { 6405 last->ocur = XML_ELEMENT_CONTENT_OPT; 6406 NEXT; 6407 } else if (RAW == '*') { 6408 last->ocur = XML_ELEMENT_CONTENT_MULT; 6409 NEXT; 6410 } else if (RAW == '+') { 6411 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6412 NEXT; 6413 } else { 6414 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6415 } 6416 } 6417 SKIP_BLANKS; 6418 GROW; 6419 } 6420 if ((cur != NULL) && (last != NULL)) { 6421 cur->c2 = last; 6422 if (last != NULL) 6423 last->parent = cur; 6424 } 6425 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6426 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6427 
"Element content declaration doesn't start and stop in the same entity\n", 6428 NULL, NULL); 6429 } 6430 NEXT; 6431 if (RAW == '?') { 6432 if (ret != NULL) { 6433 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6434 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6435 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6436 else 6437 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6438 } 6439 NEXT; 6440 } else if (RAW == '*') { 6441 if (ret != NULL) { 6442 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6443 cur = ret; 6444 /* 6445 * Some normalization: 6446 * (a | b* | c?)* == (a | b | c)* 6447 */ 6448 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6449 if ((cur->c1 != NULL) && 6450 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6451 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6452 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6453 if ((cur->c2 != NULL) && 6454 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6455 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6456 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6457 cur = cur->c2; 6458 } 6459 } 6460 NEXT; 6461 } else if (RAW == '+') { 6462 if (ret != NULL) { 6463 int found = 0; 6464 6465 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6466 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6467 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6468 else 6469 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6470 /* 6471 * Some normalization: 6472 * (a | b*)+ == (a | b)* 6473 * (a | b?)+ == (a | b)* 6474 */ 6475 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6476 if ((cur->c1 != NULL) && 6477 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6478 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6479 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6480 found = 1; 6481 } 6482 if ((cur->c2 != NULL) && 6483 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6484 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6485 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6486 found = 1; 6487 } 6488 cur = cur->c2; 6489 } 6490 if (found) 6491 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6492 } 6493 NEXT; 6494 
} 6495 return(ret); 6496 } 6497 6498 /** 6499 * xmlParseElementChildrenContentDecl: 6500 * @ctxt: an XML parser context 6501 * @inputchk: the input used for the current entity, needed for boundary checks 6502 * 6503 * parse the declaration for a Mixed Element content 6504 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6505 * 6506 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6507 * 6508 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6509 * 6510 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6511 * 6512 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6513 * 6514 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6515 * TODO Parameter-entity replacement text must be properly nested 6516 * with parenthesized groups. That is to say, if either of the 6517 * opening or closing parentheses in a choice, seq, or Mixed 6518 * construct is contained in the replacement text for a parameter 6519 * entity, both must be contained in the same replacement text. For 6520 * interoperability, if a parameter-entity reference appears in a 6521 * choice, seq, or Mixed construct, its replacement text should not 6522 * be empty, and neither the first nor last non-blank character of 6523 * the replacement text should be a connector (| or ,). 6524 * 6525 * Returns the tree of xmlElementContentPtr describing the element 6526 * hierarchy. 6527 */ 6528 xmlElementContentPtr 6529 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6530 /* stub left for API/ABI compat */ 6531 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6532 } 6533 6534 /** 6535 * xmlParseElementContentDecl: 6536 * @ctxt: an XML parser context 6537 * @name: the name of the element being defined. 
6538 * @result: the Element Content pointer will be stored here if any 6539 * 6540 * parse the declaration for an Element content either Mixed or Children, 6541 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6542 * 6543 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6544 * 6545 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6546 */ 6547 6548 int 6549 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6550 xmlElementContentPtr *result) { 6551 6552 xmlElementContentPtr tree = NULL; 6553 int inputid = ctxt->input->id; 6554 int res; 6555 6556 *result = NULL; 6557 6558 if (RAW != '(') { 6559 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6560 "xmlParseElementContentDecl : %s '(' expected\n", name); 6561 return(-1); 6562 } 6563 NEXT; 6564 GROW; 6565 if (ctxt->instate == XML_PARSER_EOF) 6566 return(-1); 6567 SKIP_BLANKS; 6568 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6569 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6570 res = XML_ELEMENT_TYPE_MIXED; 6571 } else { 6572 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6573 res = XML_ELEMENT_TYPE_ELEMENT; 6574 } 6575 SKIP_BLANKS; 6576 *result = tree; 6577 return(res); 6578 } 6579 6580 /** 6581 * xmlParseElementDecl: 6582 * @ctxt: an XML parser context 6583 * 6584 * parse an Element declaration. 6585 * 6586 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    int elemType = -1;
    xmlElementContentPtr content = NULL;
    xmlParserInputPtr startInput;

    /* GROW; done in the caller */
    if (!CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T'))
        return(elemType);

    startInput = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'ELEMENT'\n");
    }
    SKIP_BLANKS;
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseElementDecl: no name for Element\n");
        return(-1);
    }
    /* Leave any exhausted nested input before looking at the next char. */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after the element name\n");
    }
    SKIP_BLANKS;

    if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
        SKIP(5);
        /*
         * Element must always be empty.
         */
        elemType = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') &&
               (NXT(2) == 'Y')) {
        SKIP(3);
        /*
         * Element is a generic container.
         */
        elemType = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        elemType = xmlParseElementContentDecl(ctxt, name, &content);
    } else {
        /*
         * [ WFC: PEs in Internal Subset ] error handling.
         */
        if ((RAW == '%') && (ctxt->external == 0) &&
            (ctxt->inputNr == 1)) {
            xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
      "PEReference: forbidden within markup decl in internal subset\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        }
        return(-1);
    }

    SKIP_BLANKS;
    /*
     * Pop-up of finished entities.
     */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    SKIP_BLANKS;

    if (RAW == '>') {
        if (startInput != ctxt->input) {
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Element declaration doesn't start and stop in the same entity\n");
        }

        NEXT;
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
            (ctxt->sax->elementDecl != NULL)) {
            if (content != NULL)
                content->parent = NULL;
            ctxt->sax->elementDecl(ctxt->userData, name, elemType,
                                   content);
            if ((content != NULL) && (content->parent == NULL)) {
                /*
                 * this is a trick: if xmlAddElementDecl is called,
                 * instead of copying the full tree it is plugged directly
                 * if called from the parser. Avoid duplicating the
                 * interfaces or change the API/ABI
                 */
                xmlFreeDocElementContent(ctxt->myDoc, content);
            }
        } else if (content != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
    } else {
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
        if (content != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
    }
    return(elemType);
}

/**
 * xmlParseConditionalSections
 * @ctxt:  an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S?
'[' ignoreSectContents* ']]>' 6702 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6703 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6704 */ 6705 6706 static void 6707 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6708 int id = ctxt->input->id; 6709 6710 SKIP(3); 6711 SKIP_BLANKS; 6712 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6713 SKIP(7); 6714 SKIP_BLANKS; 6715 if (RAW != '[') { 6716 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6717 } else { 6718 if (ctxt->input->id != id) { 6719 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6720 "All markup of the conditional section is not in the same entity\n", 6721 NULL, NULL); 6722 } 6723 NEXT; 6724 } 6725 if (xmlParserDebugEntities) { 6726 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6727 xmlGenericError(xmlGenericErrorContext, 6728 "%s(%d): ", ctxt->input->filename, 6729 ctxt->input->line); 6730 xmlGenericError(xmlGenericErrorContext, 6731 "Entering INCLUDE Conditional Section\n"); 6732 } 6733 6734 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6735 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { 6736 const xmlChar *check = CUR_PTR; 6737 unsigned int cons = ctxt->input->consumed; 6738 6739 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6740 xmlParseConditionalSections(ctxt); 6741 } else if (IS_BLANK_CH(CUR)) { 6742 NEXT; 6743 } else if (RAW == '%') { 6744 xmlParsePEReference(ctxt); 6745 } else 6746 xmlParseMarkupDecl(ctxt); 6747 6748 /* 6749 * Pop-up of finished entities. 
6750 */ 6751 while ((RAW == 0) && (ctxt->inputNr > 1)) 6752 xmlPopInput(ctxt); 6753 6754 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6755 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6756 break; 6757 } 6758 } 6759 if (xmlParserDebugEntities) { 6760 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6761 xmlGenericError(xmlGenericErrorContext, 6762 "%s(%d): ", ctxt->input->filename, 6763 ctxt->input->line); 6764 xmlGenericError(xmlGenericErrorContext, 6765 "Leaving INCLUDE Conditional Section\n"); 6766 } 6767 6768 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6769 int state; 6770 xmlParserInputState instate; 6771 int depth = 0; 6772 6773 SKIP(6); 6774 SKIP_BLANKS; 6775 if (RAW != '[') { 6776 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6777 } else { 6778 if (ctxt->input->id != id) { 6779 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6780 "All markup of the conditional section is not in the same entity\n", 6781 NULL, NULL); 6782 } 6783 NEXT; 6784 } 6785 if (xmlParserDebugEntities) { 6786 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6787 xmlGenericError(xmlGenericErrorContext, 6788 "%s(%d): ", ctxt->input->filename, 6789 ctxt->input->line); 6790 xmlGenericError(xmlGenericErrorContext, 6791 "Entering IGNORE Conditional Section\n"); 6792 } 6793 6794 /* 6795 * Parse up to the end of the conditional section 6796 * But disable SAX event generating DTD building in the meantime 6797 */ 6798 state = ctxt->disableSAX; 6799 instate = ctxt->instate; 6800 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6801 ctxt->instate = XML_PARSER_IGNORE; 6802 6803 while (((depth >= 0) && (RAW != 0)) && 6804 (ctxt->instate != XML_PARSER_EOF)) { 6805 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6806 depth++; 6807 SKIP(3); 6808 continue; 6809 } 6810 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6811 if (--depth >= 0) SKIP(3); 6812 continue; 6813 } 6814 NEXT; 6815 continue; 6816 } 6817 6818 ctxt->disableSAX = 
state; 6819 ctxt->instate = instate; 6820 6821 if (xmlParserDebugEntities) { 6822 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6823 xmlGenericError(xmlGenericErrorContext, 6824 "%s(%d): ", ctxt->input->filename, 6825 ctxt->input->line); 6826 xmlGenericError(xmlGenericErrorContext, 6827 "Leaving IGNORE Conditional Section\n"); 6828 } 6829 6830 } else { 6831 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6832 } 6833 6834 if (RAW == 0) 6835 SHRINK; 6836 6837 if (RAW == 0) { 6838 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6839 } else { 6840 if (ctxt->input->id != id) { 6841 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6842 "All markup of the conditional section is not in the same entity\n", 6843 NULL, NULL); 6844 } 6845 SKIP(3); 6846 } 6847 } 6848 6849 /** 6850 * xmlParseMarkupDecl: 6851 * @ctxt: an XML parser context 6852 * 6853 * parse Markup declarations 6854 * 6855 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6856 * NotationDecl | PI | Comment 6857 * 6858 * [ VC: Proper Declaration/PE Nesting ] 6859 * Parameter-entity replacement text must be properly nested with 6860 * markup declarations. That is to say, if either the first character 6861 * or the last character of a markup declaration (markupdecl above) is 6862 * contained in the replacement text for a parameter-entity reference, 6863 * both must be contained in the same replacement text. 6864 * 6865 * [ WFC: PEs in Internal Subset ] 6866 * In the internal DTD subset, parameter-entity references can occur 6867 * only where markup declarations can occur, not within markup declarations. 6868 * (This does not apply to references that occur in external parameter 6869 * entities or to the external subset.) 
6870 */ 6871 void 6872 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6873 GROW; 6874 if (CUR == '<') { 6875 if (NXT(1) == '!') { 6876 switch (NXT(2)) { 6877 case 'E': 6878 if (NXT(3) == 'L') 6879 xmlParseElementDecl(ctxt); 6880 else if (NXT(3) == 'N') 6881 xmlParseEntityDecl(ctxt); 6882 break; 6883 case 'A': 6884 xmlParseAttributeListDecl(ctxt); 6885 break; 6886 case 'N': 6887 xmlParseNotationDecl(ctxt); 6888 break; 6889 case '-': 6890 xmlParseComment(ctxt); 6891 break; 6892 default: 6893 /* there is an error but it will be detected later */ 6894 break; 6895 } 6896 } else if (NXT(1) == '?') { 6897 xmlParsePI(ctxt); 6898 } 6899 } 6900 /* 6901 * This is only for internal subset. On external entities, 6902 * the replacement is done before parsing stage 6903 */ 6904 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6905 xmlParsePEReference(ctxt); 6906 6907 /* 6908 * Conditional sections are allowed from entities included 6909 * by PE References in the internal subset. 6910 */ 6911 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6912 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6913 xmlParseConditionalSections(ctxt); 6914 } 6915 } 6916 6917 ctxt->instate = XML_PARSER_DTD; 6918 } 6919 6920 /** 6921 * xmlParseTextDecl: 6922 * @ctxt: an XML parser context 6923 * 6924 * parse an XML declaration header for external entities 6925 * 6926 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6927 */ 6928 6929 void 6930 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6931 xmlChar *version; 6932 const xmlChar *encoding; 6933 6934 /* 6935 * We know that '<?xml' is here. 6936 */ 6937 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6938 SKIP(5); 6939 } else { 6940 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6941 return; 6942 } 6943 6944 if (!IS_BLANK_CH(CUR)) { 6945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6946 "Space needed after '<?xml'\n"); 6947 } 6948 SKIP_BLANKS; 6949 6950 /* 6951 * We may have the VersionInfo here. 
6952 */ 6953 version = xmlParseVersionInfo(ctxt); 6954 if (version == NULL) 6955 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6956 else { 6957 if (!IS_BLANK_CH(CUR)) { 6958 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6959 "Space needed here\n"); 6960 } 6961 } 6962 ctxt->input->version = version; 6963 6964 /* 6965 * We must have the encoding declaration 6966 */ 6967 encoding = xmlParseEncodingDecl(ctxt); 6968 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6969 /* 6970 * The XML REC instructs us to stop parsing right here 6971 */ 6972 return; 6973 } 6974 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6975 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6976 "Missing encoding in text declaration\n"); 6977 } 6978 6979 SKIP_BLANKS; 6980 if ((RAW == '?') && (NXT(1) == '>')) { 6981 SKIP(2); 6982 } else if (RAW == '>') { 6983 /* Deprecated old WD ... */ 6984 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6985 NEXT; 6986 } else { 6987 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6988 MOVETO_ENDTAG(CUR_PTR); 6989 NEXT; 6990 } 6991 } 6992 6993 /** 6994 * xmlParseExternalSubset: 6995 * @ctxt: an XML parser context 6996 * @ExternalID: the external identifier 6997 * @SystemID: the system identifier (or URL) 6998 * 6999 * parse Markup declarations from an external subset 7000 * 7001 * [30] extSubset ::= textDecl? 
extSubsetDecl 7002 * 7003 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 7004 */ 7005 void 7006 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 7007 const xmlChar *SystemID) { 7008 xmlDetectSAX2(ctxt); 7009 GROW; 7010 7011 if ((ctxt->encoding == NULL) && 7012 (ctxt->input->end - ctxt->input->cur >= 4)) { 7013 xmlChar start[4]; 7014 xmlCharEncoding enc; 7015 7016 start[0] = RAW; 7017 start[1] = NXT(1); 7018 start[2] = NXT(2); 7019 start[3] = NXT(3); 7020 enc = xmlDetectCharEncoding(start, 4); 7021 if (enc != XML_CHAR_ENCODING_NONE) 7022 xmlSwitchEncoding(ctxt, enc); 7023 } 7024 7025 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 7026 xmlParseTextDecl(ctxt); 7027 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7028 /* 7029 * The XML REC instructs us to stop parsing right here 7030 */ 7031 ctxt->instate = XML_PARSER_EOF; 7032 return; 7033 } 7034 } 7035 if (ctxt->myDoc == NULL) { 7036 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 7037 if (ctxt->myDoc == NULL) { 7038 xmlErrMemory(ctxt, "New Doc failed"); 7039 return; 7040 } 7041 ctxt->myDoc->properties = XML_DOC_INTERNAL; 7042 } 7043 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 7044 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 7045 7046 ctxt->instate = XML_PARSER_DTD; 7047 ctxt->external = 1; 7048 while (((RAW == '<') && (NXT(1) == '?')) || 7049 ((RAW == '<') && (NXT(1) == '!')) || 7050 (RAW == '%') || IS_BLANK_CH(CUR)) { 7051 const xmlChar *check = CUR_PTR; 7052 unsigned int cons = ctxt->input->consumed; 7053 7054 GROW; 7055 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7056 xmlParseConditionalSections(ctxt); 7057 } else if (IS_BLANK_CH(CUR)) { 7058 NEXT; 7059 } else if (RAW == '%') { 7060 xmlParsePEReference(ctxt); 7061 } else 7062 xmlParseMarkupDecl(ctxt); 7063 7064 /* 7065 * Pop-up of finished entities. 
7066 */ 7067 while ((RAW == 0) && (ctxt->inputNr > 1)) 7068 xmlPopInput(ctxt); 7069 7070 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7071 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7072 break; 7073 } 7074 } 7075 7076 if (RAW != 0) { 7077 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7078 } 7079 7080 } 7081 7082 /** 7083 * xmlParseReference: 7084 * @ctxt: an XML parser context 7085 * 7086 * parse and handle entity references in content, depending on the SAX 7087 * interface, this may end-up in a call to character() if this is a 7088 * CharRef, a predefined entity, if there is no reference() callback. 7089 * or if the parser was asked to switch to that mode. 7090 * 7091 * [67] Reference ::= EntityRef | CharRef 7092 */ 7093 void 7094 xmlParseReference(xmlParserCtxtPtr ctxt) { 7095 xmlEntityPtr ent; 7096 xmlChar *val; 7097 int was_checked; 7098 xmlNodePtr list = NULL; 7099 xmlParserErrors ret = XML_ERR_OK; 7100 7101 7102 if (RAW != '&') 7103 return; 7104 7105 /* 7106 * Simple case of a CharRef 7107 */ 7108 if (NXT(1) == '#') { 7109 int i = 0; 7110 xmlChar out[10]; 7111 int hex = NXT(2); 7112 int value = xmlParseCharRef(ctxt); 7113 7114 if (value == 0) 7115 return; 7116 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 7117 /* 7118 * So we are using non-UTF-8 buffers 7119 * Check that the char fit on 8bits, if not 7120 * generate a CharRef. 
7121 */ 7122 if (value <= 0xFF) { 7123 out[0] = value; 7124 out[1] = 0; 7125 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7126 (!ctxt->disableSAX)) 7127 ctxt->sax->characters(ctxt->userData, out, 1); 7128 } else { 7129 if ((hex == 'x') || (hex == 'X')) 7130 snprintf((char *)out, sizeof(out), "#x%X", value); 7131 else 7132 snprintf((char *)out, sizeof(out), "#%d", value); 7133 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7134 (!ctxt->disableSAX)) 7135 ctxt->sax->reference(ctxt->userData, out); 7136 } 7137 } else { 7138 /* 7139 * Just encode the value in UTF-8 7140 */ 7141 COPY_BUF(0 ,out, i, value); 7142 out[i] = 0; 7143 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7144 (!ctxt->disableSAX)) 7145 ctxt->sax->characters(ctxt->userData, out, i); 7146 } 7147 return; 7148 } 7149 7150 /* 7151 * We are seeing an entity reference 7152 */ 7153 ent = xmlParseEntityRef(ctxt); 7154 if (ent == NULL) return; 7155 if (!ctxt->wellFormed) 7156 return; 7157 was_checked = ent->checked; 7158 7159 /* special case of predefined entities */ 7160 if ((ent->name == NULL) || 7161 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 7162 val = ent->content; 7163 if (val == NULL) return; 7164 /* 7165 * inline the entity. 7166 */ 7167 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7168 (!ctxt->disableSAX)) 7169 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 7170 return; 7171 } 7172 7173 /* 7174 * The first reference to the entity trigger a parsing phase 7175 * where the ent->children is filled with the result from 7176 * the parsing. 7177 * Note: external parsed entities will not be loaded, it is not 7178 * required for a non-validating parser, unless the parsing option 7179 * of validating, or substituting entities were given. Doing so is 7180 * far more secure as the parser will only process data coming from 7181 * the document entity by default. 
7182 */ 7183 if ((ent->checked == 0) && 7184 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || 7185 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { 7186 unsigned long oldnbent = ctxt->nbentities; 7187 7188 /* 7189 * This is a bit hackish but this seems the best 7190 * way to make sure both SAX and DOM entity support 7191 * behaves okay. 7192 */ 7193 void *user_data; 7194 if (ctxt->userData == ctxt) 7195 user_data = NULL; 7196 else 7197 user_data = ctxt->userData; 7198 7199 /* 7200 * Check that this entity is well formed 7201 * 4.3.2: An internal general parsed entity is well-formed 7202 * if its replacement text matches the production labeled 7203 * content. 7204 */ 7205 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7206 ctxt->depth++; 7207 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 7208 user_data, &list); 7209 ctxt->depth--; 7210 7211 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7212 ctxt->depth++; 7213 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 7214 user_data, ctxt->depth, ent->URI, 7215 ent->ExternalID, &list); 7216 ctxt->depth--; 7217 } else { 7218 ret = XML_ERR_ENTITY_PE_INTERNAL; 7219 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7220 "invalid entity type found\n", NULL); 7221 } 7222 7223 /* 7224 * Store the number of entities needing parsing for this entity 7225 * content and do checkings 7226 */ 7227 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 7228 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) 7229 ent->checked |= 1; 7230 if (ret == XML_ERR_ENTITY_LOOP) { 7231 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7232 xmlFreeNodeList(list); 7233 return; 7234 } 7235 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { 7236 xmlFreeNodeList(list); 7237 return; 7238 } 7239 7240 if ((ret == XML_ERR_OK) && (list != NULL)) { 7241 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 7242 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 7243 (ent->children == NULL)) { 7244 
ent->children = list; 7245 if (ctxt->replaceEntities) { 7246 /* 7247 * Prune it directly in the generated document 7248 * except for single text nodes. 7249 */ 7250 if (((list->type == XML_TEXT_NODE) && 7251 (list->next == NULL)) || 7252 (ctxt->parseMode == XML_PARSE_READER)) { 7253 list->parent = (xmlNodePtr) ent; 7254 list = NULL; 7255 ent->owner = 1; 7256 } else { 7257 ent->owner = 0; 7258 while (list != NULL) { 7259 list->parent = (xmlNodePtr) ctxt->node; 7260 list->doc = ctxt->myDoc; 7261 if (list->next == NULL) 7262 ent->last = list; 7263 list = list->next; 7264 } 7265 list = ent->children; 7266 #ifdef LIBXML_LEGACY_ENABLED 7267 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7268 xmlAddEntityReference(ent, list, NULL); 7269 #endif /* LIBXML_LEGACY_ENABLED */ 7270 } 7271 } else { 7272 ent->owner = 1; 7273 while (list != NULL) { 7274 list->parent = (xmlNodePtr) ent; 7275 xmlSetTreeDoc(list, ent->doc); 7276 if (list->next == NULL) 7277 ent->last = list; 7278 list = list->next; 7279 } 7280 } 7281 } else { 7282 xmlFreeNodeList(list); 7283 list = NULL; 7284 } 7285 } else if ((ret != XML_ERR_OK) && 7286 (ret != XML_WAR_UNDECLARED_ENTITY)) { 7287 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7288 "Entity '%s' failed to parse\n", ent->name); 7289 } else if (list != NULL) { 7290 xmlFreeNodeList(list); 7291 list = NULL; 7292 } 7293 if (ent->checked == 0) 7294 ent->checked = 2; 7295 } else if (ent->checked != 1) { 7296 ctxt->nbentities += ent->checked / 2; 7297 } 7298 7299 /* 7300 * Now that the entity content has been gathered 7301 * provide it to the application, this can take different forms based 7302 * on the parsing modes. 7303 */ 7304 if (ent->children == NULL) { 7305 /* 7306 * Probably running in SAX mode and the callbacks don't 7307 * build the entity content. 
So unless we already went 7308 * though parsing for first checking go though the entity 7309 * content to generate callbacks associated to the entity 7310 */ 7311 if (was_checked != 0) { 7312 void *user_data; 7313 /* 7314 * This is a bit hackish but this seems the best 7315 * way to make sure both SAX and DOM entity support 7316 * behaves okay. 7317 */ 7318 if (ctxt->userData == ctxt) 7319 user_data = NULL; 7320 else 7321 user_data = ctxt->userData; 7322 7323 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7324 ctxt->depth++; 7325 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7326 ent->content, user_data, NULL); 7327 ctxt->depth--; 7328 } else if (ent->etype == 7329 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7330 ctxt->depth++; 7331 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7332 ctxt->sax, user_data, ctxt->depth, 7333 ent->URI, ent->ExternalID, NULL); 7334 ctxt->depth--; 7335 } else { 7336 ret = XML_ERR_ENTITY_PE_INTERNAL; 7337 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7338 "invalid entity type found\n", NULL); 7339 } 7340 if (ret == XML_ERR_ENTITY_LOOP) { 7341 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7342 return; 7343 } 7344 } 7345 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7346 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7347 /* 7348 * Entity reference callback comes second, it's somewhat 7349 * superfluous but a compatibility to historical behaviour 7350 */ 7351 ctxt->sax->reference(ctxt->userData, ent->name); 7352 } 7353 return; 7354 } 7355 7356 /* 7357 * If we didn't get any children for the entity being built 7358 */ 7359 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7360 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7361 /* 7362 * Create a node. 
7363 */ 7364 ctxt->sax->reference(ctxt->userData, ent->name); 7365 return; 7366 } 7367 7368 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7369 /* 7370 * There is a problem on the handling of _private for entities 7371 * (bug 155816): Should we copy the content of the field from 7372 * the entity (possibly overwriting some value set by the user 7373 * when a copy is created), should we leave it alone, or should 7374 * we try to take care of different situations? The problem 7375 * is exacerbated by the usage of this field by the xmlReader. 7376 * To fix this bug, we look at _private on the created node 7377 * and, if it's NULL, we copy in whatever was in the entity. 7378 * If it's not NULL we leave it alone. This is somewhat of a 7379 * hack - maybe we should have further tests to determine 7380 * what to do. 7381 */ 7382 if ((ctxt->node != NULL) && (ent->children != NULL)) { 7383 /* 7384 * Seems we are generating the DOM content, do 7385 * a simple tree copy for all references except the first 7386 * In the first occurrence list contains the replacement. 7387 */ 7388 if (((list == NULL) && (ent->owner == 0)) || 7389 (ctxt->parseMode == XML_PARSE_READER)) { 7390 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7391 7392 /* 7393 * We are copying here, make sure there is no abuse 7394 */ 7395 ctxt->sizeentcopy += ent->length; 7396 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7397 return; 7398 7399 /* 7400 * when operating on a reader, the entities definitions 7401 * are always owning the entities subtree. 
7402 if (ctxt->parseMode == XML_PARSE_READER) 7403 ent->owner = 1; 7404 */ 7405 7406 cur = ent->children; 7407 while (cur != NULL) { 7408 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7409 if (nw != NULL) { 7410 if (nw->_private == NULL) 7411 nw->_private = cur->_private; 7412 if (firstChild == NULL){ 7413 firstChild = nw; 7414 } 7415 nw = xmlAddChild(ctxt->node, nw); 7416 } 7417 if (cur == ent->last) { 7418 /* 7419 * needed to detect some strange empty 7420 * node cases in the reader tests 7421 */ 7422 if ((ctxt->parseMode == XML_PARSE_READER) && 7423 (nw != NULL) && 7424 (nw->type == XML_ELEMENT_NODE) && 7425 (nw->children == NULL)) 7426 nw->extra = 1; 7427 7428 break; 7429 } 7430 cur = cur->next; 7431 } 7432 #ifdef LIBXML_LEGACY_ENABLED 7433 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7434 xmlAddEntityReference(ent, firstChild, nw); 7435 #endif /* LIBXML_LEGACY_ENABLED */ 7436 } else if ((list == NULL) || (ctxt->inputNr > 0)) { 7437 xmlNodePtr nw = NULL, cur, next, last, 7438 firstChild = NULL; 7439 7440 /* 7441 * We are copying here, make sure there is no abuse 7442 */ 7443 ctxt->sizeentcopy += ent->length; 7444 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7445 return; 7446 7447 /* 7448 * Copy the entity child list and make it the new 7449 * entity child list. The goal is to make sure any 7450 * ID or REF referenced will be the one from the 7451 * document content and not the entity copy. 
7452 */ 7453 cur = ent->children; 7454 ent->children = NULL; 7455 last = ent->last; 7456 ent->last = NULL; 7457 while (cur != NULL) { 7458 next = cur->next; 7459 cur->next = NULL; 7460 cur->parent = NULL; 7461 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7462 if (nw != NULL) { 7463 if (nw->_private == NULL) 7464 nw->_private = cur->_private; 7465 if (firstChild == NULL){ 7466 firstChild = cur; 7467 } 7468 xmlAddChild((xmlNodePtr) ent, nw); 7469 xmlAddChild(ctxt->node, cur); 7470 } 7471 if (cur == last) 7472 break; 7473 cur = next; 7474 } 7475 if (ent->owner == 0) 7476 ent->owner = 1; 7477 #ifdef LIBXML_LEGACY_ENABLED 7478 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7479 xmlAddEntityReference(ent, firstChild, nw); 7480 #endif /* LIBXML_LEGACY_ENABLED */ 7481 } else { 7482 const xmlChar *nbktext; 7483 7484 /* 7485 * the name change is to avoid coalescing of the 7486 * node with a possible previous text one which 7487 * would make ent->children a dangling pointer 7488 */ 7489 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7490 -1); 7491 if (ent->children->type == XML_TEXT_NODE) 7492 ent->children->name = nbktext; 7493 if ((ent->last != ent->children) && 7494 (ent->last->type == XML_TEXT_NODE)) 7495 ent->last->name = nbktext; 7496 xmlAddChildList(ctxt->node, ent->children); 7497 } 7498 7499 /* 7500 * This is to avoid a nasty side effect, see 7501 * characters() in SAX.c 7502 */ 7503 ctxt->nodemem = 0; 7504 ctxt->nodelen = 0; 7505 return; 7506 } 7507 } 7508 } 7509 7510 /** 7511 * xmlParseEntityRef: 7512 * @ctxt: an XML parser context 7513 * 7514 * parse ENTITY references declarations 7515 * 7516 * [68] EntityRef ::= '&' Name ';' 7517 * 7518 * [ WFC: Entity Declared ] 7519 * In a document without any DTD, a document with only an internal DTD 7520 * subset which contains no parameter entity references, or a document 7521 * with "standalone='yes'", the Name given in the entity reference 7522 * must match that in an entity declaration, except that 
well-formed 7523 * documents need not declare any of the following entities: amp, lt, 7524 * gt, apos, quot. The declaration of a parameter entity must precede 7525 * any reference to it. Similarly, the declaration of a general entity 7526 * must precede any reference to it which appears in a default value in an 7527 * attribute-list declaration. Note that if entities are declared in the 7528 * external subset or in external parameter entities, a non-validating 7529 * processor is not obligated to read and process their declarations; 7530 * for such documents, the rule that an entity must be declared is a 7531 * well-formedness constraint only if standalone='yes'. 7532 * 7533 * [ WFC: Parsed Entity ] 7534 * An entity reference must not contain the name of an unparsed entity 7535 * 7536 * Returns the xmlEntityPtr if found, or NULL otherwise. 7537 */ 7538 xmlEntityPtr 7539 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7540 const xmlChar *name; 7541 xmlEntityPtr ent = NULL; 7542 7543 GROW; 7544 if (ctxt->instate == XML_PARSER_EOF) 7545 return(NULL); 7546 7547 if (RAW != '&') 7548 return(NULL); 7549 NEXT; 7550 name = xmlParseName(ctxt); 7551 if (name == NULL) { 7552 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7553 "xmlParseEntityRef: no name\n"); 7554 return(NULL); 7555 } 7556 if (RAW != ';') { 7557 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7558 return(NULL); 7559 } 7560 NEXT; 7561 7562 /* 7563 * Predefined entities override any extra definition 7564 */ 7565 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7566 ent = xmlGetPredefinedEntity(name); 7567 if (ent != NULL) 7568 return(ent); 7569 } 7570 7571 /* 7572 * Increase the number of entity references parsed 7573 */ 7574 ctxt->nbentities++; 7575 7576 /* 7577 * Ask first SAX for entity resolution, otherwise try the 7578 * entities which may have stored in the parser context. 
7579 */ 7580 if (ctxt->sax != NULL) { 7581 if (ctxt->sax->getEntity != NULL) 7582 ent = ctxt->sax->getEntity(ctxt->userData, name); 7583 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7584 (ctxt->options & XML_PARSE_OLDSAX)) 7585 ent = xmlGetPredefinedEntity(name); 7586 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7587 (ctxt->userData==ctxt)) { 7588 ent = xmlSAX2GetEntity(ctxt, name); 7589 } 7590 } 7591 if (ctxt->instate == XML_PARSER_EOF) 7592 return(NULL); 7593 /* 7594 * [ WFC: Entity Declared ] 7595 * In a document without any DTD, a document with only an 7596 * internal DTD subset which contains no parameter entity 7597 * references, or a document with "standalone='yes'", the 7598 * Name given in the entity reference must match that in an 7599 * entity declaration, except that well-formed documents 7600 * need not declare any of the following entities: amp, lt, 7601 * gt, apos, quot. 7602 * The declaration of a parameter entity must precede any 7603 * reference to it. 7604 * Similarly, the declaration of a general entity must 7605 * precede any reference to it which appears in a default 7606 * value in an attribute-list declaration. Note that if 7607 * entities are declared in the external subset or in 7608 * external parameter entities, a non-validating processor 7609 * is not obligated to read and process their declarations; 7610 * for such documents, the rule that an entity must be 7611 * declared is a well-formedness constraint only if 7612 * standalone='yes'. 
7613 */ 7614 if (ent == NULL) { 7615 if ((ctxt->standalone == 1) || 7616 ((ctxt->hasExternalSubset == 0) && 7617 (ctxt->hasPErefs == 0))) { 7618 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7619 "Entity '%s' not defined\n", name); 7620 } else { 7621 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7622 "Entity '%s' not defined\n", name); 7623 if ((ctxt->inSubset == 0) && 7624 (ctxt->sax != NULL) && 7625 (ctxt->sax->reference != NULL)) { 7626 ctxt->sax->reference(ctxt->userData, name); 7627 } 7628 } 7629 ctxt->valid = 0; 7630 } 7631 7632 /* 7633 * [ WFC: Parsed Entity ] 7634 * An entity reference must not contain the name of an 7635 * unparsed entity 7636 */ 7637 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7638 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7639 "Entity reference to unparsed entity %s\n", name); 7640 } 7641 7642 /* 7643 * [ WFC: No External Entity References ] 7644 * Attribute values cannot contain direct or indirect 7645 * entity references to external entities. 7646 */ 7647 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7648 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7649 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7650 "Attribute references external entity '%s'\n", name); 7651 } 7652 /* 7653 * [ WFC: No < in Attribute Values ] 7654 * The replacement text of any entity referred to directly or 7655 * indirectly in an attribute value (other than "<") must 7656 * not contain a <. 7657 */ 7658 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7659 (ent != NULL) && 7660 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7661 if ((ent->checked & 1) || ((ent->checked == 0) && 7662 (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) { 7663 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7664 "'<' in entity '%s' is not allowed in attributes values\n", name); 7665 } 7666 } 7667 7668 /* 7669 * Internal check, no parameter entities here ... 
7670 */ 7671 else { 7672 switch (ent->etype) { 7673 case XML_INTERNAL_PARAMETER_ENTITY: 7674 case XML_EXTERNAL_PARAMETER_ENTITY: 7675 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7676 "Attempt to reference the parameter entity '%s'\n", 7677 name); 7678 break; 7679 default: 7680 break; 7681 } 7682 } 7683 7684 /* 7685 * [ WFC: No Recursion ] 7686 * A parsed entity must not contain a recursive reference 7687 * to itself, either directly or indirectly. 7688 * Done somewhere else 7689 */ 7690 return(ent); 7691 } 7692 7693 /** 7694 * xmlParseStringEntityRef: 7695 * @ctxt: an XML parser context 7696 * @str: a pointer to an index in the string 7697 * 7698 * parse ENTITY references declarations, but this version parses it from 7699 * a string value. 7700 * 7701 * [68] EntityRef ::= '&' Name ';' 7702 * 7703 * [ WFC: Entity Declared ] 7704 * In a document without any DTD, a document with only an internal DTD 7705 * subset which contains no parameter entity references, or a document 7706 * with "standalone='yes'", the Name given in the entity reference 7707 * must match that in an entity declaration, except that well-formed 7708 * documents need not declare any of the following entities: amp, lt, 7709 * gt, apos, quot. The declaration of a parameter entity must precede 7710 * any reference to it. Similarly, the declaration of a general entity 7711 * must precede any reference to it which appears in a default value in an 7712 * attribute-list declaration. Note that if entities are declared in the 7713 * external subset or in external parameter entities, a non-validating 7714 * processor is not obligated to read and process their declarations; 7715 * for such documents, the rule that an entity must be declared is a 7716 * well-formedness constraint only if standalone='yes'. 7717 * 7718 * [ WFC: Parsed Entity ] 7719 * An entity reference must not contain the name of an unparsed entity 7720 * 7721 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7722 * is updated to the current location in the string. 7723 */ 7724 static xmlEntityPtr 7725 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7726 xmlChar *name; 7727 const xmlChar *ptr; 7728 xmlChar cur; 7729 xmlEntityPtr ent = NULL; 7730 7731 if ((str == NULL) || (*str == NULL)) 7732 return(NULL); 7733 ptr = *str; 7734 cur = *ptr; 7735 if (cur != '&') 7736 return(NULL); 7737 7738 ptr++; 7739 name = xmlParseStringName(ctxt, &ptr); 7740 if (name == NULL) { 7741 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7742 "xmlParseStringEntityRef: no name\n"); 7743 *str = ptr; 7744 return(NULL); 7745 } 7746 if (*ptr != ';') { 7747 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7748 xmlFree(name); 7749 *str = ptr; 7750 return(NULL); 7751 } 7752 ptr++; 7753 7754 7755 /* 7756 * Predefined entites override any extra definition 7757 */ 7758 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7759 ent = xmlGetPredefinedEntity(name); 7760 if (ent != NULL) { 7761 xmlFree(name); 7762 *str = ptr; 7763 return(ent); 7764 } 7765 } 7766 7767 /* 7768 * Increate the number of entity references parsed 7769 */ 7770 ctxt->nbentities++; 7771 7772 /* 7773 * Ask first SAX for entity resolution, otherwise try the 7774 * entities which may have stored in the parser context. 
7775 */ 7776 if (ctxt->sax != NULL) { 7777 if (ctxt->sax->getEntity != NULL) 7778 ent = ctxt->sax->getEntity(ctxt->userData, name); 7779 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7780 ent = xmlGetPredefinedEntity(name); 7781 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7782 ent = xmlSAX2GetEntity(ctxt, name); 7783 } 7784 } 7785 if (ctxt->instate == XML_PARSER_EOF) { 7786 xmlFree(name); 7787 return(NULL); 7788 } 7789 7790 /* 7791 * [ WFC: Entity Declared ] 7792 * In a document without any DTD, a document with only an 7793 * internal DTD subset which contains no parameter entity 7794 * references, or a document with "standalone='yes'", the 7795 * Name given in the entity reference must match that in an 7796 * entity declaration, except that well-formed documents 7797 * need not declare any of the following entities: amp, lt, 7798 * gt, apos, quot. 7799 * The declaration of a parameter entity must precede any 7800 * reference to it. 7801 * Similarly, the declaration of a general entity must 7802 * precede any reference to it which appears in a default 7803 * value in an attribute-list declaration. Note that if 7804 * entities are declared in the external subset or in 7805 * external parameter entities, a non-validating processor 7806 * is not obligated to read and process their declarations; 7807 * for such documents, the rule that an entity must be 7808 * declared is a well-formedness constraint only if 7809 * standalone='yes'. 7810 */ 7811 if (ent == NULL) { 7812 if ((ctxt->standalone == 1) || 7813 ((ctxt->hasExternalSubset == 0) && 7814 (ctxt->hasPErefs == 0))) { 7815 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7816 "Entity '%s' not defined\n", name); 7817 } else { 7818 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7819 "Entity '%s' not defined\n", 7820 name); 7821 } 7822 /* TODO ? 
check regressions ctxt->valid = 0; */ 7823 } 7824 7825 /* 7826 * [ WFC: Parsed Entity ] 7827 * An entity reference must not contain the name of an 7828 * unparsed entity 7829 */ 7830 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7831 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7832 "Entity reference to unparsed entity %s\n", name); 7833 } 7834 7835 /* 7836 * [ WFC: No External Entity References ] 7837 * Attribute values cannot contain direct or indirect 7838 * entity references to external entities. 7839 */ 7840 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7841 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7842 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7843 "Attribute references external entity '%s'\n", name); 7844 } 7845 /* 7846 * [ WFC: No < in Attribute Values ] 7847 * The replacement text of any entity referred to directly or 7848 * indirectly in an attribute value (other than "<") must 7849 * not contain a <. 7850 */ 7851 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7852 (ent != NULL) && (ent->content != NULL) && 7853 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7854 (xmlStrchr(ent->content, '<'))) { 7855 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7856 "'<' in entity '%s' is not allowed in attributes values\n", 7857 name); 7858 } 7859 7860 /* 7861 * Internal check, no parameter entities here ... 7862 */ 7863 else { 7864 switch (ent->etype) { 7865 case XML_INTERNAL_PARAMETER_ENTITY: 7866 case XML_EXTERNAL_PARAMETER_ENTITY: 7867 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7868 "Attempt to reference the parameter entity '%s'\n", 7869 name); 7870 break; 7871 default: 7872 break; 7873 } 7874 } 7875 7876 /* 7877 * [ WFC: No Recursion ] 7878 * A parsed entity must not contain a recursive reference 7879 * to itself, either directly or indirectly. 
7880 * Done somewhere else 7881 */ 7882 7883 xmlFree(name); 7884 *str = ptr; 7885 return(ent); 7886 } 7887 7888 /** 7889 * xmlParsePEReference: 7890 * @ctxt: an XML parser context 7891 * 7892 * parse PEReference declarations 7893 * The entity content is handled directly by pushing it's content as 7894 * a new input stream. 7895 * 7896 * [69] PEReference ::= '%' Name ';' 7897 * 7898 * [ WFC: No Recursion ] 7899 * A parsed entity must not contain a recursive 7900 * reference to itself, either directly or indirectly. 7901 * 7902 * [ WFC: Entity Declared ] 7903 * In a document without any DTD, a document with only an internal DTD 7904 * subset which contains no parameter entity references, or a document 7905 * with "standalone='yes'", ... ... The declaration of a parameter 7906 * entity must precede any reference to it... 7907 * 7908 * [ VC: Entity Declared ] 7909 * In a document with an external subset or external parameter entities 7910 * with "standalone='no'", ... ... The declaration of a parameter entity 7911 * must precede any reference to it... 7912 * 7913 * [ WFC: In DTD ] 7914 * Parameter-entity references may only appear in the DTD. 7915 * NOTE: misleading but this is handled. 
7916 */ 7917 void 7918 xmlParsePEReference(xmlParserCtxtPtr ctxt) 7919 { 7920 const xmlChar *name; 7921 xmlEntityPtr entity = NULL; 7922 xmlParserInputPtr input; 7923 7924 if (RAW != '%') 7925 return; 7926 NEXT; 7927 name = xmlParseName(ctxt); 7928 if (name == NULL) { 7929 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7930 "xmlParsePEReference: no name\n"); 7931 return; 7932 } 7933 if (RAW != ';') { 7934 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7935 return; 7936 } 7937 7938 NEXT; 7939 7940 /* 7941 * Increate the number of entity references parsed 7942 */ 7943 ctxt->nbentities++; 7944 7945 /* 7946 * Request the entity from SAX 7947 */ 7948 if ((ctxt->sax != NULL) && 7949 (ctxt->sax->getParameterEntity != NULL)) 7950 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 7951 if (ctxt->instate == XML_PARSER_EOF) 7952 return; 7953 if (entity == NULL) { 7954 /* 7955 * [ WFC: Entity Declared ] 7956 * In a document without any DTD, a document with only an 7957 * internal DTD subset which contains no parameter entity 7958 * references, or a document with "standalone='yes'", ... 7959 * ... The declaration of a parameter entity must precede 7960 * any reference to it... 7961 */ 7962 if ((ctxt->standalone == 1) || 7963 ((ctxt->hasExternalSubset == 0) && 7964 (ctxt->hasPErefs == 0))) { 7965 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7966 "PEReference: %%%s; not found\n", 7967 name); 7968 } else { 7969 /* 7970 * [ VC: Entity Declared ] 7971 * In a document with an external subset or external 7972 * parameter entities with "standalone='no'", ... 7973 * ... The declaration of a parameter entity must 7974 * precede any reference to it... 
7975 */ 7976 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7977 "PEReference: %%%s; not found\n", 7978 name, NULL); 7979 ctxt->valid = 0; 7980 } 7981 } else { 7982 /* 7983 * Internal checking in case the entity quest barfed 7984 */ 7985 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7986 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7987 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7988 "Internal: %%%s; is not a parameter entity\n", 7989 name, NULL); 7990 } else if (ctxt->input->free != deallocblankswrapper) { 7991 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 7992 if (xmlPushInput(ctxt, input) < 0) 7993 return; 7994 } else { 7995 /* 7996 * TODO !!! 7997 * handle the extra spaces added before and after 7998 * c.f. http://www.w3.org/TR/REC-xml#as-PE 7999 */ 8000 input = xmlNewEntityInputStream(ctxt, entity); 8001 if (xmlPushInput(ctxt, input) < 0) 8002 return; 8003 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8004 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8005 (IS_BLANK_CH(NXT(5)))) { 8006 xmlParseTextDecl(ctxt); 8007 if (ctxt->errNo == 8008 XML_ERR_UNSUPPORTED_ENCODING) { 8009 /* 8010 * The XML REC instructs us to stop parsing 8011 * right here 8012 */ 8013 ctxt->instate = XML_PARSER_EOF; 8014 return; 8015 } 8016 } 8017 } 8018 } 8019 ctxt->hasPErefs = 1; 8020 } 8021 8022 /** 8023 * xmlLoadEntityContent: 8024 * @ctxt: an XML parser context 8025 * @entity: an unloaded system entity 8026 * 8027 * Load the original content of the given system entity from the 8028 * ExternalID/SystemID given. 
This is to be used for Included in Literal 8029 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8030 * 8031 * Returns 0 in case of success and -1 in case of failure 8032 */ 8033 static int 8034 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8035 xmlParserInputPtr input; 8036 xmlBufferPtr buf; 8037 int l, c; 8038 int count = 0; 8039 8040 if ((ctxt == NULL) || (entity == NULL) || 8041 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8042 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8043 (entity->content != NULL)) { 8044 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8045 "xmlLoadEntityContent parameter error"); 8046 return(-1); 8047 } 8048 8049 if (xmlParserDebugEntities) 8050 xmlGenericError(xmlGenericErrorContext, 8051 "Reading %s entity content input\n", entity->name); 8052 8053 buf = xmlBufferCreate(); 8054 if (buf == NULL) { 8055 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8056 "xmlLoadEntityContent parameter error"); 8057 return(-1); 8058 } 8059 8060 input = xmlNewEntityInputStream(ctxt, entity); 8061 if (input == NULL) { 8062 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8063 "xmlLoadEntityContent input error"); 8064 xmlBufferFree(buf); 8065 return(-1); 8066 } 8067 8068 /* 8069 * Push the entity as the current input, read char by char 8070 * saving to the buffer until the end of the entity or an error 8071 */ 8072 if (xmlPushInput(ctxt, input) < 0) { 8073 xmlBufferFree(buf); 8074 return(-1); 8075 } 8076 8077 GROW; 8078 c = CUR_CHAR(l); 8079 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8080 (IS_CHAR(c))) { 8081 xmlBufferAdd(buf, ctxt->input->cur, l); 8082 if (count++ > XML_PARSER_CHUNK_SIZE) { 8083 count = 0; 8084 GROW; 8085 if (ctxt->instate == XML_PARSER_EOF) { 8086 xmlBufferFree(buf); 8087 return(-1); 8088 } 8089 } 8090 NEXTL(l); 8091 c = CUR_CHAR(l); 8092 if (c == 0) { 8093 count = 0; 8094 GROW; 8095 if (ctxt->instate == XML_PARSER_EOF) { 8096 xmlBufferFree(buf); 8097 
return(-1); 8098 } 8099 c = CUR_CHAR(l); 8100 } 8101 } 8102 8103 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8104 xmlPopInput(ctxt); 8105 } else if (!IS_CHAR(c)) { 8106 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8107 "xmlLoadEntityContent: invalid char value %d\n", 8108 c); 8109 xmlBufferFree(buf); 8110 return(-1); 8111 } 8112 entity->content = buf->content; 8113 buf->content = NULL; 8114 xmlBufferFree(buf); 8115 8116 return(0); 8117 } 8118 8119 /** 8120 * xmlParseStringPEReference: 8121 * @ctxt: an XML parser context 8122 * @str: a pointer to an index in the string 8123 * 8124 * parse PEReference declarations 8125 * 8126 * [69] PEReference ::= '%' Name ';' 8127 * 8128 * [ WFC: No Recursion ] 8129 * A parsed entity must not contain a recursive 8130 * reference to itself, either directly or indirectly. 8131 * 8132 * [ WFC: Entity Declared ] 8133 * In a document without any DTD, a document with only an internal DTD 8134 * subset which contains no parameter entity references, or a document 8135 * with "standalone='yes'", ... ... The declaration of a parameter 8136 * entity must precede any reference to it... 8137 * 8138 * [ VC: Entity Declared ] 8139 * In a document with an external subset or external parameter entities 8140 * with "standalone='no'", ... ... The declaration of a parameter entity 8141 * must precede any reference to it... 8142 * 8143 * [ WFC: In DTD ] 8144 * Parameter-entity references may only appear in the DTD. 8145 * NOTE: misleading but this is handled. 8146 * 8147 * Returns the string of the entity content. 
8148 * str is updated to the current value of the index 8149 */ 8150 static xmlEntityPtr 8151 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8152 const xmlChar *ptr; 8153 xmlChar cur; 8154 xmlChar *name; 8155 xmlEntityPtr entity = NULL; 8156 8157 if ((str == NULL) || (*str == NULL)) return(NULL); 8158 ptr = *str; 8159 cur = *ptr; 8160 if (cur != '%') 8161 return(NULL); 8162 ptr++; 8163 name = xmlParseStringName(ctxt, &ptr); 8164 if (name == NULL) { 8165 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8166 "xmlParseStringPEReference: no name\n"); 8167 *str = ptr; 8168 return(NULL); 8169 } 8170 cur = *ptr; 8171 if (cur != ';') { 8172 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8173 xmlFree(name); 8174 *str = ptr; 8175 return(NULL); 8176 } 8177 ptr++; 8178 8179 /* 8180 * Increate the number of entity references parsed 8181 */ 8182 ctxt->nbentities++; 8183 8184 /* 8185 * Request the entity from SAX 8186 */ 8187 if ((ctxt->sax != NULL) && 8188 (ctxt->sax->getParameterEntity != NULL)) 8189 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8190 if (ctxt->instate == XML_PARSER_EOF) { 8191 xmlFree(name); 8192 return(NULL); 8193 } 8194 if (entity == NULL) { 8195 /* 8196 * [ WFC: Entity Declared ] 8197 * In a document without any DTD, a document with only an 8198 * internal DTD subset which contains no parameter entity 8199 * references, or a document with "standalone='yes'", ... 8200 * ... The declaration of a parameter entity must precede 8201 * any reference to it... 8202 */ 8203 if ((ctxt->standalone == 1) || 8204 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8205 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8206 "PEReference: %%%s; not found\n", name); 8207 } else { 8208 /* 8209 * [ VC: Entity Declared ] 8210 * In a document with an external subset or external 8211 * parameter entities with "standalone='no'", ... 8212 * ... 
The declaration of a parameter entity must 8213 * precede any reference to it... 8214 */ 8215 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8216 "PEReference: %%%s; not found\n", 8217 name, NULL); 8218 ctxt->valid = 0; 8219 } 8220 } else { 8221 /* 8222 * Internal checking in case the entity quest barfed 8223 */ 8224 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8225 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8226 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8227 "%%%s; is not a parameter entity\n", 8228 name, NULL); 8229 } 8230 } 8231 ctxt->hasPErefs = 1; 8232 xmlFree(name); 8233 *str = ptr; 8234 return(entity); 8235 } 8236 8237 /** 8238 * xmlParseDocTypeDecl: 8239 * @ctxt: an XML parser context 8240 * 8241 * parse a DOCTYPE declaration 8242 * 8243 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8244 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8245 * 8246 * [ VC: Root Element Type ] 8247 * The Name in the document type declaration must match the element 8248 * type of the root element. 8249 */ 8250 8251 void 8252 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8253 const xmlChar *name = NULL; 8254 xmlChar *ExternalID = NULL; 8255 xmlChar *URI = NULL; 8256 8257 /* 8258 * We know that '<!DOCTYPE' has been detected. 8259 */ 8260 SKIP(9); 8261 8262 SKIP_BLANKS; 8263 8264 /* 8265 * Parse the DOCTYPE name. 8266 */ 8267 name = xmlParseName(ctxt); 8268 if (name == NULL) { 8269 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8270 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8271 } 8272 ctxt->intSubName = name; 8273 8274 SKIP_BLANKS; 8275 8276 /* 8277 * Check for SystemID and ExternalID 8278 */ 8279 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8280 8281 if ((URI != NULL) || (ExternalID != NULL)) { 8282 ctxt->hasExternalSubset = 1; 8283 } 8284 ctxt->extSubURI = URI; 8285 ctxt->extSubSystem = ExternalID; 8286 8287 SKIP_BLANKS; 8288 8289 /* 8290 * Create and update the internal subset. 
8291 */ 8292 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8293 (!ctxt->disableSAX)) 8294 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8295 if (ctxt->instate == XML_PARSER_EOF) 8296 return; 8297 8298 /* 8299 * Is there any internal subset declarations ? 8300 * they are handled separately in xmlParseInternalSubset() 8301 */ 8302 if (RAW == '[') 8303 return; 8304 8305 /* 8306 * We should be at the end of the DOCTYPE declaration. 8307 */ 8308 if (RAW != '>') { 8309 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8310 } 8311 NEXT; 8312 } 8313 8314 /** 8315 * xmlParseInternalSubset: 8316 * @ctxt: an XML parser context 8317 * 8318 * parse the internal subset declaration 8319 * 8320 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8321 */ 8322 8323 static void 8324 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8325 /* 8326 * Is there any DTD definition ? 8327 */ 8328 if (RAW == '[') { 8329 ctxt->instate = XML_PARSER_DTD; 8330 NEXT; 8331 /* 8332 * Parse the succession of Markup declarations and 8333 * PEReferences. 8334 * Subsequence (markupdecl | PEReference | S)* 8335 */ 8336 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { 8337 const xmlChar *check = CUR_PTR; 8338 unsigned int cons = ctxt->input->consumed; 8339 8340 SKIP_BLANKS; 8341 xmlParseMarkupDecl(ctxt); 8342 xmlParsePEReference(ctxt); 8343 8344 /* 8345 * Pop-up of finished entities. 8346 */ 8347 while ((RAW == 0) && (ctxt->inputNr > 1)) 8348 xmlPopInput(ctxt); 8349 8350 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8351 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8352 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8353 break; 8354 } 8355 } 8356 if (RAW == ']') { 8357 NEXT; 8358 SKIP_BLANKS; 8359 } 8360 } 8361 8362 /* 8363 * We should be at the end of the DOCTYPE declaration. 
8364 */ 8365 if (RAW != '>') { 8366 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8367 } 8368 NEXT; 8369 } 8370 8371 #ifdef LIBXML_SAX1_ENABLED 8372 /** 8373 * xmlParseAttribute: 8374 * @ctxt: an XML parser context 8375 * @value: a xmlChar ** used to store the value of the attribute 8376 * 8377 * parse an attribute 8378 * 8379 * [41] Attribute ::= Name Eq AttValue 8380 * 8381 * [ WFC: No External Entity References ] 8382 * Attribute values cannot contain direct or indirect entity references 8383 * to external entities. 8384 * 8385 * [ WFC: No < in Attribute Values ] 8386 * The replacement text of any entity referred to directly or indirectly in 8387 * an attribute value (other than "<") must not contain a <. 8388 * 8389 * [ VC: Attribute Value Type ] 8390 * The attribute must have been declared; the value must be of the type 8391 * declared for it. 8392 * 8393 * [25] Eq ::= S? '=' S? 8394 * 8395 * With namespace: 8396 * 8397 * [NS 11] Attribute ::= QName Eq AttValue 8398 * 8399 * Also the case QName == xmlns:??? is handled independently as a namespace 8400 * definition. 8401 * 8402 * Returns the attribute name, and the value in *value. 
8403 */ 8404 8405 const xmlChar * 8406 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8407 const xmlChar *name; 8408 xmlChar *val; 8409 8410 *value = NULL; 8411 GROW; 8412 name = xmlParseName(ctxt); 8413 if (name == NULL) { 8414 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8415 "error parsing attribute name\n"); 8416 return(NULL); 8417 } 8418 8419 /* 8420 * read the value 8421 */ 8422 SKIP_BLANKS; 8423 if (RAW == '=') { 8424 NEXT; 8425 SKIP_BLANKS; 8426 val = xmlParseAttValue(ctxt); 8427 ctxt->instate = XML_PARSER_CONTENT; 8428 } else { 8429 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8430 "Specification mandate value for attribute %s\n", name); 8431 return(NULL); 8432 } 8433 8434 /* 8435 * Check that xml:lang conforms to the specification 8436 * No more registered as an error, just generate a warning now 8437 * since this was deprecated in XML second edition 8438 */ 8439 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8440 if (!xmlCheckLanguageID(val)) { 8441 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8442 "Malformed value for xml:lang : %s\n", 8443 val, NULL); 8444 } 8445 } 8446 8447 /* 8448 * Check that xml:space conforms to the specification 8449 */ 8450 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8451 if (xmlStrEqual(val, BAD_CAST "default")) 8452 *(ctxt->space) = 0; 8453 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8454 *(ctxt->space) = 1; 8455 else { 8456 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8457 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8458 val, NULL); 8459 } 8460 } 8461 8462 *value = val; 8463 return(name); 8464 } 8465 8466 /** 8467 * xmlParseStartTag: 8468 * @ctxt: an XML parser context 8469 * 8470 * parse a start of tag either for rule element or 8471 * EmptyElement. In both case we don't parse the tag closing chars. 8472 * 8473 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8474 * 8475 * [ WFC: Unique Att Spec ] 8476 * No attribute name may appear more than once in the same start-tag or 8477 * empty-element tag. 8478 * 8479 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8480 * 8481 * [ WFC: Unique Att Spec ] 8482 * No attribute name may appear more than once in the same start-tag or 8483 * empty-element tag. 8484 * 8485 * With namespace: 8486 * 8487 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8488 * 8489 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8490 * 8491 * Returns the element name parsed 8492 */ 8493 8494 const xmlChar * 8495 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8496 const xmlChar *name; 8497 const xmlChar *attname; 8498 xmlChar *attvalue; 8499 const xmlChar **atts = ctxt->atts; 8500 int nbatts = 0; 8501 int maxatts = ctxt->maxatts; 8502 int i; 8503 8504 if (RAW != '<') return(NULL); 8505 NEXT1; 8506 8507 name = xmlParseName(ctxt); 8508 if (name == NULL) { 8509 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8510 "xmlParseStartTag: invalid element name\n"); 8511 return(NULL); 8512 } 8513 8514 /* 8515 * Now parse the attributes, it ends up with the ending 8516 * 8517 * (S Attribute)* S? 8518 */ 8519 SKIP_BLANKS; 8520 GROW; 8521 8522 while (((RAW != '>') && 8523 ((RAW != '/') || (NXT(1) != '>')) && 8524 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8525 const xmlChar *q = CUR_PTR; 8526 unsigned int cons = ctxt->input->consumed; 8527 8528 attname = xmlParseAttribute(ctxt, &attvalue); 8529 if ((attname != NULL) && (attvalue != NULL)) { 8530 /* 8531 * [ WFC: Unique Att Spec ] 8532 * No attribute name may appear more than once in the same 8533 * start-tag or empty-element tag. 
8534 */ 8535 for (i = 0; i < nbatts;i += 2) { 8536 if (xmlStrEqual(atts[i], attname)) { 8537 xmlErrAttributeDup(ctxt, NULL, attname); 8538 xmlFree(attvalue); 8539 goto failed; 8540 } 8541 } 8542 /* 8543 * Add the pair to atts 8544 */ 8545 if (atts == NULL) { 8546 maxatts = 22; /* allow for 10 attrs by default */ 8547 atts = (const xmlChar **) 8548 xmlMalloc(maxatts * sizeof(xmlChar *)); 8549 if (atts == NULL) { 8550 xmlErrMemory(ctxt, NULL); 8551 if (attvalue != NULL) 8552 xmlFree(attvalue); 8553 goto failed; 8554 } 8555 ctxt->atts = atts; 8556 ctxt->maxatts = maxatts; 8557 } else if (nbatts + 4 > maxatts) { 8558 const xmlChar **n; 8559 8560 maxatts *= 2; 8561 n = (const xmlChar **) xmlRealloc((void *) atts, 8562 maxatts * sizeof(const xmlChar *)); 8563 if (n == NULL) { 8564 xmlErrMemory(ctxt, NULL); 8565 if (attvalue != NULL) 8566 xmlFree(attvalue); 8567 goto failed; 8568 } 8569 atts = n; 8570 ctxt->atts = atts; 8571 ctxt->maxatts = maxatts; 8572 } 8573 atts[nbatts++] = attname; 8574 atts[nbatts++] = attvalue; 8575 atts[nbatts] = NULL; 8576 atts[nbatts + 1] = NULL; 8577 } else { 8578 if (attvalue != NULL) 8579 xmlFree(attvalue); 8580 } 8581 8582 failed: 8583 8584 GROW 8585 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8586 break; 8587 if (!IS_BLANK_CH(RAW)) { 8588 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8589 "attributes construct error\n"); 8590 } 8591 SKIP_BLANKS; 8592 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8593 (attname == NULL) && (attvalue == NULL)) { 8594 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8595 "xmlParseStartTag: problem parsing attributes\n"); 8596 break; 8597 } 8598 SHRINK; 8599 GROW; 8600 } 8601 8602 /* 8603 * SAX: Start of Element ! 
8604 */ 8605 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8606 (!ctxt->disableSAX)) { 8607 if (nbatts > 0) 8608 ctxt->sax->startElement(ctxt->userData, name, atts); 8609 else 8610 ctxt->sax->startElement(ctxt->userData, name, NULL); 8611 } 8612 8613 if (atts != NULL) { 8614 /* Free only the content strings */ 8615 for (i = 1;i < nbatts;i+=2) 8616 if (atts[i] != NULL) 8617 xmlFree((xmlChar *) atts[i]); 8618 } 8619 return(name); 8620 } 8621 8622 /** 8623 * xmlParseEndTag1: 8624 * @ctxt: an XML parser context 8625 * @line: line of the start tag 8626 * @nsNr: number of namespaces on the start tag 8627 * 8628 * parse an end of tag 8629 * 8630 * [42] ETag ::= '</' Name S? '>' 8631 * 8632 * With namespace 8633 * 8634 * [NS 9] ETag ::= '</' QName S? '>' 8635 */ 8636 8637 static void 8638 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8639 const xmlChar *name; 8640 8641 GROW; 8642 if ((RAW != '<') || (NXT(1) != '/')) { 8643 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8644 "xmlParseEndTag: '</' not found\n"); 8645 return; 8646 } 8647 SKIP(2); 8648 8649 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8650 8651 /* 8652 * We should definitely be at the ending "S? '>'" part 8653 */ 8654 GROW; 8655 SKIP_BLANKS; 8656 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8657 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8658 } else 8659 NEXT1; 8660 8661 /* 8662 * [ WFC: Element Type Match ] 8663 * The Name in an element's end-tag must match the element type in the 8664 * start-tag. 
8665 * 8666 */ 8667 if (name != (xmlChar*)1) { 8668 if (name == NULL) name = BAD_CAST "unparseable"; 8669 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8670 "Opening and ending tag mismatch: %s line %d and %s\n", 8671 ctxt->name, line, name); 8672 } 8673 8674 /* 8675 * SAX: End of Tag 8676 */ 8677 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8678 (!ctxt->disableSAX)) 8679 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8680 8681 namePop(ctxt); 8682 spacePop(ctxt); 8683 return; 8684 } 8685 8686 /** 8687 * xmlParseEndTag: 8688 * @ctxt: an XML parser context 8689 * 8690 * parse an end of tag 8691 * 8692 * [42] ETag ::= '</' Name S? '>' 8693 * 8694 * With namespace 8695 * 8696 * [NS 9] ETag ::= '</' QName S? '>' 8697 */ 8698 8699 void 8700 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8701 xmlParseEndTag1(ctxt, 0); 8702 } 8703 #endif /* LIBXML_SAX1_ENABLED */ 8704 8705 /************************************************************************ 8706 * * 8707 * SAX 2 specific operations * 8708 * * 8709 ************************************************************************/ 8710 8711 /* 8712 * xmlGetNamespace: 8713 * @ctxt: an XML parser context 8714 * @prefix: the prefix to lookup 8715 * 8716 * Lookup the namespace name for the @prefix (which ca be NULL) 8717 * The prefix must come from the @ctxt->dict dictionnary 8718 * 8719 * Returns the namespace name or NULL if not bound 8720 */ 8721 static const xmlChar * 8722 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8723 int i; 8724 8725 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8726 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8727 if (ctxt->nsTab[i] == prefix) { 8728 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8729 return(NULL); 8730 return(ctxt->nsTab[i + 1]); 8731 } 8732 return(NULL); 8733 } 8734 8735 /** 8736 * xmlParseQName: 8737 * @ctxt: an XML parser context 8738 * @prefix: pointer to store the prefix part 8739 * 8740 * parse an XML Namespace QName 8741 * 8742 * 
[6] QName ::= (Prefix ':')? LocalPart 8743 * [7] Prefix ::= NCName 8744 * [8] LocalPart ::= NCName 8745 * 8746 * Returns the Name parsed or NULL 8747 */ 8748 8749 static const xmlChar * 8750 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8751 const xmlChar *l, *p; 8752 8753 GROW; 8754 8755 l = xmlParseNCName(ctxt); 8756 if (l == NULL) { 8757 if (CUR == ':') { 8758 l = xmlParseName(ctxt); 8759 if (l != NULL) { 8760 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8761 "Failed to parse QName '%s'\n", l, NULL, NULL); 8762 *prefix = NULL; 8763 return(l); 8764 } 8765 } 8766 return(NULL); 8767 } 8768 if (CUR == ':') { 8769 NEXT; 8770 p = l; 8771 l = xmlParseNCName(ctxt); 8772 if (l == NULL) { 8773 xmlChar *tmp; 8774 8775 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8776 "Failed to parse QName '%s:'\n", p, NULL, NULL); 8777 l = xmlParseNmtoken(ctxt); 8778 if (l == NULL) 8779 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8780 else { 8781 tmp = xmlBuildQName(l, p, NULL, 0); 8782 xmlFree((char *)l); 8783 } 8784 p = xmlDictLookup(ctxt->dict, tmp, -1); 8785 if (tmp != NULL) xmlFree(tmp); 8786 *prefix = NULL; 8787 return(p); 8788 } 8789 if (CUR == ':') { 8790 xmlChar *tmp; 8791 8792 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8793 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8794 NEXT; 8795 tmp = (xmlChar *) xmlParseName(ctxt); 8796 if (tmp != NULL) { 8797 tmp = xmlBuildQName(tmp, l, NULL, 0); 8798 l = xmlDictLookup(ctxt->dict, tmp, -1); 8799 if (tmp != NULL) xmlFree(tmp); 8800 *prefix = p; 8801 return(l); 8802 } 8803 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8804 l = xmlDictLookup(ctxt->dict, tmp, -1); 8805 if (tmp != NULL) xmlFree(tmp); 8806 *prefix = p; 8807 return(l); 8808 } 8809 *prefix = p; 8810 } else 8811 *prefix = NULL; 8812 return(l); 8813 } 8814 8815 /** 8816 * xmlParseQNameAndCompare: 8817 * @ctxt: an XML parser context 8818 * @name: the localname 8819 * @prefix: the prefix, if any. 
8820 * 8821 * parse an XML name and compares for match 8822 * (specialized for endtag parsing) 8823 * 8824 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8825 * and the name for mismatch 8826 */ 8827 8828 static const xmlChar * 8829 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8830 xmlChar const *prefix) { 8831 const xmlChar *cmp; 8832 const xmlChar *in; 8833 const xmlChar *ret; 8834 const xmlChar *prefix2; 8835 8836 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8837 8838 GROW; 8839 in = ctxt->input->cur; 8840 8841 cmp = prefix; 8842 while (*in != 0 && *in == *cmp) { 8843 ++in; 8844 ++cmp; 8845 } 8846 if ((*cmp == 0) && (*in == ':')) { 8847 in++; 8848 cmp = name; 8849 while (*in != 0 && *in == *cmp) { 8850 ++in; 8851 ++cmp; 8852 } 8853 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8854 /* success */ 8855 ctxt->input->cur = in; 8856 return((const xmlChar*) 1); 8857 } 8858 } 8859 /* 8860 * all strings coms from the dictionary, equality can be done directly 8861 */ 8862 ret = xmlParseQName (ctxt, &prefix2); 8863 if ((ret == name) && (prefix == prefix2)) 8864 return((const xmlChar*) 1); 8865 return ret; 8866 } 8867 8868 /** 8869 * xmlParseAttValueInternal: 8870 * @ctxt: an XML parser context 8871 * @len: attribute len result 8872 * @alloc: whether the attribute was reallocated as a new string 8873 * @normalize: if 1 then further non-CDATA normalization must be done 8874 * 8875 * parse a value for an attribute. 8876 * NOTE: if no normalization is needed, the routine will return pointers 8877 * directly from the data buffer. 
8878 * 8879 * 3.3.3 Attribute-Value Normalization: 8880 * Before the value of an attribute is passed to the application or 8881 * checked for validity, the XML processor must normalize it as follows: 8882 * - a character reference is processed by appending the referenced 8883 * character to the attribute value 8884 * - an entity reference is processed by recursively processing the 8885 * replacement text of the entity 8886 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8887 * appending #x20 to the normalized value, except that only a single 8888 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8889 * parsed entity or the literal entity value of an internal parsed entity 8890 * - other characters are processed by appending them to the normalized value 8891 * If the declared value is not CDATA, then the XML processor must further 8892 * process the normalized attribute value by discarding any leading and 8893 * trailing space (#x20) characters, and by replacing sequences of space 8894 * (#x20) characters by a single space (#x20) character. 8895 * All attributes for which no declaration has been read should be treated 8896 * by a non-validating parser as if declared CDATA. 8897 * 8898 * Returns the AttValue parsed or NULL. The value has to be freed by the 8899 * caller if it was copied, this can be detected by val[*len] == 0. 
8900 */ 8901 8902 static xmlChar * 8903 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8904 int normalize) 8905 { 8906 xmlChar limit = 0; 8907 const xmlChar *in = NULL, *start, *end, *last; 8908 xmlChar *ret = NULL; 8909 8910 GROW; 8911 in = (xmlChar *) CUR_PTR; 8912 if (*in != '"' && *in != '\'') { 8913 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8914 return (NULL); 8915 } 8916 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8917 8918 /* 8919 * try to handle in this routine the most common case where no 8920 * allocation of a new string is required and where content is 8921 * pure ASCII. 8922 */ 8923 limit = *in++; 8924 end = ctxt->input->end; 8925 start = in; 8926 if (in >= end) { 8927 const xmlChar *oldbase = ctxt->input->base; 8928 GROW; 8929 if (oldbase != ctxt->input->base) { 8930 long delta = ctxt->input->base - oldbase; 8931 start = start + delta; 8932 in = in + delta; 8933 } 8934 end = ctxt->input->end; 8935 } 8936 if (normalize) { 8937 /* 8938 * Skip any leading spaces 8939 */ 8940 while ((in < end) && (*in != limit) && 8941 ((*in == 0x20) || (*in == 0x9) || 8942 (*in == 0xA) || (*in == 0xD))) { 8943 in++; 8944 start = in; 8945 if (in >= end) { 8946 const xmlChar *oldbase = ctxt->input->base; 8947 GROW; 8948 if (ctxt->instate == XML_PARSER_EOF) 8949 return(NULL); 8950 if (oldbase != ctxt->input->base) { 8951 long delta = ctxt->input->base - oldbase; 8952 start = start + delta; 8953 in = in + delta; 8954 } 8955 end = ctxt->input->end; 8956 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8957 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8958 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8959 "AttValue length too long\n"); 8960 return(NULL); 8961 } 8962 } 8963 } 8964 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8965 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8966 if ((*in++ == 0x20) && (*in == 0x20)) break; 8967 if (in >= end) { 8968 const xmlChar *oldbase = ctxt->input->base; 8969 GROW; 8970 if (ctxt->instate 
== XML_PARSER_EOF) 8971 return(NULL); 8972 if (oldbase != ctxt->input->base) { 8973 long delta = ctxt->input->base - oldbase; 8974 start = start + delta; 8975 in = in + delta; 8976 } 8977 end = ctxt->input->end; 8978 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8979 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8980 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8981 "AttValue length too long\n"); 8982 return(NULL); 8983 } 8984 } 8985 } 8986 last = in; 8987 /* 8988 * skip the trailing blanks 8989 */ 8990 while ((last[-1] == 0x20) && (last > start)) last--; 8991 while ((in < end) && (*in != limit) && 8992 ((*in == 0x20) || (*in == 0x9) || 8993 (*in == 0xA) || (*in == 0xD))) { 8994 in++; 8995 if (in >= end) { 8996 const xmlChar *oldbase = ctxt->input->base; 8997 GROW; 8998 if (ctxt->instate == XML_PARSER_EOF) 8999 return(NULL); 9000 if (oldbase != ctxt->input->base) { 9001 long delta = ctxt->input->base - oldbase; 9002 start = start + delta; 9003 in = in + delta; 9004 last = last + delta; 9005 } 9006 end = ctxt->input->end; 9007 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9008 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9009 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9010 "AttValue length too long\n"); 9011 return(NULL); 9012 } 9013 } 9014 } 9015 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9016 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9017 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9018 "AttValue length too long\n"); 9019 return(NULL); 9020 } 9021 if (*in != limit) goto need_complex; 9022 } else { 9023 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9024 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9025 in++; 9026 if (in >= end) { 9027 const xmlChar *oldbase = ctxt->input->base; 9028 GROW; 9029 if (ctxt->instate == XML_PARSER_EOF) 9030 return(NULL); 9031 if (oldbase != ctxt->input->base) { 9032 long delta = ctxt->input->base - oldbase; 9033 start = start + delta; 9034 in = in + delta; 9035 } 9036 end = ctxt->input->end; 9037 if 
(((in - start) > XML_MAX_TEXT_LENGTH) && 9038 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9039 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9040 "AttValue length too long\n"); 9041 return(NULL); 9042 } 9043 } 9044 } 9045 last = in; 9046 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9047 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9048 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9049 "AttValue length too long\n"); 9050 return(NULL); 9051 } 9052 if (*in != limit) goto need_complex; 9053 } 9054 in++; 9055 if (len != NULL) { 9056 *len = last - start; 9057 ret = (xmlChar *) start; 9058 } else { 9059 if (alloc) *alloc = 1; 9060 ret = xmlStrndup(start, last - start); 9061 } 9062 CUR_PTR = in; 9063 if (alloc) *alloc = 0; 9064 return ret; 9065 need_complex: 9066 if (alloc) *alloc = 1; 9067 return xmlParseAttValueComplex(ctxt, len, normalize); 9068 } 9069 9070 /** 9071 * xmlParseAttribute2: 9072 * @ctxt: an XML parser context 9073 * @pref: the element prefix 9074 * @elem: the element name 9075 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9076 * @value: a xmlChar ** used to store the value of the attribute 9077 * @len: an int * to save the length of the attribute 9078 * @alloc: an int * to indicate if the attribute was allocated 9079 * 9080 * parse an attribute in the new SAX2 framework. 9081 * 9082 * Returns the attribute name, and the value in *value, . 
9083 */ 9084 9085 static const xmlChar * 9086 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9087 const xmlChar * pref, const xmlChar * elem, 9088 const xmlChar ** prefix, xmlChar ** value, 9089 int *len, int *alloc) 9090 { 9091 const xmlChar *name; 9092 xmlChar *val, *internal_val = NULL; 9093 int normalize = 0; 9094 9095 *value = NULL; 9096 GROW; 9097 name = xmlParseQName(ctxt, prefix); 9098 if (name == NULL) { 9099 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9100 "error parsing attribute name\n"); 9101 return (NULL); 9102 } 9103 9104 /* 9105 * get the type if needed 9106 */ 9107 if (ctxt->attsSpecial != NULL) { 9108 int type; 9109 9110 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 9111 pref, elem, *prefix, name); 9112 if (type != 0) 9113 normalize = 1; 9114 } 9115 9116 /* 9117 * read the value 9118 */ 9119 SKIP_BLANKS; 9120 if (RAW == '=') { 9121 NEXT; 9122 SKIP_BLANKS; 9123 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9124 if (normalize) { 9125 /* 9126 * Sometimes a second normalisation pass for spaces is needed 9127 * but that only happens if charrefs or entities refernces 9128 * have been used in the attribute value, i.e. the attribute 9129 * value have been extracted in an allocated string already. 
9130 */ 9131 if (*alloc) { 9132 const xmlChar *val2; 9133 9134 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9135 if ((val2 != NULL) && (val2 != val)) { 9136 xmlFree(val); 9137 val = (xmlChar *) val2; 9138 } 9139 } 9140 } 9141 ctxt->instate = XML_PARSER_CONTENT; 9142 } else { 9143 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9144 "Specification mandate value for attribute %s\n", 9145 name); 9146 return (NULL); 9147 } 9148 9149 if (*prefix == ctxt->str_xml) { 9150 /* 9151 * Check that xml:lang conforms to the specification 9152 * No more registered as an error, just generate a warning now 9153 * since this was deprecated in XML second edition 9154 */ 9155 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9156 internal_val = xmlStrndup(val, *len); 9157 if (!xmlCheckLanguageID(internal_val)) { 9158 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9159 "Malformed value for xml:lang : %s\n", 9160 internal_val, NULL); 9161 } 9162 } 9163 9164 /* 9165 * Check that xml:space conforms to the specification 9166 */ 9167 if (xmlStrEqual(name, BAD_CAST "space")) { 9168 internal_val = xmlStrndup(val, *len); 9169 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9170 *(ctxt->space) = 0; 9171 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9172 *(ctxt->space) = 1; 9173 else { 9174 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9175 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9176 internal_val, NULL); 9177 } 9178 } 9179 if (internal_val) { 9180 xmlFree(internal_val); 9181 } 9182 } 9183 9184 *value = val; 9185 return (name); 9186 } 9187 /** 9188 * xmlParseStartTag2: 9189 * @ctxt: an XML parser context 9190 * 9191 * parse a start of tag either for rule element or 9192 * EmptyElement. In both case we don't parse the tag closing chars. 9193 * This routine is called when running SAX2 parsing 9194 * 9195 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9196 * 9197 * [ WFC: Unique Att Spec ] 9198 * No attribute name may appear more than once in the same start-tag or 9199 * empty-element tag. 9200 * 9201 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9202 * 9203 * [ WFC: Unique Att Spec ] 9204 * No attribute name may appear more than once in the same start-tag or 9205 * empty-element tag. 9206 * 9207 * With namespace: 9208 * 9209 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9210 * 9211 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9212 * 9213 * Returns the element name parsed 9214 */ 9215 9216 static const xmlChar * 9217 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9218 const xmlChar **URI, int *tlen) { 9219 const xmlChar *localname; 9220 const xmlChar *prefix; 9221 const xmlChar *attname; 9222 const xmlChar *aprefix; 9223 const xmlChar *nsname; 9224 xmlChar *attvalue; 9225 const xmlChar **atts = ctxt->atts; 9226 int maxatts = ctxt->maxatts; 9227 int nratts, nbatts, nbdef; 9228 int i, j, nbNs, attval, oldline, oldcol; 9229 const xmlChar *base; 9230 unsigned long cur; 9231 int nsNr = ctxt->nsNr; 9232 9233 if (RAW != '<') return(NULL); 9234 NEXT1; 9235 9236 /* 9237 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9238 * point since the attribute values may be stored as pointers to 9239 * the buffer and calling SHRINK would destroy them ! 9240 * The Shrinking is only possible once the full set of attribute 9241 * callbacks have been done. 9242 */ 9243 reparse: 9244 SHRINK; 9245 base = ctxt->input->base; 9246 cur = ctxt->input->cur - ctxt->input->base; 9247 oldline = ctxt->input->line; 9248 oldcol = ctxt->input->col; 9249 nbatts = 0; 9250 nratts = 0; 9251 nbdef = 0; 9252 nbNs = 0; 9253 attval = 0; 9254 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9255 ctxt->nsNr = nsNr; 9256 9257 localname = xmlParseQName(ctxt, &prefix); 9258 if (localname == NULL) { 9259 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9260 "StartTag: invalid element name\n"); 9261 return(NULL); 9262 } 9263 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9264 9265 /* 9266 * Now parse the attributes, it ends up with the ending 9267 * 9268 * (S Attribute)* S? 9269 */ 9270 SKIP_BLANKS; 9271 GROW; 9272 if (ctxt->input->base != base) goto base_changed; 9273 9274 while (((RAW != '>') && 9275 ((RAW != '/') || (NXT(1) != '>')) && 9276 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9277 const xmlChar *q = CUR_PTR; 9278 unsigned int cons = ctxt->input->consumed; 9279 int len = -1, alloc = 0; 9280 9281 attname = xmlParseAttribute2(ctxt, prefix, localname, 9282 &aprefix, &attvalue, &len, &alloc); 9283 if (ctxt->input->base != base) { 9284 if ((attvalue != NULL) && (alloc != 0)) 9285 xmlFree(attvalue); 9286 attvalue = NULL; 9287 goto base_changed; 9288 } 9289 if ((attname != NULL) && (attvalue != NULL)) { 9290 if (len < 0) len = xmlStrlen(attvalue); 9291 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9292 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9293 xmlURIPtr uri; 9294 9295 if (*URL != 0) { 9296 uri = xmlParseURI((const char *) URL); 9297 if (uri == NULL) { 9298 xmlNsErr(ctxt, XML_WAR_NS_URI, 9299 "xmlns: '%s' is not a valid URI\n", 9300 URL, NULL, NULL); 9301 } else { 9302 if (uri->scheme == NULL) { 9303 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9304 "xmlns: URI %s is not absolute\n", 9305 URL, NULL, NULL); 9306 } 9307 xmlFreeURI(uri); 9308 } 9309 if (URL == ctxt->str_xml_ns) { 9310 if (attname != ctxt->str_xml) { 9311 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9312 "xml namespace URI cannot be the default namespace\n", 9313 NULL, NULL, NULL); 9314 } 9315 goto skip_default_ns; 9316 } 9317 if ((len == 29) && 9318 (xmlStrEqual(URL, 9319 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9320 xmlNsErr(ctxt, 
XML_NS_ERR_XML_NAMESPACE, 9321 "reuse of the xmlns namespace name is forbidden\n", 9322 NULL, NULL, NULL); 9323 goto skip_default_ns; 9324 } 9325 } 9326 /* 9327 * check that it's not a defined namespace 9328 */ 9329 for (j = 1;j <= nbNs;j++) 9330 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9331 break; 9332 if (j <= nbNs) 9333 xmlErrAttributeDup(ctxt, NULL, attname); 9334 else 9335 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9336 skip_default_ns: 9337 if (alloc != 0) xmlFree(attvalue); 9338 SKIP_BLANKS; 9339 continue; 9340 } 9341 if (aprefix == ctxt->str_xmlns) { 9342 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9343 xmlURIPtr uri; 9344 9345 if (attname == ctxt->str_xml) { 9346 if (URL != ctxt->str_xml_ns) { 9347 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9348 "xml namespace prefix mapped to wrong URI\n", 9349 NULL, NULL, NULL); 9350 } 9351 /* 9352 * Do not keep a namespace definition node 9353 */ 9354 goto skip_ns; 9355 } 9356 if (URL == ctxt->str_xml_ns) { 9357 if (attname != ctxt->str_xml) { 9358 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9359 "xml namespace URI mapped to wrong prefix\n", 9360 NULL, NULL, NULL); 9361 } 9362 goto skip_ns; 9363 } 9364 if (attname == ctxt->str_xmlns) { 9365 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9366 "redefinition of the xmlns prefix is forbidden\n", 9367 NULL, NULL, NULL); 9368 goto skip_ns; 9369 } 9370 if ((len == 29) && 9371 (xmlStrEqual(URL, 9372 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9373 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9374 "reuse of the xmlns namespace name is forbidden\n", 9375 NULL, NULL, NULL); 9376 goto skip_ns; 9377 } 9378 if ((URL == NULL) || (URL[0] == 0)) { 9379 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9380 "xmlns:%s: Empty XML namespace is not allowed\n", 9381 attname, NULL, NULL); 9382 goto skip_ns; 9383 } else { 9384 uri = xmlParseURI((const char *) URL); 9385 if (uri == NULL) { 9386 xmlNsErr(ctxt, XML_WAR_NS_URI, 9387 "xmlns:%s: '%s' is not a valid URI\n", 9388 attname, URL, NULL); 
9389 } else { 9390 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9391 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9392 "xmlns:%s: URI %s is not absolute\n", 9393 attname, URL, NULL); 9394 } 9395 xmlFreeURI(uri); 9396 } 9397 } 9398 9399 /* 9400 * check that it's not a defined namespace 9401 */ 9402 for (j = 1;j <= nbNs;j++) 9403 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9404 break; 9405 if (j <= nbNs) 9406 xmlErrAttributeDup(ctxt, aprefix, attname); 9407 else 9408 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9409 skip_ns: 9410 if (alloc != 0) xmlFree(attvalue); 9411 SKIP_BLANKS; 9412 if (ctxt->input->base != base) goto base_changed; 9413 continue; 9414 } 9415 9416 /* 9417 * Add the pair to atts 9418 */ 9419 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9420 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9421 if (attvalue[len] == 0) 9422 xmlFree(attvalue); 9423 goto failed; 9424 } 9425 maxatts = ctxt->maxatts; 9426 atts = ctxt->atts; 9427 } 9428 ctxt->attallocs[nratts++] = alloc; 9429 atts[nbatts++] = attname; 9430 atts[nbatts++] = aprefix; 9431 atts[nbatts++] = NULL; /* the URI will be fetched later */ 9432 atts[nbatts++] = attvalue; 9433 attvalue += len; 9434 atts[nbatts++] = attvalue; 9435 /* 9436 * tag if some deallocation is needed 9437 */ 9438 if (alloc != 0) attval = 1; 9439 } else { 9440 if ((attvalue != NULL) && (attvalue[len] == 0)) 9441 xmlFree(attvalue); 9442 } 9443 9444 failed: 9445 9446 GROW 9447 if (ctxt->instate == XML_PARSER_EOF) 9448 break; 9449 if (ctxt->input->base != base) goto base_changed; 9450 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9451 break; 9452 if (!IS_BLANK_CH(RAW)) { 9453 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9454 "attributes construct error\n"); 9455 break; 9456 } 9457 SKIP_BLANKS; 9458 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9459 (attname == NULL) && (attvalue == NULL)) { 9460 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9461 "xmlParseStartTag: problem parsing attributes\n"); 9462 break; 
9463 } 9464 GROW; 9465 if (ctxt->input->base != base) goto base_changed; 9466 } 9467 9468 /* 9469 * The attributes defaulting 9470 */ 9471 if (ctxt->attsDefault != NULL) { 9472 xmlDefAttrsPtr defaults; 9473 9474 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9475 if (defaults != NULL) { 9476 for (i = 0;i < defaults->nbAttrs;i++) { 9477 attname = defaults->values[5 * i]; 9478 aprefix = defaults->values[5 * i + 1]; 9479 9480 /* 9481 * special work for namespaces defaulted defs 9482 */ 9483 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9484 /* 9485 * check that it's not a defined namespace 9486 */ 9487 for (j = 1;j <= nbNs;j++) 9488 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9489 break; 9490 if (j <= nbNs) continue; 9491 9492 nsname = xmlGetNamespace(ctxt, NULL); 9493 if (nsname != defaults->values[5 * i + 2]) { 9494 if (nsPush(ctxt, NULL, 9495 defaults->values[5 * i + 2]) > 0) 9496 nbNs++; 9497 } 9498 } else if (aprefix == ctxt->str_xmlns) { 9499 /* 9500 * check that it's not a defined namespace 9501 */ 9502 for (j = 1;j <= nbNs;j++) 9503 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9504 break; 9505 if (j <= nbNs) continue; 9506 9507 nsname = xmlGetNamespace(ctxt, attname); 9508 if (nsname != defaults->values[2]) { 9509 if (nsPush(ctxt, attname, 9510 defaults->values[5 * i + 2]) > 0) 9511 nbNs++; 9512 } 9513 } else { 9514 /* 9515 * check that it's not a defined attribute 9516 */ 9517 for (j = 0;j < nbatts;j+=5) { 9518 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9519 break; 9520 } 9521 if (j < nbatts) continue; 9522 9523 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9524 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9525 return(NULL); 9526 } 9527 maxatts = ctxt->maxatts; 9528 atts = ctxt->atts; 9529 } 9530 atts[nbatts++] = attname; 9531 atts[nbatts++] = aprefix; 9532 if (aprefix == NULL) 9533 atts[nbatts++] = NULL; 9534 else 9535 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9536 atts[nbatts++] = defaults->values[5 * i + 
2]; 9537 atts[nbatts++] = defaults->values[5 * i + 3]; 9538 if ((ctxt->standalone == 1) && 9539 (defaults->values[5 * i + 4] != NULL)) { 9540 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9541 "standalone: attribute %s on %s defaulted from external subset\n", 9542 attname, localname); 9543 } 9544 nbdef++; 9545 } 9546 } 9547 } 9548 } 9549 9550 /* 9551 * The attributes checkings 9552 */ 9553 for (i = 0; i < nbatts;i += 5) { 9554 /* 9555 * The default namespace does not apply to attribute names. 9556 */ 9557 if (atts[i + 1] != NULL) { 9558 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9559 if (nsname == NULL) { 9560 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9561 "Namespace prefix %s for %s on %s is not defined\n", 9562 atts[i + 1], atts[i], localname); 9563 } 9564 atts[i + 2] = nsname; 9565 } else 9566 nsname = NULL; 9567 /* 9568 * [ WFC: Unique Att Spec ] 9569 * No attribute name may appear more than once in the same 9570 * start-tag or empty-element tag. 9571 * As extended by the Namespace in XML REC. 9572 */ 9573 for (j = 0; j < i;j += 5) { 9574 if (atts[i] == atts[j]) { 9575 if (atts[i+1] == atts[j+1]) { 9576 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9577 break; 9578 } 9579 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9580 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9581 "Namespaced Attribute %s in '%s' redefined\n", 9582 atts[i], nsname, NULL); 9583 break; 9584 } 9585 } 9586 } 9587 } 9588 9589 nsname = xmlGetNamespace(ctxt, prefix); 9590 if ((prefix != NULL) && (nsname == NULL)) { 9591 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9592 "Namespace prefix %s on %s is not defined\n", 9593 prefix, localname, NULL); 9594 } 9595 *pref = prefix; 9596 *URI = nsname; 9597 9598 /* 9599 * SAX: Start of Element ! 
9600 */ 9601 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9602 (!ctxt->disableSAX)) { 9603 if (nbNs > 0) 9604 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9605 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9606 nbatts / 5, nbdef, atts); 9607 else 9608 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9609 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9610 } 9611 9612 /* 9613 * Free up attribute allocated strings if needed 9614 */ 9615 if (attval != 0) { 9616 for (i = 3,j = 0; j < nratts;i += 5,j++) 9617 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9618 xmlFree((xmlChar *) atts[i]); 9619 } 9620 9621 return(localname); 9622 9623 base_changed: 9624 /* 9625 * the attribute strings are valid iif the base didn't changed 9626 */ 9627 if (attval != 0) { 9628 for (i = 3,j = 0; j < nratts;i += 5,j++) 9629 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9630 xmlFree((xmlChar *) atts[i]); 9631 } 9632 ctxt->input->cur = ctxt->input->base + cur; 9633 ctxt->input->line = oldline; 9634 ctxt->input->col = oldcol; 9635 if (ctxt->wellFormed == 1) { 9636 goto reparse; 9637 } 9638 return(NULL); 9639 } 9640 9641 /** 9642 * xmlParseEndTag2: 9643 * @ctxt: an XML parser context 9644 * @line: line of the start tag 9645 * @nsNr: number of namespaces on the start tag 9646 * 9647 * parse an end of tag 9648 * 9649 * [42] ETag ::= '</' Name S? '>' 9650 * 9651 * With namespace 9652 * 9653 * [NS 9] ETag ::= '</' QName S? 
'>'
 */

static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
                const xmlChar *URI, int line, int nsNr, int tlen) {
    const xmlChar *name;

    GROW;
    if ((RAW != '<') || (NXT(1) != '/')) {
        xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
        return;
    }
    SKIP(2);

    /*
     * Fast path: the next tlen bytes are exactly the name of the open
     * element.  It is only taken when those bytes are all available and
     * are followed by '>' or a blank; a longer name sharing the same
     * prefix (e.g. "foobar" against "foo") must go through the full
     * comparison below so that it is reported as a mismatch.
     */
    if ((tlen > 0) &&
        ((ctxt->input->end - ctxt->input->cur) >= tlen) &&
        (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0) &&
        ((ctxt->input->cur[tlen] == '>') ||
         (IS_BLANK_CH(ctxt->input->cur[tlen])))) {
        if (ctxt->input->cur[tlen] == '>') {
            ctxt->input->cur += tlen + 1;
            goto done;
        }
        ctxt->input->cur += tlen;
        name = (xmlChar*)1;
    } else {
        if (prefix == NULL)
            name = xmlParseNameAndCompare(ctxt, ctxt->name);
        else
            name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
    }

    /*
     * We should definitely be at the ending "S? '>'" part
     */
    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        return;
    SKIP_BLANKS;
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
    } else
        NEXT1;

    /*
     * [ WFC: Element Type Match ]
     * The Name in an element's end-tag must match the element type in the
     * start-tag.
     *
     * name is (xmlChar*) 1 on a match, NULL when no name could be parsed,
     * and the parsed name otherwise.
     */
    if (name != (xmlChar*)1) {
        if (name == NULL) name = BAD_CAST "unparseable";
        if ((line == 0) && (ctxt->node != NULL))
            line = ctxt->node->line;
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
                     "Opening and ending tag mismatch: %s line %d and %s\n",
                                ctxt->name, line, name);
    }

    /*
     * SAX: End of Tag
     */
done:
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
        (!ctxt->disableSAX))
        ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);

    spacePop(ctxt);
    /* drop the namespace bindings declared on the matching start tag */
    if (nsNr != 0)
        nsPop(ctxt, nsNr);
    return;
}

/**
 * xmlParseCDSect:
 * @ctxt:  an XML parser context
 *
 * Parse escaped pure raw content.
9728 * 9729 * [18] CDSect ::= CDStart CData CDEnd 9730 * 9731 * [19] CDStart ::= '<![CDATA[' 9732 * 9733 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9734 * 9735 * [21] CDEnd ::= ']]>' 9736 */ 9737 void 9738 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9739 xmlChar *buf = NULL; 9740 int len = 0; 9741 int size = XML_PARSER_BUFFER_SIZE; 9742 int r, rl; 9743 int s, sl; 9744 int cur, l; 9745 int count = 0; 9746 9747 /* Check 2.6.0 was NXT(0) not RAW */ 9748 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9749 SKIP(9); 9750 } else 9751 return; 9752 9753 ctxt->instate = XML_PARSER_CDATA_SECTION; 9754 r = CUR_CHAR(rl); 9755 if (!IS_CHAR(r)) { 9756 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9757 ctxt->instate = XML_PARSER_CONTENT; 9758 return; 9759 } 9760 NEXTL(rl); 9761 s = CUR_CHAR(sl); 9762 if (!IS_CHAR(s)) { 9763 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9764 ctxt->instate = XML_PARSER_CONTENT; 9765 return; 9766 } 9767 NEXTL(sl); 9768 cur = CUR_CHAR(l); 9769 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9770 if (buf == NULL) { 9771 xmlErrMemory(ctxt, NULL); 9772 return; 9773 } 9774 while (IS_CHAR(cur) && 9775 ((r != ']') || (s != ']') || (cur != '>'))) { 9776 if (len + 5 >= size) { 9777 xmlChar *tmp; 9778 9779 if ((size > XML_MAX_TEXT_LENGTH) && 9780 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9781 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9782 "CData section too big found", NULL); 9783 xmlFree (buf); 9784 return; 9785 } 9786 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9787 if (tmp == NULL) { 9788 xmlFree(buf); 9789 xmlErrMemory(ctxt, NULL); 9790 return; 9791 } 9792 buf = tmp; 9793 size *= 2; 9794 } 9795 COPY_BUF(rl,buf,len,r); 9796 r = s; 9797 rl = sl; 9798 s = cur; 9799 sl = l; 9800 count++; 9801 if (count > 50) { 9802 GROW; 9803 if (ctxt->instate == XML_PARSER_EOF) { 9804 xmlFree(buf); 9805 return; 9806 } 9807 count = 0; 9808 } 9809 NEXTL(l); 9810 cur = CUR_CHAR(l); 9811 } 9812 buf[len] = 0; 
9813 ctxt->instate = XML_PARSER_CONTENT; 9814 if (cur != '>') { 9815 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9816 "CData section not finished\n%.50s\n", buf); 9817 xmlFree(buf); 9818 return; 9819 } 9820 NEXTL(l); 9821 9822 /* 9823 * OK the buffer is to be consumed as cdata. 9824 */ 9825 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9826 if (ctxt->sax->cdataBlock != NULL) 9827 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9828 else if (ctxt->sax->characters != NULL) 9829 ctxt->sax->characters(ctxt->userData, buf, len); 9830 } 9831 xmlFree(buf); 9832 } 9833 9834 /** 9835 * xmlParseContent: 9836 * @ctxt: an XML parser context 9837 * 9838 * Parse a content: 9839 * 9840 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9841 */ 9842 9843 void 9844 xmlParseContent(xmlParserCtxtPtr ctxt) { 9845 GROW; 9846 while ((RAW != 0) && 9847 ((RAW != '<') || (NXT(1) != '/')) && 9848 (ctxt->instate != XML_PARSER_EOF)) { 9849 const xmlChar *test = CUR_PTR; 9850 unsigned int cons = ctxt->input->consumed; 9851 const xmlChar *cur = ctxt->input->cur; 9852 9853 /* 9854 * First case : a Processing Instruction. 9855 */ 9856 if ((*cur == '<') && (cur[1] == '?')) { 9857 xmlParsePI(ctxt); 9858 } 9859 9860 /* 9861 * Second case : a CDSection 9862 */ 9863 /* 2.6.0 test was *cur not RAW */ 9864 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9865 xmlParseCDSect(ctxt); 9866 } 9867 9868 /* 9869 * Third case : a comment 9870 */ 9871 else if ((*cur == '<') && (NXT(1) == '!') && 9872 (NXT(2) == '-') && (NXT(3) == '-')) { 9873 xmlParseComment(ctxt); 9874 ctxt->instate = XML_PARSER_CONTENT; 9875 } 9876 9877 /* 9878 * Fourth case : a sub-element. 9879 */ 9880 else if (*cur == '<') { 9881 xmlParseElement(ctxt); 9882 } 9883 9884 /* 9885 * Fifth case : a reference. 
If if has not been resolved, 9886 * parsing returns it's Name, create the node 9887 */ 9888 9889 else if (*cur == '&') { 9890 xmlParseReference(ctxt); 9891 } 9892 9893 /* 9894 * Last case, text. Note that References are handled directly. 9895 */ 9896 else { 9897 xmlParseCharData(ctxt, 0); 9898 } 9899 9900 GROW; 9901 /* 9902 * Pop-up of finished entities. 9903 */ 9904 while ((RAW == 0) && (ctxt->inputNr > 1)) 9905 xmlPopInput(ctxt); 9906 SHRINK; 9907 9908 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9909 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9910 "detected an error in element content\n"); 9911 ctxt->instate = XML_PARSER_EOF; 9912 break; 9913 } 9914 } 9915 } 9916 9917 /** 9918 * xmlParseElement: 9919 * @ctxt: an XML parser context 9920 * 9921 * parse an XML element, this is highly recursive 9922 * 9923 * [39] element ::= EmptyElemTag | STag content ETag 9924 * 9925 * [ WFC: Element Type Match ] 9926 * The Name in an element's end-tag must match the element type in the 9927 * start-tag. 
9928 * 9929 */ 9930 9931 void 9932 xmlParseElement(xmlParserCtxtPtr ctxt) { 9933 const xmlChar *name; 9934 const xmlChar *prefix = NULL; 9935 const xmlChar *URI = NULL; 9936 xmlParserNodeInfo node_info; 9937 int line, tlen = 0; 9938 xmlNodePtr ret; 9939 int nsNr = ctxt->nsNr; 9940 9941 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 9942 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9943 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 9944 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 9945 xmlParserMaxDepth); 9946 ctxt->instate = XML_PARSER_EOF; 9947 return; 9948 } 9949 9950 /* Capture start position */ 9951 if (ctxt->record_info) { 9952 node_info.begin_pos = ctxt->input->consumed + 9953 (CUR_PTR - ctxt->input->base); 9954 node_info.begin_line = ctxt->input->line; 9955 } 9956 9957 if (ctxt->spaceNr == 0) 9958 spacePush(ctxt, -1); 9959 else if (*ctxt->space == -2) 9960 spacePush(ctxt, -1); 9961 else 9962 spacePush(ctxt, *ctxt->space); 9963 9964 line = ctxt->input->line; 9965 #ifdef LIBXML_SAX1_ENABLED 9966 if (ctxt->sax2) 9967 #endif /* LIBXML_SAX1_ENABLED */ 9968 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9969 #ifdef LIBXML_SAX1_ENABLED 9970 else 9971 name = xmlParseStartTag(ctxt); 9972 #endif /* LIBXML_SAX1_ENABLED */ 9973 if (ctxt->instate == XML_PARSER_EOF) 9974 return; 9975 if (name == NULL) { 9976 spacePop(ctxt); 9977 return; 9978 } 9979 namePush(ctxt, name); 9980 ret = ctxt->node; 9981 9982 #ifdef LIBXML_VALID_ENABLED 9983 /* 9984 * [ VC: Root Element Type ] 9985 * The Name in the document type declaration must match the element 9986 * type of the root element. 9987 */ 9988 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9989 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9990 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9991 #endif /* LIBXML_VALID_ENABLED */ 9992 9993 /* 9994 * Check for an Empty Element. 
9995 */ 9996 if ((RAW == '/') && (NXT(1) == '>')) { 9997 SKIP(2); 9998 if (ctxt->sax2) { 9999 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10000 (!ctxt->disableSAX)) 10001 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10002 #ifdef LIBXML_SAX1_ENABLED 10003 } else { 10004 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10005 (!ctxt->disableSAX)) 10006 ctxt->sax->endElement(ctxt->userData, name); 10007 #endif /* LIBXML_SAX1_ENABLED */ 10008 } 10009 namePop(ctxt); 10010 spacePop(ctxt); 10011 if (nsNr != ctxt->nsNr) 10012 nsPop(ctxt, ctxt->nsNr - nsNr); 10013 if ( ret != NULL && ctxt->record_info ) { 10014 node_info.end_pos = ctxt->input->consumed + 10015 (CUR_PTR - ctxt->input->base); 10016 node_info.end_line = ctxt->input->line; 10017 node_info.node = ret; 10018 xmlParserAddNodeInfo(ctxt, &node_info); 10019 } 10020 return; 10021 } 10022 if (RAW == '>') { 10023 NEXT1; 10024 } else { 10025 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10026 "Couldn't find end of Start Tag %s line %d\n", 10027 name, line, NULL); 10028 10029 /* 10030 * end of parsing of this node. 10031 */ 10032 nodePop(ctxt); 10033 namePop(ctxt); 10034 spacePop(ctxt); 10035 if (nsNr != ctxt->nsNr) 10036 nsPop(ctxt, ctxt->nsNr - nsNr); 10037 10038 /* 10039 * Capture end position and add node 10040 */ 10041 if ( ret != NULL && ctxt->record_info ) { 10042 node_info.end_pos = ctxt->input->consumed + 10043 (CUR_PTR - ctxt->input->base); 10044 node_info.end_line = ctxt->input->line; 10045 node_info.node = ret; 10046 xmlParserAddNodeInfo(ctxt, &node_info); 10047 } 10048 return; 10049 } 10050 10051 /* 10052 * Parse the content of the element: 10053 */ 10054 xmlParseContent(ctxt); 10055 if (ctxt->instate == XML_PARSER_EOF) 10056 return; 10057 if (!IS_BYTE_CHAR(RAW)) { 10058 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 10059 "Premature end of data in tag %s line %d\n", 10060 name, line, NULL); 10061 10062 /* 10063 * end of parsing of this node. 
10064 */ 10065 nodePop(ctxt); 10066 namePop(ctxt); 10067 spacePop(ctxt); 10068 if (nsNr != ctxt->nsNr) 10069 nsPop(ctxt, ctxt->nsNr - nsNr); 10070 return; 10071 } 10072 10073 /* 10074 * parse the end of tag: '</' should be here. 10075 */ 10076 if (ctxt->sax2) { 10077 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 10078 namePop(ctxt); 10079 } 10080 #ifdef LIBXML_SAX1_ENABLED 10081 else 10082 xmlParseEndTag1(ctxt, line); 10083 #endif /* LIBXML_SAX1_ENABLED */ 10084 10085 /* 10086 * Capture end position and add node 10087 */ 10088 if ( ret != NULL && ctxt->record_info ) { 10089 node_info.end_pos = ctxt->input->consumed + 10090 (CUR_PTR - ctxt->input->base); 10091 node_info.end_line = ctxt->input->line; 10092 node_info.node = ret; 10093 xmlParserAddNodeInfo(ctxt, &node_info); 10094 } 10095 } 10096 10097 /** 10098 * xmlParseVersionNum: 10099 * @ctxt: an XML parser context 10100 * 10101 * parse the XML version value. 10102 * 10103 * [26] VersionNum ::= '1.' [0-9]+ 10104 * 10105 * In practice allow [0-9].[0-9]+ at that level 10106 * 10107 * Returns the string giving the XML version number, or NULL 10108 */ 10109 xmlChar * 10110 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10111 xmlChar *buf = NULL; 10112 int len = 0; 10113 int size = 10; 10114 xmlChar cur; 10115 10116 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10117 if (buf == NULL) { 10118 xmlErrMemory(ctxt, NULL); 10119 return(NULL); 10120 } 10121 cur = CUR; 10122 if (!((cur >= '0') && (cur <= '9'))) { 10123 xmlFree(buf); 10124 return(NULL); 10125 } 10126 buf[len++] = cur; 10127 NEXT; 10128 cur=CUR; 10129 if (cur != '.') { 10130 xmlFree(buf); 10131 return(NULL); 10132 } 10133 buf[len++] = cur; 10134 NEXT; 10135 cur=CUR; 10136 while ((cur >= '0') && (cur <= '9')) { 10137 if (len + 1 >= size) { 10138 xmlChar *tmp; 10139 10140 size *= 2; 10141 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10142 if (tmp == NULL) { 10143 xmlFree(buf); 10144 xmlErrMemory(ctxt, NULL); 10145 
return(NULL); 10146 } 10147 buf = tmp; 10148 } 10149 buf[len++] = cur; 10150 NEXT; 10151 cur=CUR; 10152 } 10153 buf[len] = 0; 10154 return(buf); 10155 } 10156 10157 /** 10158 * xmlParseVersionInfo: 10159 * @ctxt: an XML parser context 10160 * 10161 * parse the XML version. 10162 * 10163 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10164 * 10165 * [25] Eq ::= S? '=' S? 10166 * 10167 * Returns the version string, e.g. "1.0" 10168 */ 10169 10170 xmlChar * 10171 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10172 xmlChar *version = NULL; 10173 10174 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10175 SKIP(7); 10176 SKIP_BLANKS; 10177 if (RAW != '=') { 10178 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10179 return(NULL); 10180 } 10181 NEXT; 10182 SKIP_BLANKS; 10183 if (RAW == '"') { 10184 NEXT; 10185 version = xmlParseVersionNum(ctxt); 10186 if (RAW != '"') { 10187 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10188 } else 10189 NEXT; 10190 } else if (RAW == '\''){ 10191 NEXT; 10192 version = xmlParseVersionNum(ctxt); 10193 if (RAW != '\'') { 10194 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10195 } else 10196 NEXT; 10197 } else { 10198 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10199 } 10200 } 10201 return(version); 10202 } 10203 10204 /** 10205 * xmlParseEncName: 10206 * @ctxt: an XML parser context 10207 * 10208 * parse the XML encoding name 10209 * 10210 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10211 * 10212 * Returns the encoding name value or NULL 10213 */ 10214 xmlChar * 10215 xmlParseEncName(xmlParserCtxtPtr ctxt) { 10216 xmlChar *buf = NULL; 10217 int len = 0; 10218 int size = 10; 10219 xmlChar cur; 10220 10221 cur = CUR; 10222 if (((cur >= 'a') && (cur <= 'z')) || 10223 ((cur >= 'A') && (cur <= 'Z'))) { 10224 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10225 if (buf == NULL) { 10226 xmlErrMemory(ctxt, NULL); 10227 return(NULL); 10228 } 10229 10230 buf[len++] = cur; 10231 
NEXT; 10232 cur = CUR; 10233 while (((cur >= 'a') && (cur <= 'z')) || 10234 ((cur >= 'A') && (cur <= 'Z')) || 10235 ((cur >= '0') && (cur <= '9')) || 10236 (cur == '.') || (cur == '_') || 10237 (cur == '-')) { 10238 if (len + 1 >= size) { 10239 xmlChar *tmp; 10240 10241 size *= 2; 10242 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10243 if (tmp == NULL) { 10244 xmlErrMemory(ctxt, NULL); 10245 xmlFree(buf); 10246 return(NULL); 10247 } 10248 buf = tmp; 10249 } 10250 buf[len++] = cur; 10251 NEXT; 10252 cur = CUR; 10253 if (cur == 0) { 10254 SHRINK; 10255 GROW; 10256 cur = CUR; 10257 } 10258 } 10259 buf[len] = 0; 10260 } else { 10261 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10262 } 10263 return(buf); 10264 } 10265 10266 /** 10267 * xmlParseEncodingDecl: 10268 * @ctxt: an XML parser context 10269 * 10270 * parse the XML encoding declaration 10271 * 10272 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10273 * 10274 * this setups the conversion filters. 
10275 * 10276 * Returns the encoding value or NULL 10277 */ 10278 10279 const xmlChar * 10280 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10281 xmlChar *encoding = NULL; 10282 10283 SKIP_BLANKS; 10284 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10285 SKIP(8); 10286 SKIP_BLANKS; 10287 if (RAW != '=') { 10288 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10289 return(NULL); 10290 } 10291 NEXT; 10292 SKIP_BLANKS; 10293 if (RAW == '"') { 10294 NEXT; 10295 encoding = xmlParseEncName(ctxt); 10296 if (RAW != '"') { 10297 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10298 } else 10299 NEXT; 10300 } else if (RAW == '\''){ 10301 NEXT; 10302 encoding = xmlParseEncName(ctxt); 10303 if (RAW != '\'') { 10304 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10305 } else 10306 NEXT; 10307 } else { 10308 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10309 } 10310 10311 /* 10312 * Non standard parsing, allowing the user to ignore encoding 10313 */ 10314 if (ctxt->options & XML_PARSE_IGNORE_ENC) 10315 return(encoding); 10316 10317 /* 10318 * UTF-16 encoding stwich has already taken place at this stage, 10319 * more over the little-endian/big-endian selection is already done 10320 */ 10321 if ((encoding != NULL) && 10322 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10323 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10324 /* 10325 * If no encoding was passed to the parser, that we are 10326 * using UTF-16 and no decoder is present i.e. 
the 10327 * document is apparently UTF-8 compatible, then raise an 10328 * encoding mismatch fatal error 10329 */ 10330 if ((ctxt->encoding == NULL) && 10331 (ctxt->input->buf != NULL) && 10332 (ctxt->input->buf->encoder == NULL)) { 10333 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10334 "Document labelled UTF-16 but has UTF-8 content\n"); 10335 } 10336 if (ctxt->encoding != NULL) 10337 xmlFree((xmlChar *) ctxt->encoding); 10338 ctxt->encoding = encoding; 10339 } 10340 /* 10341 * UTF-8 encoding is handled natively 10342 */ 10343 else if ((encoding != NULL) && 10344 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10345 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10346 if (ctxt->encoding != NULL) 10347 xmlFree((xmlChar *) ctxt->encoding); 10348 ctxt->encoding = encoding; 10349 } 10350 else if (encoding != NULL) { 10351 xmlCharEncodingHandlerPtr handler; 10352 10353 if (ctxt->input->encoding != NULL) 10354 xmlFree((xmlChar *) ctxt->input->encoding); 10355 ctxt->input->encoding = encoding; 10356 10357 handler = xmlFindCharEncodingHandler((const char *) encoding); 10358 if (handler != NULL) { 10359 xmlSwitchToEncoding(ctxt, handler); 10360 } else { 10361 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10362 "Unsupported encoding %s\n", encoding); 10363 return(NULL); 10364 } 10365 } 10366 } 10367 return(encoding); 10368 } 10369 10370 /** 10371 * xmlParseSDDecl: 10372 * @ctxt: an XML parser context 10373 * 10374 * parse the XML standalone declaration 10375 * 10376 * [32] SDDecl ::= S 'standalone' Eq 10377 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10378 * 10379 * [ VC: Standalone Document Declaration ] 10380 * TODO The standalone document declaration must have the value "no" 10381 * if any external markup declarations contain declarations of: 10382 * - attributes with default values, if elements to which these 10383 * attributes apply appear in the document without specifications 10384 * of values for these attributes, or 10385 * - entities 
(other than amp, lt, gt, apos, quot), if references 10386 * to those entities appear in the document, or 10387 * - attributes with values subject to normalization, where the 10388 * attribute appears in the document with a value which will change 10389 * as a result of normalization, or 10390 * - element types with element content, if white space occurs directly 10391 * within any instance of those types. 10392 * 10393 * Returns: 10394 * 1 if standalone="yes" 10395 * 0 if standalone="no" 10396 * -2 if standalone attribute is missing or invalid 10397 * (A standalone value of -2 means that the XML declaration was found, 10398 * but no value was specified for the standalone attribute). 10399 */ 10400 10401 int 10402 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10403 int standalone = -2; 10404 10405 SKIP_BLANKS; 10406 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10407 SKIP(10); 10408 SKIP_BLANKS; 10409 if (RAW != '=') { 10410 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10411 return(standalone); 10412 } 10413 NEXT; 10414 SKIP_BLANKS; 10415 if (RAW == '\''){ 10416 NEXT; 10417 if ((RAW == 'n') && (NXT(1) == 'o')) { 10418 standalone = 0; 10419 SKIP(2); 10420 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10421 (NXT(2) == 's')) { 10422 standalone = 1; 10423 SKIP(3); 10424 } else { 10425 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10426 } 10427 if (RAW != '\'') { 10428 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10429 } else 10430 NEXT; 10431 } else if (RAW == '"'){ 10432 NEXT; 10433 if ((RAW == 'n') && (NXT(1) == 'o')) { 10434 standalone = 0; 10435 SKIP(2); 10436 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10437 (NXT(2) == 's')) { 10438 standalone = 1; 10439 SKIP(3); 10440 } else { 10441 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10442 } 10443 if (RAW != '"') { 10444 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10445 } else 10446 NEXT; 10447 } else { 10448 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10449 } 
10450 } 10451 return(standalone); 10452 } 10453 10454 /** 10455 * xmlParseXMLDecl: 10456 * @ctxt: an XML parser context 10457 * 10458 * parse an XML declaration header 10459 * 10460 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10461 */ 10462 10463 void 10464 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10465 xmlChar *version; 10466 10467 /* 10468 * This value for standalone indicates that the document has an 10469 * XML declaration but it does not have a standalone attribute. 10470 * It will be overwritten later if a standalone attribute is found. 10471 */ 10472 ctxt->input->standalone = -2; 10473 10474 /* 10475 * We know that '<?xml' is here. 10476 */ 10477 SKIP(5); 10478 10479 if (!IS_BLANK_CH(RAW)) { 10480 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10481 "Blank needed after '<?xml'\n"); 10482 } 10483 SKIP_BLANKS; 10484 10485 /* 10486 * We must have the VersionInfo here. 10487 */ 10488 version = xmlParseVersionInfo(ctxt); 10489 if (version == NULL) { 10490 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10491 } else { 10492 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10493 /* 10494 * Changed here for XML-1.0 5th edition 10495 */ 10496 if (ctxt->options & XML_PARSE_OLD10) { 10497 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10498 "Unsupported version '%s'\n", 10499 version); 10500 } else { 10501 if ((version[0] == '1') && ((version[1] == '.'))) { 10502 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10503 "Unsupported version '%s'\n", 10504 version, NULL); 10505 } else { 10506 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10507 "Unsupported version '%s'\n", 10508 version); 10509 } 10510 } 10511 } 10512 if (ctxt->version != NULL) 10513 xmlFree((void *) ctxt->version); 10514 ctxt->version = version; 10515 } 10516 10517 /* 10518 * We may have the encoding declaration 10519 */ 10520 if (!IS_BLANK_CH(RAW)) { 10521 if ((RAW == '?') && (NXT(1) == '>')) { 10522 SKIP(2); 10523 return; 10524 } 10525 xmlFatalErrMsg(ctxt, 
XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10526 } 10527 xmlParseEncodingDecl(ctxt); 10528 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10529 /* 10530 * The XML REC instructs us to stop parsing right here 10531 */ 10532 return; 10533 } 10534 10535 /* 10536 * We may have the standalone status. 10537 */ 10538 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10539 if ((RAW == '?') && (NXT(1) == '>')) { 10540 SKIP(2); 10541 return; 10542 } 10543 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10544 } 10545 10546 /* 10547 * We can grow the input buffer freely at that point 10548 */ 10549 GROW; 10550 10551 SKIP_BLANKS; 10552 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10553 10554 SKIP_BLANKS; 10555 if ((RAW == '?') && (NXT(1) == '>')) { 10556 SKIP(2); 10557 } else if (RAW == '>') { 10558 /* Deprecated old WD ... */ 10559 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10560 NEXT; 10561 } else { 10562 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10563 MOVETO_ENDTAG(CUR_PTR); 10564 NEXT; 10565 } 10566 } 10567 10568 /** 10569 * xmlParseMisc: 10570 * @ctxt: an XML parser context 10571 * 10572 * parse an XML Misc* optional field. 10573 * 10574 * [27] Misc ::= Comment | PI | S 10575 */ 10576 10577 void 10578 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10579 while ((ctxt->instate != XML_PARSER_EOF) && 10580 (((RAW == '<') && (NXT(1) == '?')) || 10581 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10582 IS_BLANK_CH(CUR))) { 10583 if ((RAW == '<') && (NXT(1) == '?')) { 10584 xmlParsePI(ctxt); 10585 } else if (IS_BLANK_CH(CUR)) { 10586 NEXT; 10587 } else 10588 xmlParseComment(ctxt); 10589 } 10590 } 10591 10592 /** 10593 * xmlParseDocument: 10594 * @ctxt: an XML parser context 10595 * 10596 * parse an XML document (and build a tree if using the standard SAX 10597 * interface). 10598 * 10599 * [1] document ::= prolog element Misc* 10600 * 10601 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 
10602 * 10603 * Returns 0, -1 in case of error. the parser context is augmented 10604 * as a result of the parsing. 10605 */ 10606 10607 int 10608 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10609 xmlChar start[4]; 10610 xmlCharEncoding enc; 10611 10612 xmlInitParser(); 10613 10614 if ((ctxt == NULL) || (ctxt->input == NULL)) 10615 return(-1); 10616 10617 GROW; 10618 10619 /* 10620 * SAX: detecting the level. 10621 */ 10622 xmlDetectSAX2(ctxt); 10623 10624 /* 10625 * SAX: beginning of the document processing. 10626 */ 10627 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10628 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10629 if (ctxt->instate == XML_PARSER_EOF) 10630 return(-1); 10631 10632 if ((ctxt->encoding == NULL) && 10633 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10634 /* 10635 * Get the 4 first bytes and decode the charset 10636 * if enc != XML_CHAR_ENCODING_NONE 10637 * plug some encoding conversion routines. 10638 */ 10639 start[0] = RAW; 10640 start[1] = NXT(1); 10641 start[2] = NXT(2); 10642 start[3] = NXT(3); 10643 enc = xmlDetectCharEncoding(&start[0], 4); 10644 if (enc != XML_CHAR_ENCODING_NONE) { 10645 xmlSwitchEncoding(ctxt, enc); 10646 } 10647 } 10648 10649 10650 if (CUR == 0) { 10651 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10652 } 10653 10654 /* 10655 * Check for the XMLDecl in the Prolog. 10656 * do not GROW here to avoid the detected encoder to decode more 10657 * than just the first line, unless the amount of data is really 10658 * too small to hold "<?xml version="1.0" encoding="foo" 10659 */ 10660 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10661 GROW; 10662 } 10663 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10664 10665 /* 10666 * Note that we will switch encoding on the fly. 
10667 */ 10668 xmlParseXMLDecl(ctxt); 10669 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10670 /* 10671 * The XML REC instructs us to stop parsing right here 10672 */ 10673 return(-1); 10674 } 10675 ctxt->standalone = ctxt->input->standalone; 10676 SKIP_BLANKS; 10677 } else { 10678 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10679 } 10680 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10681 ctxt->sax->startDocument(ctxt->userData); 10682 if (ctxt->instate == XML_PARSER_EOF) 10683 return(-1); 10684 10685 /* 10686 * The Misc part of the Prolog 10687 */ 10688 GROW; 10689 xmlParseMisc(ctxt); 10690 10691 /* 10692 * Then possibly doc type declaration(s) and more Misc 10693 * (doctypedecl Misc*)? 10694 */ 10695 GROW; 10696 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10697 10698 ctxt->inSubset = 1; 10699 xmlParseDocTypeDecl(ctxt); 10700 if (RAW == '[') { 10701 ctxt->instate = XML_PARSER_DTD; 10702 xmlParseInternalSubset(ctxt); 10703 if (ctxt->instate == XML_PARSER_EOF) 10704 return(-1); 10705 } 10706 10707 /* 10708 * Create and update the external subset. 
10709 */ 10710 ctxt->inSubset = 2; 10711 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10712 (!ctxt->disableSAX)) 10713 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10714 ctxt->extSubSystem, ctxt->extSubURI); 10715 if (ctxt->instate == XML_PARSER_EOF) 10716 return(-1); 10717 ctxt->inSubset = 0; 10718 10719 xmlCleanSpecialAttr(ctxt); 10720 10721 ctxt->instate = XML_PARSER_PROLOG; 10722 xmlParseMisc(ctxt); 10723 } 10724 10725 /* 10726 * Time to start parsing the tree itself 10727 */ 10728 GROW; 10729 if (RAW != '<') { 10730 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10731 "Start tag expected, '<' not found\n"); 10732 } else { 10733 ctxt->instate = XML_PARSER_CONTENT; 10734 xmlParseElement(ctxt); 10735 ctxt->instate = XML_PARSER_EPILOG; 10736 10737 10738 /* 10739 * The Misc part at the end 10740 */ 10741 xmlParseMisc(ctxt); 10742 10743 if (RAW != 0) { 10744 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10745 } 10746 ctxt->instate = XML_PARSER_EOF; 10747 } 10748 10749 /* 10750 * SAX: end of the document processing. 10751 */ 10752 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10753 ctxt->sax->endDocument(ctxt->userData); 10754 10755 /* 10756 * Remove locally kept entity definitions if the tree was not built 10757 */ 10758 if ((ctxt->myDoc != NULL) && 10759 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10760 xmlFreeDoc(ctxt->myDoc); 10761 ctxt->myDoc = NULL; 10762 } 10763 10764 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10765 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10766 if (ctxt->valid) 10767 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10768 if (ctxt->nsWellFormed) 10769 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10770 if (ctxt->options & XML_PARSE_OLD10) 10771 ctxt->myDoc->properties |= XML_DOC_OLD10; 10772 } 10773 if (! 
ctxt->wellFormed) { 10774 ctxt->valid = 0; 10775 return(-1); 10776 } 10777 return(0); 10778 } 10779 10780 /** 10781 * xmlParseExtParsedEnt: 10782 * @ctxt: an XML parser context 10783 * 10784 * parse a general parsed entity 10785 * An external general parsed entity is well-formed if it matches the 10786 * production labeled extParsedEnt. 10787 * 10788 * [78] extParsedEnt ::= TextDecl? content 10789 * 10790 * Returns 0, -1 in case of error. the parser context is augmented 10791 * as a result of the parsing. 10792 */ 10793 10794 int 10795 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10796 xmlChar start[4]; 10797 xmlCharEncoding enc; 10798 10799 if ((ctxt == NULL) || (ctxt->input == NULL)) 10800 return(-1); 10801 10802 xmlDefaultSAXHandlerInit(); 10803 10804 xmlDetectSAX2(ctxt); 10805 10806 GROW; 10807 10808 /* 10809 * SAX: beginning of the document processing. 10810 */ 10811 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10812 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10813 10814 /* 10815 * Get the 4 first bytes and decode the charset 10816 * if enc != XML_CHAR_ENCODING_NONE 10817 * plug some encoding conversion routines. 10818 */ 10819 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10820 start[0] = RAW; 10821 start[1] = NXT(1); 10822 start[2] = NXT(2); 10823 start[3] = NXT(3); 10824 enc = xmlDetectCharEncoding(start, 4); 10825 if (enc != XML_CHAR_ENCODING_NONE) { 10826 xmlSwitchEncoding(ctxt, enc); 10827 } 10828 } 10829 10830 10831 if (CUR == 0) { 10832 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10833 } 10834 10835 /* 10836 * Check for the XMLDecl in the Prolog. 10837 */ 10838 GROW; 10839 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10840 10841 /* 10842 * Note that we will switch encoding on the fly. 
10843 */ 10844 xmlParseXMLDecl(ctxt); 10845 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10846 /* 10847 * The XML REC instructs us to stop parsing right here 10848 */ 10849 return(-1); 10850 } 10851 SKIP_BLANKS; 10852 } else { 10853 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10854 } 10855 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10856 ctxt->sax->startDocument(ctxt->userData); 10857 if (ctxt->instate == XML_PARSER_EOF) 10858 return(-1); 10859 10860 /* 10861 * Doing validity checking on chunk doesn't make sense 10862 */ 10863 ctxt->instate = XML_PARSER_CONTENT; 10864 ctxt->validate = 0; 10865 ctxt->loadsubset = 0; 10866 ctxt->depth = 0; 10867 10868 xmlParseContent(ctxt); 10869 if (ctxt->instate == XML_PARSER_EOF) 10870 return(-1); 10871 10872 if ((RAW == '<') && (NXT(1) == '/')) { 10873 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10874 } else if (RAW != 0) { 10875 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10876 } 10877 10878 /* 10879 * SAX: end of the document processing. 10880 */ 10881 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10882 ctxt->sax->endDocument(ctxt->userData); 10883 10884 if (! ctxt->wellFormed) return(-1); 10885 return(0); 10886 } 10887 10888 #ifdef LIBXML_PUSH_ENABLED 10889 /************************************************************************ 10890 * * 10891 * Progressive parsing interfaces * 10892 * * 10893 ************************************************************************/ 10894 10895 /** 10896 * xmlParseLookupSequence: 10897 * @ctxt: an XML parser context 10898 * @first: the first char to lookup 10899 * @next: the next char to lookup or zero 10900 * @third: the next char to lookup or zero 10901 * 10902 * Try to find if a sequence (first, next, third) or just (first next) or 10903 * (first) is available in the input stream. 
10904 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10905 * to avoid rescanning sequences of bytes, it DOES change the state of the 10906 * parser, do not use liberally. 10907 * 10908 * Returns the index to the current parsing point if the full sequence 10909 * is available, -1 otherwise. 10910 */ 10911 static int 10912 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 10913 xmlChar next, xmlChar third) { 10914 int base, len; 10915 xmlParserInputPtr in; 10916 const xmlChar *buf; 10917 10918 in = ctxt->input; 10919 if (in == NULL) return(-1); 10920 base = in->cur - in->base; 10921 if (base < 0) return(-1); 10922 if (ctxt->checkIndex > base) 10923 base = ctxt->checkIndex; 10924 if (in->buf == NULL) { 10925 buf = in->base; 10926 len = in->length; 10927 } else { 10928 buf = xmlBufContent(in->buf->buffer); 10929 len = xmlBufUse(in->buf->buffer); 10930 } 10931 /* take into account the sequence length */ 10932 if (third) len -= 2; 10933 else if (next) len --; 10934 for (;base < len;base++) { 10935 if (buf[base] == first) { 10936 if (third != 0) { 10937 if ((buf[base + 1] != next) || 10938 (buf[base + 2] != third)) continue; 10939 } else if (next != 0) { 10940 if (buf[base + 1] != next) continue; 10941 } 10942 ctxt->checkIndex = 0; 10943 #ifdef DEBUG_PUSH 10944 if (next == 0) 10945 xmlGenericError(xmlGenericErrorContext, 10946 "PP: lookup '%c' found at %d\n", 10947 first, base); 10948 else if (third == 0) 10949 xmlGenericError(xmlGenericErrorContext, 10950 "PP: lookup '%c%c' found at %d\n", 10951 first, next, base); 10952 else 10953 xmlGenericError(xmlGenericErrorContext, 10954 "PP: lookup '%c%c%c' found at %d\n", 10955 first, next, third, base); 10956 #endif 10957 return(base - (in->cur - in->base)); 10958 } 10959 } 10960 ctxt->checkIndex = base; 10961 #ifdef DEBUG_PUSH 10962 if (next == 0) 10963 xmlGenericError(xmlGenericErrorContext, 10964 "PP: lookup '%c' failed\n", first); 10965 else if (third == 0) 10966 
xmlGenericError(xmlGenericErrorContext, 10967 "PP: lookup '%c%c' failed\n", first, next); 10968 else 10969 xmlGenericError(xmlGenericErrorContext, 10970 "PP: lookup '%c%c%c' failed\n", first, next, third); 10971 #endif 10972 return(-1); 10973 } 10974 10975 /** 10976 * xmlParseGetLasts: 10977 * @ctxt: an XML parser context 10978 * @lastlt: pointer to store the last '<' from the input 10979 * @lastgt: pointer to store the last '>' from the input 10980 * 10981 * Lookup the last < and > in the current chunk 10982 */ 10983 static void 10984 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 10985 const xmlChar **lastgt) { 10986 const xmlChar *tmp; 10987 10988 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 10989 xmlGenericError(xmlGenericErrorContext, 10990 "Internal error: xmlParseGetLasts\n"); 10991 return; 10992 } 10993 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 10994 tmp = ctxt->input->end; 10995 tmp--; 10996 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 10997 if (tmp < ctxt->input->base) { 10998 *lastlt = NULL; 10999 *lastgt = NULL; 11000 } else { 11001 *lastlt = tmp; 11002 tmp++; 11003 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 11004 if (*tmp == '\'') { 11005 tmp++; 11006 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 11007 if (tmp < ctxt->input->end) tmp++; 11008 } else if (*tmp == '"') { 11009 tmp++; 11010 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 11011 if (tmp < ctxt->input->end) tmp++; 11012 } else 11013 tmp++; 11014 } 11015 if (tmp < ctxt->input->end) 11016 *lastgt = tmp; 11017 else { 11018 tmp = *lastlt; 11019 tmp--; 11020 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11021 if (tmp >= ctxt->input->base) 11022 *lastgt = tmp; 11023 else 11024 *lastgt = NULL; 11025 } 11026 } 11027 } else { 11028 *lastlt = NULL; 11029 *lastgt = NULL; 11030 } 11031 } 11032 /** 11033 * xmlCheckCdataPush: 11034 * @cur: pointer to the bock of characters 11035 * @len: length of the 
block in bytes 11036 * 11037 * Check that the block of characters is okay as SCdata content [20] 11038 * 11039 * Returns the number of bytes to pass if okay, a negative index where an 11040 * UTF-8 error occured otherwise 11041 */ 11042 static int 11043 xmlCheckCdataPush(const xmlChar *utf, int len) { 11044 int ix; 11045 unsigned char c; 11046 int codepoint; 11047 11048 if ((utf == NULL) || (len <= 0)) 11049 return(0); 11050 11051 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11052 c = utf[ix]; 11053 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11054 if (c >= 0x20) 11055 ix++; 11056 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11057 ix++; 11058 else 11059 return(-ix); 11060 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11061 if (ix + 2 > len) return(ix); 11062 if ((utf[ix+1] & 0xc0 ) != 0x80) 11063 return(-ix); 11064 codepoint = (utf[ix] & 0x1f) << 6; 11065 codepoint |= utf[ix+1] & 0x3f; 11066 if (!xmlIsCharQ(codepoint)) 11067 return(-ix); 11068 ix += 2; 11069 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11070 if (ix + 3 > len) return(ix); 11071 if (((utf[ix+1] & 0xc0) != 0x80) || 11072 ((utf[ix+2] & 0xc0) != 0x80)) 11073 return(-ix); 11074 codepoint = (utf[ix] & 0xf) << 12; 11075 codepoint |= (utf[ix+1] & 0x3f) << 6; 11076 codepoint |= utf[ix+2] & 0x3f; 11077 if (!xmlIsCharQ(codepoint)) 11078 return(-ix); 11079 ix += 3; 11080 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11081 if (ix + 4 > len) return(ix); 11082 if (((utf[ix+1] & 0xc0) != 0x80) || 11083 ((utf[ix+2] & 0xc0) != 0x80) || 11084 ((utf[ix+3] & 0xc0) != 0x80)) 11085 return(-ix); 11086 codepoint = (utf[ix] & 0x7) << 18; 11087 codepoint |= (utf[ix+1] & 0x3f) << 12; 11088 codepoint |= (utf[ix+2] & 0x3f) << 6; 11089 codepoint |= utf[ix+3] & 0x3f; 11090 if (!xmlIsCharQ(codepoint)) 11091 return(-ix); 11092 ix += 4; 11093 } else /* unknown encoding */ 11094 return(-ix); 11095 } 11096 return(ix); 11097 } 11098 11099 
/** 11100 * xmlParseTryOrFinish: 11101 * @ctxt: an XML parser context 11102 * @terminate: last chunk indicator 11103 * 11104 * Try to progress on parsing 11105 * 11106 * Returns zero if no parsing was possible 11107 */ 11108 static int 11109 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11110 int ret = 0; 11111 int avail, tlen; 11112 xmlChar cur, next; 11113 const xmlChar *lastlt, *lastgt; 11114 11115 if (ctxt->input == NULL) 11116 return(0); 11117 11118 #ifdef DEBUG_PUSH 11119 switch (ctxt->instate) { 11120 case XML_PARSER_EOF: 11121 xmlGenericError(xmlGenericErrorContext, 11122 "PP: try EOF\n"); break; 11123 case XML_PARSER_START: 11124 xmlGenericError(xmlGenericErrorContext, 11125 "PP: try START\n"); break; 11126 case XML_PARSER_MISC: 11127 xmlGenericError(xmlGenericErrorContext, 11128 "PP: try MISC\n");break; 11129 case XML_PARSER_COMMENT: 11130 xmlGenericError(xmlGenericErrorContext, 11131 "PP: try COMMENT\n");break; 11132 case XML_PARSER_PROLOG: 11133 xmlGenericError(xmlGenericErrorContext, 11134 "PP: try PROLOG\n");break; 11135 case XML_PARSER_START_TAG: 11136 xmlGenericError(xmlGenericErrorContext, 11137 "PP: try START_TAG\n");break; 11138 case XML_PARSER_CONTENT: 11139 xmlGenericError(xmlGenericErrorContext, 11140 "PP: try CONTENT\n");break; 11141 case XML_PARSER_CDATA_SECTION: 11142 xmlGenericError(xmlGenericErrorContext, 11143 "PP: try CDATA_SECTION\n");break; 11144 case XML_PARSER_END_TAG: 11145 xmlGenericError(xmlGenericErrorContext, 11146 "PP: try END_TAG\n");break; 11147 case XML_PARSER_ENTITY_DECL: 11148 xmlGenericError(xmlGenericErrorContext, 11149 "PP: try ENTITY_DECL\n");break; 11150 case XML_PARSER_ENTITY_VALUE: 11151 xmlGenericError(xmlGenericErrorContext, 11152 "PP: try ENTITY_VALUE\n");break; 11153 case XML_PARSER_ATTRIBUTE_VALUE: 11154 xmlGenericError(xmlGenericErrorContext, 11155 "PP: try ATTRIBUTE_VALUE\n");break; 11156 case XML_PARSER_DTD: 11157 xmlGenericError(xmlGenericErrorContext, 11158 "PP: try DTD\n");break; 11159 case 
XML_PARSER_EPILOG: 11160 xmlGenericError(xmlGenericErrorContext, 11161 "PP: try EPILOG\n");break; 11162 case XML_PARSER_PI: 11163 xmlGenericError(xmlGenericErrorContext, 11164 "PP: try PI\n");break; 11165 case XML_PARSER_IGNORE: 11166 xmlGenericError(xmlGenericErrorContext, 11167 "PP: try IGNORE\n");break; 11168 } 11169 #endif 11170 11171 if ((ctxt->input != NULL) && 11172 (ctxt->input->cur - ctxt->input->base > 4096)) { 11173 xmlSHRINK(ctxt); 11174 ctxt->checkIndex = 0; 11175 } 11176 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11177 11178 while (ctxt->instate != XML_PARSER_EOF) { 11179 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11180 return(0); 11181 11182 11183 /* 11184 * Pop-up of finished entities. 11185 */ 11186 while ((RAW == 0) && (ctxt->inputNr > 1)) 11187 xmlPopInput(ctxt); 11188 11189 if (ctxt->input == NULL) break; 11190 if (ctxt->input->buf == NULL) 11191 avail = ctxt->input->length - 11192 (ctxt->input->cur - ctxt->input->base); 11193 else { 11194 /* 11195 * If we are operating on converted input, try to flush 11196 * remainng chars to avoid them stalling in the non-converted 11197 * buffer. But do not do this in document start where 11198 * encoding="..." may not have been read and we work on a 11199 * guessed encoding. 11200 */ 11201 if ((ctxt->instate != XML_PARSER_START) && 11202 (ctxt->input->buf->raw != NULL) && 11203 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11204 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11205 ctxt->input); 11206 size_t current = ctxt->input->cur - ctxt->input->base; 11207 11208 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11209 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11210 base, current); 11211 } 11212 avail = xmlBufUse(ctxt->input->buf->buffer) - 11213 (ctxt->input->cur - ctxt->input->base); 11214 } 11215 if (avail < 1) 11216 goto done; 11217 switch (ctxt->instate) { 11218 case XML_PARSER_EOF: 11219 /* 11220 * Document parsing is done ! 
11221 */ 11222 goto done; 11223 case XML_PARSER_START: 11224 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11225 xmlChar start[4]; 11226 xmlCharEncoding enc; 11227 11228 /* 11229 * Very first chars read from the document flow. 11230 */ 11231 if (avail < 4) 11232 goto done; 11233 11234 /* 11235 * Get the 4 first bytes and decode the charset 11236 * if enc != XML_CHAR_ENCODING_NONE 11237 * plug some encoding conversion routines, 11238 * else xmlSwitchEncoding will set to (default) 11239 * UTF8. 11240 */ 11241 start[0] = RAW; 11242 start[1] = NXT(1); 11243 start[2] = NXT(2); 11244 start[3] = NXT(3); 11245 enc = xmlDetectCharEncoding(start, 4); 11246 xmlSwitchEncoding(ctxt, enc); 11247 break; 11248 } 11249 11250 if (avail < 2) 11251 goto done; 11252 cur = ctxt->input->cur[0]; 11253 next = ctxt->input->cur[1]; 11254 if (cur == 0) { 11255 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11256 ctxt->sax->setDocumentLocator(ctxt->userData, 11257 &xmlDefaultSAXLocator); 11258 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11259 ctxt->instate = XML_PARSER_EOF; 11260 #ifdef DEBUG_PUSH 11261 xmlGenericError(xmlGenericErrorContext, 11262 "PP: entering EOF\n"); 11263 #endif 11264 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11265 ctxt->sax->endDocument(ctxt->userData); 11266 goto done; 11267 } 11268 if ((cur == '<') && (next == '?')) { 11269 /* PI or XML decl */ 11270 if (avail < 5) return(ret); 11271 if ((!terminate) && 11272 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11273 return(ret); 11274 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11275 ctxt->sax->setDocumentLocator(ctxt->userData, 11276 &xmlDefaultSAXLocator); 11277 if ((ctxt->input->cur[2] == 'x') && 11278 (ctxt->input->cur[3] == 'm') && 11279 (ctxt->input->cur[4] == 'l') && 11280 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11281 ret += 5; 11282 #ifdef DEBUG_PUSH 11283 xmlGenericError(xmlGenericErrorContext, 11284 "PP: Parsing XML Decl\n"); 11285 #endif 11286 xmlParseXMLDecl(ctxt); 11287 if 
(ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11288 /* 11289 * The XML REC instructs us to stop parsing right 11290 * here 11291 */ 11292 ctxt->instate = XML_PARSER_EOF; 11293 return(0); 11294 } 11295 ctxt->standalone = ctxt->input->standalone; 11296 if ((ctxt->encoding == NULL) && 11297 (ctxt->input->encoding != NULL)) 11298 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11299 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11300 (!ctxt->disableSAX)) 11301 ctxt->sax->startDocument(ctxt->userData); 11302 ctxt->instate = XML_PARSER_MISC; 11303 #ifdef DEBUG_PUSH 11304 xmlGenericError(xmlGenericErrorContext, 11305 "PP: entering MISC\n"); 11306 #endif 11307 } else { 11308 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11309 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11310 (!ctxt->disableSAX)) 11311 ctxt->sax->startDocument(ctxt->userData); 11312 ctxt->instate = XML_PARSER_MISC; 11313 #ifdef DEBUG_PUSH 11314 xmlGenericError(xmlGenericErrorContext, 11315 "PP: entering MISC\n"); 11316 #endif 11317 } 11318 } else { 11319 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11320 ctxt->sax->setDocumentLocator(ctxt->userData, 11321 &xmlDefaultSAXLocator); 11322 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11323 if (ctxt->version == NULL) { 11324 xmlErrMemory(ctxt, NULL); 11325 break; 11326 } 11327 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11328 (!ctxt->disableSAX)) 11329 ctxt->sax->startDocument(ctxt->userData); 11330 ctxt->instate = XML_PARSER_MISC; 11331 #ifdef DEBUG_PUSH 11332 xmlGenericError(xmlGenericErrorContext, 11333 "PP: entering MISC\n"); 11334 #endif 11335 } 11336 break; 11337 case XML_PARSER_START_TAG: { 11338 const xmlChar *name; 11339 const xmlChar *prefix = NULL; 11340 const xmlChar *URI = NULL; 11341 int nsNr = ctxt->nsNr; 11342 11343 if ((avail < 2) && (ctxt->inputNr == 1)) 11344 goto done; 11345 cur = ctxt->input->cur[0]; 11346 if (cur != '<') { 11347 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11348 ctxt->instate = 
XML_PARSER_EOF; 11349 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11350 ctxt->sax->endDocument(ctxt->userData); 11351 goto done; 11352 } 11353 if (!terminate) { 11354 if (ctxt->progressive) { 11355 /* > can be found unescaped in attribute values */ 11356 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11357 goto done; 11358 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11359 goto done; 11360 } 11361 } 11362 if (ctxt->spaceNr == 0) 11363 spacePush(ctxt, -1); 11364 else if (*ctxt->space == -2) 11365 spacePush(ctxt, -1); 11366 else 11367 spacePush(ctxt, *ctxt->space); 11368 #ifdef LIBXML_SAX1_ENABLED 11369 if (ctxt->sax2) 11370 #endif /* LIBXML_SAX1_ENABLED */ 11371 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11372 #ifdef LIBXML_SAX1_ENABLED 11373 else 11374 name = xmlParseStartTag(ctxt); 11375 #endif /* LIBXML_SAX1_ENABLED */ 11376 if (ctxt->instate == XML_PARSER_EOF) 11377 goto done; 11378 if (name == NULL) { 11379 spacePop(ctxt); 11380 ctxt->instate = XML_PARSER_EOF; 11381 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11382 ctxt->sax->endDocument(ctxt->userData); 11383 goto done; 11384 } 11385 #ifdef LIBXML_VALID_ENABLED 11386 /* 11387 * [ VC: Root Element Type ] 11388 * The Name in the document type declaration must match 11389 * the element type of the root element. 11390 */ 11391 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11392 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11393 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11394 #endif /* LIBXML_VALID_ENABLED */ 11395 11396 /* 11397 * Check for an Empty Element. 
11398 */ 11399 if ((RAW == '/') && (NXT(1) == '>')) { 11400 SKIP(2); 11401 11402 if (ctxt->sax2) { 11403 if ((ctxt->sax != NULL) && 11404 (ctxt->sax->endElementNs != NULL) && 11405 (!ctxt->disableSAX)) 11406 ctxt->sax->endElementNs(ctxt->userData, name, 11407 prefix, URI); 11408 if (ctxt->nsNr - nsNr > 0) 11409 nsPop(ctxt, ctxt->nsNr - nsNr); 11410 #ifdef LIBXML_SAX1_ENABLED 11411 } else { 11412 if ((ctxt->sax != NULL) && 11413 (ctxt->sax->endElement != NULL) && 11414 (!ctxt->disableSAX)) 11415 ctxt->sax->endElement(ctxt->userData, name); 11416 #endif /* LIBXML_SAX1_ENABLED */ 11417 } 11418 if (ctxt->instate == XML_PARSER_EOF) 11419 goto done; 11420 spacePop(ctxt); 11421 if (ctxt->nameNr == 0) { 11422 ctxt->instate = XML_PARSER_EPILOG; 11423 } else { 11424 ctxt->instate = XML_PARSER_CONTENT; 11425 } 11426 ctxt->progressive = 1; 11427 break; 11428 } 11429 if (RAW == '>') { 11430 NEXT; 11431 } else { 11432 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11433 "Couldn't find end of Start Tag %s\n", 11434 name); 11435 nodePop(ctxt); 11436 spacePop(ctxt); 11437 } 11438 if (ctxt->sax2) 11439 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11440 #ifdef LIBXML_SAX1_ENABLED 11441 else 11442 namePush(ctxt, name); 11443 #endif /* LIBXML_SAX1_ENABLED */ 11444 11445 ctxt->instate = XML_PARSER_CONTENT; 11446 ctxt->progressive = 1; 11447 break; 11448 } 11449 case XML_PARSER_CONTENT: { 11450 const xmlChar *test; 11451 unsigned int cons; 11452 if ((avail < 2) && (ctxt->inputNr == 1)) 11453 goto done; 11454 cur = ctxt->input->cur[0]; 11455 next = ctxt->input->cur[1]; 11456 11457 test = CUR_PTR; 11458 cons = ctxt->input->consumed; 11459 if ((cur == '<') && (next == '/')) { 11460 ctxt->instate = XML_PARSER_END_TAG; 11461 break; 11462 } else if ((cur == '<') && (next == '?')) { 11463 if ((!terminate) && 11464 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11465 ctxt->progressive = XML_PARSER_PI; 11466 goto done; 11467 } 11468 xmlParsePI(ctxt); 11469 ctxt->instate = 
XML_PARSER_CONTENT; 11470 ctxt->progressive = 1; 11471 } else if ((cur == '<') && (next != '!')) { 11472 ctxt->instate = XML_PARSER_START_TAG; 11473 break; 11474 } else if ((cur == '<') && (next == '!') && 11475 (ctxt->input->cur[2] == '-') && 11476 (ctxt->input->cur[3] == '-')) { 11477 int term; 11478 11479 if (avail < 4) 11480 goto done; 11481 ctxt->input->cur += 4; 11482 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11483 ctxt->input->cur -= 4; 11484 if ((!terminate) && (term < 0)) { 11485 ctxt->progressive = XML_PARSER_COMMENT; 11486 goto done; 11487 } 11488 xmlParseComment(ctxt); 11489 ctxt->instate = XML_PARSER_CONTENT; 11490 ctxt->progressive = 1; 11491 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11492 (ctxt->input->cur[2] == '[') && 11493 (ctxt->input->cur[3] == 'C') && 11494 (ctxt->input->cur[4] == 'D') && 11495 (ctxt->input->cur[5] == 'A') && 11496 (ctxt->input->cur[6] == 'T') && 11497 (ctxt->input->cur[7] == 'A') && 11498 (ctxt->input->cur[8] == '[')) { 11499 SKIP(9); 11500 ctxt->instate = XML_PARSER_CDATA_SECTION; 11501 break; 11502 } else if ((cur == '<') && (next == '!') && 11503 (avail < 9)) { 11504 goto done; 11505 } else if (cur == '&') { 11506 if ((!terminate) && 11507 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11508 goto done; 11509 xmlParseReference(ctxt); 11510 } else { 11511 /* TODO Avoid the extra copy, handle directly !!! */ 11512 /* 11513 * Goal of the following test is: 11514 * - minimize calls to the SAX 'character' callback 11515 * when they are mergeable 11516 * - handle an problem for isBlank when we only parse 11517 * a sequence of blank chars and the next one is 11518 * not available to check against '<' presence. 11519 * - tries to homogenize the differences in SAX 11520 * callbacks between the push and pull versions 11521 * of the parser. 
11522 */ 11523 if ((ctxt->inputNr == 1) && 11524 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11525 if (!terminate) { 11526 if (ctxt->progressive) { 11527 if ((lastlt == NULL) || 11528 (ctxt->input->cur > lastlt)) 11529 goto done; 11530 } else if (xmlParseLookupSequence(ctxt, 11531 '<', 0, 0) < 0) { 11532 goto done; 11533 } 11534 } 11535 } 11536 ctxt->checkIndex = 0; 11537 xmlParseCharData(ctxt, 0); 11538 } 11539 /* 11540 * Pop-up of finished entities. 11541 */ 11542 while ((RAW == 0) && (ctxt->inputNr > 1)) 11543 xmlPopInput(ctxt); 11544 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11545 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11546 "detected an error in element content\n"); 11547 ctxt->instate = XML_PARSER_EOF; 11548 break; 11549 } 11550 break; 11551 } 11552 case XML_PARSER_END_TAG: 11553 if (avail < 2) 11554 goto done; 11555 if (!terminate) { 11556 if (ctxt->progressive) { 11557 /* > can be found unescaped in attribute values */ 11558 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11559 goto done; 11560 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11561 goto done; 11562 } 11563 } 11564 if (ctxt->sax2) { 11565 xmlParseEndTag2(ctxt, 11566 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11567 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11568 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11569 nameNsPop(ctxt); 11570 } 11571 #ifdef LIBXML_SAX1_ENABLED 11572 else 11573 xmlParseEndTag1(ctxt, 0); 11574 #endif /* LIBXML_SAX1_ENABLED */ 11575 if (ctxt->instate == XML_PARSER_EOF) { 11576 /* Nothing */ 11577 } else if (ctxt->nameNr == 0) { 11578 ctxt->instate = XML_PARSER_EPILOG; 11579 } else { 11580 ctxt->instate = XML_PARSER_CONTENT; 11581 } 11582 break; 11583 case XML_PARSER_CDATA_SECTION: { 11584 /* 11585 * The Push mode need to have the SAX callback for 11586 * cdataBlock merge back contiguous callbacks. 
11587 */ 11588 int base; 11589 11590 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11591 if (base < 0) { 11592 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11593 int tmp; 11594 11595 tmp = xmlCheckCdataPush(ctxt->input->cur, 11596 XML_PARSER_BIG_BUFFER_SIZE); 11597 if (tmp < 0) { 11598 tmp = -tmp; 11599 ctxt->input->cur += tmp; 11600 goto encoding_error; 11601 } 11602 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11603 if (ctxt->sax->cdataBlock != NULL) 11604 ctxt->sax->cdataBlock(ctxt->userData, 11605 ctxt->input->cur, tmp); 11606 else if (ctxt->sax->characters != NULL) 11607 ctxt->sax->characters(ctxt->userData, 11608 ctxt->input->cur, tmp); 11609 } 11610 if (ctxt->instate == XML_PARSER_EOF) 11611 goto done; 11612 SKIPL(tmp); 11613 ctxt->checkIndex = 0; 11614 } 11615 goto done; 11616 } else { 11617 int tmp; 11618 11619 tmp = xmlCheckCdataPush(ctxt->input->cur, base); 11620 if ((tmp < 0) || (tmp != base)) { 11621 tmp = -tmp; 11622 ctxt->input->cur += tmp; 11623 goto encoding_error; 11624 } 11625 if ((ctxt->sax != NULL) && (base == 0) && 11626 (ctxt->sax->cdataBlock != NULL) && 11627 (!ctxt->disableSAX)) { 11628 /* 11629 * Special case to provide identical behaviour 11630 * between pull and push parsers on enpty CDATA 11631 * sections 11632 */ 11633 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11634 (!strncmp((const char *)&ctxt->input->cur[-9], 11635 "<![CDATA[", 9))) 11636 ctxt->sax->cdataBlock(ctxt->userData, 11637 BAD_CAST "", 0); 11638 } else if ((ctxt->sax != NULL) && (base > 0) && 11639 (!ctxt->disableSAX)) { 11640 if (ctxt->sax->cdataBlock != NULL) 11641 ctxt->sax->cdataBlock(ctxt->userData, 11642 ctxt->input->cur, base); 11643 else if (ctxt->sax->characters != NULL) 11644 ctxt->sax->characters(ctxt->userData, 11645 ctxt->input->cur, base); 11646 } 11647 if (ctxt->instate == XML_PARSER_EOF) 11648 goto done; 11649 SKIPL(base + 3); 11650 ctxt->checkIndex = 0; 11651 ctxt->instate = XML_PARSER_CONTENT; 11652 #ifdef DEBUG_PUSH 11653 
xmlGenericError(xmlGenericErrorContext, 11654 "PP: entering CONTENT\n"); 11655 #endif 11656 } 11657 break; 11658 } 11659 case XML_PARSER_MISC: 11660 SKIP_BLANKS; 11661 if (ctxt->input->buf == NULL) 11662 avail = ctxt->input->length - 11663 (ctxt->input->cur - ctxt->input->base); 11664 else 11665 avail = xmlBufUse(ctxt->input->buf->buffer) - 11666 (ctxt->input->cur - ctxt->input->base); 11667 if (avail < 2) 11668 goto done; 11669 cur = ctxt->input->cur[0]; 11670 next = ctxt->input->cur[1]; 11671 if ((cur == '<') && (next == '?')) { 11672 if ((!terminate) && 11673 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11674 ctxt->progressive = XML_PARSER_PI; 11675 goto done; 11676 } 11677 #ifdef DEBUG_PUSH 11678 xmlGenericError(xmlGenericErrorContext, 11679 "PP: Parsing PI\n"); 11680 #endif 11681 xmlParsePI(ctxt); 11682 if (ctxt->instate == XML_PARSER_EOF) 11683 goto done; 11684 ctxt->instate = XML_PARSER_MISC; 11685 ctxt->progressive = 1; 11686 ctxt->checkIndex = 0; 11687 } else if ((cur == '<') && (next == '!') && 11688 (ctxt->input->cur[2] == '-') && 11689 (ctxt->input->cur[3] == '-')) { 11690 if ((!terminate) && 11691 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11692 ctxt->progressive = XML_PARSER_COMMENT; 11693 goto done; 11694 } 11695 #ifdef DEBUG_PUSH 11696 xmlGenericError(xmlGenericErrorContext, 11697 "PP: Parsing Comment\n"); 11698 #endif 11699 xmlParseComment(ctxt); 11700 if (ctxt->instate == XML_PARSER_EOF) 11701 goto done; 11702 ctxt->instate = XML_PARSER_MISC; 11703 ctxt->progressive = 1; 11704 ctxt->checkIndex = 0; 11705 } else if ((cur == '<') && (next == '!') && 11706 (ctxt->input->cur[2] == 'D') && 11707 (ctxt->input->cur[3] == 'O') && 11708 (ctxt->input->cur[4] == 'C') && 11709 (ctxt->input->cur[5] == 'T') && 11710 (ctxt->input->cur[6] == 'Y') && 11711 (ctxt->input->cur[7] == 'P') && 11712 (ctxt->input->cur[8] == 'E')) { 11713 if ((!terminate) && 11714 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11715 ctxt->progressive = XML_PARSER_DTD; 
11716 goto done; 11717 } 11718 #ifdef DEBUG_PUSH 11719 xmlGenericError(xmlGenericErrorContext, 11720 "PP: Parsing internal subset\n"); 11721 #endif 11722 ctxt->inSubset = 1; 11723 ctxt->progressive = 0; 11724 ctxt->checkIndex = 0; 11725 xmlParseDocTypeDecl(ctxt); 11726 if (ctxt->instate == XML_PARSER_EOF) 11727 goto done; 11728 if (RAW == '[') { 11729 ctxt->instate = XML_PARSER_DTD; 11730 #ifdef DEBUG_PUSH 11731 xmlGenericError(xmlGenericErrorContext, 11732 "PP: entering DTD\n"); 11733 #endif 11734 } else { 11735 /* 11736 * Create and update the external subset. 11737 */ 11738 ctxt->inSubset = 2; 11739 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11740 (ctxt->sax->externalSubset != NULL)) 11741 ctxt->sax->externalSubset(ctxt->userData, 11742 ctxt->intSubName, ctxt->extSubSystem, 11743 ctxt->extSubURI); 11744 ctxt->inSubset = 0; 11745 xmlCleanSpecialAttr(ctxt); 11746 ctxt->instate = XML_PARSER_PROLOG; 11747 #ifdef DEBUG_PUSH 11748 xmlGenericError(xmlGenericErrorContext, 11749 "PP: entering PROLOG\n"); 11750 #endif 11751 } 11752 } else if ((cur == '<') && (next == '!') && 11753 (avail < 9)) { 11754 goto done; 11755 } else { 11756 ctxt->instate = XML_PARSER_START_TAG; 11757 ctxt->progressive = XML_PARSER_START_TAG; 11758 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11759 #ifdef DEBUG_PUSH 11760 xmlGenericError(xmlGenericErrorContext, 11761 "PP: entering START_TAG\n"); 11762 #endif 11763 } 11764 break; 11765 case XML_PARSER_PROLOG: 11766 SKIP_BLANKS; 11767 if (ctxt->input->buf == NULL) 11768 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11769 else 11770 avail = xmlBufUse(ctxt->input->buf->buffer) - 11771 (ctxt->input->cur - ctxt->input->base); 11772 if (avail < 2) 11773 goto done; 11774 cur = ctxt->input->cur[0]; 11775 next = ctxt->input->cur[1]; 11776 if ((cur == '<') && (next == '?')) { 11777 if ((!terminate) && 11778 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11779 ctxt->progressive = XML_PARSER_PI; 11780 goto done; 11781 } 11782 
#ifdef DEBUG_PUSH 11783 xmlGenericError(xmlGenericErrorContext, 11784 "PP: Parsing PI\n"); 11785 #endif 11786 xmlParsePI(ctxt); 11787 if (ctxt->instate == XML_PARSER_EOF) 11788 goto done; 11789 ctxt->instate = XML_PARSER_PROLOG; 11790 ctxt->progressive = 1; 11791 } else if ((cur == '<') && (next == '!') && 11792 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11793 if ((!terminate) && 11794 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11795 ctxt->progressive = XML_PARSER_COMMENT; 11796 goto done; 11797 } 11798 #ifdef DEBUG_PUSH 11799 xmlGenericError(xmlGenericErrorContext, 11800 "PP: Parsing Comment\n"); 11801 #endif 11802 xmlParseComment(ctxt); 11803 if (ctxt->instate == XML_PARSER_EOF) 11804 goto done; 11805 ctxt->instate = XML_PARSER_PROLOG; 11806 ctxt->progressive = 1; 11807 } else if ((cur == '<') && (next == '!') && 11808 (avail < 4)) { 11809 goto done; 11810 } else { 11811 ctxt->instate = XML_PARSER_START_TAG; 11812 if (ctxt->progressive == 0) 11813 ctxt->progressive = XML_PARSER_START_TAG; 11814 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11815 #ifdef DEBUG_PUSH 11816 xmlGenericError(xmlGenericErrorContext, 11817 "PP: entering START_TAG\n"); 11818 #endif 11819 } 11820 break; 11821 case XML_PARSER_EPILOG: 11822 SKIP_BLANKS; 11823 if (ctxt->input->buf == NULL) 11824 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11825 else 11826 avail = xmlBufUse(ctxt->input->buf->buffer) - 11827 (ctxt->input->cur - ctxt->input->base); 11828 if (avail < 2) 11829 goto done; 11830 cur = ctxt->input->cur[0]; 11831 next = ctxt->input->cur[1]; 11832 if ((cur == '<') && (next == '?')) { 11833 if ((!terminate) && 11834 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11835 ctxt->progressive = XML_PARSER_PI; 11836 goto done; 11837 } 11838 #ifdef DEBUG_PUSH 11839 xmlGenericError(xmlGenericErrorContext, 11840 "PP: Parsing PI\n"); 11841 #endif 11842 xmlParsePI(ctxt); 11843 if (ctxt->instate == XML_PARSER_EOF) 11844 goto done; 11845 
ctxt->instate = XML_PARSER_EPILOG; 11846 ctxt->progressive = 1; 11847 } else if ((cur == '<') && (next == '!') && 11848 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11849 if ((!terminate) && 11850 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11851 ctxt->progressive = XML_PARSER_COMMENT; 11852 goto done; 11853 } 11854 #ifdef DEBUG_PUSH 11855 xmlGenericError(xmlGenericErrorContext, 11856 "PP: Parsing Comment\n"); 11857 #endif 11858 xmlParseComment(ctxt); 11859 if (ctxt->instate == XML_PARSER_EOF) 11860 goto done; 11861 ctxt->instate = XML_PARSER_EPILOG; 11862 ctxt->progressive = 1; 11863 } else if ((cur == '<') && (next == '!') && 11864 (avail < 4)) { 11865 goto done; 11866 } else { 11867 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11868 ctxt->instate = XML_PARSER_EOF; 11869 #ifdef DEBUG_PUSH 11870 xmlGenericError(xmlGenericErrorContext, 11871 "PP: entering EOF\n"); 11872 #endif 11873 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11874 ctxt->sax->endDocument(ctxt->userData); 11875 goto done; 11876 } 11877 break; 11878 case XML_PARSER_DTD: { 11879 /* 11880 * Sorry but progressive parsing of the internal subset 11881 * is not expected to be supported. We first check that 11882 * the full content of the internal subset is available and 11883 * the parsing is launched only at that point. 11884 * Internal subset ends up with "']' S? '>'" in an unescaped 11885 * section and not in a ']]>' sequence which are conditional 11886 * sections (whoever argued to keep that crap in XML deserve 11887 * a place in hell !). 
11888 */ 11889 int base, i; 11890 xmlChar *buf; 11891 xmlChar quote = 0; 11892 size_t use; 11893 11894 base = ctxt->input->cur - ctxt->input->base; 11895 if (base < 0) return(0); 11896 if (ctxt->checkIndex > base) 11897 base = ctxt->checkIndex; 11898 buf = xmlBufContent(ctxt->input->buf->buffer); 11899 use = xmlBufUse(ctxt->input->buf->buffer); 11900 for (;(unsigned int) base < use; base++) { 11901 if (quote != 0) { 11902 if (buf[base] == quote) 11903 quote = 0; 11904 continue; 11905 } 11906 if ((quote == 0) && (buf[base] == '<')) { 11907 int found = 0; 11908 /* special handling of comments */ 11909 if (((unsigned int) base + 4 < use) && 11910 (buf[base + 1] == '!') && 11911 (buf[base + 2] == '-') && 11912 (buf[base + 3] == '-')) { 11913 for (;(unsigned int) base + 3 < use; base++) { 11914 if ((buf[base] == '-') && 11915 (buf[base + 1] == '-') && 11916 (buf[base + 2] == '>')) { 11917 found = 1; 11918 base += 2; 11919 break; 11920 } 11921 } 11922 if (!found) { 11923 #if 0 11924 fprintf(stderr, "unfinished comment\n"); 11925 #endif 11926 break; /* for */ 11927 } 11928 continue; 11929 } 11930 } 11931 if (buf[base] == '"') { 11932 quote = '"'; 11933 continue; 11934 } 11935 if (buf[base] == '\'') { 11936 quote = '\''; 11937 continue; 11938 } 11939 if (buf[base] == ']') { 11940 #if 0 11941 fprintf(stderr, "%c%c%c%c: ", buf[base], 11942 buf[base + 1], buf[base + 2], buf[base + 3]); 11943 #endif 11944 if ((unsigned int) base +1 >= use) 11945 break; 11946 if (buf[base + 1] == ']') { 11947 /* conditional crap, skip both ']' ! 
*/ 11948 base++; 11949 continue; 11950 } 11951 for (i = 1; (unsigned int) base + i < use; i++) { 11952 if (buf[base + i] == '>') { 11953 #if 0 11954 fprintf(stderr, "found\n"); 11955 #endif 11956 goto found_end_int_subset; 11957 } 11958 if (!IS_BLANK_CH(buf[base + i])) { 11959 #if 0 11960 fprintf(stderr, "not found\n"); 11961 #endif 11962 goto not_end_of_int_subset; 11963 } 11964 } 11965 #if 0 11966 fprintf(stderr, "end of stream\n"); 11967 #endif 11968 break; 11969 11970 } 11971 not_end_of_int_subset: 11972 continue; /* for */ 11973 } 11974 /* 11975 * We didn't found the end of the Internal subset 11976 */ 11977 if (quote == 0) 11978 ctxt->checkIndex = base; 11979 else 11980 ctxt->checkIndex = 0; 11981 #ifdef DEBUG_PUSH 11982 if (next == 0) 11983 xmlGenericError(xmlGenericErrorContext, 11984 "PP: lookup of int subset end filed\n"); 11985 #endif 11986 goto done; 11987 11988 found_end_int_subset: 11989 ctxt->checkIndex = 0; 11990 xmlParseInternalSubset(ctxt); 11991 if (ctxt->instate == XML_PARSER_EOF) 11992 goto done; 11993 ctxt->inSubset = 2; 11994 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11995 (ctxt->sax->externalSubset != NULL)) 11996 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11997 ctxt->extSubSystem, ctxt->extSubURI); 11998 ctxt->inSubset = 0; 11999 xmlCleanSpecialAttr(ctxt); 12000 if (ctxt->instate == XML_PARSER_EOF) 12001 goto done; 12002 ctxt->instate = XML_PARSER_PROLOG; 12003 ctxt->checkIndex = 0; 12004 #ifdef DEBUG_PUSH 12005 xmlGenericError(xmlGenericErrorContext, 12006 "PP: entering PROLOG\n"); 12007 #endif 12008 break; 12009 } 12010 case XML_PARSER_COMMENT: 12011 xmlGenericError(xmlGenericErrorContext, 12012 "PP: internal error, state == COMMENT\n"); 12013 ctxt->instate = XML_PARSER_CONTENT; 12014 #ifdef DEBUG_PUSH 12015 xmlGenericError(xmlGenericErrorContext, 12016 "PP: entering CONTENT\n"); 12017 #endif 12018 break; 12019 case XML_PARSER_IGNORE: 12020 xmlGenericError(xmlGenericErrorContext, 12021 "PP: internal error, state 
== IGNORE"); 12022 ctxt->instate = XML_PARSER_DTD; 12023 #ifdef DEBUG_PUSH 12024 xmlGenericError(xmlGenericErrorContext, 12025 "PP: entering DTD\n"); 12026 #endif 12027 break; 12028 case XML_PARSER_PI: 12029 xmlGenericError(xmlGenericErrorContext, 12030 "PP: internal error, state == PI\n"); 12031 ctxt->instate = XML_PARSER_CONTENT; 12032 #ifdef DEBUG_PUSH 12033 xmlGenericError(xmlGenericErrorContext, 12034 "PP: entering CONTENT\n"); 12035 #endif 12036 break; 12037 case XML_PARSER_ENTITY_DECL: 12038 xmlGenericError(xmlGenericErrorContext, 12039 "PP: internal error, state == ENTITY_DECL\n"); 12040 ctxt->instate = XML_PARSER_DTD; 12041 #ifdef DEBUG_PUSH 12042 xmlGenericError(xmlGenericErrorContext, 12043 "PP: entering DTD\n"); 12044 #endif 12045 break; 12046 case XML_PARSER_ENTITY_VALUE: 12047 xmlGenericError(xmlGenericErrorContext, 12048 "PP: internal error, state == ENTITY_VALUE\n"); 12049 ctxt->instate = XML_PARSER_CONTENT; 12050 #ifdef DEBUG_PUSH 12051 xmlGenericError(xmlGenericErrorContext, 12052 "PP: entering DTD\n"); 12053 #endif 12054 break; 12055 case XML_PARSER_ATTRIBUTE_VALUE: 12056 xmlGenericError(xmlGenericErrorContext, 12057 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12058 ctxt->instate = XML_PARSER_START_TAG; 12059 #ifdef DEBUG_PUSH 12060 xmlGenericError(xmlGenericErrorContext, 12061 "PP: entering START_TAG\n"); 12062 #endif 12063 break; 12064 case XML_PARSER_SYSTEM_LITERAL: 12065 xmlGenericError(xmlGenericErrorContext, 12066 "PP: internal error, state == SYSTEM_LITERAL\n"); 12067 ctxt->instate = XML_PARSER_START_TAG; 12068 #ifdef DEBUG_PUSH 12069 xmlGenericError(xmlGenericErrorContext, 12070 "PP: entering START_TAG\n"); 12071 #endif 12072 break; 12073 case XML_PARSER_PUBLIC_LITERAL: 12074 xmlGenericError(xmlGenericErrorContext, 12075 "PP: internal error, state == PUBLIC_LITERAL\n"); 12076 ctxt->instate = XML_PARSER_START_TAG; 12077 #ifdef DEBUG_PUSH 12078 xmlGenericError(xmlGenericErrorContext, 12079 "PP: entering START_TAG\n"); 12080 #endif 
12081 break; 12082 } 12083 } 12084 done: 12085 #ifdef DEBUG_PUSH 12086 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12087 #endif 12088 return(ret); 12089 encoding_error: 12090 { 12091 char buffer[150]; 12092 12093 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12094 ctxt->input->cur[0], ctxt->input->cur[1], 12095 ctxt->input->cur[2], ctxt->input->cur[3]); 12096 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12097 "Input is not proper UTF-8, indicate encoding !\n%s", 12098 BAD_CAST buffer, NULL); 12099 } 12100 return(0); 12101 } 12102 12103 /** 12104 * xmlParseCheckTransition: 12105 * @ctxt: an XML parser context 12106 * @chunk: a char array 12107 * @size: the size in byte of the chunk 12108 * 12109 * Check depending on the current parser state if the chunk given must be 12110 * processed immediately or one need more data to advance on parsing. 12111 * 12112 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12113 */ 12114 static int 12115 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12116 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12117 return(-1); 12118 if (ctxt->instate == XML_PARSER_START_TAG) { 12119 if (memchr(chunk, '>', size) != NULL) 12120 return(1); 12121 return(0); 12122 } 12123 if (ctxt->progressive == XML_PARSER_COMMENT) { 12124 if (memchr(chunk, '>', size) != NULL) 12125 return(1); 12126 return(0); 12127 } 12128 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12129 if (memchr(chunk, '>', size) != NULL) 12130 return(1); 12131 return(0); 12132 } 12133 if (ctxt->progressive == XML_PARSER_PI) { 12134 if (memchr(chunk, '>', size) != NULL) 12135 return(1); 12136 return(0); 12137 } 12138 if (ctxt->instate == XML_PARSER_END_TAG) { 12139 if (memchr(chunk, '>', size) != NULL) 12140 return(1); 12141 return(0); 12142 } 12143 if ((ctxt->progressive == XML_PARSER_DTD) || 12144 (ctxt->instate == XML_PARSER_DTD)) { 12145 if (memchr(chunk, '>', size) != NULL) 12146 
return(1); 12147 return(0); 12148 } 12149 return(1); 12150 } 12151 12152 /** 12153 * xmlParseChunk: 12154 * @ctxt: an XML parser context 12155 * @chunk: an char array 12156 * @size: the size in byte of the chunk 12157 * @terminate: last chunk indicator 12158 * 12159 * Parse a Chunk of memory 12160 * 12161 * Returns zero if no error, the xmlParserErrors otherwise. 12162 */ 12163 int 12164 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12165 int terminate) { 12166 int end_in_lf = 0; 12167 int remain = 0; 12168 size_t old_avail = 0; 12169 size_t avail = 0; 12170 12171 if (ctxt == NULL) 12172 return(XML_ERR_INTERNAL_ERROR); 12173 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12174 return(ctxt->errNo); 12175 if (ctxt->instate == XML_PARSER_EOF) 12176 return(-1); 12177 if (ctxt->instate == XML_PARSER_START) 12178 xmlDetectSAX2(ctxt); 12179 if ((size > 0) && (chunk != NULL) && (!terminate) && 12180 (chunk[size - 1] == '\r')) { 12181 end_in_lf = 1; 12182 size--; 12183 } 12184 12185 xmldecl_done: 12186 12187 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12188 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12189 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12190 size_t cur = ctxt->input->cur - ctxt->input->base; 12191 int res; 12192 12193 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12194 /* 12195 * Specific handling if we autodetected an encoding, we should not 12196 * push more than the first line ... 
which depend on the encoding 12197 * And only push the rest once the final encoding was detected 12198 */ 12199 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12200 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12201 unsigned int len = 45; 12202 12203 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12204 BAD_CAST "UTF-16")) || 12205 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12206 BAD_CAST "UTF16"))) 12207 len = 90; 12208 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12209 BAD_CAST "UCS-4")) || 12210 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12211 BAD_CAST "UCS4"))) 12212 len = 180; 12213 12214 if (ctxt->input->buf->rawconsumed < len) 12215 len -= ctxt->input->buf->rawconsumed; 12216 12217 /* 12218 * Change size for reading the initial declaration only 12219 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12220 * will blindly copy extra bytes from memory. 12221 */ 12222 if ((unsigned int) size > len) { 12223 remain = size - len; 12224 size = len; 12225 } else { 12226 remain = 0; 12227 } 12228 } 12229 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12230 if (res < 0) { 12231 ctxt->errNo = XML_PARSER_EOF; 12232 ctxt->disableSAX = 1; 12233 return (XML_PARSER_EOF); 12234 } 12235 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12236 #ifdef DEBUG_PUSH 12237 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12238 #endif 12239 12240 } else if (ctxt->instate != XML_PARSER_EOF) { 12241 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12242 xmlParserInputBufferPtr in = ctxt->input->buf; 12243 if ((in->encoder != NULL) && (in->buffer != NULL) && 12244 (in->raw != NULL)) { 12245 int nbchars; 12246 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12247 size_t current = ctxt->input->cur - ctxt->input->base; 12248 12249 nbchars = xmlCharEncInput(in, terminate); 12250 if (nbchars < 0) { 
12251 /* TODO 2.6.0 */ 12252 xmlGenericError(xmlGenericErrorContext, 12253 "xmlParseChunk: encoder error\n"); 12254 return(XML_ERR_INVALID_ENCODING); 12255 } 12256 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12257 } 12258 } 12259 } 12260 if (remain != 0) { 12261 xmlParseTryOrFinish(ctxt, 0); 12262 } else { 12263 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12264 avail = xmlBufUse(ctxt->input->buf->buffer); 12265 /* 12266 * Depending on the current state it may not be such 12267 * a good idea to try parsing if there is nothing in the chunk 12268 * which would be worth doing a parser state transition and we 12269 * need to wait for more data 12270 */ 12271 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12272 (old_avail == 0) || (avail == 0) || 12273 (xmlParseCheckTransition(ctxt, 12274 (const char *)&ctxt->input->base[old_avail], 12275 avail - old_avail))) 12276 xmlParseTryOrFinish(ctxt, terminate); 12277 } 12278 if (ctxt->instate == XML_PARSER_EOF) 12279 return(ctxt->errNo); 12280 12281 if ((ctxt->input != NULL) && 12282 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12283 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12284 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12285 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12286 ctxt->instate = XML_PARSER_EOF; 12287 } 12288 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12289 return(ctxt->errNo); 12290 12291 if (remain != 0) { 12292 chunk += size; 12293 size = remain; 12294 remain = 0; 12295 goto xmldecl_done; 12296 } 12297 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12298 (ctxt->input->buf != NULL)) { 12299 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12300 ctxt->input); 12301 size_t current = ctxt->input->cur - ctxt->input->base; 12302 12303 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12304 12305 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 12306 base, current); 12307 } 
12308 if (terminate) { 12309 /* 12310 * Check for termination 12311 */ 12312 int cur_avail = 0; 12313 12314 if (ctxt->input != NULL) { 12315 if (ctxt->input->buf == NULL) 12316 cur_avail = ctxt->input->length - 12317 (ctxt->input->cur - ctxt->input->base); 12318 else 12319 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12320 (ctxt->input->cur - ctxt->input->base); 12321 } 12322 12323 if ((ctxt->instate != XML_PARSER_EOF) && 12324 (ctxt->instate != XML_PARSER_EPILOG)) { 12325 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12326 } 12327 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12328 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12329 } 12330 if (ctxt->instate != XML_PARSER_EOF) { 12331 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12332 ctxt->sax->endDocument(ctxt->userData); 12333 } 12334 ctxt->instate = XML_PARSER_EOF; 12335 } 12336 if (ctxt->wellFormed == 0) 12337 return((xmlParserErrors) ctxt->errNo); 12338 else 12339 return(0); 12340 } 12341 12342 /************************************************************************ 12343 * * 12344 * I/O front end functions to the parser * 12345 * * 12346 ************************************************************************/ 12347 12348 /** 12349 * xmlCreatePushParserCtxt: 12350 * @sax: a SAX handler 12351 * @user_data: The user data returned on SAX callbacks 12352 * @chunk: a pointer to an array of chars 12353 * @size: number of chars in the array 12354 * @filename: an optional file name or URI 12355 * 12356 * Create a parser context for using the XML parser in push mode. 12357 * If @buffer and @size are non-NULL, the data is used to detect 12358 * the encoding. The remaining characters will be parsed so they 12359 * don't need to be fed in again through xmlParseChunk. 12360 * To allow content encoding detection, @size should be >= 4 12361 * The value of @filename is used for fetching external entities 12362 * and error/warning reports. 
12363 * 12364 * Returns the new parser context or NULL 12365 */ 12366 12367 xmlParserCtxtPtr 12368 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12369 const char *chunk, int size, const char *filename) { 12370 xmlParserCtxtPtr ctxt; 12371 xmlParserInputPtr inputStream; 12372 xmlParserInputBufferPtr buf; 12373 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12374 12375 /* 12376 * plug some encoding conversion routines 12377 */ 12378 if ((chunk != NULL) && (size >= 4)) 12379 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12380 12381 buf = xmlAllocParserInputBuffer(enc); 12382 if (buf == NULL) return(NULL); 12383 12384 ctxt = xmlNewParserCtxt(); 12385 if (ctxt == NULL) { 12386 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12387 xmlFreeParserInputBuffer(buf); 12388 return(NULL); 12389 } 12390 ctxt->dictNames = 1; 12391 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 12392 if (ctxt->pushTab == NULL) { 12393 xmlErrMemory(ctxt, NULL); 12394 xmlFreeParserInputBuffer(buf); 12395 xmlFreeParserCtxt(ctxt); 12396 return(NULL); 12397 } 12398 if (sax != NULL) { 12399 #ifdef LIBXML_SAX1_ENABLED 12400 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12401 #endif /* LIBXML_SAX1_ENABLED */ 12402 xmlFree(ctxt->sax); 12403 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12404 if (ctxt->sax == NULL) { 12405 xmlErrMemory(ctxt, NULL); 12406 xmlFreeParserInputBuffer(buf); 12407 xmlFreeParserCtxt(ctxt); 12408 return(NULL); 12409 } 12410 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12411 if (sax->initialized == XML_SAX2_MAGIC) 12412 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12413 else 12414 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12415 if (user_data != NULL) 12416 ctxt->userData = user_data; 12417 } 12418 if (filename == NULL) { 12419 ctxt->directory = NULL; 12420 } else { 12421 ctxt->directory = xmlParserGetDirectory(filename); 12422 } 12423 12424 inputStream = xmlNewInputStream(ctxt); 
12425 if (inputStream == NULL) { 12426 xmlFreeParserCtxt(ctxt); 12427 xmlFreeParserInputBuffer(buf); 12428 return(NULL); 12429 } 12430 12431 if (filename == NULL) 12432 inputStream->filename = NULL; 12433 else { 12434 inputStream->filename = (char *) 12435 xmlCanonicPath((const xmlChar *) filename); 12436 if (inputStream->filename == NULL) { 12437 xmlFreeParserCtxt(ctxt); 12438 xmlFreeParserInputBuffer(buf); 12439 return(NULL); 12440 } 12441 } 12442 inputStream->buf = buf; 12443 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12444 inputPush(ctxt, inputStream); 12445 12446 /* 12447 * If the caller didn't provide an initial 'chunk' for determining 12448 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12449 * that it can be automatically determined later 12450 */ 12451 if ((size == 0) || (chunk == NULL)) { 12452 ctxt->charset = XML_CHAR_ENCODING_NONE; 12453 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12454 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12455 size_t cur = ctxt->input->cur - ctxt->input->base; 12456 12457 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12458 12459 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12460 #ifdef DEBUG_PUSH 12461 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12462 #endif 12463 } 12464 12465 if (enc != XML_CHAR_ENCODING_NONE) { 12466 xmlSwitchEncoding(ctxt, enc); 12467 } 12468 12469 return(ctxt); 12470 } 12471 #endif /* LIBXML_PUSH_ENABLED */ 12472 12473 /** 12474 * xmlStopParser: 12475 * @ctxt: an XML parser context 12476 * 12477 * Blocks further parser processing 12478 */ 12479 void 12480 xmlStopParser(xmlParserCtxtPtr ctxt) { 12481 if (ctxt == NULL) 12482 return; 12483 ctxt->instate = XML_PARSER_EOF; 12484 ctxt->errNo = XML_ERR_USER_STOP; 12485 ctxt->disableSAX = 1; 12486 if (ctxt->input != NULL) { 12487 ctxt->input->cur = BAD_CAST""; 12488 ctxt->input->base = ctxt->input->cur; 12489 } 12490 } 
12491 12492 /** 12493 * xmlCreateIOParserCtxt: 12494 * @sax: a SAX handler 12495 * @user_data: The user data returned on SAX callbacks 12496 * @ioread: an I/O read function 12497 * @ioclose: an I/O close function 12498 * @ioctx: an I/O handler 12499 * @enc: the charset encoding if known 12500 * 12501 * Create a parser context for using the XML parser with an existing 12502 * I/O stream 12503 * 12504 * Returns the new parser context or NULL 12505 */ 12506 xmlParserCtxtPtr 12507 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12508 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12509 void *ioctx, xmlCharEncoding enc) { 12510 xmlParserCtxtPtr ctxt; 12511 xmlParserInputPtr inputStream; 12512 xmlParserInputBufferPtr buf; 12513 12514 if (ioread == NULL) return(NULL); 12515 12516 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12517 if (buf == NULL) { 12518 if (ioclose != NULL) 12519 ioclose(ioctx); 12520 return (NULL); 12521 } 12522 12523 ctxt = xmlNewParserCtxt(); 12524 if (ctxt == NULL) { 12525 xmlFreeParserInputBuffer(buf); 12526 return(NULL); 12527 } 12528 if (sax != NULL) { 12529 #ifdef LIBXML_SAX1_ENABLED 12530 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12531 #endif /* LIBXML_SAX1_ENABLED */ 12532 xmlFree(ctxt->sax); 12533 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12534 if (ctxt->sax == NULL) { 12535 xmlErrMemory(ctxt, NULL); 12536 xmlFreeParserCtxt(ctxt); 12537 return(NULL); 12538 } 12539 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12540 if (sax->initialized == XML_SAX2_MAGIC) 12541 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12542 else 12543 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12544 if (user_data != NULL) 12545 ctxt->userData = user_data; 12546 } 12547 12548 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12549 if (inputStream == NULL) { 12550 xmlFreeParserCtxt(ctxt); 12551 return(NULL); 12552 } 12553 inputPush(ctxt, inputStream); 12554 12555 return(ctxt); 12556 
} 12557 12558 #ifdef LIBXML_VALID_ENABLED 12559 /************************************************************************ 12560 * * 12561 * Front ends when parsing a DTD * 12562 * * 12563 ************************************************************************/ 12564 12565 /** 12566 * xmlIOParseDTD: 12567 * @sax: the SAX handler block or NULL 12568 * @input: an Input Buffer 12569 * @enc: the charset encoding if known 12570 * 12571 * Load and parse a DTD 12572 * 12573 * Returns the resulting xmlDtdPtr or NULL in case of error. 12574 * @input will be freed by the function in any case. 12575 */ 12576 12577 xmlDtdPtr 12578 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12579 xmlCharEncoding enc) { 12580 xmlDtdPtr ret = NULL; 12581 xmlParserCtxtPtr ctxt; 12582 xmlParserInputPtr pinput = NULL; 12583 xmlChar start[4]; 12584 12585 if (input == NULL) 12586 return(NULL); 12587 12588 ctxt = xmlNewParserCtxt(); 12589 if (ctxt == NULL) { 12590 xmlFreeParserInputBuffer(input); 12591 return(NULL); 12592 } 12593 12594 /* 12595 * Set-up the SAX context 12596 */ 12597 if (sax != NULL) { 12598 if (ctxt->sax != NULL) 12599 xmlFree(ctxt->sax); 12600 ctxt->sax = sax; 12601 ctxt->userData = ctxt; 12602 } 12603 xmlDetectSAX2(ctxt); 12604 12605 /* 12606 * generate a parser input from the I/O handler 12607 */ 12608 12609 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12610 if (pinput == NULL) { 12611 if (sax != NULL) ctxt->sax = NULL; 12612 xmlFreeParserInputBuffer(input); 12613 xmlFreeParserCtxt(ctxt); 12614 return(NULL); 12615 } 12616 12617 /* 12618 * plug some encoding conversion routines here. 
12619 */ 12620 if (xmlPushInput(ctxt, pinput) < 0) { 12621 if (sax != NULL) ctxt->sax = NULL; 12622 xmlFreeParserCtxt(ctxt); 12623 return(NULL); 12624 } 12625 if (enc != XML_CHAR_ENCODING_NONE) { 12626 xmlSwitchEncoding(ctxt, enc); 12627 } 12628 12629 pinput->filename = NULL; 12630 pinput->line = 1; 12631 pinput->col = 1; 12632 pinput->base = ctxt->input->cur; 12633 pinput->cur = ctxt->input->cur; 12634 pinput->free = NULL; 12635 12636 /* 12637 * let's parse that entity knowing it's an external subset. 12638 */ 12639 ctxt->inSubset = 2; 12640 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12641 if (ctxt->myDoc == NULL) { 12642 xmlErrMemory(ctxt, "New Doc failed"); 12643 return(NULL); 12644 } 12645 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12646 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12647 BAD_CAST "none", BAD_CAST "none"); 12648 12649 if ((enc == XML_CHAR_ENCODING_NONE) && 12650 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12651 /* 12652 * Get the 4 first bytes and decode the charset 12653 * if enc != XML_CHAR_ENCODING_NONE 12654 * plug some encoding conversion routines. 
12655 */ 12656 start[0] = RAW; 12657 start[1] = NXT(1); 12658 start[2] = NXT(2); 12659 start[3] = NXT(3); 12660 enc = xmlDetectCharEncoding(start, 4); 12661 if (enc != XML_CHAR_ENCODING_NONE) { 12662 xmlSwitchEncoding(ctxt, enc); 12663 } 12664 } 12665 12666 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12667 12668 if (ctxt->myDoc != NULL) { 12669 if (ctxt->wellFormed) { 12670 ret = ctxt->myDoc->extSubset; 12671 ctxt->myDoc->extSubset = NULL; 12672 if (ret != NULL) { 12673 xmlNodePtr tmp; 12674 12675 ret->doc = NULL; 12676 tmp = ret->children; 12677 while (tmp != NULL) { 12678 tmp->doc = NULL; 12679 tmp = tmp->next; 12680 } 12681 } 12682 } else { 12683 ret = NULL; 12684 } 12685 xmlFreeDoc(ctxt->myDoc); 12686 ctxt->myDoc = NULL; 12687 } 12688 if (sax != NULL) ctxt->sax = NULL; 12689 xmlFreeParserCtxt(ctxt); 12690 12691 return(ret); 12692 } 12693 12694 /** 12695 * xmlSAXParseDTD: 12696 * @sax: the SAX handler block 12697 * @ExternalID: a NAME* containing the External ID of the DTD 12698 * @SystemID: a NAME* containing the URL to the DTD 12699 * 12700 * Load and parse an external subset. 12701 * 12702 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12703 */ 12704 12705 xmlDtdPtr 12706 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12707 const xmlChar *SystemID) { 12708 xmlDtdPtr ret = NULL; 12709 xmlParserCtxtPtr ctxt; 12710 xmlParserInputPtr input = NULL; 12711 xmlCharEncoding enc; 12712 xmlChar* systemIdCanonic; 12713 12714 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12715 12716 ctxt = xmlNewParserCtxt(); 12717 if (ctxt == NULL) { 12718 return(NULL); 12719 } 12720 12721 /* 12722 * Set-up the SAX context 12723 */ 12724 if (sax != NULL) { 12725 if (ctxt->sax != NULL) 12726 xmlFree(ctxt->sax); 12727 ctxt->sax = sax; 12728 ctxt->userData = ctxt; 12729 } 12730 12731 /* 12732 * Canonicalise the system ID 12733 */ 12734 systemIdCanonic = xmlCanonicPath(SystemID); 12735 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12736 xmlFreeParserCtxt(ctxt); 12737 return(NULL); 12738 } 12739 12740 /* 12741 * Ask the Entity resolver to load the damn thing 12742 */ 12743 12744 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12745 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12746 systemIdCanonic); 12747 if (input == NULL) { 12748 if (sax != NULL) ctxt->sax = NULL; 12749 xmlFreeParserCtxt(ctxt); 12750 if (systemIdCanonic != NULL) 12751 xmlFree(systemIdCanonic); 12752 return(NULL); 12753 } 12754 12755 /* 12756 * plug some encoding conversion routines here. 
12757 */ 12758 if (xmlPushInput(ctxt, input) < 0) { 12759 if (sax != NULL) ctxt->sax = NULL; 12760 xmlFreeParserCtxt(ctxt); 12761 if (systemIdCanonic != NULL) 12762 xmlFree(systemIdCanonic); 12763 return(NULL); 12764 } 12765 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12766 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12767 xmlSwitchEncoding(ctxt, enc); 12768 } 12769 12770 if (input->filename == NULL) 12771 input->filename = (char *) systemIdCanonic; 12772 else 12773 xmlFree(systemIdCanonic); 12774 input->line = 1; 12775 input->col = 1; 12776 input->base = ctxt->input->cur; 12777 input->cur = ctxt->input->cur; 12778 input->free = NULL; 12779 12780 /* 12781 * let's parse that entity knowing it's an external subset. 12782 */ 12783 ctxt->inSubset = 2; 12784 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12785 if (ctxt->myDoc == NULL) { 12786 xmlErrMemory(ctxt, "New Doc failed"); 12787 if (sax != NULL) ctxt->sax = NULL; 12788 xmlFreeParserCtxt(ctxt); 12789 return(NULL); 12790 } 12791 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12792 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12793 ExternalID, SystemID); 12794 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12795 12796 if (ctxt->myDoc != NULL) { 12797 if (ctxt->wellFormed) { 12798 ret = ctxt->myDoc->extSubset; 12799 ctxt->myDoc->extSubset = NULL; 12800 if (ret != NULL) { 12801 xmlNodePtr tmp; 12802 12803 ret->doc = NULL; 12804 tmp = ret->children; 12805 while (tmp != NULL) { 12806 tmp->doc = NULL; 12807 tmp = tmp->next; 12808 } 12809 } 12810 } else { 12811 ret = NULL; 12812 } 12813 xmlFreeDoc(ctxt->myDoc); 12814 ctxt->myDoc = NULL; 12815 } 12816 if (sax != NULL) ctxt->sax = NULL; 12817 xmlFreeParserCtxt(ctxt); 12818 12819 return(ret); 12820 } 12821 12822 12823 /** 12824 * xmlParseDTD: 12825 * @ExternalID: a NAME* containing the External ID of the DTD 12826 * @SystemID: a NAME* containing the URL to the DTD 12827 * 12828 * Load and parse an external subset. 
12829 * 12830 * Returns the resulting xmlDtdPtr or NULL in case of error. 12831 */ 12832 12833 xmlDtdPtr 12834 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12835 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12836 } 12837 #endif /* LIBXML_VALID_ENABLED */ 12838 12839 /************************************************************************ 12840 * * 12841 * Front ends when parsing an Entity * 12842 * * 12843 ************************************************************************/ 12844 12845 /** 12846 * xmlParseCtxtExternalEntity: 12847 * @ctx: the existing parsing context 12848 * @URL: the URL for the entity to load 12849 * @ID: the System ID for the entity to load 12850 * @lst: the return value for the set of parsed nodes 12851 * 12852 * Parse an external general entity within an existing parsing context 12853 * An external general parsed entity is well-formed if it matches the 12854 * production labeled extParsedEnt. 12855 * 12856 * [78] extParsedEnt ::= TextDecl? 
content 12857 * 12858 * Returns 0 if the entity is well formed, -1 in case of args problem and 12859 * the parser error code otherwise 12860 */ 12861 12862 int 12863 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12864 const xmlChar *ID, xmlNodePtr *lst) { 12865 xmlParserCtxtPtr ctxt; 12866 xmlDocPtr newDoc; 12867 xmlNodePtr newRoot; 12868 xmlSAXHandlerPtr oldsax = NULL; 12869 int ret = 0; 12870 xmlChar start[4]; 12871 xmlCharEncoding enc; 12872 12873 if (ctx == NULL) return(-1); 12874 12875 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12876 (ctx->depth > 1024)) { 12877 return(XML_ERR_ENTITY_LOOP); 12878 } 12879 12880 if (lst != NULL) 12881 *lst = NULL; 12882 if ((URL == NULL) && (ID == NULL)) 12883 return(-1); 12884 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12885 return(-1); 12886 12887 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12888 if (ctxt == NULL) { 12889 return(-1); 12890 } 12891 12892 oldsax = ctxt->sax; 12893 ctxt->sax = ctx->sax; 12894 xmlDetectSAX2(ctxt); 12895 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12896 if (newDoc == NULL) { 12897 xmlFreeParserCtxt(ctxt); 12898 return(-1); 12899 } 12900 newDoc->properties = XML_DOC_INTERNAL; 12901 if (ctx->myDoc->dict) { 12902 newDoc->dict = ctx->myDoc->dict; 12903 xmlDictReference(newDoc->dict); 12904 } 12905 if (ctx->myDoc != NULL) { 12906 newDoc->intSubset = ctx->myDoc->intSubset; 12907 newDoc->extSubset = ctx->myDoc->extSubset; 12908 } 12909 if (ctx->myDoc->URL != NULL) { 12910 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12911 } 12912 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12913 if (newRoot == NULL) { 12914 ctxt->sax = oldsax; 12915 xmlFreeParserCtxt(ctxt); 12916 newDoc->intSubset = NULL; 12917 newDoc->extSubset = NULL; 12918 xmlFreeDoc(newDoc); 12919 return(-1); 12920 } 12921 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12922 nodePush(ctxt, newDoc->children); 12923 if (ctx->myDoc == NULL) { 12924 
ctxt->myDoc = newDoc; 12925 } else { 12926 ctxt->myDoc = ctx->myDoc; 12927 newDoc->children->doc = ctx->myDoc; 12928 } 12929 12930 /* 12931 * Get the 4 first bytes and decode the charset 12932 * if enc != XML_CHAR_ENCODING_NONE 12933 * plug some encoding conversion routines. 12934 */ 12935 GROW 12936 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12937 start[0] = RAW; 12938 start[1] = NXT(1); 12939 start[2] = NXT(2); 12940 start[3] = NXT(3); 12941 enc = xmlDetectCharEncoding(start, 4); 12942 if (enc != XML_CHAR_ENCODING_NONE) { 12943 xmlSwitchEncoding(ctxt, enc); 12944 } 12945 } 12946 12947 /* 12948 * Parse a possible text declaration first 12949 */ 12950 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12951 xmlParseTextDecl(ctxt); 12952 /* 12953 * An XML-1.0 document can't reference an entity not XML-1.0 12954 */ 12955 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12956 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12957 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12958 "Version mismatch between document and entity\n"); 12959 } 12960 } 12961 12962 /* 12963 * If the user provided its own SAX callbacks then reuse the 12964 * useData callback field, otherwise the expected setup in a 12965 * DOM builder is to have userData == ctxt 12966 */ 12967 if (ctx->userData == ctx) 12968 ctxt->userData = ctxt; 12969 else 12970 ctxt->userData = ctx->userData; 12971 12972 /* 12973 * Doing validity checking on chunk doesn't make sense 12974 */ 12975 ctxt->instate = XML_PARSER_CONTENT; 12976 ctxt->validate = ctx->validate; 12977 ctxt->valid = ctx->valid; 12978 ctxt->loadsubset = ctx->loadsubset; 12979 ctxt->depth = ctx->depth + 1; 12980 ctxt->replaceEntities = ctx->replaceEntities; 12981 if (ctxt->validate) { 12982 ctxt->vctxt.error = ctx->vctxt.error; 12983 ctxt->vctxt.warning = ctx->vctxt.warning; 12984 } else { 12985 ctxt->vctxt.error = NULL; 12986 ctxt->vctxt.warning = NULL; 12987 } 12988 ctxt->vctxt.nodeTab = NULL; 12989 
ctxt->vctxt.nodeNr = 0; 12990 ctxt->vctxt.nodeMax = 0; 12991 ctxt->vctxt.node = NULL; 12992 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12993 ctxt->dict = ctx->dict; 12994 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12995 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12996 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12997 ctxt->dictNames = ctx->dictNames; 12998 ctxt->attsDefault = ctx->attsDefault; 12999 ctxt->attsSpecial = ctx->attsSpecial; 13000 ctxt->linenumbers = ctx->linenumbers; 13001 13002 xmlParseContent(ctxt); 13003 13004 ctx->validate = ctxt->validate; 13005 ctx->valid = ctxt->valid; 13006 if ((RAW == '<') && (NXT(1) == '/')) { 13007 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13008 } else if (RAW != 0) { 13009 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13010 } 13011 if (ctxt->node != newDoc->children) { 13012 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13013 } 13014 13015 if (!ctxt->wellFormed) { 13016 if (ctxt->errNo == 0) 13017 ret = 1; 13018 else 13019 ret = ctxt->errNo; 13020 } else { 13021 if (lst != NULL) { 13022 xmlNodePtr cur; 13023 13024 /* 13025 * Return the newly created nodeset after unlinking it from 13026 * they pseudo parent. 
13027 */ 13028 cur = newDoc->children->children; 13029 *lst = cur; 13030 while (cur != NULL) { 13031 cur->parent = NULL; 13032 cur = cur->next; 13033 } 13034 newDoc->children->children = NULL; 13035 } 13036 ret = 0; 13037 } 13038 ctxt->sax = oldsax; 13039 ctxt->dict = NULL; 13040 ctxt->attsDefault = NULL; 13041 ctxt->attsSpecial = NULL; 13042 xmlFreeParserCtxt(ctxt); 13043 newDoc->intSubset = NULL; 13044 newDoc->extSubset = NULL; 13045 xmlFreeDoc(newDoc); 13046 13047 return(ret); 13048 } 13049 13050 /** 13051 * xmlParseExternalEntityPrivate: 13052 * @doc: the document the chunk pertains to 13053 * @oldctxt: the previous parser context if available 13054 * @sax: the SAX handler bloc (possibly NULL) 13055 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13056 * @depth: Used for loop detection, use 0 13057 * @URL: the URL for the entity to load 13058 * @ID: the System ID for the entity to load 13059 * @list: the return value for the set of parsed nodes 13060 * 13061 * Private version of xmlParseExternalEntity() 13062 * 13063 * Returns 0 if the entity is well formed, -1 in case of args problem and 13064 * the parser error code otherwise 13065 */ 13066 13067 static xmlParserErrors 13068 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13069 xmlSAXHandlerPtr sax, 13070 void *user_data, int depth, const xmlChar *URL, 13071 const xmlChar *ID, xmlNodePtr *list) { 13072 xmlParserCtxtPtr ctxt; 13073 xmlDocPtr newDoc; 13074 xmlNodePtr newRoot; 13075 xmlSAXHandlerPtr oldsax = NULL; 13076 xmlParserErrors ret = XML_ERR_OK; 13077 xmlChar start[4]; 13078 xmlCharEncoding enc; 13079 13080 if (((depth > 40) && 13081 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 13082 (depth > 1024)) { 13083 return(XML_ERR_ENTITY_LOOP); 13084 } 13085 13086 if (list != NULL) 13087 *list = NULL; 13088 if ((URL == NULL) && (ID == NULL)) 13089 return(XML_ERR_INTERNAL_ERROR); 13090 if (doc == NULL) 13091 return(XML_ERR_INTERNAL_ERROR); 13092 
13093 13094 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13095 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13096 ctxt->userData = ctxt; 13097 if (oldctxt != NULL) { 13098 ctxt->_private = oldctxt->_private; 13099 ctxt->loadsubset = oldctxt->loadsubset; 13100 ctxt->validate = oldctxt->validate; 13101 ctxt->external = oldctxt->external; 13102 ctxt->record_info = oldctxt->record_info; 13103 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13104 ctxt->node_seq.length = oldctxt->node_seq.length; 13105 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13106 } else { 13107 /* 13108 * Doing validity checking on chunk without context 13109 * doesn't make sense 13110 */ 13111 ctxt->_private = NULL; 13112 ctxt->validate = 0; 13113 ctxt->external = 2; 13114 ctxt->loadsubset = 0; 13115 } 13116 if (sax != NULL) { 13117 oldsax = ctxt->sax; 13118 ctxt->sax = sax; 13119 if (user_data != NULL) 13120 ctxt->userData = user_data; 13121 } 13122 xmlDetectSAX2(ctxt); 13123 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13124 if (newDoc == NULL) { 13125 ctxt->node_seq.maximum = 0; 13126 ctxt->node_seq.length = 0; 13127 ctxt->node_seq.buffer = NULL; 13128 xmlFreeParserCtxt(ctxt); 13129 return(XML_ERR_INTERNAL_ERROR); 13130 } 13131 newDoc->properties = XML_DOC_INTERNAL; 13132 newDoc->intSubset = doc->intSubset; 13133 newDoc->extSubset = doc->extSubset; 13134 newDoc->dict = doc->dict; 13135 xmlDictReference(newDoc->dict); 13136 13137 if (doc->URL != NULL) { 13138 newDoc->URL = xmlStrdup(doc->URL); 13139 } 13140 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13141 if (newRoot == NULL) { 13142 if (sax != NULL) 13143 ctxt->sax = oldsax; 13144 ctxt->node_seq.maximum = 0; 13145 ctxt->node_seq.length = 0; 13146 ctxt->node_seq.buffer = NULL; 13147 xmlFreeParserCtxt(ctxt); 13148 newDoc->intSubset = NULL; 13149 newDoc->extSubset = NULL; 13150 xmlFreeDoc(newDoc); 13151 return(XML_ERR_INTERNAL_ERROR); 13152 } 13153 xmlAddChild((xmlNodePtr) newDoc, newRoot); 
13154 nodePush(ctxt, newDoc->children); 13155 ctxt->myDoc = doc; 13156 newRoot->doc = doc; 13157 13158 /* 13159 * Get the 4 first bytes and decode the charset 13160 * if enc != XML_CHAR_ENCODING_NONE 13161 * plug some encoding conversion routines. 13162 */ 13163 GROW; 13164 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13165 start[0] = RAW; 13166 start[1] = NXT(1); 13167 start[2] = NXT(2); 13168 start[3] = NXT(3); 13169 enc = xmlDetectCharEncoding(start, 4); 13170 if (enc != XML_CHAR_ENCODING_NONE) { 13171 xmlSwitchEncoding(ctxt, enc); 13172 } 13173 } 13174 13175 /* 13176 * Parse a possible text declaration first 13177 */ 13178 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13179 xmlParseTextDecl(ctxt); 13180 } 13181 13182 ctxt->instate = XML_PARSER_CONTENT; 13183 ctxt->depth = depth; 13184 13185 xmlParseContent(ctxt); 13186 13187 if ((RAW == '<') && (NXT(1) == '/')) { 13188 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13189 } else if (RAW != 0) { 13190 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13191 } 13192 if (ctxt->node != newDoc->children) { 13193 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13194 } 13195 13196 if (!ctxt->wellFormed) { 13197 if (ctxt->errNo == 0) 13198 ret = XML_ERR_INTERNAL_ERROR; 13199 else 13200 ret = (xmlParserErrors)ctxt->errNo; 13201 } else { 13202 if (list != NULL) { 13203 xmlNodePtr cur; 13204 13205 /* 13206 * Return the newly created nodeset after unlinking it from 13207 * they pseudo parent. 13208 */ 13209 cur = newDoc->children->children; 13210 *list = cur; 13211 while (cur != NULL) { 13212 cur->parent = NULL; 13213 cur = cur->next; 13214 } 13215 newDoc->children->children = NULL; 13216 } 13217 ret = XML_ERR_OK; 13218 } 13219 13220 /* 13221 * Record in the parent context the number of entities replacement 13222 * done when parsing that reference. 
13223 */ 13224 if (oldctxt != NULL) 13225 oldctxt->nbentities += ctxt->nbentities; 13226 13227 /* 13228 * Also record the size of the entity parsed 13229 */ 13230 if (ctxt->input != NULL) { 13231 oldctxt->sizeentities += ctxt->input->consumed; 13232 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13233 } 13234 /* 13235 * And record the last error if any 13236 */ 13237 if (ctxt->lastError.code != XML_ERR_OK) 13238 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13239 13240 if (sax != NULL) 13241 ctxt->sax = oldsax; 13242 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13243 oldctxt->node_seq.length = ctxt->node_seq.length; 13244 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13245 ctxt->node_seq.maximum = 0; 13246 ctxt->node_seq.length = 0; 13247 ctxt->node_seq.buffer = NULL; 13248 xmlFreeParserCtxt(ctxt); 13249 newDoc->intSubset = NULL; 13250 newDoc->extSubset = NULL; 13251 xmlFreeDoc(newDoc); 13252 13253 return(ret); 13254 } 13255 13256 #ifdef LIBXML_SAX1_ENABLED 13257 /** 13258 * xmlParseExternalEntity: 13259 * @doc: the document the chunk pertains to 13260 * @sax: the SAX handler bloc (possibly NULL) 13261 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13262 * @depth: Used for loop detection, use 0 13263 * @URL: the URL for the entity to load 13264 * @ID: the System ID for the entity to load 13265 * @lst: the return value for the set of parsed nodes 13266 * 13267 * Parse an external general entity 13268 * An external general parsed entity is well-formed if it matches the 13269 * production labeled extParsedEnt. 13270 * 13271 * [78] extParsedEnt ::= TextDecl? 
content 13272 * 13273 * Returns 0 if the entity is well formed, -1 in case of args problem and 13274 * the parser error code otherwise 13275 */ 13276 13277 int 13278 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13279 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13280 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13281 ID, lst)); 13282 } 13283 13284 /** 13285 * xmlParseBalancedChunkMemory: 13286 * @doc: the document the chunk pertains to 13287 * @sax: the SAX handler bloc (possibly NULL) 13288 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13289 * @depth: Used for loop detection, use 0 13290 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13291 * @lst: the return value for the set of parsed nodes 13292 * 13293 * Parse a well-balanced chunk of an XML document 13294 * called by the parser 13295 * The allowed sequence for the Well Balanced Chunk is the one defined by 13296 * the content production in the XML grammar: 13297 * 13298 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13299 * 13300 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13301 * the parser error code otherwise 13302 */ 13303 13304 int 13305 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13306 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13307 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13308 depth, string, lst, 0 ); 13309 } 13310 #endif /* LIBXML_SAX1_ENABLED */ 13311 13312 /** 13313 * xmlParseBalancedChunkMemoryInternal: 13314 * @oldctxt: the existing parsing context 13315 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13316 * @user_data: the user data field for the parser context 13317 * @lst: the return value for the set of parsed nodes 13318 * 13319 * 13320 * Parse a well-balanced chunk of an XML document 13321 * called by the parser 
13322 * The allowed sequence for the Well Balanced Chunk is the one defined by 13323 * the content production in the XML grammar: 13324 * 13325 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13326 * 13327 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13328 * error code otherwise 13329 * 13330 * In case recover is set to 1, the nodelist will not be empty even if 13331 * the parsed chunk is not well balanced. 13332 */ 13333 static xmlParserErrors 13334 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 13335 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 13336 xmlParserCtxtPtr ctxt; 13337 xmlDocPtr newDoc = NULL; 13338 xmlNodePtr newRoot; 13339 xmlSAXHandlerPtr oldsax = NULL; 13340 xmlNodePtr content = NULL; 13341 xmlNodePtr last = NULL; 13342 int size; 13343 xmlParserErrors ret = XML_ERR_OK; 13344 #ifdef SAX2 13345 int i; 13346 #endif 13347 13348 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 13349 (oldctxt->depth > 1024)) { 13350 return(XML_ERR_ENTITY_LOOP); 13351 } 13352 13353 13354 if (lst != NULL) 13355 *lst = NULL; 13356 if (string == NULL) 13357 return(XML_ERR_INTERNAL_ERROR); 13358 13359 size = xmlStrlen(string); 13360 13361 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13362 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13363 if (user_data != NULL) 13364 ctxt->userData = user_data; 13365 else 13366 ctxt->userData = ctxt; 13367 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13368 ctxt->dict = oldctxt->dict; 13369 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13370 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13371 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13372 13373 #ifdef SAX2 13374 /* propagate namespaces down the entity */ 13375 for (i = 0;i < oldctxt->nsNr;i += 2) { 13376 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 13377 } 13378 #endif 13379 13380 oldsax = 
ctxt->sax; 13381 ctxt->sax = oldctxt->sax; 13382 xmlDetectSAX2(ctxt); 13383 ctxt->replaceEntities = oldctxt->replaceEntities; 13384 ctxt->options = oldctxt->options; 13385 13386 ctxt->_private = oldctxt->_private; 13387 if (oldctxt->myDoc == NULL) { 13388 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13389 if (newDoc == NULL) { 13390 ctxt->sax = oldsax; 13391 ctxt->dict = NULL; 13392 xmlFreeParserCtxt(ctxt); 13393 return(XML_ERR_INTERNAL_ERROR); 13394 } 13395 newDoc->properties = XML_DOC_INTERNAL; 13396 newDoc->dict = ctxt->dict; 13397 xmlDictReference(newDoc->dict); 13398 ctxt->myDoc = newDoc; 13399 } else { 13400 ctxt->myDoc = oldctxt->myDoc; 13401 content = ctxt->myDoc->children; 13402 last = ctxt->myDoc->last; 13403 } 13404 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13405 if (newRoot == NULL) { 13406 ctxt->sax = oldsax; 13407 ctxt->dict = NULL; 13408 xmlFreeParserCtxt(ctxt); 13409 if (newDoc != NULL) { 13410 xmlFreeDoc(newDoc); 13411 } 13412 return(XML_ERR_INTERNAL_ERROR); 13413 } 13414 ctxt->myDoc->children = NULL; 13415 ctxt->myDoc->last = NULL; 13416 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13417 nodePush(ctxt, ctxt->myDoc->children); 13418 ctxt->instate = XML_PARSER_CONTENT; 13419 ctxt->depth = oldctxt->depth + 1; 13420 13421 ctxt->validate = 0; 13422 ctxt->loadsubset = oldctxt->loadsubset; 13423 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13424 /* 13425 * ID/IDREF registration will be done in xmlValidateElement below 13426 */ 13427 ctxt->loadsubset |= XML_SKIP_IDS; 13428 } 13429 ctxt->dictNames = oldctxt->dictNames; 13430 ctxt->attsDefault = oldctxt->attsDefault; 13431 ctxt->attsSpecial = oldctxt->attsSpecial; 13432 13433 xmlParseContent(ctxt); 13434 if ((RAW == '<') && (NXT(1) == '/')) { 13435 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13436 } else if (RAW != 0) { 13437 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13438 } 13439 if (ctxt->node != ctxt->myDoc->children) { 13440 xmlFatalErr(ctxt, 
XML_ERR_NOT_WELL_BALANCED, NULL); 13441 } 13442 13443 if (!ctxt->wellFormed) { 13444 if (ctxt->errNo == 0) 13445 ret = XML_ERR_INTERNAL_ERROR; 13446 else 13447 ret = (xmlParserErrors)ctxt->errNo; 13448 } else { 13449 ret = XML_ERR_OK; 13450 } 13451 13452 if ((lst != NULL) && (ret == XML_ERR_OK)) { 13453 xmlNodePtr cur; 13454 13455 /* 13456 * Return the newly created nodeset after unlinking it from 13457 * they pseudo parent. 13458 */ 13459 cur = ctxt->myDoc->children->children; 13460 *lst = cur; 13461 while (cur != NULL) { 13462 #ifdef LIBXML_VALID_ENABLED 13463 if ((oldctxt->validate) && (oldctxt->wellFormed) && 13464 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13465 (cur->type == XML_ELEMENT_NODE)) { 13466 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13467 oldctxt->myDoc, cur); 13468 } 13469 #endif /* LIBXML_VALID_ENABLED */ 13470 cur->parent = NULL; 13471 cur = cur->next; 13472 } 13473 ctxt->myDoc->children->children = NULL; 13474 } 13475 if (ctxt->myDoc != NULL) { 13476 xmlFreeNode(ctxt->myDoc->children); 13477 ctxt->myDoc->children = content; 13478 ctxt->myDoc->last = last; 13479 } 13480 13481 /* 13482 * Record in the parent context the number of entities replacement 13483 * done when parsing that reference. 
13484 */ 13485 if (oldctxt != NULL) 13486 oldctxt->nbentities += ctxt->nbentities; 13487 13488 /* 13489 * Also record the last error if any 13490 */ 13491 if (ctxt->lastError.code != XML_ERR_OK) 13492 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13493 13494 ctxt->sax = oldsax; 13495 ctxt->dict = NULL; 13496 ctxt->attsDefault = NULL; 13497 ctxt->attsSpecial = NULL; 13498 xmlFreeParserCtxt(ctxt); 13499 if (newDoc != NULL) { 13500 xmlFreeDoc(newDoc); 13501 } 13502 13503 return(ret); 13504 } 13505 13506 /** 13507 * xmlParseInNodeContext: 13508 * @node: the context node 13509 * @data: the input string 13510 * @datalen: the input string length in bytes 13511 * @options: a combination of xmlParserOption 13512 * @lst: the return value for the set of parsed nodes 13513 * 13514 * Parse a well-balanced chunk of an XML document 13515 * within the context (DTD, namespaces, etc ...) of the given node. 13516 * 13517 * The allowed sequence for the data is a Well Balanced Chunk defined by 13518 * the content production in the XML grammar: 13519 * 13520 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13521 * 13522 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13523 * error code otherwise 13524 */ 13525 xmlParserErrors 13526 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13527 int options, xmlNodePtr *lst) { 13528 #ifdef SAX2 13529 xmlParserCtxtPtr ctxt; 13530 xmlDocPtr doc = NULL; 13531 xmlNodePtr fake, cur; 13532 int nsnr = 0; 13533 13534 xmlParserErrors ret = XML_ERR_OK; 13535 13536 /* 13537 * check all input parameters, grab the document 13538 */ 13539 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13540 return(XML_ERR_INTERNAL_ERROR); 13541 switch (node->type) { 13542 case XML_ELEMENT_NODE: 13543 case XML_ATTRIBUTE_NODE: 13544 case XML_TEXT_NODE: 13545 case XML_CDATA_SECTION_NODE: 13546 case XML_ENTITY_REF_NODE: 13547 case XML_PI_NODE: 13548 case XML_COMMENT_NODE: 13549 case 
XML_DOCUMENT_NODE: 13550 case XML_HTML_DOCUMENT_NODE: 13551 break; 13552 default: 13553 return(XML_ERR_INTERNAL_ERROR); 13554 13555 } 13556 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13557 (node->type != XML_DOCUMENT_NODE) && 13558 (node->type != XML_HTML_DOCUMENT_NODE)) 13559 node = node->parent; 13560 if (node == NULL) 13561 return(XML_ERR_INTERNAL_ERROR); 13562 if (node->type == XML_ELEMENT_NODE) 13563 doc = node->doc; 13564 else 13565 doc = (xmlDocPtr) node; 13566 if (doc == NULL) 13567 return(XML_ERR_INTERNAL_ERROR); 13568 13569 /* 13570 * allocate a context and set-up everything not related to the 13571 * node position in the tree 13572 */ 13573 if (doc->type == XML_DOCUMENT_NODE) 13574 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13575 #ifdef LIBXML_HTML_ENABLED 13576 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13577 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13578 /* 13579 * When parsing in context, it makes no sense to add implied 13580 * elements like html/body/etc... 13581 */ 13582 options |= HTML_PARSE_NOIMPLIED; 13583 } 13584 #endif 13585 else 13586 return(XML_ERR_INTERNAL_ERROR); 13587 13588 if (ctxt == NULL) 13589 return(XML_ERR_NO_MEMORY); 13590 13591 /* 13592 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13593 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13594 * we must wait until the last moment to free the original one. 
13595 */ 13596 if (doc->dict != NULL) { 13597 if (ctxt->dict != NULL) 13598 xmlDictFree(ctxt->dict); 13599 ctxt->dict = doc->dict; 13600 } else 13601 options |= XML_PARSE_NODICT; 13602 13603 if (doc->encoding != NULL) { 13604 xmlCharEncodingHandlerPtr hdlr; 13605 13606 if (ctxt->encoding != NULL) 13607 xmlFree((xmlChar *) ctxt->encoding); 13608 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13609 13610 hdlr = xmlFindCharEncodingHandler(doc->encoding); 13611 if (hdlr != NULL) { 13612 xmlSwitchToEncoding(ctxt, hdlr); 13613 } else { 13614 return(XML_ERR_UNSUPPORTED_ENCODING); 13615 } 13616 } 13617 13618 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13619 xmlDetectSAX2(ctxt); 13620 ctxt->myDoc = doc; 13621 13622 fake = xmlNewComment(NULL); 13623 if (fake == NULL) { 13624 xmlFreeParserCtxt(ctxt); 13625 return(XML_ERR_NO_MEMORY); 13626 } 13627 xmlAddChild(node, fake); 13628 13629 if (node->type == XML_ELEMENT_NODE) { 13630 nodePush(ctxt, node); 13631 /* 13632 * initialize the SAX2 namespaces stack 13633 */ 13634 cur = node; 13635 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13636 xmlNsPtr ns = cur->nsDef; 13637 const xmlChar *iprefix, *ihref; 13638 13639 while (ns != NULL) { 13640 if (ctxt->dict) { 13641 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13642 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13643 } else { 13644 iprefix = ns->prefix; 13645 ihref = ns->href; 13646 } 13647 13648 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13649 nsPush(ctxt, iprefix, ihref); 13650 nsnr++; 13651 } 13652 ns = ns->next; 13653 } 13654 cur = cur->parent; 13655 } 13656 ctxt->instate = XML_PARSER_CONTENT; 13657 } 13658 13659 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13660 /* 13661 * ID/IDREF registration will be done in xmlValidateElement below 13662 */ 13663 ctxt->loadsubset |= XML_SKIP_IDS; 13664 } 13665 13666 #ifdef LIBXML_HTML_ENABLED 13667 if (doc->type == XML_HTML_DOCUMENT_NODE) 13668 __htmlParseContent(ctxt); 13669 else 13670 
#endif 13671 xmlParseContent(ctxt); 13672 13673 nsPop(ctxt, nsnr); 13674 if ((RAW == '<') && (NXT(1) == '/')) { 13675 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13676 } else if (RAW != 0) { 13677 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13678 } 13679 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13680 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13681 ctxt->wellFormed = 0; 13682 } 13683 13684 if (!ctxt->wellFormed) { 13685 if (ctxt->errNo == 0) 13686 ret = XML_ERR_INTERNAL_ERROR; 13687 else 13688 ret = (xmlParserErrors)ctxt->errNo; 13689 } else { 13690 ret = XML_ERR_OK; 13691 } 13692 13693 /* 13694 * Return the newly created nodeset after unlinking it from 13695 * the pseudo sibling. 13696 */ 13697 13698 cur = fake->next; 13699 fake->next = NULL; 13700 node->last = fake; 13701 13702 if (cur != NULL) { 13703 cur->prev = NULL; 13704 } 13705 13706 *lst = cur; 13707 13708 while (cur != NULL) { 13709 cur->parent = NULL; 13710 cur = cur->next; 13711 } 13712 13713 xmlUnlinkNode(fake); 13714 xmlFreeNode(fake); 13715 13716 13717 if (ret != XML_ERR_OK) { 13718 xmlFreeNodeList(*lst); 13719 *lst = NULL; 13720 } 13721 13722 if (doc->dict != NULL) 13723 ctxt->dict = NULL; 13724 xmlFreeParserCtxt(ctxt); 13725 13726 return(ret); 13727 #else /* !SAX2 */ 13728 return(XML_ERR_INTERNAL_ERROR); 13729 #endif 13730 } 13731 13732 #ifdef LIBXML_SAX1_ENABLED 13733 /** 13734 * xmlParseBalancedChunkMemoryRecover: 13735 * @doc: the document the chunk pertains to 13736 * @sax: the SAX handler bloc (possibly NULL) 13737 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13738 * @depth: Used for loop detection, use 0 13739 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13740 * @lst: the return value for the set of parsed nodes 13741 * @recover: return nodes even if the data is broken (use 0) 13742 * 13743 * 13744 * Parse a well-balanced chunk of an XML document 13745 * called by the parser 13746 * The allowed sequence for the 
Well Balanced Chunk is the one defined by 13747 * the content production in the XML grammar: 13748 * 13749 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13750 * 13751 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13752 * the parser error code otherwise 13753 * 13754 * In case recover is set to 1, the nodelist will not be empty even if 13755 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13756 * some extent. 13757 */ 13758 int 13759 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13760 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13761 int recover) { 13762 xmlParserCtxtPtr ctxt; 13763 xmlDocPtr newDoc; 13764 xmlSAXHandlerPtr oldsax = NULL; 13765 xmlNodePtr content, newRoot; 13766 int size; 13767 int ret = 0; 13768 13769 if (depth > 40) { 13770 return(XML_ERR_ENTITY_LOOP); 13771 } 13772 13773 13774 if (lst != NULL) 13775 *lst = NULL; 13776 if (string == NULL) 13777 return(-1); 13778 13779 size = xmlStrlen(string); 13780 13781 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13782 if (ctxt == NULL) return(-1); 13783 ctxt->userData = ctxt; 13784 if (sax != NULL) { 13785 oldsax = ctxt->sax; 13786 ctxt->sax = sax; 13787 if (user_data != NULL) 13788 ctxt->userData = user_data; 13789 } 13790 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13791 if (newDoc == NULL) { 13792 xmlFreeParserCtxt(ctxt); 13793 return(-1); 13794 } 13795 newDoc->properties = XML_DOC_INTERNAL; 13796 if ((doc != NULL) && (doc->dict != NULL)) { 13797 xmlDictFree(ctxt->dict); 13798 ctxt->dict = doc->dict; 13799 xmlDictReference(ctxt->dict); 13800 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13801 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13802 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13803 ctxt->dictNames = 1; 13804 } else { 13805 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13806 } 13807 if (doc 
!= NULL) { 13808 newDoc->intSubset = doc->intSubset; 13809 newDoc->extSubset = doc->extSubset; 13810 } 13811 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13812 if (newRoot == NULL) { 13813 if (sax != NULL) 13814 ctxt->sax = oldsax; 13815 xmlFreeParserCtxt(ctxt); 13816 newDoc->intSubset = NULL; 13817 newDoc->extSubset = NULL; 13818 xmlFreeDoc(newDoc); 13819 return(-1); 13820 } 13821 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13822 nodePush(ctxt, newRoot); 13823 if (doc == NULL) { 13824 ctxt->myDoc = newDoc; 13825 } else { 13826 ctxt->myDoc = newDoc; 13827 newDoc->children->doc = doc; 13828 /* Ensure that doc has XML spec namespace */ 13829 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13830 newDoc->oldNs = doc->oldNs; 13831 } 13832 ctxt->instate = XML_PARSER_CONTENT; 13833 ctxt->depth = depth; 13834 13835 /* 13836 * Doing validity checking on chunk doesn't make sense 13837 */ 13838 ctxt->validate = 0; 13839 ctxt->loadsubset = 0; 13840 xmlDetectSAX2(ctxt); 13841 13842 if ( doc != NULL ){ 13843 content = doc->children; 13844 doc->children = NULL; 13845 xmlParseContent(ctxt); 13846 doc->children = content; 13847 } 13848 else { 13849 xmlParseContent(ctxt); 13850 } 13851 if ((RAW == '<') && (NXT(1) == '/')) { 13852 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13853 } else if (RAW != 0) { 13854 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13855 } 13856 if (ctxt->node != newDoc->children) { 13857 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13858 } 13859 13860 if (!ctxt->wellFormed) { 13861 if (ctxt->errNo == 0) 13862 ret = 1; 13863 else 13864 ret = ctxt->errNo; 13865 } else { 13866 ret = 0; 13867 } 13868 13869 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 13870 xmlNodePtr cur; 13871 13872 /* 13873 * Return the newly created nodeset after unlinking it from 13874 * they pseudo parent. 
13875 */ 13876 cur = newDoc->children->children; 13877 *lst = cur; 13878 while (cur != NULL) { 13879 xmlSetTreeDoc(cur, doc); 13880 cur->parent = NULL; 13881 cur = cur->next; 13882 } 13883 newDoc->children->children = NULL; 13884 } 13885 13886 if (sax != NULL) 13887 ctxt->sax = oldsax; 13888 xmlFreeParserCtxt(ctxt); 13889 newDoc->intSubset = NULL; 13890 newDoc->extSubset = NULL; 13891 newDoc->oldNs = NULL; 13892 xmlFreeDoc(newDoc); 13893 13894 return(ret); 13895 } 13896 13897 /** 13898 * xmlSAXParseEntity: 13899 * @sax: the SAX handler block 13900 * @filename: the filename 13901 * 13902 * parse an XML external entity out of context and build a tree. 13903 * It use the given SAX function block to handle the parsing callback. 13904 * If sax is NULL, fallback to the default DOM tree building routines. 13905 * 13906 * [78] extParsedEnt ::= TextDecl? content 13907 * 13908 * This correspond to a "Well Balanced" chunk 13909 * 13910 * Returns the resulting document tree 13911 */ 13912 13913 xmlDocPtr 13914 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13915 xmlDocPtr ret; 13916 xmlParserCtxtPtr ctxt; 13917 13918 ctxt = xmlCreateFileParserCtxt(filename); 13919 if (ctxt == NULL) { 13920 return(NULL); 13921 } 13922 if (sax != NULL) { 13923 if (ctxt->sax != NULL) 13924 xmlFree(ctxt->sax); 13925 ctxt->sax = sax; 13926 ctxt->userData = NULL; 13927 } 13928 13929 xmlParseExtParsedEnt(ctxt); 13930 13931 if (ctxt->wellFormed) 13932 ret = ctxt->myDoc; 13933 else { 13934 ret = NULL; 13935 xmlFreeDoc(ctxt->myDoc); 13936 ctxt->myDoc = NULL; 13937 } 13938 if (sax != NULL) 13939 ctxt->sax = NULL; 13940 xmlFreeParserCtxt(ctxt); 13941 13942 return(ret); 13943 } 13944 13945 /** 13946 * xmlParseEntity: 13947 * @filename: the filename 13948 * 13949 * parse an XML external entity out of context and build a tree. 13950 * 13951 * [78] extParsedEnt ::= TextDecl? 
content 13952 * 13953 * This correspond to a "Well Balanced" chunk 13954 * 13955 * Returns the resulting document tree 13956 */ 13957 13958 xmlDocPtr 13959 xmlParseEntity(const char *filename) { 13960 return(xmlSAXParseEntity(NULL, filename)); 13961 } 13962 #endif /* LIBXML_SAX1_ENABLED */ 13963 13964 /** 13965 * xmlCreateEntityParserCtxtInternal: 13966 * @URL: the entity URL 13967 * @ID: the entity PUBLIC ID 13968 * @base: a possible base for the target URI 13969 * @pctx: parser context used to set options on new context 13970 * 13971 * Create a parser context for an external entity 13972 * Automatic support for ZLIB/Compress compressed document is provided 13973 * by default if found at compile-time. 13974 * 13975 * Returns the new parser context or NULL 13976 */ 13977 static xmlParserCtxtPtr 13978 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13979 const xmlChar *base, xmlParserCtxtPtr pctx) { 13980 xmlParserCtxtPtr ctxt; 13981 xmlParserInputPtr inputStream; 13982 char *directory = NULL; 13983 xmlChar *uri; 13984 13985 ctxt = xmlNewParserCtxt(); 13986 if (ctxt == NULL) { 13987 return(NULL); 13988 } 13989 13990 if (pctx != NULL) { 13991 ctxt->options = pctx->options; 13992 ctxt->_private = pctx->_private; 13993 } 13994 13995 uri = xmlBuildURI(URL, base); 13996 13997 if (uri == NULL) { 13998 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13999 if (inputStream == NULL) { 14000 xmlFreeParserCtxt(ctxt); 14001 return(NULL); 14002 } 14003 14004 inputPush(ctxt, inputStream); 14005 14006 if ((ctxt->directory == NULL) && (directory == NULL)) 14007 directory = xmlParserGetDirectory((char *)URL); 14008 if ((ctxt->directory == NULL) && (directory != NULL)) 14009 ctxt->directory = directory; 14010 } else { 14011 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 14012 if (inputStream == NULL) { 14013 xmlFree(uri); 14014 xmlFreeParserCtxt(ctxt); 14015 return(NULL); 14016 } 14017 14018 inputPush(ctxt, 
inputStream); 14019 14020 if ((ctxt->directory == NULL) && (directory == NULL)) 14021 directory = xmlParserGetDirectory((char *)uri); 14022 if ((ctxt->directory == NULL) && (directory != NULL)) 14023 ctxt->directory = directory; 14024 xmlFree(uri); 14025 } 14026 return(ctxt); 14027 } 14028 14029 /** 14030 * xmlCreateEntityParserCtxt: 14031 * @URL: the entity URL 14032 * @ID: the entity PUBLIC ID 14033 * @base: a possible base for the target URI 14034 * 14035 * Create a parser context for an external entity 14036 * Automatic support for ZLIB/Compress compressed document is provided 14037 * by default if found at compile-time. 14038 * 14039 * Returns the new parser context or NULL 14040 */ 14041 xmlParserCtxtPtr 14042 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14043 const xmlChar *base) { 14044 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14045 14046 } 14047 14048 /************************************************************************ 14049 * * 14050 * Front ends when parsing from a file * 14051 * * 14052 ************************************************************************/ 14053 14054 /** 14055 * xmlCreateURLParserCtxt: 14056 * @filename: the filename or URL 14057 * @options: a combination of xmlParserOption 14058 * 14059 * Create a parser context for a file or URL content. 
14060 * Automatic support for ZLIB/Compress compressed document is provided 14061 * by default if found at compile-time and for file accesses 14062 * 14063 * Returns the new parser context or NULL 14064 */ 14065 xmlParserCtxtPtr 14066 xmlCreateURLParserCtxt(const char *filename, int options) 14067 { 14068 xmlParserCtxtPtr ctxt; 14069 xmlParserInputPtr inputStream; 14070 char *directory = NULL; 14071 14072 ctxt = xmlNewParserCtxt(); 14073 if (ctxt == NULL) { 14074 xmlErrMemory(NULL, "cannot allocate parser context"); 14075 return(NULL); 14076 } 14077 14078 if (options) 14079 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14080 ctxt->linenumbers = 1; 14081 14082 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14083 if (inputStream == NULL) { 14084 xmlFreeParserCtxt(ctxt); 14085 return(NULL); 14086 } 14087 14088 inputPush(ctxt, inputStream); 14089 if ((ctxt->directory == NULL) && (directory == NULL)) 14090 directory = xmlParserGetDirectory(filename); 14091 if ((ctxt->directory == NULL) && (directory != NULL)) 14092 ctxt->directory = directory; 14093 14094 return(ctxt); 14095 } 14096 14097 /** 14098 * xmlCreateFileParserCtxt: 14099 * @filename: the filename 14100 * 14101 * Create a parser context for a file content. 14102 * Automatic support for ZLIB/Compress compressed document is provided 14103 * by default if found at compile-time. 14104 * 14105 * Returns the new parser context or NULL 14106 */ 14107 xmlParserCtxtPtr 14108 xmlCreateFileParserCtxt(const char *filename) 14109 { 14110 return(xmlCreateURLParserCtxt(filename, 0)); 14111 } 14112 14113 #ifdef LIBXML_SAX1_ENABLED 14114 /** 14115 * xmlSAXParseFileWithData: 14116 * @sax: the SAX handler block 14117 * @filename: the filename 14118 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14119 * documents 14120 * @data: the userdata 14121 * 14122 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14123 * compressed document is provided by default if found at compile-time. 14124 * It use the given SAX function block to handle the parsing callback. 14125 * If sax is NULL, fallback to the default DOM tree building routines. 14126 * 14127 * User data (void *) is stored within the parser context in the 14128 * context's _private member, so it is available nearly everywhere in libxml 14129 * 14130 * Returns the resulting document tree 14131 */ 14132 14133 xmlDocPtr 14134 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14135 int recovery, void *data) { 14136 xmlDocPtr ret; 14137 xmlParserCtxtPtr ctxt; 14138 14139 xmlInitParser(); 14140 14141 ctxt = xmlCreateFileParserCtxt(filename); 14142 if (ctxt == NULL) { 14143 return(NULL); 14144 } 14145 if (sax != NULL) { 14146 if (ctxt->sax != NULL) 14147 xmlFree(ctxt->sax); 14148 ctxt->sax = sax; 14149 } 14150 xmlDetectSAX2(ctxt); 14151 if (data!=NULL) { 14152 ctxt->_private = data; 14153 } 14154 14155 if (ctxt->directory == NULL) 14156 ctxt->directory = xmlParserGetDirectory(filename); 14157 14158 ctxt->recovery = recovery; 14159 14160 xmlParseDocument(ctxt); 14161 14162 if ((ctxt->wellFormed) || recovery) { 14163 ret = ctxt->myDoc; 14164 if (ret != NULL) { 14165 if (ctxt->input->buf->compressed > 0) 14166 ret->compression = 9; 14167 else 14168 ret->compression = ctxt->input->buf->compressed; 14169 } 14170 } 14171 else { 14172 ret = NULL; 14173 xmlFreeDoc(ctxt->myDoc); 14174 ctxt->myDoc = NULL; 14175 } 14176 if (sax != NULL) 14177 ctxt->sax = NULL; 14178 xmlFreeParserCtxt(ctxt); 14179 14180 return(ret); 14181 } 14182 14183 /** 14184 * xmlSAXParseFile: 14185 * @sax: the SAX handler block 14186 * @filename: the filename 14187 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14188 * documents 14189 * 14190 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14191 * compressed document is provided by default if found at compile-time. 14192 * It use the given SAX function block to handle the parsing callback. 14193 * If sax is NULL, fallback to the default DOM tree building routines. 14194 * 14195 * Returns the resulting document tree 14196 */ 14197 14198 xmlDocPtr 14199 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14200 int recovery) { 14201 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14202 } 14203 14204 /** 14205 * xmlRecoverDoc: 14206 * @cur: a pointer to an array of xmlChar 14207 * 14208 * parse an XML in-memory document and build a tree. 14209 * In the case the document is not Well Formed, a attempt to build a 14210 * tree is tried anyway 14211 * 14212 * Returns the resulting document tree or NULL in case of failure 14213 */ 14214 14215 xmlDocPtr 14216 xmlRecoverDoc(const xmlChar *cur) { 14217 return(xmlSAXParseDoc(NULL, cur, 1)); 14218 } 14219 14220 /** 14221 * xmlParseFile: 14222 * @filename: the filename 14223 * 14224 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14225 * compressed document is provided by default if found at compile-time. 14226 * 14227 * Returns the resulting document tree if the file was wellformed, 14228 * NULL otherwise. 14229 */ 14230 14231 xmlDocPtr 14232 xmlParseFile(const char *filename) { 14233 return(xmlSAXParseFile(NULL, filename, 0)); 14234 } 14235 14236 /** 14237 * xmlRecoverFile: 14238 * @filename: the filename 14239 * 14240 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14241 * compressed document is provided by default if found at compile-time. 
14242 * In the case the document is not Well Formed, it attempts to build 14243 * a tree anyway 14244 * 14245 * Returns the resulting document tree or NULL in case of failure 14246 */ 14247 14248 xmlDocPtr 14249 xmlRecoverFile(const char *filename) { 14250 return(xmlSAXParseFile(NULL, filename, 1)); 14251 } 14252 14253 14254 /** 14255 * xmlSetupParserForBuffer: 14256 * @ctxt: an XML parser context 14257 * @buffer: a xmlChar * buffer 14258 * @filename: a file name 14259 * 14260 * Setup the parser context to parse a new buffer; Clears any prior 14261 * contents from the parser context. The buffer parameter must not be 14262 * NULL, but the filename parameter can be 14263 */ 14264 void 14265 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14266 const char* filename) 14267 { 14268 xmlParserInputPtr input; 14269 14270 if ((ctxt == NULL) || (buffer == NULL)) 14271 return; 14272 14273 input = xmlNewInputStream(ctxt); 14274 if (input == NULL) { 14275 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14276 xmlClearParserCtxt(ctxt); 14277 return; 14278 } 14279 14280 xmlClearParserCtxt(ctxt); 14281 if (filename != NULL) 14282 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14283 input->base = buffer; 14284 input->cur = buffer; 14285 input->end = &buffer[xmlStrlen(buffer)]; 14286 inputPush(ctxt, input); 14287 } 14288 14289 /** 14290 * xmlSAXUserParseFile: 14291 * @sax: a SAX handler 14292 * @user_data: The user data returned on SAX callbacks 14293 * @filename: a file name 14294 * 14295 * parse an XML file and call the given SAX handler routines. 
14296 * Automatic support for ZLIB/Compress compressed document is provided 14297 * 14298 * Returns 0 in case of success or a error number otherwise 14299 */ 14300 int 14301 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14302 const char *filename) { 14303 int ret = 0; 14304 xmlParserCtxtPtr ctxt; 14305 14306 ctxt = xmlCreateFileParserCtxt(filename); 14307 if (ctxt == NULL) return -1; 14308 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14309 xmlFree(ctxt->sax); 14310 ctxt->sax = sax; 14311 xmlDetectSAX2(ctxt); 14312 14313 if (user_data != NULL) 14314 ctxt->userData = user_data; 14315 14316 xmlParseDocument(ctxt); 14317 14318 if (ctxt->wellFormed) 14319 ret = 0; 14320 else { 14321 if (ctxt->errNo != 0) 14322 ret = ctxt->errNo; 14323 else 14324 ret = -1; 14325 } 14326 if (sax != NULL) 14327 ctxt->sax = NULL; 14328 if (ctxt->myDoc != NULL) { 14329 xmlFreeDoc(ctxt->myDoc); 14330 ctxt->myDoc = NULL; 14331 } 14332 xmlFreeParserCtxt(ctxt); 14333 14334 return ret; 14335 } 14336 #endif /* LIBXML_SAX1_ENABLED */ 14337 14338 /************************************************************************ 14339 * * 14340 * Front ends when parsing from memory * 14341 * * 14342 ************************************************************************/ 14343 14344 /** 14345 * xmlCreateMemoryParserCtxt: 14346 * @buffer: a pointer to a char array 14347 * @size: the size of the array 14348 * 14349 * Create a parser context for an XML in-memory document. 
14350 * 14351 * Returns the new parser context or NULL 14352 */ 14353 xmlParserCtxtPtr 14354 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14355 xmlParserCtxtPtr ctxt; 14356 xmlParserInputPtr input; 14357 xmlParserInputBufferPtr buf; 14358 14359 if (buffer == NULL) 14360 return(NULL); 14361 if (size <= 0) 14362 return(NULL); 14363 14364 ctxt = xmlNewParserCtxt(); 14365 if (ctxt == NULL) 14366 return(NULL); 14367 14368 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14369 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14370 if (buf == NULL) { 14371 xmlFreeParserCtxt(ctxt); 14372 return(NULL); 14373 } 14374 14375 input = xmlNewInputStream(ctxt); 14376 if (input == NULL) { 14377 xmlFreeParserInputBuffer(buf); 14378 xmlFreeParserCtxt(ctxt); 14379 return(NULL); 14380 } 14381 14382 input->filename = NULL; 14383 input->buf = buf; 14384 xmlBufResetInput(input->buf->buffer, input); 14385 14386 inputPush(ctxt, input); 14387 return(ctxt); 14388 } 14389 14390 #ifdef LIBXML_SAX1_ENABLED 14391 /** 14392 * xmlSAXParseMemoryWithData: 14393 * @sax: the SAX handler block 14394 * @buffer: an pointer to a char array 14395 * @size: the size of the array 14396 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14397 * documents 14398 * @data: the userdata 14399 * 14400 * parse an XML in-memory block and use the given SAX function block 14401 * to handle the parsing callback. If sax is NULL, fallback to the default 14402 * DOM tree building routines. 
14403 * 14404 * User data (void *) is stored within the parser context in the 14405 * context's _private member, so it is available nearly everywhere in libxml 14406 * 14407 * Returns the resulting document tree 14408 */ 14409 14410 xmlDocPtr 14411 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14412 int size, int recovery, void *data) { 14413 xmlDocPtr ret; 14414 xmlParserCtxtPtr ctxt; 14415 14416 xmlInitParser(); 14417 14418 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14419 if (ctxt == NULL) return(NULL); 14420 if (sax != NULL) { 14421 if (ctxt->sax != NULL) 14422 xmlFree(ctxt->sax); 14423 ctxt->sax = sax; 14424 } 14425 xmlDetectSAX2(ctxt); 14426 if (data!=NULL) { 14427 ctxt->_private=data; 14428 } 14429 14430 ctxt->recovery = recovery; 14431 14432 xmlParseDocument(ctxt); 14433 14434 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14435 else { 14436 ret = NULL; 14437 xmlFreeDoc(ctxt->myDoc); 14438 ctxt->myDoc = NULL; 14439 } 14440 if (sax != NULL) 14441 ctxt->sax = NULL; 14442 xmlFreeParserCtxt(ctxt); 14443 14444 return(ret); 14445 } 14446 14447 /** 14448 * xmlSAXParseMemory: 14449 * @sax: the SAX handler block 14450 * @buffer: an pointer to a char array 14451 * @size: the size of the array 14452 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14453 * documents 14454 * 14455 * parse an XML in-memory block and use the given SAX function block 14456 * to handle the parsing callback. If sax is NULL, fallback to the default 14457 * DOM tree building routines. 14458 * 14459 * Returns the resulting document tree 14460 */ 14461 xmlDocPtr 14462 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14463 int size, int recovery) { 14464 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14465 } 14466 14467 /** 14468 * xmlParseMemory: 14469 * @buffer: an pointer to a char array 14470 * @size: the size of the array 14471 * 14472 * parse an XML in-memory block and build a tree. 
14473 * 14474 * Returns the resulting document tree 14475 */ 14476 14477 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14478 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14479 } 14480 14481 /** 14482 * xmlRecoverMemory: 14483 * @buffer: an pointer to a char array 14484 * @size: the size of the array 14485 * 14486 * parse an XML in-memory block and build a tree. 14487 * In the case the document is not Well Formed, an attempt to 14488 * build a tree is tried anyway 14489 * 14490 * Returns the resulting document tree or NULL in case of error 14491 */ 14492 14493 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14494 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14495 } 14496 14497 /** 14498 * xmlSAXUserParseMemory: 14499 * @sax: a SAX handler 14500 * @user_data: The user data returned on SAX callbacks 14501 * @buffer: an in-memory XML document input 14502 * @size: the length of the XML document in bytes 14503 * 14504 * A better SAX parsing routine. 14505 * parse an XML in-memory buffer and call the given SAX handler routines. 
14506 * 14507 * Returns 0 in case of success or a error number otherwise 14508 */ 14509 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14510 const char *buffer, int size) { 14511 int ret = 0; 14512 xmlParserCtxtPtr ctxt; 14513 14514 xmlInitParser(); 14515 14516 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14517 if (ctxt == NULL) return -1; 14518 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14519 xmlFree(ctxt->sax); 14520 ctxt->sax = sax; 14521 xmlDetectSAX2(ctxt); 14522 14523 if (user_data != NULL) 14524 ctxt->userData = user_data; 14525 14526 xmlParseDocument(ctxt); 14527 14528 if (ctxt->wellFormed) 14529 ret = 0; 14530 else { 14531 if (ctxt->errNo != 0) 14532 ret = ctxt->errNo; 14533 else 14534 ret = -1; 14535 } 14536 if (sax != NULL) 14537 ctxt->sax = NULL; 14538 if (ctxt->myDoc != NULL) { 14539 xmlFreeDoc(ctxt->myDoc); 14540 ctxt->myDoc = NULL; 14541 } 14542 xmlFreeParserCtxt(ctxt); 14543 14544 return ret; 14545 } 14546 #endif /* LIBXML_SAX1_ENABLED */ 14547 14548 /** 14549 * xmlCreateDocParserCtxt: 14550 * @cur: a pointer to an array of xmlChar 14551 * 14552 * Creates a parser context for an XML in-memory document. 14553 * 14554 * Returns the new parser context or NULL 14555 */ 14556 xmlParserCtxtPtr 14557 xmlCreateDocParserCtxt(const xmlChar *cur) { 14558 int len; 14559 14560 if (cur == NULL) 14561 return(NULL); 14562 len = xmlStrlen(cur); 14563 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14564 } 14565 14566 #ifdef LIBXML_SAX1_ENABLED 14567 /** 14568 * xmlSAXParseDoc: 14569 * @sax: the SAX handler block 14570 * @cur: a pointer to an array of xmlChar 14571 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14572 * documents 14573 * 14574 * parse an XML in-memory document and build a tree. 14575 * It use the given SAX function block to handle the parsing callback. 14576 * If sax is NULL, fallback to the default DOM tree building routines. 
14577 * 14578 * Returns the resulting document tree 14579 */ 14580 14581 xmlDocPtr 14582 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14583 xmlDocPtr ret; 14584 xmlParserCtxtPtr ctxt; 14585 xmlSAXHandlerPtr oldsax = NULL; 14586 14587 if (cur == NULL) return(NULL); 14588 14589 14590 ctxt = xmlCreateDocParserCtxt(cur); 14591 if (ctxt == NULL) return(NULL); 14592 if (sax != NULL) { 14593 oldsax = ctxt->sax; 14594 ctxt->sax = sax; 14595 ctxt->userData = NULL; 14596 } 14597 xmlDetectSAX2(ctxt); 14598 14599 xmlParseDocument(ctxt); 14600 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14601 else { 14602 ret = NULL; 14603 xmlFreeDoc(ctxt->myDoc); 14604 ctxt->myDoc = NULL; 14605 } 14606 if (sax != NULL) 14607 ctxt->sax = oldsax; 14608 xmlFreeParserCtxt(ctxt); 14609 14610 return(ret); 14611 } 14612 14613 /** 14614 * xmlParseDoc: 14615 * @cur: a pointer to an array of xmlChar 14616 * 14617 * parse an XML in-memory document and build a tree. 14618 * 14619 * Returns the resulting document tree 14620 */ 14621 14622 xmlDocPtr 14623 xmlParseDoc(const xmlChar *cur) { 14624 return(xmlSAXParseDoc(NULL, cur, 0)); 14625 } 14626 #endif /* LIBXML_SAX1_ENABLED */ 14627 14628 #ifdef LIBXML_LEGACY_ENABLED 14629 /************************************************************************ 14630 * * 14631 * Specific function to keep track of entities references * 14632 * and used by the XSLT debugger * 14633 * * 14634 ************************************************************************/ 14635 14636 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14637 14638 /** 14639 * xmlAddEntityReference: 14640 * @ent : A valid entity 14641 * @firstNode : A valid first node for children of entity 14642 * @lastNode : A valid last node of children entity 14643 * 14644 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14645 */ 14646 static void 14647 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14648 xmlNodePtr lastNode) 
14649 { 14650 if (xmlEntityRefFunc != NULL) { 14651 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14652 } 14653 } 14654 14655 14656 /** 14657 * xmlSetEntityReferenceFunc: 14658 * @func: A valid function 14659 * 14660 * Set the function to call call back when a xml reference has been made 14661 */ 14662 void 14663 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14664 { 14665 xmlEntityRefFunc = func; 14666 } 14667 #endif /* LIBXML_LEGACY_ENABLED */ 14668 14669 /************************************************************************ 14670 * * 14671 * Miscellaneous * 14672 * * 14673 ************************************************************************/ 14674 14675 #ifdef LIBXML_XPATH_ENABLED 14676 #include <libxml/xpath.h> 14677 #endif 14678 14679 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14680 static int xmlParserInitialized = 0; 14681 14682 /** 14683 * xmlInitParser: 14684 * 14685 * Initialization function for the XML parser. 14686 * This is not reentrant. Call once before processing in case of 14687 * use in multithreaded programs. 
14688 */ 14689 14690 void 14691 xmlInitParser(void) { 14692 if (xmlParserInitialized != 0) 14693 return; 14694 14695 #ifdef LIBXML_THREAD_ENABLED 14696 __xmlGlobalInitMutexLock(); 14697 if (xmlParserInitialized == 0) { 14698 #endif 14699 xmlInitThreads(); 14700 xmlInitGlobals(); 14701 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14702 (xmlGenericError == NULL)) 14703 initGenericErrorDefaultFunc(NULL); 14704 xmlInitMemory(); 14705 xmlInitializeDict(); 14706 xmlInitCharEncodingHandlers(); 14707 xmlDefaultSAXHandlerInit(); 14708 xmlRegisterDefaultInputCallbacks(); 14709 #ifdef LIBXML_OUTPUT_ENABLED 14710 xmlRegisterDefaultOutputCallbacks(); 14711 #endif /* LIBXML_OUTPUT_ENABLED */ 14712 #ifdef LIBXML_HTML_ENABLED 14713 htmlInitAutoClose(); 14714 htmlDefaultSAXHandlerInit(); 14715 #endif 14716 #ifdef LIBXML_XPATH_ENABLED 14717 xmlXPathInit(); 14718 #endif 14719 xmlParserInitialized = 1; 14720 #ifdef LIBXML_THREAD_ENABLED 14721 } 14722 __xmlGlobalInitMutexUnlock(); 14723 #endif 14724 } 14725 14726 /** 14727 * xmlCleanupParser: 14728 * 14729 * This function name is somewhat misleading. It does not clean up 14730 * parser state, it cleans up memory allocated by the library itself. 14731 * It is a cleanup function for the XML library. It tries to reclaim all 14732 * related global memory allocated for the library processing. 14733 * It doesn't deallocate any document related memory. One should 14734 * call xmlCleanupParser() only when the process has finished using 14735 * the library and all XML/HTML documents built with it. 14736 * See also xmlInitParser() which has the opposite function of preparing 14737 * the library for operations. 14738 * 14739 * WARNING: if your application is multithreaded or has plugin support 14740 * calling this may crash the application if another thread or 14741 * a plugin is still using libxml2. 
It's sometimes very hard to 14742 * guess if libxml2 is in use in the application, some libraries 14743 * or plugins may use it without notice. In case of doubt abstain 14744 * from calling this function or do it just before calling exit() 14745 * to avoid leak reports from valgrind ! 14746 */ 14747 14748 void 14749 xmlCleanupParser(void) { 14750 if (!xmlParserInitialized) 14751 return; 14752 14753 xmlCleanupCharEncodingHandlers(); 14754 #ifdef LIBXML_CATALOG_ENABLED 14755 xmlCatalogCleanup(); 14756 #endif 14757 xmlDictCleanup(); 14758 xmlCleanupInputCallbacks(); 14759 #ifdef LIBXML_OUTPUT_ENABLED 14760 xmlCleanupOutputCallbacks(); 14761 #endif 14762 #ifdef LIBXML_SCHEMAS_ENABLED 14763 xmlSchemaCleanupTypes(); 14764 xmlRelaxNGCleanupTypes(); 14765 #endif 14766 xmlCleanupGlobals(); 14767 xmlResetLastError(); 14768 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14769 xmlCleanupMemory(); 14770 xmlParserInitialized = 0; 14771 } 14772 14773 /************************************************************************ 14774 * * 14775 * New set (2.6.0) of simpler and more flexible APIs * 14776 * * 14777 ************************************************************************/ 14778 14779 /** 14780 * DICT_FREE: 14781 * @str: a string 14782 * 14783 * Free a string if it is not owned by the "dict" dictionnary in the 14784 * current scope 14785 */ 14786 #define DICT_FREE(str) \ 14787 if ((str) && ((!dict) || \ 14788 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14789 xmlFree((char *)(str)); 14790 14791 /** 14792 * xmlCtxtReset: 14793 * @ctxt: an XML parser context 14794 * 14795 * Reset a parser context 14796 */ 14797 void 14798 xmlCtxtReset(xmlParserCtxtPtr ctxt) 14799 { 14800 xmlParserInputPtr input; 14801 xmlDictPtr dict; 14802 14803 if (ctxt == NULL) 14804 return; 14805 14806 dict = ctxt->dict; 14807 14808 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14809 xmlFreeInputStream(input); 14810 } 14811 ctxt->inputNr = 0; 14812 
ctxt->input = NULL; 14813 14814 ctxt->spaceNr = 0; 14815 if (ctxt->spaceTab != NULL) { 14816 ctxt->spaceTab[0] = -1; 14817 ctxt->space = &ctxt->spaceTab[0]; 14818 } else { 14819 ctxt->space = NULL; 14820 } 14821 14822 14823 ctxt->nodeNr = 0; 14824 ctxt->node = NULL; 14825 14826 ctxt->nameNr = 0; 14827 ctxt->name = NULL; 14828 14829 DICT_FREE(ctxt->version); 14830 ctxt->version = NULL; 14831 DICT_FREE(ctxt->encoding); 14832 ctxt->encoding = NULL; 14833 DICT_FREE(ctxt->directory); 14834 ctxt->directory = NULL; 14835 DICT_FREE(ctxt->extSubURI); 14836 ctxt->extSubURI = NULL; 14837 DICT_FREE(ctxt->extSubSystem); 14838 ctxt->extSubSystem = NULL; 14839 if (ctxt->myDoc != NULL) 14840 xmlFreeDoc(ctxt->myDoc); 14841 ctxt->myDoc = NULL; 14842 14843 ctxt->standalone = -1; 14844 ctxt->hasExternalSubset = 0; 14845 ctxt->hasPErefs = 0; 14846 ctxt->html = 0; 14847 ctxt->external = 0; 14848 ctxt->instate = XML_PARSER_START; 14849 ctxt->token = 0; 14850 14851 ctxt->wellFormed = 1; 14852 ctxt->nsWellFormed = 1; 14853 ctxt->disableSAX = 0; 14854 ctxt->valid = 1; 14855 #if 0 14856 ctxt->vctxt.userData = ctxt; 14857 ctxt->vctxt.error = xmlParserValidityError; 14858 ctxt->vctxt.warning = xmlParserValidityWarning; 14859 #endif 14860 ctxt->record_info = 0; 14861 ctxt->nbChars = 0; 14862 ctxt->checkIndex = 0; 14863 ctxt->inSubset = 0; 14864 ctxt->errNo = XML_ERR_OK; 14865 ctxt->depth = 0; 14866 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14867 ctxt->catalogs = NULL; 14868 ctxt->nbentities = 0; 14869 ctxt->sizeentities = 0; 14870 ctxt->sizeentcopy = 0; 14871 xmlInitNodeInfoSeq(&ctxt->node_seq); 14872 14873 if (ctxt->attsDefault != NULL) { 14874 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 14875 ctxt->attsDefault = NULL; 14876 } 14877 if (ctxt->attsSpecial != NULL) { 14878 xmlHashFree(ctxt->attsSpecial, NULL); 14879 ctxt->attsSpecial = NULL; 14880 } 14881 14882 #ifdef LIBXML_CATALOG_ENABLED 14883 if (ctxt->catalogs != NULL) 14884 xmlCatalogFreeLocal(ctxt->catalogs); 14885 #endif 
14886 if (ctxt->lastError.code != XML_ERR_OK) 14887 xmlResetError(&ctxt->lastError); 14888 } 14889 14890 /** 14891 * xmlCtxtResetPush: 14892 * @ctxt: an XML parser context 14893 * @chunk: a pointer to an array of chars 14894 * @size: number of chars in the array 14895 * @filename: an optional file name or URI 14896 * @encoding: the document encoding, or NULL 14897 * 14898 * Reset a push parser context 14899 * 14900 * Returns 0 in case of success and 1 in case of error 14901 */ 14902 int 14903 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14904 int size, const char *filename, const char *encoding) 14905 { 14906 xmlParserInputPtr inputStream; 14907 xmlParserInputBufferPtr buf; 14908 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14909 14910 if (ctxt == NULL) 14911 return(1); 14912 14913 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14914 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14915 14916 buf = xmlAllocParserInputBuffer(enc); 14917 if (buf == NULL) 14918 return(1); 14919 14920 if (ctxt == NULL) { 14921 xmlFreeParserInputBuffer(buf); 14922 return(1); 14923 } 14924 14925 xmlCtxtReset(ctxt); 14926 14927 if (ctxt->pushTab == NULL) { 14928 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14929 sizeof(xmlChar *)); 14930 if (ctxt->pushTab == NULL) { 14931 xmlErrMemory(ctxt, NULL); 14932 xmlFreeParserInputBuffer(buf); 14933 return(1); 14934 } 14935 } 14936 14937 if (filename == NULL) { 14938 ctxt->directory = NULL; 14939 } else { 14940 ctxt->directory = xmlParserGetDirectory(filename); 14941 } 14942 14943 inputStream = xmlNewInputStream(ctxt); 14944 if (inputStream == NULL) { 14945 xmlFreeParserInputBuffer(buf); 14946 return(1); 14947 } 14948 14949 if (filename == NULL) 14950 inputStream->filename = NULL; 14951 else 14952 inputStream->filename = (char *) 14953 xmlCanonicPath((const xmlChar *) filename); 14954 inputStream->buf = buf; 14955 xmlBufResetInput(buf->buffer, inputStream); 14956 14957 inputPush(ctxt, inputStream); 
14958 14959 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14960 (ctxt->input->buf != NULL)) { 14961 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 14962 size_t cur = ctxt->input->cur - ctxt->input->base; 14963 14964 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14965 14966 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 14967 #ifdef DEBUG_PUSH 14968 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14969 #endif 14970 } 14971 14972 if (encoding != NULL) { 14973 xmlCharEncodingHandlerPtr hdlr; 14974 14975 if (ctxt->encoding != NULL) 14976 xmlFree((xmlChar *) ctxt->encoding); 14977 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14978 14979 hdlr = xmlFindCharEncodingHandler(encoding); 14980 if (hdlr != NULL) { 14981 xmlSwitchToEncoding(ctxt, hdlr); 14982 } else { 14983 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14984 "Unsupported encoding %s\n", BAD_CAST encoding); 14985 } 14986 } else if (enc != XML_CHAR_ENCODING_NONE) { 14987 xmlSwitchEncoding(ctxt, enc); 14988 } 14989 14990 return(0); 14991 } 14992 14993 14994 /** 14995 * xmlCtxtUseOptionsInternal: 14996 * @ctxt: an XML parser context 14997 * @options: a combination of xmlParserOption 14998 * @encoding: the user provided encoding to use 14999 * 15000 * Applies the options to the parser context 15001 * 15002 * Returns 0 in case of success, the set of unknown or unimplemented options 15003 * in case of error. 
15004 */ 15005 static int 15006 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15007 { 15008 if (ctxt == NULL) 15009 return(-1); 15010 if (encoding != NULL) { 15011 if (ctxt->encoding != NULL) 15012 xmlFree((xmlChar *) ctxt->encoding); 15013 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15014 } 15015 if (options & XML_PARSE_RECOVER) { 15016 ctxt->recovery = 1; 15017 options -= XML_PARSE_RECOVER; 15018 ctxt->options |= XML_PARSE_RECOVER; 15019 } else 15020 ctxt->recovery = 0; 15021 if (options & XML_PARSE_DTDLOAD) { 15022 ctxt->loadsubset = XML_DETECT_IDS; 15023 options -= XML_PARSE_DTDLOAD; 15024 ctxt->options |= XML_PARSE_DTDLOAD; 15025 } else 15026 ctxt->loadsubset = 0; 15027 if (options & XML_PARSE_DTDATTR) { 15028 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15029 options -= XML_PARSE_DTDATTR; 15030 ctxt->options |= XML_PARSE_DTDATTR; 15031 } 15032 if (options & XML_PARSE_NOENT) { 15033 ctxt->replaceEntities = 1; 15034 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15035 options -= XML_PARSE_NOENT; 15036 ctxt->options |= XML_PARSE_NOENT; 15037 } else 15038 ctxt->replaceEntities = 0; 15039 if (options & XML_PARSE_PEDANTIC) { 15040 ctxt->pedantic = 1; 15041 options -= XML_PARSE_PEDANTIC; 15042 ctxt->options |= XML_PARSE_PEDANTIC; 15043 } else 15044 ctxt->pedantic = 0; 15045 if (options & XML_PARSE_NOBLANKS) { 15046 ctxt->keepBlanks = 0; 15047 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15048 options -= XML_PARSE_NOBLANKS; 15049 ctxt->options |= XML_PARSE_NOBLANKS; 15050 } else 15051 ctxt->keepBlanks = 1; 15052 if (options & XML_PARSE_DTDVALID) { 15053 ctxt->validate = 1; 15054 if (options & XML_PARSE_NOWARNING) 15055 ctxt->vctxt.warning = NULL; 15056 if (options & XML_PARSE_NOERROR) 15057 ctxt->vctxt.error = NULL; 15058 options -= XML_PARSE_DTDVALID; 15059 ctxt->options |= XML_PARSE_DTDVALID; 15060 } else 15061 ctxt->validate = 0; 15062 if (options & XML_PARSE_NOWARNING) { 15063 ctxt->sax->warning = NULL; 
15064 options -= XML_PARSE_NOWARNING; 15065 } 15066 if (options & XML_PARSE_NOERROR) { 15067 ctxt->sax->error = NULL; 15068 ctxt->sax->fatalError = NULL; 15069 options -= XML_PARSE_NOERROR; 15070 } 15071 #ifdef LIBXML_SAX1_ENABLED 15072 if (options & XML_PARSE_SAX1) { 15073 ctxt->sax->startElement = xmlSAX2StartElement; 15074 ctxt->sax->endElement = xmlSAX2EndElement; 15075 ctxt->sax->startElementNs = NULL; 15076 ctxt->sax->endElementNs = NULL; 15077 ctxt->sax->initialized = 1; 15078 options -= XML_PARSE_SAX1; 15079 ctxt->options |= XML_PARSE_SAX1; 15080 } 15081 #endif /* LIBXML_SAX1_ENABLED */ 15082 if (options & XML_PARSE_NODICT) { 15083 ctxt->dictNames = 0; 15084 options -= XML_PARSE_NODICT; 15085 ctxt->options |= XML_PARSE_NODICT; 15086 } else { 15087 ctxt->dictNames = 1; 15088 } 15089 if (options & XML_PARSE_NOCDATA) { 15090 ctxt->sax->cdataBlock = NULL; 15091 options -= XML_PARSE_NOCDATA; 15092 ctxt->options |= XML_PARSE_NOCDATA; 15093 } 15094 if (options & XML_PARSE_NSCLEAN) { 15095 ctxt->options |= XML_PARSE_NSCLEAN; 15096 options -= XML_PARSE_NSCLEAN; 15097 } 15098 if (options & XML_PARSE_NONET) { 15099 ctxt->options |= XML_PARSE_NONET; 15100 options -= XML_PARSE_NONET; 15101 } 15102 if (options & XML_PARSE_COMPACT) { 15103 ctxt->options |= XML_PARSE_COMPACT; 15104 options -= XML_PARSE_COMPACT; 15105 } 15106 if (options & XML_PARSE_OLD10) { 15107 ctxt->options |= XML_PARSE_OLD10; 15108 options -= XML_PARSE_OLD10; 15109 } 15110 if (options & XML_PARSE_NOBASEFIX) { 15111 ctxt->options |= XML_PARSE_NOBASEFIX; 15112 options -= XML_PARSE_NOBASEFIX; 15113 } 15114 if (options & XML_PARSE_HUGE) { 15115 ctxt->options |= XML_PARSE_HUGE; 15116 options -= XML_PARSE_HUGE; 15117 if (ctxt->dict != NULL) 15118 xmlDictSetLimit(ctxt->dict, 0); 15119 } 15120 if (options & XML_PARSE_OLDSAX) { 15121 ctxt->options |= XML_PARSE_OLDSAX; 15122 options -= XML_PARSE_OLDSAX; 15123 } 15124 if (options & XML_PARSE_IGNORE_ENC) { 15125 ctxt->options |= XML_PARSE_IGNORE_ENC; 15126 options 
-= XML_PARSE_IGNORE_ENC; 15127 } 15128 if (options & XML_PARSE_BIG_LINES) { 15129 ctxt->options |= XML_PARSE_BIG_LINES; 15130 options -= XML_PARSE_BIG_LINES; 15131 } 15132 ctxt->linenumbers = 1; 15133 return (options); 15134 } 15135 15136 /** 15137 * xmlCtxtUseOptions: 15138 * @ctxt: an XML parser context 15139 * @options: a combination of xmlParserOption 15140 * 15141 * Applies the options to the parser context 15142 * 15143 * Returns 0 in case of success, the set of unknown or unimplemented options 15144 * in case of error. 15145 */ 15146 int 15147 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15148 { 15149 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15150 } 15151 15152 /** 15153 * xmlDoRead: 15154 * @ctxt: an XML parser context 15155 * @URL: the base URL to use for the document 15156 * @encoding: the document encoding, or NULL 15157 * @options: a combination of xmlParserOption 15158 * @reuse: keep the context for reuse 15159 * 15160 * Common front-end for the xmlRead functions 15161 * 15162 * Returns the resulting document tree or NULL 15163 */ 15164 static xmlDocPtr 15165 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15166 int options, int reuse) 15167 { 15168 xmlDocPtr ret; 15169 15170 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15171 if (encoding != NULL) { 15172 xmlCharEncodingHandlerPtr hdlr; 15173 15174 hdlr = xmlFindCharEncodingHandler(encoding); 15175 if (hdlr != NULL) 15176 xmlSwitchToEncoding(ctxt, hdlr); 15177 } 15178 if ((URL != NULL) && (ctxt->input != NULL) && 15179 (ctxt->input->filename == NULL)) 15180 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15181 xmlParseDocument(ctxt); 15182 if ((ctxt->wellFormed) || ctxt->recovery) 15183 ret = ctxt->myDoc; 15184 else { 15185 ret = NULL; 15186 if (ctxt->myDoc != NULL) { 15187 xmlFreeDoc(ctxt->myDoc); 15188 } 15189 } 15190 ctxt->myDoc = NULL; 15191 if (!reuse) { 15192 xmlFreeParserCtxt(ctxt); 15193 } 15194 15195 return (ret); 15196 
} 15197 15198 /** 15199 * xmlReadDoc: 15200 * @cur: a pointer to a zero terminated string 15201 * @URL: the base URL to use for the document 15202 * @encoding: the document encoding, or NULL 15203 * @options: a combination of xmlParserOption 15204 * 15205 * parse an XML in-memory document and build a tree. 15206 * 15207 * Returns the resulting document tree 15208 */ 15209 xmlDocPtr 15210 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15211 { 15212 xmlParserCtxtPtr ctxt; 15213 15214 if (cur == NULL) 15215 return (NULL); 15216 15217 ctxt = xmlCreateDocParserCtxt(cur); 15218 if (ctxt == NULL) 15219 return (NULL); 15220 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15221 } 15222 15223 /** 15224 * xmlReadFile: 15225 * @filename: a file or URL 15226 * @encoding: the document encoding, or NULL 15227 * @options: a combination of xmlParserOption 15228 * 15229 * parse an XML file from the filesystem or the network. 15230 * 15231 * Returns the resulting document tree 15232 */ 15233 xmlDocPtr 15234 xmlReadFile(const char *filename, const char *encoding, int options) 15235 { 15236 xmlParserCtxtPtr ctxt; 15237 15238 ctxt = xmlCreateURLParserCtxt(filename, options); 15239 if (ctxt == NULL) 15240 return (NULL); 15241 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15242 } 15243 15244 /** 15245 * xmlReadMemory: 15246 * @buffer: a pointer to a char array 15247 * @size: the size of the array 15248 * @URL: the base URL to use for the document 15249 * @encoding: the document encoding, or NULL 15250 * @options: a combination of xmlParserOption 15251 * 15252 * parse an XML in-memory document and build a tree. 
15253 * 15254 * Returns the resulting document tree 15255 */ 15256 xmlDocPtr 15257 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15258 { 15259 xmlParserCtxtPtr ctxt; 15260 15261 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15262 if (ctxt == NULL) 15263 return (NULL); 15264 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15265 } 15266 15267 /** 15268 * xmlReadFd: 15269 * @fd: an open file descriptor 15270 * @URL: the base URL to use for the document 15271 * @encoding: the document encoding, or NULL 15272 * @options: a combination of xmlParserOption 15273 * 15274 * parse an XML from a file descriptor and build a tree. 15275 * NOTE that the file descriptor will not be closed when the 15276 * reader is closed or reset. 15277 * 15278 * Returns the resulting document tree 15279 */ 15280 xmlDocPtr 15281 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15282 { 15283 xmlParserCtxtPtr ctxt; 15284 xmlParserInputBufferPtr input; 15285 xmlParserInputPtr stream; 15286 15287 if (fd < 0) 15288 return (NULL); 15289 15290 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15291 if (input == NULL) 15292 return (NULL); 15293 input->closecallback = NULL; 15294 ctxt = xmlNewParserCtxt(); 15295 if (ctxt == NULL) { 15296 xmlFreeParserInputBuffer(input); 15297 return (NULL); 15298 } 15299 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15300 if (stream == NULL) { 15301 xmlFreeParserInputBuffer(input); 15302 xmlFreeParserCtxt(ctxt); 15303 return (NULL); 15304 } 15305 inputPush(ctxt, stream); 15306 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15307 } 15308 15309 /** 15310 * xmlReadIO: 15311 * @ioread: an I/O read function 15312 * @ioclose: an I/O close function 15313 * @ioctx: an I/O handler 15314 * @URL: the base URL to use for the document 15315 * @encoding: the document encoding, or NULL 15316 * @options: a combination of xmlParserOption 15317 * 15318 * parse an XML 
document from I/O functions and source and build a tree. 15319 * 15320 * Returns the resulting document tree 15321 */ 15322 xmlDocPtr 15323 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15324 void *ioctx, const char *URL, const char *encoding, int options) 15325 { 15326 xmlParserCtxtPtr ctxt; 15327 xmlParserInputBufferPtr input; 15328 xmlParserInputPtr stream; 15329 15330 if (ioread == NULL) 15331 return (NULL); 15332 15333 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15334 XML_CHAR_ENCODING_NONE); 15335 if (input == NULL) { 15336 if (ioclose != NULL) 15337 ioclose(ioctx); 15338 return (NULL); 15339 } 15340 ctxt = xmlNewParserCtxt(); 15341 if (ctxt == NULL) { 15342 xmlFreeParserInputBuffer(input); 15343 return (NULL); 15344 } 15345 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15346 if (stream == NULL) { 15347 xmlFreeParserInputBuffer(input); 15348 xmlFreeParserCtxt(ctxt); 15349 return (NULL); 15350 } 15351 inputPush(ctxt, stream); 15352 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15353 } 15354 15355 /** 15356 * xmlCtxtReadDoc: 15357 * @ctxt: an XML parser context 15358 * @cur: a pointer to a zero terminated string 15359 * @URL: the base URL to use for the document 15360 * @encoding: the document encoding, or NULL 15361 * @options: a combination of xmlParserOption 15362 * 15363 * parse an XML in-memory document and build a tree. 
15364 * This reuses the existing @ctxt parser context 15365 * 15366 * Returns the resulting document tree 15367 */ 15368 xmlDocPtr 15369 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15370 const char *URL, const char *encoding, int options) 15371 { 15372 xmlParserInputPtr stream; 15373 15374 if (cur == NULL) 15375 return (NULL); 15376 if (ctxt == NULL) 15377 return (NULL); 15378 15379 xmlCtxtReset(ctxt); 15380 15381 stream = xmlNewStringInputStream(ctxt, cur); 15382 if (stream == NULL) { 15383 return (NULL); 15384 } 15385 inputPush(ctxt, stream); 15386 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15387 } 15388 15389 /** 15390 * xmlCtxtReadFile: 15391 * @ctxt: an XML parser context 15392 * @filename: a file or URL 15393 * @encoding: the document encoding, or NULL 15394 * @options: a combination of xmlParserOption 15395 * 15396 * parse an XML file from the filesystem or the network. 15397 * This reuses the existing @ctxt parser context 15398 * 15399 * Returns the resulting document tree 15400 */ 15401 xmlDocPtr 15402 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15403 const char *encoding, int options) 15404 { 15405 xmlParserInputPtr stream; 15406 15407 if (filename == NULL) 15408 return (NULL); 15409 if (ctxt == NULL) 15410 return (NULL); 15411 15412 xmlCtxtReset(ctxt); 15413 15414 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15415 if (stream == NULL) { 15416 return (NULL); 15417 } 15418 inputPush(ctxt, stream); 15419 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15420 } 15421 15422 /** 15423 * xmlCtxtReadMemory: 15424 * @ctxt: an XML parser context 15425 * @buffer: a pointer to a char array 15426 * @size: the size of the array 15427 * @URL: the base URL to use for the document 15428 * @encoding: the document encoding, or NULL 15429 * @options: a combination of xmlParserOption 15430 * 15431 * parse an XML in-memory document and build a tree. 
15432 * This reuses the existing @ctxt parser context 15433 * 15434 * Returns the resulting document tree 15435 */ 15436 xmlDocPtr 15437 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15438 const char *URL, const char *encoding, int options) 15439 { 15440 xmlParserInputBufferPtr input; 15441 xmlParserInputPtr stream; 15442 15443 if (ctxt == NULL) 15444 return (NULL); 15445 if (buffer == NULL) 15446 return (NULL); 15447 15448 xmlCtxtReset(ctxt); 15449 15450 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15451 if (input == NULL) { 15452 return(NULL); 15453 } 15454 15455 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15456 if (stream == NULL) { 15457 xmlFreeParserInputBuffer(input); 15458 return(NULL); 15459 } 15460 15461 inputPush(ctxt, stream); 15462 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15463 } 15464 15465 /** 15466 * xmlCtxtReadFd: 15467 * @ctxt: an XML parser context 15468 * @fd: an open file descriptor 15469 * @URL: the base URL to use for the document 15470 * @encoding: the document encoding, or NULL 15471 * @options: a combination of xmlParserOption 15472 * 15473 * parse an XML from a file descriptor and build a tree. 15474 * This reuses the existing @ctxt parser context 15475 * NOTE that the file descriptor will not be closed when the 15476 * reader is closed or reset. 
15477 * 15478 * Returns the resulting document tree 15479 */ 15480 xmlDocPtr 15481 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15482 const char *URL, const char *encoding, int options) 15483 { 15484 xmlParserInputBufferPtr input; 15485 xmlParserInputPtr stream; 15486 15487 if (fd < 0) 15488 return (NULL); 15489 if (ctxt == NULL) 15490 return (NULL); 15491 15492 xmlCtxtReset(ctxt); 15493 15494 15495 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15496 if (input == NULL) 15497 return (NULL); 15498 input->closecallback = NULL; 15499 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15500 if (stream == NULL) { 15501 xmlFreeParserInputBuffer(input); 15502 return (NULL); 15503 } 15504 inputPush(ctxt, stream); 15505 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15506 } 15507 15508 /** 15509 * xmlCtxtReadIO: 15510 * @ctxt: an XML parser context 15511 * @ioread: an I/O read function 15512 * @ioclose: an I/O close function 15513 * @ioctx: an I/O handler 15514 * @URL: the base URL to use for the document 15515 * @encoding: the document encoding, or NULL 15516 * @options: a combination of xmlParserOption 15517 * 15518 * parse an XML document from I/O functions and source and build a tree. 
15519 * This reuses the existing @ctxt parser context 15520 * 15521 * Returns the resulting document tree 15522 */ 15523 xmlDocPtr 15524 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15525 xmlInputCloseCallback ioclose, void *ioctx, 15526 const char *URL, 15527 const char *encoding, int options) 15528 { 15529 xmlParserInputBufferPtr input; 15530 xmlParserInputPtr stream; 15531 15532 if (ioread == NULL) 15533 return (NULL); 15534 if (ctxt == NULL) 15535 return (NULL); 15536 15537 xmlCtxtReset(ctxt); 15538 15539 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15540 XML_CHAR_ENCODING_NONE); 15541 if (input == NULL) { 15542 if (ioclose != NULL) 15543 ioclose(ioctx); 15544 return (NULL); 15545 } 15546 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15547 if (stream == NULL) { 15548 xmlFreeParserInputBuffer(input); 15549 return (NULL); 15550 } 15551 inputPush(ctxt, stream); 15552 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15553 } 15554 15555 #define bottom_parser 15556 #include "elfgcchack.h" 15557