1 /* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel (at) veillard.com 31 */ 32 33 #define IN_LIBXML 34 #include "libxml.h" 35 36 #if defined(WIN32) && !defined (__CYGWIN__) 37 #define XML_DIR_SEP '\\' 38 #else 39 #define XML_DIR_SEP '/' 40 #endif 41 42 #include <stdlib.h> 43 #include <string.h> 44 #include <stdarg.h> 45 #include <libxml/xmlmemory.h> 46 #include <libxml/threads.h> 47 #include <libxml/globals.h> 48 #include <libxml/tree.h> 49 #include <libxml/parser.h> 50 #include <libxml/parserInternals.h> 51 #include <libxml/valid.h> 52 #include <libxml/entities.h> 53 #include <libxml/xmlerror.h> 54 #include <libxml/encoding.h> 55 #include <libxml/xmlIO.h> 56 #include <libxml/uri.h> 57 #ifdef LIBXML_CATALOG_ENABLED 58 #include <libxml/catalog.h> 59 #endif 60 #ifdef LIBXML_SCHEMAS_ENABLED 61 #include <libxml/xmlschemastypes.h> 62 #include <libxml/relaxng.h> 63 #endif 64 #ifdef HAVE_CTYPE_H 65 #include <ctype.h> 66 #endif 67 #ifdef HAVE_STDLIB_H 68 #include <stdlib.h> 69 #endif 70 #ifdef HAVE_SYS_STAT_H 71 #include <sys/stat.h> 72 #endif 73 #ifdef HAVE_FCNTL_H 74 #include <fcntl.h> 75 #endif 76 #ifdef HAVE_UNISTD_H 77 #include <unistd.h> 78 #endif 79 #ifdef HAVE_ZLIB_H 80 #include <zlib.h> 81 #endif 82 #ifdef HAVE_LZMA_H 83 #include <lzma.h> 84 #endif 85 86 static void 87 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 88 89 static xmlParserCtxtPtr 90 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 91 const xmlChar *base, xmlParserCtxtPtr pctx); 92 93 /************************************************************************ 94 * * 95 * Arbitrary limits set in the parser. See XML_PARSE_HUGE * 96 * * 97 ************************************************************************/ 98 99 #define XML_PARSER_BIG_ENTITY 1000 100 #define XML_PARSER_LOT_ENTITY 5000 101 102 /* 103 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 104 * replacement over the size in byte of the input indicates that you have 105 * and eponential behaviour. 
A value of 10 correspond to at least 3 entity 106 * replacement per byte of input. 107 */ 108 #define XML_PARSER_NON_LINEAR 10 109 110 /* 111 * xmlParserEntityCheck 112 * 113 * Function to check non-linear entity expansion behaviour 114 * This is here to detect and stop exponential linear entity expansion 115 * This is not a limitation of the parser but a safety 116 * boundary feature. It can be disabled with the XML_PARSE_HUGE 117 * parser option. 118 */ 119 static int 120 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, 121 xmlEntityPtr ent) 122 { 123 unsigned long consumed = 0; 124 125 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 126 return (0); 127 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 128 return (1); 129 if (size != 0) { 130 /* 131 * Do the check based on the replacement size of the entity 132 */ 133 if (size < XML_PARSER_BIG_ENTITY) 134 return(0); 135 136 /* 137 * A limit on the amount of text data reasonably used 138 */ 139 if (ctxt->input != NULL) { 140 consumed = ctxt->input->consumed + 141 (ctxt->input->cur - ctxt->input->base); 142 } 143 consumed += ctxt->sizeentities; 144 145 if ((size < XML_PARSER_NON_LINEAR * consumed) && 146 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 147 return (0); 148 } else if (ent != NULL) { 149 /* 150 * use the number of parsed entities in the replacement 151 */ 152 size = ent->checked; 153 154 /* 155 * The amount of data parsed counting entities size only once 156 */ 157 if (ctxt->input != NULL) { 158 consumed = ctxt->input->consumed + 159 (ctxt->input->cur - ctxt->input->base); 160 } 161 consumed += ctxt->sizeentities; 162 163 /* 164 * Check the density of entities for the amount of data 165 * knowing an entity reference will take at least 3 bytes 166 */ 167 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 168 return (0); 169 } else { 170 /* 171 * strange we got no data for checking just return 172 */ 173 return (0); 174 } 175 176 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, 
NULL); 177 return (1); 178 } 179 180 /** 181 * xmlParserMaxDepth: 182 * 183 * arbitrary depth limit for the XML documents that we allow to 184 * process. This is not a limitation of the parser but a safety 185 * boundary feature. It can be disabled with the XML_PARSE_HUGE 186 * parser option. 187 */ 188 unsigned int xmlParserMaxDepth = 256; 189 190 191 192 #define SAX2 1 193 #define XML_PARSER_BIG_BUFFER_SIZE 300 194 #define XML_PARSER_BUFFER_SIZE 100 195 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 196 197 /* 198 * List of XML prefixed PI allowed by W3C specs 199 */ 200 201 static const char *xmlW3CPIs[] = { 202 "xml-stylesheet", 203 "xml-model", 204 NULL 205 }; 206 207 208 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 209 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 210 const xmlChar **str); 211 212 static xmlParserErrors 213 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 214 xmlSAXHandlerPtr sax, 215 void *user_data, int depth, const xmlChar *URL, 216 const xmlChar *ID, xmlNodePtr *list); 217 218 static int 219 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 220 const char *encoding); 221 #ifdef LIBXML_LEGACY_ENABLED 222 static void 223 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 224 xmlNodePtr lastNode); 225 #endif /* LIBXML_LEGACY_ENABLED */ 226 227 static xmlParserErrors 228 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 229 const xmlChar *string, void *user_data, xmlNodePtr *lst); 230 231 static int 232 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 233 234 /************************************************************************ 235 * * 236 * Some factorized error routines * 237 * * 238 ************************************************************************/ 239 240 /** 241 * xmlErrAttributeDup: 242 * @ctxt: an XML parser context 243 * @prefix: the attribute prefix 244 * @localname: the attribute 
localname 245 * 246 * Handle a redefinition of attribute error 247 */ 248 static void 249 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 250 const xmlChar * localname) 251 { 252 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 253 (ctxt->instate == XML_PARSER_EOF)) 254 return; 255 if (ctxt != NULL) 256 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 257 258 if (prefix == NULL) 259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 260 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 261 (const char *) localname, NULL, NULL, 0, 0, 262 "Attribute %s redefined\n", localname); 263 else 264 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 265 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 266 (const char *) prefix, (const char *) localname, 267 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 268 localname); 269 if (ctxt != NULL) { 270 ctxt->wellFormed = 0; 271 if (ctxt->recovery == 0) 272 ctxt->disableSAX = 1; 273 } 274 } 275 276 /** 277 * xmlFatalErr: 278 * @ctxt: an XML parser context 279 * @error: the error number 280 * @extra: extra information string 281 * 282 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints
 *
 * The error number is mapped to a fixed message by the switch below;
 * unknown numbers get "Unregistered error message". The third parameter
 * is named @info in the signature: it is forwarded to __xmlRaiseError
 * both as the first string detail and as a format argument, but none of
 * the fixed messages contains a conversion, so it does not appear in
 * the formatted text.
 *
 * When a context is given: ctxt->errNo is set, the document is flagged
 * as not well formed and, unless ctxt->recovery is set, SAX callbacks
 * are disabled. Nothing is reported if the context already has SAX
 * disabled and is in the XML_PARSER_EOF state.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
{
    const char *errmsg;

    /* parsing already stopped on this context: do not pile up errors */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    switch (error) {
        case XML_ERR_INVALID_HEX_CHARREF:
            errmsg = "CharRef: invalid hexadecimal value\n";
            break;
        case XML_ERR_INVALID_DEC_CHARREF:
            errmsg = "CharRef: invalid decimal value\n";
            break;
        case XML_ERR_INVALID_CHARREF:
            errmsg = "CharRef: invalid value\n";
            break;
        case XML_ERR_INTERNAL_ERROR:
            /* note: no trailing newline on this message */
            errmsg = "internal error";
            break;
        case XML_ERR_PEREF_AT_EOF:
            errmsg = "PEReference at end of document\n";
            break;
        case XML_ERR_PEREF_IN_PROLOG:
            errmsg = "PEReference in prolog\n";
            break;
        case XML_ERR_PEREF_IN_EPILOG:
            errmsg = "PEReference in epilog\n";
            break;
        case XML_ERR_PEREF_NO_NAME:
            errmsg = "PEReference: no name\n";
            break;
        case XML_ERR_PEREF_SEMICOL_MISSING:
            errmsg = "PEReference: expecting ';'\n";
            break;
        case XML_ERR_ENTITY_LOOP:
            errmsg = "Detected an entity reference loop\n";
            break;
        case XML_ERR_ENTITY_NOT_STARTED:
            errmsg = "EntityValue: \" or ' expected\n";
            break;
        case XML_ERR_ENTITY_PE_INTERNAL:
            errmsg = "PEReferences forbidden in internal subset\n";
            break;
        case XML_ERR_ENTITY_NOT_FINISHED:
            /* same text as XML_ERR_ENTITY_NOT_STARTED */
            errmsg = "EntityValue: \" or ' expected\n";
            break;
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
            errmsg = "AttValue: \" or ' expected\n";
            break;
        case XML_ERR_LT_IN_ATTRIBUTE:
            errmsg = "Unescaped '<' not allowed in attributes values\n";
            break;
        case XML_ERR_LITERAL_NOT_STARTED:
            errmsg = "SystemLiteral \" or ' expected\n";
            break;
        case XML_ERR_LITERAL_NOT_FINISHED:
            errmsg = "Unfinished System or Public ID \" or ' expected\n";
            break;
        case XML_ERR_MISPLACED_CDATA_END:
            errmsg = "Sequence ']]>' not allowed in content\n";
            break;
        case XML_ERR_URI_REQUIRED:
            errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
            break;
        case XML_ERR_PUBID_REQUIRED:
            errmsg = "PUBLIC, the Public Identifier is missing\n";
            break;
        case XML_ERR_HYPHEN_IN_COMMENT:
            errmsg = "Comment must not contain '--' (double-hyphen)\n";
            break;
        case XML_ERR_PI_NOT_STARTED:
            errmsg = "xmlParsePI : no target name\n";
            break;
        case XML_ERR_RESERVED_XML_NAME:
            errmsg = "Invalid PI name\n";
            break;
        case XML_ERR_NOTATION_NOT_STARTED:
            errmsg = "NOTATION: Name expected here\n";
            break;
        case XML_ERR_NOTATION_NOT_FINISHED:
            errmsg = "'>' required to close NOTATION declaration\n";
            break;
        case XML_ERR_VALUE_REQUIRED:
            errmsg = "Entity value required\n";
            break;
        case XML_ERR_URI_FRAGMENT:
            /* note: no trailing newline on this message */
            errmsg = "Fragment not allowed";
            break;
        case XML_ERR_ATTLIST_NOT_STARTED:
            errmsg = "'(' required to start ATTLIST enumeration\n";
            break;
        case XML_ERR_NMTOKEN_REQUIRED:
            errmsg = "NmToken expected in ATTLIST enumeration\n";
            break;
        case XML_ERR_ATTLIST_NOT_FINISHED:
            errmsg = "')' required to finish ATTLIST enumeration\n";
            break;
        case XML_ERR_MIXED_NOT_STARTED:
            errmsg = "MixedContentDecl : '|' or ')*' expected\n";
            break;
        case XML_ERR_PCDATA_REQUIRED:
            errmsg = "MixedContentDecl : '#PCDATA' expected\n";
            break;
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
            errmsg = "ContentDecl : Name or '(' expected\n";
            break;
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
            errmsg = "ContentDecl : ',' '|' or ')' expected\n";
            break;
        case XML_ERR_PEREF_IN_INT_SUBSET:
            errmsg =
                "PEReference: forbidden within markup decl in internal subset\n";
            break;
        case XML_ERR_GT_REQUIRED:
            errmsg = "expected '>'\n";
            break;
        case XML_ERR_CONDSEC_INVALID:
            errmsg = "XML conditional section '[' expected\n";
            break;
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
            errmsg = "Content error in the external subset\n";
            break;
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
            errmsg =
                "conditional section INCLUDE or IGNORE keyword expected\n";
            break;
        case XML_ERR_CONDSEC_NOT_FINISHED:
            errmsg = "XML conditional section not closed\n";
            break;
        case XML_ERR_XMLDECL_NOT_STARTED:
            errmsg = "Text declaration '<?xml' required\n";
            break;
        case XML_ERR_XMLDECL_NOT_FINISHED:
            errmsg = "parsing XML declaration: '?>' expected\n";
            break;
        case XML_ERR_EXT_ENTITY_STANDALONE:
            errmsg = "external parsed entities cannot be standalone\n";
            break;
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
            errmsg = "EntityRef: expecting ';'\n";
            break;
        case XML_ERR_DOCTYPE_NOT_FINISHED:
            errmsg = "DOCTYPE improperly terminated\n";
            break;
        case XML_ERR_LTSLASH_REQUIRED:
            errmsg = "EndTag: '</' not found\n";
            break;
        case XML_ERR_EQUAL_REQUIRED:
            errmsg = "expected '='\n";
            break;
        case XML_ERR_STRING_NOT_CLOSED:
            errmsg = "String not closed expecting \" or '\n";
            break;
        case XML_ERR_STRING_NOT_STARTED:
            errmsg = "String not started expecting ' or \"\n";
            break;
        case XML_ERR_ENCODING_NAME:
            errmsg = "Invalid XML encoding name\n";
            break;
        case XML_ERR_STANDALONE_VALUE:
            errmsg = "standalone accepts only 'yes' or 'no'\n";
            break;
        case XML_ERR_DOCUMENT_EMPTY:
            errmsg = "Document is empty\n";
            break;
        case XML_ERR_DOCUMENT_END:
            errmsg = "Extra content at the end of the document\n";
            break;
        case XML_ERR_NOT_WELL_BALANCED:
            errmsg = "chunk is not well balanced\n";
            break;
        case XML_ERR_EXTRA_CONTENT:
            errmsg = "extra content at the end of well balanced chunk\n";
            break;
        case XML_ERR_VERSION_MISSING:
            errmsg = "Malformed declaration expecting version\n";
            break;
#if 0
        case:
            errmsg = "\n";
            break;
#endif
        default:
            errmsg = "Unregistered error message\n";
    }
    if (ctxt != NULL)
        ctxt->errNo = error;
    /* errmsg is used as the format; info is an (unused) format argument */
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
                    info);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        /* outside recovery mode a fatal error stops SAX event delivery */
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

/**
 * xmlFatalErrMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 *
 * @msg is emitted verbatim through a "%s" format. The context, when
 * given, is updated the same way as in xmlFatalErr (errNo, wellFormed,
 * disableSAX unless in recovery mode).
 */
static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
               const char *msg)
{
    /* parsing already stopped on this context: do not pile up errors */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

/**
 * xmlWarningMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 * @str1:  extra data
 * @str2:  extra data
 *
 * Handle a warning.
 *
 * @msg is used as the format string with @str1 and @str2 as arguments.
 * Unlike the fatal error helpers this one does not touch ctxt->errNo,
 * ctxt->wellFormed or ctxt->disableSAX.
 */
static void
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    xmlStructuredErrorFunc schannel = NULL;

    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    /* the structured error channel is only picked up from SAX2 handlers */
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
        schannel = ctxt->sax->serror;
    if (ctxt != NULL) {
        /* route through the SAX warning callback and user data, if any */
        __xmlRaiseError(schannel,
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
                    ctxt->userData,
                    ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_WARNING, NULL, 0,
                    (const char *) str1, (const char *) str2, NULL, 0, 0,
                    msg, (const char *) str1, (const char *) str2);
    } else {
        /* no context: no callback and no user data to pass along */
        __xmlRaiseError(schannel, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_WARNING, NULL, 0,
                    (const char *) str1, (const char *) str2, NULL, 0, 0,
                    msg, (const char *) str1, (const char *) str2);
    }
}

/**
 * xmlValidityError:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 * @str1:  extra data
 * @str2:  extra data
 *
 * Handle a validity error.
 *
 * The error is raised in the XML_FROM_DTD domain at XML_ERR_ERROR level
 * through the validation context callback (ctxt->vctxt.error). When a
 * context is given, ctxt->errNo is set and ctxt->valid is cleared;
 * ctxt->wellFormed is left untouched.
 */
static void
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    xmlStructuredErrorFunc schannel = NULL;

    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL) {
        ctxt->errNo = error;
        /* the structured error channel is only picked up from SAX2 handlers */
        if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
            schannel = ctxt->sax->serror;
    }
    if (ctxt != NULL) {
        __xmlRaiseError(schannel,
                    ctxt->vctxt.error, ctxt->vctxt.userData,
                    ctxt, NULL, XML_FROM_DTD, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
                    (const char *) str2, NULL, 0, 0,
                    msg, (const char *) str1, (const char *) str2);
        ctxt->valid = 0;
    } else {
        __xmlRaiseError(schannel, NULL, NULL,
                    ctxt, NULL, XML_FROM_DTD, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
                    (const char *) str2, NULL, 0, 0,
                    msg, (const char *) str1, (const char *) str2);
    }
}

/**
 * xmlFatalErrMsgInt:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 * @val:  an integer value
 *
 * Handle a fatal parser error, i.e.
violating Well-Formedness constraints 596 */ 597 static void 598 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 599 const char *msg, int val) 600 { 601 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 602 (ctxt->instate == XML_PARSER_EOF)) 603 return; 604 if (ctxt != NULL) 605 ctxt->errNo = error; 606 __xmlRaiseError(NULL, NULL, NULL, 607 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 608 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 609 if (ctxt != NULL) { 610 ctxt->wellFormed = 0; 611 if (ctxt->recovery == 0) 612 ctxt->disableSAX = 1; 613 } 614 } 615 616 /** 617 * xmlFatalErrMsgStrIntStr: 618 * @ctxt: an XML parser context 619 * @error: the error number 620 * @msg: the error message 621 * @str1: an string info 622 * @val: an integer value 623 * @str2: an string info 624 * 625 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 626 */ 627 static void 628 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 629 const char *msg, const xmlChar *str1, int val, 630 const xmlChar *str2) 631 { 632 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 633 (ctxt->instate == XML_PARSER_EOF)) 634 return; 635 if (ctxt != NULL) 636 ctxt->errNo = error; 637 __xmlRaiseError(NULL, NULL, NULL, 638 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 639 NULL, 0, (const char *) str1, (const char *) str2, 640 NULL, val, 0, msg, str1, val, str2); 641 if (ctxt != NULL) { 642 ctxt->wellFormed = 0; 643 if (ctxt->recovery == 0) 644 ctxt->disableSAX = 1; 645 } 646 } 647 648 /** 649 * xmlFatalErrMsgStr: 650 * @ctxt: an XML parser context 651 * @error: the error number 652 * @msg: the error message 653 * @val: a string value 654 * 655 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 656 */ 657 static void 658 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 659 const char *msg, const xmlChar * val) 660 { 661 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 662 (ctxt->instate == XML_PARSER_EOF)) 663 return; 664 if (ctxt != NULL) 665 ctxt->errNo = error; 666 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 667 XML_FROM_PARSER, error, XML_ERR_FATAL, 668 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 669 val); 670 if (ctxt != NULL) { 671 ctxt->wellFormed = 0; 672 if (ctxt->recovery == 0) 673 ctxt->disableSAX = 1; 674 } 675 } 676 677 /** 678 * xmlErrMsgStr: 679 * @ctxt: an XML parser context 680 * @error: the error number 681 * @msg: the error message 682 * @val: a string value 683 * 684 * Handle a non fatal parser error 685 */ 686 static void 687 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 688 const char *msg, const xmlChar * val) 689 { 690 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 691 (ctxt->instate == XML_PARSER_EOF)) 692 return; 693 if (ctxt != NULL) 694 ctxt->errNo = error; 695 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 696 XML_FROM_PARSER, error, XML_ERR_ERROR, 697 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 698 val); 699 } 700 701 /** 702 * xmlNsErr: 703 * @ctxt: an XML parser context 704 * @error: the error number 705 * @msg: the message 706 * @info1: extra information string 707 * @info2: extra information string 708 * 709 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 710 */ 711 static void 712 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 713 const char *msg, 714 const xmlChar * info1, const xmlChar * info2, 715 const xmlChar * info3) 716 { 717 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 718 (ctxt->instate == XML_PARSER_EOF)) 719 return; 720 if (ctxt != NULL) 721 ctxt->errNo = error; 722 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 723 XML_ERR_ERROR, NULL, 0, (const char *) info1, 724 (const char *) info2, (const char *) info3, 0, 0, msg, 725 info1, info2, info3); 726 if (ctxt != NULL) 727 ctxt->nsWellFormed = 0; 728 } 729 730 /** 731 * xmlNsWarn 732 * @ctxt: an XML parser context 733 * @error: the error number 734 * @msg: the message 735 * @info1: extra information string 736 * @info2: extra information string 737 * 738 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 739 */ 740 static void 741 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 742 const char *msg, 743 const xmlChar * info1, const xmlChar * info2, 744 const xmlChar * info3) 745 { 746 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 747 (ctxt->instate == XML_PARSER_EOF)) 748 return; 749 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 750 XML_ERR_WARNING, NULL, 0, (const char *) info1, 751 (const char *) info2, (const char *) info3, 0, 0, msg, 752 info1, info2, info3); 753 } 754 755 /************************************************************************ 756 * * 757 * Library wide options * 758 * * 759 ************************************************************************/ 760 761 /** 762 * xmlHasFeature: 763 * @feature: the feature to be examined 764 * 765 * Examines if the library has been compiled with a given feature. 766 * 767 * Returns a non-zero value if the feature exist, otherwise zero. 768 * Returns zero (0) if the feature does not exist or an unknown 769 * unknown feature is requested, non-zero otherwise. 
*/
int
xmlHasFeature(xmlFeature feature)
{
    /*
     * Every case is resolved at compile time: it returns 1 when the
     * matching configuration macro is defined in this build, 0 otherwise.
     */
    switch (feature) {
        case XML_WITH_THREAD:
#ifdef LIBXML_THREAD_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_TREE:
#ifdef LIBXML_TREE_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_OUTPUT:
#ifdef LIBXML_OUTPUT_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_PUSH:
#ifdef LIBXML_PUSH_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_READER:
#ifdef LIBXML_READER_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_PATTERN:
#ifdef LIBXML_PATTERN_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_WRITER:
#ifdef LIBXML_WRITER_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_SAX1:
#ifdef LIBXML_SAX1_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_FTP:
#ifdef LIBXML_FTP_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_HTTP:
#ifdef LIBXML_HTTP_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_VALID:
#ifdef LIBXML_VALID_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_HTML:
#ifdef LIBXML_HTML_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_LEGACY:
#ifdef LIBXML_LEGACY_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_C14N:
#ifdef LIBXML_C14N_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_CATALOG:
#ifdef LIBXML_CATALOG_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_XPATH:
#ifdef LIBXML_XPATH_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_XPTR:
#ifdef LIBXML_XPTR_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_XINCLUDE:
#ifdef LIBXML_XINCLUDE_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_ICONV:
#ifdef LIBXML_ICONV_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_ISO8859X:
#ifdef LIBXML_ISO8859X_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_UNICODE:
#ifdef LIBXML_UNICODE_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_REGEXP:
#ifdef LIBXML_REGEXP_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_AUTOMATA:
#ifdef LIBXML_AUTOMATA_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_EXPR:
#ifdef LIBXML_EXPR_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_SCHEMAS:
#ifdef LIBXML_SCHEMAS_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_SCHEMATRON:
#ifdef LIBXML_SCHEMATRON_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_MODULES:
#ifdef LIBXML_MODULES_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_DEBUG:
#ifdef LIBXML_DEBUG_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_DEBUG_MEM:
            /* keyed on DEBUG_MEMORY_LOCATION, not a LIBXML_*_ENABLED macro */
#ifdef DEBUG_MEMORY_LOCATION
            return(1);
#else
            return(0);
#endif
        case XML_WITH_DEBUG_RUN:
            /* keyed on LIBXML_DEBUG_RUNTIME */
#ifdef LIBXML_DEBUG_RUNTIME
            return(1);
#else
            return(0);
#endif
        case XML_WITH_ZLIB:
#ifdef LIBXML_ZLIB_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_LZMA:
#ifdef LIBXML_LZMA_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_ICU:
#ifdef LIBXML_ICU_ENABLED
            return(1);
#else
            return(0);
#endif
        default:
            /* unknown feature: fall out of the switch and report 0 */
            break;
     }
     return(0);
}

/************************************************************************
 *                                                                      *
 *              SAX2 defaulted attributes handling                      *
 *                                                                      *
 ************************************************************************/

/**
 * xmlDetectSAX2:
 *
@ctxt: an XML parser context 988 * 989 * Do the SAX2 detection and specific intialization 990 */ 991 static void 992 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 993 if (ctxt == NULL) return; 994 #ifdef LIBXML_SAX1_ENABLED 995 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 996 ((ctxt->sax->startElementNs != NULL) || 997 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 998 #else 999 ctxt->sax2 = 1; 1000 #endif /* LIBXML_SAX1_ENABLED */ 1001 1002 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1003 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1004 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1005 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1006 (ctxt->str_xml_ns == NULL)) { 1007 xmlErrMemory(ctxt, NULL); 1008 } 1009 } 1010 1011 typedef struct _xmlDefAttrs xmlDefAttrs; 1012 typedef xmlDefAttrs *xmlDefAttrsPtr; 1013 struct _xmlDefAttrs { 1014 int nbAttrs; /* number of defaulted attributes on that element */ 1015 int maxAttrs; /* the size of the array */ 1016 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1017 }; 1018 1019 /** 1020 * xmlAttrNormalizeSpace: 1021 * @src: the source string 1022 * @dst: the target string 1023 * 1024 * Normalize the space in non CDATA attribute values: 1025 * If the attribute type is not CDATA, then the XML processor MUST further 1026 * process the normalized attribute value by discarding any leading and 1027 * trailing space (#x20) characters, and by replacing sequences of space 1028 * (#x20) characters by a single space (#x20) character. 1029 * Note that the size of dst need to be at least src, and if one doesn't need 1030 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1031 * passing src as dst is just fine. 1032 * 1033 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1034 * is needed. 
1035 */ 1036 static xmlChar * 1037 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1038 { 1039 if ((src == NULL) || (dst == NULL)) 1040 return(NULL); 1041 1042 while (*src == 0x20) src++; 1043 while (*src != 0) { 1044 if (*src == 0x20) { 1045 while (*src == 0x20) src++; 1046 if (*src != 0) 1047 *dst++ = 0x20; 1048 } else { 1049 *dst++ = *src++; 1050 } 1051 } 1052 *dst = 0; 1053 if (dst == src) 1054 return(NULL); 1055 return(dst); 1056 } 1057 1058 /** 1059 * xmlAttrNormalizeSpace2: 1060 * @src: the source string 1061 * 1062 * Normalize the space in non CDATA attribute values, a slightly more complex 1063 * front end to avoid allocation problems when running on attribute values 1064 * coming from the input. 1065 * 1066 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1067 * is needed. 1068 */ 1069 static const xmlChar * 1070 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1071 { 1072 int i; 1073 int remove_head = 0; 1074 int need_realloc = 0; 1075 const xmlChar *cur; 1076 1077 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1078 return(NULL); 1079 i = *len; 1080 if (i <= 0) 1081 return(NULL); 1082 1083 cur = src; 1084 while (*cur == 0x20) { 1085 cur++; 1086 remove_head++; 1087 } 1088 while (*cur != 0) { 1089 if (*cur == 0x20) { 1090 cur++; 1091 if ((*cur == 0x20) || (*cur == 0)) { 1092 need_realloc = 1; 1093 break; 1094 } 1095 } else 1096 cur++; 1097 } 1098 if (need_realloc) { 1099 xmlChar *ret; 1100 1101 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1102 if (ret == NULL) { 1103 xmlErrMemory(ctxt, NULL); 1104 return(NULL); 1105 } 1106 xmlAttrNormalizeSpace(ret, ret); 1107 *len = (int) strlen((const char *)ret); 1108 return(ret); 1109 } else if (remove_head) { 1110 *len -= remove_head; 1111 memmove(src, src + remove_head, 1 + *len); 1112 return(src); 1113 } 1114 return(NULL); 1115 } 1116 1117 /** 1118 * xmlAddDefAttrs: 1119 * @ctxt: an XML parser context 1120 * @fullname: the element fullname 
1121 * @fullattr: the attribute fullname 1122 * @value: the attribute value 1123 * 1124 * Add a defaulted attribute for an element 1125 */ 1126 static void 1127 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1128 const xmlChar *fullname, 1129 const xmlChar *fullattr, 1130 const xmlChar *value) { 1131 xmlDefAttrsPtr defaults; 1132 int len; 1133 const xmlChar *name; 1134 const xmlChar *prefix; 1135 1136 /* 1137 * Allows to detect attribute redefinitions 1138 */ 1139 if (ctxt->attsSpecial != NULL) { 1140 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1141 return; 1142 } 1143 1144 if (ctxt->attsDefault == NULL) { 1145 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1146 if (ctxt->attsDefault == NULL) 1147 goto mem_error; 1148 } 1149 1150 /* 1151 * split the element name into prefix:localname , the string found 1152 * are within the DTD and then not associated to namespace names. 1153 */ 1154 name = xmlSplitQName3(fullname, &len); 1155 if (name == NULL) { 1156 name = xmlDictLookup(ctxt->dict, fullname, -1); 1157 prefix = NULL; 1158 } else { 1159 name = xmlDictLookup(ctxt->dict, name, -1); 1160 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1161 } 1162 1163 /* 1164 * make sure there is some storage 1165 */ 1166 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1167 if (defaults == NULL) { 1168 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1169 (4 * 5) * sizeof(const xmlChar *)); 1170 if (defaults == NULL) 1171 goto mem_error; 1172 defaults->nbAttrs = 0; 1173 defaults->maxAttrs = 4; 1174 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1175 defaults, NULL) < 0) { 1176 xmlFree(defaults); 1177 goto mem_error; 1178 } 1179 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1180 xmlDefAttrsPtr temp; 1181 1182 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1183 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1184 if (temp == NULL) 1185 goto mem_error; 1186 defaults = temp; 1187 
defaults->maxAttrs *= 2; 1188 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1189 defaults, NULL) < 0) { 1190 xmlFree(defaults); 1191 goto mem_error; 1192 } 1193 } 1194 1195 /* 1196 * Split the element name into prefix:localname , the string found 1197 * are within the DTD and hen not associated to namespace names. 1198 */ 1199 name = xmlSplitQName3(fullattr, &len); 1200 if (name == NULL) { 1201 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1202 prefix = NULL; 1203 } else { 1204 name = xmlDictLookup(ctxt->dict, name, -1); 1205 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1206 } 1207 1208 defaults->values[5 * defaults->nbAttrs] = name; 1209 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1210 /* intern the string and precompute the end */ 1211 len = xmlStrlen(value); 1212 value = xmlDictLookup(ctxt->dict, value, len); 1213 defaults->values[5 * defaults->nbAttrs + 2] = value; 1214 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1215 if (ctxt->external) 1216 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1217 else 1218 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1219 defaults->nbAttrs++; 1220 1221 return; 1222 1223 mem_error: 1224 xmlErrMemory(ctxt, NULL); 1225 return; 1226 } 1227 1228 /** 1229 * xmlAddSpecialAttr: 1230 * @ctxt: an XML parser context 1231 * @fullname: the element fullname 1232 * @fullattr: the attribute fullname 1233 * @type: the attribute type 1234 * 1235 * Register this attribute type 1236 */ 1237 static void 1238 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1239 const xmlChar *fullname, 1240 const xmlChar *fullattr, 1241 int type) 1242 { 1243 if (ctxt->attsSpecial == NULL) { 1244 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1245 if (ctxt->attsSpecial == NULL) 1246 goto mem_error; 1247 } 1248 1249 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1250 return; 1251 1252 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1253 (void *) (long) type); 1254 return; 
1255 1256 mem_error: 1257 xmlErrMemory(ctxt, NULL); 1258 return; 1259 } 1260 1261 /** 1262 * xmlCleanSpecialAttrCallback: 1263 * 1264 * Removes CDATA attributes from the special attribute table 1265 */ 1266 static void 1267 xmlCleanSpecialAttrCallback(void *payload, void *data, 1268 const xmlChar *fullname, const xmlChar *fullattr, 1269 const xmlChar *unused ATTRIBUTE_UNUSED) { 1270 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1271 1272 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1273 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1274 } 1275 } 1276 1277 /** 1278 * xmlCleanSpecialAttr: 1279 * @ctxt: an XML parser context 1280 * 1281 * Trim the list of attributes defined to remove all those of type 1282 * CDATA as they are not special. This call should be done when finishing 1283 * to parse the DTD and before starting to parse the document root. 1284 */ 1285 static void 1286 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1287 { 1288 if (ctxt->attsSpecial == NULL) 1289 return; 1290 1291 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1292 1293 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1294 xmlHashFree(ctxt->attsSpecial, NULL); 1295 ctxt->attsSpecial = NULL; 1296 } 1297 return; 1298 } 1299 1300 /** 1301 * xmlCheckLanguageID: 1302 * @lang: pointer to the string value 1303 * 1304 * Checks that the value conforms to the LanguageID production: 1305 * 1306 * NOTE: this is somewhat deprecated, those productions were removed from 1307 * the XML Second edition. 
1308 * 1309 * [33] LanguageID ::= Langcode ('-' Subcode)* 1310 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1311 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1312 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1313 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1314 * [38] Subcode ::= ([a-z] | [A-Z])+ 1315 * 1316 * The current REC reference the sucessors of RFC 1766, currently 5646 1317 * 1318 * http://www.rfc-editor.org/rfc/rfc5646.txt 1319 * langtag = language 1320 * ["-" script] 1321 * ["-" region] 1322 * *("-" variant) 1323 * *("-" extension) 1324 * ["-" privateuse] 1325 * language = 2*3ALPHA ; shortest ISO 639 code 1326 * ["-" extlang] ; sometimes followed by 1327 * ; extended language subtags 1328 * / 4ALPHA ; or reserved for future use 1329 * / 5*8ALPHA ; or registered language subtag 1330 * 1331 * extlang = 3ALPHA ; selected ISO 639 codes 1332 * *2("-" 3ALPHA) ; permanently reserved 1333 * 1334 * script = 4ALPHA ; ISO 15924 code 1335 * 1336 * region = 2ALPHA ; ISO 3166-1 code 1337 * / 3DIGIT ; UN M.49 code 1338 * 1339 * variant = 5*8alphanum ; registered variants 1340 * / (DIGIT 3alphanum) 1341 * 1342 * extension = singleton 1*("-" (2*8alphanum)) 1343 * 1344 * ; Single alphanumerics 1345 * ; "x" reserved for private use 1346 * singleton = DIGIT ; 0 - 9 1347 * / %x41-57 ; A - W 1348 * / %x59-5A ; Y - Z 1349 * / %x61-77 ; a - w 1350 * / %x79-7A ; y - z 1351 * 1352 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1353 * The parser below doesn't try to cope with extension or privateuse 1354 * that could be added but that's not interoperable anyway 1355 * 1356 * Returns 1 if correct 0 otherwise 1357 **/ 1358 int 1359 xmlCheckLanguageID(const xmlChar * lang) 1360 { 1361 const xmlChar *cur = lang, *nxt; 1362 1363 if (cur == NULL) 1364 return (0); 1365 if (((cur[0] == 'i') && (cur[1] == '-')) || 1366 ((cur[0] == 'I') && (cur[1] == '-')) || 1367 ((cur[0] == 'x') && (cur[1] == '-')) || 1368 ((cur[0] == 'X') && 
(cur[1] == '-'))) { 1369 /* 1370 * Still allow IANA code and user code which were coming 1371 * from the previous version of the XML-1.0 specification 1372 * it's deprecated but we should not fail 1373 */ 1374 cur += 2; 1375 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1376 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1377 cur++; 1378 return(cur[0] == 0); 1379 } 1380 nxt = cur; 1381 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1382 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1383 nxt++; 1384 if (nxt - cur >= 4) { 1385 /* 1386 * Reserved 1387 */ 1388 if ((nxt - cur > 8) || (nxt[0] != 0)) 1389 return(0); 1390 return(1); 1391 } 1392 if (nxt - cur < 2) 1393 return(0); 1394 /* we got an ISO 639 code */ 1395 if (nxt[0] == 0) 1396 return(1); 1397 if (nxt[0] != '-') 1398 return(0); 1399 1400 nxt++; 1401 cur = nxt; 1402 /* now we can have extlang or script or region or variant */ 1403 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1404 goto region_m49; 1405 1406 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1407 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1408 nxt++; 1409 if (nxt - cur == 4) 1410 goto script; 1411 if (nxt - cur == 2) 1412 goto region; 1413 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1414 goto variant; 1415 if (nxt - cur != 3) 1416 return(0); 1417 /* we parsed an extlang */ 1418 if (nxt[0] == 0) 1419 return(1); 1420 if (nxt[0] != '-') 1421 return(0); 1422 1423 nxt++; 1424 cur = nxt; 1425 /* now we can have script or region or variant */ 1426 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1427 goto region_m49; 1428 1429 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1430 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1431 nxt++; 1432 if (nxt - cur == 2) 1433 goto region; 1434 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1435 goto variant; 1436 if (nxt - cur != 4) 1437 return(0); 1438 /* we parsed a script */ 1439 script: 1440 if (nxt[0] == 0) 1441 return(1); 1442 if (nxt[0] != '-') 1443 return(0); 1444 1445 nxt++; 1446 cur = nxt; 1447 /* now we can have region or variant */ 1448 if ((nxt[0] >= '0') 
&& (nxt[0] <= '9')) 1449 goto region_m49; 1450 1451 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1452 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1453 nxt++; 1454 1455 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1456 goto variant; 1457 if (nxt - cur != 2) 1458 return(0); 1459 /* we parsed a region */ 1460 region: 1461 if (nxt[0] == 0) 1462 return(1); 1463 if (nxt[0] != '-') 1464 return(0); 1465 1466 nxt++; 1467 cur = nxt; 1468 /* now we can just have a variant */ 1469 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1470 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1471 nxt++; 1472 1473 if ((nxt - cur < 5) || (nxt - cur > 8)) 1474 return(0); 1475 1476 /* we parsed a variant */ 1477 variant: 1478 if (nxt[0] == 0) 1479 return(1); 1480 if (nxt[0] != '-') 1481 return(0); 1482 /* extensions and private use subtags not checked */ 1483 return (1); 1484 1485 region_m49: 1486 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1487 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1488 nxt += 3; 1489 goto region; 1490 } 1491 return(0); 1492 } 1493 1494 /************************************************************************ 1495 * * 1496 * Parser stacks related functions and macros * 1497 * * 1498 ************************************************************************/ 1499 1500 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1501 const xmlChar ** str); 1502 1503 #ifdef SAX2 1504 /** 1505 * nsPush: 1506 * @ctxt: an XML parser context 1507 * @prefix: the namespace prefix or NULL 1508 * @URL: the namespace name 1509 * 1510 * Pushes a new parser namespace on top of the ns stack 1511 * 1512 * Returns -1 in case of error, -2 if the namespace should be discarded 1513 * and the index in the stack otherwise. 
1514 */ 1515 static int 1516 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1517 { 1518 if (ctxt->options & XML_PARSE_NSCLEAN) { 1519 int i; 1520 for (i = 0;i < ctxt->nsNr;i += 2) { 1521 if (ctxt->nsTab[i] == prefix) { 1522 /* in scope */ 1523 if (ctxt->nsTab[i + 1] == URL) 1524 return(-2); 1525 /* out of scope keep it */ 1526 break; 1527 } 1528 } 1529 } 1530 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1531 ctxt->nsMax = 10; 1532 ctxt->nsNr = 0; 1533 ctxt->nsTab = (const xmlChar **) 1534 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1535 if (ctxt->nsTab == NULL) { 1536 xmlErrMemory(ctxt, NULL); 1537 ctxt->nsMax = 0; 1538 return (-1); 1539 } 1540 } else if (ctxt->nsNr >= ctxt->nsMax) { 1541 const xmlChar ** tmp; 1542 ctxt->nsMax *= 2; 1543 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1544 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1545 if (tmp == NULL) { 1546 xmlErrMemory(ctxt, NULL); 1547 ctxt->nsMax /= 2; 1548 return (-1); 1549 } 1550 ctxt->nsTab = tmp; 1551 } 1552 ctxt->nsTab[ctxt->nsNr++] = prefix; 1553 ctxt->nsTab[ctxt->nsNr++] = URL; 1554 return (ctxt->nsNr); 1555 } 1556 /** 1557 * nsPop: 1558 * @ctxt: an XML parser context 1559 * @nr: the number to pop 1560 * 1561 * Pops the top @nr parser prefix/namespace from the ns stack 1562 * 1563 * Returns the number of namespaces removed 1564 */ 1565 static int 1566 nsPop(xmlParserCtxtPtr ctxt, int nr) 1567 { 1568 int i; 1569 1570 if (ctxt->nsTab == NULL) return(0); 1571 if (ctxt->nsNr < nr) { 1572 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1573 nr = ctxt->nsNr; 1574 } 1575 if (ctxt->nsNr <= 0) 1576 return (0); 1577 1578 for (i = 0;i < nr;i++) { 1579 ctxt->nsNr--; 1580 ctxt->nsTab[ctxt->nsNr] = NULL; 1581 } 1582 return(nr); 1583 } 1584 #endif 1585 1586 static int 1587 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1588 const xmlChar **atts; 1589 int *attallocs; 1590 int maxatts; 1591 1592 if (ctxt->atts == NULL) { 1593 maxatts = 55; /* allow for 10 
attrs by default */ 1594 atts = (const xmlChar **) 1595 xmlMalloc(maxatts * sizeof(xmlChar *)); 1596 if (atts == NULL) goto mem_error; 1597 ctxt->atts = atts; 1598 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1599 if (attallocs == NULL) goto mem_error; 1600 ctxt->attallocs = attallocs; 1601 ctxt->maxatts = maxatts; 1602 } else if (nr + 5 > ctxt->maxatts) { 1603 maxatts = (nr + 5) * 2; 1604 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1605 maxatts * sizeof(const xmlChar *)); 1606 if (atts == NULL) goto mem_error; 1607 ctxt->atts = atts; 1608 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1609 (maxatts / 5) * sizeof(int)); 1610 if (attallocs == NULL) goto mem_error; 1611 ctxt->attallocs = attallocs; 1612 ctxt->maxatts = maxatts; 1613 } 1614 return(ctxt->maxatts); 1615 mem_error: 1616 xmlErrMemory(ctxt, NULL); 1617 return(-1); 1618 } 1619 1620 /** 1621 * inputPush: 1622 * @ctxt: an XML parser context 1623 * @value: the parser input 1624 * 1625 * Pushes a new parser input on top of the input stack 1626 * 1627 * Returns -1 in case of error, the index in the stack otherwise 1628 */ 1629 int 1630 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1631 { 1632 if ((ctxt == NULL) || (value == NULL)) 1633 return(-1); 1634 if (ctxt->inputNr >= ctxt->inputMax) { 1635 ctxt->inputMax *= 2; 1636 ctxt->inputTab = 1637 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1638 ctxt->inputMax * 1639 sizeof(ctxt->inputTab[0])); 1640 if (ctxt->inputTab == NULL) { 1641 xmlErrMemory(ctxt, NULL); 1642 xmlFreeInputStream(value); 1643 ctxt->inputMax /= 2; 1644 value = NULL; 1645 return (-1); 1646 } 1647 } 1648 ctxt->inputTab[ctxt->inputNr] = value; 1649 ctxt->input = value; 1650 return (ctxt->inputNr++); 1651 } 1652 /** 1653 * inputPop: 1654 * @ctxt: an XML parser context 1655 * 1656 * Pops the top parser input from the input stack 1657 * 1658 * Returns the input just removed 1659 */ 1660 xmlParserInputPtr 1661 inputPop(xmlParserCtxtPtr ctxt) 1662 { 
1663 xmlParserInputPtr ret; 1664 1665 if (ctxt == NULL) 1666 return(NULL); 1667 if (ctxt->inputNr <= 0) 1668 return (NULL); 1669 ctxt->inputNr--; 1670 if (ctxt->inputNr > 0) 1671 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1672 else 1673 ctxt->input = NULL; 1674 ret = ctxt->inputTab[ctxt->inputNr]; 1675 ctxt->inputTab[ctxt->inputNr] = NULL; 1676 return (ret); 1677 } 1678 /** 1679 * nodePush: 1680 * @ctxt: an XML parser context 1681 * @value: the element node 1682 * 1683 * Pushes a new element node on top of the node stack 1684 * 1685 * Returns -1 in case of error, the index in the stack otherwise 1686 */ 1687 int 1688 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1689 { 1690 if (ctxt == NULL) return(0); 1691 if (ctxt->nodeNr >= ctxt->nodeMax) { 1692 xmlNodePtr *tmp; 1693 1694 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1695 ctxt->nodeMax * 2 * 1696 sizeof(ctxt->nodeTab[0])); 1697 if (tmp == NULL) { 1698 xmlErrMemory(ctxt, NULL); 1699 return (-1); 1700 } 1701 ctxt->nodeTab = tmp; 1702 ctxt->nodeMax *= 2; 1703 } 1704 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1705 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1706 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1707 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1708 xmlParserMaxDepth); 1709 ctxt->instate = XML_PARSER_EOF; 1710 return(-1); 1711 } 1712 ctxt->nodeTab[ctxt->nodeNr] = value; 1713 ctxt->node = value; 1714 return (ctxt->nodeNr++); 1715 } 1716 1717 /** 1718 * nodePop: 1719 * @ctxt: an XML parser context 1720 * 1721 * Pops the top element node from the node stack 1722 * 1723 * Returns the node just removed 1724 */ 1725 xmlNodePtr 1726 nodePop(xmlParserCtxtPtr ctxt) 1727 { 1728 xmlNodePtr ret; 1729 1730 if (ctxt == NULL) return(NULL); 1731 if (ctxt->nodeNr <= 0) 1732 return (NULL); 1733 ctxt->nodeNr--; 1734 if (ctxt->nodeNr > 0) 1735 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1736 else 1737 ctxt->node = NULL; 1738 ret = ctxt->nodeTab[ctxt->nodeNr]; 1739 
ctxt->nodeTab[ctxt->nodeNr] = NULL; 1740 return (ret); 1741 } 1742 1743 #ifdef LIBXML_PUSH_ENABLED 1744 /** 1745 * nameNsPush: 1746 * @ctxt: an XML parser context 1747 * @value: the element name 1748 * @prefix: the element prefix 1749 * @URI: the element namespace name 1750 * 1751 * Pushes a new element name/prefix/URL on top of the name stack 1752 * 1753 * Returns -1 in case of error, the index in the stack otherwise 1754 */ 1755 static int 1756 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1757 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1758 { 1759 if (ctxt->nameNr >= ctxt->nameMax) { 1760 const xmlChar * *tmp; 1761 void **tmp2; 1762 ctxt->nameMax *= 2; 1763 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1764 ctxt->nameMax * 1765 sizeof(ctxt->nameTab[0])); 1766 if (tmp == NULL) { 1767 ctxt->nameMax /= 2; 1768 goto mem_error; 1769 } 1770 ctxt->nameTab = tmp; 1771 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1772 ctxt->nameMax * 3 * 1773 sizeof(ctxt->pushTab[0])); 1774 if (tmp2 == NULL) { 1775 ctxt->nameMax /= 2; 1776 goto mem_error; 1777 } 1778 ctxt->pushTab = tmp2; 1779 } 1780 ctxt->nameTab[ctxt->nameNr] = value; 1781 ctxt->name = value; 1782 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1783 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1784 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1785 return (ctxt->nameNr++); 1786 mem_error: 1787 xmlErrMemory(ctxt, NULL); 1788 return (-1); 1789 } 1790 /** 1791 * nameNsPop: 1792 * @ctxt: an XML parser context 1793 * 1794 * Pops the top element/prefix/URI name from the name stack 1795 * 1796 * Returns the name just removed 1797 */ 1798 static const xmlChar * 1799 nameNsPop(xmlParserCtxtPtr ctxt) 1800 { 1801 const xmlChar *ret; 1802 1803 if (ctxt->nameNr <= 0) 1804 return (NULL); 1805 ctxt->nameNr--; 1806 if (ctxt->nameNr > 0) 1807 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1808 else 1809 ctxt->name = NULL; 1810 ret = ctxt->nameTab[ctxt->nameNr]; 1811 
ctxt->nameTab[ctxt->nameNr] = NULL; 1812 return (ret); 1813 } 1814 #endif /* LIBXML_PUSH_ENABLED */ 1815 1816 /** 1817 * namePush: 1818 * @ctxt: an XML parser context 1819 * @value: the element name 1820 * 1821 * Pushes a new element name on top of the name stack 1822 * 1823 * Returns -1 in case of error, the index in the stack otherwise 1824 */ 1825 int 1826 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1827 { 1828 if (ctxt == NULL) return (-1); 1829 1830 if (ctxt->nameNr >= ctxt->nameMax) { 1831 const xmlChar * *tmp; 1832 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1833 ctxt->nameMax * 2 * 1834 sizeof(ctxt->nameTab[0])); 1835 if (tmp == NULL) { 1836 goto mem_error; 1837 } 1838 ctxt->nameTab = tmp; 1839 ctxt->nameMax *= 2; 1840 } 1841 ctxt->nameTab[ctxt->nameNr] = value; 1842 ctxt->name = value; 1843 return (ctxt->nameNr++); 1844 mem_error: 1845 xmlErrMemory(ctxt, NULL); 1846 return (-1); 1847 } 1848 /** 1849 * namePop: 1850 * @ctxt: an XML parser context 1851 * 1852 * Pops the top element name from the name stack 1853 * 1854 * Returns the name just removed 1855 */ 1856 const xmlChar * 1857 namePop(xmlParserCtxtPtr ctxt) 1858 { 1859 const xmlChar *ret; 1860 1861 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1862 return (NULL); 1863 ctxt->nameNr--; 1864 if (ctxt->nameNr > 0) 1865 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1866 else 1867 ctxt->name = NULL; 1868 ret = ctxt->nameTab[ctxt->nameNr]; 1869 ctxt->nameTab[ctxt->nameNr] = NULL; 1870 return (ret); 1871 } 1872 1873 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1874 if (ctxt->spaceNr >= ctxt->spaceMax) { 1875 int *tmp; 1876 1877 ctxt->spaceMax *= 2; 1878 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1879 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1880 if (tmp == NULL) { 1881 xmlErrMemory(ctxt, NULL); 1882 ctxt->spaceMax /=2; 1883 return(-1); 1884 } 1885 ctxt->spaceTab = tmp; 1886 } 1887 ctxt->spaceTab[ctxt->spaceNr] = val; 1888 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 
1889 return(ctxt->spaceNr++); 1890 } 1891 1892 static int spacePop(xmlParserCtxtPtr ctxt) { 1893 int ret; 1894 if (ctxt->spaceNr <= 0) return(0); 1895 ctxt->spaceNr--; 1896 if (ctxt->spaceNr > 0) 1897 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1898 else 1899 ctxt->space = &ctxt->spaceTab[0]; 1900 ret = ctxt->spaceTab[ctxt->spaceNr]; 1901 ctxt->spaceTab[ctxt->spaceNr] = -1; 1902 return(ret); 1903 } 1904 1905 /* 1906 * Macros for accessing the content. Those should be used only by the parser, 1907 * and not exported. 1908 * 1909 * Dirty macros, i.e. one often need to make assumption on the context to 1910 * use them 1911 * 1912 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1913 * To be used with extreme caution since operations consuming 1914 * characters may move the input buffer to a different location ! 1915 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1916 * This should be used internally by the parser 1917 * only to compare to ASCII values otherwise it would break when 1918 * running with UTF-8 encoding. 1919 * RAW same as CUR but in the input buffer, bypass any token 1920 * extraction that may have been done 1921 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1922 * to compare on ASCII based substring. 1923 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1924 * strings without newlines within the parser. 1925 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1926 * defined char within the parser. 1927 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1928 * 1929 * NEXT Skip to the next character, this does the proper decoding 1930 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1931 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1932 * CUR_CHAR(l) returns the current unicode character (int), set l 1933 * to the number of xmlChars used for the encoding [0-5]. 
1934 * CUR_SCHAR same but operate on a string instead of the context 1935 * COPY_BUF copy the current unicode char to the target buffer, increment 1936 * the index 1937 * GROW, SHRINK handling of input buffers 1938 */ 1939 1940 #define RAW (*ctxt->input->cur) 1941 #define CUR (*ctxt->input->cur) 1942 #define NXT(val) ctxt->input->cur[(val)] 1943 #define CUR_PTR ctxt->input->cur 1944 1945 #define CMP4( s, c1, c2, c3, c4 ) \ 1946 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1947 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1948 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 1949 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1950 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1951 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1952 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1953 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1954 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1955 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1956 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1957 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1958 ((unsigned char *) s)[ 8 ] == c9 ) 1959 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1960 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1961 ((unsigned char *) s)[ 9 ] == c10 ) 1962 1963 #define SKIP(val) do { \ 1964 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 1965 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1966 if ((*ctxt->input->cur == 0) && \ 1967 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1968 xmlPopInput(ctxt); \ 1969 } while (0) 1970 1971 #define SKIPL(val) do { \ 1972 int skipl; \ 1973 for(skipl=0; skipl<val; skipl++) { \ 1974 if (*(ctxt->input->cur) == '\n') { \ 1975 ctxt->input->line++; ctxt->input->col = 1; \ 1976 } else ctxt->input->col++; \ 1977 ctxt->nbChars++; \ 1978 
ctxt->input->cur++; \ 1979 } \ 1980 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1981 if ((*ctxt->input->cur == 0) && \ 1982 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1983 xmlPopInput(ctxt); \ 1984 } while (0) 1985 1986 #define SHRINK if ((ctxt->progressive == 0) && \ 1987 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 1988 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 1989 xmlSHRINK (ctxt); 1990 1991 static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 1992 xmlParserInputShrink(ctxt->input); 1993 if ((*ctxt->input->cur == 0) && 1994 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1995 xmlPopInput(ctxt); 1996 } 1997 1998 #define GROW if ((ctxt->progressive == 0) && \ 1999 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2000 xmlGROW (ctxt); 2001 2002 static void xmlGROW (xmlParserCtxtPtr ctxt) { 2003 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2004 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 2005 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2006 xmlPopInput(ctxt); 2007 } 2008 2009 #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2010 2011 #define NEXT xmlNextChar(ctxt) 2012 2013 #define NEXT1 { \ 2014 ctxt->input->col++; \ 2015 ctxt->input->cur++; \ 2016 ctxt->nbChars++; \ 2017 if (*ctxt->input->cur == 0) \ 2018 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2019 } 2020 2021 #define NEXTL(l) do { \ 2022 if (*(ctxt->input->cur) == '\n') { \ 2023 ctxt->input->line++; ctxt->input->col = 1; \ 2024 } else ctxt->input->col++; \ 2025 ctxt->input->cur += l; \ 2026 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2027 } while (0) 2028 2029 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2030 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2031 2032 #define COPY_BUF(l,b,i,v) \ 2033 if (l == 1) b[i++] = (xmlChar) v; \ 2034 else i += xmlCopyCharMultiByte(&b[i],v) 2035 2036 /** 2037 * xmlSkipBlankChars: 2038 * @ctxt: the XML parser context 2039 * 2040 * skip all 
blanks character found at that point in the input streams. 2041 * It pops up finished entities in the process if allowable at that point. 2042 * 2043 * Returns the number of space chars skipped 2044 */ 2045 2046 int 2047 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2048 int res = 0; 2049 2050 /* 2051 * It's Okay to use CUR/NEXT here since all the blanks are on 2052 * the ASCII range. 2053 */ 2054 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2055 const xmlChar *cur; 2056 /* 2057 * if we are in the document content, go really fast 2058 */ 2059 cur = ctxt->input->cur; 2060 while (IS_BLANK_CH(*cur)) { 2061 if (*cur == '\n') { 2062 ctxt->input->line++; ctxt->input->col = 1; 2063 } 2064 cur++; 2065 res++; 2066 if (*cur == 0) { 2067 ctxt->input->cur = cur; 2068 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2069 cur = ctxt->input->cur; 2070 } 2071 } 2072 ctxt->input->cur = cur; 2073 } else { 2074 int cur; 2075 do { 2076 cur = CUR; 2077 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 2078 NEXT; 2079 cur = CUR; 2080 res++; 2081 } 2082 while ((cur == 0) && (ctxt->inputNr > 1) && 2083 (ctxt->instate != XML_PARSER_COMMENT)) { 2084 xmlPopInput(ctxt); 2085 cur = CUR; 2086 } 2087 /* 2088 * Need to handle support of entities branching here 2089 */ 2090 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 2091 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 2092 } 2093 return(res); 2094 } 2095 2096 /************************************************************************ 2097 * * 2098 * Commodity functions to handle entities * 2099 * * 2100 ************************************************************************/ 2101 2102 /** 2103 * xmlPopInput: 2104 * @ctxt: an XML parser context 2105 * 2106 * xmlPopInput: the current input pointed by ctxt->input came to an end 2107 * pop it and return the next char. 
2108 * 2109 * Returns the current xmlChar in the parser context 2110 */ 2111 xmlChar 2112 xmlPopInput(xmlParserCtxtPtr ctxt) { 2113 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2114 if (xmlParserDebugEntities) 2115 xmlGenericError(xmlGenericErrorContext, 2116 "Popping input %d\n", ctxt->inputNr); 2117 xmlFreeInputStream(inputPop(ctxt)); 2118 if ((*ctxt->input->cur == 0) && 2119 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2120 return(xmlPopInput(ctxt)); 2121 return(CUR); 2122 } 2123 2124 /** 2125 * xmlPushInput: 2126 * @ctxt: an XML parser context 2127 * @input: an XML parser input fragment (entity, XML fragment ...). 2128 * 2129 * xmlPushInput: switch to a new input stream which is stacked on top 2130 * of the previous one(s). 2131 * Returns -1 in case of error or the index in the input stack 2132 */ 2133 int 2134 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2135 int ret; 2136 if (input == NULL) return(-1); 2137 2138 if (xmlParserDebugEntities) { 2139 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2140 xmlGenericError(xmlGenericErrorContext, 2141 "%s(%d): ", ctxt->input->filename, 2142 ctxt->input->line); 2143 xmlGenericError(xmlGenericErrorContext, 2144 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2145 } 2146 ret = inputPush(ctxt, input); 2147 GROW; 2148 return(ret); 2149 } 2150 2151 /** 2152 * xmlParseCharRef: 2153 * @ctxt: an XML parser context 2154 * 2155 * parse Reference declarations 2156 * 2157 * [66] CharRef ::= '&#' [0-9]+ ';' | 2158 * '&#x' [0-9a-fA-F]+ ';' 2159 * 2160 * [ WFC: Legal Character ] 2161 * Characters referred to using character references must match the 2162 * production for Char. 
2163 * 2164 * Returns the value parsed (as an int), 0 in case of error 2165 */ 2166 int 2167 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2168 unsigned int val = 0; 2169 int count = 0; 2170 unsigned int outofrange = 0; 2171 2172 /* 2173 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2174 */ 2175 if ((RAW == '&') && (NXT(1) == '#') && 2176 (NXT(2) == 'x')) { 2177 SKIP(3); 2178 GROW; 2179 while (RAW != ';') { /* loop blocked by count */ 2180 if (count++ > 20) { 2181 count = 0; 2182 GROW; 2183 } 2184 if ((RAW >= '0') && (RAW <= '9')) 2185 val = val * 16 + (CUR - '0'); 2186 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2187 val = val * 16 + (CUR - 'a') + 10; 2188 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2189 val = val * 16 + (CUR - 'A') + 10; 2190 else { 2191 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2192 val = 0; 2193 break; 2194 } 2195 if (val > 0x10FFFF) 2196 outofrange = val; 2197 2198 NEXT; 2199 count++; 2200 } 2201 if (RAW == ';') { 2202 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2203 ctxt->input->col++; 2204 ctxt->nbChars ++; 2205 ctxt->input->cur++; 2206 } 2207 } else if ((RAW == '&') && (NXT(1) == '#')) { 2208 SKIP(2); 2209 GROW; 2210 while (RAW != ';') { /* loop blocked by count */ 2211 if (count++ > 20) { 2212 count = 0; 2213 GROW; 2214 } 2215 if ((RAW >= '0') && (RAW <= '9')) 2216 val = val * 10 + (CUR - '0'); 2217 else { 2218 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2219 val = 0; 2220 break; 2221 } 2222 if (val > 0x10FFFF) 2223 outofrange = val; 2224 2225 NEXT; 2226 count++; 2227 } 2228 if (RAW == ';') { 2229 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2230 ctxt->input->col++; 2231 ctxt->nbChars ++; 2232 ctxt->input->cur++; 2233 } 2234 } else { 2235 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2236 } 2237 2238 /* 2239 * [ WFC: Legal Character ] 2240 * Characters referred to using character references must match the 2241 * production for Char. 
2242 */ 2243 if ((IS_CHAR(val) && (outofrange == 0))) { 2244 return(val); 2245 } else { 2246 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2247 "xmlParseCharRef: invalid xmlChar value %d\n", 2248 val); 2249 } 2250 return(0); 2251 } 2252 2253 /** 2254 * xmlParseStringCharRef: 2255 * @ctxt: an XML parser context 2256 * @str: a pointer to an index in the string 2257 * 2258 * parse Reference declarations, variant parsing from a string rather 2259 * than an an input flow. 2260 * 2261 * [66] CharRef ::= '&#' [0-9]+ ';' | 2262 * '&#x' [0-9a-fA-F]+ ';' 2263 * 2264 * [ WFC: Legal Character ] 2265 * Characters referred to using character references must match the 2266 * production for Char. 2267 * 2268 * Returns the value parsed (as an int), 0 in case of error, str will be 2269 * updated to the current value of the index 2270 */ 2271 static int 2272 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2273 const xmlChar *ptr; 2274 xmlChar cur; 2275 unsigned int val = 0; 2276 unsigned int outofrange = 0; 2277 2278 if ((str == NULL) || (*str == NULL)) return(0); 2279 ptr = *str; 2280 cur = *ptr; 2281 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2282 ptr += 3; 2283 cur = *ptr; 2284 while (cur != ';') { /* Non input consuming loop */ 2285 if ((cur >= '0') && (cur <= '9')) 2286 val = val * 16 + (cur - '0'); 2287 else if ((cur >= 'a') && (cur <= 'f')) 2288 val = val * 16 + (cur - 'a') + 10; 2289 else if ((cur >= 'A') && (cur <= 'F')) 2290 val = val * 16 + (cur - 'A') + 10; 2291 else { 2292 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2293 val = 0; 2294 break; 2295 } 2296 if (val > 0x10FFFF) 2297 outofrange = val; 2298 2299 ptr++; 2300 cur = *ptr; 2301 } 2302 if (cur == ';') 2303 ptr++; 2304 } else if ((cur == '&') && (ptr[1] == '#')){ 2305 ptr += 2; 2306 cur = *ptr; 2307 while (cur != ';') { /* Non input consuming loops */ 2308 if ((cur >= '0') && (cur <= '9')) 2309 val = val * 10 + (cur - '0'); 2310 else { 2311 xmlFatalErr(ctxt, 
XML_ERR_INVALID_DEC_CHARREF, NULL); 2312 val = 0; 2313 break; 2314 } 2315 if (val > 0x10FFFF) 2316 outofrange = val; 2317 2318 ptr++; 2319 cur = *ptr; 2320 } 2321 if (cur == ';') 2322 ptr++; 2323 } else { 2324 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2325 return(0); 2326 } 2327 *str = ptr; 2328 2329 /* 2330 * [ WFC: Legal Character ] 2331 * Characters referred to using character references must match the 2332 * production for Char. 2333 */ 2334 if ((IS_CHAR(val) && (outofrange == 0))) { 2335 return(val); 2336 } else { 2337 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2338 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2339 val); 2340 } 2341 return(0); 2342 } 2343 2344 /** 2345 * xmlNewBlanksWrapperInputStream: 2346 * @ctxt: an XML parser context 2347 * @entity: an Entity pointer 2348 * 2349 * Create a new input stream for wrapping 2350 * blanks around a PEReference 2351 * 2352 * Returns the new input stream or NULL 2353 */ 2354 2355 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2356 2357 static xmlParserInputPtr 2358 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2359 xmlParserInputPtr input; 2360 xmlChar *buffer; 2361 size_t length; 2362 if (entity == NULL) { 2363 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2364 "xmlNewBlanksWrapperInputStream entity\n"); 2365 return(NULL); 2366 } 2367 if (xmlParserDebugEntities) 2368 xmlGenericError(xmlGenericErrorContext, 2369 "new blanks wrapper for entity: %s\n", entity->name); 2370 input = xmlNewInputStream(ctxt); 2371 if (input == NULL) { 2372 return(NULL); 2373 } 2374 length = xmlStrlen(entity->name) + 5; 2375 buffer = xmlMallocAtomic(length); 2376 if (buffer == NULL) { 2377 xmlErrMemory(ctxt, NULL); 2378 xmlFree(input); 2379 return(NULL); 2380 } 2381 buffer [0] = ' '; 2382 buffer [1] = '%'; 2383 buffer [length-3] = ';'; 2384 buffer [length-2] = ' '; 2385 buffer [length-1] = 0; 2386 memcpy(buffer + 2, entity->name, length - 5); 2387 input->free = 
deallocblankswrapper; 2388 input->base = buffer; 2389 input->cur = buffer; 2390 input->length = length; 2391 input->end = &buffer[length]; 2392 return(input); 2393 } 2394 2395 /** 2396 * xmlParserHandlePEReference: 2397 * @ctxt: the parser context 2398 * 2399 * [69] PEReference ::= '%' Name ';' 2400 * 2401 * [ WFC: No Recursion ] 2402 * A parsed entity must not contain a recursive 2403 * reference to itself, either directly or indirectly. 2404 * 2405 * [ WFC: Entity Declared ] 2406 * In a document without any DTD, a document with only an internal DTD 2407 * subset which contains no parameter entity references, or a document 2408 * with "standalone='yes'", ... ... The declaration of a parameter 2409 * entity must precede any reference to it... 2410 * 2411 * [ VC: Entity Declared ] 2412 * In a document with an external subset or external parameter entities 2413 * with "standalone='no'", ... ... The declaration of a parameter entity 2414 * must precede any reference to it... 2415 * 2416 * [ WFC: In DTD ] 2417 * Parameter-entity references may only appear in the DTD. 2418 * NOTE: misleading but this is handled. 2419 * 2420 * A PEReference may have been detected in the current input stream 2421 * the handling is done accordingly to 2422 * http://www.w3.org/TR/REC-xml#entproc 2423 * i.e. 
2424 * - Included in literal in entity values 2425 * - Included as Parameter Entity reference within DTDs 2426 */ 2427 void 2428 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2429 const xmlChar *name; 2430 xmlEntityPtr entity = NULL; 2431 xmlParserInputPtr input; 2432 2433 if (RAW != '%') return; 2434 switch(ctxt->instate) { 2435 case XML_PARSER_CDATA_SECTION: 2436 return; 2437 case XML_PARSER_COMMENT: 2438 return; 2439 case XML_PARSER_START_TAG: 2440 return; 2441 case XML_PARSER_END_TAG: 2442 return; 2443 case XML_PARSER_EOF: 2444 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2445 return; 2446 case XML_PARSER_PROLOG: 2447 case XML_PARSER_START: 2448 case XML_PARSER_MISC: 2449 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2450 return; 2451 case XML_PARSER_ENTITY_DECL: 2452 case XML_PARSER_CONTENT: 2453 case XML_PARSER_ATTRIBUTE_VALUE: 2454 case XML_PARSER_PI: 2455 case XML_PARSER_SYSTEM_LITERAL: 2456 case XML_PARSER_PUBLIC_LITERAL: 2457 /* we just ignore it there */ 2458 return; 2459 case XML_PARSER_EPILOG: 2460 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2461 return; 2462 case XML_PARSER_ENTITY_VALUE: 2463 /* 2464 * NOTE: in the case of entity values, we don't do the 2465 * substitution here since we need the literal 2466 * entity value to be able to save the internal 2467 * subset of the document. 2468 * This will be handled by xmlStringDecodeEntities 2469 */ 2470 return; 2471 case XML_PARSER_DTD: 2472 /* 2473 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2474 * In the internal DTD subset, parameter-entity references 2475 * can occur only where markup declarations can occur, not 2476 * within markup declarations. 
2477 * In that case this is handled in xmlParseMarkupDecl 2478 */ 2479 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2480 return; 2481 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2482 return; 2483 break; 2484 case XML_PARSER_IGNORE: 2485 return; 2486 } 2487 2488 NEXT; 2489 name = xmlParseName(ctxt); 2490 if (xmlParserDebugEntities) 2491 xmlGenericError(xmlGenericErrorContext, 2492 "PEReference: %s\n", name); 2493 if (name == NULL) { 2494 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2495 } else { 2496 if (RAW == ';') { 2497 NEXT; 2498 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2499 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2500 if (entity == NULL) { 2501 2502 /* 2503 * [ WFC: Entity Declared ] 2504 * In a document without any DTD, a document with only an 2505 * internal DTD subset which contains no parameter entity 2506 * references, or a document with "standalone='yes'", ... 2507 * ... The declaration of a parameter entity must precede 2508 * any reference to it... 2509 */ 2510 if ((ctxt->standalone == 1) || 2511 ((ctxt->hasExternalSubset == 0) && 2512 (ctxt->hasPErefs == 0))) { 2513 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2514 "PEReference: %%%s; not found\n", name); 2515 } else { 2516 /* 2517 * [ VC: Entity Declared ] 2518 * In a document with an external subset or external 2519 * parameter entities with "standalone='no'", ... 2520 * ... The declaration of a parameter entity must precede 2521 * any reference to it... 
2522 */ 2523 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2524 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2525 "PEReference: %%%s; not found\n", 2526 name, NULL); 2527 } else 2528 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2529 "PEReference: %%%s; not found\n", 2530 name, NULL); 2531 ctxt->valid = 0; 2532 } 2533 } else if (ctxt->input->free != deallocblankswrapper) { 2534 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2535 if (xmlPushInput(ctxt, input) < 0) 2536 return; 2537 } else { 2538 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2539 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2540 xmlChar start[4]; 2541 xmlCharEncoding enc; 2542 2543 /* 2544 * handle the extra spaces added before and after 2545 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2546 * this is done independently. 2547 */ 2548 input = xmlNewEntityInputStream(ctxt, entity); 2549 if (xmlPushInput(ctxt, input) < 0) 2550 return; 2551 2552 /* 2553 * Get the 4 first bytes and decode the charset 2554 * if enc != XML_CHAR_ENCODING_NONE 2555 * plug some encoding conversion routines. 2556 * Note that, since we may have some non-UTF8 2557 * encoding (like UTF16, bug 135229), the 'length' 2558 * is not known, but we can calculate based upon 2559 * the amount of data in the buffer. 
2560 */ 2561 GROW 2562 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2563 start[0] = RAW; 2564 start[1] = NXT(1); 2565 start[2] = NXT(2); 2566 start[3] = NXT(3); 2567 enc = xmlDetectCharEncoding(start, 4); 2568 if (enc != XML_CHAR_ENCODING_NONE) { 2569 xmlSwitchEncoding(ctxt, enc); 2570 } 2571 } 2572 2573 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2574 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2575 (IS_BLANK_CH(NXT(5)))) { 2576 xmlParseTextDecl(ctxt); 2577 } 2578 } else { 2579 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2580 "PEReference: %s is not a parameter entity\n", 2581 name); 2582 } 2583 } 2584 } else { 2585 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2586 } 2587 } 2588 } 2589 2590 /* 2591 * Macro used to grow the current buffer. 2592 */ 2593 #define growBuffer(buffer, n) { \ 2594 xmlChar *tmp; \ 2595 buffer##_size *= 2; \ 2596 buffer##_size += n; \ 2597 tmp = (xmlChar *) \ 2598 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 2599 if (tmp == NULL) goto mem_error; \ 2600 buffer = tmp; \ 2601 } 2602 2603 /** 2604 * xmlStringLenDecodeEntities: 2605 * @ctxt: the parser context 2606 * @str: the input string 2607 * @len: the string length 2608 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2609 * @end: an end marker xmlChar, 0 if none 2610 * @end2: an end marker xmlChar, 0 if none 2611 * @end3: an end marker xmlChar, 0 if none 2612 * 2613 * Takes a entity string content and process to do the adequate substitutions. 2614 * 2615 * [67] Reference ::= EntityRef | CharRef 2616 * 2617 * [69] PEReference ::= '%' Name ';' 2618 * 2619 * Returns A newly allocated string with the substitution done. The caller 2620 * must deallocate it ! 
2621 */ 2622 xmlChar * 2623 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2624 int what, xmlChar end, xmlChar end2, xmlChar end3) { 2625 xmlChar *buffer = NULL; 2626 int buffer_size = 0; 2627 2628 xmlChar *current = NULL; 2629 xmlChar *rep = NULL; 2630 const xmlChar *last; 2631 xmlEntityPtr ent; 2632 int c,l; 2633 int nbchars = 0; 2634 2635 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2636 return(NULL); 2637 last = str + len; 2638 2639 if (((ctxt->depth > 40) && 2640 ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2641 (ctxt->depth > 1024)) { 2642 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2643 return(NULL); 2644 } 2645 2646 /* 2647 * allocate a translation buffer. 2648 */ 2649 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2650 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 2651 if (buffer == NULL) goto mem_error; 2652 2653 /* 2654 * OK loop until we reach one of the ending char or a size limit. 2655 * we are operating on already parsed values. 
2656 */ 2657 if (str < last) 2658 c = CUR_SCHAR(str, l); 2659 else 2660 c = 0; 2661 while ((c != 0) && (c != end) && /* non input consuming loop */ 2662 (c != end2) && (c != end3)) { 2663 2664 if (c == 0) break; 2665 if ((c == '&') && (str[1] == '#')) { 2666 int val = xmlParseStringCharRef(ctxt, &str); 2667 if (val != 0) { 2668 COPY_BUF(0,buffer,nbchars,val); 2669 } 2670 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2671 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2672 } 2673 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2674 if (xmlParserDebugEntities) 2675 xmlGenericError(xmlGenericErrorContext, 2676 "String decoding Entity Reference: %.30s\n", 2677 str); 2678 ent = xmlParseStringEntityRef(ctxt, &str); 2679 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2680 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2681 goto int_error; 2682 if (ent != NULL) 2683 ctxt->nbentities += ent->checked; 2684 if ((ent != NULL) && 2685 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2686 if (ent->content != NULL) { 2687 COPY_BUF(0,buffer,nbchars,ent->content[0]); 2688 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2689 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2690 } 2691 } else { 2692 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2693 "predefined entity has no content\n"); 2694 } 2695 } else if ((ent != NULL) && (ent->content != NULL)) { 2696 ctxt->depth++; 2697 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2698 0, 0, 0); 2699 ctxt->depth--; 2700 2701 if (rep != NULL) { 2702 current = rep; 2703 while (*current != 0) { /* non input consuming loop */ 2704 buffer[nbchars++] = *current++; 2705 if (nbchars > 2706 buffer_size - XML_PARSER_BUFFER_SIZE) { 2707 if (xmlParserEntityCheck(ctxt, nbchars, ent)) 2708 goto int_error; 2709 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2710 } 2711 } 2712 xmlFree(rep); 2713 rep = NULL; 2714 } 2715 } else if (ent != NULL) { 2716 int i = xmlStrlen(ent->name); 2717 const xmlChar *cur = ent->name; 2718 2719 
buffer[nbchars++] = '&'; 2720 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 2721 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); 2722 } 2723 for (;i > 0;i--) 2724 buffer[nbchars++] = *cur++; 2725 buffer[nbchars++] = ';'; 2726 } 2727 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2728 if (xmlParserDebugEntities) 2729 xmlGenericError(xmlGenericErrorContext, 2730 "String decoding PE Reference: %.30s\n", str); 2731 ent = xmlParseStringPEReference(ctxt, &str); 2732 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 2733 goto int_error; 2734 if (ent != NULL) 2735 ctxt->nbentities += ent->checked; 2736 if (ent != NULL) { 2737 if (ent->content == NULL) { 2738 xmlLoadEntityContent(ctxt, ent); 2739 } 2740 ctxt->depth++; 2741 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2742 0, 0, 0); 2743 ctxt->depth--; 2744 if (rep != NULL) { 2745 current = rep; 2746 while (*current != 0) { /* non input consuming loop */ 2747 buffer[nbchars++] = *current++; 2748 if (nbchars > 2749 buffer_size - XML_PARSER_BUFFER_SIZE) { 2750 if (xmlParserEntityCheck(ctxt, nbchars, ent)) 2751 goto int_error; 2752 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2753 } 2754 } 2755 xmlFree(rep); 2756 rep = NULL; 2757 } 2758 } 2759 } else { 2760 COPY_BUF(l,buffer,nbchars,c); 2761 str += l; 2762 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2763 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2764 } 2765 } 2766 if (str < last) 2767 c = CUR_SCHAR(str, l); 2768 else 2769 c = 0; 2770 } 2771 buffer[nbchars] = 0; 2772 return(buffer); 2773 2774 mem_error: 2775 xmlErrMemory(ctxt, NULL); 2776 int_error: 2777 if (rep != NULL) 2778 xmlFree(rep); 2779 if (buffer != NULL) 2780 xmlFree(buffer); 2781 return(NULL); 2782 } 2783 2784 /** 2785 * xmlStringDecodeEntities: 2786 * @ctxt: the parser context 2787 * @str: the input string 2788 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2789 * @end: an end marker xmlChar, 0 if none 2790 * @end2: an end marker xmlChar, 0 if none 2791 * 
@end3: an end marker xmlChar, 0 if none 2792 * 2793 * Takes a entity string content and process to do the adequate substitutions. 2794 * 2795 * [67] Reference ::= EntityRef | CharRef 2796 * 2797 * [69] PEReference ::= '%' Name ';' 2798 * 2799 * Returns A newly allocated string with the substitution done. The caller 2800 * must deallocate it ! 2801 */ 2802 xmlChar * 2803 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2804 xmlChar end, xmlChar end2, xmlChar end3) { 2805 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2806 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2807 end, end2, end3)); 2808 } 2809 2810 /************************************************************************ 2811 * * 2812 * Commodity functions, cleanup needed ? * 2813 * * 2814 ************************************************************************/ 2815 2816 /** 2817 * areBlanks: 2818 * @ctxt: an XML parser context 2819 * @str: a xmlChar * 2820 * @len: the size of @str 2821 * @blank_chars: we know the chars are blanks 2822 * 2823 * Is this a sequence of blank chars that one can ignore ? 2824 * 2825 * Returns 1 if ignorable 0 otherwise. 2826 */ 2827 2828 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2829 int blank_chars) { 2830 int i, ret; 2831 xmlNodePtr lastChild; 2832 2833 /* 2834 * Don't spend time trying to differentiate them, the same callback is 2835 * used ! 2836 */ 2837 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2838 return(0); 2839 2840 /* 2841 * Check for xml:space value. 
2842 */ 2843 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2844 (*(ctxt->space) == -2)) 2845 return(0); 2846 2847 /* 2848 * Check that the string is made of blanks 2849 */ 2850 if (blank_chars == 0) { 2851 for (i = 0;i < len;i++) 2852 if (!(IS_BLANK_CH(str[i]))) return(0); 2853 } 2854 2855 /* 2856 * Look if the element is mixed content in the DTD if available 2857 */ 2858 if (ctxt->node == NULL) return(0); 2859 if (ctxt->myDoc != NULL) { 2860 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2861 if (ret == 0) return(1); 2862 if (ret == 1) return(0); 2863 } 2864 2865 /* 2866 * Otherwise, heuristic :-\ 2867 */ 2868 if ((RAW != '<') && (RAW != 0xD)) return(0); 2869 if ((ctxt->node->children == NULL) && 2870 (RAW == '<') && (NXT(1) == '/')) return(0); 2871 2872 lastChild = xmlGetLastChild(ctxt->node); 2873 if (lastChild == NULL) { 2874 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2875 (ctxt->node->content != NULL)) return(0); 2876 } else if (xmlNodeIsText(lastChild)) 2877 return(0); 2878 else if ((ctxt->node->children != NULL) && 2879 (xmlNodeIsText(ctxt->node->children))) 2880 return(0); 2881 return(1); 2882 } 2883 2884 /************************************************************************ 2885 * * 2886 * Extra stuff for namespace support * 2887 * Relates to http://www.w3.org/TR/WD-xml-names * 2888 * * 2889 ************************************************************************/ 2890 2891 /** 2892 * xmlSplitQName: 2893 * @ctxt: an XML parser context 2894 * @name: an XML parser context 2895 * @prefix: a xmlChar ** 2896 * 2897 * parse an UTF8 encoded XML qualified name string 2898 * 2899 * [NS 5] QName ::= (Prefix ':')? LocalPart 2900 * 2901 * [NS 6] Prefix ::= NCName 2902 * 2903 * [NS 7] LocalPart ::= NCName 2904 * 2905 * Returns the local part, and prefix is updated 2906 * to get the Prefix if any. 
2907 */ 2908 2909 xmlChar * 2910 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2911 xmlChar buf[XML_MAX_NAMELEN + 5]; 2912 xmlChar *buffer = NULL; 2913 int len = 0; 2914 int max = XML_MAX_NAMELEN; 2915 xmlChar *ret = NULL; 2916 const xmlChar *cur = name; 2917 int c; 2918 2919 if (prefix == NULL) return(NULL); 2920 *prefix = NULL; 2921 2922 if (cur == NULL) return(NULL); 2923 2924 #ifndef XML_XML_NAMESPACE 2925 /* xml: prefix is not really a namespace */ 2926 if ((cur[0] == 'x') && (cur[1] == 'm') && 2927 (cur[2] == 'l') && (cur[3] == ':')) 2928 return(xmlStrdup(name)); 2929 #endif 2930 2931 /* nasty but well=formed */ 2932 if (cur[0] == ':') 2933 return(xmlStrdup(name)); 2934 2935 c = *cur++; 2936 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2937 buf[len++] = c; 2938 c = *cur++; 2939 } 2940 if (len >= max) { 2941 /* 2942 * Okay someone managed to make a huge name, so he's ready to pay 2943 * for the processing speed. 2944 */ 2945 max = len * 2; 2946 2947 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2948 if (buffer == NULL) { 2949 xmlErrMemory(ctxt, NULL); 2950 return(NULL); 2951 } 2952 memcpy(buffer, buf, len); 2953 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2954 if (len + 10 > max) { 2955 xmlChar *tmp; 2956 2957 max *= 2; 2958 tmp = (xmlChar *) xmlRealloc(buffer, 2959 max * sizeof(xmlChar)); 2960 if (tmp == NULL) { 2961 xmlFree(buffer); 2962 xmlErrMemory(ctxt, NULL); 2963 return(NULL); 2964 } 2965 buffer = tmp; 2966 } 2967 buffer[len++] = c; 2968 c = *cur++; 2969 } 2970 buffer[len] = 0; 2971 } 2972 2973 if ((c == ':') && (*cur == 0)) { 2974 if (buffer != NULL) 2975 xmlFree(buffer); 2976 *prefix = NULL; 2977 return(xmlStrdup(name)); 2978 } 2979 2980 if (buffer == NULL) 2981 ret = xmlStrndup(buf, len); 2982 else { 2983 ret = buffer; 2984 buffer = NULL; 2985 max = XML_MAX_NAMELEN; 2986 } 2987 2988 2989 if (c == ':') { 2990 c = *cur; 2991 *prefix = ret; 2992 if (c == 0) { 2993 
return(xmlStrndup(BAD_CAST "", 0)); 2994 } 2995 len = 0; 2996 2997 /* 2998 * Check that the first character is proper to start 2999 * a new name 3000 */ 3001 if (!(((c >= 0x61) && (c <= 0x7A)) || 3002 ((c >= 0x41) && (c <= 0x5A)) || 3003 (c == '_') || (c == ':'))) { 3004 int l; 3005 int first = CUR_SCHAR(cur, l); 3006 3007 if (!IS_LETTER(first) && (first != '_')) { 3008 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3009 "Name %s is not XML Namespace compliant\n", 3010 name); 3011 } 3012 } 3013 cur++; 3014 3015 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3016 buf[len++] = c; 3017 c = *cur++; 3018 } 3019 if (len >= max) { 3020 /* 3021 * Okay someone managed to make a huge name, so he's ready to pay 3022 * for the processing speed. 3023 */ 3024 max = len * 2; 3025 3026 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3027 if (buffer == NULL) { 3028 xmlErrMemory(ctxt, NULL); 3029 return(NULL); 3030 } 3031 memcpy(buffer, buf, len); 3032 while (c != 0) { /* tested bigname2.xml */ 3033 if (len + 10 > max) { 3034 xmlChar *tmp; 3035 3036 max *= 2; 3037 tmp = (xmlChar *) xmlRealloc(buffer, 3038 max * sizeof(xmlChar)); 3039 if (tmp == NULL) { 3040 xmlErrMemory(ctxt, NULL); 3041 xmlFree(buffer); 3042 return(NULL); 3043 } 3044 buffer = tmp; 3045 } 3046 buffer[len++] = c; 3047 c = *cur++; 3048 } 3049 buffer[len] = 0; 3050 } 3051 3052 if (buffer == NULL) 3053 ret = xmlStrndup(buf, len); 3054 else { 3055 ret = buffer; 3056 } 3057 } 3058 3059 return(ret); 3060 } 3061 3062 /************************************************************************ 3063 * * 3064 * The parser itself * 3065 * Relates to http://www.w3.org/TR/REC-xml * 3066 * * 3067 ************************************************************************/ 3068 3069 /************************************************************************ 3070 * * 3071 * Routines to parse Name, NCName and NmToken * 3072 * * 3073 ************************************************************************/ 3074 #ifdef 
DEBUG 3075 static unsigned long nbParseName = 0; 3076 static unsigned long nbParseNmToken = 0; 3077 static unsigned long nbParseNCName = 0; 3078 static unsigned long nbParseNCNameComplex = 0; 3079 static unsigned long nbParseNameComplex = 0; 3080 static unsigned long nbParseStringName = 0; 3081 #endif 3082 3083 /* 3084 * The two following functions are related to the change of accepted 3085 * characters for Name and NmToken in the Revision 5 of XML-1.0 3086 * They correspond to the modified production [4] and the new production [4a] 3087 * changes in that revision. Also note that the macros used for the 3088 * productions Letter, Digit, CombiningChar and Extender are not needed 3089 * anymore. 3090 * We still keep compatibility to pre-revision5 parsing semantic if the 3091 * new XML_PARSE_OLD10 option is given to the parser. 3092 */ 3093 static int 3094 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3095 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3096 /* 3097 * Use the new checks of production [4] [4a] amd [5] of the 3098 * Update 5 of XML-1.0 3099 */ 3100 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3101 (((c >= 'a') && (c <= 'z')) || 3102 ((c >= 'A') && (c <= 'Z')) || 3103 (c == '_') || (c == ':') || 3104 ((c >= 0xC0) && (c <= 0xD6)) || 3105 ((c >= 0xD8) && (c <= 0xF6)) || 3106 ((c >= 0xF8) && (c <= 0x2FF)) || 3107 ((c >= 0x370) && (c <= 0x37D)) || 3108 ((c >= 0x37F) && (c <= 0x1FFF)) || 3109 ((c >= 0x200C) && (c <= 0x200D)) || 3110 ((c >= 0x2070) && (c <= 0x218F)) || 3111 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3112 ((c >= 0x3001) && (c <= 0xD7FF)) || 3113 ((c >= 0xF900) && (c <= 0xFDCF)) || 3114 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3115 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3116 return(1); 3117 } else { 3118 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3119 return(1); 3120 } 3121 return(0); 3122 } 3123 3124 static int 3125 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3126 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3127 /* 3128 * Use 
the new checks of production [4] [4a] amd [5] of the 3129 * Update 5 of XML-1.0 3130 */ 3131 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3132 (((c >= 'a') && (c <= 'z')) || 3133 ((c >= 'A') && (c <= 'Z')) || 3134 ((c >= '0') && (c <= '9')) || /* !start */ 3135 (c == '_') || (c == ':') || 3136 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3137 ((c >= 0xC0) && (c <= 0xD6)) || 3138 ((c >= 0xD8) && (c <= 0xF6)) || 3139 ((c >= 0xF8) && (c <= 0x2FF)) || 3140 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3141 ((c >= 0x370) && (c <= 0x37D)) || 3142 ((c >= 0x37F) && (c <= 0x1FFF)) || 3143 ((c >= 0x200C) && (c <= 0x200D)) || 3144 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3145 ((c >= 0x2070) && (c <= 0x218F)) || 3146 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3147 ((c >= 0x3001) && (c <= 0xD7FF)) || 3148 ((c >= 0xF900) && (c <= 0xFDCF)) || 3149 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3150 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3151 return(1); 3152 } else { 3153 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3154 (c == '.') || (c == '-') || 3155 (c == '_') || (c == ':') || 3156 (IS_COMBINING(c)) || 3157 (IS_EXTENDER(c))) 3158 return(1); 3159 } 3160 return(0); 3161 } 3162 3163 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3164 int *len, int *alloc, int normalize); 3165 3166 static const xmlChar * 3167 xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3168 int len = 0, l; 3169 int c; 3170 int count = 0; 3171 3172 #ifdef DEBUG 3173 nbParseNameComplex++; 3174 #endif 3175 3176 /* 3177 * Handler for more complex cases 3178 */ 3179 GROW; 3180 c = CUR_CHAR(l); 3181 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3182 /* 3183 * Use the new checks of production [4] [4a] amd [5] of the 3184 * Update 5 of XML-1.0 3185 */ 3186 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3187 (!(((c >= 'a') && (c <= 'z')) || 3188 ((c >= 'A') && (c <= 'Z')) || 3189 (c == '_') || (c == ':') || 3190 ((c >= 0xC0) && (c <= 0xD6)) || 3191 ((c >= 0xD8) && (c <= 
0xF6)) || 3192 ((c >= 0xF8) && (c <= 0x2FF)) || 3193 ((c >= 0x370) && (c <= 0x37D)) || 3194 ((c >= 0x37F) && (c <= 0x1FFF)) || 3195 ((c >= 0x200C) && (c <= 0x200D)) || 3196 ((c >= 0x2070) && (c <= 0x218F)) || 3197 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3198 ((c >= 0x3001) && (c <= 0xD7FF)) || 3199 ((c >= 0xF900) && (c <= 0xFDCF)) || 3200 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3201 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3202 return(NULL); 3203 } 3204 len += l; 3205 NEXTL(l); 3206 c = CUR_CHAR(l); 3207 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3208 (((c >= 'a') && (c <= 'z')) || 3209 ((c >= 'A') && (c <= 'Z')) || 3210 ((c >= '0') && (c <= '9')) || /* !start */ 3211 (c == '_') || (c == ':') || 3212 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3213 ((c >= 0xC0) && (c <= 0xD6)) || 3214 ((c >= 0xD8) && (c <= 0xF6)) || 3215 ((c >= 0xF8) && (c <= 0x2FF)) || 3216 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3217 ((c >= 0x370) && (c <= 0x37D)) || 3218 ((c >= 0x37F) && (c <= 0x1FFF)) || 3219 ((c >= 0x200C) && (c <= 0x200D)) || 3220 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3221 ((c >= 0x2070) && (c <= 0x218F)) || 3222 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3223 ((c >= 0x3001) && (c <= 0xD7FF)) || 3224 ((c >= 0xF900) && (c <= 0xFDCF)) || 3225 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3226 ((c >= 0x10000) && (c <= 0xEFFFF)) 3227 )) { 3228 if (count++ > 100) { 3229 count = 0; 3230 GROW; 3231 } 3232 len += l; 3233 NEXTL(l); 3234 c = CUR_CHAR(l); 3235 } 3236 } else { 3237 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3238 (!IS_LETTER(c) && (c != '_') && 3239 (c != ':'))) { 3240 return(NULL); 3241 } 3242 len += l; 3243 NEXTL(l); 3244 c = CUR_CHAR(l); 3245 3246 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3247 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3248 (c == '.') || (c == '-') || 3249 (c == '_') || (c == ':') || 3250 (IS_COMBINING(c)) || 3251 (IS_EXTENDER(c)))) { 3252 if (count++ > 100) { 3253 count = 0; 
3254 GROW; 3255 } 3256 len += l; 3257 NEXTL(l); 3258 c = CUR_CHAR(l); 3259 } 3260 } 3261 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3262 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3263 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3264 } 3265 3266 /** 3267 * xmlParseName: 3268 * @ctxt: an XML parser context 3269 * 3270 * parse an XML name. 3271 * 3272 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3273 * CombiningChar | Extender 3274 * 3275 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3276 * 3277 * [6] Names ::= Name (#x20 Name)* 3278 * 3279 * Returns the Name parsed or NULL 3280 */ 3281 3282 const xmlChar * 3283 xmlParseName(xmlParserCtxtPtr ctxt) { 3284 const xmlChar *in; 3285 const xmlChar *ret; 3286 int count = 0; 3287 3288 GROW; 3289 3290 #ifdef DEBUG 3291 nbParseName++; 3292 #endif 3293 3294 /* 3295 * Accelerator for simple ASCII names 3296 */ 3297 in = ctxt->input->cur; 3298 if (((*in >= 0x61) && (*in <= 0x7A)) || 3299 ((*in >= 0x41) && (*in <= 0x5A)) || 3300 (*in == '_') || (*in == ':')) { 3301 in++; 3302 while (((*in >= 0x61) && (*in <= 0x7A)) || 3303 ((*in >= 0x41) && (*in <= 0x5A)) || 3304 ((*in >= 0x30) && (*in <= 0x39)) || 3305 (*in == '_') || (*in == '-') || 3306 (*in == ':') || (*in == '.')) 3307 in++; 3308 if ((*in > 0) && (*in < 0x80)) { 3309 count = in - ctxt->input->cur; 3310 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3311 ctxt->input->cur = in; 3312 ctxt->nbChars += count; 3313 ctxt->input->col += count; 3314 if (ret == NULL) 3315 xmlErrMemory(ctxt, NULL); 3316 return(ret); 3317 } 3318 } 3319 /* accelerator for special cases */ 3320 return(xmlParseNameComplex(ctxt)); 3321 } 3322 3323 static const xmlChar * 3324 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3325 int len = 0, l; 3326 int c; 3327 int count = 0; 3328 3329 #ifdef DEBUG 3330 nbParseNCNameComplex++; 3331 #endif 3332 3333 /* 3334 * Handler for more complex cases 3335 */ 3336 GROW; 3337 c 
= CUR_CHAR(l); 3338 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3339 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3340 return(NULL); 3341 } 3342 3343 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3344 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3345 if (count++ > 100) { 3346 count = 0; 3347 GROW; 3348 } 3349 len += l; 3350 NEXTL(l); 3351 c = CUR_CHAR(l); 3352 } 3353 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3354 } 3355 3356 /** 3357 * xmlParseNCName: 3358 * @ctxt: an XML parser context 3359 * @len: lenght of the string parsed 3360 * 3361 * parse an XML name. 3362 * 3363 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 3364 * CombiningChar | Extender 3365 * 3366 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3367 * 3368 * Returns the Name parsed or NULL 3369 */ 3370 3371 static const xmlChar * 3372 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3373 const xmlChar *in; 3374 const xmlChar *ret; 3375 int count = 0; 3376 3377 #ifdef DEBUG 3378 nbParseNCName++; 3379 #endif 3380 3381 /* 3382 * Accelerator for simple ASCII names 3383 */ 3384 in = ctxt->input->cur; 3385 if (((*in >= 0x61) && (*in <= 0x7A)) || 3386 ((*in >= 0x41) && (*in <= 0x5A)) || 3387 (*in == '_')) { 3388 in++; 3389 while (((*in >= 0x61) && (*in <= 0x7A)) || 3390 ((*in >= 0x41) && (*in <= 0x5A)) || 3391 ((*in >= 0x30) && (*in <= 0x39)) || 3392 (*in == '_') || (*in == '-') || 3393 (*in == '.')) 3394 in++; 3395 if ((*in > 0) && (*in < 0x80)) { 3396 count = in - ctxt->input->cur; 3397 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3398 ctxt->input->cur = in; 3399 ctxt->nbChars += count; 3400 ctxt->input->col += count; 3401 if (ret == NULL) { 3402 xmlErrMemory(ctxt, NULL); 3403 } 3404 return(ret); 3405 } 3406 } 3407 return(xmlParseNCNameComplex(ctxt)); 3408 } 3409 3410 /** 3411 * xmlParseNameAndCompare: 3412 * @ctxt: an XML parser context 3413 * 3414 * parse an XML name and compares for match 3415 * (specialized for endtag 
parsing)
 *
 * The second argument, other, is the name the input is expected to
 * contain. It is compared by pointer against the result of xmlParseName(),
 * so it must come from the same dictionary for a match to be detected on
 * the slow path.
 *
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
 * and the name for mismatch
 */

static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
    register const xmlChar *cmp = other;
    register const xmlChar *in;
    const xmlChar *ret;

    GROW;

    /* fast path: byte-wise comparison of the input against the expected name */
    in = ctxt->input->cur;
    while (*in != 0 && *in == *cmp) {
        ++in;
        ++cmp;
    }
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
        /*
         * success: account for the consumed characters only now. Updating
         * the column inside the loop counted the matched prefix twice when
         * the comparison failed, since xmlParseName() below rescans from
         * the unchanged cursor and adds its own count.
         */
        ctxt->input->col += in - ctxt->input->cur;
        ctxt->input->cur = in;
        return (const xmlChar*) 1;
    }
    /* failure (or end of input buffer), check with full function */
    ret = xmlParseName (ctxt);
    /* strings coming from the dictionary: direct compare possible */
    if (ret == other) {
        return (const xmlChar*) 1;
    }
    return ret;
}

/**
 * xmlParseStringName:
 * @ctxt:  an XML parser context
 * @str:  a pointer to the string pointer (IN/OUT)
 *
 * parse an XML name.
 *
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
 *                  CombiningChar | Extender
 *
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
 *
 * [6] Names ::= Name (#x20 Name)*
 *
 * Returns the Name parsed or NULL. The @str pointer
 * is updated to the current location in the string.
3465 */ 3466 3467 static xmlChar * 3468 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3469 xmlChar buf[XML_MAX_NAMELEN + 5]; 3470 const xmlChar *cur = *str; 3471 int len = 0, l; 3472 int c; 3473 3474 #ifdef DEBUG 3475 nbParseStringName++; 3476 #endif 3477 3478 c = CUR_SCHAR(cur, l); 3479 if (!xmlIsNameStartChar(ctxt, c)) { 3480 return(NULL); 3481 } 3482 3483 COPY_BUF(l,buf,len,c); 3484 cur += l; 3485 c = CUR_SCHAR(cur, l); 3486 while (xmlIsNameChar(ctxt, c)) { 3487 COPY_BUF(l,buf,len,c); 3488 cur += l; 3489 c = CUR_SCHAR(cur, l); 3490 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3491 /* 3492 * Okay someone managed to make a huge name, so he's ready to pay 3493 * for the processing speed. 3494 */ 3495 xmlChar *buffer; 3496 int max = len * 2; 3497 3498 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3499 if (buffer == NULL) { 3500 xmlErrMemory(ctxt, NULL); 3501 return(NULL); 3502 } 3503 memcpy(buffer, buf, len); 3504 while (xmlIsNameChar(ctxt, c)) { 3505 if (len + 10 > max) { 3506 xmlChar *tmp; 3507 max *= 2; 3508 tmp = (xmlChar *) xmlRealloc(buffer, 3509 max * sizeof(xmlChar)); 3510 if (tmp == NULL) { 3511 xmlErrMemory(ctxt, NULL); 3512 xmlFree(buffer); 3513 return(NULL); 3514 } 3515 buffer = tmp; 3516 } 3517 COPY_BUF(l,buffer,len,c); 3518 cur += l; 3519 c = CUR_SCHAR(cur, l); 3520 } 3521 buffer[len] = 0; 3522 *str = cur; 3523 return(buffer); 3524 } 3525 } 3526 *str = cur; 3527 return(xmlStrndup(buf, len)); 3528 } 3529 3530 /** 3531 * xmlParseNmtoken: 3532 * @ctxt: an XML parser context 3533 * 3534 * parse an XML Nmtoken. 
3535 * 3536 * [7] Nmtoken ::= (NameChar)+ 3537 * 3538 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3539 * 3540 * Returns the Nmtoken parsed or NULL 3541 */ 3542 3543 xmlChar * 3544 xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3545 xmlChar buf[XML_MAX_NAMELEN + 5]; 3546 int len = 0, l; 3547 int c; 3548 int count = 0; 3549 3550 #ifdef DEBUG 3551 nbParseNmToken++; 3552 #endif 3553 3554 GROW; 3555 c = CUR_CHAR(l); 3556 3557 while (xmlIsNameChar(ctxt, c)) { 3558 if (count++ > 100) { 3559 count = 0; 3560 GROW; 3561 } 3562 COPY_BUF(l,buf,len,c); 3563 NEXTL(l); 3564 c = CUR_CHAR(l); 3565 if (len >= XML_MAX_NAMELEN) { 3566 /* 3567 * Okay someone managed to make a huge token, so he's ready to pay 3568 * for the processing speed. 3569 */ 3570 xmlChar *buffer; 3571 int max = len * 2; 3572 3573 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3574 if (buffer == NULL) { 3575 xmlErrMemory(ctxt, NULL); 3576 return(NULL); 3577 } 3578 memcpy(buffer, buf, len); 3579 while (xmlIsNameChar(ctxt, c)) { 3580 if (count++ > 100) { 3581 count = 0; 3582 GROW; 3583 } 3584 if (len + 10 > max) { 3585 xmlChar *tmp; 3586 3587 max *= 2; 3588 tmp = (xmlChar *) xmlRealloc(buffer, 3589 max * sizeof(xmlChar)); 3590 if (tmp == NULL) { 3591 xmlErrMemory(ctxt, NULL); 3592 xmlFree(buffer); 3593 return(NULL); 3594 } 3595 buffer = tmp; 3596 } 3597 COPY_BUF(l,buffer,len,c); 3598 NEXTL(l); 3599 c = CUR_CHAR(l); 3600 } 3601 buffer[len] = 0; 3602 return(buffer); 3603 } 3604 } 3605 if (len == 0) 3606 return(NULL); 3607 return(xmlStrndup(buf, len)); 3608 } 3609 3610 /** 3611 * xmlParseEntityValue: 3612 * @ctxt: an XML parser context 3613 * @orig: if non-NULL store a copy of the original entity value 3614 * 3615 * parse a value for ENTITY declarations 3616 * 3617 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3618 * "'" ([^%&'] | PEReference | Reference)* "'" 3619 * 3620 * Returns the EntityValue parsed with reference substituted or NULL 3621 */ 3622 3623 xmlChar * 3624 
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;                /* raw literal as read from input */
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;                       /* the quote that opened the literal */
    xmlChar *ret = NULL;
    const xmlChar *scan = NULL;
    xmlParserInputPtr input;            /* input the literal started in */

    /* the literal must open with a single or a double quote */
    switch (RAW) {
        case '"':
            stop = '"';
            break;
        case '\'':
            stop = '\'';
            break;
        default:
            xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
            return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    input = ctxt->input;
    GROW;
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    for (;;) {
        if (!(IS_CHAR(c)))
            break;
        if ((c == stop) && (ctxt->input == input)) /* checked */
            break;

        if (len + 5 >= size) {
            xmlChar *grown;

            size *= 2;
            grown = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (grown == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = grown;
        }
        COPY_BUF(l, buf, len, c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        if (c == 0) {
            /* one more refill attempt before giving up on this input */
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    for (scan = buf; *scan != 0; scan++) { /* non input consuming */
        xmlChar *name;
        xmlChar marker;

        if (!((*scan == '%') || ((*scan == '&') && (scan[1] != '#'))))
            continue;

        marker = *scan;
        scan++;
        name = xmlParseStringName(ctxt, &scan);
        if ((name == NULL) || (*scan != ';')) {
            xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
          "EntityValue: '%c' forbidden except for entities references\n",
                              marker);
        }
        if ((marker == '%') && (ctxt->inSubset == 1) &&
            (ctxt->inputNr == 1)) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
        }
        if (name != NULL)
            xmlFree(name);
        /* do not step past the terminating NUL */
        if (*scan == 0)
            break;
    }

    /*
     * Then PEReference entities are substituted.
     */
    if (c != stop) {
        /* the closing quote was never reached */
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
        return(ret);
    }

    NEXT;
    /*
     * NOTE: 4.4.7 Bypassed
     * When a general entity reference appears in the EntityValue in
     * an entity declaration, it is bypassed and left as is.
     * so XML_SUBSTITUTE_REF is not set here.
     */
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                  0, 0, 0);
    /* the raw buffer is either handed over through @orig or released */
    if (orig != NULL)
        *orig = buf;
    else
        xmlFree(buf);

    return(ret);
}

/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @len:   the resulting attribute len
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3761 */ 3762 static xmlChar * 3763 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3764 xmlChar limit = 0; 3765 xmlChar *buf = NULL; 3766 xmlChar *rep = NULL; 3767 int len = 0; 3768 int buf_size = 0; 3769 int c, l, in_space = 0; 3770 xmlChar *current = NULL; 3771 xmlEntityPtr ent; 3772 3773 if (NXT(0) == '"') { 3774 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3775 limit = '"'; 3776 NEXT; 3777 } else if (NXT(0) == '\'') { 3778 limit = '\''; 3779 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3780 NEXT; 3781 } else { 3782 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3783 return(NULL); 3784 } 3785 3786 /* 3787 * allocate a translation buffer. 3788 */ 3789 buf_size = XML_PARSER_BUFFER_SIZE; 3790 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 3791 if (buf == NULL) goto mem_error; 3792 3793 /* 3794 * OK loop until we reach one of the ending char or a size limit. 3795 */ 3796 c = CUR_CHAR(l); 3797 while ((NXT(0) != limit) && /* checked */ 3798 (IS_CHAR(c)) && (c != '<')) { 3799 if (c == 0) break; 3800 if (c == '&') { 3801 in_space = 0; 3802 if (NXT(1) == '#') { 3803 int val = xmlParseCharRef(ctxt); 3804 3805 if (val == '&') { 3806 if (ctxt->replaceEntities) { 3807 if (len > buf_size - 10) { 3808 growBuffer(buf, 10); 3809 } 3810 buf[len++] = '&'; 3811 } else { 3812 /* 3813 * The reparsing will be done in xmlStringGetNodeList() 3814 * called by the attribute() function in SAX.c 3815 */ 3816 if (len > buf_size - 10) { 3817 growBuffer(buf, 10); 3818 } 3819 buf[len++] = '&'; 3820 buf[len++] = '#'; 3821 buf[len++] = '3'; 3822 buf[len++] = '8'; 3823 buf[len++] = ';'; 3824 } 3825 } else if (val != 0) { 3826 if (len > buf_size - 10) { 3827 growBuffer(buf, 10); 3828 } 3829 len += xmlCopyChar(0, &buf[len], val); 3830 } 3831 } else { 3832 ent = xmlParseEntityRef(ctxt); 3833 ctxt->nbentities++; 3834 if (ent != NULL) 3835 ctxt->nbentities += ent->owner; 3836 if ((ent != NULL) && 3837 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 
3838 if (len > buf_size - 10) { 3839 growBuffer(buf, 10); 3840 } 3841 if ((ctxt->replaceEntities == 0) && 3842 (ent->content[0] == '&')) { 3843 buf[len++] = '&'; 3844 buf[len++] = '#'; 3845 buf[len++] = '3'; 3846 buf[len++] = '8'; 3847 buf[len++] = ';'; 3848 } else { 3849 buf[len++] = ent->content[0]; 3850 } 3851 } else if ((ent != NULL) && 3852 (ctxt->replaceEntities != 0)) { 3853 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3854 rep = xmlStringDecodeEntities(ctxt, ent->content, 3855 XML_SUBSTITUTE_REF, 3856 0, 0, 0); 3857 if (rep != NULL) { 3858 current = rep; 3859 while (*current != 0) { /* non input consuming */ 3860 if ((*current == 0xD) || (*current == 0xA) || 3861 (*current == 0x9)) { 3862 buf[len++] = 0x20; 3863 current++; 3864 } else 3865 buf[len++] = *current++; 3866 if (len > buf_size - 10) { 3867 growBuffer(buf, 10); 3868 } 3869 } 3870 xmlFree(rep); 3871 rep = NULL; 3872 } 3873 } else { 3874 if (len > buf_size - 10) { 3875 growBuffer(buf, 10); 3876 } 3877 if (ent->content != NULL) 3878 buf[len++] = ent->content[0]; 3879 } 3880 } else if (ent != NULL) { 3881 int i = xmlStrlen(ent->name); 3882 const xmlChar *cur = ent->name; 3883 3884 /* 3885 * This may look absurd but is needed to detect 3886 * entities problems 3887 */ 3888 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3889 (ent->content != NULL)) { 3890 rep = xmlStringDecodeEntities(ctxt, ent->content, 3891 XML_SUBSTITUTE_REF, 0, 0, 0); 3892 if (rep != NULL) { 3893 xmlFree(rep); 3894 rep = NULL; 3895 } 3896 } 3897 3898 /* 3899 * Just output the reference 3900 */ 3901 buf[len++] = '&'; 3902 while (len > buf_size - i - 10) { 3903 growBuffer(buf, i + 10); 3904 } 3905 for (;i > 0;i--) 3906 buf[len++] = *cur++; 3907 buf[len++] = ';'; 3908 } 3909 } 3910 } else { 3911 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 3912 if ((len != 0) || (!normalize)) { 3913 if ((!normalize) || (!in_space)) { 3914 COPY_BUF(l,buf,len,0x20); 3915 while (len > buf_size - 10) { 3916 growBuffer(buf, 
10); 3917 } 3918 } 3919 in_space = 1; 3920 } 3921 } else { 3922 in_space = 0; 3923 COPY_BUF(l,buf,len,c); 3924 if (len > buf_size - 10) { 3925 growBuffer(buf, 10); 3926 } 3927 } 3928 NEXTL(l); 3929 } 3930 GROW; 3931 c = CUR_CHAR(l); 3932 } 3933 if ((in_space) && (normalize)) { 3934 while (buf[len - 1] == 0x20) len--; 3935 } 3936 buf[len] = 0; 3937 if (RAW == '<') { 3938 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 3939 } else if (RAW != limit) { 3940 if ((c != 0) && (!IS_CHAR(c))) { 3941 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 3942 "invalid character in attribute value\n"); 3943 } else { 3944 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3945 "AttValue: ' expected\n"); 3946 } 3947 } else 3948 NEXT; 3949 if (attlen != NULL) *attlen = len; 3950 return(buf); 3951 3952 mem_error: 3953 xmlErrMemory(ctxt, NULL); 3954 if (buf != NULL) 3955 xmlFree(buf); 3956 if (rep != NULL) 3957 xmlFree(rep); 3958 return(NULL); 3959 } 3960 3961 /** 3962 * xmlParseAttValue: 3963 * @ctxt: an XML parser context 3964 * 3965 * parse a value for an attribute 3966 * Note: the parser won't do substitution of entities here, this 3967 * will be handled later in xmlStringGetNodeList 3968 * 3969 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 3970 * "'" ([^<&'] | Reference)* "'" 3971 * 3972 * 3.3.3 Attribute-Value Normalization: 3973 * Before the value of an attribute is passed to the application or 3974 * checked for validity, the XML processor must normalize it as follows: 3975 * - a character reference is processed by appending the referenced 3976 * character to the attribute value 3977 * - an entity reference is processed by recursively processing the 3978 * replacement text of the entity 3979 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 3980 * appending #x20 to the normalized value, except that only a single 3981 * #x20 is appended for a "#xD#xA" sequence that is part of an external 3982 * parsed entity or the literal entity value of an internal parsed 
entity 3983 * - other characters are processed by appending them to the normalized value 3984 * If the declared value is not CDATA, then the XML processor must further 3985 * process the normalized attribute value by discarding any leading and 3986 * trailing space (#x20) characters, and by replacing sequences of space 3987 * (#x20) characters by a single space (#x20) character. 3988 * All attributes for which no declaration has been read should be treated 3989 * by a non-validating parser as if declared CDATA. 3990 * 3991 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3992 */ 3993 3994 3995 xmlChar * 3996 xmlParseAttValue(xmlParserCtxtPtr ctxt) { 3997 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 3998 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 3999 } 4000 4001 /** 4002 * xmlParseSystemLiteral: 4003 * @ctxt: an XML parser context 4004 * 4005 * parse an XML Literal 4006 * 4007 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4008 * 4009 * Returns the SystemLiteral parsed or NULL 4010 */ 4011 4012 xmlChar * 4013 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4014 xmlChar *buf = NULL; 4015 int len = 0; 4016 int size = XML_PARSER_BUFFER_SIZE; 4017 int cur, l; 4018 xmlChar stop; 4019 int state = ctxt->instate; 4020 int count = 0; 4021 4022 SHRINK; 4023 if (RAW == '"') { 4024 NEXT; 4025 stop = '"'; 4026 } else if (RAW == '\'') { 4027 NEXT; 4028 stop = '\''; 4029 } else { 4030 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4031 return(NULL); 4032 } 4033 4034 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4035 if (buf == NULL) { 4036 xmlErrMemory(ctxt, NULL); 4037 return(NULL); 4038 } 4039 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4040 cur = CUR_CHAR(l); 4041 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4042 if (len + 5 >= size) { 4043 xmlChar *tmp; 4044 4045 size *= 2; 4046 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4047 if (tmp == NULL) { 4048 xmlFree(buf); 
4049 xmlErrMemory(ctxt, NULL); 4050 ctxt->instate = (xmlParserInputState) state; 4051 return(NULL); 4052 } 4053 buf = tmp; 4054 } 4055 count++; 4056 if (count > 50) { 4057 GROW; 4058 count = 0; 4059 } 4060 COPY_BUF(l,buf,len,cur); 4061 NEXTL(l); 4062 cur = CUR_CHAR(l); 4063 if (cur == 0) { 4064 GROW; 4065 SHRINK; 4066 cur = CUR_CHAR(l); 4067 } 4068 } 4069 buf[len] = 0; 4070 ctxt->instate = (xmlParserInputState) state; 4071 if (!IS_CHAR(cur)) { 4072 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4073 } else { 4074 NEXT; 4075 } 4076 return(buf); 4077 } 4078 4079 /** 4080 * xmlParsePubidLiteral: 4081 * @ctxt: an XML parser context 4082 * 4083 * parse an XML public literal 4084 * 4085 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4086 * 4087 * Returns the PubidLiteral parsed or NULL. 4088 */ 4089 4090 xmlChar * 4091 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4092 xmlChar *buf = NULL; 4093 int len = 0; 4094 int size = XML_PARSER_BUFFER_SIZE; 4095 xmlChar cur; 4096 xmlChar stop; 4097 int count = 0; 4098 xmlParserInputState oldstate = ctxt->instate; 4099 4100 SHRINK; 4101 if (RAW == '"') { 4102 NEXT; 4103 stop = '"'; 4104 } else if (RAW == '\'') { 4105 NEXT; 4106 stop = '\''; 4107 } else { 4108 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4109 return(NULL); 4110 } 4111 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4112 if (buf == NULL) { 4113 xmlErrMemory(ctxt, NULL); 4114 return(NULL); 4115 } 4116 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4117 cur = CUR; 4118 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4119 if (len + 1 >= size) { 4120 xmlChar *tmp; 4121 4122 size *= 2; 4123 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4124 if (tmp == NULL) { 4125 xmlErrMemory(ctxt, NULL); 4126 xmlFree(buf); 4127 return(NULL); 4128 } 4129 buf = tmp; 4130 } 4131 buf[len++] = cur; 4132 count++; 4133 if (count > 50) { 4134 GROW; 4135 count = 0; 4136 } 4137 NEXT; 4138 cur = CUR; 4139 if (cur == 0) { 
4140 GROW; 4141 SHRINK; 4142 cur = CUR; 4143 } 4144 } 4145 buf[len] = 0; 4146 if (cur != stop) { 4147 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4148 } else { 4149 NEXT; 4150 } 4151 ctxt->instate = oldstate; 4152 return(buf); 4153 } 4154 4155 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4156 4157 /* 4158 * used for the test in the inner loop of the char data testing 4159 */ 4160 static const unsigned char test_char_data[256] = { 4161 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4162 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4163 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4164 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4165 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4166 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4167 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4168 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4169 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4170 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4171 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4172 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4173 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4174 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4175 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4176 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4177 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4178 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4179 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4180 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4181 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4182 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4183 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4184 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4185 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4186 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4187 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4188 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 4189 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4190 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4191 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4192 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4193 }; 4194 4195 /** 4196 * xmlParseCharData: 4197 * @ctxt: an XML parser context 4198 * @cdata: int indicating whether we are within a CDATA section 4199 * 4200 * parse a CharData section. 4201 * if we are within a CDATA section ']]>' marks an end of section. 4202 * 4203 * The right angle bracket (>) may be represented using the string ">", 4204 * and must, for compatibility, be escaped using ">" or a character 4205 * reference when it appears in the string "]]>" in content, when that 4206 * string is not marking the end of a CDATA section. 4207 * 4208 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 4209 */ 4210 4211 void 4212 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 4213 const xmlChar *in; 4214 int nbchar = 0; 4215 int line = ctxt->input->line; 4216 int col = ctxt->input->col; 4217 int ccol; 4218 4219 SHRINK; 4220 GROW; 4221 /* 4222 * Accelerated common case where input don't need to be 4223 * modified before passing it to the handler. 
4224 */ 4225 if (!cdata) { 4226 in = ctxt->input->cur; 4227 do { 4228 get_more_space: 4229 while (*in == 0x20) { in++; ctxt->input->col++; } 4230 if (*in == 0xA) { 4231 do { 4232 ctxt->input->line++; ctxt->input->col = 1; 4233 in++; 4234 } while (*in == 0xA); 4235 goto get_more_space; 4236 } 4237 if (*in == '<') { 4238 nbchar = in - ctxt->input->cur; 4239 if (nbchar > 0) { 4240 const xmlChar *tmp = ctxt->input->cur; 4241 ctxt->input->cur = in; 4242 4243 if ((ctxt->sax != NULL) && 4244 (ctxt->sax->ignorableWhitespace != 4245 ctxt->sax->characters)) { 4246 if (areBlanks(ctxt, tmp, nbchar, 1)) { 4247 if (ctxt->sax->ignorableWhitespace != NULL) 4248 ctxt->sax->ignorableWhitespace(ctxt->userData, 4249 tmp, nbchar); 4250 } else { 4251 if (ctxt->sax->characters != NULL) 4252 ctxt->sax->characters(ctxt->userData, 4253 tmp, nbchar); 4254 if (*ctxt->space == -1) 4255 *ctxt->space = -2; 4256 } 4257 } else if ((ctxt->sax != NULL) && 4258 (ctxt->sax->characters != NULL)) { 4259 ctxt->sax->characters(ctxt->userData, 4260 tmp, nbchar); 4261 } 4262 } 4263 return; 4264 } 4265 4266 get_more: 4267 ccol = ctxt->input->col; 4268 while (test_char_data[*in]) { 4269 in++; 4270 ccol++; 4271 } 4272 ctxt->input->col = ccol; 4273 if (*in == 0xA) { 4274 do { 4275 ctxt->input->line++; ctxt->input->col = 1; 4276 in++; 4277 } while (*in == 0xA); 4278 goto get_more; 4279 } 4280 if (*in == ']') { 4281 if ((in[1] == ']') && (in[2] == '>')) { 4282 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4283 ctxt->input->cur = in; 4284 return; 4285 } 4286 in++; 4287 ctxt->input->col++; 4288 goto get_more; 4289 } 4290 nbchar = in - ctxt->input->cur; 4291 if (nbchar > 0) { 4292 if ((ctxt->sax != NULL) && 4293 (ctxt->sax->ignorableWhitespace != 4294 ctxt->sax->characters) && 4295 (IS_BLANK_CH(*ctxt->input->cur))) { 4296 const xmlChar *tmp = ctxt->input->cur; 4297 ctxt->input->cur = in; 4298 4299 if (areBlanks(ctxt, tmp, nbchar, 0)) { 4300 if (ctxt->sax->ignorableWhitespace != NULL) 4301 
ctxt->sax->ignorableWhitespace(ctxt->userData, 4302 tmp, nbchar); 4303 } else { 4304 if (ctxt->sax->characters != NULL) 4305 ctxt->sax->characters(ctxt->userData, 4306 tmp, nbchar); 4307 if (*ctxt->space == -1) 4308 *ctxt->space = -2; 4309 } 4310 line = ctxt->input->line; 4311 col = ctxt->input->col; 4312 } else if (ctxt->sax != NULL) { 4313 if (ctxt->sax->characters != NULL) 4314 ctxt->sax->characters(ctxt->userData, 4315 ctxt->input->cur, nbchar); 4316 line = ctxt->input->line; 4317 col = ctxt->input->col; 4318 } 4319 /* something really bad happened in the SAX callback */ 4320 if (ctxt->instate != XML_PARSER_CONTENT) 4321 return; 4322 } 4323 ctxt->input->cur = in; 4324 if (*in == 0xD) { 4325 in++; 4326 if (*in == 0xA) { 4327 ctxt->input->cur = in; 4328 in++; 4329 ctxt->input->line++; ctxt->input->col = 1; 4330 continue; /* while */ 4331 } 4332 in--; 4333 } 4334 if (*in == '<') { 4335 return; 4336 } 4337 if (*in == '&') { 4338 return; 4339 } 4340 SHRINK; 4341 GROW; 4342 in = ctxt->input->cur; 4343 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4344 nbchar = 0; 4345 } 4346 ctxt->input->line = line; 4347 ctxt->input->col = col; 4348 xmlParseCharDataComplex(ctxt, cdata); 4349 } 4350 4351 /** 4352 * xmlParseCharDataComplex: 4353 * @ctxt: an XML parser context 4354 * @cdata: int indicating whether we are within a CDATA section 4355 * 4356 * parse a CharData section.this is the fallback function 4357 * of xmlParseCharData() when the parsing requires handling 4358 * of non-ASCII characters. 
4359 */ 4360 static void 4361 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4362 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4363 int nbchar = 0; 4364 int cur, l; 4365 int count = 0; 4366 4367 SHRINK; 4368 GROW; 4369 cur = CUR_CHAR(l); 4370 while ((cur != '<') && /* checked */ 4371 (cur != '&') && 4372 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4373 if ((cur == ']') && (NXT(1) == ']') && 4374 (NXT(2) == '>')) { 4375 if (cdata) break; 4376 else { 4377 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4378 } 4379 } 4380 COPY_BUF(l,buf,nbchar,cur); 4381 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4382 buf[nbchar] = 0; 4383 4384 /* 4385 * OK the segment is to be consumed as chars. 4386 */ 4387 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4388 if (areBlanks(ctxt, buf, nbchar, 0)) { 4389 if (ctxt->sax->ignorableWhitespace != NULL) 4390 ctxt->sax->ignorableWhitespace(ctxt->userData, 4391 buf, nbchar); 4392 } else { 4393 if (ctxt->sax->characters != NULL) 4394 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4395 if ((ctxt->sax->characters != 4396 ctxt->sax->ignorableWhitespace) && 4397 (*ctxt->space == -1)) 4398 *ctxt->space = -2; 4399 } 4400 } 4401 nbchar = 0; 4402 /* something really bad happened in the SAX callback */ 4403 if (ctxt->instate != XML_PARSER_CONTENT) 4404 return; 4405 } 4406 count++; 4407 if (count > 50) { 4408 GROW; 4409 count = 0; 4410 } 4411 NEXTL(l); 4412 cur = CUR_CHAR(l); 4413 } 4414 if (nbchar != 0) { 4415 buf[nbchar] = 0; 4416 /* 4417 * OK the segment is to be consumed as chars. 
4418 */ 4419 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4420 if (areBlanks(ctxt, buf, nbchar, 0)) { 4421 if (ctxt->sax->ignorableWhitespace != NULL) 4422 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4423 } else { 4424 if (ctxt->sax->characters != NULL) 4425 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4426 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4427 (*ctxt->space == -1)) 4428 *ctxt->space = -2; 4429 } 4430 } 4431 } 4432 if ((cur != 0) && (!IS_CHAR(cur))) { 4433 /* Generate the error and skip the offending character */ 4434 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4435 "PCDATA invalid Char value %d\n", 4436 cur); 4437 NEXTL(l); 4438 } 4439 } 4440 4441 /** 4442 * xmlParseExternalID: 4443 * @ctxt: an XML parser context 4444 * @publicID: a xmlChar** receiving PubidLiteral 4445 * @strict: indicate whether we should restrict parsing to only 4446 * production [75], see NOTE below 4447 * 4448 * Parse an External ID or a Public ID 4449 * 4450 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4451 * 'PUBLIC' S PubidLiteral S SystemLiteral 4452 * 4453 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4454 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4455 * 4456 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4457 * 4458 * Returns the function returns SystemLiteral and in the second 4459 * case publicID receives PubidLiteral, is strict is off 4460 * it is possible to return NULL and have publicID set. 
4461 */ 4462 4463 xmlChar * 4464 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4465 xmlChar *URI = NULL; 4466 4467 SHRINK; 4468 4469 *publicID = NULL; 4470 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4471 SKIP(6); 4472 if (!IS_BLANK_CH(CUR)) { 4473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4474 "Space required after 'SYSTEM'\n"); 4475 } 4476 SKIP_BLANKS; 4477 URI = xmlParseSystemLiteral(ctxt); 4478 if (URI == NULL) { 4479 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4480 } 4481 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4482 SKIP(6); 4483 if (!IS_BLANK_CH(CUR)) { 4484 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4485 "Space required after 'PUBLIC'\n"); 4486 } 4487 SKIP_BLANKS; 4488 *publicID = xmlParsePubidLiteral(ctxt); 4489 if (*publicID == NULL) { 4490 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4491 } 4492 if (strict) { 4493 /* 4494 * We don't handle [83] so "S SystemLiteral" is required. 4495 */ 4496 if (!IS_BLANK_CH(CUR)) { 4497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4498 "Space required after the Public Identifier\n"); 4499 } 4500 } else { 4501 /* 4502 * We handle [83] so we return immediately, if 4503 * "S SystemLiteral" is not detected. From a purely parsing 4504 * point of view that's a nice mess. 4505 */ 4506 const xmlChar *ptr; 4507 GROW; 4508 4509 ptr = CUR_PTR; 4510 if (!IS_BLANK_CH(*ptr)) return(NULL); 4511 4512 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4513 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4514 } 4515 SKIP_BLANKS; 4516 URI = xmlParseSystemLiteral(ctxt); 4517 if (URI == NULL) { 4518 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4519 } 4520 } 4521 return(URI); 4522 } 4523 4524 /** 4525 * xmlParseCommentComplex: 4526 * @ctxt: an XML parser context 4527 * @buf: the already parsed part of the buffer 4528 * @len: number of bytes filles in the buffer 4529 * @size: allocated size of the buffer 4530 * 4531 * Skip an XML (SGML) comment <!-- .... 
--> 4532 * The spec says that "For compatibility, the string "--" (double-hyphen) 4533 * must not occur within comments. " 4534 * This is the slow routine in case the accelerator for ascii didn't work 4535 * 4536 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4537 */ 4538 static void 4539 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { 4540 int q, ql; 4541 int r, rl; 4542 int cur, l; 4543 int count = 0; 4544 int inputid; 4545 4546 inputid = ctxt->input->id; 4547 4548 if (buf == NULL) { 4549 len = 0; 4550 size = XML_PARSER_BUFFER_SIZE; 4551 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4552 if (buf == NULL) { 4553 xmlErrMemory(ctxt, NULL); 4554 return; 4555 } 4556 } 4557 GROW; /* Assure there's enough input data */ 4558 q = CUR_CHAR(ql); 4559 if (q == 0) 4560 goto not_terminated; 4561 if (!IS_CHAR(q)) { 4562 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4563 "xmlParseComment: invalid xmlChar value %d\n", 4564 q); 4565 xmlFree (buf); 4566 return; 4567 } 4568 NEXTL(ql); 4569 r = CUR_CHAR(rl); 4570 if (r == 0) 4571 goto not_terminated; 4572 if (!IS_CHAR(r)) { 4573 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4574 "xmlParseComment: invalid xmlChar value %d\n", 4575 q); 4576 xmlFree (buf); 4577 return; 4578 } 4579 NEXTL(rl); 4580 cur = CUR_CHAR(l); 4581 if (cur == 0) 4582 goto not_terminated; 4583 while (IS_CHAR(cur) && /* checked */ 4584 ((cur != '>') || 4585 (r != '-') || (q != '-'))) { 4586 if ((r == '-') && (q == '-')) { 4587 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4588 } 4589 if (len + 5 >= size) { 4590 xmlChar *new_buf; 4591 size *= 2; 4592 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4593 if (new_buf == NULL) { 4594 xmlFree (buf); 4595 xmlErrMemory(ctxt, NULL); 4596 return; 4597 } 4598 buf = new_buf; 4599 } 4600 COPY_BUF(ql,buf,len,q); 4601 q = r; 4602 ql = rl; 4603 r = cur; 4604 rl = l; 4605 4606 count++; 4607 if (count > 50) { 4608 GROW; 4609 count = 0; 4610 } 4611 
NEXTL(l); 4612 cur = CUR_CHAR(l); 4613 if (cur == 0) { 4614 SHRINK; 4615 GROW; 4616 cur = CUR_CHAR(l); 4617 } 4618 } 4619 buf[len] = 0; 4620 if (cur == 0) { 4621 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4622 "Comment not terminated \n<!--%.50s\n", buf); 4623 } else if (!IS_CHAR(cur)) { 4624 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4625 "xmlParseComment: invalid xmlChar value %d\n", 4626 cur); 4627 } else { 4628 if (inputid != ctxt->input->id) { 4629 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4630 "Comment doesn't start and stop in the same entity\n"); 4631 } 4632 NEXT; 4633 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4634 (!ctxt->disableSAX)) 4635 ctxt->sax->comment(ctxt->userData, buf); 4636 } 4637 xmlFree(buf); 4638 return; 4639 not_terminated: 4640 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4641 "Comment not terminated\n", NULL); 4642 xmlFree(buf); 4643 return; 4644 } 4645 4646 /** 4647 * xmlParseComment: 4648 * @ctxt: an XML parser context 4649 * 4650 * Skip an XML (SGML) comment <!-- .... --> 4651 * The spec says that "For compatibility, the string "--" (double-hyphen) 4652 * must not occur within comments. " 4653 * 4654 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4655 */ 4656 void 4657 xmlParseComment(xmlParserCtxtPtr ctxt) { 4658 xmlChar *buf = NULL; 4659 int size = XML_PARSER_BUFFER_SIZE; 4660 int len = 0; 4661 xmlParserInputState state; 4662 const xmlChar *in; 4663 int nbchar = 0, ccol; 4664 int inputid; 4665 4666 /* 4667 * Check that there is a comment right here. 4668 */ 4669 if ((RAW != '<') || (NXT(1) != '!') || 4670 (NXT(2) != '-') || (NXT(3) != '-')) return; 4671 state = ctxt->instate; 4672 ctxt->instate = XML_PARSER_COMMENT; 4673 inputid = ctxt->input->id; 4674 SKIP(4); 4675 SHRINK; 4676 GROW; 4677 4678 /* 4679 * Accelerated common case where input don't need to be 4680 * modified before passing it to the handler. 
4681 */ 4682 in = ctxt->input->cur; 4683 do { 4684 if (*in == 0xA) { 4685 do { 4686 ctxt->input->line++; ctxt->input->col = 1; 4687 in++; 4688 } while (*in == 0xA); 4689 } 4690 get_more: 4691 ccol = ctxt->input->col; 4692 while (((*in > '-') && (*in <= 0x7F)) || 4693 ((*in >= 0x20) && (*in < '-')) || 4694 (*in == 0x09)) { 4695 in++; 4696 ccol++; 4697 } 4698 ctxt->input->col = ccol; 4699 if (*in == 0xA) { 4700 do { 4701 ctxt->input->line++; ctxt->input->col = 1; 4702 in++; 4703 } while (*in == 0xA); 4704 goto get_more; 4705 } 4706 nbchar = in - ctxt->input->cur; 4707 /* 4708 * save current set of data 4709 */ 4710 if (nbchar > 0) { 4711 if ((ctxt->sax != NULL) && 4712 (ctxt->sax->comment != NULL)) { 4713 if (buf == NULL) { 4714 if ((*in == '-') && (in[1] == '-')) 4715 size = nbchar + 1; 4716 else 4717 size = XML_PARSER_BUFFER_SIZE + nbchar; 4718 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4719 if (buf == NULL) { 4720 xmlErrMemory(ctxt, NULL); 4721 ctxt->instate = state; 4722 return; 4723 } 4724 len = 0; 4725 } else if (len + nbchar + 1 >= size) { 4726 xmlChar *new_buf; 4727 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4728 new_buf = (xmlChar *) xmlRealloc(buf, 4729 size * sizeof(xmlChar)); 4730 if (new_buf == NULL) { 4731 xmlFree (buf); 4732 xmlErrMemory(ctxt, NULL); 4733 ctxt->instate = state; 4734 return; 4735 } 4736 buf = new_buf; 4737 } 4738 memcpy(&buf[len], ctxt->input->cur, nbchar); 4739 len += nbchar; 4740 buf[len] = 0; 4741 } 4742 } 4743 ctxt->input->cur = in; 4744 if (*in == 0xA) { 4745 in++; 4746 ctxt->input->line++; ctxt->input->col = 1; 4747 } 4748 if (*in == 0xD) { 4749 in++; 4750 if (*in == 0xA) { 4751 ctxt->input->cur = in; 4752 in++; 4753 ctxt->input->line++; ctxt->input->col = 1; 4754 continue; /* while */ 4755 } 4756 in--; 4757 } 4758 SHRINK; 4759 GROW; 4760 in = ctxt->input->cur; 4761 if (*in == '-') { 4762 if (in[1] == '-') { 4763 if (in[2] == '>') { 4764 if (ctxt->input->id != inputid) { 4765 xmlFatalErrMsg(ctxt, 
XML_ERR_ENTITY_BOUNDARY, 4766 "comment doesn't start and stop in the same entity\n"); 4767 } 4768 SKIP(3); 4769 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4770 (!ctxt->disableSAX)) { 4771 if (buf != NULL) 4772 ctxt->sax->comment(ctxt->userData, buf); 4773 else 4774 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4775 } 4776 if (buf != NULL) 4777 xmlFree(buf); 4778 ctxt->instate = state; 4779 return; 4780 } 4781 if (buf != NULL) 4782 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4783 "Comment not terminated \n<!--%.50s\n", 4784 buf); 4785 else 4786 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4787 "Comment not terminated \n", NULL); 4788 in++; 4789 ctxt->input->col++; 4790 } 4791 in++; 4792 ctxt->input->col++; 4793 goto get_more; 4794 } 4795 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4796 xmlParseCommentComplex(ctxt, buf, len, size); 4797 ctxt->instate = state; 4798 return; 4799 } 4800 4801 4802 /** 4803 * xmlParsePITarget: 4804 * @ctxt: an XML parser context 4805 * 4806 * parse the name of a PI 4807 * 4808 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 4809 * 4810 * Returns the PITarget name or NULL 4811 */ 4812 4813 const xmlChar * 4814 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 4815 const xmlChar *name; 4816 4817 name = xmlParseName(ctxt); 4818 if ((name != NULL) && 4819 ((name[0] == 'x') || (name[0] == 'X')) && 4820 ((name[1] == 'm') || (name[1] == 'M')) && 4821 ((name[2] == 'l') || (name[2] == 'L'))) { 4822 int i; 4823 if ((name[0] == 'x') && (name[1] == 'm') && 4824 (name[2] == 'l') && (name[3] == 0)) { 4825 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4826 "XML declaration allowed only at the start of the document\n"); 4827 return(name); 4828 } else if (name[3] == 0) { 4829 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 4830 return(name); 4831 } 4832 for (i = 0;;i++) { 4833 if (xmlW3CPIs[i] == NULL) break; 4834 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 4835 return(name); 
4836 } 4837 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4838 "xmlParsePITarget: invalid name prefix 'xml'\n", 4839 NULL, NULL); 4840 } 4841 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 4842 xmlNsErr(ctxt, XML_NS_ERR_COLON, 4843 "colon are forbidden from PI names '%s'\n", name, NULL, NULL); 4844 } 4845 return(name); 4846 } 4847 4848 #ifdef LIBXML_CATALOG_ENABLED 4849 /** 4850 * xmlParseCatalogPI: 4851 * @ctxt: an XML parser context 4852 * @catalog: the PI value string 4853 * 4854 * parse an XML Catalog Processing Instruction. 4855 * 4856 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 4857 * 4858 * Occurs only if allowed by the user and if happening in the Misc 4859 * part of the document before any doctype informations 4860 * This will add the given catalog to the parsing context in order 4861 * to be used if there is a resolution need further down in the document 4862 */ 4863 4864 static void 4865 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 4866 xmlChar *URL = NULL; 4867 const xmlChar *tmp, *base; 4868 xmlChar marker; 4869 4870 tmp = catalog; 4871 while (IS_BLANK_CH(*tmp)) tmp++; 4872 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 4873 goto error; 4874 tmp += 7; 4875 while (IS_BLANK_CH(*tmp)) tmp++; 4876 if (*tmp != '=') { 4877 return; 4878 } 4879 tmp++; 4880 while (IS_BLANK_CH(*tmp)) tmp++; 4881 marker = *tmp; 4882 if ((marker != '\'') && (marker != '"')) 4883 goto error; 4884 tmp++; 4885 base = tmp; 4886 while ((*tmp != 0) && (*tmp != marker)) tmp++; 4887 if (*tmp == 0) 4888 goto error; 4889 URL = xmlStrndup(base, tmp - base); 4890 tmp++; 4891 while (IS_BLANK_CH(*tmp)) tmp++; 4892 if (*tmp != 0) 4893 goto error; 4894 4895 if (URL != NULL) { 4896 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 4897 xmlFree(URL); 4898 } 4899 return; 4900 4901 error: 4902 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 4903 "Catalog PI syntax error: %s\n", 4904 catalog, NULL); 4905 if (URL != NULL) 4906 xmlFree(URL); 4907 } 4908 
#endif 4909 4910 /** 4911 * xmlParsePI: 4912 * @ctxt: an XML parser context 4913 * 4914 * parse an XML Processing Instruction. 4915 * 4916 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 4917 * 4918 * The processing is transfered to SAX once parsed. 4919 */ 4920 4921 void 4922 xmlParsePI(xmlParserCtxtPtr ctxt) { 4923 xmlChar *buf = NULL; 4924 int len = 0; 4925 int size = XML_PARSER_BUFFER_SIZE; 4926 int cur, l; 4927 const xmlChar *target; 4928 xmlParserInputState state; 4929 int count = 0; 4930 4931 if ((RAW == '<') && (NXT(1) == '?')) { 4932 xmlParserInputPtr input = ctxt->input; 4933 state = ctxt->instate; 4934 ctxt->instate = XML_PARSER_PI; 4935 /* 4936 * this is a Processing Instruction. 4937 */ 4938 SKIP(2); 4939 SHRINK; 4940 4941 /* 4942 * Parse the target name and check for special support like 4943 * namespace. 4944 */ 4945 target = xmlParsePITarget(ctxt); 4946 if (target != NULL) { 4947 if ((RAW == '?') && (NXT(1) == '>')) { 4948 if (input != ctxt->input) { 4949 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4950 "PI declaration doesn't start and stop in the same entity\n"); 4951 } 4952 SKIP(2); 4953 4954 /* 4955 * SAX: PI detected. 
4956 */ 4957 if ((ctxt->sax) && (!ctxt->disableSAX) && 4958 (ctxt->sax->processingInstruction != NULL)) 4959 ctxt->sax->processingInstruction(ctxt->userData, 4960 target, NULL); 4961 if (ctxt->instate != XML_PARSER_EOF) 4962 ctxt->instate = state; 4963 return; 4964 } 4965 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4966 if (buf == NULL) { 4967 xmlErrMemory(ctxt, NULL); 4968 ctxt->instate = state; 4969 return; 4970 } 4971 cur = CUR; 4972 if (!IS_BLANK(cur)) { 4973 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 4974 "ParsePI: PI %s space expected\n", target); 4975 } 4976 SKIP_BLANKS; 4977 cur = CUR_CHAR(l); 4978 while (IS_CHAR(cur) && /* checked */ 4979 ((cur != '?') || (NXT(1) != '>'))) { 4980 if (len + 5 >= size) { 4981 xmlChar *tmp; 4982 4983 size *= 2; 4984 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4985 if (tmp == NULL) { 4986 xmlErrMemory(ctxt, NULL); 4987 xmlFree(buf); 4988 ctxt->instate = state; 4989 return; 4990 } 4991 buf = tmp; 4992 } 4993 count++; 4994 if (count > 50) { 4995 GROW; 4996 count = 0; 4997 } 4998 COPY_BUF(l,buf,len,cur); 4999 NEXTL(l); 5000 cur = CUR_CHAR(l); 5001 if (cur == 0) { 5002 SHRINK; 5003 GROW; 5004 cur = CUR_CHAR(l); 5005 } 5006 } 5007 buf[len] = 0; 5008 if (cur != '?') { 5009 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5010 "ParsePI: PI %s never end ...\n", target); 5011 } else { 5012 if (input != ctxt->input) { 5013 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5014 "PI declaration doesn't start and stop in the same entity\n"); 5015 } 5016 SKIP(2); 5017 5018 #ifdef LIBXML_CATALOG_ENABLED 5019 if (((state == XML_PARSER_MISC) || 5020 (state == XML_PARSER_START)) && 5021 (xmlStrEqual(target, XML_CATALOG_PI))) { 5022 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5023 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5024 (allow == XML_CATA_ALLOW_ALL)) 5025 xmlParseCatalogPI(ctxt, buf); 5026 } 5027 #endif 5028 5029 5030 /* 5031 * SAX: PI detected. 
5032 */ 5033 if ((ctxt->sax) && (!ctxt->disableSAX) && 5034 (ctxt->sax->processingInstruction != NULL)) 5035 ctxt->sax->processingInstruction(ctxt->userData, 5036 target, buf); 5037 } 5038 xmlFree(buf); 5039 } else { 5040 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5041 } 5042 if (ctxt->instate != XML_PARSER_EOF) 5043 ctxt->instate = state; 5044 } 5045 } 5046 5047 /** 5048 * xmlParseNotationDecl: 5049 * @ctxt: an XML parser context 5050 * 5051 * parse a notation declaration 5052 * 5053 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5054 * 5055 * Hence there is actually 3 choices: 5056 * 'PUBLIC' S PubidLiteral 5057 * 'PUBLIC' S PubidLiteral S SystemLiteral 5058 * and 'SYSTEM' S SystemLiteral 5059 * 5060 * See the NOTE on xmlParseExternalID(). 5061 */ 5062 5063 void 5064 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5065 const xmlChar *name; 5066 xmlChar *Pubid; 5067 xmlChar *Systemid; 5068 5069 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5070 xmlParserInputPtr input = ctxt->input; 5071 SHRINK; 5072 SKIP(10); 5073 if (!IS_BLANK_CH(CUR)) { 5074 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5075 "Space required after '<!NOTATION'\n"); 5076 return; 5077 } 5078 SKIP_BLANKS; 5079 5080 name = xmlParseName(ctxt); 5081 if (name == NULL) { 5082 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5083 return; 5084 } 5085 if (!IS_BLANK_CH(CUR)) { 5086 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5087 "Space required after the NOTATION name'\n"); 5088 return; 5089 } 5090 if (xmlStrchr(name, ':') != NULL) { 5091 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5092 "colon are forbidden from notation names '%s'\n", 5093 name, NULL, NULL); 5094 } 5095 SKIP_BLANKS; 5096 5097 /* 5098 * Parse the IDs. 
5099 */ 5100 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5101 SKIP_BLANKS; 5102 5103 if (RAW == '>') { 5104 if (input != ctxt->input) { 5105 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5106 "Notation declaration doesn't start and stop in the same entity\n"); 5107 } 5108 NEXT; 5109 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5110 (ctxt->sax->notationDecl != NULL)) 5111 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5112 } else { 5113 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5114 } 5115 if (Systemid != NULL) xmlFree(Systemid); 5116 if (Pubid != NULL) xmlFree(Pubid); 5117 } 5118 } 5119 5120 /** 5121 * xmlParseEntityDecl: 5122 * @ctxt: an XML parser context 5123 * 5124 * parse <!ENTITY declarations 5125 * 5126 * [70] EntityDecl ::= GEDecl | PEDecl 5127 * 5128 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5129 * 5130 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5131 * 5132 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5133 * 5134 * [74] PEDef ::= EntityValue | ExternalID 5135 * 5136 * [76] NDataDecl ::= S 'NDATA' S Name 5137 * 5138 * [ VC: Notation Declared ] 5139 * The Name must match the declared name of a notation. 
5140 */ 5141 5142 void 5143 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5144 const xmlChar *name = NULL; 5145 xmlChar *value = NULL; 5146 xmlChar *URI = NULL, *literal = NULL; 5147 const xmlChar *ndata = NULL; 5148 int isParameter = 0; 5149 xmlChar *orig = NULL; 5150 int skipped; 5151 5152 /* GROW; done in the caller */ 5153 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5154 xmlParserInputPtr input = ctxt->input; 5155 SHRINK; 5156 SKIP(8); 5157 skipped = SKIP_BLANKS; 5158 if (skipped == 0) { 5159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5160 "Space required after '<!ENTITY'\n"); 5161 } 5162 5163 if (RAW == '%') { 5164 NEXT; 5165 skipped = SKIP_BLANKS; 5166 if (skipped == 0) { 5167 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5168 "Space required after '%'\n"); 5169 } 5170 isParameter = 1; 5171 } 5172 5173 name = xmlParseName(ctxt); 5174 if (name == NULL) { 5175 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5176 "xmlParseEntityDecl: no name\n"); 5177 return; 5178 } 5179 if (xmlStrchr(name, ':') != NULL) { 5180 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5181 "colon are forbidden from entities names '%s'\n", 5182 name, NULL, NULL); 5183 } 5184 skipped = SKIP_BLANKS; 5185 if (skipped == 0) { 5186 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5187 "Space required after the entity name\n"); 5188 } 5189 5190 ctxt->instate = XML_PARSER_ENTITY_DECL; 5191 /* 5192 * handle the various case of definitions... 
5193 */ 5194 if (isParameter) { 5195 if ((RAW == '"') || (RAW == '\'')) { 5196 value = xmlParseEntityValue(ctxt, &orig); 5197 if (value) { 5198 if ((ctxt->sax != NULL) && 5199 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5200 ctxt->sax->entityDecl(ctxt->userData, name, 5201 XML_INTERNAL_PARAMETER_ENTITY, 5202 NULL, NULL, value); 5203 } 5204 } else { 5205 URI = xmlParseExternalID(ctxt, &literal, 1); 5206 if ((URI == NULL) && (literal == NULL)) { 5207 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5208 } 5209 if (URI) { 5210 xmlURIPtr uri; 5211 5212 uri = xmlParseURI((const char *) URI); 5213 if (uri == NULL) { 5214 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5215 "Invalid URI: %s\n", URI); 5216 /* 5217 * This really ought to be a well formedness error 5218 * but the XML Core WG decided otherwise c.f. issue 5219 * E26 of the XML erratas. 5220 */ 5221 } else { 5222 if (uri->fragment != NULL) { 5223 /* 5224 * Okay this is foolish to block those but not 5225 * invalid URIs. 5226 */ 5227 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5228 } else { 5229 if ((ctxt->sax != NULL) && 5230 (!ctxt->disableSAX) && 5231 (ctxt->sax->entityDecl != NULL)) 5232 ctxt->sax->entityDecl(ctxt->userData, name, 5233 XML_EXTERNAL_PARAMETER_ENTITY, 5234 literal, URI, NULL); 5235 } 5236 xmlFreeURI(uri); 5237 } 5238 } 5239 } 5240 } else { 5241 if ((RAW == '"') || (RAW == '\'')) { 5242 value = xmlParseEntityValue(ctxt, &orig); 5243 if ((ctxt->sax != NULL) && 5244 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5245 ctxt->sax->entityDecl(ctxt->userData, name, 5246 XML_INTERNAL_GENERAL_ENTITY, 5247 NULL, NULL, value); 5248 /* 5249 * For expat compatibility in SAX mode. 
5250 */ 5251 if ((ctxt->myDoc == NULL) || 5252 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5253 if (ctxt->myDoc == NULL) { 5254 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5255 if (ctxt->myDoc == NULL) { 5256 xmlErrMemory(ctxt, "New Doc failed"); 5257 return; 5258 } 5259 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5260 } 5261 if (ctxt->myDoc->intSubset == NULL) 5262 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5263 BAD_CAST "fake", NULL, NULL); 5264 5265 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5266 NULL, NULL, value); 5267 } 5268 } else { 5269 URI = xmlParseExternalID(ctxt, &literal, 1); 5270 if ((URI == NULL) && (literal == NULL)) { 5271 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5272 } 5273 if (URI) { 5274 xmlURIPtr uri; 5275 5276 uri = xmlParseURI((const char *)URI); 5277 if (uri == NULL) { 5278 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5279 "Invalid URI: %s\n", URI); 5280 /* 5281 * This really ought to be a well formedness error 5282 * but the XML Core WG decided otherwise c.f. issue 5283 * E26 of the XML erratas. 5284 */ 5285 } else { 5286 if (uri->fragment != NULL) { 5287 /* 5288 * Okay this is foolish to block those but not 5289 * invalid URIs. 
5290 */ 5291 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5292 } 5293 xmlFreeURI(uri); 5294 } 5295 } 5296 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5298 "Space required before 'NDATA'\n"); 5299 } 5300 SKIP_BLANKS; 5301 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5302 SKIP(5); 5303 if (!IS_BLANK_CH(CUR)) { 5304 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5305 "Space required after 'NDATA'\n"); 5306 } 5307 SKIP_BLANKS; 5308 ndata = xmlParseName(ctxt); 5309 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5310 (ctxt->sax->unparsedEntityDecl != NULL)) 5311 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5312 literal, URI, ndata); 5313 } else { 5314 if ((ctxt->sax != NULL) && 5315 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5316 ctxt->sax->entityDecl(ctxt->userData, name, 5317 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5318 literal, URI, NULL); 5319 /* 5320 * For expat compatibility in SAX mode. 5321 * assuming the entity repalcement was asked for 5322 */ 5323 if ((ctxt->replaceEntities != 0) && 5324 ((ctxt->myDoc == NULL) || 5325 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5326 if (ctxt->myDoc == NULL) { 5327 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5328 if (ctxt->myDoc == NULL) { 5329 xmlErrMemory(ctxt, "New Doc failed"); 5330 return; 5331 } 5332 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5333 } 5334 5335 if (ctxt->myDoc->intSubset == NULL) 5336 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5337 BAD_CAST "fake", NULL, NULL); 5338 xmlSAX2EntityDecl(ctxt, name, 5339 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5340 literal, URI, NULL); 5341 } 5342 } 5343 } 5344 } 5345 SKIP_BLANKS; 5346 if (RAW != '>') { 5347 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5348 "xmlParseEntityDecl: entity %s not terminated\n", name); 5349 } else { 5350 if (input != ctxt->input) { 5351 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5352 "Entity declaration doesn't start and stop in the same entity\n"); 5353 } 
5354 NEXT; 5355 } 5356 if (orig != NULL) { 5357 /* 5358 * Ugly mechanism to save the raw entity value. 5359 */ 5360 xmlEntityPtr cur = NULL; 5361 5362 if (isParameter) { 5363 if ((ctxt->sax != NULL) && 5364 (ctxt->sax->getParameterEntity != NULL)) 5365 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5366 } else { 5367 if ((ctxt->sax != NULL) && 5368 (ctxt->sax->getEntity != NULL)) 5369 cur = ctxt->sax->getEntity(ctxt->userData, name); 5370 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5371 cur = xmlSAX2GetEntity(ctxt, name); 5372 } 5373 } 5374 if (cur != NULL) { 5375 if (cur->orig != NULL) 5376 xmlFree(orig); 5377 else 5378 cur->orig = orig; 5379 } else 5380 xmlFree(orig); 5381 } 5382 if (value != NULL) xmlFree(value); 5383 if (URI != NULL) xmlFree(URI); 5384 if (literal != NULL) xmlFree(literal); 5385 } 5386 } 5387 5388 /** 5389 * xmlParseDefaultDecl: 5390 * @ctxt: an XML parser context 5391 * @value: Receive a possible fixed default value for the attribute 5392 * 5393 * Parse an attribute default declaration 5394 * 5395 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5396 * 5397 * [ VC: Required Attribute ] 5398 * if the default declaration is the keyword #REQUIRED, then the 5399 * attribute must be specified for all elements of the type in the 5400 * attribute-list declaration. 5401 * 5402 * [ VC: Attribute Default Legal ] 5403 * The declared default value must meet the lexical constraints of 5404 * the declared attribute type c.f. xmlValidateAttributeDecl() 5405 * 5406 * [ VC: Fixed Attribute Default ] 5407 * if an attribute has a default value declared with the #FIXED 5408 * keyword, instances of that attribute must match the default value. 5409 * 5410 * [ WFC: No < in Attribute Values ] 5411 * handled in xmlParseAttValue() 5412 * 5413 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5414 * or XML_ATTRIBUTE_FIXED. 
5415 */ 5416 5417 int 5418 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5419 int val; 5420 xmlChar *ret; 5421 5422 *value = NULL; 5423 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5424 SKIP(9); 5425 return(XML_ATTRIBUTE_REQUIRED); 5426 } 5427 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5428 SKIP(8); 5429 return(XML_ATTRIBUTE_IMPLIED); 5430 } 5431 val = XML_ATTRIBUTE_NONE; 5432 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5433 SKIP(6); 5434 val = XML_ATTRIBUTE_FIXED; 5435 if (!IS_BLANK_CH(CUR)) { 5436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5437 "Space required after '#FIXED'\n"); 5438 } 5439 SKIP_BLANKS; 5440 } 5441 ret = xmlParseAttValue(ctxt); 5442 ctxt->instate = XML_PARSER_DTD; 5443 if (ret == NULL) { 5444 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5445 "Attribute default value declaration error\n"); 5446 } else 5447 *value = ret; 5448 return(val); 5449 } 5450 5451 /** 5452 * xmlParseNotationType: 5453 * @ctxt: an XML parser context 5454 * 5455 * parse an Notation attribute type. 5456 * 5457 * Note: the leading 'NOTATION' S part has already being parsed... 5458 * 5459 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5460 * 5461 * [ VC: Notation Attributes ] 5462 * Values of this type must match one of the notation names included 5463 * in the declaration; all notation names in the declaration must be declared. 
5464 * 5465 * Returns: the notation attribute tree built while parsing 5466 */ 5467 5468 xmlEnumerationPtr 5469 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5470 const xmlChar *name; 5471 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5472 5473 if (RAW != '(') { 5474 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5475 return(NULL); 5476 } 5477 SHRINK; 5478 do { 5479 NEXT; 5480 SKIP_BLANKS; 5481 name = xmlParseName(ctxt); 5482 if (name == NULL) { 5483 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5484 "Name expected in NOTATION declaration\n"); 5485 xmlFreeEnumeration(ret); 5486 return(NULL); 5487 } 5488 tmp = ret; 5489 while (tmp != NULL) { 5490 if (xmlStrEqual(name, tmp->name)) { 5491 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5492 "standalone: attribute notation value token %s duplicated\n", 5493 name, NULL); 5494 if (!xmlDictOwns(ctxt->dict, name)) 5495 xmlFree((xmlChar *) name); 5496 break; 5497 } 5498 tmp = tmp->next; 5499 } 5500 if (tmp == NULL) { 5501 cur = xmlCreateEnumeration(name); 5502 if (cur == NULL) { 5503 xmlFreeEnumeration(ret); 5504 return(NULL); 5505 } 5506 if (last == NULL) ret = last = cur; 5507 else { 5508 last->next = cur; 5509 last = cur; 5510 } 5511 } 5512 SKIP_BLANKS; 5513 } while (RAW == '|'); 5514 if (RAW != ')') { 5515 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5516 xmlFreeEnumeration(ret); 5517 return(NULL); 5518 } 5519 NEXT; 5520 return(ret); 5521 } 5522 5523 /** 5524 * xmlParseEnumerationType: 5525 * @ctxt: an XML parser context 5526 * 5527 * parse an Enumeration attribute type. 5528 * 5529 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5530 * 5531 * [ VC: Enumeration ] 5532 * Values of this type must match one of the Nmtoken tokens in 5533 * the declaration 5534 * 5535 * Returns: the enumeration attribute tree built while parsing 5536 */ 5537 5538 xmlEnumerationPtr 5539 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5540 xmlChar *name; 5541 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5542 5543 if (RAW != '(') { 5544 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5545 return(NULL); 5546 } 5547 SHRINK; 5548 do { 5549 NEXT; 5550 SKIP_BLANKS; 5551 name = xmlParseNmtoken(ctxt); 5552 if (name == NULL) { 5553 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5554 return(ret); 5555 } 5556 tmp = ret; 5557 while (tmp != NULL) { 5558 if (xmlStrEqual(name, tmp->name)) { 5559 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5560 "standalone: attribute enumeration value token %s duplicated\n", 5561 name, NULL); 5562 if (!xmlDictOwns(ctxt->dict, name)) 5563 xmlFree(name); 5564 break; 5565 } 5566 tmp = tmp->next; 5567 } 5568 if (tmp == NULL) { 5569 cur = xmlCreateEnumeration(name); 5570 if (!xmlDictOwns(ctxt->dict, name)) 5571 xmlFree(name); 5572 if (cur == NULL) { 5573 xmlFreeEnumeration(ret); 5574 return(NULL); 5575 } 5576 if (last == NULL) ret = last = cur; 5577 else { 5578 last->next = cur; 5579 last = cur; 5580 } 5581 } 5582 SKIP_BLANKS; 5583 } while (RAW == '|'); 5584 if (RAW != ')') { 5585 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5586 return(ret); 5587 } 5588 NEXT; 5589 return(ret); 5590 } 5591 5592 /** 5593 * xmlParseEnumeratedType: 5594 * @ctxt: an XML parser context 5595 * @tree: the enumeration tree built while parsing 5596 * 5597 * parse an Enumerated attribute type. 5598 * 5599 * [57] EnumeratedType ::= NotationType | Enumeration 5600 * 5601 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5602 * 5603 * 5604 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5605 */ 5606 5607 int 5608 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5609 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5610 SKIP(8); 5611 if (!IS_BLANK_CH(CUR)) { 5612 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5613 "Space required after 'NOTATION'\n"); 5614 return(0); 5615 } 5616 SKIP_BLANKS; 5617 *tree = xmlParseNotationType(ctxt); 5618 if (*tree == NULL) return(0); 5619 return(XML_ATTRIBUTE_NOTATION); 5620 } 5621 *tree = xmlParseEnumerationType(ctxt); 5622 if (*tree == NULL) return(0); 5623 return(XML_ATTRIBUTE_ENUMERATION); 5624 } 5625 5626 /** 5627 * xmlParseAttributeType: 5628 * @ctxt: an XML parser context 5629 * @tree: the enumeration tree built while parsing 5630 * 5631 * parse the Attribute list def for an element 5632 * 5633 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5634 * 5635 * [55] StringType ::= 'CDATA' 5636 * 5637 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5638 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5639 * 5640 * Validity constraints for attribute values syntax are checked in 5641 * xmlValidateAttributeValue() 5642 * 5643 * [ VC: ID ] 5644 * Values of type ID must match the Name production. A name must not 5645 * appear more than once in an XML document as a value of this type; 5646 * i.e., ID values must uniquely identify the elements which bear them. 5647 * 5648 * [ VC: One ID per Element Type ] 5649 * No element type may have more than one ID attribute specified. 5650 * 5651 * [ VC: ID Attribute Default ] 5652 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5653 * 5654 * [ VC: IDREF ] 5655 * Values of type IDREF must match the Name production, and values 5656 * of type IDREFS must match Names; each IDREF Name must match the value 5657 * of an ID attribute on some element in the XML document; i.e. 
IDREF 5658 * values must match the value of some ID attribute. 5659 * 5660 * [ VC: Entity Name ] 5661 * Values of type ENTITY must match the Name production, values 5662 * of type ENTITIES must match Names; each Entity Name must match the 5663 * name of an unparsed entity declared in the DTD. 5664 * 5665 * [ VC: Name Token ] 5666 * Values of type NMTOKEN must match the Nmtoken production; values 5667 * of type NMTOKENS must match Nmtokens. 5668 * 5669 * Returns the attribute type 5670 */ 5671 int 5672 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5673 SHRINK; 5674 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5675 SKIP(5); 5676 return(XML_ATTRIBUTE_CDATA); 5677 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5678 SKIP(6); 5679 return(XML_ATTRIBUTE_IDREFS); 5680 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5681 SKIP(5); 5682 return(XML_ATTRIBUTE_IDREF); 5683 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5684 SKIP(2); 5685 return(XML_ATTRIBUTE_ID); 5686 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5687 SKIP(6); 5688 return(XML_ATTRIBUTE_ENTITY); 5689 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5690 SKIP(8); 5691 return(XML_ATTRIBUTE_ENTITIES); 5692 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5693 SKIP(8); 5694 return(XML_ATTRIBUTE_NMTOKENS); 5695 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5696 SKIP(7); 5697 return(XML_ATTRIBUTE_NMTOKEN); 5698 } 5699 return(xmlParseEnumeratedType(ctxt, tree)); 5700 } 5701 5702 /** 5703 * xmlParseAttributeListDecl: 5704 * @ctxt: an XML parser context 5705 * 5706 * : parse the Attribute list def for an element 5707 * 5708 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 5709 * 5710 * [53] AttDef ::= S Name S AttType S DefaultDecl 5711 * 5712 */ 5713 void 5714 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5715 const xmlChar *elemName; 5716 const xmlChar *attrName; 5717 xmlEnumerationPtr tree; 5718 5719 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5720 xmlParserInputPtr input = ctxt->input; 5721 5722 SKIP(9); 5723 if (!IS_BLANK_CH(CUR)) { 5724 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5725 "Space required after '<!ATTLIST'\n"); 5726 } 5727 SKIP_BLANKS; 5728 elemName = xmlParseName(ctxt); 5729 if (elemName == NULL) { 5730 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5731 "ATTLIST: no name for Element\n"); 5732 return; 5733 } 5734 SKIP_BLANKS; 5735 GROW; 5736 while (RAW != '>') { 5737 const xmlChar *check = CUR_PTR; 5738 int type; 5739 int def; 5740 xmlChar *defaultValue = NULL; 5741 5742 GROW; 5743 tree = NULL; 5744 attrName = xmlParseName(ctxt); 5745 if (attrName == NULL) { 5746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5747 "ATTLIST: no name for Attribute\n"); 5748 break; 5749 } 5750 GROW; 5751 if (!IS_BLANK_CH(CUR)) { 5752 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5753 "Space required after the attribute name\n"); 5754 break; 5755 } 5756 SKIP_BLANKS; 5757 5758 type = xmlParseAttributeType(ctxt, &tree); 5759 if (type <= 0) { 5760 break; 5761 } 5762 5763 GROW; 5764 if (!IS_BLANK_CH(CUR)) { 5765 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5766 "Space required after the attribute type\n"); 5767 if (tree != NULL) 5768 xmlFreeEnumeration(tree); 5769 break; 5770 } 5771 SKIP_BLANKS; 5772 5773 def = xmlParseDefaultDecl(ctxt, &defaultValue); 5774 if (def <= 0) { 5775 if (defaultValue != NULL) 5776 xmlFree(defaultValue); 5777 if (tree != NULL) 5778 xmlFreeEnumeration(tree); 5779 break; 5780 } 5781 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 5782 xmlAttrNormalizeSpace(defaultValue, defaultValue); 5783 5784 GROW; 5785 if (RAW != '>') { 5786 if (!IS_BLANK_CH(CUR)) { 5787 
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5788 "Space required after the attribute default value\n"); 5789 if (defaultValue != NULL) 5790 xmlFree(defaultValue); 5791 if (tree != NULL) 5792 xmlFreeEnumeration(tree); 5793 break; 5794 } 5795 SKIP_BLANKS; 5796 } 5797 if (check == CUR_PTR) { 5798 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 5799 "in xmlParseAttributeListDecl\n"); 5800 if (defaultValue != NULL) 5801 xmlFree(defaultValue); 5802 if (tree != NULL) 5803 xmlFreeEnumeration(tree); 5804 break; 5805 } 5806 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5807 (ctxt->sax->attributeDecl != NULL)) 5808 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 5809 type, def, defaultValue, tree); 5810 else if (tree != NULL) 5811 xmlFreeEnumeration(tree); 5812 5813 if ((ctxt->sax2) && (defaultValue != NULL) && 5814 (def != XML_ATTRIBUTE_IMPLIED) && 5815 (def != XML_ATTRIBUTE_REQUIRED)) { 5816 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 5817 } 5818 if (ctxt->sax2) { 5819 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 5820 } 5821 if (defaultValue != NULL) 5822 xmlFree(defaultValue); 5823 GROW; 5824 } 5825 if (RAW == '>') { 5826 if (input != ctxt->input) { 5827 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5828 "Attribute list declaration doesn't start and stop in the same entity\n", 5829 NULL, NULL); 5830 } 5831 NEXT; 5832 } 5833 } 5834 } 5835 5836 /** 5837 * xmlParseElementMixedContentDecl: 5838 * @ctxt: an XML parser context 5839 * @inputchk: the input used for the current entity, needed for boundary checks 5840 * 5841 * parse the declaration for a Mixed Element content 5842 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5843 * 5844 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 5845 * '(' S? '#PCDATA' S? 
')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
 *
 * [ VC: No Duplicate Types ]
 * The same name must not appear more than once in a single
 * mixed-content declaration.
 *
 * On every error path the partially built tree is released as a whole
 * before returning, so no node is left behind.
 *
 * returns: the list of the xmlElementContentPtr describing the element choices,
 * or NULL in case of error
 */
xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
    xmlElementContentPtr ret = NULL, cur = NULL, n;
    const xmlChar *elem = NULL;

    GROW;
    if (!CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
        xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
        return(NULL);
    }

    SKIP(7);
    SKIP_BLANKS;
    SHRINK;

    /* Simple form: '(' S? '#PCDATA' S? ')' with an optional '*'. */
    if (RAW == ')') {
        if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                             NULL, NULL);
        }
        NEXT;
        ret = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                      XML_ELEMENT_CONTENT_PCDATA);
        if (ret == NULL)
            return(NULL);
        if (RAW == '*') {
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
            NEXT;
        }
        return(ret);
    }

    if ((RAW == '(') || (RAW == '|')) {
        ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                            XML_ELEMENT_CONTENT_PCDATA);
        if (ret == NULL)
            return(NULL);
    }

    /*
     * Each '|' adds one OR node. The name read in an iteration is
     * attached on the next one (or after the loop), so the chain is
     * ret = OR(#PCDATA, OR(name1, OR(name2, ...))).
     */
    while (RAW == '|') {
        NEXT;
        if (elem == NULL) {
            /* First '|': the new OR node becomes the root above #PCDATA. */
            ret = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                          XML_ELEMENT_CONTENT_OR);
            if (ret == NULL) {
                /* cur still holds the #PCDATA node: do not leak it. */
                xmlFreeDocElementContent(ctxt->myDoc, cur);
                return(NULL);
            }
            ret->c1 = cur;
            if (cur != NULL)
                cur->parent = ret;
            cur = ret;
        } else {
            n = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                        XML_ELEMENT_CONTENT_OR);
            if (n == NULL) {
                /* Release everything built so far, from the root. */
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                            XML_ELEMENT_CONTENT_ELEMENT);
            if (n->c1 != NULL)
                n->c1->parent = n;
            cur->c2 = n;
            n->parent = cur;
            cur = n;
        }
        SKIP_BLANKS;
        elem = xmlParseName(ctxt);
        if (elem == NULL) {
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                    "xmlParseElementMixedContentDecl : Name expected\n");
            /*
             * Free from the root: cur is only the deepest OR node once
             * more than one name has been read, and freeing just that
             * subtree would leak the nodes above it.
             */
            xmlFreeDocElementContent(ctxt->myDoc, ret);
            return(NULL);
        }
        SKIP_BLANKS;
        GROW;
    }

    if ((RAW == ')') && (NXT(1) == '*')) {
        /* Attach the last pending name as the right child. */
        if (elem != NULL) {
            cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                              XML_ELEMENT_CONTENT_ELEMENT);
            if (cur->c2 != NULL)
                cur->c2->parent = cur;
        }
        if (ret != NULL)
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
        if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                             NULL, NULL);
        }
        SKIP(2);
    } else {
        xmlFreeDocElementContent(ctxt->myDoc, ret);
        xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
        return(NULL);
    }
    return(ret);
}

/**
 * xmlParseElementChildrenContentDeclPriv:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 * @depth: the level of recursion
 *
 * parse the declaration for an Element children content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups. 
That is to say, if either of the 5964 * opening or closing parentheses in a choice, seq, or Mixed 5965 * construct is contained in the replacement text for a parameter 5966 * entity, both must be contained in the same replacement text. For 5967 * interoperability, if a parameter-entity reference appears in a 5968 * choice, seq, or Mixed construct, its replacement text should not 5969 * be empty, and neither the first nor last non-blank character of 5970 * the replacement text should be a connector (| or ,). 5971 * 5972 * Returns the tree of xmlElementContentPtr describing the element 5973 * hierarchy. 5974 */ 5975 static xmlElementContentPtr 5976 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 5977 int depth) { 5978 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 5979 const xmlChar *elem; 5980 xmlChar type = 0; 5981 5982 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 5983 (depth > 2048)) { 5984 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 5985 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 5986 depth); 5987 return(NULL); 5988 } 5989 SKIP_BLANKS; 5990 GROW; 5991 if (RAW == '(') { 5992 int inputid = ctxt->input->id; 5993 5994 /* Recurse on first child */ 5995 NEXT; 5996 SKIP_BLANKS; 5997 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 5998 depth + 1); 5999 SKIP_BLANKS; 6000 GROW; 6001 } else { 6002 elem = xmlParseName(ctxt); 6003 if (elem == NULL) { 6004 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6005 return(NULL); 6006 } 6007 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6008 if (cur == NULL) { 6009 xmlErrMemory(ctxt, NULL); 6010 return(NULL); 6011 } 6012 GROW; 6013 if (RAW == '?') { 6014 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6015 NEXT; 6016 } else if (RAW == '*') { 6017 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6018 NEXT; 6019 } else if (RAW == '+') { 6020 cur->ocur = 
XML_ELEMENT_CONTENT_PLUS; 6021 NEXT; 6022 } else { 6023 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6024 } 6025 GROW; 6026 } 6027 SKIP_BLANKS; 6028 SHRINK; 6029 while (RAW != ')') { 6030 /* 6031 * Each loop we parse one separator and one element. 6032 */ 6033 if (RAW == ',') { 6034 if (type == 0) type = CUR; 6035 6036 /* 6037 * Detect "Name | Name , Name" error 6038 */ 6039 else if (type != CUR) { 6040 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6041 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6042 type); 6043 if ((last != NULL) && (last != ret)) 6044 xmlFreeDocElementContent(ctxt->myDoc, last); 6045 if (ret != NULL) 6046 xmlFreeDocElementContent(ctxt->myDoc, ret); 6047 return(NULL); 6048 } 6049 NEXT; 6050 6051 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6052 if (op == NULL) { 6053 if ((last != NULL) && (last != ret)) 6054 xmlFreeDocElementContent(ctxt->myDoc, last); 6055 xmlFreeDocElementContent(ctxt->myDoc, ret); 6056 return(NULL); 6057 } 6058 if (last == NULL) { 6059 op->c1 = ret; 6060 if (ret != NULL) 6061 ret->parent = op; 6062 ret = cur = op; 6063 } else { 6064 cur->c2 = op; 6065 if (op != NULL) 6066 op->parent = cur; 6067 op->c1 = last; 6068 if (last != NULL) 6069 last->parent = op; 6070 cur =op; 6071 last = NULL; 6072 } 6073 } else if (RAW == '|') { 6074 if (type == 0) type = CUR; 6075 6076 /* 6077 * Detect "Name , Name | Name" error 6078 */ 6079 else if (type != CUR) { 6080 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6081 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6082 type); 6083 if ((last != NULL) && (last != ret)) 6084 xmlFreeDocElementContent(ctxt->myDoc, last); 6085 if (ret != NULL) 6086 xmlFreeDocElementContent(ctxt->myDoc, ret); 6087 return(NULL); 6088 } 6089 NEXT; 6090 6091 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6092 if (op == NULL) { 6093 if ((last != NULL) && (last != ret)) 6094 xmlFreeDocElementContent(ctxt->myDoc, last); 6095 if (ret != NULL) 6096 
xmlFreeDocElementContent(ctxt->myDoc, ret); 6097 return(NULL); 6098 } 6099 if (last == NULL) { 6100 op->c1 = ret; 6101 if (ret != NULL) 6102 ret->parent = op; 6103 ret = cur = op; 6104 } else { 6105 cur->c2 = op; 6106 if (op != NULL) 6107 op->parent = cur; 6108 op->c1 = last; 6109 if (last != NULL) 6110 last->parent = op; 6111 cur =op; 6112 last = NULL; 6113 } 6114 } else { 6115 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6116 if ((last != NULL) && (last != ret)) 6117 xmlFreeDocElementContent(ctxt->myDoc, last); 6118 if (ret != NULL) 6119 xmlFreeDocElementContent(ctxt->myDoc, ret); 6120 return(NULL); 6121 } 6122 GROW; 6123 SKIP_BLANKS; 6124 GROW; 6125 if (RAW == '(') { 6126 int inputid = ctxt->input->id; 6127 /* Recurse on second child */ 6128 NEXT; 6129 SKIP_BLANKS; 6130 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6131 depth + 1); 6132 SKIP_BLANKS; 6133 } else { 6134 elem = xmlParseName(ctxt); 6135 if (elem == NULL) { 6136 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6137 if (ret != NULL) 6138 xmlFreeDocElementContent(ctxt->myDoc, ret); 6139 return(NULL); 6140 } 6141 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6142 if (last == NULL) { 6143 if (ret != NULL) 6144 xmlFreeDocElementContent(ctxt->myDoc, ret); 6145 return(NULL); 6146 } 6147 if (RAW == '?') { 6148 last->ocur = XML_ELEMENT_CONTENT_OPT; 6149 NEXT; 6150 } else if (RAW == '*') { 6151 last->ocur = XML_ELEMENT_CONTENT_MULT; 6152 NEXT; 6153 } else if (RAW == '+') { 6154 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6155 NEXT; 6156 } else { 6157 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6158 } 6159 } 6160 SKIP_BLANKS; 6161 GROW; 6162 } 6163 if ((cur != NULL) && (last != NULL)) { 6164 cur->c2 = last; 6165 if (last != NULL) 6166 last->parent = cur; 6167 } 6168 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6169 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6170 "Element content declaration doesn't start and stop in the same 
entity\n", 6171 NULL, NULL); 6172 } 6173 NEXT; 6174 if (RAW == '?') { 6175 if (ret != NULL) { 6176 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6177 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6178 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6179 else 6180 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6181 } 6182 NEXT; 6183 } else if (RAW == '*') { 6184 if (ret != NULL) { 6185 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6186 cur = ret; 6187 /* 6188 * Some normalization: 6189 * (a | b* | c?)* == (a | b | c)* 6190 */ 6191 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6192 if ((cur->c1 != NULL) && 6193 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6194 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6195 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6196 if ((cur->c2 != NULL) && 6197 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6198 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6199 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6200 cur = cur->c2; 6201 } 6202 } 6203 NEXT; 6204 } else if (RAW == '+') { 6205 if (ret != NULL) { 6206 int found = 0; 6207 6208 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6209 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6210 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6211 else 6212 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6213 /* 6214 * Some normalization: 6215 * (a | b*)+ == (a | b)* 6216 * (a | b?)+ == (a | b)* 6217 */ 6218 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6219 if ((cur->c1 != NULL) && 6220 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6221 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6222 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6223 found = 1; 6224 } 6225 if ((cur->c2 != NULL) && 6226 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6227 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6228 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6229 found = 1; 6230 } 6231 cur = cur->c2; 6232 } 6233 if (found) 6234 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6235 } 6236 NEXT; 6237 } 6238 return(ret); 6239 } 6240 6241 /** 6242 * 
xmlParseElementChildrenContentDecl: 6243 * @ctxt: an XML parser context 6244 * @inputchk: the input used for the current entity, needed for boundary checks 6245 * 6246 * parse the declaration for a Mixed Element content 6247 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6248 * 6249 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6250 * 6251 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6252 * 6253 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6254 * 6255 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6256 * 6257 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6258 * TODO Parameter-entity replacement text must be properly nested 6259 * with parenthesized groups. That is to say, if either of the 6260 * opening or closing parentheses in a choice, seq, or Mixed 6261 * construct is contained in the replacement text for a parameter 6262 * entity, both must be contained in the same replacement text. For 6263 * interoperability, if a parameter-entity reference appears in a 6264 * choice, seq, or Mixed construct, its replacement text should not 6265 * be empty, and neither the first nor last non-blank character of 6266 * the replacement text should be a connector (| or ,). 6267 * 6268 * Returns the tree of xmlElementContentPtr describing the element 6269 * hierarchy. 6270 */ 6271 xmlElementContentPtr 6272 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6273 /* stub left for API/ABI compat */ 6274 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6275 } 6276 6277 /** 6278 * xmlParseElementContentDecl: 6279 * @ctxt: an XML parser context 6280 * @name: the name of the element being defined. 
6281 * @result: the Element Content pointer will be stored here if any 6282 * 6283 * parse the declaration for an Element content either Mixed or Children, 6284 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6285 * 6286 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6287 * 6288 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6289 */ 6290 6291 int 6292 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6293 xmlElementContentPtr *result) { 6294 6295 xmlElementContentPtr tree = NULL; 6296 int inputid = ctxt->input->id; 6297 int res; 6298 6299 *result = NULL; 6300 6301 if (RAW != '(') { 6302 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6303 "xmlParseElementContentDecl : %s '(' expected\n", name); 6304 return(-1); 6305 } 6306 NEXT; 6307 GROW; 6308 SKIP_BLANKS; 6309 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6310 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6311 res = XML_ELEMENT_TYPE_MIXED; 6312 } else { 6313 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6314 res = XML_ELEMENT_TYPE_ELEMENT; 6315 } 6316 SKIP_BLANKS; 6317 *result = tree; 6318 return(res); 6319 } 6320 6321 /** 6322 * xmlParseElementDecl: 6323 * @ctxt: an XML parser context 6324 * 6325 * parse an Element declaration. 6326 * 6327 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * The declaration is reported through the SAX elementDecl callback when
 * one is set and SAX is not disabled.
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    int ret = -1;
    xmlElementContentPtr content = NULL;
    xmlParserInputPtr input;

    /* GROW; done in the caller */
    if (!CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T'))
        return(ret);

    /* Input at the start of the declaration, for the boundary check. */
    input = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'ELEMENT'\n");
    }
    SKIP_BLANKS;
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseElementDecl: no name for Element\n");
        return(-1);
    }
    /* Pop inputs that are exhausted before looking at the next char. */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after the element name\n");
    }
    SKIP_BLANKS;

    if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
        /*
         * Element must always be empty.
         */
        SKIP(5);
        ret = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') && (NXT(2) == 'Y')) {
        /*
         * Element is a generic container.
         */
        SKIP(3);
        ret = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        ret = xmlParseElementContentDecl(ctxt, name, &content);
    } else {
        /*
         * [ WFC: PEs in Internal Subset ] error handling.
         */
        if ((RAW == '%') && (ctxt->external == 0) &&
            (ctxt->inputNr == 1)) {
            xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
      "PEReference: forbidden within markup decl in internal subset\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        }
        return(-1);
    }

    SKIP_BLANKS;
    /*
     * Pop-up of finished entities.
     */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    SKIP_BLANKS;

    if (RAW != '>') {
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
        if (content != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
        return(ret);
    }

    if (input != ctxt->input) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element declaration doesn't start and stop in the same entity\n");
    }

    NEXT;
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
        (ctxt->sax->elementDecl != NULL)) {
        if (content != NULL)
            content->parent = NULL;
        ctxt->sax->elementDecl(ctxt->userData, name, ret, content);
        if ((content != NULL) && (content->parent == NULL)) {
            /*
             * this is a trick: if xmlAddElementDecl is called,
             * instead of copying the full tree it is plugged directly
             * if called from the parser. Avoid duplicating the
             * interfaces or change the API/ABI
             */
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
    } else if (content != NULL) {
        xmlFreeDocElementContent(ctxt->myDoc, content);
    }
    return(ret);
}

/**
 * xmlParseConditionalSections
 * @ctxt:  an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 6443 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6444 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6445 */ 6446 6447 static void 6448 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6449 int id = ctxt->input->id; 6450 6451 SKIP(3); 6452 SKIP_BLANKS; 6453 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6454 SKIP(7); 6455 SKIP_BLANKS; 6456 if (RAW != '[') { 6457 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6458 } else { 6459 if (ctxt->input->id != id) { 6460 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6461 "All markup of the conditional section is not in the same entity\n", 6462 NULL, NULL); 6463 } 6464 NEXT; 6465 } 6466 if (xmlParserDebugEntities) { 6467 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6468 xmlGenericError(xmlGenericErrorContext, 6469 "%s(%d): ", ctxt->input->filename, 6470 ctxt->input->line); 6471 xmlGenericError(xmlGenericErrorContext, 6472 "Entering INCLUDE Conditional Section\n"); 6473 } 6474 6475 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6476 (NXT(2) != '>'))) { 6477 const xmlChar *check = CUR_PTR; 6478 unsigned int cons = ctxt->input->consumed; 6479 6480 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6481 xmlParseConditionalSections(ctxt); 6482 } else if (IS_BLANK_CH(CUR)) { 6483 NEXT; 6484 } else if (RAW == '%') { 6485 xmlParsePEReference(ctxt); 6486 } else 6487 xmlParseMarkupDecl(ctxt); 6488 6489 /* 6490 * Pop-up of finished entities. 
6491 */ 6492 while ((RAW == 0) && (ctxt->inputNr > 1)) 6493 xmlPopInput(ctxt); 6494 6495 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6496 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6497 break; 6498 } 6499 } 6500 if (xmlParserDebugEntities) { 6501 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6502 xmlGenericError(xmlGenericErrorContext, 6503 "%s(%d): ", ctxt->input->filename, 6504 ctxt->input->line); 6505 xmlGenericError(xmlGenericErrorContext, 6506 "Leaving INCLUDE Conditional Section\n"); 6507 } 6508 6509 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6510 int state; 6511 xmlParserInputState instate; 6512 int depth = 0; 6513 6514 SKIP(6); 6515 SKIP_BLANKS; 6516 if (RAW != '[') { 6517 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6518 } else { 6519 if (ctxt->input->id != id) { 6520 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6521 "All markup of the conditional section is not in the same entity\n", 6522 NULL, NULL); 6523 } 6524 NEXT; 6525 } 6526 if (xmlParserDebugEntities) { 6527 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6528 xmlGenericError(xmlGenericErrorContext, 6529 "%s(%d): ", ctxt->input->filename, 6530 ctxt->input->line); 6531 xmlGenericError(xmlGenericErrorContext, 6532 "Entering IGNORE Conditional Section\n"); 6533 } 6534 6535 /* 6536 * Parse up to the end of the conditional section 6537 * But disable SAX event generating DTD building in the meantime 6538 */ 6539 state = ctxt->disableSAX; 6540 instate = ctxt->instate; 6541 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6542 ctxt->instate = XML_PARSER_IGNORE; 6543 6544 while ((depth >= 0) && (RAW != 0)) { 6545 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6546 depth++; 6547 SKIP(3); 6548 continue; 6549 } 6550 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6551 if (--depth >= 0) SKIP(3); 6552 continue; 6553 } 6554 NEXT; 6555 continue; 6556 } 6557 6558 ctxt->disableSAX = state; 6559 ctxt->instate = instate; 6560 6561 
if (xmlParserDebugEntities) { 6562 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6563 xmlGenericError(xmlGenericErrorContext, 6564 "%s(%d): ", ctxt->input->filename, 6565 ctxt->input->line); 6566 xmlGenericError(xmlGenericErrorContext, 6567 "Leaving IGNORE Conditional Section\n"); 6568 } 6569 6570 } else { 6571 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6572 } 6573 6574 if (RAW == 0) 6575 SHRINK; 6576 6577 if (RAW == 0) { 6578 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6579 } else { 6580 if (ctxt->input->id != id) { 6581 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6582 "All markup of the conditional section is not in the same entity\n", 6583 NULL, NULL); 6584 } 6585 SKIP(3); 6586 } 6587 } 6588 6589 /** 6590 * xmlParseMarkupDecl: 6591 * @ctxt: an XML parser context 6592 * 6593 * parse Markup declarations 6594 * 6595 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6596 * NotationDecl | PI | Comment 6597 * 6598 * [ VC: Proper Declaration/PE Nesting ] 6599 * Parameter-entity replacement text must be properly nested with 6600 * markup declarations. That is to say, if either the first character 6601 * or the last character of a markup declaration (markupdecl above) is 6602 * contained in the replacement text for a parameter-entity reference, 6603 * both must be contained in the same replacement text. 6604 * 6605 * [ WFC: PEs in Internal Subset ] 6606 * In the internal DTD subset, parameter-entity references can occur 6607 * only where markup declarations can occur, not within markup declarations. 6608 * (This does not apply to references that occur in external parameter 6609 * entities or to the external subset.) 
6610 */ 6611 void 6612 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6613 GROW; 6614 if (CUR == '<') { 6615 if (NXT(1) == '!') { 6616 switch (NXT(2)) { 6617 case 'E': 6618 if (NXT(3) == 'L') 6619 xmlParseElementDecl(ctxt); 6620 else if (NXT(3) == 'N') 6621 xmlParseEntityDecl(ctxt); 6622 break; 6623 case 'A': 6624 xmlParseAttributeListDecl(ctxt); 6625 break; 6626 case 'N': 6627 xmlParseNotationDecl(ctxt); 6628 break; 6629 case '-': 6630 xmlParseComment(ctxt); 6631 break; 6632 default: 6633 /* there is an error but it will be detected later */ 6634 break; 6635 } 6636 } else if (NXT(1) == '?') { 6637 xmlParsePI(ctxt); 6638 } 6639 } 6640 /* 6641 * This is only for internal subset. On external entities, 6642 * the replacement is done before parsing stage 6643 */ 6644 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6645 xmlParsePEReference(ctxt); 6646 6647 /* 6648 * Conditional sections are allowed from entities included 6649 * by PE References in the internal subset. 6650 */ 6651 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6652 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6653 xmlParseConditionalSections(ctxt); 6654 } 6655 } 6656 6657 ctxt->instate = XML_PARSER_DTD; 6658 } 6659 6660 /** 6661 * xmlParseTextDecl: 6662 * @ctxt: an XML parser context 6663 * 6664 * parse an XML declaration header for external entities 6665 * 6666 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6667 */ 6668 6669 void 6670 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6671 xmlChar *version; 6672 const xmlChar *encoding; 6673 6674 /* 6675 * We know that '<?xml' is here. 6676 */ 6677 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6678 SKIP(5); 6679 } else { 6680 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6681 return; 6682 } 6683 6684 if (!IS_BLANK_CH(CUR)) { 6685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6686 "Space needed after '<?xml'\n"); 6687 } 6688 SKIP_BLANKS; 6689 6690 /* 6691 * We may have the VersionInfo here. 
6692 */ 6693 version = xmlParseVersionInfo(ctxt); 6694 if (version == NULL) 6695 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6696 else { 6697 if (!IS_BLANK_CH(CUR)) { 6698 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6699 "Space needed here\n"); 6700 } 6701 } 6702 ctxt->input->version = version; 6703 6704 /* 6705 * We must have the encoding declaration 6706 */ 6707 encoding = xmlParseEncodingDecl(ctxt); 6708 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6709 /* 6710 * The XML REC instructs us to stop parsing right here 6711 */ 6712 return; 6713 } 6714 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6715 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6716 "Missing encoding in text declaration\n"); 6717 } 6718 6719 SKIP_BLANKS; 6720 if ((RAW == '?') && (NXT(1) == '>')) { 6721 SKIP(2); 6722 } else if (RAW == '>') { 6723 /* Deprecated old WD ... */ 6724 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6725 NEXT; 6726 } else { 6727 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6728 MOVETO_ENDTAG(CUR_PTR); 6729 NEXT; 6730 } 6731 } 6732 6733 /** 6734 * xmlParseExternalSubset: 6735 * @ctxt: an XML parser context 6736 * @ExternalID: the external identifier 6737 * @SystemID: the system identifier (or URL) 6738 * 6739 * parse Markup declarations from an external subset 6740 * 6741 * [30] extSubset ::= textDecl? 
extSubsetDecl 6742 * 6743 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6744 */ 6745 void 6746 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6747 const xmlChar *SystemID) { 6748 xmlDetectSAX2(ctxt); 6749 GROW; 6750 6751 if ((ctxt->encoding == NULL) && 6752 (ctxt->input->end - ctxt->input->cur >= 4)) { 6753 xmlChar start[4]; 6754 xmlCharEncoding enc; 6755 6756 start[0] = RAW; 6757 start[1] = NXT(1); 6758 start[2] = NXT(2); 6759 start[3] = NXT(3); 6760 enc = xmlDetectCharEncoding(start, 4); 6761 if (enc != XML_CHAR_ENCODING_NONE) 6762 xmlSwitchEncoding(ctxt, enc); 6763 } 6764 6765 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6766 xmlParseTextDecl(ctxt); 6767 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6768 /* 6769 * The XML REC instructs us to stop parsing right here 6770 */ 6771 ctxt->instate = XML_PARSER_EOF; 6772 return; 6773 } 6774 } 6775 if (ctxt->myDoc == NULL) { 6776 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6777 if (ctxt->myDoc == NULL) { 6778 xmlErrMemory(ctxt, "New Doc failed"); 6779 return; 6780 } 6781 ctxt->myDoc->properties = XML_DOC_INTERNAL; 6782 } 6783 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6784 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6785 6786 ctxt->instate = XML_PARSER_DTD; 6787 ctxt->external = 1; 6788 while (((RAW == '<') && (NXT(1) == '?')) || 6789 ((RAW == '<') && (NXT(1) == '!')) || 6790 (RAW == '%') || IS_BLANK_CH(CUR)) { 6791 const xmlChar *check = CUR_PTR; 6792 unsigned int cons = ctxt->input->consumed; 6793 6794 GROW; 6795 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6796 xmlParseConditionalSections(ctxt); 6797 } else if (IS_BLANK_CH(CUR)) { 6798 NEXT; 6799 } else if (RAW == '%') { 6800 xmlParsePEReference(ctxt); 6801 } else 6802 xmlParseMarkupDecl(ctxt); 6803 6804 /* 6805 * Pop-up of finished entities. 
6806 */ 6807 while ((RAW == 0) && (ctxt->inputNr > 1)) 6808 xmlPopInput(ctxt); 6809 6810 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6811 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6812 break; 6813 } 6814 } 6815 6816 if (RAW != 0) { 6817 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6818 } 6819 6820 } 6821 6822 /** 6823 * xmlParseReference: 6824 * @ctxt: an XML parser context 6825 * 6826 * parse and handle entity references in content, depending on the SAX 6827 * interface, this may end-up in a call to character() if this is a 6828 * CharRef, a predefined entity, if there is no reference() callback. 6829 * or if the parser was asked to switch to that mode. 6830 * 6831 * [67] Reference ::= EntityRef | CharRef 6832 */ 6833 void 6834 xmlParseReference(xmlParserCtxtPtr ctxt) { 6835 xmlEntityPtr ent; 6836 xmlChar *val; 6837 int was_checked; 6838 xmlNodePtr list = NULL; 6839 xmlParserErrors ret = XML_ERR_OK; 6840 6841 6842 if (RAW != '&') 6843 return; 6844 6845 /* 6846 * Simple case of a CharRef 6847 */ 6848 if (NXT(1) == '#') { 6849 int i = 0; 6850 xmlChar out[10]; 6851 int hex = NXT(2); 6852 int value = xmlParseCharRef(ctxt); 6853 6854 if (value == 0) 6855 return; 6856 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 6857 /* 6858 * So we are using non-UTF-8 buffers 6859 * Check that the char fit on 8bits, if not 6860 * generate a CharRef. 
6861 */ 6862 if (value <= 0xFF) { 6863 out[0] = value; 6864 out[1] = 0; 6865 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6866 (!ctxt->disableSAX)) 6867 ctxt->sax->characters(ctxt->userData, out, 1); 6868 } else { 6869 if ((hex == 'x') || (hex == 'X')) 6870 snprintf((char *)out, sizeof(out), "#x%X", value); 6871 else 6872 snprintf((char *)out, sizeof(out), "#%d", value); 6873 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6874 (!ctxt->disableSAX)) 6875 ctxt->sax->reference(ctxt->userData, out); 6876 } 6877 } else { 6878 /* 6879 * Just encode the value in UTF-8 6880 */ 6881 COPY_BUF(0 ,out, i, value); 6882 out[i] = 0; 6883 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6884 (!ctxt->disableSAX)) 6885 ctxt->sax->characters(ctxt->userData, out, i); 6886 } 6887 return; 6888 } 6889 6890 /* 6891 * We are seeing an entity reference 6892 */ 6893 ent = xmlParseEntityRef(ctxt); 6894 if (ent == NULL) return; 6895 if (!ctxt->wellFormed) 6896 return; 6897 was_checked = ent->checked; 6898 6899 /* special case of predefined entities */ 6900 if ((ent->name == NULL) || 6901 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 6902 val = ent->content; 6903 if (val == NULL) return; 6904 /* 6905 * inline the entity. 6906 */ 6907 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6908 (!ctxt->disableSAX)) 6909 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 6910 return; 6911 } 6912 6913 /* 6914 * The first reference to the entity trigger a parsing phase 6915 * where the ent->children is filled with the result from 6916 * the parsing. 6917 */ 6918 if (ent->checked == 0) { 6919 unsigned long oldnbent = ctxt->nbentities; 6920 6921 /* 6922 * This is a bit hackish but this seems the best 6923 * way to make sure both SAX and DOM entity support 6924 * behaves okay. 
6925 */ 6926 void *user_data; 6927 if (ctxt->userData == ctxt) 6928 user_data = NULL; 6929 else 6930 user_data = ctxt->userData; 6931 6932 /* 6933 * Check that this entity is well formed 6934 * 4.3.2: An internal general parsed entity is well-formed 6935 * if its replacement text matches the production labeled 6936 * content. 6937 */ 6938 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6939 ctxt->depth++; 6940 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 6941 user_data, &list); 6942 ctxt->depth--; 6943 6944 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6945 ctxt->depth++; 6946 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 6947 user_data, ctxt->depth, ent->URI, 6948 ent->ExternalID, &list); 6949 ctxt->depth--; 6950 } else { 6951 ret = XML_ERR_ENTITY_PE_INTERNAL; 6952 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6953 "invalid entity type found\n", NULL); 6954 } 6955 6956 /* 6957 * Store the number of entities needing parsing for this entity 6958 * content and do checkings 6959 */ 6960 ent->checked = ctxt->nbentities - oldnbent; 6961 if (ret == XML_ERR_ENTITY_LOOP) { 6962 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6963 xmlFreeNodeList(list); 6964 return; 6965 } 6966 if (xmlParserEntityCheck(ctxt, 0, ent)) { 6967 xmlFreeNodeList(list); 6968 return; 6969 } 6970 6971 if ((ret == XML_ERR_OK) && (list != NULL)) { 6972 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 6973 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 6974 (ent->children == NULL)) { 6975 ent->children = list; 6976 if (ctxt->replaceEntities) { 6977 /* 6978 * Prune it directly in the generated document 6979 * except for single text nodes. 
6980 */ 6981 if (((list->type == XML_TEXT_NODE) && 6982 (list->next == NULL)) || 6983 (ctxt->parseMode == XML_PARSE_READER)) { 6984 list->parent = (xmlNodePtr) ent; 6985 list = NULL; 6986 ent->owner = 1; 6987 } else { 6988 ent->owner = 0; 6989 while (list != NULL) { 6990 list->parent = (xmlNodePtr) ctxt->node; 6991 list->doc = ctxt->myDoc; 6992 if (list->next == NULL) 6993 ent->last = list; 6994 list = list->next; 6995 } 6996 list = ent->children; 6997 #ifdef LIBXML_LEGACY_ENABLED 6998 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6999 xmlAddEntityReference(ent, list, NULL); 7000 #endif /* LIBXML_LEGACY_ENABLED */ 7001 } 7002 } else { 7003 ent->owner = 1; 7004 while (list != NULL) { 7005 list->parent = (xmlNodePtr) ent; 7006 xmlSetTreeDoc(list, ent->doc); 7007 if (list->next == NULL) 7008 ent->last = list; 7009 list = list->next; 7010 } 7011 } 7012 } else { 7013 xmlFreeNodeList(list); 7014 list = NULL; 7015 } 7016 } else if ((ret != XML_ERR_OK) && 7017 (ret != XML_WAR_UNDECLARED_ENTITY)) { 7018 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7019 "Entity '%s' failed to parse\n", ent->name); 7020 } else if (list != NULL) { 7021 xmlFreeNodeList(list); 7022 list = NULL; 7023 } 7024 if (ent->checked == 0) 7025 ent->checked = 1; 7026 } else if (ent->checked != 1) { 7027 ctxt->nbentities += ent->checked; 7028 } 7029 7030 /* 7031 * Now that the entity content has been gathered 7032 * provide it to the application, this can take different forms based 7033 * on the parsing modes. 7034 */ 7035 if (ent->children == NULL) { 7036 /* 7037 * Probably running in SAX mode and the callbacks don't 7038 * build the entity content. So unless we already went 7039 * though parsing for first checking go though the entity 7040 * content to generate callbacks associated to the entity 7041 */ 7042 if (was_checked != 0) { 7043 void *user_data; 7044 /* 7045 * This is a bit hackish but this seems the best 7046 * way to make sure both SAX and DOM entity support 7047 * behaves okay. 
7048 */ 7049 if (ctxt->userData == ctxt) 7050 user_data = NULL; 7051 else 7052 user_data = ctxt->userData; 7053 7054 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7055 ctxt->depth++; 7056 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7057 ent->content, user_data, NULL); 7058 ctxt->depth--; 7059 } else if (ent->etype == 7060 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7061 ctxt->depth++; 7062 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7063 ctxt->sax, user_data, ctxt->depth, 7064 ent->URI, ent->ExternalID, NULL); 7065 ctxt->depth--; 7066 } else { 7067 ret = XML_ERR_ENTITY_PE_INTERNAL; 7068 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7069 "invalid entity type found\n", NULL); 7070 } 7071 if (ret == XML_ERR_ENTITY_LOOP) { 7072 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7073 return; 7074 } 7075 } 7076 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7077 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7078 /* 7079 * Entity reference callback comes second, it's somewhat 7080 * superfluous but a compatibility to historical behaviour 7081 */ 7082 ctxt->sax->reference(ctxt->userData, ent->name); 7083 } 7084 return; 7085 } 7086 7087 /* 7088 * If we didn't get any children for the entity being built 7089 */ 7090 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7091 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7092 /* 7093 * Create a node. 7094 */ 7095 ctxt->sax->reference(ctxt->userData, ent->name); 7096 return; 7097 } 7098 7099 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7100 /* 7101 * There is a problem on the handling of _private for entities 7102 * (bug 155816): Should we copy the content of the field from 7103 * the entity (possibly overwriting some value set by the user 7104 * when a copy is created), should we leave it alone, or should 7105 * we try to take care of different situations? The problem 7106 * is exacerbated by the usage of this field by the xmlReader. 
7107 * To fix this bug, we look at _private on the created node 7108 * and, if it's NULL, we copy in whatever was in the entity. 7109 * If it's not NULL we leave it alone. This is somewhat of a 7110 * hack - maybe we should have further tests to determine 7111 * what to do. 7112 */ 7113 if ((ctxt->node != NULL) && (ent->children != NULL)) { 7114 /* 7115 * Seems we are generating the DOM content, do 7116 * a simple tree copy for all references except the first 7117 * In the first occurrence list contains the replacement. 7118 * progressive == 2 means we are operating on the Reader 7119 * and since nodes are discarded we must copy all the time. 7120 */ 7121 if (((list == NULL) && (ent->owner == 0)) || 7122 (ctxt->parseMode == XML_PARSE_READER)) { 7123 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7124 7125 /* 7126 * when operating on a reader, the entities definitions 7127 * are always owning the entities subtree. 7128 if (ctxt->parseMode == XML_PARSE_READER) 7129 ent->owner = 1; 7130 */ 7131 7132 cur = ent->children; 7133 while (cur != NULL) { 7134 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7135 if (nw != NULL) { 7136 if (nw->_private == NULL) 7137 nw->_private = cur->_private; 7138 if (firstChild == NULL){ 7139 firstChild = nw; 7140 } 7141 nw = xmlAddChild(ctxt->node, nw); 7142 } 7143 if (cur == ent->last) { 7144 /* 7145 * needed to detect some strange empty 7146 * node cases in the reader tests 7147 */ 7148 if ((ctxt->parseMode == XML_PARSE_READER) && 7149 (nw != NULL) && 7150 (nw->type == XML_ELEMENT_NODE) && 7151 (nw->children == NULL)) 7152 nw->extra = 1; 7153 7154 break; 7155 } 7156 cur = cur->next; 7157 } 7158 #ifdef LIBXML_LEGACY_ENABLED 7159 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7160 xmlAddEntityReference(ent, firstChild, nw); 7161 #endif /* LIBXML_LEGACY_ENABLED */ 7162 } else if (list == NULL) { 7163 xmlNodePtr nw = NULL, cur, next, last, 7164 firstChild = NULL; 7165 /* 7166 * Copy the entity child list and make it the new 7167 * entity 
child list. The goal is to make sure any 7168 * ID or REF referenced will be the one from the 7169 * document content and not the entity copy. 7170 */ 7171 cur = ent->children; 7172 ent->children = NULL; 7173 last = ent->last; 7174 ent->last = NULL; 7175 while (cur != NULL) { 7176 next = cur->next; 7177 cur->next = NULL; 7178 cur->parent = NULL; 7179 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7180 if (nw != NULL) { 7181 if (nw->_private == NULL) 7182 nw->_private = cur->_private; 7183 if (firstChild == NULL){ 7184 firstChild = cur; 7185 } 7186 xmlAddChild((xmlNodePtr) ent, nw); 7187 xmlAddChild(ctxt->node, cur); 7188 } 7189 if (cur == last) 7190 break; 7191 cur = next; 7192 } 7193 if (ent->owner == 0) 7194 ent->owner = 1; 7195 #ifdef LIBXML_LEGACY_ENABLED 7196 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7197 xmlAddEntityReference(ent, firstChild, nw); 7198 #endif /* LIBXML_LEGACY_ENABLED */ 7199 } else { 7200 const xmlChar *nbktext; 7201 7202 /* 7203 * the name change is to avoid coalescing of the 7204 * node with a possible previous text one which 7205 * would make ent->children a dangling pointer 7206 */ 7207 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7208 -1); 7209 if (ent->children->type == XML_TEXT_NODE) 7210 ent->children->name = nbktext; 7211 if ((ent->last != ent->children) && 7212 (ent->last->type == XML_TEXT_NODE)) 7213 ent->last->name = nbktext; 7214 xmlAddChildList(ctxt->node, ent->children); 7215 } 7216 7217 /* 7218 * This is to avoid a nasty side effect, see 7219 * characters() in SAX.c 7220 */ 7221 ctxt->nodemem = 0; 7222 ctxt->nodelen = 0; 7223 return; 7224 } 7225 } 7226 } 7227 7228 /** 7229 * xmlParseEntityRef: 7230 * @ctxt: an XML parser context 7231 * 7232 * parse ENTITY references declarations 7233 * 7234 * [68] EntityRef ::= '&' Name ';' 7235 * 7236 * [ WFC: Entity Declared ] 7237 * In a document without any DTD, a document with only an internal DTD 7238 * subset which contains no parameter entity references, or a document 
7239 * with "standalone='yes'", the Name given in the entity reference 7240 * must match that in an entity declaration, except that well-formed 7241 * documents need not declare any of the following entities: amp, lt, 7242 * gt, apos, quot. The declaration of a parameter entity must precede 7243 * any reference to it. Similarly, the declaration of a general entity 7244 * must precede any reference to it which appears in a default value in an 7245 * attribute-list declaration. Note that if entities are declared in the 7246 * external subset or in external parameter entities, a non-validating 7247 * processor is not obligated to read and process their declarations; 7248 * for such documents, the rule that an entity must be declared is a 7249 * well-formedness constraint only if standalone='yes'. 7250 * 7251 * [ WFC: Parsed Entity ] 7252 * An entity reference must not contain the name of an unparsed entity 7253 * 7254 * Returns the xmlEntityPtr if found, or NULL otherwise. 7255 */ 7256 xmlEntityPtr 7257 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7258 const xmlChar *name; 7259 xmlEntityPtr ent = NULL; 7260 7261 GROW; 7262 7263 if (RAW != '&') 7264 return(NULL); 7265 NEXT; 7266 name = xmlParseName(ctxt); 7267 if (name == NULL) { 7268 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7269 "xmlParseEntityRef: no name\n"); 7270 return(NULL); 7271 } 7272 if (RAW != ';') { 7273 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7274 return(NULL); 7275 } 7276 NEXT; 7277 7278 /* 7279 * Predefined entites override any extra definition 7280 */ 7281 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7282 ent = xmlGetPredefinedEntity(name); 7283 if (ent != NULL) 7284 return(ent); 7285 } 7286 7287 /* 7288 * Increate the number of entity references parsed 7289 */ 7290 ctxt->nbentities++; 7291 7292 /* 7293 * Ask first SAX for entity resolution, otherwise try the 7294 * entities which may have stored in the parser context. 
7295 */ 7296 if (ctxt->sax != NULL) { 7297 if (ctxt->sax->getEntity != NULL) 7298 ent = ctxt->sax->getEntity(ctxt->userData, name); 7299 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7300 (ctxt->options & XML_PARSE_OLDSAX)) 7301 ent = xmlGetPredefinedEntity(name); 7302 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7303 (ctxt->userData==ctxt)) { 7304 ent = xmlSAX2GetEntity(ctxt, name); 7305 } 7306 } 7307 /* 7308 * [ WFC: Entity Declared ] 7309 * In a document without any DTD, a document with only an 7310 * internal DTD subset which contains no parameter entity 7311 * references, or a document with "standalone='yes'", the 7312 * Name given in the entity reference must match that in an 7313 * entity declaration, except that well-formed documents 7314 * need not declare any of the following entities: amp, lt, 7315 * gt, apos, quot. 7316 * The declaration of a parameter entity must precede any 7317 * reference to it. 7318 * Similarly, the declaration of a general entity must 7319 * precede any reference to it which appears in a default 7320 * value in an attribute-list declaration. Note that if 7321 * entities are declared in the external subset or in 7322 * external parameter entities, a non-validating processor 7323 * is not obligated to read and process their declarations; 7324 * for such documents, the rule that an entity must be 7325 * declared is a well-formedness constraint only if 7326 * standalone='yes'. 
7327 */ 7328 if (ent == NULL) { 7329 if ((ctxt->standalone == 1) || 7330 ((ctxt->hasExternalSubset == 0) && 7331 (ctxt->hasPErefs == 0))) { 7332 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7333 "Entity '%s' not defined\n", name); 7334 } else { 7335 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7336 "Entity '%s' not defined\n", name); 7337 if ((ctxt->inSubset == 0) && 7338 (ctxt->sax != NULL) && 7339 (ctxt->sax->reference != NULL)) { 7340 ctxt->sax->reference(ctxt->userData, name); 7341 } 7342 } 7343 ctxt->valid = 0; 7344 } 7345 7346 /* 7347 * [ WFC: Parsed Entity ] 7348 * An entity reference must not contain the name of an 7349 * unparsed entity 7350 */ 7351 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7352 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7353 "Entity reference to unparsed entity %s\n", name); 7354 } 7355 7356 /* 7357 * [ WFC: No External Entity References ] 7358 * Attribute values cannot contain direct or indirect 7359 * entity references to external entities. 7360 */ 7361 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7362 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7363 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7364 "Attribute references external entity '%s'\n", name); 7365 } 7366 /* 7367 * [ WFC: No < in Attribute Values ] 7368 * The replacement text of any entity referred to directly or 7369 * indirectly in an attribute value (other than "<") must 7370 * not contain a <. 7371 */ 7372 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7373 (ent != NULL) && (ent->content != NULL) && 7374 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7375 (xmlStrchr(ent->content, '<'))) { 7376 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7377 "'<' in entity '%s' is not allowed in attributes values\n", name); 7378 } 7379 7380 /* 7381 * Internal check, no parameter entities here ... 
7382 */ 7383 else { 7384 switch (ent->etype) { 7385 case XML_INTERNAL_PARAMETER_ENTITY: 7386 case XML_EXTERNAL_PARAMETER_ENTITY: 7387 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7388 "Attempt to reference the parameter entity '%s'\n", 7389 name); 7390 break; 7391 default: 7392 break; 7393 } 7394 } 7395 7396 /* 7397 * [ WFC: No Recursion ] 7398 * A parsed entity must not contain a recursive reference 7399 * to itself, either directly or indirectly. 7400 * Done somewhere else 7401 */ 7402 return(ent); 7403 } 7404 7405 /** 7406 * xmlParseStringEntityRef: 7407 * @ctxt: an XML parser context 7408 * @str: a pointer to an index in the string 7409 * 7410 * parse ENTITY references declarations, but this version parses it from 7411 * a string value. 7412 * 7413 * [68] EntityRef ::= '&' Name ';' 7414 * 7415 * [ WFC: Entity Declared ] 7416 * In a document without any DTD, a document with only an internal DTD 7417 * subset which contains no parameter entity references, or a document 7418 * with "standalone='yes'", the Name given in the entity reference 7419 * must match that in an entity declaration, except that well-formed 7420 * documents need not declare any of the following entities: amp, lt, 7421 * gt, apos, quot. The declaration of a parameter entity must precede 7422 * any reference to it. Similarly, the declaration of a general entity 7423 * must precede any reference to it which appears in a default value in an 7424 * attribute-list declaration. Note that if entities are declared in the 7425 * external subset or in external parameter entities, a non-validating 7426 * processor is not obligated to read and process their declarations; 7427 * for such documents, the rule that an entity must be declared is a 7428 * well-formedness constraint only if standalone='yes'. 7429 * 7430 * [ WFC: Parsed Entity ] 7431 * An entity reference must not contain the name of an unparsed entity 7432 * 7433 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7434 * is updated to the current location in the string. 7435 */ 7436 static xmlEntityPtr 7437 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7438 xmlChar *name; 7439 const xmlChar *ptr; 7440 xmlChar cur; 7441 xmlEntityPtr ent = NULL; 7442 7443 if ((str == NULL) || (*str == NULL)) 7444 return(NULL); 7445 ptr = *str; 7446 cur = *ptr; 7447 if (cur != '&') 7448 return(NULL); 7449 7450 ptr++; 7451 name = xmlParseStringName(ctxt, &ptr); 7452 if (name == NULL) { 7453 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7454 "xmlParseStringEntityRef: no name\n"); 7455 *str = ptr; 7456 return(NULL); 7457 } 7458 if (*ptr != ';') { 7459 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7460 xmlFree(name); 7461 *str = ptr; 7462 return(NULL); 7463 } 7464 ptr++; 7465 7466 7467 /* 7468 * Predefined entites override any extra definition 7469 */ 7470 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7471 ent = xmlGetPredefinedEntity(name); 7472 if (ent != NULL) { 7473 xmlFree(name); 7474 *str = ptr; 7475 return(ent); 7476 } 7477 } 7478 7479 /* 7480 * Increate the number of entity references parsed 7481 */ 7482 ctxt->nbentities++; 7483 7484 /* 7485 * Ask first SAX for entity resolution, otherwise try the 7486 * entities which may have stored in the parser context. 
7487 */ 7488 if (ctxt->sax != NULL) { 7489 if (ctxt->sax->getEntity != NULL) 7490 ent = ctxt->sax->getEntity(ctxt->userData, name); 7491 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7492 ent = xmlGetPredefinedEntity(name); 7493 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7494 ent = xmlSAX2GetEntity(ctxt, name); 7495 } 7496 } 7497 7498 /* 7499 * [ WFC: Entity Declared ] 7500 * In a document without any DTD, a document with only an 7501 * internal DTD subset which contains no parameter entity 7502 * references, or a document with "standalone='yes'", the 7503 * Name given in the entity reference must match that in an 7504 * entity declaration, except that well-formed documents 7505 * need not declare any of the following entities: amp, lt, 7506 * gt, apos, quot. 7507 * The declaration of a parameter entity must precede any 7508 * reference to it. 7509 * Similarly, the declaration of a general entity must 7510 * precede any reference to it which appears in a default 7511 * value in an attribute-list declaration. Note that if 7512 * entities are declared in the external subset or in 7513 * external parameter entities, a non-validating processor 7514 * is not obligated to read and process their declarations; 7515 * for such documents, the rule that an entity must be 7516 * declared is a well-formedness constraint only if 7517 * standalone='yes'. 7518 */ 7519 if (ent == NULL) { 7520 if ((ctxt->standalone == 1) || 7521 ((ctxt->hasExternalSubset == 0) && 7522 (ctxt->hasPErefs == 0))) { 7523 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7524 "Entity '%s' not defined\n", name); 7525 } else { 7526 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7527 "Entity '%s' not defined\n", 7528 name); 7529 } 7530 /* TODO ? 
check regressions ctxt->valid = 0; */ 7531 } 7532 7533 /* 7534 * [ WFC: Parsed Entity ] 7535 * An entity reference must not contain the name of an 7536 * unparsed entity 7537 */ 7538 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7539 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7540 "Entity reference to unparsed entity %s\n", name); 7541 } 7542 7543 /* 7544 * [ WFC: No External Entity References ] 7545 * Attribute values cannot contain direct or indirect 7546 * entity references to external entities. 7547 */ 7548 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7549 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7550 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7551 "Attribute references external entity '%s'\n", name); 7552 } 7553 /* 7554 * [ WFC: No < in Attribute Values ] 7555 * The replacement text of any entity referred to directly or 7556 * indirectly in an attribute value (other than "<") must 7557 * not contain a <. 7558 */ 7559 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7560 (ent != NULL) && (ent->content != NULL) && 7561 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7562 (xmlStrchr(ent->content, '<'))) { 7563 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7564 "'<' in entity '%s' is not allowed in attributes values\n", 7565 name); 7566 } 7567 7568 /* 7569 * Internal check, no parameter entities here ... 7570 */ 7571 else { 7572 switch (ent->etype) { 7573 case XML_INTERNAL_PARAMETER_ENTITY: 7574 case XML_EXTERNAL_PARAMETER_ENTITY: 7575 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7576 "Attempt to reference the parameter entity '%s'\n", 7577 name); 7578 break; 7579 default: 7580 break; 7581 } 7582 } 7583 7584 /* 7585 * [ WFC: No Recursion ] 7586 * A parsed entity must not contain a recursive reference 7587 * to itself, either directly or indirectly. 
7588 * Done somewhere else 7589 */ 7590 7591 xmlFree(name); 7592 *str = ptr; 7593 return(ent); 7594 } 7595 7596 /** 7597 * xmlParsePEReference: 7598 * @ctxt: an XML parser context 7599 * 7600 * parse PEReference declarations 7601 * The entity content is handled directly by pushing it's content as 7602 * a new input stream. 7603 * 7604 * [69] PEReference ::= '%' Name ';' 7605 * 7606 * [ WFC: No Recursion ] 7607 * A parsed entity must not contain a recursive 7608 * reference to itself, either directly or indirectly. 7609 * 7610 * [ WFC: Entity Declared ] 7611 * In a document without any DTD, a document with only an internal DTD 7612 * subset which contains no parameter entity references, or a document 7613 * with "standalone='yes'", ... ... The declaration of a parameter 7614 * entity must precede any reference to it... 7615 * 7616 * [ VC: Entity Declared ] 7617 * In a document with an external subset or external parameter entities 7618 * with "standalone='no'", ... ... The declaration of a parameter entity 7619 * must precede any reference to it... 7620 * 7621 * [ WFC: In DTD ] 7622 * Parameter-entity references may only appear in the DTD. 7623 * NOTE: misleading but this is handled. 
7624 */ 7625 void 7626 xmlParsePEReference(xmlParserCtxtPtr ctxt) 7627 { 7628 const xmlChar *name; 7629 xmlEntityPtr entity = NULL; 7630 xmlParserInputPtr input; 7631 7632 if (RAW != '%') 7633 return; 7634 NEXT; 7635 name = xmlParseName(ctxt); 7636 if (name == NULL) { 7637 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7638 "xmlParsePEReference: no name\n"); 7639 return; 7640 } 7641 if (RAW != ';') { 7642 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7643 return; 7644 } 7645 7646 NEXT; 7647 7648 /* 7649 * Increate the number of entity references parsed 7650 */ 7651 ctxt->nbentities++; 7652 7653 /* 7654 * Request the entity from SAX 7655 */ 7656 if ((ctxt->sax != NULL) && 7657 (ctxt->sax->getParameterEntity != NULL)) 7658 entity = ctxt->sax->getParameterEntity(ctxt->userData, 7659 name); 7660 if (entity == NULL) { 7661 /* 7662 * [ WFC: Entity Declared ] 7663 * In a document without any DTD, a document with only an 7664 * internal DTD subset which contains no parameter entity 7665 * references, or a document with "standalone='yes'", ... 7666 * ... The declaration of a parameter entity must precede 7667 * any reference to it... 7668 */ 7669 if ((ctxt->standalone == 1) || 7670 ((ctxt->hasExternalSubset == 0) && 7671 (ctxt->hasPErefs == 0))) { 7672 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7673 "PEReference: %%%s; not found\n", 7674 name); 7675 } else { 7676 /* 7677 * [ VC: Entity Declared ] 7678 * In a document with an external subset or external 7679 * parameter entities with "standalone='no'", ... 7680 * ... The declaration of a parameter entity must 7681 * precede any reference to it... 
7682 */ 7683 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7684 "PEReference: %%%s; not found\n", 7685 name, NULL); 7686 ctxt->valid = 0; 7687 } 7688 } else { 7689 /* 7690 * Internal checking in case the entity quest barfed 7691 */ 7692 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7693 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7694 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7695 "Internal: %%%s; is not a parameter entity\n", 7696 name, NULL); 7697 } else if (ctxt->input->free != deallocblankswrapper) { 7698 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 7699 if (xmlPushInput(ctxt, input) < 0) 7700 return; 7701 } else { 7702 /* 7703 * TODO !!! 7704 * handle the extra spaces added before and after 7705 * c.f. http://www.w3.org/TR/REC-xml#as-PE 7706 */ 7707 input = xmlNewEntityInputStream(ctxt, entity); 7708 if (xmlPushInput(ctxt, input) < 0) 7709 return; 7710 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7711 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 7712 (IS_BLANK_CH(NXT(5)))) { 7713 xmlParseTextDecl(ctxt); 7714 if (ctxt->errNo == 7715 XML_ERR_UNSUPPORTED_ENCODING) { 7716 /* 7717 * The XML REC instructs us to stop parsing 7718 * right here 7719 */ 7720 ctxt->instate = XML_PARSER_EOF; 7721 return; 7722 } 7723 } 7724 } 7725 } 7726 ctxt->hasPErefs = 1; 7727 } 7728 7729 /** 7730 * xmlLoadEntityContent: 7731 * @ctxt: an XML parser context 7732 * @entity: an unloaded system entity 7733 * 7734 * Load the original content of the given system entity from the 7735 * ExternalID/SystemID given. 
This is to be used for Included in Literal 7736 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7737 * 7738 * Returns 0 in case of success and -1 in case of failure 7739 */ 7740 static int 7741 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7742 xmlParserInputPtr input; 7743 xmlBufferPtr buf; 7744 int l, c; 7745 int count = 0; 7746 7747 if ((ctxt == NULL) || (entity == NULL) || 7748 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7749 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7750 (entity->content != NULL)) { 7751 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7752 "xmlLoadEntityContent parameter error"); 7753 return(-1); 7754 } 7755 7756 if (xmlParserDebugEntities) 7757 xmlGenericError(xmlGenericErrorContext, 7758 "Reading %s entity content input\n", entity->name); 7759 7760 buf = xmlBufferCreate(); 7761 if (buf == NULL) { 7762 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7763 "xmlLoadEntityContent parameter error"); 7764 return(-1); 7765 } 7766 7767 input = xmlNewEntityInputStream(ctxt, entity); 7768 if (input == NULL) { 7769 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7770 "xmlLoadEntityContent input error"); 7771 xmlBufferFree(buf); 7772 return(-1); 7773 } 7774 7775 /* 7776 * Push the entity as the current input, read char by char 7777 * saving to the buffer until the end of the entity or an error 7778 */ 7779 if (xmlPushInput(ctxt, input) < 0) { 7780 xmlBufferFree(buf); 7781 return(-1); 7782 } 7783 7784 GROW; 7785 c = CUR_CHAR(l); 7786 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 7787 (IS_CHAR(c))) { 7788 xmlBufferAdd(buf, ctxt->input->cur, l); 7789 if (count++ > 100) { 7790 count = 0; 7791 GROW; 7792 } 7793 NEXTL(l); 7794 c = CUR_CHAR(l); 7795 } 7796 7797 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 7798 xmlPopInput(ctxt); 7799 } else if (!IS_CHAR(c)) { 7800 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 7801 "xmlLoadEntityContent: invalid char 
value %d\n", 7802 c); 7803 xmlBufferFree(buf); 7804 return(-1); 7805 } 7806 entity->content = buf->content; 7807 buf->content = NULL; 7808 xmlBufferFree(buf); 7809 7810 return(0); 7811 } 7812 7813 /** 7814 * xmlParseStringPEReference: 7815 * @ctxt: an XML parser context 7816 * @str: a pointer to an index in the string 7817 * 7818 * parse PEReference declarations 7819 * 7820 * [69] PEReference ::= '%' Name ';' 7821 * 7822 * [ WFC: No Recursion ] 7823 * A parsed entity must not contain a recursive 7824 * reference to itself, either directly or indirectly. 7825 * 7826 * [ WFC: Entity Declared ] 7827 * In a document without any DTD, a document with only an internal DTD 7828 * subset which contains no parameter entity references, or a document 7829 * with "standalone='yes'", ... ... The declaration of a parameter 7830 * entity must precede any reference to it... 7831 * 7832 * [ VC: Entity Declared ] 7833 * In a document with an external subset or external parameter entities 7834 * with "standalone='no'", ... ... The declaration of a parameter entity 7835 * must precede any reference to it... 7836 * 7837 * [ WFC: In DTD ] 7838 * Parameter-entity references may only appear in the DTD. 7839 * NOTE: misleading but this is handled. 7840 * 7841 * Returns the string of the entity content. 
7842 * str is updated to the current value of the index 7843 */ 7844 static xmlEntityPtr 7845 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 7846 const xmlChar *ptr; 7847 xmlChar cur; 7848 xmlChar *name; 7849 xmlEntityPtr entity = NULL; 7850 7851 if ((str == NULL) || (*str == NULL)) return(NULL); 7852 ptr = *str; 7853 cur = *ptr; 7854 if (cur != '%') 7855 return(NULL); 7856 ptr++; 7857 name = xmlParseStringName(ctxt, &ptr); 7858 if (name == NULL) { 7859 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7860 "xmlParseStringPEReference: no name\n"); 7861 *str = ptr; 7862 return(NULL); 7863 } 7864 cur = *ptr; 7865 if (cur != ';') { 7866 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7867 xmlFree(name); 7868 *str = ptr; 7869 return(NULL); 7870 } 7871 ptr++; 7872 7873 /* 7874 * Increate the number of entity references parsed 7875 */ 7876 ctxt->nbentities++; 7877 7878 /* 7879 * Request the entity from SAX 7880 */ 7881 if ((ctxt->sax != NULL) && 7882 (ctxt->sax->getParameterEntity != NULL)) 7883 entity = ctxt->sax->getParameterEntity(ctxt->userData, 7884 name); 7885 if (entity == NULL) { 7886 /* 7887 * [ WFC: Entity Declared ] 7888 * In a document without any DTD, a document with only an 7889 * internal DTD subset which contains no parameter entity 7890 * references, or a document with "standalone='yes'", ... 7891 * ... The declaration of a parameter entity must precede 7892 * any reference to it... 7893 */ 7894 if ((ctxt->standalone == 1) || 7895 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 7896 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7897 "PEReference: %%%s; not found\n", name); 7898 } else { 7899 /* 7900 * [ VC: Entity Declared ] 7901 * In a document with an external subset or external 7902 * parameter entities with "standalone='no'", ... 7903 * ... The declaration of a parameter entity must 7904 * precede any reference to it... 
7905 */ 7906 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7907 "PEReference: %%%s; not found\n", 7908 name, NULL); 7909 ctxt->valid = 0; 7910 } 7911 } else { 7912 /* 7913 * Internal checking in case the entity quest barfed 7914 */ 7915 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7916 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7917 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7918 "%%%s; is not a parameter entity\n", 7919 name, NULL); 7920 } 7921 } 7922 ctxt->hasPErefs = 1; 7923 xmlFree(name); 7924 *str = ptr; 7925 return(entity); 7926 } 7927 7928 /** 7929 * xmlParseDocTypeDecl: 7930 * @ctxt: an XML parser context 7931 * 7932 * parse a DOCTYPE declaration 7933 * 7934 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 7935 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7936 * 7937 * [ VC: Root Element Type ] 7938 * The Name in the document type declaration must match the element 7939 * type of the root element. 7940 */ 7941 7942 void 7943 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 7944 const xmlChar *name = NULL; 7945 xmlChar *ExternalID = NULL; 7946 xmlChar *URI = NULL; 7947 7948 /* 7949 * We know that '<!DOCTYPE' has been detected. 7950 */ 7951 SKIP(9); 7952 7953 SKIP_BLANKS; 7954 7955 /* 7956 * Parse the DOCTYPE name. 7957 */ 7958 name = xmlParseName(ctxt); 7959 if (name == NULL) { 7960 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7961 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 7962 } 7963 ctxt->intSubName = name; 7964 7965 SKIP_BLANKS; 7966 7967 /* 7968 * Check for SystemID and ExternalID 7969 */ 7970 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 7971 7972 if ((URI != NULL) || (ExternalID != NULL)) { 7973 ctxt->hasExternalSubset = 1; 7974 } 7975 ctxt->extSubURI = URI; 7976 ctxt->extSubSystem = ExternalID; 7977 7978 SKIP_BLANKS; 7979 7980 /* 7981 * Create and update the internal subset. 
7982 */ 7983 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 7984 (!ctxt->disableSAX)) 7985 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 7986 7987 /* 7988 * Is there any internal subset declarations ? 7989 * they are handled separately in xmlParseInternalSubset() 7990 */ 7991 if (RAW == '[') 7992 return; 7993 7994 /* 7995 * We should be at the end of the DOCTYPE declaration. 7996 */ 7997 if (RAW != '>') { 7998 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 7999 } 8000 NEXT; 8001 } 8002 8003 /** 8004 * xmlParseInternalSubset: 8005 * @ctxt: an XML parser context 8006 * 8007 * parse the internal subset declaration 8008 * 8009 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8010 */ 8011 8012 static void 8013 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8014 /* 8015 * Is there any DTD definition ? 8016 */ 8017 if (RAW == '[') { 8018 ctxt->instate = XML_PARSER_DTD; 8019 NEXT; 8020 /* 8021 * Parse the succession of Markup declarations and 8022 * PEReferences. 8023 * Subsequence (markupdecl | PEReference | S)* 8024 */ 8025 while (RAW != ']') { 8026 const xmlChar *check = CUR_PTR; 8027 unsigned int cons = ctxt->input->consumed; 8028 8029 SKIP_BLANKS; 8030 xmlParseMarkupDecl(ctxt); 8031 xmlParsePEReference(ctxt); 8032 8033 /* 8034 * Pop-up of finished entities. 8035 */ 8036 while ((RAW == 0) && (ctxt->inputNr > 1)) 8037 xmlPopInput(ctxt); 8038 8039 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8040 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8041 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8042 break; 8043 } 8044 } 8045 if (RAW == ']') { 8046 NEXT; 8047 SKIP_BLANKS; 8048 } 8049 } 8050 8051 /* 8052 * We should be at the end of the DOCTYPE declaration. 
8053 */ 8054 if (RAW != '>') { 8055 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8056 } 8057 NEXT; 8058 } 8059 8060 #ifdef LIBXML_SAX1_ENABLED 8061 /** 8062 * xmlParseAttribute: 8063 * @ctxt: an XML parser context 8064 * @value: a xmlChar ** used to store the value of the attribute 8065 * 8066 * parse an attribute 8067 * 8068 * [41] Attribute ::= Name Eq AttValue 8069 * 8070 * [ WFC: No External Entity References ] 8071 * Attribute values cannot contain direct or indirect entity references 8072 * to external entities. 8073 * 8074 * [ WFC: No < in Attribute Values ] 8075 * The replacement text of any entity referred to directly or indirectly in 8076 * an attribute value (other than "<") must not contain a <. 8077 * 8078 * [ VC: Attribute Value Type ] 8079 * The attribute must have been declared; the value must be of the type 8080 * declared for it. 8081 * 8082 * [25] Eq ::= S? '=' S? 8083 * 8084 * With namespace: 8085 * 8086 * [NS 11] Attribute ::= QName Eq AttValue 8087 * 8088 * Also the case QName == xmlns:??? is handled independently as a namespace 8089 * definition. 8090 * 8091 * Returns the attribute name, and the value in *value. 
8092 */ 8093 8094 const xmlChar * 8095 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8096 const xmlChar *name; 8097 xmlChar *val; 8098 8099 *value = NULL; 8100 GROW; 8101 name = xmlParseName(ctxt); 8102 if (name == NULL) { 8103 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8104 "error parsing attribute name\n"); 8105 return(NULL); 8106 } 8107 8108 /* 8109 * read the value 8110 */ 8111 SKIP_BLANKS; 8112 if (RAW == '=') { 8113 NEXT; 8114 SKIP_BLANKS; 8115 val = xmlParseAttValue(ctxt); 8116 ctxt->instate = XML_PARSER_CONTENT; 8117 } else { 8118 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8119 "Specification mandate value for attribute %s\n", name); 8120 return(NULL); 8121 } 8122 8123 /* 8124 * Check that xml:lang conforms to the specification 8125 * No more registered as an error, just generate a warning now 8126 * since this was deprecated in XML second edition 8127 */ 8128 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8129 if (!xmlCheckLanguageID(val)) { 8130 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8131 "Malformed value for xml:lang : %s\n", 8132 val, NULL); 8133 } 8134 } 8135 8136 /* 8137 * Check that xml:space conforms to the specification 8138 */ 8139 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8140 if (xmlStrEqual(val, BAD_CAST "default")) 8141 *(ctxt->space) = 0; 8142 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8143 *(ctxt->space) = 1; 8144 else { 8145 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8146 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8147 val, NULL); 8148 } 8149 } 8150 8151 *value = val; 8152 return(name); 8153 } 8154 8155 /** 8156 * xmlParseStartTag: 8157 * @ctxt: an XML parser context 8158 * 8159 * parse a start of tag either for rule element or 8160 * EmptyElement. In both case we don't parse the tag closing chars. 8161 * 8162 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8163 * 8164 * [ WFC: Unique Att Spec ] 8165 * No attribute name may appear more than once in the same start-tag or 8166 * empty-element tag. 8167 * 8168 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8169 * 8170 * [ WFC: Unique Att Spec ] 8171 * No attribute name may appear more than once in the same start-tag or 8172 * empty-element tag. 8173 * 8174 * With namespace: 8175 * 8176 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8177 * 8178 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8179 * 8180 * Returns the element name parsed 8181 */ 8182 8183 const xmlChar * 8184 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8185 const xmlChar *name; 8186 const xmlChar *attname; 8187 xmlChar *attvalue; 8188 const xmlChar **atts = ctxt->atts; 8189 int nbatts = 0; 8190 int maxatts = ctxt->maxatts; 8191 int i; 8192 8193 if (RAW != '<') return(NULL); 8194 NEXT1; 8195 8196 name = xmlParseName(ctxt); 8197 if (name == NULL) { 8198 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8199 "xmlParseStartTag: invalid element name\n"); 8200 return(NULL); 8201 } 8202 8203 /* 8204 * Now parse the attributes, it ends up with the ending 8205 * 8206 * (S Attribute)* S? 8207 */ 8208 SKIP_BLANKS; 8209 GROW; 8210 8211 while ((RAW != '>') && 8212 ((RAW != '/') || (NXT(1) != '>')) && 8213 (IS_BYTE_CHAR(RAW))) { 8214 const xmlChar *q = CUR_PTR; 8215 unsigned int cons = ctxt->input->consumed; 8216 8217 attname = xmlParseAttribute(ctxt, &attvalue); 8218 if ((attname != NULL) && (attvalue != NULL)) { 8219 /* 8220 * [ WFC: Unique Att Spec ] 8221 * No attribute name may appear more than once in the same 8222 * start-tag or empty-element tag. 
8223 */ 8224 for (i = 0; i < nbatts;i += 2) { 8225 if (xmlStrEqual(atts[i], attname)) { 8226 xmlErrAttributeDup(ctxt, NULL, attname); 8227 xmlFree(attvalue); 8228 goto failed; 8229 } 8230 } 8231 /* 8232 * Add the pair to atts 8233 */ 8234 if (atts == NULL) { 8235 maxatts = 22; /* allow for 10 attrs by default */ 8236 atts = (const xmlChar **) 8237 xmlMalloc(maxatts * sizeof(xmlChar *)); 8238 if (atts == NULL) { 8239 xmlErrMemory(ctxt, NULL); 8240 if (attvalue != NULL) 8241 xmlFree(attvalue); 8242 goto failed; 8243 } 8244 ctxt->atts = atts; 8245 ctxt->maxatts = maxatts; 8246 } else if (nbatts + 4 > maxatts) { 8247 const xmlChar **n; 8248 8249 maxatts *= 2; 8250 n = (const xmlChar **) xmlRealloc((void *) atts, 8251 maxatts * sizeof(const xmlChar *)); 8252 if (n == NULL) { 8253 xmlErrMemory(ctxt, NULL); 8254 if (attvalue != NULL) 8255 xmlFree(attvalue); 8256 goto failed; 8257 } 8258 atts = n; 8259 ctxt->atts = atts; 8260 ctxt->maxatts = maxatts; 8261 } 8262 atts[nbatts++] = attname; 8263 atts[nbatts++] = attvalue; 8264 atts[nbatts] = NULL; 8265 atts[nbatts + 1] = NULL; 8266 } else { 8267 if (attvalue != NULL) 8268 xmlFree(attvalue); 8269 } 8270 8271 failed: 8272 8273 GROW 8274 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8275 break; 8276 if (!IS_BLANK_CH(RAW)) { 8277 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8278 "attributes construct error\n"); 8279 } 8280 SKIP_BLANKS; 8281 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8282 (attname == NULL) && (attvalue == NULL)) { 8283 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8284 "xmlParseStartTag: problem parsing attributes\n"); 8285 break; 8286 } 8287 SHRINK; 8288 GROW; 8289 } 8290 8291 /* 8292 * SAX: Start of Element ! 
8293 */ 8294 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8295 (!ctxt->disableSAX)) { 8296 if (nbatts > 0) 8297 ctxt->sax->startElement(ctxt->userData, name, atts); 8298 else 8299 ctxt->sax->startElement(ctxt->userData, name, NULL); 8300 } 8301 8302 if (atts != NULL) { 8303 /* Free only the content strings */ 8304 for (i = 1;i < nbatts;i+=2) 8305 if (atts[i] != NULL) 8306 xmlFree((xmlChar *) atts[i]); 8307 } 8308 return(name); 8309 } 8310 8311 /** 8312 * xmlParseEndTag1: 8313 * @ctxt: an XML parser context 8314 * @line: line of the start tag 8315 * @nsNr: number of namespaces on the start tag 8316 * 8317 * parse an end of tag 8318 * 8319 * [42] ETag ::= '</' Name S? '>' 8320 * 8321 * With namespace 8322 * 8323 * [NS 9] ETag ::= '</' QName S? '>' 8324 */ 8325 8326 static void 8327 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8328 const xmlChar *name; 8329 8330 GROW; 8331 if ((RAW != '<') || (NXT(1) != '/')) { 8332 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8333 "xmlParseEndTag: '</' not found\n"); 8334 return; 8335 } 8336 SKIP(2); 8337 8338 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8339 8340 /* 8341 * We should definitely be at the ending "S? '>'" part 8342 */ 8343 GROW; 8344 SKIP_BLANKS; 8345 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8346 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8347 } else 8348 NEXT1; 8349 8350 /* 8351 * [ WFC: Element Type Match ] 8352 * The Name in an element's end-tag must match the element type in the 8353 * start-tag. 
8354 * 8355 */ 8356 if (name != (xmlChar*)1) { 8357 if (name == NULL) name = BAD_CAST "unparseable"; 8358 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8359 "Opening and ending tag mismatch: %s line %d and %s\n", 8360 ctxt->name, line, name); 8361 } 8362 8363 /* 8364 * SAX: End of Tag 8365 */ 8366 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8367 (!ctxt->disableSAX)) 8368 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8369 8370 namePop(ctxt); 8371 spacePop(ctxt); 8372 return; 8373 } 8374 8375 /** 8376 * xmlParseEndTag: 8377 * @ctxt: an XML parser context 8378 * 8379 * parse an end of tag 8380 * 8381 * [42] ETag ::= '</' Name S? '>' 8382 * 8383 * With namespace 8384 * 8385 * [NS 9] ETag ::= '</' QName S? '>' 8386 */ 8387 8388 void 8389 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8390 xmlParseEndTag1(ctxt, 0); 8391 } 8392 #endif /* LIBXML_SAX1_ENABLED */ 8393 8394 /************************************************************************ 8395 * * 8396 * SAX 2 specific operations * 8397 * * 8398 ************************************************************************/ 8399 8400 /* 8401 * xmlGetNamespace: 8402 * @ctxt: an XML parser context 8403 * @prefix: the prefix to lookup 8404 * 8405 * Lookup the namespace name for the @prefix (which ca be NULL) 8406 * The prefix must come from the @ctxt->dict dictionnary 8407 * 8408 * Returns the namespace name or NULL if not bound 8409 */ 8410 static const xmlChar * 8411 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8412 int i; 8413 8414 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8415 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8416 if (ctxt->nsTab[i] == prefix) { 8417 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8418 return(NULL); 8419 return(ctxt->nsTab[i + 1]); 8420 } 8421 return(NULL); 8422 } 8423 8424 /** 8425 * xmlParseQName: 8426 * @ctxt: an XML parser context 8427 * @prefix: pointer to store the prefix part 8428 * 8429 * parse an XML Namespace QName 8430 * 8431 * 
[6] QName ::= (Prefix ':')? LocalPart 8432 * [7] Prefix ::= NCName 8433 * [8] LocalPart ::= NCName 8434 * 8435 * Returns the Name parsed or NULL 8436 */ 8437 8438 static const xmlChar * 8439 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8440 const xmlChar *l, *p; 8441 8442 GROW; 8443 8444 l = xmlParseNCName(ctxt); 8445 if (l == NULL) { 8446 if (CUR == ':') { 8447 l = xmlParseName(ctxt); 8448 if (l != NULL) { 8449 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8450 "Failed to parse QName '%s'\n", l, NULL, NULL); 8451 *prefix = NULL; 8452 return(l); 8453 } 8454 } 8455 return(NULL); 8456 } 8457 if (CUR == ':') { 8458 NEXT; 8459 p = l; 8460 l = xmlParseNCName(ctxt); 8461 if (l == NULL) { 8462 xmlChar *tmp; 8463 8464 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8465 "Failed to parse QName '%s:'\n", p, NULL, NULL); 8466 l = xmlParseNmtoken(ctxt); 8467 if (l == NULL) 8468 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8469 else { 8470 tmp = xmlBuildQName(l, p, NULL, 0); 8471 xmlFree((char *)l); 8472 } 8473 p = xmlDictLookup(ctxt->dict, tmp, -1); 8474 if (tmp != NULL) xmlFree(tmp); 8475 *prefix = NULL; 8476 return(p); 8477 } 8478 if (CUR == ':') { 8479 xmlChar *tmp; 8480 8481 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8482 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8483 NEXT; 8484 tmp = (xmlChar *) xmlParseName(ctxt); 8485 if (tmp != NULL) { 8486 tmp = xmlBuildQName(tmp, l, NULL, 0); 8487 l = xmlDictLookup(ctxt->dict, tmp, -1); 8488 if (tmp != NULL) xmlFree(tmp); 8489 *prefix = p; 8490 return(l); 8491 } 8492 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8493 l = xmlDictLookup(ctxt->dict, tmp, -1); 8494 if (tmp != NULL) xmlFree(tmp); 8495 *prefix = p; 8496 return(l); 8497 } 8498 *prefix = p; 8499 } else 8500 *prefix = NULL; 8501 return(l); 8502 } 8503 8504 /** 8505 * xmlParseQNameAndCompare: 8506 * @ctxt: an XML parser context 8507 * @name: the localname 8508 * @prefix: the prefix, if any. 
8509 * 8510 * parse an XML name and compares for match 8511 * (specialized for endtag parsing) 8512 * 8513 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8514 * and the name for mismatch 8515 */ 8516 8517 static const xmlChar * 8518 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8519 xmlChar const *prefix) { 8520 const xmlChar *cmp; 8521 const xmlChar *in; 8522 const xmlChar *ret; 8523 const xmlChar *prefix2; 8524 8525 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8526 8527 GROW; 8528 in = ctxt->input->cur; 8529 8530 cmp = prefix; 8531 while (*in != 0 && *in == *cmp) { 8532 ++in; 8533 ++cmp; 8534 } 8535 if ((*cmp == 0) && (*in == ':')) { 8536 in++; 8537 cmp = name; 8538 while (*in != 0 && *in == *cmp) { 8539 ++in; 8540 ++cmp; 8541 } 8542 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8543 /* success */ 8544 ctxt->input->cur = in; 8545 return((const xmlChar*) 1); 8546 } 8547 } 8548 /* 8549 * all strings coms from the dictionary, equality can be done directly 8550 */ 8551 ret = xmlParseQName (ctxt, &prefix2); 8552 if ((ret == name) && (prefix == prefix2)) 8553 return((const xmlChar*) 1); 8554 return ret; 8555 } 8556 8557 /** 8558 * xmlParseAttValueInternal: 8559 * @ctxt: an XML parser context 8560 * @len: attribute len result 8561 * @alloc: whether the attribute was reallocated as a new string 8562 * @normalize: if 1 then further non-CDATA normalization must be done 8563 * 8564 * parse a value for an attribute. 8565 * NOTE: if no normalization is needed, the routine will return pointers 8566 * directly from the data buffer. 
8567 * 8568 * 3.3.3 Attribute-Value Normalization: 8569 * Before the value of an attribute is passed to the application or 8570 * checked for validity, the XML processor must normalize it as follows: 8571 * - a character reference is processed by appending the referenced 8572 * character to the attribute value 8573 * - an entity reference is processed by recursively processing the 8574 * replacement text of the entity 8575 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8576 * appending #x20 to the normalized value, except that only a single 8577 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8578 * parsed entity or the literal entity value of an internal parsed entity 8579 * - other characters are processed by appending them to the normalized value 8580 * If the declared value is not CDATA, then the XML processor must further 8581 * process the normalized attribute value by discarding any leading and 8582 * trailing space (#x20) characters, and by replacing sequences of space 8583 * (#x20) characters by a single space (#x20) character. 8584 * All attributes for which no declaration has been read should be treated 8585 * by a non-validating parser as if declared CDATA. 8586 * 8587 * Returns the AttValue parsed or NULL. The value has to be freed by the 8588 * caller if it was copied, this can be detected by val[*len] == 0. 
8589 */ 8590 8591 static xmlChar * 8592 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8593 int normalize) 8594 { 8595 xmlChar limit = 0; 8596 const xmlChar *in = NULL, *start, *end, *last; 8597 xmlChar *ret = NULL; 8598 8599 GROW; 8600 in = (xmlChar *) CUR_PTR; 8601 if (*in != '"' && *in != '\'') { 8602 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8603 return (NULL); 8604 } 8605 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8606 8607 /* 8608 * try to handle in this routine the most common case where no 8609 * allocation of a new string is required and where content is 8610 * pure ASCII. 8611 */ 8612 limit = *in++; 8613 end = ctxt->input->end; 8614 start = in; 8615 if (in >= end) { 8616 const xmlChar *oldbase = ctxt->input->base; 8617 GROW; 8618 if (oldbase != ctxt->input->base) { 8619 long delta = ctxt->input->base - oldbase; 8620 start = start + delta; 8621 in = in + delta; 8622 } 8623 end = ctxt->input->end; 8624 } 8625 if (normalize) { 8626 /* 8627 * Skip any leading spaces 8628 */ 8629 while ((in < end) && (*in != limit) && 8630 ((*in == 0x20) || (*in == 0x9) || 8631 (*in == 0xA) || (*in == 0xD))) { 8632 in++; 8633 start = in; 8634 if (in >= end) { 8635 const xmlChar *oldbase = ctxt->input->base; 8636 GROW; 8637 if (oldbase != ctxt->input->base) { 8638 long delta = ctxt->input->base - oldbase; 8639 start = start + delta; 8640 in = in + delta; 8641 } 8642 end = ctxt->input->end; 8643 } 8644 } 8645 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8646 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8647 if ((*in++ == 0x20) && (*in == 0x20)) break; 8648 if (in >= end) { 8649 const xmlChar *oldbase = ctxt->input->base; 8650 GROW; 8651 if (oldbase != ctxt->input->base) { 8652 long delta = ctxt->input->base - oldbase; 8653 start = start + delta; 8654 in = in + delta; 8655 } 8656 end = ctxt->input->end; 8657 } 8658 } 8659 last = in; 8660 /* 8661 * skip the trailing blanks 8662 */ 8663 while ((last[-1] == 0x20) && (last > 
start)) last--; 8664 while ((in < end) && (*in != limit) && 8665 ((*in == 0x20) || (*in == 0x9) || 8666 (*in == 0xA) || (*in == 0xD))) { 8667 in++; 8668 if (in >= end) { 8669 const xmlChar *oldbase = ctxt->input->base; 8670 GROW; 8671 if (oldbase != ctxt->input->base) { 8672 long delta = ctxt->input->base - oldbase; 8673 start = start + delta; 8674 in = in + delta; 8675 last = last + delta; 8676 } 8677 end = ctxt->input->end; 8678 } 8679 } 8680 if (*in != limit) goto need_complex; 8681 } else { 8682 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8683 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8684 in++; 8685 if (in >= end) { 8686 const xmlChar *oldbase = ctxt->input->base; 8687 GROW; 8688 if (oldbase != ctxt->input->base) { 8689 long delta = ctxt->input->base - oldbase; 8690 start = start + delta; 8691 in = in + delta; 8692 } 8693 end = ctxt->input->end; 8694 } 8695 } 8696 last = in; 8697 if (*in != limit) goto need_complex; 8698 } 8699 in++; 8700 if (len != NULL) { 8701 *len = last - start; 8702 ret = (xmlChar *) start; 8703 } else { 8704 if (alloc) *alloc = 1; 8705 ret = xmlStrndup(start, last - start); 8706 } 8707 CUR_PTR = in; 8708 if (alloc) *alloc = 0; 8709 return ret; 8710 need_complex: 8711 if (alloc) *alloc = 1; 8712 return xmlParseAttValueComplex(ctxt, len, normalize); 8713 } 8714 8715 /** 8716 * xmlParseAttribute2: 8717 * @ctxt: an XML parser context 8718 * @pref: the element prefix 8719 * @elem: the element name 8720 * @prefix: a xmlChar ** used to store the value of the attribute prefix 8721 * @value: a xmlChar ** used to store the value of the attribute 8722 * @len: an int * to save the length of the attribute 8723 * @alloc: an int * to indicate if the attribute was allocated 8724 * 8725 * parse an attribute in the new SAX2 framework. 8726 * 8727 * Returns the attribute name, and the value in *value, . 
8728 */ 8729 8730 static const xmlChar * 8731 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 8732 const xmlChar * pref, const xmlChar * elem, 8733 const xmlChar ** prefix, xmlChar ** value, 8734 int *len, int *alloc) 8735 { 8736 const xmlChar *name; 8737 xmlChar *val, *internal_val = NULL; 8738 int normalize = 0; 8739 8740 *value = NULL; 8741 GROW; 8742 name = xmlParseQName(ctxt, prefix); 8743 if (name == NULL) { 8744 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8745 "error parsing attribute name\n"); 8746 return (NULL); 8747 } 8748 8749 /* 8750 * get the type if needed 8751 */ 8752 if (ctxt->attsSpecial != NULL) { 8753 int type; 8754 8755 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 8756 pref, elem, *prefix, name); 8757 if (type != 0) 8758 normalize = 1; 8759 } 8760 8761 /* 8762 * read the value 8763 */ 8764 SKIP_BLANKS; 8765 if (RAW == '=') { 8766 NEXT; 8767 SKIP_BLANKS; 8768 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 8769 if (normalize) { 8770 /* 8771 * Sometimes a second normalisation pass for spaces is needed 8772 * but that only happens if charrefs or entities refernces 8773 * have been used in the attribute value, i.e. the attribute 8774 * value have been extracted in an allocated string already. 
8775 */ 8776 if (*alloc) { 8777 const xmlChar *val2; 8778 8779 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 8780 if ((val2 != NULL) && (val2 != val)) { 8781 xmlFree(val); 8782 val = (xmlChar *) val2; 8783 } 8784 } 8785 } 8786 ctxt->instate = XML_PARSER_CONTENT; 8787 } else { 8788 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8789 "Specification mandate value for attribute %s\n", 8790 name); 8791 return (NULL); 8792 } 8793 8794 if (*prefix == ctxt->str_xml) { 8795 /* 8796 * Check that xml:lang conforms to the specification 8797 * No more registered as an error, just generate a warning now 8798 * since this was deprecated in XML second edition 8799 */ 8800 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 8801 internal_val = xmlStrndup(val, *len); 8802 if (!xmlCheckLanguageID(internal_val)) { 8803 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8804 "Malformed value for xml:lang : %s\n", 8805 internal_val, NULL); 8806 } 8807 } 8808 8809 /* 8810 * Check that xml:space conforms to the specification 8811 */ 8812 if (xmlStrEqual(name, BAD_CAST "space")) { 8813 internal_val = xmlStrndup(val, *len); 8814 if (xmlStrEqual(internal_val, BAD_CAST "default")) 8815 *(ctxt->space) = 0; 8816 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 8817 *(ctxt->space) = 1; 8818 else { 8819 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8820 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8821 internal_val, NULL); 8822 } 8823 } 8824 if (internal_val) { 8825 xmlFree(internal_val); 8826 } 8827 } 8828 8829 *value = val; 8830 return (name); 8831 } 8832 /** 8833 * xmlParseStartTag2: 8834 * @ctxt: an XML parser context 8835 * 8836 * parse a start of tag either for rule element or 8837 * EmptyElement. In both case we don't parse the tag closing chars. 8838 * This routine is called when running SAX2 parsing 8839 * 8840 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8841 * 8842 * [ WFC: Unique Att Spec ] 8843 * No attribute name may appear more than once in the same start-tag or 8844 * empty-element tag. 8845 * 8846 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8847 * 8848 * [ WFC: Unique Att Spec ] 8849 * No attribute name may appear more than once in the same start-tag or 8850 * empty-element tag. 8851 * 8852 * With namespace: 8853 * 8854 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8855 * 8856 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8857 * 8858 * Returns the element name parsed 8859 */ 8860 8861 static const xmlChar * 8862 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 8863 const xmlChar **URI, int *tlen) { 8864 const xmlChar *localname; 8865 const xmlChar *prefix; 8866 const xmlChar *attname; 8867 const xmlChar *aprefix; 8868 const xmlChar *nsname; 8869 xmlChar *attvalue; 8870 const xmlChar **atts = ctxt->atts; 8871 int maxatts = ctxt->maxatts; 8872 int nratts, nbatts, nbdef; 8873 int i, j, nbNs, attval, oldline, oldcol; 8874 const xmlChar *base; 8875 unsigned long cur; 8876 int nsNr = ctxt->nsNr; 8877 8878 if (RAW != '<') return(NULL); 8879 NEXT1; 8880 8881 /* 8882 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 8883 * point since the attribute values may be stored as pointers to 8884 * the buffer and calling SHRINK would destroy them ! 8885 * The Shrinking is only possible once the full set of attribute 8886 * callbacks have been done. 8887 */ 8888 reparse: 8889 SHRINK; 8890 base = ctxt->input->base; 8891 cur = ctxt->input->cur - ctxt->input->base; 8892 oldline = ctxt->input->line; 8893 oldcol = ctxt->input->col; 8894 nbatts = 0; 8895 nratts = 0; 8896 nbdef = 0; 8897 nbNs = 0; 8898 attval = 0; 8899 /* Forget any namespaces added during an earlier parse of this element. 
*/ 8900 ctxt->nsNr = nsNr; 8901 8902 localname = xmlParseQName(ctxt, &prefix); 8903 if (localname == NULL) { 8904 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8905 "StartTag: invalid element name\n"); 8906 return(NULL); 8907 } 8908 *tlen = ctxt->input->cur - ctxt->input->base - cur; 8909 8910 /* 8911 * Now parse the attributes, it ends up with the ending 8912 * 8913 * (S Attribute)* S? 8914 */ 8915 SKIP_BLANKS; 8916 GROW; 8917 if (ctxt->input->base != base) goto base_changed; 8918 8919 while ((RAW != '>') && 8920 ((RAW != '/') || (NXT(1) != '>')) && 8921 (IS_BYTE_CHAR(RAW))) { 8922 const xmlChar *q = CUR_PTR; 8923 unsigned int cons = ctxt->input->consumed; 8924 int len = -1, alloc = 0; 8925 8926 attname = xmlParseAttribute2(ctxt, prefix, localname, 8927 &aprefix, &attvalue, &len, &alloc); 8928 if (ctxt->input->base != base) { 8929 if ((attvalue != NULL) && (alloc != 0)) 8930 xmlFree(attvalue); 8931 attvalue = NULL; 8932 goto base_changed; 8933 } 8934 if ((attname != NULL) && (attvalue != NULL)) { 8935 if (len < 0) len = xmlStrlen(attvalue); 8936 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8937 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8938 xmlURIPtr uri; 8939 8940 if (*URL != 0) { 8941 uri = xmlParseURI((const char *) URL); 8942 if (uri == NULL) { 8943 xmlNsErr(ctxt, XML_WAR_NS_URI, 8944 "xmlns: '%s' is not a valid URI\n", 8945 URL, NULL, NULL); 8946 } else { 8947 if (uri->scheme == NULL) { 8948 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 8949 "xmlns: URI %s is not absolute\n", 8950 URL, NULL, NULL); 8951 } 8952 xmlFreeURI(uri); 8953 } 8954 if (URL == ctxt->str_xml_ns) { 8955 if (attname != ctxt->str_xml) { 8956 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8957 "xml namespace URI cannot be the default namespace\n", 8958 NULL, NULL, NULL); 8959 } 8960 goto skip_default_ns; 8961 } 8962 if ((len == 29) && 8963 (xmlStrEqual(URL, 8964 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 8965 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8966 "reuse of the 
xmlns namespace name is forbidden\n", 8967 NULL, NULL, NULL); 8968 goto skip_default_ns; 8969 } 8970 } 8971 /* 8972 * check that it's not a defined namespace 8973 */ 8974 for (j = 1;j <= nbNs;j++) 8975 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8976 break; 8977 if (j <= nbNs) 8978 xmlErrAttributeDup(ctxt, NULL, attname); 8979 else 8980 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 8981 skip_default_ns: 8982 if (alloc != 0) xmlFree(attvalue); 8983 SKIP_BLANKS; 8984 continue; 8985 } 8986 if (aprefix == ctxt->str_xmlns) { 8987 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8988 xmlURIPtr uri; 8989 8990 if (attname == ctxt->str_xml) { 8991 if (URL != ctxt->str_xml_ns) { 8992 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8993 "xml namespace prefix mapped to wrong URI\n", 8994 NULL, NULL, NULL); 8995 } 8996 /* 8997 * Do not keep a namespace definition node 8998 */ 8999 goto skip_ns; 9000 } 9001 if (URL == ctxt->str_xml_ns) { 9002 if (attname != ctxt->str_xml) { 9003 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9004 "xml namespace URI mapped to wrong prefix\n", 9005 NULL, NULL, NULL); 9006 } 9007 goto skip_ns; 9008 } 9009 if (attname == ctxt->str_xmlns) { 9010 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9011 "redefinition of the xmlns prefix is forbidden\n", 9012 NULL, NULL, NULL); 9013 goto skip_ns; 9014 } 9015 if ((len == 29) && 9016 (xmlStrEqual(URL, 9017 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9018 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9019 "reuse of the xmlns namespace name is forbidden\n", 9020 NULL, NULL, NULL); 9021 goto skip_ns; 9022 } 9023 if ((URL == NULL) || (URL[0] == 0)) { 9024 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9025 "xmlns:%s: Empty XML namespace is not allowed\n", 9026 attname, NULL, NULL); 9027 goto skip_ns; 9028 } else { 9029 uri = xmlParseURI((const char *) URL); 9030 if (uri == NULL) { 9031 xmlNsErr(ctxt, XML_WAR_NS_URI, 9032 "xmlns:%s: '%s' is not a valid URI\n", 9033 attname, URL, NULL); 9034 } else { 9035 if ((ctxt->pedantic) && 
(uri->scheme == NULL)) { 9036 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9037 "xmlns:%s: URI %s is not absolute\n", 9038 attname, URL, NULL); 9039 } 9040 xmlFreeURI(uri); 9041 } 9042 } 9043 9044 /* 9045 * check that it's not a defined namespace 9046 */ 9047 for (j = 1;j <= nbNs;j++) 9048 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9049 break; 9050 if (j <= nbNs) 9051 xmlErrAttributeDup(ctxt, aprefix, attname); 9052 else 9053 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9054 skip_ns: 9055 if (alloc != 0) xmlFree(attvalue); 9056 SKIP_BLANKS; 9057 if (ctxt->input->base != base) goto base_changed; 9058 continue; 9059 } 9060 9061 /* 9062 * Add the pair to atts 9063 */ 9064 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9065 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9066 if (attvalue[len] == 0) 9067 xmlFree(attvalue); 9068 goto failed; 9069 } 9070 maxatts = ctxt->maxatts; 9071 atts = ctxt->atts; 9072 } 9073 ctxt->attallocs[nratts++] = alloc; 9074 atts[nbatts++] = attname; 9075 atts[nbatts++] = aprefix; 9076 atts[nbatts++] = NULL; /* the URI will be fetched later */ 9077 atts[nbatts++] = attvalue; 9078 attvalue += len; 9079 atts[nbatts++] = attvalue; 9080 /* 9081 * tag if some deallocation is needed 9082 */ 9083 if (alloc != 0) attval = 1; 9084 } else { 9085 if ((attvalue != NULL) && (attvalue[len] == 0)) 9086 xmlFree(attvalue); 9087 } 9088 9089 failed: 9090 9091 GROW 9092 if (ctxt->input->base != base) goto base_changed; 9093 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9094 break; 9095 if (!IS_BLANK_CH(RAW)) { 9096 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9097 "attributes construct error\n"); 9098 break; 9099 } 9100 SKIP_BLANKS; 9101 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9102 (attname == NULL) && (attvalue == NULL)) { 9103 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9104 "xmlParseStartTag: problem parsing attributes\n"); 9105 break; 9106 } 9107 GROW; 9108 if (ctxt->input->base != base) goto base_changed; 9109 } 9110 9111 /* 9112 * 
The attributes defaulting 9113 */ 9114 if (ctxt->attsDefault != NULL) { 9115 xmlDefAttrsPtr defaults; 9116 9117 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9118 if (defaults != NULL) { 9119 for (i = 0;i < defaults->nbAttrs;i++) { 9120 attname = defaults->values[5 * i]; 9121 aprefix = defaults->values[5 * i + 1]; 9122 9123 /* 9124 * special work for namespaces defaulted defs 9125 */ 9126 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9127 /* 9128 * check that it's not a defined namespace 9129 */ 9130 for (j = 1;j <= nbNs;j++) 9131 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9132 break; 9133 if (j <= nbNs) continue; 9134 9135 nsname = xmlGetNamespace(ctxt, NULL); 9136 if (nsname != defaults->values[5 * i + 2]) { 9137 if (nsPush(ctxt, NULL, 9138 defaults->values[5 * i + 2]) > 0) 9139 nbNs++; 9140 } 9141 } else if (aprefix == ctxt->str_xmlns) { 9142 /* 9143 * check that it's not a defined namespace 9144 */ 9145 for (j = 1;j <= nbNs;j++) 9146 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9147 break; 9148 if (j <= nbNs) continue; 9149 9150 nsname = xmlGetNamespace(ctxt, attname); 9151 if (nsname != defaults->values[2]) { 9152 if (nsPush(ctxt, attname, 9153 defaults->values[5 * i + 2]) > 0) 9154 nbNs++; 9155 } 9156 } else { 9157 /* 9158 * check that it's not a defined attribute 9159 */ 9160 for (j = 0;j < nbatts;j+=5) { 9161 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9162 break; 9163 } 9164 if (j < nbatts) continue; 9165 9166 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9167 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9168 return(NULL); 9169 } 9170 maxatts = ctxt->maxatts; 9171 atts = ctxt->atts; 9172 } 9173 atts[nbatts++] = attname; 9174 atts[nbatts++] = aprefix; 9175 if (aprefix == NULL) 9176 atts[nbatts++] = NULL; 9177 else 9178 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9179 atts[nbatts++] = defaults->values[5 * i + 2]; 9180 atts[nbatts++] = defaults->values[5 * i + 3]; 9181 if ((ctxt->standalone == 1) && 9182 
(defaults->values[5 * i + 4] != NULL)) { 9183 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9184 "standalone: attribute %s on %s defaulted from external subset\n", 9185 attname, localname); 9186 } 9187 nbdef++; 9188 } 9189 } 9190 } 9191 } 9192 9193 /* 9194 * The attributes checkings 9195 */ 9196 for (i = 0; i < nbatts;i += 5) { 9197 /* 9198 * The default namespace does not apply to attribute names. 9199 */ 9200 if (atts[i + 1] != NULL) { 9201 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9202 if (nsname == NULL) { 9203 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9204 "Namespace prefix %s for %s on %s is not defined\n", 9205 atts[i + 1], atts[i], localname); 9206 } 9207 atts[i + 2] = nsname; 9208 } else 9209 nsname = NULL; 9210 /* 9211 * [ WFC: Unique Att Spec ] 9212 * No attribute name may appear more than once in the same 9213 * start-tag or empty-element tag. 9214 * As extended by the Namespace in XML REC. 9215 */ 9216 for (j = 0; j < i;j += 5) { 9217 if (atts[i] == atts[j]) { 9218 if (atts[i+1] == atts[j+1]) { 9219 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9220 break; 9221 } 9222 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9223 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9224 "Namespaced Attribute %s in '%s' redefined\n", 9225 atts[i], nsname, NULL); 9226 break; 9227 } 9228 } 9229 } 9230 } 9231 9232 nsname = xmlGetNamespace(ctxt, prefix); 9233 if ((prefix != NULL) && (nsname == NULL)) { 9234 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9235 "Namespace prefix %s on %s is not defined\n", 9236 prefix, localname, NULL); 9237 } 9238 *pref = prefix; 9239 *URI = nsname; 9240 9241 /* 9242 * SAX: Start of Element ! 
9243 */ 9244 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9245 (!ctxt->disableSAX)) { 9246 if (nbNs > 0) 9247 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9248 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9249 nbatts / 5, nbdef, atts); 9250 else 9251 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9252 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9253 } 9254 9255 /* 9256 * Free up attribute allocated strings if needed 9257 */ 9258 if (attval != 0) { 9259 for (i = 3,j = 0; j < nratts;i += 5,j++) 9260 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9261 xmlFree((xmlChar *) atts[i]); 9262 } 9263 9264 return(localname); 9265 9266 base_changed: 9267 /* 9268 * the attribute strings are valid iif the base didn't changed 9269 */ 9270 if (attval != 0) { 9271 for (i = 3,j = 0; j < nratts;i += 5,j++) 9272 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9273 xmlFree((xmlChar *) atts[i]); 9274 } 9275 ctxt->input->cur = ctxt->input->base + cur; 9276 ctxt->input->line = oldline; 9277 ctxt->input->col = oldcol; 9278 if (ctxt->wellFormed == 1) { 9279 goto reparse; 9280 } 9281 return(NULL); 9282 } 9283 9284 /** 9285 * xmlParseEndTag2: 9286 * @ctxt: an XML parser context 9287 * @line: line of the start tag 9288 * @nsNr: number of namespaces on the start tag 9289 * 9290 * parse an end of tag 9291 * 9292 * [42] ETag ::= '</' Name S? '>' 9293 * 9294 * With namespace 9295 * 9296 * [NS 9] ETag ::= '</' QName S? 
'>' 9297 */ 9298 9299 static void 9300 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9301 const xmlChar *URI, int line, int nsNr, int tlen) { 9302 const xmlChar *name; 9303 9304 GROW; 9305 if ((RAW != '<') || (NXT(1) != '/')) { 9306 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9307 return; 9308 } 9309 SKIP(2); 9310 9311 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9312 if (ctxt->input->cur[tlen] == '>') { 9313 ctxt->input->cur += tlen + 1; 9314 goto done; 9315 } 9316 ctxt->input->cur += tlen; 9317 name = (xmlChar*)1; 9318 } else { 9319 if (prefix == NULL) 9320 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9321 else 9322 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9323 } 9324 9325 /* 9326 * We should definitely be at the ending "S? '>'" part 9327 */ 9328 GROW; 9329 SKIP_BLANKS; 9330 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9331 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9332 } else 9333 NEXT1; 9334 9335 /* 9336 * [ WFC: Element Type Match ] 9337 * The Name in an element's end-tag must match the element type in the 9338 * start-tag. 9339 * 9340 */ 9341 if (name != (xmlChar*)1) { 9342 if (name == NULL) name = BAD_CAST "unparseable"; 9343 if ((line == 0) && (ctxt->node != NULL)) 9344 line = ctxt->node->line; 9345 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9346 "Opening and ending tag mismatch: %s line %d and %s\n", 9347 ctxt->name, line, name); 9348 } 9349 9350 /* 9351 * SAX: End of Tag 9352 */ 9353 done: 9354 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9355 (!ctxt->disableSAX)) 9356 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9357 9358 spacePop(ctxt); 9359 if (nsNr != 0) 9360 nsPop(ctxt, nsNr); 9361 return; 9362 } 9363 9364 /** 9365 * xmlParseCDSect: 9366 * @ctxt: an XML parser context 9367 * 9368 * Parse escaped pure raw content. 
9369 * 9370 * [18] CDSect ::= CDStart CData CDEnd 9371 * 9372 * [19] CDStart ::= '<![CDATA[' 9373 * 9374 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9375 * 9376 * [21] CDEnd ::= ']]>' 9377 */ 9378 void 9379 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9380 xmlChar *buf = NULL; 9381 int len = 0; 9382 int size = XML_PARSER_BUFFER_SIZE; 9383 int r, rl; 9384 int s, sl; 9385 int cur, l; 9386 int count = 0; 9387 9388 /* Check 2.6.0 was NXT(0) not RAW */ 9389 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9390 SKIP(9); 9391 } else 9392 return; 9393 9394 ctxt->instate = XML_PARSER_CDATA_SECTION; 9395 r = CUR_CHAR(rl); 9396 if (!IS_CHAR(r)) { 9397 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9398 ctxt->instate = XML_PARSER_CONTENT; 9399 return; 9400 } 9401 NEXTL(rl); 9402 s = CUR_CHAR(sl); 9403 if (!IS_CHAR(s)) { 9404 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9405 ctxt->instate = XML_PARSER_CONTENT; 9406 return; 9407 } 9408 NEXTL(sl); 9409 cur = CUR_CHAR(l); 9410 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9411 if (buf == NULL) { 9412 xmlErrMemory(ctxt, NULL); 9413 return; 9414 } 9415 while (IS_CHAR(cur) && 9416 ((r != ']') || (s != ']') || (cur != '>'))) { 9417 if (len + 5 >= size) { 9418 xmlChar *tmp; 9419 9420 size *= 2; 9421 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9422 if (tmp == NULL) { 9423 xmlFree(buf); 9424 xmlErrMemory(ctxt, NULL); 9425 return; 9426 } 9427 buf = tmp; 9428 } 9429 COPY_BUF(rl,buf,len,r); 9430 r = s; 9431 rl = sl; 9432 s = cur; 9433 sl = l; 9434 count++; 9435 if (count > 50) { 9436 GROW; 9437 count = 0; 9438 } 9439 NEXTL(l); 9440 cur = CUR_CHAR(l); 9441 } 9442 buf[len] = 0; 9443 ctxt->instate = XML_PARSER_CONTENT; 9444 if (cur != '>') { 9445 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9446 "CData section not finished\n%.50s\n", buf); 9447 xmlFree(buf); 9448 return; 9449 } 9450 NEXTL(l); 9451 9452 /* 9453 * OK the buffer is to be consumed as cdata. 
9454 */ 9455 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9456 if (ctxt->sax->cdataBlock != NULL) 9457 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9458 else if (ctxt->sax->characters != NULL) 9459 ctxt->sax->characters(ctxt->userData, buf, len); 9460 } 9461 xmlFree(buf); 9462 } 9463 9464 /** 9465 * xmlParseContent: 9466 * @ctxt: an XML parser context 9467 * 9468 * Parse a content: 9469 * 9470 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9471 */ 9472 9473 void 9474 xmlParseContent(xmlParserCtxtPtr ctxt) { 9475 GROW; 9476 while ((RAW != 0) && 9477 ((RAW != '<') || (NXT(1) != '/')) && 9478 (ctxt->instate != XML_PARSER_EOF)) { 9479 const xmlChar *test = CUR_PTR; 9480 unsigned int cons = ctxt->input->consumed; 9481 const xmlChar *cur = ctxt->input->cur; 9482 9483 /* 9484 * First case : a Processing Instruction. 9485 */ 9486 if ((*cur == '<') && (cur[1] == '?')) { 9487 xmlParsePI(ctxt); 9488 } 9489 9490 /* 9491 * Second case : a CDSection 9492 */ 9493 /* 2.6.0 test was *cur not RAW */ 9494 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9495 xmlParseCDSect(ctxt); 9496 } 9497 9498 /* 9499 * Third case : a comment 9500 */ 9501 else if ((*cur == '<') && (NXT(1) == '!') && 9502 (NXT(2) == '-') && (NXT(3) == '-')) { 9503 xmlParseComment(ctxt); 9504 ctxt->instate = XML_PARSER_CONTENT; 9505 } 9506 9507 /* 9508 * Fourth case : a sub-element. 9509 */ 9510 else if (*cur == '<') { 9511 xmlParseElement(ctxt); 9512 } 9513 9514 /* 9515 * Fifth case : a reference. If if has not been resolved, 9516 * parsing returns it's Name, create the node 9517 */ 9518 9519 else if (*cur == '&') { 9520 xmlParseReference(ctxt); 9521 } 9522 9523 /* 9524 * Last case, text. Note that References are handled directly. 9525 */ 9526 else { 9527 xmlParseCharData(ctxt, 0); 9528 } 9529 9530 GROW; 9531 /* 9532 * Pop-up of finished entities. 
9533 */ 9534 while ((RAW == 0) && (ctxt->inputNr > 1)) 9535 xmlPopInput(ctxt); 9536 SHRINK; 9537 9538 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9539 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9540 "detected an error in element content\n"); 9541 ctxt->instate = XML_PARSER_EOF; 9542 break; 9543 } 9544 } 9545 } 9546 9547 /** 9548 * xmlParseElement: 9549 * @ctxt: an XML parser context 9550 * 9551 * parse an XML element, this is highly recursive 9552 * 9553 * [39] element ::= EmptyElemTag | STag content ETag 9554 * 9555 * [ WFC: Element Type Match ] 9556 * The Name in an element's end-tag must match the element type in the 9557 * start-tag. 9558 * 9559 */ 9560 9561 void 9562 xmlParseElement(xmlParserCtxtPtr ctxt) { 9563 const xmlChar *name; 9564 const xmlChar *prefix = NULL; 9565 const xmlChar *URI = NULL; 9566 xmlParserNodeInfo node_info; 9567 int line, tlen; 9568 xmlNodePtr ret; 9569 int nsNr = ctxt->nsNr; 9570 9571 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 9572 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9573 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 9574 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 9575 xmlParserMaxDepth); 9576 ctxt->instate = XML_PARSER_EOF; 9577 return; 9578 } 9579 9580 /* Capture start position */ 9581 if (ctxt->record_info) { 9582 node_info.begin_pos = ctxt->input->consumed + 9583 (CUR_PTR - ctxt->input->base); 9584 node_info.begin_line = ctxt->input->line; 9585 } 9586 9587 if (ctxt->spaceNr == 0) 9588 spacePush(ctxt, -1); 9589 else if (*ctxt->space == -2) 9590 spacePush(ctxt, -1); 9591 else 9592 spacePush(ctxt, *ctxt->space); 9593 9594 line = ctxt->input->line; 9595 #ifdef LIBXML_SAX1_ENABLED 9596 if (ctxt->sax2) 9597 #endif /* LIBXML_SAX1_ENABLED */ 9598 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9599 #ifdef LIBXML_SAX1_ENABLED 9600 else 9601 name = xmlParseStartTag(ctxt); 9602 #endif /* LIBXML_SAX1_ENABLED */ 9603 if (ctxt->instate == XML_PARSER_EOF) 9604 return; 9605 if (name 
== NULL) { 9606 spacePop(ctxt); 9607 return; 9608 } 9609 namePush(ctxt, name); 9610 ret = ctxt->node; 9611 9612 #ifdef LIBXML_VALID_ENABLED 9613 /* 9614 * [ VC: Root Element Type ] 9615 * The Name in the document type declaration must match the element 9616 * type of the root element. 9617 */ 9618 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9619 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9620 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9621 #endif /* LIBXML_VALID_ENABLED */ 9622 9623 /* 9624 * Check for an Empty Element. 9625 */ 9626 if ((RAW == '/') && (NXT(1) == '>')) { 9627 SKIP(2); 9628 if (ctxt->sax2) { 9629 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9630 (!ctxt->disableSAX)) 9631 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 9632 #ifdef LIBXML_SAX1_ENABLED 9633 } else { 9634 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 9635 (!ctxt->disableSAX)) 9636 ctxt->sax->endElement(ctxt->userData, name); 9637 #endif /* LIBXML_SAX1_ENABLED */ 9638 } 9639 namePop(ctxt); 9640 spacePop(ctxt); 9641 if (nsNr != ctxt->nsNr) 9642 nsPop(ctxt, ctxt->nsNr - nsNr); 9643 if ( ret != NULL && ctxt->record_info ) { 9644 node_info.end_pos = ctxt->input->consumed + 9645 (CUR_PTR - ctxt->input->base); 9646 node_info.end_line = ctxt->input->line; 9647 node_info.node = ret; 9648 xmlParserAddNodeInfo(ctxt, &node_info); 9649 } 9650 return; 9651 } 9652 if (RAW == '>') { 9653 NEXT1; 9654 } else { 9655 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 9656 "Couldn't find end of Start Tag %s line %d\n", 9657 name, line, NULL); 9658 9659 /* 9660 * end of parsing of this node. 
9661 */ 9662 nodePop(ctxt); 9663 namePop(ctxt); 9664 spacePop(ctxt); 9665 if (nsNr != ctxt->nsNr) 9666 nsPop(ctxt, ctxt->nsNr - nsNr); 9667 9668 /* 9669 * Capture end position and add node 9670 */ 9671 if ( ret != NULL && ctxt->record_info ) { 9672 node_info.end_pos = ctxt->input->consumed + 9673 (CUR_PTR - ctxt->input->base); 9674 node_info.end_line = ctxt->input->line; 9675 node_info.node = ret; 9676 xmlParserAddNodeInfo(ctxt, &node_info); 9677 } 9678 return; 9679 } 9680 9681 /* 9682 * Parse the content of the element: 9683 */ 9684 xmlParseContent(ctxt); 9685 if (!IS_BYTE_CHAR(RAW)) { 9686 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9687 "Premature end of data in tag %s line %d\n", 9688 name, line, NULL); 9689 9690 /* 9691 * end of parsing of this node. 9692 */ 9693 nodePop(ctxt); 9694 namePop(ctxt); 9695 spacePop(ctxt); 9696 if (nsNr != ctxt->nsNr) 9697 nsPop(ctxt, ctxt->nsNr - nsNr); 9698 return; 9699 } 9700 9701 /* 9702 * parse the end of tag: '</' should be here. 9703 */ 9704 if (ctxt->sax2) { 9705 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 9706 namePop(ctxt); 9707 } 9708 #ifdef LIBXML_SAX1_ENABLED 9709 else 9710 xmlParseEndTag1(ctxt, line); 9711 #endif /* LIBXML_SAX1_ENABLED */ 9712 9713 /* 9714 * Capture end position and add node 9715 */ 9716 if ( ret != NULL && ctxt->record_info ) { 9717 node_info.end_pos = ctxt->input->consumed + 9718 (CUR_PTR - ctxt->input->base); 9719 node_info.end_line = ctxt->input->line; 9720 node_info.node = ret; 9721 xmlParserAddNodeInfo(ctxt, &node_info); 9722 } 9723 } 9724 9725 /** 9726 * xmlParseVersionNum: 9727 * @ctxt: an XML parser context 9728 * 9729 * parse the XML version value. 9730 * 9731 * [26] VersionNum ::= '1.' 
[0-9]+ 9732 * 9733 * In practice allow [0-9].[0-9]+ at that level 9734 * 9735 * Returns the string giving the XML version number, or NULL 9736 */ 9737 xmlChar * 9738 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 9739 xmlChar *buf = NULL; 9740 int len = 0; 9741 int size = 10; 9742 xmlChar cur; 9743 9744 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9745 if (buf == NULL) { 9746 xmlErrMemory(ctxt, NULL); 9747 return(NULL); 9748 } 9749 cur = CUR; 9750 if (!((cur >= '0') && (cur <= '9'))) { 9751 xmlFree(buf); 9752 return(NULL); 9753 } 9754 buf[len++] = cur; 9755 NEXT; 9756 cur=CUR; 9757 if (cur != '.') { 9758 xmlFree(buf); 9759 return(NULL); 9760 } 9761 buf[len++] = cur; 9762 NEXT; 9763 cur=CUR; 9764 while ((cur >= '0') && (cur <= '9')) { 9765 if (len + 1 >= size) { 9766 xmlChar *tmp; 9767 9768 size *= 2; 9769 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9770 if (tmp == NULL) { 9771 xmlFree(buf); 9772 xmlErrMemory(ctxt, NULL); 9773 return(NULL); 9774 } 9775 buf = tmp; 9776 } 9777 buf[len++] = cur; 9778 NEXT; 9779 cur=CUR; 9780 } 9781 buf[len] = 0; 9782 return(buf); 9783 } 9784 9785 /** 9786 * xmlParseVersionInfo: 9787 * @ctxt: an XML parser context 9788 * 9789 * parse the XML version. 9790 * 9791 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 9792 * 9793 * [25] Eq ::= S? '=' S? 9794 * 9795 * Returns the version string, e.g. 
"1.0" 9796 */ 9797 9798 xmlChar * 9799 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 9800 xmlChar *version = NULL; 9801 9802 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 9803 SKIP(7); 9804 SKIP_BLANKS; 9805 if (RAW != '=') { 9806 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9807 return(NULL); 9808 } 9809 NEXT; 9810 SKIP_BLANKS; 9811 if (RAW == '"') { 9812 NEXT; 9813 version = xmlParseVersionNum(ctxt); 9814 if (RAW != '"') { 9815 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9816 } else 9817 NEXT; 9818 } else if (RAW == '\''){ 9819 NEXT; 9820 version = xmlParseVersionNum(ctxt); 9821 if (RAW != '\'') { 9822 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9823 } else 9824 NEXT; 9825 } else { 9826 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9827 } 9828 } 9829 return(version); 9830 } 9831 9832 /** 9833 * xmlParseEncName: 9834 * @ctxt: an XML parser context 9835 * 9836 * parse the XML encoding name 9837 * 9838 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 9839 * 9840 * Returns the encoding name value or NULL 9841 */ 9842 xmlChar * 9843 xmlParseEncName(xmlParserCtxtPtr ctxt) { 9844 xmlChar *buf = NULL; 9845 int len = 0; 9846 int size = 10; 9847 xmlChar cur; 9848 9849 cur = CUR; 9850 if (((cur >= 'a') && (cur <= 'z')) || 9851 ((cur >= 'A') && (cur <= 'Z'))) { 9852 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9853 if (buf == NULL) { 9854 xmlErrMemory(ctxt, NULL); 9855 return(NULL); 9856 } 9857 9858 buf[len++] = cur; 9859 NEXT; 9860 cur = CUR; 9861 while (((cur >= 'a') && (cur <= 'z')) || 9862 ((cur >= 'A') && (cur <= 'Z')) || 9863 ((cur >= '0') && (cur <= '9')) || 9864 (cur == '.') || (cur == '_') || 9865 (cur == '-')) { 9866 if (len + 1 >= size) { 9867 xmlChar *tmp; 9868 9869 size *= 2; 9870 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9871 if (tmp == NULL) { 9872 xmlErrMemory(ctxt, NULL); 9873 xmlFree(buf); 9874 return(NULL); 9875 } 9876 buf = tmp; 9877 } 9878 buf[len++] = cur; 9879 NEXT; 9880 cur = CUR; 
9881 if (cur == 0) { 9882 SHRINK; 9883 GROW; 9884 cur = CUR; 9885 } 9886 } 9887 buf[len] = 0; 9888 } else { 9889 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 9890 } 9891 return(buf); 9892 } 9893 9894 /** 9895 * xmlParseEncodingDecl: 9896 * @ctxt: an XML parser context 9897 * 9898 * parse the XML encoding declaration 9899 * 9900 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 9901 * 9902 * this setups the conversion filters. 9903 * 9904 * Returns the encoding value or NULL 9905 */ 9906 9907 const xmlChar * 9908 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 9909 xmlChar *encoding = NULL; 9910 9911 SKIP_BLANKS; 9912 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 9913 SKIP(8); 9914 SKIP_BLANKS; 9915 if (RAW != '=') { 9916 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9917 return(NULL); 9918 } 9919 NEXT; 9920 SKIP_BLANKS; 9921 if (RAW == '"') { 9922 NEXT; 9923 encoding = xmlParseEncName(ctxt); 9924 if (RAW != '"') { 9925 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9926 } else 9927 NEXT; 9928 } else if (RAW == '\''){ 9929 NEXT; 9930 encoding = xmlParseEncName(ctxt); 9931 if (RAW != '\'') { 9932 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9933 } else 9934 NEXT; 9935 } else { 9936 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9937 } 9938 9939 /* 9940 * Non standard parsing, allowing the user to ignore encoding 9941 */ 9942 if (ctxt->options & XML_PARSE_IGNORE_ENC) 9943 return(encoding); 9944 9945 /* 9946 * UTF-16 encoding stwich has already taken place at this stage, 9947 * more over the little-endian/big-endian selection is already done 9948 */ 9949 if ((encoding != NULL) && 9950 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 9951 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 9952 /* 9953 * If no encoding was passed to the parser, that we are 9954 * using UTF-16 and no decoder is present i.e. 
the 9955 * document is apparently UTF-8 compatible, then raise an 9956 * encoding mismatch fatal error 9957 */ 9958 if ((ctxt->encoding == NULL) && 9959 (ctxt->input->buf != NULL) && 9960 (ctxt->input->buf->encoder == NULL)) { 9961 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 9962 "Document labelled UTF-16 but has UTF-8 content\n"); 9963 } 9964 if (ctxt->encoding != NULL) 9965 xmlFree((xmlChar *) ctxt->encoding); 9966 ctxt->encoding = encoding; 9967 } 9968 /* 9969 * UTF-8 encoding is handled natively 9970 */ 9971 else if ((encoding != NULL) && 9972 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 9973 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 9974 if (ctxt->encoding != NULL) 9975 xmlFree((xmlChar *) ctxt->encoding); 9976 ctxt->encoding = encoding; 9977 } 9978 else if (encoding != NULL) { 9979 xmlCharEncodingHandlerPtr handler; 9980 9981 if (ctxt->input->encoding != NULL) 9982 xmlFree((xmlChar *) ctxt->input->encoding); 9983 ctxt->input->encoding = encoding; 9984 9985 handler = xmlFindCharEncodingHandler((const char *) encoding); 9986 if (handler != NULL) { 9987 xmlSwitchToEncoding(ctxt, handler); 9988 } else { 9989 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 9990 "Unsupported encoding %s\n", encoding); 9991 return(NULL); 9992 } 9993 } 9994 } 9995 return(encoding); 9996 } 9997 9998 /** 9999 * xmlParseSDDecl: 10000 * @ctxt: an XML parser context 10001 * 10002 * parse the XML standalone declaration 10003 * 10004 * [32] SDDecl ::= S 'standalone' Eq 10005 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10006 * 10007 * [ VC: Standalone Document Declaration ] 10008 * TODO The standalone document declaration must have the value "no" 10009 * if any external markup declarations contain declarations of: 10010 * - attributes with default values, if elements to which these 10011 * attributes apply appear in the document without specifications 10012 * of values for these attributes, or 10013 * - entities (other than amp, lt, gt, apos, quot), if 
references 10014 * to those entities appear in the document, or 10015 * - attributes with values subject to normalization, where the 10016 * attribute appears in the document with a value which will change 10017 * as a result of normalization, or 10018 * - element types with element content, if white space occurs directly 10019 * within any instance of those types. 10020 * 10021 * Returns: 10022 * 1 if standalone="yes" 10023 * 0 if standalone="no" 10024 * -2 if standalone attribute is missing or invalid 10025 * (A standalone value of -2 means that the XML declaration was found, 10026 * but no value was specified for the standalone attribute). 10027 */ 10028 10029 int 10030 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10031 int standalone = -2; 10032 10033 SKIP_BLANKS; 10034 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10035 SKIP(10); 10036 SKIP_BLANKS; 10037 if (RAW != '=') { 10038 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10039 return(standalone); 10040 } 10041 NEXT; 10042 SKIP_BLANKS; 10043 if (RAW