1 /* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel (at) veillard.com 8 */ 9 10 #define IN_LIBXML 11 #include "libxml.h" 12 13 #if defined(WIN32) && !defined (__CYGWIN__) 14 #define XML_DIR_SEP '\\' 15 #else 16 #define XML_DIR_SEP '/' 17 #endif 18 19 #include <string.h> 20 #ifdef HAVE_CTYPE_H 21 #include <ctype.h> 22 #endif 23 #ifdef HAVE_STDLIB_H 24 #include <stdlib.h> 25 #endif 26 #ifdef HAVE_SYS_STAT_H 27 #include <sys/stat.h> 28 #endif 29 #ifdef HAVE_FCNTL_H 30 #include <fcntl.h> 31 #endif 32 #ifdef HAVE_UNISTD_H 33 #include <unistd.h> 34 #endif 35 #ifdef HAVE_ZLIB_H 36 #include <zlib.h> 37 #endif 38 39 #include <libxml/xmlmemory.h> 40 #include <libxml/tree.h> 41 #include <libxml/parser.h> 42 #include <libxml/parserInternals.h> 43 #include <libxml/valid.h> 44 #include <libxml/entities.h> 45 #include <libxml/xmlerror.h> 46 #include <libxml/encoding.h> 47 #include <libxml/valid.h> 48 #include <libxml/xmlIO.h> 49 #include <libxml/uri.h> 50 #include <libxml/dict.h> 51 #include <libxml/SAX.h> 52 #ifdef LIBXML_CATALOG_ENABLED 53 #include <libxml/catalog.h> 54 #endif 55 #include <libxml/globals.h> 56 #include <libxml/chvalid.h> 57 58 #include "buf.h" 59 #include "enc.h" 60 61 /* 62 * Various global defaults for parsing 63 */ 64 65 /** 66 * xmlCheckVersion: 67 * @version: the include version number 68 * 69 * check the compiled lib version against the include one. 70 * This can warn or immediately kill the application 71 */ 72 void 73 xmlCheckVersion(int version) { 74 int myversion = (int) LIBXML_VERSION; 75 76 xmlInitParser(); 77 78 if ((myversion / 10000) != (version / 10000)) { 79 xmlGenericError(xmlGenericErrorContext, 80 "Fatal: program compiled against libxml %d using libxml %d\n", 81 (version / 10000), (myversion / 10000)); 82 fprintf(stderr, 83 "Fatal: program compiled against libxml %d using libxml %d\n", 84 (version / 10000), (myversion / 10000)); 85 } 86 if ((myversion / 100) < (version / 100)) { 87 xmlGenericError(xmlGenericErrorContext, 88 "Warning: program compiled against libxml %d using older %d\n", 89 (version / 100), (myversion / 100)); 90 } 91 } 92 93 94 /************************************************************************ 95 * * 96 * Some factorized error routines * 97 * * 98 ************************************************************************/ 99 100 101 /** 102 * xmlErrMemory: 103 * @ctxt: an XML parser context 104 * @extra: extra informations 105 * 106 * Handle a redefinition of attribute error 107 */ 108 void 109 xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 110 { 111 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 112 (ctxt->instate == XML_PARSER_EOF)) 113 return; 114 if (ctxt != NULL) { 115 ctxt->errNo = XML_ERR_NO_MEMORY; 116 ctxt->instate = XML_PARSER_EOF; 117 ctxt->disableSAX = 1; 118 } 119 if (extra) 120 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 121 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 122 NULL, NULL, 0, 0, 123 "Memory allocation failed : %s\n", extra); 124 else 125 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 126 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 127 NULL, NULL, 0, 0, "Memory allocation failed\n"); 128 } 129 130 /** 131 * __xmlErrEncoding: 132 * @ctxt: an XML parser context 133 * @xmlerr: the error number 134 * @msg: the error message 135 * @str1: an string info 136 * @str2: an string info 137 * 138 * Handle an encoding error 139 */ 140 void 141 __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 142 const char *msg, const xmlChar * str1, const xmlChar * str2) 143 { 144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 145 (ctxt->instate == XML_PARSER_EOF)) 146 return; 147 if (ctxt != NULL) 148 ctxt->errNo = xmlerr; 149 __xmlRaiseError(NULL, NULL, NULL, 150 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 151 NULL, 0, (const char *) str1, (const char *) str2, 152 NULL, 0, 0, msg, str1, str2); 153 if (ctxt != NULL) { 154 ctxt->wellFormed = 0; 155 if (ctxt->recovery == 0) 156 ctxt->disableSAX = 1; 157 } 158 } 159 160 /** 161 * xmlErrInternal: 162 * @ctxt: an XML parser context 163 * @msg: the error message 164 * @str: error informations 165 * 166 * Handle an internal error 167 */ 168 static void 169 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 170 { 171 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 172 (ctxt->instate == XML_PARSER_EOF)) 173 return; 174 if (ctxt != NULL) 175 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 176 __xmlRaiseError(NULL, NULL, NULL, 177 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 178 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 179 0, 0, msg, str); 180 if (ctxt != NULL) { 181 ctxt->wellFormed = 0; 182 if (ctxt->recovery == 0) 183 ctxt->disableSAX = 1; 184 } 185 } 186 187 /** 188 * xmlErrEncodingInt: 189 * @ctxt: an XML parser context 190 * @error: the error number 191 * @msg: the error message 192 * @val: an integer value 193 * 194 * n encoding error 195 */ 196 static void 197 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 198 const char *msg, int val) 199 { 200 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 201 (ctxt->instate == XML_PARSER_EOF)) 202 return; 203 if (ctxt != NULL) 204 ctxt->errNo = error; 205 __xmlRaiseError(NULL, NULL, NULL, 206 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 207 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 208 if (ctxt != NULL) { 209 ctxt->wellFormed = 0; 210 if (ctxt->recovery == 0) 211 ctxt->disableSAX = 1; 212 } 213 } 214 215 /** 216 * xmlIsLetter: 217 * @c: an unicode character (int) 218 * 219 * Check whether the character is allowed by the production 220 * [84] Letter ::= BaseChar | Ideographic 221 * 222 * Returns 0 if not, non-zero otherwise 223 */ 224 int 225 xmlIsLetter(int c) { 226 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 227 } 228 229 /************************************************************************ 230 * * 231 * Input handling functions for progressive parsing * 232 * * 233 ************************************************************************/ 234 235 /* #define DEBUG_INPUT */ 236 /* #define DEBUG_STACK */ 237 /* #define DEBUG_PUSH */ 238 239 240 /* we need to keep enough input to show errors in context */ 241 #define LINE_LEN 80 242 243 #ifdef DEBUG_INPUT 244 #define CHECK_BUFFER(in) check_buffer(in) 245 246 static 247 void check_buffer(xmlParserInputPtr in) { 248 if (in->base != xmlBufContent(in->buf->buffer)) { 249 xmlGenericError(xmlGenericErrorContext, 250 "xmlParserInput: base mismatch problem\n"); 251 } 252 if (in->cur < in->base) { 253 xmlGenericError(xmlGenericErrorContext, 254 "xmlParserInput: cur < base problem\n"); 255 } 256 if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { 257 xmlGenericError(xmlGenericErrorContext, 258 "xmlParserInput: cur > base + use problem\n"); 259 } 260 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", 261 (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, 262 xmlBufUse(in->buf->buffer)); 263 } 264 265 #else 266 #define CHECK_BUFFER(in) 267 #endif 268 269 270 /** 271 * xmlParserInputRead: 272 * @in: an XML parser input 273 * @len: an indicative size for the lookahead 274 * 275 * This function was internal and is deprecated. 276 * 277 * Returns -1 as this is an error to use it. 278 */ 279 int 280 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { 281 return(-1); 282 } 283 284 /** 285 * xmlParserInputGrow: 286 * @in: an XML parser input 287 * @len: an indicative size for the lookahead 288 * 289 * This function increase the input for the parser. It tries to 290 * preserve pointers to the input buffer, and keep already read data 291 * 292 * Returns the amount of char read, or -1 in case of error, 0 indicate the 293 * end of this entity 294 */ 295 int 296 xmlParserInputGrow(xmlParserInputPtr in, int len) { 297 size_t ret; 298 size_t indx; 299 const xmlChar *content; 300 301 if ((in == NULL) || (len < 0)) return(-1); 302 #ifdef DEBUG_INPUT 303 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 304 #endif 305 if (in->buf == NULL) return(-1); 306 if (in->base == NULL) return(-1); 307 if (in->cur == NULL) return(-1); 308 if (in->buf->buffer == NULL) return(-1); 309 310 CHECK_BUFFER(in); 311 312 indx = in->cur - in->base; 313 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { 314 315 CHECK_BUFFER(in); 316 317 return(0); 318 } 319 if (in->buf->readcallback != NULL) { 320 ret = xmlParserInputBufferGrow(in->buf, len); 321 } else 322 return(0); 323 324 /* 325 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 326 * block, but we use it really as an integer to do some 327 * pointer arithmetic. Insure will raise it as a bug but in 328 * that specific case, that's not ! 329 */ 330 331 content = xmlBufContent(in->buf->buffer); 332 if (in->base != content) { 333 /* 334 * the buffer has been reallocated 335 */ 336 indx = in->cur - in->base; 337 in->base = content; 338 in->cur = &content[indx]; 339 } 340 in->end = xmlBufEnd(in->buf->buffer); 341 342 CHECK_BUFFER(in); 343 344 return(ret); 345 } 346 347 /** 348 * xmlParserInputShrink: 349 * @in: an XML parser input 350 * 351 * This function removes used input for the parser. 352 */ 353 void 354 xmlParserInputShrink(xmlParserInputPtr in) { 355 size_t used; 356 size_t ret; 357 size_t indx; 358 const xmlChar *content; 359 360 #ifdef DEBUG_INPUT 361 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 362 #endif 363 if (in == NULL) return; 364 if (in->buf == NULL) return; 365 if (in->base == NULL) return; 366 if (in->cur == NULL) return; 367 if (in->buf->buffer == NULL) return; 368 369 CHECK_BUFFER(in); 370 371 used = in->cur - xmlBufContent(in->buf->buffer); 372 /* 373 * Do not shrink on large buffers whose only a tiny fraction 374 * was consumed 375 */ 376 if (used > INPUT_CHUNK) { 377 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); 378 if (ret > 0) { 379 in->cur -= ret; 380 in->consumed += ret; 381 } 382 in->end = xmlBufEnd(in->buf->buffer); 383 } 384 385 CHECK_BUFFER(in); 386 387 if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) { 388 return; 389 } 390 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 391 content = xmlBufContent(in->buf->buffer); 392 if (in->base != content) { 393 /* 394 * the buffer has been reallocated 395 */ 396 indx = in->cur - in->base; 397 in->base = content; 398 in->cur = &content[indx]; 399 } 400 in->end = xmlBufEnd(in->buf->buffer); 401 402 CHECK_BUFFER(in); 403 } 404 405 /************************************************************************ 406 * * 407 * UTF8 character input and related functions * 408 * * 409 ************************************************************************/ 410 411 /** 412 * xmlNextChar: 413 * @ctxt: the XML parser context 414 * 415 * Skip to the next char input char. 416 */ 417 418 void 419 xmlNextChar(xmlParserCtxtPtr ctxt) 420 { 421 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 422 (ctxt->input == NULL)) 423 return; 424 425 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 426 if ((*ctxt->input->cur == 0) && 427 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && 428 (ctxt->instate != XML_PARSER_COMMENT)) { 429 /* 430 * If we are at the end of the current entity and 431 * the context allows it, we pop consumed entities 432 * automatically. 433 * the auto closing should be blocked in other cases 434 */ 435 xmlPopInput(ctxt); 436 } else { 437 const unsigned char *cur; 438 unsigned char c; 439 440 /* 441 * 2.11 End-of-Line Handling 442 * the literal two-character sequence "#xD#xA" or a standalone 443 * literal #xD, an XML processor must pass to the application 444 * the single character #xA. 445 */ 446 if (*(ctxt->input->cur) == '\n') { 447 ctxt->input->line++; ctxt->input->col = 1; 448 } else 449 ctxt->input->col++; 450 451 /* 452 * We are supposed to handle UTF8, check it's valid 453 * From rfc2044: encoding of the Unicode values on UTF-8: 454 * 455 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 456 * 0000 0000-0000 007F 0xxxxxxx 457 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 458 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 459 * 460 * Check for the 0x110000 limit too 461 */ 462 cur = ctxt->input->cur; 463 464 c = *cur; 465 if (c & 0x80) { 466 if (c == 0xC0) 467 goto encoding_error; 468 if (cur[1] == 0) { 469 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 470 cur = ctxt->input->cur; 471 } 472 if ((cur[1] & 0xc0) != 0x80) 473 goto encoding_error; 474 if ((c & 0xe0) == 0xe0) { 475 unsigned int val; 476 477 if (cur[2] == 0) { 478 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 479 cur = ctxt->input->cur; 480 } 481 if ((cur[2] & 0xc0) != 0x80) 482 goto encoding_error; 483 if ((c & 0xf0) == 0xf0) { 484 if (cur[3] == 0) { 485 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 486 cur = ctxt->input->cur; 487 } 488 if (((c & 0xf8) != 0xf0) || 489 ((cur[3] & 0xc0) != 0x80)) 490 goto encoding_error; 491 /* 4-byte code */ 492 ctxt->input->cur += 4; 493 val = (cur[0] & 0x7) << 18; 494 val |= (cur[1] & 0x3f) << 12; 495 val |= (cur[2] & 0x3f) << 6; 496 val |= cur[3] & 0x3f; 497 } else { 498 /* 3-byte code */ 499 ctxt->input->cur += 3; 500 val = (cur[0] & 0xf) << 12; 501 val |= (cur[1] & 0x3f) << 6; 502 val |= cur[2] & 0x3f; 503 } 504 if (((val > 0xd7ff) && (val < 0xe000)) || 505 ((val > 0xfffd) && (val < 0x10000)) || 506 (val >= 0x110000)) { 507 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 508 "Char 0x%X out of allowed range\n", 509 val); 510 } 511 } else 512 /* 2-byte code */ 513 ctxt->input->cur += 2; 514 } else 515 /* 1-byte code */ 516 ctxt->input->cur++; 517 518 ctxt->nbChars++; 519 if (*ctxt->input->cur == 0) 520 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 521 } 522 } else { 523 /* 524 * Assume it's a fixed length encoding (1) with 525 * a compatible encoding for the ASCII set, since 526 * XML constructs only use < 128 chars 527 */ 528 529 if (*(ctxt->input->cur) == '\n') { 530 ctxt->input->line++; ctxt->input->col = 1; 531 } else 532 ctxt->input->col++; 533 ctxt->input->cur++; 534 ctxt->nbChars++; 535 if (*ctxt->input->cur == 0) 536 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 537 } 538 if ((*ctxt->input->cur == '%') && (!ctxt->html)) 539 xmlParserHandlePEReference(ctxt); 540 if ((*ctxt->input->cur == 0) && 541 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 542 xmlPopInput(ctxt); 543 return; 544 encoding_error: 545 /* 546 * If we detect an UTF8 error that probably mean that the 547 * input encoding didn't get properly advertised in the 548 * declaration header. Report the error and switch the encoding 549 * to ISO-Latin-1 (if you don't like this policy, just declare the 550 * encoding !) 551 */ 552 if ((ctxt == NULL) || (ctxt->input == NULL) || 553 (ctxt->input->end - ctxt->input->cur < 4)) { 554 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 555 "Input is not proper UTF-8, indicate encoding !\n", 556 NULL, NULL); 557 } else { 558 char buffer[150]; 559 560 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 561 ctxt->input->cur[0], ctxt->input->cur[1], 562 ctxt->input->cur[2], ctxt->input->cur[3]); 563 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 564 "Input is not proper UTF-8, indicate encoding !\n%s", 565 BAD_CAST buffer, NULL); 566 } 567 ctxt->charset = XML_CHAR_ENCODING_8859_1; 568 ctxt->input->cur++; 569 return; 570 } 571 572 /** 573 * xmlCurrentChar: 574 * @ctxt: the XML parser context 575 * @len: pointer to the length of the char read 576 * 577 * The current char value, if using UTF-8 this may actually span multiple 578 * bytes in the input buffer. Implement the end of line normalization: 579 * 2.11 End-of-Line Handling 580 * Wherever an external parsed entity or the literal entity value 581 * of an internal parsed entity contains either the literal two-character 582 * sequence "#xD#xA" or a standalone literal #xD, an XML processor 583 * must pass to the application the single character #xA. 584 * This behavior can conveniently be produced by normalizing all 585 * line breaks to #xA on input, before parsing.) 586 * 587 * Returns the current char value and its length 588 */ 589 590 int 591 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 592 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 593 if (ctxt->instate == XML_PARSER_EOF) 594 return(0); 595 596 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 597 *len = 1; 598 return((int) *ctxt->input->cur); 599 } 600 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 601 /* 602 * We are supposed to handle UTF8, check it's valid 603 * From rfc2044: encoding of the Unicode values on UTF-8: 604 * 605 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 606 * 0000 0000-0000 007F 0xxxxxxx 607 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 608 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 609 * 610 * Check for the 0x110000 limit too 611 */ 612 const unsigned char *cur = ctxt->input->cur; 613 unsigned char c; 614 unsigned int val; 615 616 c = *cur; 617 if (c & 0x80) { 618 if (((c & 0x40) == 0) || (c == 0xC0)) 619 goto encoding_error; 620 if (cur[1] == 0) { 621 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 622 cur = ctxt->input->cur; 623 } 624 if ((cur[1] & 0xc0) != 0x80) 625 goto encoding_error; 626 if ((c & 0xe0) == 0xe0) { 627 if (cur[2] == 0) { 628 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 629 cur = ctxt->input->cur; 630 } 631 if ((cur[2] & 0xc0) != 0x80) 632 goto encoding_error; 633 if ((c & 0xf0) == 0xf0) { 634 if (cur[3] == 0) { 635 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 636 cur = ctxt->input->cur; 637 } 638 if (((c & 0xf8) != 0xf0) || 639 ((cur[3] & 0xc0) != 0x80)) 640 goto encoding_error; 641 /* 4-byte code */ 642 *len = 4; 643 val = (cur[0] & 0x7) << 18; 644 val |= (cur[1] & 0x3f) << 12; 645 val |= (cur[2] & 0x3f) << 6; 646 val |= cur[3] & 0x3f; 647 if (val < 0x10000) 648 goto encoding_error; 649 } else { 650 /* 3-byte code */ 651 *len = 3; 652 val = (cur[0] & 0xf) << 12; 653 val |= (cur[1] & 0x3f) << 6; 654 val |= cur[2] & 0x3f; 655 if (val < 0x800) 656 goto encoding_error; 657 } 658 } else { 659 /* 2-byte code */ 660 *len = 2; 661 val = (cur[0] & 0x1f) << 6; 662 val |= cur[1] & 0x3f; 663 if (val < 0x80) 664 goto encoding_error; 665 } 666 if (!IS_CHAR(val)) { 667 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 668 "Char 0x%X out of allowed range\n", val); 669 } 670 return(val); 671 } else { 672 /* 1-byte code */ 673 *len = 1; 674 if (*ctxt->input->cur == 0) 675 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 676 if ((*ctxt->input->cur == 0) && 677 (ctxt->input->end > ctxt->input->cur)) { 678 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 679 "Char 0x0 out of allowed range\n", 0); 680 } 681 if (*ctxt->input->cur == 0xD) { 682 if (ctxt->input->cur[1] == 0xA) { 683 ctxt->nbChars++; 684 ctxt->input->cur++; 685 } 686 return(0xA); 687 } 688 return((int) *ctxt->input->cur); 689 } 690 } 691 /* 692 * Assume it's a fixed length encoding (1) with 693 * a compatible encoding for the ASCII set, since 694 * XML constructs only use < 128 chars 695 */ 696 *len = 1; 697 if (*ctxt->input->cur == 0xD) { 698 if (ctxt->input->cur[1] == 0xA) { 699 ctxt->nbChars++; 700 ctxt->input->cur++; 701 } 702 return(0xA); 703 } 704 return((int) *ctxt->input->cur); 705 encoding_error: 706 /* 707 * An encoding problem may arise from a truncated input buffer 708 * splitting a character in the middle. In that case do not raise 709 * an error but return 0 to endicate an end of stream problem 710 */ 711 if (ctxt->input->end - ctxt->input->cur < 4) { 712 *len = 0; 713 return(0); 714 } 715 716 /* 717 * If we detect an UTF8 error that probably mean that the 718 * input encoding didn't get properly advertised in the 719 * declaration header. Report the error and switch the encoding 720 * to ISO-Latin-1 (if you don't like this policy, just declare the 721 * encoding !) 722 */ 723 { 724 char buffer[150]; 725 726 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 727 ctxt->input->cur[0], ctxt->input->cur[1], 728 ctxt->input->cur[2], ctxt->input->cur[3]); 729 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 730 "Input is not proper UTF-8, indicate encoding !\n%s", 731 BAD_CAST buffer, NULL); 732 } 733 ctxt->charset = XML_CHAR_ENCODING_8859_1; 734 *len = 1; 735 return((int) *ctxt->input->cur); 736 } 737 738 /** 739 * xmlStringCurrentChar: 740 * @ctxt: the XML parser context 741 * @cur: pointer to the beginning of the char 742 * @len: pointer to the length of the char read 743 * 744 * The current char value, if using UTF-8 this may actually span multiple 745 * bytes in the input buffer. 746 * 747 * Returns the current char value and its length 748 */ 749 750 int 751 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 752 { 753 if ((len == NULL) || (cur == NULL)) return(0); 754 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 755 /* 756 * We are supposed to handle UTF8, check it's valid 757 * From rfc2044: encoding of the Unicode values on UTF-8: 758 * 759 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 760 * 0000 0000-0000 007F 0xxxxxxx 761 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 762 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 763 * 764 * Check for the 0x110000 limit too 765 */ 766 unsigned char c; 767 unsigned int val; 768 769 c = *cur; 770 if (c & 0x80) { 771 if ((cur[1] & 0xc0) != 0x80) 772 goto encoding_error; 773 if ((c & 0xe0) == 0xe0) { 774 775 if ((cur[2] & 0xc0) != 0x80) 776 goto encoding_error; 777 if ((c & 0xf0) == 0xf0) { 778 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 779 goto encoding_error; 780 /* 4-byte code */ 781 *len = 4; 782 val = (cur[0] & 0x7) << 18; 783 val |= (cur[1] & 0x3f) << 12; 784 val |= (cur[2] & 0x3f) << 6; 785 val |= cur[3] & 0x3f; 786 } else { 787 /* 3-byte code */ 788 *len = 3; 789 val = (cur[0] & 0xf) << 12; 790 val |= (cur[1] & 0x3f) << 6; 791 val |= cur[2] & 0x3f; 792 } 793 } else { 794 /* 2-byte code */ 795 *len = 2; 796 val = (cur[0] & 0x1f) << 6; 797 val |= cur[1] & 0x3f; 798 } 799 if (!IS_CHAR(val)) { 800 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 801 "Char 0x%X out of allowed range\n", val); 802 } 803 return (val); 804 } else { 805 /* 1-byte code */ 806 *len = 1; 807 return ((int) *cur); 808 } 809 } 810 /* 811 * Assume it's a fixed length encoding (1) with 812 * a compatible encoding for the ASCII set, since 813 * XML constructs only use < 128 chars 814 */ 815 *len = 1; 816 return ((int) *cur); 817 encoding_error: 818 819 /* 820 * An encoding problem may arise from a truncated input buffer 821 * splitting a character in the middle. In that case do not raise 822 * an error but return 0 to endicate an end of stream problem 823 */ 824 if ((ctxt == NULL) || (ctxt->input == NULL) || 825 (ctxt->input->end - ctxt->input->cur < 4)) { 826 *len = 0; 827 return(0); 828 } 829 /* 830 * If we detect an UTF8 error that probably mean that the 831 * input encoding didn't get properly advertised in the 832 * declaration header. Report the error and switch the encoding 833 * to ISO-Latin-1 (if you don't like this policy, just declare the 834 * encoding !) 835 */ 836 { 837 char buffer[150]; 838 839 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 840 ctxt->input->cur[0], ctxt->input->cur[1], 841 ctxt->input->cur[2], ctxt->input->cur[3]); 842 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 843 "Input is not proper UTF-8, indicate encoding !\n%s", 844 BAD_CAST buffer, NULL); 845 } 846 *len = 1; 847 return ((int) *cur); 848 } 849 850 /** 851 * xmlCopyCharMultiByte: 852 * @out: pointer to an array of xmlChar 853 * @val: the char value 854 * 855 * append the char value in the array 856 * 857 * Returns the number of xmlChar written 858 */ 859 int 860 xmlCopyCharMultiByte(xmlChar *out, int val) { 861 if (out == NULL) return(0); 862 /* 863 * We are supposed to handle UTF8, check it's valid 864 * From rfc2044: encoding of the Unicode values on UTF-8: 865 * 866 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 867 * 0000 0000-0000 007F 0xxxxxxx 868 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 869 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 870 */ 871 if (val >= 0x80) { 872 xmlChar *savedout = out; 873 int bits; 874 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 875 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 876 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 877 else { 878 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 879 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 880 val); 881 return(0); 882 } 883 for ( ; bits >= 0; bits-= 6) 884 *out++= ((val >> bits) & 0x3F) | 0x80 ; 885 return (out - savedout); 886 } 887 *out = (xmlChar) val; 888 return 1; 889 } 890 891 /** 892 * xmlCopyChar: 893 * @len: Ignored, compatibility 894 * @out: pointer to an array of xmlChar 895 * @val: the char value 896 * 897 * append the char value in the array 898 * 899 * Returns the number of xmlChar written 900 */ 901 902 int 903 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 904 if (out == NULL) return(0); 905 /* the len parameter is ignored */ 906 if (val >= 0x80) { 907 return(xmlCopyCharMultiByte (out, val)); 908 } 909 *out = (xmlChar) val; 910 return 1; 911 } 912 913 /************************************************************************ 914 * * 915 * Commodity functions to switch encodings * 916 * * 917 ************************************************************************/ 918 919 static int 920 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 921 xmlCharEncodingHandlerPtr handler, int len); 922 static int 923 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 924 xmlCharEncodingHandlerPtr handler, int len); 925 /** 926 * xmlSwitchEncoding: 927 * @ctxt: the parser context 928 * @enc: the encoding value (number) 929 * 930 * change the input functions when discovering the character encoding 931 * of a given entity. 932 * 933 * Returns 0 in case of success, -1 otherwise 934 */ 935 int 936 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 937 { 938 xmlCharEncodingHandlerPtr handler; 939 int len = -1; 940 941 if (ctxt == NULL) return(-1); 942 switch (enc) { 943 case XML_CHAR_ENCODING_ERROR: 944 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 945 "encoding unknown\n", NULL, NULL); 946 return(-1); 947 case XML_CHAR_ENCODING_NONE: 948 /* let's assume it's UTF-8 without the XML decl */ 949 ctxt->charset = XML_CHAR_ENCODING_UTF8; 950 return(0); 951 case XML_CHAR_ENCODING_UTF8: 952 /* default encoding, no conversion should be needed */ 953 ctxt->charset = XML_CHAR_ENCODING_UTF8; 954 955 /* 956 * Errata on XML-1.0 June 20 2001 957 * Specific handling of the Byte Order Mark for 958 * UTF-8 959 */ 960 if ((ctxt->input != NULL) && 961 (ctxt->input->cur[0] == 0xEF) && 962 (ctxt->input->cur[1] == 0xBB) && 963 (ctxt->input->cur[2] == 0xBF)) { 964 ctxt->input->cur += 3; 965 } 966 return(0); 967 case XML_CHAR_ENCODING_UTF16LE: 968 case XML_CHAR_ENCODING_UTF16BE: 969 /*The raw input characters are encoded 970 *in UTF-16. As we expect this function 971 *to be called after xmlCharEncInFunc, we expect 972 *ctxt->input->cur to contain UTF-8 encoded characters. 973 *So the raw UTF16 Byte Order Mark 974 *has also been converted into 975 *an UTF-8 BOM. Let's skip that BOM. 976 */ 977 if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 978 (ctxt->input->cur[0] == 0xEF) && 979 (ctxt->input->cur[1] == 0xBB) && 980 (ctxt->input->cur[2] == 0xBF)) { 981 ctxt->input->cur += 3; 982 } 983 len = 90; 984 break; 985 case XML_CHAR_ENCODING_UCS2: 986 len = 90; 987 break; 988 case XML_CHAR_ENCODING_UCS4BE: 989 case XML_CHAR_ENCODING_UCS4LE: 990 case XML_CHAR_ENCODING_UCS4_2143: 991 case XML_CHAR_ENCODING_UCS4_3412: 992 len = 180; 993 break; 994 case XML_CHAR_ENCODING_EBCDIC: 995 case XML_CHAR_ENCODING_8859_1: 996 case XML_CHAR_ENCODING_8859_2: 997 case XML_CHAR_ENCODING_8859_3: 998 case XML_CHAR_ENCODING_8859_4: 999 case XML_CHAR_ENCODING_8859_5: 1000 case XML_CHAR_ENCODING_8859_6: 1001 case XML_CHAR_ENCODING_8859_7: 1002 case XML_CHAR_ENCODING_8859_8: 1003 case XML_CHAR_ENCODING_8859_9: 1004 case XML_CHAR_ENCODING_ASCII: 1005 case XML_CHAR_ENCODING_2022_JP: 1006 case XML_CHAR_ENCODING_SHIFT_JIS: 1007 case XML_CHAR_ENCODING_EUC_JP: 1008 len = 45; 1009 break; 1010 } 1011 handler = xmlGetCharEncodingHandler(enc); 1012 if (handler == NULL) { 1013 /* 1014 * Default handlers. 1015 */ 1016 switch (enc) { 1017 case XML_CHAR_ENCODING_ASCII: 1018 /* default encoding, no conversion should be needed */ 1019 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1020 return(0); 1021 case XML_CHAR_ENCODING_UTF16LE: 1022 break; 1023 case XML_CHAR_ENCODING_UTF16BE: 1024 break; 1025 case XML_CHAR_ENCODING_UCS4LE: 1026 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1027 "encoding not supported %s\n", 1028 BAD_CAST "USC4 little endian", NULL); 1029 break; 1030 case XML_CHAR_ENCODING_UCS4BE: 1031 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1032 "encoding not supported %s\n", 1033 BAD_CAST "USC4 big endian", NULL); 1034 break; 1035 case XML_CHAR_ENCODING_EBCDIC: 1036 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1037 "encoding not supported %s\n", 1038 BAD_CAST "EBCDIC", NULL); 1039 break; 1040 case XML_CHAR_ENCODING_UCS4_2143: 1041 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1042 "encoding not supported %s\n", 1043 BAD_CAST "UCS4 2143", NULL); 1044 break; 1045 case XML_CHAR_ENCODING_UCS4_3412: 1046 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1047 "encoding not supported %s\n", 1048 BAD_CAST "UCS4 3412", NULL); 1049 break; 1050 case XML_CHAR_ENCODING_UCS2: 1051 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1052 "encoding not supported %s\n", 1053 BAD_CAST "UCS2", NULL); 1054 break; 1055 case XML_CHAR_ENCODING_8859_1: 1056 case XML_CHAR_ENCODING_8859_2: 1057 case XML_CHAR_ENCODING_8859_3: 1058 case XML_CHAR_ENCODING_8859_4: 1059 case XML_CHAR_ENCODING_8859_5: 1060 case XML_CHAR_ENCODING_8859_6: 1061 case XML_CHAR_ENCODING_8859_7: 1062 case XML_CHAR_ENCODING_8859_8: 1063 case XML_CHAR_ENCODING_8859_9: 1064 /* 1065 * We used to keep the internal content in the 1066 * document encoding however this turns being unmaintainable 1067 * So xmlGetCharEncodingHandler() will return non-null 1068 * values for this now. 1069 */ 1070 if ((ctxt->inputNr == 1) && 1071 (ctxt->encoding == NULL) && 1072 (ctxt->input != NULL) && 1073 (ctxt->input->encoding != NULL)) { 1074 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1075 } 1076 ctxt->charset = enc; 1077 return(0); 1078 case XML_CHAR_ENCODING_2022_JP: 1079 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1080 "encoding not supported %s\n", 1081 BAD_CAST "ISO-2022-JP", NULL); 1082 break; 1083 case XML_CHAR_ENCODING_SHIFT_JIS: 1084 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1085 "encoding not supported %s\n", 1086 BAD_CAST "Shift_JIS", NULL); 1087 break; 1088 case XML_CHAR_ENCODING_EUC_JP: 1089 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1090 "encoding not supported %s\n", 1091 BAD_CAST "EUC-JP", NULL); 1092 break; 1093 default: 1094 break; 1095 } 1096 } 1097 if (handler == NULL) 1098 return(-1); 1099 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1100 return(xmlSwitchToEncodingInt(ctxt, handler, len)); 1101 } 1102 1103 /** 1104 * xmlSwitchInputEncoding: 1105 * @ctxt: the parser context 1106 * @input: the input stream 1107 * @handler: the encoding handler 1108 * @len: the number of bytes to convert for the first line or -1 1109 * 1110 * change the input functions when discovering the character encoding 1111 * of a given entity. 1112 * 1113 * Returns 0 in case of success, -1 otherwise 1114 */ 1115 static int 1116 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1117 xmlCharEncodingHandlerPtr handler, int len) 1118 { 1119 int nbchars; 1120 1121 if (handler == NULL) 1122 return (-1); 1123 if (input == NULL) 1124 return (-1); 1125 if (input->buf != NULL) { 1126 if (input->buf->encoder != NULL) { 1127 /* 1128 * Check in case the auto encoding detetection triggered 1129 * in already. 1130 */ 1131 if (input->buf->encoder == handler) 1132 return (0); 1133 1134 /* 1135 * "UTF-16" can be used for both LE and BE 1136 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1137 BAD_CAST "UTF-16", 6)) && 1138 (!xmlStrncmp(BAD_CAST handler->name, 1139 BAD_CAST "UTF-16", 6))) { 1140 return(0); 1141 } 1142 */ 1143 1144 /* 1145 * Note: this is a bit dangerous, but that's what it 1146 * takes to use nearly compatible signature for different 1147 * encodings. 1148 */ 1149 xmlCharEncCloseFunc(input->buf->encoder); 1150 input->buf->encoder = handler; 1151 return (0); 1152 } 1153 input->buf->encoder = handler; 1154 1155 /* 1156 * Is there already some content down the pipe to convert ? 1157 */ 1158 if (xmlBufIsEmpty(input->buf->buffer) == 0) { 1159 int processed; 1160 unsigned int use; 1161 1162 /* 1163 * Specific handling of the Byte Order Mark for 1164 * UTF-16 1165 */ 1166 if ((handler->name != NULL) && 1167 (!strcmp(handler->name, "UTF-16LE") || 1168 !strcmp(handler->name, "UTF-16")) && 1169 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1170 input->cur += 2; 1171 } 1172 if ((handler->name != NULL) && 1173 (!strcmp(handler->name, "UTF-16BE")) && 1174 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1175 input->cur += 2; 1176 } 1177 /* 1178 * Errata on XML-1.0 June 20 2001 1179 * Specific handling of the Byte Order Mark for 1180 * UTF-8 1181 */ 1182 if ((handler->name != NULL) && 1183 (!strcmp(handler->name, "UTF-8")) && 1184 (input->cur[0] == 0xEF) && 1185 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1186 input->cur += 3; 1187 } 1188 1189 /* 1190 * Shrink the current input buffer. 1191 * Move it as the raw buffer and create a new input buffer 1192 */ 1193 processed = input->cur - input->base; 1194 xmlBufShrink(input->buf->buffer, processed); 1195 input->buf->raw = input->buf->buffer; 1196 input->buf->buffer = xmlBufCreate(); 1197 input->buf->rawconsumed = processed; 1198 use = xmlBufUse(input->buf->raw); 1199 1200 if (ctxt->html) { 1201 /* 1202 * convert as much as possible of the buffer 1203 */ 1204 nbchars = xmlCharEncInput(input->buf, 1); 1205 } else { 1206 /* 1207 * convert just enough to get 1208 * '<?xml version="1.0" encoding="xxx"?>' 1209 * parsed with the autodetected encoding 1210 * into the parser reading buffer. 1211 */ 1212 nbchars = xmlCharEncFirstLineInput(input->buf, len); 1213 } 1214 if (nbchars < 0) { 1215 xmlErrInternal(ctxt, 1216 "switching encoding: encoder error\n", 1217 NULL); 1218 return (-1); 1219 } 1220 input->buf->rawconsumed += use - xmlBufUse(input->buf->raw); 1221 xmlBufResetInput(input->buf->buffer, input); 1222 } 1223 return (0); 1224 } else if (input->length == 0) { 1225 /* 1226 * When parsing a static memory array one must know the 1227 * size to be able to convert the buffer. 1228 */ 1229 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1230 return (-1); 1231 } 1232 return (0); 1233 } 1234 1235 /** 1236 * xmlSwitchInputEncoding: 1237 * @ctxt: the parser context 1238 * @input: the input stream 1239 * @handler: the encoding handler 1240 * 1241 * change the input functions when discovering the character encoding 1242 * of a given entity. 1243 * 1244 * Returns 0 in case of success, -1 otherwise 1245 */ 1246 int 1247 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1248 xmlCharEncodingHandlerPtr handler) { 1249 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1250 } 1251 1252 /** 1253 * xmlSwitchToEncodingInt: 1254 * @ctxt: the parser context 1255 * @handler: the encoding handler 1256 * @len: the length to convert or -1 1257 * 1258 * change the input functions when discovering the character encoding 1259 * of a given entity, and convert only @len bytes of the output, this 1260 * is needed on auto detect to allows any declared encoding later to 1261 * convert the actual content after the xmlDecl 1262 * 1263 * Returns 0 in case of success, -1 otherwise 1264 */ 1265 static int 1266 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1267 xmlCharEncodingHandlerPtr handler, int len) { 1268 int ret = 0; 1269 1270 if (handler != NULL) { 1271 if (ctxt->input != NULL) { 1272 ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 1273 } else { 1274 xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1275 NULL); 1276 return(-1); 1277 } 1278 /* 1279 * The parsing is now done in UTF8 natively 1280 */ 1281 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1282 } else 1283 return(-1); 1284 return(ret); 1285 } 1286 1287 /** 1288 * xmlSwitchToEncoding: 1289 * @ctxt: the parser context 1290 * @handler: the encoding handler 1291 * 1292 * change the input functions when discovering the character encoding 1293 * of a given entity. 1294 * 1295 * Returns 0 in case of success, -1 otherwise 1296 */ 1297 int 1298 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1299 { 1300 return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1301 } 1302 1303 /************************************************************************ 1304 * * 1305 * Commodity functions to handle entities processing * 1306 * * 1307 ************************************************************************/ 1308 1309 /** 1310 * xmlFreeInputStream: 1311 * @input: an xmlParserInputPtr 1312 * 1313 * Free up an input stream. 1314 */ 1315 void 1316 xmlFreeInputStream(xmlParserInputPtr input) { 1317 if (input == NULL) return; 1318 1319 if (input->filename != NULL) xmlFree((char *) input->filename); 1320 if (input->directory != NULL) xmlFree((char *) input->directory); 1321 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1322 if (input->version != NULL) xmlFree((char *) input->version); 1323 if ((input->free != NULL) && (input->base != NULL)) 1324 input->free((xmlChar *) input->base); 1325 if (input->buf != NULL) 1326 xmlFreeParserInputBuffer(input->buf); 1327 xmlFree(input); 1328 } 1329 1330 /** 1331 * xmlNewInputStream: 1332 * @ctxt: an XML parser context 1333 * 1334 * Create a new input stream structure. 1335 * 1336 * Returns the new input stream or NULL 1337 */ 1338 xmlParserInputPtr 1339 xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1340 xmlParserInputPtr input; 1341 1342 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1343 if (input == NULL) { 1344 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1345 return(NULL); 1346 } 1347 memset(input, 0, sizeof(xmlParserInput)); 1348 input->line = 1; 1349 input->col = 1; 1350 input->standalone = -1; 1351 1352 /* 1353 * If the context is NULL the id cannot be initialized, but that 1354 * should not happen while parsing which is the situation where 1355 * the id is actually needed. 1356 */ 1357 if (ctxt != NULL) 1358 input->id = ctxt->input_id++; 1359 1360 return(input); 1361 } 1362 1363 /** 1364 * xmlNewIOInputStream: 1365 * @ctxt: an XML parser context 1366 * @input: an I/O Input 1367 * @enc: the charset encoding if known 1368 * 1369 * Create a new input stream structure encapsulating the @input into 1370 * a stream suitable for the parser. 1371 * 1372 * Returns the new input stream or NULL 1373 */ 1374 xmlParserInputPtr 1375 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1376 xmlCharEncoding enc) { 1377 xmlParserInputPtr inputStream; 1378 1379 if (input == NULL) return(NULL); 1380 if (xmlParserDebugEntities) 1381 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1382 inputStream = xmlNewInputStream(ctxt); 1383 if (inputStream == NULL) { 1384 return(NULL); 1385 } 1386 inputStream->filename = NULL; 1387 inputStream->buf = input; 1388 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1389 1390 if (enc != XML_CHAR_ENCODING_NONE) { 1391 xmlSwitchEncoding(ctxt, enc); 1392 } 1393 1394 return(inputStream); 1395 } 1396 1397 /** 1398 * xmlNewEntityInputStream: 1399 * @ctxt: an XML parser context 1400 * @entity: an Entity pointer 1401 * 1402 * Create a new input stream based on an xmlEntityPtr 1403 * 1404 * Returns the new input stream or NULL 1405 */ 1406 xmlParserInputPtr 1407 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1408 xmlParserInputPtr input; 1409 1410 if (entity == NULL) { 1411 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1412 NULL); 1413 return(NULL); 1414 } 1415 if (xmlParserDebugEntities) 1416 xmlGenericError(xmlGenericErrorContext, 1417 "new input from entity: %s\n", entity->name); 1418 if (entity->content == NULL) { 1419 switch (entity->etype) { 1420 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1421 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1422 entity->name); 1423 break; 1424 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1425 case XML_EXTERNAL_PARAMETER_ENTITY: 1426 return(xmlLoadExternalEntity((char *) entity->URI, 1427 (char *) entity->ExternalID, ctxt)); 1428 case XML_INTERNAL_GENERAL_ENTITY: 1429 xmlErrInternal(ctxt, 1430 "Internal entity %s without content !\n", 1431 entity->name); 1432 break; 1433 case XML_INTERNAL_PARAMETER_ENTITY: 1434 xmlErrInternal(ctxt, 1435 "Internal parameter entity %s without content !\n", 1436 entity->name); 1437 break; 1438 case XML_INTERNAL_PREDEFINED_ENTITY: 1439 xmlErrInternal(ctxt, 1440 "Predefined entity %s without content !\n", 1441 entity->name); 1442 break; 1443 } 1444 return(NULL); 1445 } 1446 input = xmlNewInputStream(ctxt); 1447 if (input == NULL) { 1448 return(NULL); 1449 } 1450 if (entity->URI != NULL) 1451 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1452 input->base = entity->content; 1453 input->cur = entity->content; 1454 input->length = entity->length; 1455 input->end = &entity->content[input->length]; 1456 return(input); 1457 } 1458 1459 /** 1460 * xmlNewStringInputStream: 1461 * @ctxt: an XML parser context 1462 * @buffer: an memory buffer 1463 * 1464 * Create a new input stream based on a memory buffer. 1465 * Returns the new input stream 1466 */ 1467 xmlParserInputPtr 1468 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1469 xmlParserInputPtr input; 1470 1471 if (buffer == NULL) { 1472 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1473 NULL); 1474 return(NULL); 1475 } 1476 if (xmlParserDebugEntities) 1477 xmlGenericError(xmlGenericErrorContext, 1478 "new fixed input: %.30s\n", buffer); 1479 input = xmlNewInputStream(ctxt); 1480 if (input == NULL) { 1481 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1482 return(NULL); 1483 } 1484 input->base = buffer; 1485 input->cur = buffer; 1486 input->length = xmlStrlen(buffer); 1487 input->end = &buffer[input->length]; 1488 return(input); 1489 } 1490 1491 /** 1492 * xmlNewInputFromFile: 1493 * @ctxt: an XML parser context 1494 * @filename: the filename to use as entity 1495 * 1496 * Create a new input stream based on a file or an URL. 1497 * 1498 * Returns the new input stream or NULL in case of error 1499 */ 1500 xmlParserInputPtr 1501 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1502 xmlParserInputBufferPtr buf; 1503 xmlParserInputPtr inputStream; 1504 char *directory = NULL; 1505 xmlChar *URI = NULL; 1506 1507 if (xmlParserDebugEntities) 1508 xmlGenericError(xmlGenericErrorContext, 1509 "new input from file: %s\n", filename); 1510 if (ctxt == NULL) return(NULL); 1511 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1512 if (buf == NULL) { 1513 if (filename == NULL) 1514 __xmlLoaderErr(ctxt, 1515 "failed to load external entity: NULL filename \n", 1516 NULL); 1517 else 1518 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1519 (const char *) filename); 1520 return(NULL); 1521 } 1522 1523 inputStream = xmlNewInputStream(ctxt); 1524 if (inputStream == NULL) 1525 return(NULL); 1526 1527 inputStream->buf = buf; 1528 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1529 if (inputStream == NULL) 1530 return(NULL); 1531 1532 if (inputStream->filename == NULL) 1533 URI = xmlStrdup((xmlChar *) filename); 1534 else 1535 URI = xmlStrdup((xmlChar *) inputStream->filename); 1536 directory = xmlParserGetDirectory((const char *) URI); 1537 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1538 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1539 if (URI != NULL) xmlFree((char *) URI); 1540 inputStream->directory = directory; 1541 1542 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1543 if ((ctxt->directory == NULL) && (directory != NULL)) 1544 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1545 return(inputStream); 1546 } 1547 1548 /************************************************************************ 1549 * * 1550 * Commodity functions to handle parser contexts * 1551 * * 1552 ************************************************************************/ 1553 1554 /** 1555 * xmlInitParserCtxt: 1556 * @ctxt: an XML parser context 1557 * 1558 * Initialize a parser context 1559 * 1560 * Returns 0 in case of success and -1 in case of error 1561 */ 1562 1563 int 1564 xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1565 { 1566 xmlParserInputPtr input; 1567 1568 if(ctxt==NULL) { 1569 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1570 return(-1); 1571 } 1572 1573 xmlDefaultSAXHandlerInit(); 1574 1575 if (ctxt->dict == NULL) 1576 ctxt->dict = xmlDictCreate(); 1577 if (ctxt->dict == NULL) { 1578 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1579 return(-1); 1580 } 1581 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); 1582 1583 if (ctxt->sax == NULL) 1584 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1585 if (ctxt->sax == NULL) { 1586 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1587 return(-1); 1588 } 1589 else 1590 xmlSAXVersion(ctxt->sax, 2); 1591 1592 ctxt->maxatts = 0; 1593 ctxt->atts = NULL; 1594 /* Allocate the Input stack */ 1595 if (ctxt->inputTab == NULL) { 1596 ctxt->inputTab = (xmlParserInputPtr *) 1597 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1598 ctxt->inputMax = 5; 1599 } 1600 if (ctxt->inputTab == NULL) { 1601 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1602 ctxt->inputNr = 0; 1603 ctxt->inputMax = 0; 1604 ctxt->input = NULL; 1605 return(-1); 1606 } 1607 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1608 xmlFreeInputStream(input); 1609 } 1610 ctxt->inputNr = 0; 1611 ctxt->input = NULL; 1612 1613 ctxt->version = NULL; 1614 ctxt->encoding = NULL; 1615 ctxt->standalone = -1; 1616 ctxt->hasExternalSubset = 0; 1617 ctxt->hasPErefs = 0; 1618 ctxt->html = 0; 1619 ctxt->external = 0; 1620 ctxt->instate = XML_PARSER_START; 1621 ctxt->token = 0; 1622 ctxt->directory = NULL; 1623 1624 /* Allocate the Node stack */ 1625 if (ctxt->nodeTab == NULL) { 1626 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1627 ctxt->nodeMax = 10; 1628 } 1629 if (ctxt->nodeTab == NULL) { 1630 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1631 ctxt->nodeNr = 0; 1632 ctxt->nodeMax = 0; 1633 ctxt->node = NULL; 1634 ctxt->inputNr = 0; 1635 ctxt->inputMax = 0; 1636 ctxt->input = NULL; 1637 return(-1); 1638 } 1639 ctxt->nodeNr = 0; 1640 ctxt->node = NULL; 1641 1642 /* Allocate the Name stack */ 1643 if (ctxt->nameTab == NULL) { 1644 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1645 ctxt->nameMax = 10; 1646 } 1647 if (ctxt->nameTab == NULL) { 1648 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1649 ctxt->nodeNr = 0; 1650 ctxt->nodeMax = 0; 1651 ctxt->node = NULL; 1652 ctxt->inputNr = 0; 1653 ctxt->inputMax = 0; 1654 ctxt->input = NULL; 1655 ctxt->nameNr = 0; 1656 ctxt->nameMax = 0; 1657 ctxt->name = NULL; 1658 return(-1); 1659 } 1660 ctxt->nameNr = 0; 1661 ctxt->name = NULL; 1662 1663 /* Allocate the space stack */ 1664 if (ctxt->spaceTab == NULL) { 1665 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1666 ctxt->spaceMax = 10; 1667 } 1668 if (ctxt->spaceTab == NULL) { 1669 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1670 ctxt->nodeNr = 0; 1671 ctxt->nodeMax = 0; 1672 ctxt->node = NULL; 1673 ctxt->inputNr = 0; 1674 ctxt->inputMax = 0; 1675 ctxt->input = NULL; 1676 ctxt->nameNr = 0; 1677 ctxt->nameMax = 0; 1678 ctxt->name = NULL; 1679 ctxt->spaceNr = 0; 1680 ctxt->spaceMax = 0; 1681 ctxt->space = NULL; 1682 return(-1); 1683 } 1684 ctxt->spaceNr = 1; 1685 ctxt->spaceMax = 10; 1686 ctxt->spaceTab[0] = -1; 1687 ctxt->space = &ctxt->spaceTab[0]; 1688 ctxt->userData = ctxt; 1689 ctxt->myDoc = NULL; 1690 ctxt->wellFormed = 1; 1691 ctxt->nsWellFormed = 1; 1692 ctxt->valid = 1; 1693 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1694 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1695 ctxt->pedantic = xmlPedanticParserDefaultValue; 1696 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1697 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1698 if (ctxt->keepBlanks == 0) 1699 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1700 1701 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1702 ctxt->vctxt.userData = ctxt; 1703 ctxt->vctxt.error = xmlParserValidityError; 1704 ctxt->vctxt.warning = xmlParserValidityWarning; 1705 if (ctxt->validate) { 1706 if (xmlGetWarningsDefaultValue == 0) 1707 ctxt->vctxt.warning = NULL; 1708 else 1709 ctxt->vctxt.warning = xmlParserValidityWarning; 1710 ctxt->vctxt.nodeMax = 0; 1711 } 1712 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1713 ctxt->record_info = 0; 1714 ctxt->nbChars = 0; 1715 ctxt->checkIndex = 0; 1716 ctxt->inSubset = 0; 1717 ctxt->errNo = XML_ERR_OK; 1718 ctxt->depth = 0; 1719 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1720 ctxt->catalogs = NULL; 1721 ctxt->nbentities = 0; 1722 ctxt->sizeentities = 0; 1723 ctxt->sizeentcopy = 0; 1724 ctxt->input_id = 1; 1725 xmlInitNodeInfoSeq(&ctxt->node_seq); 1726 return(0); 1727 } 1728 1729 /** 1730 * xmlFreeParserCtxt: 1731 * @ctxt: an XML parser context 1732 * 1733 * Free all the memory used by a parser context. However the parsed 1734 * document in ctxt->myDoc is not freed. 1735 */ 1736 1737 void 1738 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1739 { 1740 xmlParserInputPtr input; 1741 1742 if (ctxt == NULL) return; 1743 1744 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1745 xmlFreeInputStream(input); 1746 } 1747 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1748 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1749 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1750 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1751 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1752 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1753 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1754 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1755 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1756 #ifdef LIBXML_SAX1_ENABLED 1757 if ((ctxt->sax != NULL) && 1758 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1759 #else 1760 if (ctxt->sax != NULL) 1761 #endif /* LIBXML_SAX1_ENABLED */ 1762 xmlFree(ctxt->sax); 1763 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1764 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1765 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1766 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1767 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1768 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1769 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1770 if (ctxt->attsDefault != NULL) 1771 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1772 if (ctxt->attsSpecial != NULL) 1773 xmlHashFree(ctxt->attsSpecial, NULL); 1774 if (ctxt->freeElems != NULL) { 1775 xmlNodePtr cur, next; 1776 1777 cur = ctxt->freeElems; 1778 while (cur != NULL) { 1779 next = cur->next; 1780 xmlFree(cur); 1781 cur = next; 1782 } 1783 } 1784 if (ctxt->freeAttrs != NULL) { 1785 xmlAttrPtr cur, next; 1786 1787 cur = ctxt->freeAttrs; 1788 while (cur != NULL) { 1789 next = cur->next; 1790 xmlFree(cur); 1791 cur = next; 1792 } 1793 } 1794 /* 1795 * cleanup the error strings 1796 */ 1797 if (ctxt->lastError.message != NULL) 1798 xmlFree(ctxt->lastError.message); 1799 if (ctxt->lastError.file != NULL) 1800 xmlFree(ctxt->lastError.file); 1801 if (ctxt->lastError.str1 != NULL) 1802 xmlFree(ctxt->lastError.str1); 1803 if (ctxt->lastError.str2 != NULL) 1804 xmlFree(ctxt->lastError.str2); 1805 if (ctxt->lastError.str3 != NULL) 1806 xmlFree(ctxt->lastError.str3); 1807 1808 #ifdef LIBXML_CATALOG_ENABLED 1809 if (ctxt->catalogs != NULL) 1810 xmlCatalogFreeLocal(ctxt->catalogs); 1811 #endif 1812 xmlFree(ctxt); 1813 } 1814 1815 /** 1816 * xmlNewParserCtxt: 1817 * 1818 * Allocate and initialize a new parser context. 1819 * 1820 * Returns the xmlParserCtxtPtr or NULL 1821 */ 1822 1823 xmlParserCtxtPtr 1824 xmlNewParserCtxt(void) 1825 { 1826 xmlParserCtxtPtr ctxt; 1827 1828 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1829 if (ctxt == NULL) { 1830 xmlErrMemory(NULL, "cannot allocate parser context\n"); 1831 return(NULL); 1832 } 1833 memset(ctxt, 0, sizeof(xmlParserCtxt)); 1834 if (xmlInitParserCtxt(ctxt) < 0) { 1835 xmlFreeParserCtxt(ctxt); 1836 return(NULL); 1837 } 1838 return(ctxt); 1839 } 1840 1841 /************************************************************************ 1842 * * 1843 * Handling of node informations * 1844 * * 1845 ************************************************************************/ 1846 1847 /** 1848 * xmlClearParserCtxt: 1849 * @ctxt: an XML parser context 1850 * 1851 * Clear (release owned resources) and reinitialize a parser context 1852 */ 1853 1854 void 1855 xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1856 { 1857 if (ctxt==NULL) 1858 return; 1859 xmlClearNodeInfoSeq(&ctxt->node_seq); 1860 xmlCtxtReset(ctxt); 1861 } 1862 1863 1864 /** 1865 * xmlParserFindNodeInfo: 1866 * @ctx: an XML parser context 1867 * @node: an XML node within the tree 1868 * 1869 * Find the parser node info struct for a given node 1870 * 1871 * Returns an xmlParserNodeInfo block pointer or NULL 1872 */ 1873 const xmlParserNodeInfo * 1874 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1875 { 1876 unsigned long pos; 1877 1878 if ((ctx == NULL) || (node == NULL)) 1879 return (NULL); 1880 /* Find position where node should be at */ 1881 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1882 if (pos < ctx->node_seq.length 1883 && ctx->node_seq.buffer[pos].node == node) 1884 return &ctx->node_seq.buffer[pos]; 1885 else 1886 return NULL; 1887 } 1888 1889 1890 /** 1891 * xmlInitNodeInfoSeq: 1892 * @seq: a node info sequence pointer 1893 * 1894 * -- Initialize (set to initial state) node info sequence 1895 */ 1896 void 1897 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1898 { 1899 if (seq == NULL) 1900 return; 1901 seq->length = 0; 1902 seq->maximum = 0; 1903 seq->buffer = NULL; 1904 } 1905 1906 /** 1907 * xmlClearNodeInfoSeq: 1908 * @seq: a node info sequence pointer 1909 * 1910 * -- Clear (release memory and reinitialize) node 1911 * info sequence 1912 */ 1913 void 1914 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1915 { 1916 if (seq == NULL) 1917 return; 1918 if (seq->buffer != NULL) 1919 xmlFree(seq->buffer); 1920 xmlInitNodeInfoSeq(seq); 1921 } 1922 1923 /** 1924 * xmlParserFindNodeInfoIndex: 1925 * @seq: a node info sequence pointer 1926 * @node: an XML node pointer 1927 * 1928 * 1929 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1930 * the given node is or should be at in a sorted sequence 1931 * 1932 * Returns a long indicating the position of the record 1933 */ 1934 unsigned long 1935 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1936 const xmlNodePtr node) 1937 { 1938 unsigned long upper, lower, middle; 1939 int found = 0; 1940 1941 if ((seq == NULL) || (node == NULL)) 1942 return ((unsigned long) -1); 1943 1944 /* Do a binary search for the key */ 1945 lower = 1; 1946 upper = seq->length; 1947 middle = 0; 1948 while (lower <= upper && !found) { 1949 middle = lower + (upper - lower) / 2; 1950 if (node == seq->buffer[middle - 1].node) 1951 found = 1; 1952 else if (node < seq->buffer[middle - 1].node) 1953 upper = middle - 1; 1954 else 1955 lower = middle + 1; 1956 } 1957 1958 /* Return position */ 1959 if (middle == 0 || seq->buffer[middle - 1].node < node) 1960 return middle; 1961 else 1962 return middle - 1; 1963 } 1964 1965 1966 /** 1967 * xmlParserAddNodeInfo: 1968 * @ctxt: an XML parser context 1969 * @info: a node info sequence pointer 1970 * 1971 * Insert node info record into the sorted sequence 1972 */ 1973 void 1974 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 1975 const xmlParserNodeInfoPtr info) 1976 { 1977 unsigned long pos; 1978 1979 if ((ctxt == NULL) || (info == NULL)) return; 1980 1981 /* Find pos and check to see if node is already in the sequence */ 1982 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 1983 info->node); 1984 1985 if ((pos < ctxt->node_seq.length) && 1986 (ctxt->node_seq.buffer != NULL) && 1987 (ctxt->node_seq.buffer[pos].node == info->node)) { 1988 ctxt->node_seq.buffer[pos] = *info; 1989 } 1990 1991 /* Otherwise, we need to add new node to buffer */ 1992 else { 1993 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { 1994 xmlParserNodeInfo *tmp_buffer; 1995 unsigned int byte_size; 1996 1997 if (ctxt->node_seq.maximum == 0) 1998 ctxt->node_seq.maximum = 2; 1999 byte_size = (sizeof(*ctxt->node_seq.buffer) * 2000 (2 * ctxt->node_seq.maximum)); 2001 2002 if (ctxt->node_seq.buffer == NULL) 2003 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2004 else 2005 tmp_buffer = 2006 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2007 byte_size); 2008 2009 if (tmp_buffer == NULL) { 2010 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2011 return; 2012 } 2013 ctxt->node_seq.buffer = tmp_buffer; 2014 ctxt->node_seq.maximum *= 2; 2015 } 2016 2017 /* If position is not at end, move elements out of the way */ 2018 if (pos != ctxt->node_seq.length) { 2019 unsigned long i; 2020 2021 for (i = ctxt->node_seq.length; i > pos; i--) 2022 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2023 } 2024 2025 /* Copy element and increase length */ 2026 ctxt->node_seq.buffer[pos] = *info; 2027 ctxt->node_seq.length++; 2028 } 2029 } 2030 2031 /************************************************************************ 2032 * * 2033 * Defaults settings * 2034 * * 2035 ************************************************************************/ 2036 /** 2037 * xmlPedanticParserDefault: 2038 * @val: int 0 or 1 2039 * 2040 * Set and return the previous value for enabling pedantic warnings. 2041 * 2042 * Returns the last value for 0 for no substitution, 1 for substitution. 2043 */ 2044 2045 int 2046 xmlPedanticParserDefault(int val) { 2047 int old = xmlPedanticParserDefaultValue; 2048 2049 xmlPedanticParserDefaultValue = val; 2050 return(old); 2051 } 2052 2053 /** 2054 * xmlLineNumbersDefault: 2055 * @val: int 0 or 1 2056 * 2057 * Set and return the previous value for enabling line numbers in elements 2058 * contents. This may break on old application and is turned off by default. 2059 * 2060 * Returns the last value for 0 for no substitution, 1 for substitution. 2061 */ 2062 2063 int 2064 xmlLineNumbersDefault(int val) { 2065 int old = xmlLineNumbersDefaultValue; 2066 2067 xmlLineNumbersDefaultValue = val; 2068 return(old); 2069 } 2070 2071 /** 2072 * xmlSubstituteEntitiesDefault: 2073 * @val: int 0 or 1 2074 * 2075 * Set and return the previous value for default entity support. 2076 * Initially the parser always keep entity references instead of substituting 2077 * entity values in the output. This function has to be used to change the 2078 * default parser behavior 2079 * SAX::substituteEntities() has to be used for changing that on a file by 2080 * file basis. 2081 * 2082 * Returns the last value for 0 for no substitution, 1 for substitution. 2083 */ 2084 2085 int 2086 xmlSubstituteEntitiesDefault(int val) { 2087 int old = xmlSubstituteEntitiesDefaultValue; 2088 2089 xmlSubstituteEntitiesDefaultValue = val; 2090 return(old); 2091 } 2092 2093 /** 2094 * xmlKeepBlanksDefault: 2095 * @val: int 0 or 1 2096 * 2097 * Set and return the previous value for default blanks text nodes support. 2098 * The 1.x version of the parser used an heuristic to try to detect 2099 * ignorable white spaces. As a result the SAX callback was generating 2100 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2101 * using the DOM output text nodes containing those blanks were not generated. 2102 * The 2.x and later version will switch to the XML standard way and 2103 * ignorableWhitespace() are only generated when running the parser in 2104 * validating mode and when the current element doesn't allow CDATA or 2105 * mixed content. 2106 * This function is provided as a way to force the standard behavior 2107 * on 1.X libs and to switch back to the old mode for compatibility when 2108 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2109 * by using xmlIsBlankNode() commodity function to detect the "empty" 2110 * nodes generated. 2111 * This value also affect autogeneration of indentation when saving code 2112 * if blanks sections are kept, indentation is not generated. 2113 * 2114 * Returns the last value for 0 for no substitution, 1 for substitution. 2115 */ 2116 2117 int 2118 xmlKeepBlanksDefault(int val) { 2119 int old = xmlKeepBlanksDefaultValue; 2120 2121 xmlKeepBlanksDefaultValue = val; 2122 if (!val) xmlIndentTreeOutput = 1; 2123 return(old); 2124 } 2125 2126 #define bottom_parserInternals 2127 #include "elfgcchack.h" 2128