1 /* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel (at) veillard.com 8 */ 9 10 #define IN_LIBXML 11 #include "libxml.h" 12 13 #if defined(WIN32) && !defined (__CYGWIN__) 14 #define XML_DIR_SEP '\\' 15 #else 16 #define XML_DIR_SEP '/' 17 #endif 18 19 #include <string.h> 20 #ifdef HAVE_CTYPE_H 21 #include <ctype.h> 22 #endif 23 #ifdef HAVE_STDLIB_H 24 #include <stdlib.h> 25 #endif 26 #ifdef HAVE_SYS_STAT_H 27 #include <sys/stat.h> 28 #endif 29 #ifdef HAVE_FCNTL_H 30 #include <fcntl.h> 31 #endif 32 #ifdef HAVE_UNISTD_H 33 #include <unistd.h> 34 #endif 35 #ifdef HAVE_ZLIB_H 36 #include <zlib.h> 37 #endif 38 39 #include <libxml/xmlmemory.h> 40 #include <libxml/tree.h> 41 #include <libxml/parser.h> 42 #include <libxml/parserInternals.h> 43 #include <libxml/valid.h> 44 #include <libxml/entities.h> 45 #include <libxml/xmlerror.h> 46 #include <libxml/encoding.h> 47 #include <libxml/valid.h> 48 #include <libxml/xmlIO.h> 49 #include <libxml/uri.h> 50 #include <libxml/dict.h> 51 #include <libxml/SAX.h> 52 #ifdef LIBXML_CATALOG_ENABLED 53 #include <libxml/catalog.h> 54 #endif 55 #include <libxml/globals.h> 56 #include <libxml/chvalid.h> 57 58 #include "buf.h" 59 #include "enc.h" 60 61 /* 62 * Various global defaults for parsing 63 */ 64 65 /** 66 * xmlCheckVersion: 67 * @version: the include version number 68 * 69 * check the compiled lib version against the include one. 70 * This can warn or immediately kill the application 71 */ 72 void 73 xmlCheckVersion(int version) { 74 int myversion = (int) LIBXML_VERSION; 75 76 xmlInitParser(); 77 78 if ((myversion / 10000) != (version / 10000)) { 79 xmlGenericError(xmlGenericErrorContext, 80 "Fatal: program compiled against libxml %d using libxml %d\n", 81 (version / 10000), (myversion / 10000)); 82 fprintf(stderr, 83 "Fatal: program compiled against libxml %d using libxml %d\n", 84 (version / 10000), (myversion / 10000)); 85 } 86 if ((myversion / 100) < (version / 100)) { 87 xmlGenericError(xmlGenericErrorContext, 88 "Warning: program compiled against libxml %d using older %d\n", 89 (version / 100), (myversion / 100)); 90 } 91 } 92 93 94 /************************************************************************ 95 * * 96 * Some factorized error routines * 97 * * 98 ************************************************************************/ 99 100 101 /** 102 * xmlErrMemory: 103 * @ctxt: an XML parser context 104 * @extra: extra informations 105 * 106 * Handle a redefinition of attribute error 107 */ 108 void 109 xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 110 { 111 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 112 (ctxt->instate == XML_PARSER_EOF)) 113 return; 114 if (ctxt != NULL) { 115 ctxt->errNo = XML_ERR_NO_MEMORY; 116 ctxt->instate = XML_PARSER_EOF; 117 ctxt->disableSAX = 1; 118 } 119 if (extra) 120 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 121 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 122 NULL, NULL, 0, 0, 123 "Memory allocation failed : %s\n", extra); 124 else 125 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 126 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 127 NULL, NULL, 0, 0, "Memory allocation failed\n"); 128 } 129 130 /** 131 * __xmlErrEncoding: 132 * @ctxt: an XML parser context 133 * @xmlerr: the error number 134 * @msg: the error message 135 * @str1: an string info 136 * @str2: an string info 137 * 138 * Handle an encoding error 139 */ 140 void 141 __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 142 const char *msg, const xmlChar * str1, const xmlChar * str2) 143 { 144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 145 (ctxt->instate == XML_PARSER_EOF)) 146 return; 147 if (ctxt != NULL) 148 ctxt->errNo = xmlerr; 149 __xmlRaiseError(NULL, NULL, NULL, 150 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 151 NULL, 0, (const char *) str1, (const char *) str2, 152 NULL, 0, 0, msg, str1, str2); 153 if (ctxt != NULL) { 154 ctxt->wellFormed = 0; 155 if (ctxt->recovery == 0) 156 ctxt->disableSAX = 1; 157 } 158 } 159 160 /** 161 * xmlErrInternal: 162 * @ctxt: an XML parser context 163 * @msg: the error message 164 * @str: error informations 165 * 166 * Handle an internal error 167 */ 168 static void 169 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 170 { 171 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 172 (ctxt->instate == XML_PARSER_EOF)) 173 return; 174 if (ctxt != NULL) 175 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 176 __xmlRaiseError(NULL, NULL, NULL, 177 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 178 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 179 0, 0, msg, str); 180 if (ctxt != NULL) { 181 ctxt->wellFormed = 0; 182 if (ctxt->recovery == 0) 183 ctxt->disableSAX = 1; 184 } 185 } 186 187 /** 188 * xmlErrEncodingInt: 189 * @ctxt: an XML parser context 190 * @error: the error number 191 * @msg: the error message 192 * @val: an integer value 193 * 194 * n encoding error 195 */ 196 static void 197 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 198 const char *msg, int val) 199 { 200 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 201 (ctxt->instate == XML_PARSER_EOF)) 202 return; 203 if (ctxt != NULL) 204 ctxt->errNo = error; 205 __xmlRaiseError(NULL, NULL, NULL, 206 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 207 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 208 if (ctxt != NULL) { 209 ctxt->wellFormed = 0; 210 if (ctxt->recovery == 0) 211 ctxt->disableSAX = 1; 212 } 213 } 214 215 /** 216 * xmlIsLetter: 217 * @c: an unicode character (int) 218 * 219 * Check whether the character is allowed by the production 220 * [84] Letter ::= BaseChar | Ideographic 221 * 222 * Returns 0 if not, non-zero otherwise 223 */ 224 int 225 xmlIsLetter(int c) { 226 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 227 } 228 229 /************************************************************************ 230 * * 231 * Input handling functions for progressive parsing * 232 * * 233 ************************************************************************/ 234 235 /* #define DEBUG_INPUT */ 236 /* #define DEBUG_STACK */ 237 /* #define DEBUG_PUSH */ 238 239 240 /* we need to keep enough input to show errors in context */ 241 #define LINE_LEN 80 242 243 #ifdef DEBUG_INPUT 244 #define CHECK_BUFFER(in) check_buffer(in) 245 246 static 247 void check_buffer(xmlParserInputPtr in) { 248 if (in->base != xmlBufContent(in->buf->buffer)) { 249 xmlGenericError(xmlGenericErrorContext, 250 "xmlParserInput: base mismatch problem\n"); 251 } 252 if (in->cur < in->base) { 253 xmlGenericError(xmlGenericErrorContext, 254 "xmlParserInput: cur < base problem\n"); 255 } 256 if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { 257 xmlGenericError(xmlGenericErrorContext, 258 "xmlParserInput: cur > base + use problem\n"); 259 } 260 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", 261 (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, 262 xmlBufUse(in->buf->buffer)); 263 } 264 265 #else 266 #define CHECK_BUFFER(in) 267 #endif 268 269 270 /** 271 * xmlParserInputRead: 272 * @in: an XML parser input 273 * @len: an indicative size for the lookahead 274 * 275 * This function was internal and is deprecated. 276 * 277 * Returns -1 as this is an error to use it. 278 */ 279 int 280 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { 281 return(-1); 282 } 283 284 /** 285 * xmlParserInputGrow: 286 * @in: an XML parser input 287 * @len: an indicative size for the lookahead 288 * 289 * This function increase the input for the parser. It tries to 290 * preserve pointers to the input buffer, and keep already read data 291 * 292 * Returns the amount of char read, or -1 in case of error, 0 indicate the 293 * end of this entity 294 */ 295 int 296 xmlParserInputGrow(xmlParserInputPtr in, int len) { 297 size_t ret; 298 size_t indx; 299 const xmlChar *content; 300 301 if ((in == NULL) || (len < 0)) return(-1); 302 #ifdef DEBUG_INPUT 303 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 304 #endif 305 if (in->buf == NULL) return(-1); 306 if (in->base == NULL) return(-1); 307 if (in->cur == NULL) return(-1); 308 if (in->buf->buffer == NULL) return(-1); 309 310 CHECK_BUFFER(in); 311 312 indx = in->cur - in->base; 313 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { 314 315 CHECK_BUFFER(in); 316 317 return(0); 318 } 319 if (in->buf->readcallback != NULL) { 320 ret = xmlParserInputBufferGrow(in->buf, len); 321 } else 322 return(0); 323 324 /* 325 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 326 * block, but we use it really as an integer to do some 327 * pointer arithmetic. Insure will raise it as a bug but in 328 * that specific case, that's not ! 329 */ 330 331 content = xmlBufContent(in->buf->buffer); 332 if (in->base != content) { 333 /* 334 * the buffer has been reallocated 335 */ 336 indx = in->cur - in->base; 337 in->base = content; 338 in->cur = &content[indx]; 339 } 340 in->end = xmlBufEnd(in->buf->buffer); 341 342 CHECK_BUFFER(in); 343 344 return(ret); 345 } 346 347 /** 348 * xmlParserInputShrink: 349 * @in: an XML parser input 350 * 351 * This function removes used input for the parser. 352 */ 353 void 354 xmlParserInputShrink(xmlParserInputPtr in) { 355 size_t used; 356 size_t ret; 357 size_t indx; 358 const xmlChar *content; 359 360 #ifdef DEBUG_INPUT 361 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 362 #endif 363 if (in == NULL) return; 364 if (in->buf == NULL) return; 365 if (in->base == NULL) return; 366 if (in->cur == NULL) return; 367 if (in->buf->buffer == NULL) return; 368 369 CHECK_BUFFER(in); 370 371 used = in->cur - xmlBufContent(in->buf->buffer); 372 /* 373 * Do not shrink on large buffers whose only a tiny fraction 374 * was consumed 375 */ 376 if (used > INPUT_CHUNK) { 377 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); 378 if (ret > 0) { 379 in->cur -= ret; 380 in->consumed += ret; 381 } 382 in->end = xmlBufEnd(in->buf->buffer); 383 } 384 385 CHECK_BUFFER(in); 386 387 if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) { 388 return; 389 } 390 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 391 content = xmlBufContent(in->buf->buffer); 392 if (in->base != content) { 393 /* 394 * the buffer has been reallocated 395 */ 396 indx = in->cur - in->base; 397 in->base = content; 398 in->cur = &content[indx]; 399 } 400 in->end = xmlBufEnd(in->buf->buffer); 401 402 CHECK_BUFFER(in); 403 } 404 405 /************************************************************************ 406 * * 407 * UTF8 character input and related functions * 408 * * 409 ************************************************************************/ 410 411 /** 412 * xmlNextChar: 413 * @ctxt: the XML parser context 414 * 415 * Skip to the next char input char. 416 */ 417 418 void 419 xmlNextChar(xmlParserCtxtPtr ctxt) 420 { 421 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 422 (ctxt->input == NULL)) 423 return; 424 425 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 426 if ((*ctxt->input->cur == 0) && 427 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && 428 (ctxt->instate != XML_PARSER_COMMENT)) { 429 /* 430 * If we are at the end of the current entity and 431 * the context allows it, we pop consumed entities 432 * automatically. 433 * the auto closing should be blocked in other cases 434 */ 435 xmlPopInput(ctxt); 436 } else { 437 const unsigned char *cur; 438 unsigned char c; 439 440 /* 441 * 2.11 End-of-Line Handling 442 * the literal two-character sequence "#xD#xA" or a standalone 443 * literal #xD, an XML processor must pass to the application 444 * the single character #xA. 445 */ 446 if (*(ctxt->input->cur) == '\n') { 447 ctxt->input->line++; ctxt->input->col = 1; 448 } else 449 ctxt->input->col++; 450 451 /* 452 * We are supposed to handle UTF8, check it's valid 453 * From rfc2044: encoding of the Unicode values on UTF-8: 454 * 455 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 456 * 0000 0000-0000 007F 0xxxxxxx 457 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 458 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 459 * 460 * Check for the 0x110000 limit too 461 */ 462 cur = ctxt->input->cur; 463 464 c = *cur; 465 if (c & 0x80) { 466 if (c == 0xC0) 467 goto encoding_error; 468 if (cur[1] == 0) { 469 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 470 cur = ctxt->input->cur; 471 } 472 if ((cur[1] & 0xc0) != 0x80) 473 goto encoding_error; 474 if ((c & 0xe0) == 0xe0) { 475 unsigned int val; 476 477 if (cur[2] == 0) { 478 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 479 cur = ctxt->input->cur; 480 } 481 if ((cur[2] & 0xc0) != 0x80) 482 goto encoding_error; 483 if ((c & 0xf0) == 0xf0) { 484 if (cur[3] == 0) { 485 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 486 cur = ctxt->input->cur; 487 } 488 if (((c & 0xf8) != 0xf0) || 489 ((cur[3] & 0xc0) != 0x80)) 490 goto encoding_error; 491 /* 4-byte code */ 492 ctxt->input->cur += 4; 493 val = (cur[0] & 0x7) << 18; 494 val |= (cur[1] & 0x3f) << 12; 495 val |= (cur[2] & 0x3f) << 6; 496 val |= cur[3] & 0x3f; 497 } else { 498 /* 3-byte code */ 499 ctxt->input->cur += 3; 500 val = (cur[0] & 0xf) << 12; 501 val |= (cur[1] & 0x3f) << 6; 502 val |= cur[2] & 0x3f; 503 } 504 if (((val > 0xd7ff) && (val < 0xe000)) || 505 ((val > 0xfffd) && (val < 0x10000)) || 506 (val >= 0x110000)) { 507 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 508 "Char 0x%X out of allowed range\n", 509 val); 510 } 511 } else 512 /* 2-byte code */ 513 ctxt->input->cur += 2; 514 } else 515 /* 1-byte code */ 516 ctxt->input->cur++; 517 518 ctxt->nbChars++; 519 if (*ctxt->input->cur == 0) 520 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 521 } 522 } else { 523 /* 524 * Assume it's a fixed length encoding (1) with 525 * a compatible encoding for the ASCII set, since 526 * XML constructs only use < 128 chars 527 */ 528 529 if (*(ctxt->input->cur) == '\n') { 530 ctxt->input->line++; ctxt->input->col = 1; 531 } else 532 ctxt->input->col++; 533 ctxt->input->cur++; 534 ctxt->nbChars++; 535 if (*ctxt->input->cur == 0) 536 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 537 } 538 if ((*ctxt->input->cur == '%') && (!ctxt->html)) 539 xmlParserHandlePEReference(ctxt); 540 if ((*ctxt->input->cur == 0) && 541 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 542 xmlPopInput(ctxt); 543 return; 544 encoding_error: 545 /* 546 * If we detect an UTF8 error that probably mean that the 547 * input encoding didn't get properly advertised in the 548 * declaration header. Report the error and switch the encoding 549 * to ISO-Latin-1 (if you don't like this policy, just declare the 550 * encoding !) 551 */ 552 if ((ctxt == NULL) || (ctxt->input == NULL) || 553 (ctxt->input->end - ctxt->input->cur < 4)) { 554 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 555 "Input is not proper UTF-8, indicate encoding !\n", 556 NULL, NULL); 557 } else { 558 char buffer[150]; 559 560 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 561 ctxt->input->cur[0], ctxt->input->cur[1], 562 ctxt->input->cur[2], ctxt->input->cur[3]); 563 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 564 "Input is not proper UTF-8, indicate encoding !\n%s", 565 BAD_CAST buffer, NULL); 566 } 567 ctxt->charset = XML_CHAR_ENCODING_8859_1; 568 ctxt->input->cur++; 569 return; 570 } 571 572 /** 573 * xmlCurrentChar: 574 * @ctxt: the XML parser context 575 * @len: pointer to the length of the char read 576 * 577 * The current char value, if using UTF-8 this may actually span multiple 578 * bytes in the input buffer. Implement the end of line normalization: 579 * 2.11 End-of-Line Handling 580 * Wherever an external parsed entity or the literal entity value 581 * of an internal parsed entity contains either the literal two-character 582 * sequence "#xD#xA" or a standalone literal #xD, an XML processor 583 * must pass to the application the single character #xA. 584 * This behavior can conveniently be produced by normalizing all 585 * line breaks to #xA on input, before parsing.) 586 * 587 * Returns the current char value and its length 588 */ 589 590 int 591 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 592 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 593 if (ctxt->instate == XML_PARSER_EOF) 594 return(0); 595 596 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 597 *len = 1; 598 return((int) *ctxt->input->cur); 599 } 600 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 601 /* 602 * We are supposed to handle UTF8, check it's valid 603 * From rfc2044: encoding of the Unicode values on UTF-8: 604 * 605 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 606 * 0000 0000-0000 007F 0xxxxxxx 607 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 608 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 609 * 610 * Check for the 0x110000 limit too 611 */ 612 const unsigned char *cur = ctxt->input->cur; 613 unsigned char c; 614 unsigned int val; 615 616 c = *cur; 617 if (c & 0x80) { 618 if (((c & 0x40) == 0) || (c == 0xC0)) 619 goto encoding_error; 620 if (cur[1] == 0) { 621 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 622 cur = ctxt->input->cur; 623 } 624 if ((cur[1] & 0xc0) != 0x80) 625 goto encoding_error; 626 if ((c & 0xe0) == 0xe0) { 627 if (cur[2] == 0) { 628 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 629 cur = ctxt->input->cur; 630 } 631 if ((cur[2] & 0xc0) != 0x80) 632 goto encoding_error; 633 if ((c & 0xf0) == 0xf0) { 634 if (cur[3] == 0) { 635 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 636 cur = ctxt->input->cur; 637 } 638 if (((c & 0xf8) != 0xf0) || 639 ((cur[3] & 0xc0) != 0x80)) 640 goto encoding_error; 641 /* 4-byte code */ 642 *len = 4; 643 val = (cur[0] & 0x7) << 18; 644 val |= (cur[1] & 0x3f) << 12; 645 val |= (cur[2] & 0x3f) << 6; 646 val |= cur[3] & 0x3f; 647 if (val < 0x10000) 648 goto encoding_error; 649 } else { 650 /* 3-byte code */ 651 *len = 3; 652 val = (cur[0] & 0xf) << 12; 653 val |= (cur[1] & 0x3f) << 6; 654 val |= cur[2] & 0x3f; 655 if (val < 0x800) 656 goto encoding_error; 657 } 658 } else { 659 /* 2-byte code */ 660 *len = 2; 661 val = (cur[0] & 0x1f) << 6; 662 val |= cur[1] & 0x3f; 663 if (val < 0x80) 664 goto encoding_error; 665 } 666 if (!IS_CHAR(val)) { 667 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 668 "Char 0x%X out of allowed range\n", val); 669 } 670 return(val); 671 } else { 672 /* 1-byte code */ 673 *len = 1; 674 if (*ctxt->input->cur == 0) 675 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 676 if ((*ctxt->input->cur == 0) && 677 (ctxt->input->end > ctxt->input->cur)) { 678 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 679 "Char 0x0 out of allowed range\n", 0); 680 } 681 if (*ctxt->input->cur == 0xD) { 682 if (ctxt->input->cur[1] == 0xA) { 683 ctxt->nbChars++; 684 ctxt->input->cur++; 685 } 686 return(0xA); 687 } 688 return((int) *ctxt->input->cur); 689 } 690 } 691 /* 692 * Assume it's a fixed length encoding (1) with 693 * a compatible encoding for the ASCII set, since 694 * XML constructs only use < 128 chars 695 */ 696 *len = 1; 697 if (*ctxt->input->cur == 0xD) { 698 if (ctxt->input->cur[1] == 0xA) { 699 ctxt->nbChars++; 700 ctxt->input->cur++; 701 } 702 return(0xA); 703 } 704 return((int) *ctxt->input->cur); 705 encoding_error: 706 /* 707 * An encoding problem may arise from a truncated input buffer 708 * splitting a character in the middle. In that case do not raise 709 * an error but return 0 to endicate an end of stream problem 710 */ 711 if (ctxt->input->end - ctxt->input->cur < 4) { 712 *len = 0; 713 return(0); 714 } 715 716 /* 717 * If we detect an UTF8 error that probably mean that the 718 * input encoding didn't get properly advertised in the 719 * declaration header. Report the error and switch the encoding 720 * to ISO-Latin-1 (if you don't like this policy, just declare the 721 * encoding !) 722 */ 723 { 724 char buffer[150]; 725 726 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 727 ctxt->input->cur[0], ctxt->input->cur[1], 728 ctxt->input->cur[2], ctxt->input->cur[3]); 729 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 730 "Input is not proper UTF-8, indicate encoding !\n%s", 731 BAD_CAST buffer, NULL); 732 } 733 ctxt->charset = XML_CHAR_ENCODING_8859_1; 734 *len = 1; 735 return((int) *ctxt->input->cur); 736 } 737 738 /** 739 * xmlStringCurrentChar: 740 * @ctxt: the XML parser context 741 * @cur: pointer to the beginning of the char 742 * @len: pointer to the length of the char read 743 * 744 * The current char value, if using UTF-8 this may actually span multiple 745 * bytes in the input buffer. 746 * 747 * Returns the current char value and its length 748 */ 749 750 int 751 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 752 { 753 if ((len == NULL) || (cur == NULL)) return(0); 754 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 755 /* 756 * We are supposed to handle UTF8, check it's valid 757 * From rfc2044: encoding of the Unicode values on UTF-8: 758 * 759 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 760 * 0000 0000-0000 007F 0xxxxxxx 761 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 762 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 763 * 764 * Check for the 0x110000 limit too 765 */ 766 unsigned char c; 767 unsigned int val; 768 769 c = *cur; 770 if (c & 0x80) { 771 if ((cur[1] & 0xc0) != 0x80) 772 goto encoding_error; 773 if ((c & 0xe0) == 0xe0) { 774 775 if ((cur[2] & 0xc0) != 0x80) 776 goto encoding_error; 777 if ((c & 0xf0) == 0xf0) { 778 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 779 goto encoding_error; 780 /* 4-byte code */ 781 *len = 4; 782 val = (cur[0] & 0x7) << 18; 783 val |= (cur[1] & 0x3f) << 12; 784 val |= (cur[2] & 0x3f) << 6; 785 val |= cur[3] & 0x3f; 786 } else { 787 /* 3-byte code */ 788 *len = 3; 789 val = (cur[0] & 0xf) << 12; 790 val |= (cur[1] & 0x3f) << 6; 791 val |= cur[2] & 0x3f; 792 } 793 } else { 794 /* 2-byte code */ 795 *len = 2; 796 val = (cur[0] & 0x1f) << 6; 797 val |= cur[1] & 0x3f; 798 } 799 if (!IS_CHAR(val)) { 800 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 801 "Char 0x%X out of allowed range\n", val); 802 } 803 return (val); 804 } else { 805 /* 1-byte code */ 806 *len = 1; 807 return ((int) *cur); 808 } 809 } 810 /* 811 * Assume it's a fixed length encoding (1) with 812 * a compatible encoding for the ASCII set, since 813 * XML constructs only use < 128 chars 814 */ 815 *len = 1; 816 return ((int) *cur); 817 encoding_error: 818 819 /* 820 * An encoding problem may arise from a truncated input buffer 821 * splitting a character in the middle. In that case do not raise 822 * an error but return 0 to endicate an end of stream problem 823 */ 824 if ((ctxt == NULL) || (ctxt->input == NULL) || 825 (ctxt->input->end - ctxt->input->cur < 4)) { 826 *len = 0; 827 return(0); 828 } 829 /* 830 * If we detect an UTF8 error that probably mean that the 831 * input encoding didn't get properly advertised in the 832 * declaration header. Report the error and switch the encoding 833 * to ISO-Latin-1 (if you don't like this policy, just declare the 834 * encoding !) 835 */ 836 { 837 char buffer[150]; 838 839 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 840 ctxt->input->cur[0], ctxt->input->cur[1], 841 ctxt->input->cur[2], ctxt->input->cur[3]); 842 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 843 "Input is not proper UTF-8, indicate encoding !\n%s", 844 BAD_CAST buffer, NULL); 845 } 846 *len = 1; 847 return ((int) *cur); 848 } 849 850 /** 851 * xmlCopyCharMultiByte: 852 * @out: pointer to an array of xmlChar 853 * @val: the char value 854 * 855 * append the char value in the array 856 * 857 * Returns the number of xmlChar written 858 */ 859 int 860 xmlCopyCharMultiByte(xmlChar *out, int val) { 861 if (out == NULL) return(0); 862 /* 863 * We are supposed to handle UTF8, check it's valid 864 * From rfc2044: encoding of the Unicode values on UTF-8: 865 * 866 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 867 * 0000 0000-0000 007F 0xxxxxxx 868 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 869 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 870 */ 871 if (val >= 0x80) { 872 xmlChar *savedout = out; 873 int bits; 874 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 875 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 876 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 877 else { 878 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 879 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 880 val); 881 return(0); 882 } 883 for ( ; bits >= 0; bits-= 6) 884 *out++= ((val >> bits) & 0x3F) | 0x80 ; 885 return (out - savedout); 886 } 887 *out = (xmlChar) val; 888 return 1; 889 } 890 891 /** 892 * xmlCopyChar: 893 * @len: Ignored, compatibility 894 * @out: pointer to an array of xmlChar 895 * @val: the char value 896 * 897 * append the char value in the array 898 * 899 * Returns the number of xmlChar written 900 */ 901 902 int 903 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 904 if (out == NULL) return(0); 905 /* the len parameter is ignored */ 906 if (val >= 0x80) { 907 return(xmlCopyCharMultiByte (out, val)); 908 } 909 *out = (xmlChar) val; 910 return 1; 911 } 912 913 /************************************************************************ 914 * * 915 * Commodity functions to switch encodings * 916 * * 917 ************************************************************************/ 918 919 static int 920 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 921 xmlCharEncodingHandlerPtr handler, int len); 922 static int 923 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 924 xmlCharEncodingHandlerPtr handler, int len); 925 /** 926 * xmlSwitchEncoding: 927 * @ctxt: the parser context 928 * @enc: the encoding value (number) 929 * 930 * change the input functions when discovering the character encoding 931 * of a given entity. 932 * 933 * Returns 0 in case of success, -1 otherwise 934 */ 935 int 936 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 937 { 938 xmlCharEncodingHandlerPtr handler; 939 int len = -1; 940 int ret; 941 942 if (ctxt == NULL) return(-1); 943 switch (enc) { 944 case XML_CHAR_ENCODING_ERROR: 945 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 946 "encoding unknown\n", NULL, NULL); 947 return(-1); 948 case XML_CHAR_ENCODING_NONE: 949 /* let's assume it's UTF-8 without the XML decl */ 950 ctxt->charset = XML_CHAR_ENCODING_UTF8; 951 return(0); 952 case XML_CHAR_ENCODING_UTF8: 953 /* default encoding, no conversion should be needed */ 954 ctxt->charset = XML_CHAR_ENCODING_UTF8; 955 956 /* 957 * Errata on XML-1.0 June 20 2001 958 * Specific handling of the Byte Order Mark for 959 * UTF-8 960 */ 961 if ((ctxt->input != NULL) && 962 (ctxt->input->cur[0] == 0xEF) && 963 (ctxt->input->cur[1] == 0xBB) && 964 (ctxt->input->cur[2] == 0xBF)) { 965 ctxt->input->cur += 3; 966 } 967 return(0); 968 case XML_CHAR_ENCODING_UTF16LE: 969 case XML_CHAR_ENCODING_UTF16BE: 970 /*The raw input characters are encoded 971 *in UTF-16. As we expect this function 972 *to be called after xmlCharEncInFunc, we expect 973 *ctxt->input->cur to contain UTF-8 encoded characters. 974 *So the raw UTF16 Byte Order Mark 975 *has also been converted into 976 *an UTF-8 BOM. Let's skip that BOM. 977 */ 978 if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 979 (ctxt->input->cur[0] == 0xEF) && 980 (ctxt->input->cur[1] == 0xBB) && 981 (ctxt->input->cur[2] == 0xBF)) { 982 ctxt->input->cur += 3; 983 } 984 len = 90; 985 break; 986 case XML_CHAR_ENCODING_UCS2: 987 len = 90; 988 break; 989 case XML_CHAR_ENCODING_UCS4BE: 990 case XML_CHAR_ENCODING_UCS4LE: 991 case XML_CHAR_ENCODING_UCS4_2143: 992 case XML_CHAR_ENCODING_UCS4_3412: 993 len = 180; 994 break; 995 case XML_CHAR_ENCODING_EBCDIC: 996 case XML_CHAR_ENCODING_8859_1: 997 case XML_CHAR_ENCODING_8859_2: 998 case XML_CHAR_ENCODING_8859_3: 999 case XML_CHAR_ENCODING_8859_4: 1000 case XML_CHAR_ENCODING_8859_5: 1001 case XML_CHAR_ENCODING_8859_6: 1002 case XML_CHAR_ENCODING_8859_7: 1003 case XML_CHAR_ENCODING_8859_8: 1004 case XML_CHAR_ENCODING_8859_9: 1005 case XML_CHAR_ENCODING_ASCII: 1006 case XML_CHAR_ENCODING_2022_JP: 1007 case XML_CHAR_ENCODING_SHIFT_JIS: 1008 case XML_CHAR_ENCODING_EUC_JP: 1009 len = 45; 1010 break; 1011 } 1012 handler = xmlGetCharEncodingHandler(enc); 1013 if (handler == NULL) { 1014 /* 1015 * Default handlers. 1016 */ 1017 switch (enc) { 1018 case XML_CHAR_ENCODING_ASCII: 1019 /* default encoding, no conversion should be needed */ 1020 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1021 return(0); 1022 case XML_CHAR_ENCODING_UTF16LE: 1023 break; 1024 case XML_CHAR_ENCODING_UTF16BE: 1025 break; 1026 case XML_CHAR_ENCODING_UCS4LE: 1027 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1028 "encoding not supported %s\n", 1029 BAD_CAST "USC4 little endian", NULL); 1030 break; 1031 case XML_CHAR_ENCODING_UCS4BE: 1032 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1033 "encoding not supported %s\n", 1034 BAD_CAST "USC4 big endian", NULL); 1035 break; 1036 case XML_CHAR_ENCODING_EBCDIC: 1037 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1038 "encoding not supported %s\n", 1039 BAD_CAST "EBCDIC", NULL); 1040 break; 1041 case XML_CHAR_ENCODING_UCS4_2143: 1042 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1043 "encoding not supported %s\n", 1044 BAD_CAST "UCS4 2143", NULL); 1045 break; 1046 case XML_CHAR_ENCODING_UCS4_3412: 1047 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1048 "encoding not supported %s\n", 1049 BAD_CAST "UCS4 3412", NULL); 1050 break; 1051 case XML_CHAR_ENCODING_UCS2: 1052 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1053 "encoding not supported %s\n", 1054 BAD_CAST "UCS2", NULL); 1055 break; 1056 case XML_CHAR_ENCODING_8859_1: 1057 case XML_CHAR_ENCODING_8859_2: 1058 case XML_CHAR_ENCODING_8859_3: 1059 case XML_CHAR_ENCODING_8859_4: 1060 case XML_CHAR_ENCODING_8859_5: 1061 case XML_CHAR_ENCODING_8859_6: 1062 case XML_CHAR_ENCODING_8859_7: 1063 case XML_CHAR_ENCODING_8859_8: 1064 case XML_CHAR_ENCODING_8859_9: 1065 /* 1066 * We used to keep the internal content in the 1067 * document encoding however this turns being unmaintainable 1068 * So xmlGetCharEncodingHandler() will return non-null 1069 * values for this now. 1070 */ 1071 if ((ctxt->inputNr == 1) && 1072 (ctxt->encoding == NULL) && 1073 (ctxt->input != NULL) && 1074 (ctxt->input->encoding != NULL)) { 1075 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1076 } 1077 ctxt->charset = enc; 1078 return(0); 1079 case XML_CHAR_ENCODING_2022_JP: 1080 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1081 "encoding not supported %s\n", 1082 BAD_CAST "ISO-2022-JP", NULL); 1083 break; 1084 case XML_CHAR_ENCODING_SHIFT_JIS: 1085 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1086 "encoding not supported %s\n", 1087 BAD_CAST "Shift_JIS", NULL); 1088 break; 1089 case XML_CHAR_ENCODING_EUC_JP: 1090 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1091 "encoding not supported %s\n", 1092 BAD_CAST "EUC-JP", NULL); 1093 break; 1094 default: 1095 break; 1096 } 1097 } 1098 if (handler == NULL) 1099 return(-1); 1100 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1101 ret = xmlSwitchToEncodingInt(ctxt, handler, len); 1102 if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) { 1103 /* 1104 * on encoding conversion errors, stop the parser 1105 */ 1106 xmlStopParser(ctxt); 1107 ctxt->errNo = XML_I18N_CONV_FAILED; 1108 } 1109 return(ret); 1110 } 1111 1112 /** 1113 * xmlSwitchInputEncoding: 1114 * @ctxt: the parser context 1115 * @input: the input stream 1116 * @handler: the encoding handler 1117 * @len: the number of bytes to convert for the first line or -1 1118 * 1119 * change the input functions when discovering the character encoding 1120 * of a given entity. 1121 * 1122 * Returns 0 in case of success, -1 otherwise 1123 */ 1124 static int 1125 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1126 xmlCharEncodingHandlerPtr handler, int len) 1127 { 1128 int nbchars; 1129 1130 if (handler == NULL) 1131 return (-1); 1132 if (input == NULL) 1133 return (-1); 1134 if (input->buf != NULL) { 1135 if (input->buf->encoder != NULL) { 1136 /* 1137 * Check in case the auto encoding detetection triggered 1138 * in already. 1139 */ 1140 if (input->buf->encoder == handler) 1141 return (0); 1142 1143 /* 1144 * "UTF-16" can be used for both LE and BE 1145 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1146 BAD_CAST "UTF-16", 6)) && 1147 (!xmlStrncmp(BAD_CAST handler->name, 1148 BAD_CAST "UTF-16", 6))) { 1149 return(0); 1150 } 1151 */ 1152 1153 /* 1154 * Note: this is a bit dangerous, but that's what it 1155 * takes to use nearly compatible signature for different 1156 * encodings. 1157 */ 1158 xmlCharEncCloseFunc(input->buf->encoder); 1159 input->buf->encoder = handler; 1160 return (0); 1161 } 1162 input->buf->encoder = handler; 1163 1164 /* 1165 * Is there already some content down the pipe to convert ? 1166 */ 1167 if (xmlBufIsEmpty(input->buf->buffer) == 0) { 1168 int processed; 1169 unsigned int use; 1170 1171 /* 1172 * Specific handling of the Byte Order Mark for 1173 * UTF-16 1174 */ 1175 if ((handler->name != NULL) && 1176 (!strcmp(handler->name, "UTF-16LE") || 1177 !strcmp(handler->name, "UTF-16")) && 1178 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1179 input->cur += 2; 1180 } 1181 if ((handler->name != NULL) && 1182 (!strcmp(handler->name, "UTF-16BE")) && 1183 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1184 input->cur += 2; 1185 } 1186 /* 1187 * Errata on XML-1.0 June 20 2001 1188 * Specific handling of the Byte Order Mark for 1189 * UTF-8 1190 */ 1191 if ((handler->name != NULL) && 1192 (!strcmp(handler->name, "UTF-8")) && 1193 (input->cur[0] == 0xEF) && 1194 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1195 input->cur += 3; 1196 } 1197 1198 /* 1199 * Shrink the current input buffer. 1200 * Move it as the raw buffer and create a new input buffer 1201 */ 1202 processed = input->cur - input->base; 1203 xmlBufShrink(input->buf->buffer, processed); 1204 input->buf->raw = input->buf->buffer; 1205 input->buf->buffer = xmlBufCreate(); 1206 input->buf->rawconsumed = processed; 1207 use = xmlBufUse(input->buf->raw); 1208 1209 if (ctxt->html) { 1210 /* 1211 * convert as much as possible of the buffer 1212 */ 1213 nbchars = xmlCharEncInput(input->buf, 1); 1214 } else { 1215 /* 1216 * convert just enough to get 1217 * '<?xml version="1.0" encoding="xxx"?>' 1218 * parsed with the autodetected encoding 1219 * into the parser reading buffer. 1220 */ 1221 nbchars = xmlCharEncFirstLineInput(input->buf, len); 1222 } 1223 if (nbchars < 0) { 1224 xmlErrInternal(ctxt, 1225 "switching encoding: encoder error\n", 1226 NULL); 1227 return (-1); 1228 } 1229 input->buf->rawconsumed += use - xmlBufUse(input->buf->raw); 1230 xmlBufResetInput(input->buf->buffer, input); 1231 } 1232 return (0); 1233 } else if (input->length == 0) { 1234 /* 1235 * When parsing a static memory array one must know the 1236 * size to be able to convert the buffer. 1237 */ 1238 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1239 return (-1); 1240 } 1241 return (0); 1242 } 1243 1244 /** 1245 * xmlSwitchInputEncoding: 1246 * @ctxt: the parser context 1247 * @input: the input stream 1248 * @handler: the encoding handler 1249 * 1250 * change the input functions when discovering the character encoding 1251 * of a given entity. 1252 * 1253 * Returns 0 in case of success, -1 otherwise 1254 */ 1255 int 1256 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1257 xmlCharEncodingHandlerPtr handler) { 1258 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1259 } 1260 1261 /** 1262 * xmlSwitchToEncodingInt: 1263 * @ctxt: the parser context 1264 * @handler: the encoding handler 1265 * @len: the length to convert or -1 1266 * 1267 * change the input functions when discovering the character encoding 1268 * of a given entity, and convert only @len bytes of the output, this 1269 * is needed on auto detect to allows any declared encoding later to 1270 * convert the actual content after the xmlDecl 1271 * 1272 * Returns 0 in case of success, -1 otherwise 1273 */ 1274 static int 1275 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1276 xmlCharEncodingHandlerPtr handler, int len) { 1277 int ret = 0; 1278 1279 if (handler != NULL) { 1280 if (ctxt->input != NULL) { 1281 ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 1282 } else { 1283 xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1284 NULL); 1285 return(-1); 1286 } 1287 /* 1288 * The parsing is now done in UTF8 natively 1289 */ 1290 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1291 } else 1292 return(-1); 1293 return(ret); 1294 } 1295 1296 /** 1297 * xmlSwitchToEncoding: 1298 * @ctxt: the parser context 1299 * @handler: the encoding handler 1300 * 1301 * change the input functions when discovering the character encoding 1302 * of a given entity. 1303 * 1304 * Returns 0 in case of success, -1 otherwise 1305 */ 1306 int 1307 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1308 { 1309 return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1310 } 1311 1312 /************************************************************************ 1313 * * 1314 * Commodity functions to handle entities processing * 1315 * * 1316 ************************************************************************/ 1317 1318 /** 1319 * xmlFreeInputStream: 1320 * @input: an xmlParserInputPtr 1321 * 1322 * Free up an input stream. 1323 */ 1324 void 1325 xmlFreeInputStream(xmlParserInputPtr input) { 1326 if (input == NULL) return; 1327 1328 if (input->filename != NULL) xmlFree((char *) input->filename); 1329 if (input->directory != NULL) xmlFree((char *) input->directory); 1330 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1331 if (input->version != NULL) xmlFree((char *) input->version); 1332 if ((input->free != NULL) && (input->base != NULL)) 1333 input->free((xmlChar *) input->base); 1334 if (input->buf != NULL) 1335 xmlFreeParserInputBuffer(input->buf); 1336 xmlFree(input); 1337 } 1338 1339 /** 1340 * xmlNewInputStream: 1341 * @ctxt: an XML parser context 1342 * 1343 * Create a new input stream structure. 1344 * 1345 * Returns the new input stream or NULL 1346 */ 1347 xmlParserInputPtr 1348 xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1349 xmlParserInputPtr input; 1350 1351 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1352 if (input == NULL) { 1353 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1354 return(NULL); 1355 } 1356 memset(input, 0, sizeof(xmlParserInput)); 1357 input->line = 1; 1358 input->col = 1; 1359 input->standalone = -1; 1360 1361 /* 1362 * If the context is NULL the id cannot be initialized, but that 1363 * should not happen while parsing which is the situation where 1364 * the id is actually needed. 1365 */ 1366 if (ctxt != NULL) 1367 input->id = ctxt->input_id++; 1368 1369 return(input); 1370 } 1371 1372 /** 1373 * xmlNewIOInputStream: 1374 * @ctxt: an XML parser context 1375 * @input: an I/O Input 1376 * @enc: the charset encoding if known 1377 * 1378 * Create a new input stream structure encapsulating the @input into 1379 * a stream suitable for the parser. 1380 * 1381 * Returns the new input stream or NULL 1382 */ 1383 xmlParserInputPtr 1384 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1385 xmlCharEncoding enc) { 1386 xmlParserInputPtr inputStream; 1387 1388 if (input == NULL) return(NULL); 1389 if (xmlParserDebugEntities) 1390 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1391 inputStream = xmlNewInputStream(ctxt); 1392 if (inputStream == NULL) { 1393 return(NULL); 1394 } 1395 inputStream->filename = NULL; 1396 inputStream->buf = input; 1397 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1398 1399 if (enc != XML_CHAR_ENCODING_NONE) { 1400 xmlSwitchEncoding(ctxt, enc); 1401 } 1402 1403 return(inputStream); 1404 } 1405 1406 /** 1407 * xmlNewEntityInputStream: 1408 * @ctxt: an XML parser context 1409 * @entity: an Entity pointer 1410 * 1411 * Create a new input stream based on an xmlEntityPtr 1412 * 1413 * Returns the new input stream or NULL 1414 */ 1415 xmlParserInputPtr 1416 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1417 xmlParserInputPtr input; 1418 1419 if (entity == NULL) { 1420 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1421 NULL); 1422 return(NULL); 1423 } 1424 if (xmlParserDebugEntities) 1425 xmlGenericError(xmlGenericErrorContext, 1426 "new input from entity: %s\n", entity->name); 1427 if (entity->content == NULL) { 1428 switch (entity->etype) { 1429 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1430 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1431 entity->name); 1432 break; 1433 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1434 case XML_EXTERNAL_PARAMETER_ENTITY: 1435 return(xmlLoadExternalEntity((char *) entity->URI, 1436 (char *) entity->ExternalID, ctxt)); 1437 case XML_INTERNAL_GENERAL_ENTITY: 1438 xmlErrInternal(ctxt, 1439 "Internal entity %s without content !\n", 1440 entity->name); 1441 break; 1442 case XML_INTERNAL_PARAMETER_ENTITY: 1443 xmlErrInternal(ctxt, 1444 "Internal parameter entity %s without content !\n", 1445 entity->name); 1446 break; 1447 case XML_INTERNAL_PREDEFINED_ENTITY: 1448 xmlErrInternal(ctxt, 1449 "Predefined entity %s without content !\n", 1450 entity->name); 1451 break; 1452 } 1453 return(NULL); 1454 } 1455 input = xmlNewInputStream(ctxt); 1456 if (input == NULL) { 1457 return(NULL); 1458 } 1459 if (entity->URI != NULL) 1460 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1461 input->base = entity->content; 1462 if (entity->length == 0) 1463 entity->length = xmlStrlen(entity->content); 1464 input->cur = entity->content; 1465 input->length = entity->length; 1466 input->end = &entity->content[input->length]; 1467 return(input); 1468 } 1469 1470 /** 1471 * xmlNewStringInputStream: 1472 * @ctxt: an XML parser context 1473 * @buffer: an memory buffer 1474 * 1475 * Create a new input stream based on a memory buffer. 1476 * Returns the new input stream 1477 */ 1478 xmlParserInputPtr 1479 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1480 xmlParserInputPtr input; 1481 1482 if (buffer == NULL) { 1483 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1484 NULL); 1485 return(NULL); 1486 } 1487 if (xmlParserDebugEntities) 1488 xmlGenericError(xmlGenericErrorContext, 1489 "new fixed input: %.30s\n", buffer); 1490 input = xmlNewInputStream(ctxt); 1491 if (input == NULL) { 1492 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1493 return(NULL); 1494 } 1495 input->base = buffer; 1496 input->cur = buffer; 1497 input->length = xmlStrlen(buffer); 1498 input->end = &buffer[input->length]; 1499 return(input); 1500 } 1501 1502 /** 1503 * xmlNewInputFromFile: 1504 * @ctxt: an XML parser context 1505 * @filename: the filename to use as entity 1506 * 1507 * Create a new input stream based on a file or an URL. 1508 * 1509 * Returns the new input stream or NULL in case of error 1510 */ 1511 xmlParserInputPtr 1512 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1513 xmlParserInputBufferPtr buf; 1514 xmlParserInputPtr inputStream; 1515 char *directory = NULL; 1516 xmlChar *URI = NULL; 1517 1518 if (xmlParserDebugEntities) 1519 xmlGenericError(xmlGenericErrorContext, 1520 "new input from file: %s\n", filename); 1521 if (ctxt == NULL) return(NULL); 1522 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1523 if (buf == NULL) { 1524 if (filename == NULL) 1525 __xmlLoaderErr(ctxt, 1526 "failed to load external entity: NULL filename \n", 1527 NULL); 1528 else 1529 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1530 (const char *) filename); 1531 return(NULL); 1532 } 1533 1534 inputStream = xmlNewInputStream(ctxt); 1535 if (inputStream == NULL) 1536 return(NULL); 1537 1538 inputStream->buf = buf; 1539 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1540 if (inputStream == NULL) 1541 return(NULL); 1542 1543 if (inputStream->filename == NULL) 1544 URI = xmlStrdup((xmlChar *) filename); 1545 else 1546 URI = xmlStrdup((xmlChar *) inputStream->filename); 1547 directory = xmlParserGetDirectory((const char *) URI); 1548 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1549 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1550 if (URI != NULL) xmlFree((char *) URI); 1551 inputStream->directory = directory; 1552 1553 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1554 if ((ctxt->directory == NULL) && (directory != NULL)) 1555 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1556 return(inputStream); 1557 } 1558 1559 /************************************************************************ 1560 * * 1561 * Commodity functions to handle parser contexts * 1562 * * 1563 ************************************************************************/ 1564 1565 /** 1566 * xmlInitParserCtxt: 1567 * @ctxt: an XML parser context 1568 * 1569 * Initialize a parser context 1570 * 1571 * Returns 0 in case of success and -1 in case of error 1572 */ 1573 1574 int 1575 xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1576 { 1577 xmlParserInputPtr input; 1578 1579 if(ctxt==NULL) { 1580 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1581 return(-1); 1582 } 1583 1584 xmlDefaultSAXHandlerInit(); 1585 1586 if (ctxt->dict == NULL) 1587 ctxt->dict = xmlDictCreate(); 1588 if (ctxt->dict == NULL) { 1589 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1590 return(-1); 1591 } 1592 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); 1593 1594 if (ctxt->sax == NULL) 1595 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1596 if (ctxt->sax == NULL) { 1597 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1598 return(-1); 1599 } 1600 else 1601 xmlSAXVersion(ctxt->sax, 2); 1602 1603 ctxt->maxatts = 0; 1604 ctxt->atts = NULL; 1605 /* Allocate the Input stack */ 1606 if (ctxt->inputTab == NULL) { 1607 ctxt->inputTab = (xmlParserInputPtr *) 1608 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1609 ctxt->inputMax = 5; 1610 } 1611 if (ctxt->inputTab == NULL) { 1612 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1613 ctxt->inputNr = 0; 1614 ctxt->inputMax = 0; 1615 ctxt->input = NULL; 1616 return(-1); 1617 } 1618 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1619 xmlFreeInputStream(input); 1620 } 1621 ctxt->inputNr = 0; 1622 ctxt->input = NULL; 1623 1624 ctxt->version = NULL; 1625 ctxt->encoding = NULL; 1626 ctxt->standalone = -1; 1627 ctxt->hasExternalSubset = 0; 1628 ctxt->hasPErefs = 0; 1629 ctxt->html = 0; 1630 ctxt->external = 0; 1631 ctxt->instate = XML_PARSER_START; 1632 ctxt->token = 0; 1633 ctxt->directory = NULL; 1634 1635 /* Allocate the Node stack */ 1636 if (ctxt->nodeTab == NULL) { 1637 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1638 ctxt->nodeMax = 10; 1639 } 1640 if (ctxt->nodeTab == NULL) { 1641 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1642 ctxt->nodeNr = 0; 1643 ctxt->nodeMax = 0; 1644 ctxt->node = NULL; 1645 ctxt->inputNr = 0; 1646 ctxt->inputMax = 0; 1647 ctxt->input = NULL; 1648 return(-1); 1649 } 1650 ctxt->nodeNr = 0; 1651 ctxt->node = NULL; 1652 1653 /* Allocate the Name stack */ 1654 if (ctxt->nameTab == NULL) { 1655 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1656 ctxt->nameMax = 10; 1657 } 1658 if (ctxt->nameTab == NULL) { 1659 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1660 ctxt->nodeNr = 0; 1661 ctxt->nodeMax = 0; 1662 ctxt->node = NULL; 1663 ctxt->inputNr = 0; 1664 ctxt->inputMax = 0; 1665 ctxt->input = NULL; 1666 ctxt->nameNr = 0; 1667 ctxt->nameMax = 0; 1668 ctxt->name = NULL; 1669 return(-1); 1670 } 1671 ctxt->nameNr = 0; 1672 ctxt->name = NULL; 1673 1674 /* Allocate the space stack */ 1675 if (ctxt->spaceTab == NULL) { 1676 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1677 ctxt->spaceMax = 10; 1678 } 1679 if (ctxt->spaceTab == NULL) { 1680 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1681 ctxt->nodeNr = 0; 1682 ctxt->nodeMax = 0; 1683 ctxt->node = NULL; 1684 ctxt->inputNr = 0; 1685 ctxt->inputMax = 0; 1686 ctxt->input = NULL; 1687 ctxt->nameNr = 0; 1688 ctxt->nameMax = 0; 1689 ctxt->name = NULL; 1690 ctxt->spaceNr = 0; 1691 ctxt->spaceMax = 0; 1692 ctxt->space = NULL; 1693 return(-1); 1694 } 1695 ctxt->spaceNr = 1; 1696 ctxt->spaceMax = 10; 1697 ctxt->spaceTab[0] = -1; 1698 ctxt->space = &ctxt->spaceTab[0]; 1699 ctxt->userData = ctxt; 1700 ctxt->myDoc = NULL; 1701 ctxt->wellFormed = 1; 1702 ctxt->nsWellFormed = 1; 1703 ctxt->valid = 1; 1704 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1705 if (ctxt->loadsubset) { 1706 ctxt->options |= XML_PARSE_DTDLOAD; 1707 } 1708 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1709 ctxt->pedantic = xmlPedanticParserDefaultValue; 1710 if (ctxt->pedantic) { 1711 ctxt->options |= XML_PARSE_PEDANTIC; 1712 } 1713 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1714 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1715 if (ctxt->keepBlanks == 0) { 1716 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1717 ctxt->options |= XML_PARSE_NOBLANKS; 1718 } 1719 1720 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1721 ctxt->vctxt.userData = ctxt; 1722 ctxt->vctxt.error = xmlParserValidityError; 1723 ctxt->vctxt.warning = xmlParserValidityWarning; 1724 if (ctxt->validate) { 1725 if (xmlGetWarningsDefaultValue == 0) 1726 ctxt->vctxt.warning = NULL; 1727 else 1728 ctxt->vctxt.warning = xmlParserValidityWarning; 1729 ctxt->vctxt.nodeMax = 0; 1730 ctxt->options |= XML_PARSE_DTDVALID; 1731 } 1732 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1733 if (ctxt->replaceEntities) { 1734 ctxt->options |= XML_PARSE_NOENT; 1735 } 1736 ctxt->record_info = 0; 1737 ctxt->nbChars = 0; 1738 ctxt->checkIndex = 0; 1739 ctxt->inSubset = 0; 1740 ctxt->errNo = XML_ERR_OK; 1741 ctxt->depth = 0; 1742 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1743 ctxt->catalogs = NULL; 1744 ctxt->nbentities = 0; 1745 ctxt->sizeentities = 0; 1746 ctxt->sizeentcopy = 0; 1747 ctxt->input_id = 1; 1748 xmlInitNodeInfoSeq(&ctxt->node_seq); 1749 return(0); 1750 } 1751 1752 /** 1753 * xmlFreeParserCtxt: 1754 * @ctxt: an XML parser context 1755 * 1756 * Free all the memory used by a parser context. However the parsed 1757 * document in ctxt->myDoc is not freed. 1758 */ 1759 1760 void 1761 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1762 { 1763 xmlParserInputPtr input; 1764 1765 if (ctxt == NULL) return; 1766 1767 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1768 xmlFreeInputStream(input); 1769 } 1770 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1771 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1772 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1773 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1774 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1775 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1776 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1777 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1778 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1779 #ifdef LIBXML_SAX1_ENABLED 1780 if ((ctxt->sax != NULL) && 1781 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1782 #else 1783 if (ctxt->sax != NULL) 1784 #endif /* LIBXML_SAX1_ENABLED */ 1785 xmlFree(ctxt->sax); 1786 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1787 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1788 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1789 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1790 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1791 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1792 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1793 if (ctxt->attsDefault != NULL) 1794 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1795 if (ctxt->attsSpecial != NULL) 1796 xmlHashFree(ctxt->attsSpecial, NULL); 1797 if (ctxt->freeElems != NULL) { 1798 xmlNodePtr cur, next; 1799 1800 cur = ctxt->freeElems; 1801 while (cur != NULL) { 1802 next = cur->next; 1803 xmlFree(cur); 1804 cur = next; 1805 } 1806 } 1807 if (ctxt->freeAttrs != NULL) { 1808 xmlAttrPtr cur, next; 1809 1810 cur = ctxt->freeAttrs; 1811 while (cur != NULL) { 1812 next = cur->next; 1813 xmlFree(cur); 1814 cur = next; 1815 } 1816 } 1817 /* 1818 * cleanup the error strings 1819 */ 1820 if (ctxt->lastError.message != NULL) 1821 xmlFree(ctxt->lastError.message); 1822 if (ctxt->lastError.file != NULL) 1823 xmlFree(ctxt->lastError.file); 1824 if (ctxt->lastError.str1 != NULL) 1825 xmlFree(ctxt->lastError.str1); 1826 if (ctxt->lastError.str2 != NULL) 1827 xmlFree(ctxt->lastError.str2); 1828 if (ctxt->lastError.str3 != NULL) 1829 xmlFree(ctxt->lastError.str3); 1830 1831 #ifdef LIBXML_CATALOG_ENABLED 1832 if (ctxt->catalogs != NULL) 1833 xmlCatalogFreeLocal(ctxt->catalogs); 1834 #endif 1835 xmlFree(ctxt); 1836 } 1837 1838 /** 1839 * xmlNewParserCtxt: 1840 * 1841 * Allocate and initialize a new parser context. 1842 * 1843 * Returns the xmlParserCtxtPtr or NULL 1844 */ 1845 1846 xmlParserCtxtPtr 1847 xmlNewParserCtxt(void) 1848 { 1849 xmlParserCtxtPtr ctxt; 1850 1851 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1852 if (ctxt == NULL) { 1853 xmlErrMemory(NULL, "cannot allocate parser context\n"); 1854 return(NULL); 1855 } 1856 memset(ctxt, 0, sizeof(xmlParserCtxt)); 1857 if (xmlInitParserCtxt(ctxt) < 0) { 1858 xmlFreeParserCtxt(ctxt); 1859 return(NULL); 1860 } 1861 return(ctxt); 1862 } 1863 1864 /************************************************************************ 1865 * * 1866 * Handling of node informations * 1867 * * 1868 ************************************************************************/ 1869 1870 /** 1871 * xmlClearParserCtxt: 1872 * @ctxt: an XML parser context 1873 * 1874 * Clear (release owned resources) and reinitialize a parser context 1875 */ 1876 1877 void 1878 xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1879 { 1880 if (ctxt==NULL) 1881 return; 1882 xmlClearNodeInfoSeq(&ctxt->node_seq); 1883 xmlCtxtReset(ctxt); 1884 } 1885 1886 1887 /** 1888 * xmlParserFindNodeInfo: 1889 * @ctx: an XML parser context 1890 * @node: an XML node within the tree 1891 * 1892 * Find the parser node info struct for a given node 1893 * 1894 * Returns an xmlParserNodeInfo block pointer or NULL 1895 */ 1896 const xmlParserNodeInfo * 1897 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1898 { 1899 unsigned long pos; 1900 1901 if ((ctx == NULL) || (node == NULL)) 1902 return (NULL); 1903 /* Find position where node should be at */ 1904 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1905 if (pos < ctx->node_seq.length 1906 && ctx->node_seq.buffer[pos].node == node) 1907 return &ctx->node_seq.buffer[pos]; 1908 else 1909 return NULL; 1910 } 1911 1912 1913 /** 1914 * xmlInitNodeInfoSeq: 1915 * @seq: a node info sequence pointer 1916 * 1917 * -- Initialize (set to initial state) node info sequence 1918 */ 1919 void 1920 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1921 { 1922 if (seq == NULL) 1923 return; 1924 seq->length = 0; 1925 seq->maximum = 0; 1926 seq->buffer = NULL; 1927 } 1928 1929 /** 1930 * xmlClearNodeInfoSeq: 1931 * @seq: a node info sequence pointer 1932 * 1933 * -- Clear (release memory and reinitialize) node 1934 * info sequence 1935 */ 1936 void 1937 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1938 { 1939 if (seq == NULL) 1940 return; 1941 if (seq->buffer != NULL) 1942 xmlFree(seq->buffer); 1943 xmlInitNodeInfoSeq(seq); 1944 } 1945 1946 /** 1947 * xmlParserFindNodeInfoIndex: 1948 * @seq: a node info sequence pointer 1949 * @node: an XML node pointer 1950 * 1951 * 1952 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1953 * the given node is or should be at in a sorted sequence 1954 * 1955 * Returns a long indicating the position of the record 1956 */ 1957 unsigned long 1958 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1959 const xmlNodePtr node) 1960 { 1961 unsigned long upper, lower, middle; 1962 int found = 0; 1963 1964 if ((seq == NULL) || (node == NULL)) 1965 return ((unsigned long) -1); 1966 1967 /* Do a binary search for the key */ 1968 lower = 1; 1969 upper = seq->length; 1970 middle = 0; 1971 while (lower <= upper && !found) { 1972 middle = lower + (upper - lower) / 2; 1973 if (node == seq->buffer[middle - 1].node) 1974 found = 1; 1975 else if (node < seq->buffer[middle - 1].node) 1976 upper = middle - 1; 1977 else 1978 lower = middle + 1; 1979 } 1980 1981 /* Return position */ 1982 if (middle == 0 || seq->buffer[middle - 1].node < node) 1983 return middle; 1984 else 1985 return middle - 1; 1986 } 1987 1988 1989 /** 1990 * xmlParserAddNodeInfo: 1991 * @ctxt: an XML parser context 1992 * @info: a node info sequence pointer 1993 * 1994 * Insert node info record into the sorted sequence 1995 */ 1996 void 1997 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 1998 const xmlParserNodeInfoPtr info) 1999 { 2000 unsigned long pos; 2001 2002 if ((ctxt == NULL) || (info == NULL)) return; 2003 2004 /* Find pos and check to see if node is already in the sequence */ 2005 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 2006 info->node); 2007 2008 if ((pos < ctxt->node_seq.length) && 2009 (ctxt->node_seq.buffer != NULL) && 2010 (ctxt->node_seq.buffer[pos].node == info->node)) { 2011 ctxt->node_seq.buffer[pos] = *info; 2012 } 2013 2014 /* Otherwise, we need to add new node to buffer */ 2015 else { 2016 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) || 2017 (ctxt->node_seq.buffer == NULL)) { 2018 xmlParserNodeInfo *tmp_buffer; 2019 unsigned int byte_size; 2020 2021 if (ctxt->node_seq.maximum == 0) 2022 ctxt->node_seq.maximum = 2; 2023 byte_size = (sizeof(*ctxt->node_seq.buffer) * 2024 (2 * ctxt->node_seq.maximum)); 2025 2026 if (ctxt->node_seq.buffer == NULL) 2027 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2028 else 2029 tmp_buffer = 2030 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2031 byte_size); 2032 2033 if (tmp_buffer == NULL) { 2034 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2035 return; 2036 } 2037 ctxt->node_seq.buffer = tmp_buffer; 2038 ctxt->node_seq.maximum *= 2; 2039 } 2040 2041 /* If position is not at end, move elements out of the way */ 2042 if (pos != ctxt->node_seq.length) { 2043 unsigned long i; 2044 2045 for (i = ctxt->node_seq.length; i > pos; i--) 2046 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2047 } 2048 2049 /* Copy element and increase length */ 2050 ctxt->node_seq.buffer[pos] = *info; 2051 ctxt->node_seq.length++; 2052 } 2053 } 2054 2055 /************************************************************************ 2056 * * 2057 * Defaults settings * 2058 * * 2059 ************************************************************************/ 2060 /** 2061 * xmlPedanticParserDefault: 2062 * @val: int 0 or 1 2063 * 2064 * Set and return the previous value for enabling pedantic warnings. 2065 * 2066 * Returns the last value for 0 for no substitution, 1 for substitution. 2067 */ 2068 2069 int 2070 xmlPedanticParserDefault(int val) { 2071 int old = xmlPedanticParserDefaultValue; 2072 2073 xmlPedanticParserDefaultValue = val; 2074 return(old); 2075 } 2076 2077 /** 2078 * xmlLineNumbersDefault: 2079 * @val: int 0 or 1 2080 * 2081 * Set and return the previous value for enabling line numbers in elements 2082 * contents. This may break on old application and is turned off by default. 2083 * 2084 * Returns the last value for 0 for no substitution, 1 for substitution. 2085 */ 2086 2087 int 2088 xmlLineNumbersDefault(int val) { 2089 int old = xmlLineNumbersDefaultValue; 2090 2091 xmlLineNumbersDefaultValue = val; 2092 return(old); 2093 } 2094 2095 /** 2096 * xmlSubstituteEntitiesDefault: 2097 * @val: int 0 or 1 2098 * 2099 * Set and return the previous value for default entity support. 2100 * Initially the parser always keep entity references instead of substituting 2101 * entity values in the output. This function has to be used to change the 2102 * default parser behavior 2103 * SAX::substituteEntities() has to be used for changing that on a file by 2104 * file basis. 2105 * 2106 * Returns the last value for 0 for no substitution, 1 for substitution. 2107 */ 2108 2109 int 2110 xmlSubstituteEntitiesDefault(int val) { 2111 int old = xmlSubstituteEntitiesDefaultValue; 2112 2113 xmlSubstituteEntitiesDefaultValue = val; 2114 return(old); 2115 } 2116 2117 /** 2118 * xmlKeepBlanksDefault: 2119 * @val: int 0 or 1 2120 * 2121 * Set and return the previous value for default blanks text nodes support. 2122 * The 1.x version of the parser used an heuristic to try to detect 2123 * ignorable white spaces. As a result the SAX callback was generating 2124 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2125 * using the DOM output text nodes containing those blanks were not generated. 2126 * The 2.x and later version will switch to the XML standard way and 2127 * ignorableWhitespace() are only generated when running the parser in 2128 * validating mode and when the current element doesn't allow CDATA or 2129 * mixed content. 2130 * This function is provided as a way to force the standard behavior 2131 * on 1.X libs and to switch back to the old mode for compatibility when 2132 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2133 * by using xmlIsBlankNode() commodity function to detect the "empty" 2134 * nodes generated. 2135 * This value also affect autogeneration of indentation when saving code 2136 * if blanks sections are kept, indentation is not generated. 2137 * 2138 * Returns the last value for 0 for no substitution, 1 for substitution. 2139 */ 2140 2141 int 2142 xmlKeepBlanksDefault(int val) { 2143 int old = xmlKeepBlanksDefaultValue; 2144 2145 xmlKeepBlanksDefaultValue = val; 2146 if (!val) xmlIndentTreeOutput = 1; 2147 return(old); 2148 } 2149 2150 #define bottom_parserInternals 2151 #include "elfgcchack.h" 2152