1 /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd 2 See the file COPYING for copying permission. 3 4 runtest.c : run the Expat test suite 5 */ 6 7 #ifdef HAVE_EXPAT_CONFIG_H 8 #include <expat_config.h> 9 #endif 10 11 #include <assert.h> 12 #include <stdlib.h> 13 #include <stdio.h> 14 #include <string.h> 15 #include <stdint.h> 16 #include <stddef.h> /* ptrdiff_t */ 17 #ifndef __cplusplus 18 # include <stdbool.h> 19 #endif 20 21 #include "expat.h" 22 #include "chardata.h" 23 #include "internal.h" /* for UNUSED_P only */ 24 #include "minicheck.h" 25 26 #if defined(__amigaos__) && defined(__USE_INLINE__) 27 #include <proto/expat.h> 28 #endif 29 30 #ifdef XML_LARGE_SIZE 31 #define XML_FMT_INT_MOD "ll" 32 #else 33 #define XML_FMT_INT_MOD "l" 34 #endif 35 36 static XML_Parser parser; 37 38 39 static void 40 basic_setup(void) 41 { 42 parser = XML_ParserCreate(NULL); 43 if (parser == NULL) 44 fail("Parser not created."); 45 } 46 47 static void 48 basic_teardown(void) 49 { 50 if (parser != NULL) 51 XML_ParserFree(parser); 52 } 53 54 /* Generate a failure using the parser state to create an error message; 55 this should be used when the parser reports an error we weren't 56 expecting. 57 */ 58 static void 59 _xml_failure(XML_Parser parser, const char *file, int line) 60 { 61 char buffer[1024]; 62 enum XML_Error err = XML_GetErrorCode(parser); 63 sprintf(buffer, 64 " %d: %s (line %" XML_FMT_INT_MOD "u, offset %"\ 65 XML_FMT_INT_MOD "u)\n reported from %s, line %d\n", 66 err, 67 XML_ErrorString(err), 68 XML_GetCurrentLineNumber(parser), 69 XML_GetCurrentColumnNumber(parser), 70 file, line); 71 _fail_unless(0, file, line, buffer); 72 } 73 74 static enum XML_Status 75 _XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, int isFinal) 76 { 77 enum XML_Status res = XML_STATUS_ERROR; 78 int offset = 0; 79 80 if (len == 0) { 81 return XML_Parse(parser, s, len, isFinal); 82 } 83 84 for (; offset < len; offset++) { 85 const int innerIsFinal = (offset == len - 1) && isFinal; 86 const char c = s[offset]; /* to help out-of-bounds detection */ 87 res = XML_Parse(parser, &c, sizeof(char), innerIsFinal); 88 if (res != XML_STATUS_OK) { 89 return res; 90 } 91 } 92 return res; 93 } 94 95 #define xml_failure(parser) _xml_failure((parser), __FILE__, __LINE__) 96 97 static void 98 _expect_failure(const char *text, enum XML_Error errorCode, const char *errorMessage, 99 const char *file, int lineno) 100 { 101 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) 102 /* Hackish use of _fail_unless() macro, but let's us report 103 the right filename and line number. */ 104 _fail_unless(0, file, lineno, errorMessage); 105 if (XML_GetErrorCode(parser) != errorCode) 106 _xml_failure(parser, file, lineno); 107 } 108 109 #define expect_failure(text, errorCode, errorMessage) \ 110 _expect_failure((text), (errorCode), (errorMessage), \ 111 __FILE__, __LINE__) 112 113 /* Dummy handlers for when we need to set a handler to tickle a bug, 114 but it doesn't need to do anything. 115 */ 116 117 static void XMLCALL 118 dummy_start_doctype_handler(void *UNUSED_P(userData), 119 const XML_Char *UNUSED_P(doctypeName), 120 const XML_Char *UNUSED_P(sysid), 121 const XML_Char *UNUSED_P(pubid), 122 int UNUSED_P(has_internal_subset)) 123 {} 124 125 static void XMLCALL 126 dummy_end_doctype_handler(void *UNUSED_P(userData)) 127 {} 128 129 static void XMLCALL 130 dummy_entity_decl_handler(void *UNUSED_P(userData), 131 const XML_Char *UNUSED_P(entityName), 132 int UNUSED_P(is_parameter_entity), 133 const XML_Char *UNUSED_P(value), 134 int UNUSED_P(value_length), 135 const XML_Char *UNUSED_P(base), 136 const XML_Char *UNUSED_P(systemId), 137 const XML_Char *UNUSED_P(publicId), 138 const XML_Char *UNUSED_P(notationName)) 139 {} 140 141 static void XMLCALL 142 dummy_notation_decl_handler(void *UNUSED_P(userData), 143 const XML_Char *UNUSED_P(notationName), 144 const XML_Char *UNUSED_P(base), 145 const XML_Char *UNUSED_P(systemId), 146 const XML_Char *UNUSED_P(publicId)) 147 {} 148 149 static void XMLCALL 150 dummy_element_decl_handler(void *UNUSED_P(userData), 151 const XML_Char *UNUSED_P(name), 152 XML_Content *UNUSED_P(model)) 153 {} 154 155 static void XMLCALL 156 dummy_attlist_decl_handler(void *UNUSED_P(userData), 157 const XML_Char *UNUSED_P(elname), 158 const XML_Char *UNUSED_P(attname), 159 const XML_Char *UNUSED_P(att_type), 160 const XML_Char *UNUSED_P(dflt), 161 int UNUSED_P(isrequired)) 162 {} 163 164 static void XMLCALL 165 dummy_comment_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(data)) 166 {} 167 168 static void XMLCALL 169 dummy_pi_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target), const XML_Char *UNUSED_P(data)) 170 {} 171 172 static void XMLCALL 173 dummy_start_element(void *UNUSED_P(userData), 174 const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts)) 175 {} 176 177 178 /* 179 * Character & encoding tests. 180 */ 181 182 START_TEST(test_nul_byte) 183 { 184 char text[] = "<doc>\0</doc>"; 185 186 /* test that a NUL byte (in US-ASCII data) is an error */ 187 if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK) 188 fail("Parser did not report error on NUL-byte."); 189 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN) 190 xml_failure(parser); 191 } 192 END_TEST 193 194 195 START_TEST(test_u0000_char) 196 { 197 /* test that a NUL byte (in US-ASCII data) is an error */ 198 expect_failure("<doc>�</doc>", 199 XML_ERROR_BAD_CHAR_REF, 200 "Parser did not report error on NUL-byte."); 201 } 202 END_TEST 203 204 START_TEST(test_bom_utf8) 205 { 206 /* This test is really just making sure we don't core on a UTF-8 BOM. */ 207 const char *text = "\357\273\277<e/>"; 208 209 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 210 xml_failure(parser); 211 } 212 END_TEST 213 214 START_TEST(test_bom_utf16_be) 215 { 216 char text[] = "\376\377\0<\0e\0/\0>"; 217 218 if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR) 219 xml_failure(parser); 220 } 221 END_TEST 222 223 START_TEST(test_bom_utf16_le) 224 { 225 char text[] = "\377\376<\0e\0/\0>\0"; 226 227 if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR) 228 xml_failure(parser); 229 } 230 END_TEST 231 232 static void XMLCALL 233 accumulate_characters(void *userData, const XML_Char *s, int len) 234 { 235 CharData_AppendXMLChars((CharData *)userData, s, len); 236 } 237 238 static void XMLCALL 239 accumulate_attribute(void *userData, const XML_Char *UNUSED_P(name), 240 const XML_Char **atts) 241 { 242 CharData *storage = (CharData *)userData; 243 if (storage->count < 0 && atts != NULL && atts[0] != NULL) { 244 /* "accumulate" the value of the first attribute we see */ 245 CharData_AppendXMLChars(storage, atts[1], -1); 246 } 247 } 248 249 250 static void 251 _run_character_check(const XML_Char *text, const XML_Char *expected, 252 const char *file, int line) 253 { 254 CharData storage; 255 256 CharData_Init(&storage); 257 XML_SetUserData(parser, &storage); 258 XML_SetCharacterDataHandler(parser, accumulate_characters); 259 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 260 _xml_failure(parser, file, line); 261 CharData_CheckXMLChars(&storage, expected); 262 } 263 264 #define run_character_check(text, expected) \ 265 _run_character_check(text, expected, __FILE__, __LINE__) 266 267 static void 268 _run_attribute_check(const XML_Char *text, const XML_Char *expected, 269 const char *file, int line) 270 { 271 CharData storage; 272 273 CharData_Init(&storage); 274 XML_SetUserData(parser, &storage); 275 XML_SetStartElementHandler(parser, accumulate_attribute); 276 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 277 _xml_failure(parser, file, line); 278 CharData_CheckXMLChars(&storage, expected); 279 } 280 281 #define run_attribute_check(text, expected) \ 282 _run_attribute_check(text, expected, __FILE__, __LINE__) 283 284 /* Regression test for SF bug #491986. */ 285 START_TEST(test_danish_latin1) 286 { 287 const char *text = 288 "<?xml version='1.0' encoding='iso-8859-1'?>\n" 289 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>"; 290 run_character_check(text, 291 "J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85"); 292 } 293 END_TEST 294 295 296 /* Regression test for SF bug #514281. */ 297 START_TEST(test_french_charref_hexidecimal) 298 { 299 const char *text = 300 "<?xml version='1.0' encoding='iso-8859-1'?>\n" 301 "<doc>éèàçêÈ</doc>"; 302 run_character_check(text, 303 "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 304 } 305 END_TEST 306 307 START_TEST(test_french_charref_decimal) 308 { 309 const char *text = 310 "<?xml version='1.0' encoding='iso-8859-1'?>\n" 311 "<doc>éèàçêÈ</doc>"; 312 run_character_check(text, 313 "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 314 } 315 END_TEST 316 317 START_TEST(test_french_latin1) 318 { 319 const char *text = 320 "<?xml version='1.0' encoding='iso-8859-1'?>\n" 321 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>"; 322 run_character_check(text, 323 "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 324 } 325 END_TEST 326 327 START_TEST(test_french_utf8) 328 { 329 const char *text = 330 "<?xml version='1.0' encoding='utf-8'?>\n" 331 "<doc>\xC3\xA9</doc>"; 332 run_character_check(text, "\xC3\xA9"); 333 } 334 END_TEST 335 336 /* Regression test for SF bug #600479. 337 XXX There should be a test that exercises all legal XML Unicode 338 characters as PCDATA and attribute value content, and XML Name 339 characters as part of element and attribute names. 340 */ 341 START_TEST(test_utf8_false_rejection) 342 { 343 const char *text = "<doc>\xEF\xBA\xBF</doc>"; 344 run_character_check(text, "\xEF\xBA\xBF"); 345 } 346 END_TEST 347 348 /* Regression test for SF bug #477667. 349 This test assures that any 8-bit character followed by a 7-bit 350 character will not be mistakenly interpreted as a valid UTF-8 351 sequence. 352 */ 353 START_TEST(test_illegal_utf8) 354 { 355 char text[100]; 356 int i; 357 358 for (i = 128; i <= 255; ++i) { 359 sprintf(text, "<e>%ccd</e>", i); 360 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) { 361 sprintf(text, 362 "expected token error for '%c' (ordinal %d) in UTF-8 text", 363 i, i); 364 fail(text); 365 } 366 else if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN) 367 xml_failure(parser); 368 /* Reset the parser since we use the same parser repeatedly. */ 369 XML_ParserReset(parser, NULL); 370 } 371 } 372 END_TEST 373 374 375 /* Examples, not masks: */ 376 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */ 377 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */ 378 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */ 379 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */ 380 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */ 381 382 START_TEST(test_utf8_auto_align) 383 { 384 struct TestCase { 385 ptrdiff_t expectedMovementInChars; 386 const char * input; 387 }; 388 389 struct TestCase cases[] = { 390 {00, ""}, 391 392 {00, UTF8_LEAD_1}, 393 394 {-1, UTF8_LEAD_2}, 395 {00, UTF8_LEAD_2 UTF8_FOLLOW}, 396 397 {-1, UTF8_LEAD_3}, 398 {-2, UTF8_LEAD_3 UTF8_FOLLOW}, 399 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW}, 400 401 {-1, UTF8_LEAD_4}, 402 {-2, UTF8_LEAD_4 UTF8_FOLLOW}, 403 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW}, 404 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW}, 405 }; 406 407 size_t i = 0; 408 bool success = true; 409 for (; i < sizeof(cases) / sizeof(*cases); i++) { 410 const char * fromLim = cases[i].input + strlen(cases[i].input); 411 const char * const fromLimInitially = fromLim; 412 ptrdiff_t actualMovementInChars; 413 414 align_limit_to_full_utf8_characters(cases[i].input, &fromLim); 415 416 actualMovementInChars = (fromLim - fromLimInitially); 417 if (actualMovementInChars != cases[i].expectedMovementInChars) { 418 size_t j = 0; 419 success = false; 420 printf("[-] UTF-8 case %2lu: Expected movement by %2ld chars" 421 ", actually moved by %2ld chars: \"", 422 i + 1, cases[i].expectedMovementInChars, actualMovementInChars); 423 for (; j < strlen(cases[i].input); j++) { 424 printf("\\x%02x", (unsigned char)cases[i].input[j]); 425 } 426 printf("\"\n"); 427 } 428 } 429 430 if (! success) { 431 fail("UTF-8 auto-alignment is not bullet-proof\n"); 432 } 433 } 434 END_TEST 435 436 START_TEST(test_utf16) 437 { 438 /* <?xml version="1.0" encoding="UTF-16"?> 439 <doc a='123'>some text</doc> 440 */ 441 char text[] = 442 "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o" 443 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o" 444 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066" 445 "\000'\000?\000>\000\n" 446 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'" 447 "\000>\000s\000o\000m\000e\000 \000t\000e\000x\000t\000<\000/" 448 "\000d\000o\000c\000>"; 449 if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR) 450 xml_failure(parser); 451 } 452 END_TEST 453 454 START_TEST(test_utf16_le_epilog_newline) 455 { 456 unsigned int first_chunk_bytes = 17; 457 char text[] = 458 "\xFF\xFE" /* BOM */ 459 "<\000e\000/\000>\000" /* document element */ 460 "\r\000\n\000\r\000\n\000"; /* epilog */ 461 462 if (first_chunk_bytes >= sizeof(text) - 1) 463 fail("bad value of first_chunk_bytes"); 464 if ( _XML_Parse_SINGLE_BYTES(parser, text, first_chunk_bytes, XML_FALSE) 465 == XML_STATUS_ERROR) 466 xml_failure(parser); 467 else { 468 enum XML_Status rc; 469 rc = _XML_Parse_SINGLE_BYTES(parser, text + first_chunk_bytes, 470 sizeof(text) - first_chunk_bytes - 1, XML_TRUE); 471 if (rc == XML_STATUS_ERROR) 472 xml_failure(parser); 473 } 474 } 475 END_TEST 476 477 /* Regression test for SF bug #481609, #774028. */ 478 START_TEST(test_latin1_umlauts) 479 { 480 const char *text = 481 "<?xml version='1.0' encoding='iso-8859-1'?>\n" 482 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n" 483 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>"; 484 const char *utf8 = 485 "\xC3\xA4 \xC3\xB6 \xC3\xBC " 486 "\xC3\xA4 \xC3\xB6 \xC3\xBC " 487 "\xC3\xA4 \xC3\xB6 \xC3\xBC >"; 488 run_character_check(text, utf8); 489 XML_ParserReset(parser, NULL); 490 run_attribute_check(text, utf8); 491 } 492 END_TEST 493 494 /* Regression test #1 for SF bug #653180. */ 495 START_TEST(test_line_number_after_parse) 496 { 497 const char *text = 498 "<tag>\n" 499 "\n" 500 "\n</tag>"; 501 XML_Size lineno; 502 503 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR) 504 xml_failure(parser); 505 lineno = XML_GetCurrentLineNumber(parser); 506 if (lineno != 4) { 507 char buffer[100]; 508 sprintf(buffer, 509 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno); 510 fail(buffer); 511 } 512 } 513 END_TEST 514 515 /* Regression test #2 for SF bug #653180. */ 516 START_TEST(test_column_number_after_parse) 517 { 518 const char *text = "<tag></tag>"; 519 XML_Size colno; 520 521 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR) 522 xml_failure(parser); 523 colno = XML_GetCurrentColumnNumber(parser); 524 if (colno != 11) { 525 char buffer[100]; 526 sprintf(buffer, 527 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno); 528 fail(buffer); 529 } 530 } 531 END_TEST 532 533 static void XMLCALL 534 start_element_event_handler2(void *userData, const XML_Char *name, 535 const XML_Char **UNUSED_P(attr)) 536 { 537 CharData *storage = (CharData *) userData; 538 char buffer[100]; 539 540 sprintf(buffer, 541 "<%s> at col:%" XML_FMT_INT_MOD "u line:%"\ 542 XML_FMT_INT_MOD "u\n", name, 543 XML_GetCurrentColumnNumber(parser), 544 XML_GetCurrentLineNumber(parser)); 545 CharData_AppendString(storage, buffer); 546 } 547 548 static void XMLCALL 549 end_element_event_handler2(void *userData, const XML_Char *name) 550 { 551 CharData *storage = (CharData *) userData; 552 char buffer[100]; 553 554 sprintf(buffer, 555 "</%s> at col:%" XML_FMT_INT_MOD "u line:%"\ 556 XML_FMT_INT_MOD "u\n", name, 557 XML_GetCurrentColumnNumber(parser), 558 XML_GetCurrentLineNumber(parser)); 559 CharData_AppendString(storage, buffer); 560 } 561 562 /* Regression test #3 for SF bug #653180. */ 563 START_TEST(test_line_and_column_numbers_inside_handlers) 564 { 565 const char *text = 566 "<a>\n" /* Unix end-of-line */ 567 " <b>\r\n" /* Windows end-of-line */ 568 " <c/>\r" /* Mac OS end-of-line */ 569 " </b>\n" 570 " <d>\n" 571 " <f/>\n" 572 " </d>\n" 573 "</a>"; 574 const char *expected = 575 "<a> at col:0 line:1\n" 576 "<b> at col:2 line:2\n" 577 "<c> at col:4 line:3\n" 578 "</c> at col:8 line:3\n" 579 "</b> at col:2 line:4\n" 580 "<d> at col:2 line:5\n" 581 "<f> at col:4 line:6\n" 582 "</f> at col:8 line:6\n" 583 "</d> at col:2 line:7\n" 584 "</a> at col:0 line:8\n"; 585 CharData storage; 586 587 CharData_Init(&storage); 588 XML_SetUserData(parser, &storage); 589 XML_SetStartElementHandler(parser, start_element_event_handler2); 590 XML_SetEndElementHandler(parser, end_element_event_handler2); 591 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 592 xml_failure(parser); 593 594 CharData_CheckString(&storage, expected); 595 } 596 END_TEST 597 598 /* Regression test #4 for SF bug #653180. */ 599 START_TEST(test_line_number_after_error) 600 { 601 const char *text = 602 "<a>\n" 603 " <b>\n" 604 " </a>"; /* missing </b> */ 605 XML_Size lineno; 606 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR) 607 fail("Expected a parse error"); 608 609 lineno = XML_GetCurrentLineNumber(parser); 610 if (lineno != 3) { 611 char buffer[100]; 612 sprintf(buffer, "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno); 613 fail(buffer); 614 } 615 } 616 END_TEST 617 618 /* Regression test #5 for SF bug #653180. */ 619 START_TEST(test_column_number_after_error) 620 { 621 const char *text = 622 "<a>\n" 623 " <b>\n" 624 " </a>"; /* missing </b> */ 625 XML_Size colno; 626 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR) 627 fail("Expected a parse error"); 628 629 colno = XML_GetCurrentColumnNumber(parser); 630 if (colno != 4) { 631 char buffer[100]; 632 sprintf(buffer, 633 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno); 634 fail(buffer); 635 } 636 } 637 END_TEST 638 639 /* Regression test for SF bug #478332. */ 640 START_TEST(test_really_long_lines) 641 { 642 /* This parses an input line longer than INIT_DATA_BUF_SIZE 643 characters long (defined to be 1024 in xmlparse.c). We take a 644 really cheesy approach to building the input buffer, because 645 this avoids writing bugs in buffer-filling code. 646 */ 647 const char *text = 648 "<e>" 649 /* 64 chars */ 650 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 651 /* until we have at least 1024 characters on the line: */ 652 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 653 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 654 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 655 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 656 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 657 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 658 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 659 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 660 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 661 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 662 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 663 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 664 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 665 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 666 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 667 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 668 "</e>"; 669 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 670 xml_failure(parser); 671 } 672 END_TEST 673 674 675 /* 676 * Element event tests. 677 */ 678 679 static void XMLCALL 680 end_element_event_handler(void *userData, const XML_Char *name) 681 { 682 CharData *storage = (CharData *) userData; 683 CharData_AppendString(storage, "/"); 684 CharData_AppendXMLChars(storage, name, -1); 685 } 686 687 START_TEST(test_end_element_events) 688 { 689 const char *text = "<a><b><c/></b><d><f/></d></a>"; 690 const char *expected = "/c/b/f/d/a"; 691 CharData storage; 692 693 CharData_Init(&storage); 694 XML_SetUserData(parser, &storage); 695 XML_SetEndElementHandler(parser, end_element_event_handler); 696 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 697 xml_failure(parser); 698 CharData_CheckString(&storage, expected); 699 } 700 END_TEST 701 702 703 /* 704 * Attribute tests. 705 */ 706 707 /* Helpers used by the following test; this checks any "attr" and "refs" 708 attributes to make sure whitespace has been normalized. 709 710 Return true if whitespace has been normalized in a string, using 711 the rules for attribute value normalization. The 'is_cdata' flag 712 is needed since CDATA attributes don't need to have multiple 713 whitespace characters collapsed to a single space, while other 714 attribute data types do. (Section 3.3.3 of the recommendation.) 715 */ 716 static int 717 is_whitespace_normalized(const XML_Char *s, int is_cdata) 718 { 719 int blanks = 0; 720 int at_start = 1; 721 while (*s) { 722 if (*s == ' ') 723 ++blanks; 724 else if (*s == '\t' || *s == '\n' || *s == '\r') 725 return 0; 726 else { 727 if (at_start) { 728 at_start = 0; 729 if (blanks && !is_cdata) 730 /* illegal leading blanks */ 731 return 0; 732 } 733 else if (blanks > 1 && !is_cdata) 734 return 0; 735 blanks = 0; 736 } 737 ++s; 738 } 739 if (blanks && !is_cdata) 740 return 0; 741 return 1; 742 } 743 744 /* Check the attribute whitespace checker: */ 745 static void 746 testhelper_is_whitespace_normalized(void) 747 { 748 assert(is_whitespace_normalized("abc", 0)); 749 assert(is_whitespace_normalized("abc", 1)); 750 assert(is_whitespace_normalized("abc def ghi", 0)); 751 assert(is_whitespace_normalized("abc def ghi", 1)); 752 assert(!is_whitespace_normalized(" abc def ghi", 0)); 753 assert(is_whitespace_normalized(" abc def ghi", 1)); 754 assert(!is_whitespace_normalized("abc def ghi", 0)); 755 assert(is_whitespace_normalized("abc def ghi", 1)); 756 assert(!is_whitespace_normalized("abc def ghi ", 0)); 757 assert(is_whitespace_normalized("abc def ghi ", 1)); 758 assert(!is_whitespace_normalized(" ", 0)); 759 assert(is_whitespace_normalized(" ", 1)); 760 assert(!is_whitespace_normalized("\t", 0)); 761 assert(!is_whitespace_normalized("\t", 1)); 762 assert(!is_whitespace_normalized("\n", 0)); 763 assert(!is_whitespace_normalized("\n", 1)); 764 assert(!is_whitespace_normalized("\r", 0)); 765 assert(!is_whitespace_normalized("\r", 1)); 766 assert(!is_whitespace_normalized("abc\t def", 1)); 767 } 768 769 static void XMLCALL 770 check_attr_contains_normalized_whitespace(void *UNUSED_P(userData), 771 const XML_Char *UNUSED_P(name), 772 const XML_Char **atts) 773 { 774 int i; 775 for (i = 0; atts[i] != NULL; i += 2) { 776 const XML_Char *attrname = atts[i]; 777 const XML_Char *value = atts[i + 1]; 778 if (strcmp("attr", attrname) == 0 779 || strcmp("ents", attrname) == 0 780 || strcmp("refs", attrname) == 0) { 781 if (!is_whitespace_normalized(value, 0)) { 782 char buffer[256]; 783 sprintf(buffer, "attribute value not normalized: %s='%s'", 784 attrname, value); 785 fail(buffer); 786 } 787 } 788 } 789 } 790 791 START_TEST(test_attr_whitespace_normalization) 792 { 793 const char *text = 794 "<!DOCTYPE doc [\n" 795 " <!ATTLIST doc\n" 796 " attr NMTOKENS #REQUIRED\n" 797 " ents ENTITIES #REQUIRED\n" 798 " refs IDREFS #REQUIRED>\n" 799 "]>\n" 800 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n" 801 " ents=' ent-1 \t\r\n" 802 " ent-2 ' >\n" 803 " <e id='id-1'/>\n" 804 " <e id='id-2'/>\n" 805 "</doc>"; 806 807 XML_SetStartElementHandler(parser, 808 check_attr_contains_normalized_whitespace); 809 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 810 xml_failure(parser); 811 } 812 END_TEST 813 814 815 /* 816 * XML declaration tests. 817 */ 818 819 START_TEST(test_xmldecl_misplaced) 820 { 821 expect_failure("\n" 822 "<?xml version='1.0'?>\n" 823 "<a/>", 824 XML_ERROR_MISPLACED_XML_PI, 825 "failed to report misplaced XML declaration"); 826 } 827 END_TEST 828 829 /* Regression test for SF bug #584832. */ 830 static int XMLCALL 831 UnknownEncodingHandler(void *UNUSED_P(data),const XML_Char *encoding,XML_Encoding *info) 832 { 833 if (strcmp(encoding,"unsupported-encoding") == 0) { 834 int i; 835 for (i = 0; i < 256; ++i) 836 info->map[i] = i; 837 info->data = NULL; 838 info->convert = NULL; 839 info->release = NULL; 840 return XML_STATUS_OK; 841 } 842 return XML_STATUS_ERROR; 843 } 844 845 START_TEST(test_unknown_encoding_internal_entity) 846 { 847 const char *text = 848 "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 849 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 850 "<test a='&foo;'/>"; 851 852 XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, NULL); 853 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 854 xml_failure(parser); 855 } 856 END_TEST 857 858 /* Regression test for SF bug #620106. */ 859 static int XMLCALL 860 external_entity_loader_set_encoding(XML_Parser parser, 861 const XML_Char *context, 862 const XML_Char *UNUSED_P(base), 863 const XML_Char *UNUSED_P(systemId), 864 const XML_Char *UNUSED_P(publicId)) 865 { 866 /* This text says it's an unsupported encoding, but it's really 867 UTF-8, which we tell Expat using XML_SetEncoding(). 868 */ 869 const char *text = 870 "<?xml encoding='iso-8859-3'?>" 871 "\xC3\xA9"; 872 XML_Parser extparser; 873 874 extparser = XML_ExternalEntityParserCreate(parser, context, NULL); 875 if (extparser == NULL) 876 fail("Could not create external entity parser."); 877 if (!XML_SetEncoding(extparser, "utf-8")) 878 fail("XML_SetEncoding() ignored for external entity"); 879 if ( _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE) 880 == XML_STATUS_ERROR) { 881 xml_failure(parser); 882 return 0; 883 } 884 return 1; 885 } 886 887 START_TEST(test_ext_entity_set_encoding) 888 { 889 const char *text = 890 "<!DOCTYPE doc [\n" 891 " <!ENTITY en SYSTEM 'http://xml.libexpat.org/dummy.ent'>\n" 892 "]>\n" 893 "<doc>&en;</doc>"; 894 895 XML_SetExternalEntityRefHandler(parser, 896 external_entity_loader_set_encoding); 897 run_character_check(text, "\xC3\xA9"); 898 } 899 END_TEST 900 901 /* Test that no error is reported for unknown entities if we don't 902 read an external subset. This was fixed in Expat 1.95.5. 903 */ 904 START_TEST(test_wfc_undeclared_entity_unread_external_subset) { 905 const char *text = 906 "<!DOCTYPE doc SYSTEM 'foo'>\n" 907 "<doc>&entity;</doc>"; 908 909 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 910 xml_failure(parser); 911 } 912 END_TEST 913 914 /* Test that an error is reported for unknown entities if we don't 915 have an external subset. 916 */ 917 START_TEST(test_wfc_undeclared_entity_no_external_subset) { 918 expect_failure("<doc>&entity;</doc>", 919 XML_ERROR_UNDEFINED_ENTITY, 920 "Parser did not report undefined entity w/out a DTD."); 921 } 922 END_TEST 923 924 /* Test that an error is reported for unknown entities if we don't 925 read an external subset, but have been declared standalone. 926 */ 927 START_TEST(test_wfc_undeclared_entity_standalone) { 928 const char *text = 929 "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 930 "<!DOCTYPE doc SYSTEM 'foo'>\n" 931 "<doc>&entity;</doc>"; 932 933 expect_failure(text, 934 XML_ERROR_UNDEFINED_ENTITY, 935 "Parser did not report undefined entity (standalone)."); 936 } 937 END_TEST 938 939 static int XMLCALL 940 external_entity_loader(XML_Parser parser, 941 const XML_Char *context, 942 const XML_Char *UNUSED_P(base), 943 const XML_Char *UNUSED_P(systemId), 944 const XML_Char *UNUSED_P(publicId)) 945 { 946 char *text = (char *)XML_GetUserData(parser); 947 XML_Parser extparser; 948 949 extparser = XML_ExternalEntityParserCreate(parser, context, NULL); 950 if (extparser == NULL) 951 fail("Could not create external entity parser."); 952 if ( _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE) 953 == XML_STATUS_ERROR) { 954 xml_failure(parser); 955 return XML_STATUS_ERROR; 956 } 957 return XML_STATUS_OK; 958 } 959 960 /* Test that an error is reported for unknown entities if we have read 961 an external subset, and standalone is true. 962 */ 963 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) { 964 const char *text = 965 "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 966 "<!DOCTYPE doc SYSTEM 'foo'>\n" 967 "<doc>&entity;</doc>"; 968 char foo_text[] = 969 "<!ELEMENT doc (#PCDATA)*>"; 970 971 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 972 XML_SetUserData(parser, foo_text); 973 XML_SetExternalEntityRefHandler(parser, external_entity_loader); 974 expect_failure(text, 975 XML_ERROR_UNDEFINED_ENTITY, 976 "Parser did not report undefined entity (external DTD)."); 977 } 978 END_TEST 979 980 /* Test that no error is reported for unknown entities if we have read 981 an external subset, and standalone is false. 982 */ 983 START_TEST(test_wfc_undeclared_entity_with_external_subset) { 984 const char *text = 985 "<?xml version='1.0' encoding='us-ascii'?>\n" 986 "<!DOCTYPE doc SYSTEM 'foo'>\n" 987 "<doc>&entity;</doc>"; 988 char foo_text[] = 989 "<!ELEMENT doc (#PCDATA)*>"; 990 991 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 992 XML_SetUserData(parser, foo_text); 993 XML_SetExternalEntityRefHandler(parser, external_entity_loader); 994 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 995 xml_failure(parser); 996 } 997 END_TEST 998 999 START_TEST(test_wfc_no_recursive_entity_refs) 1000 { 1001 const char *text = 1002 "<!DOCTYPE doc [\n" 1003 " <!ENTITY entity '&entity;'>\n" 1004 "]>\n" 1005 "<doc>&entity;</doc>"; 1006 1007 expect_failure(text, 1008 XML_ERROR_RECURSIVE_ENTITY_REF, 1009 "Parser did not report recursive entity reference."); 1010 } 1011 END_TEST 1012 1013 /* Regression test for SF bug #483514. */ 1014 START_TEST(test_dtd_default_handling) 1015 { 1016 const char *text = 1017 "<!DOCTYPE doc [\n" 1018 "<!ENTITY e SYSTEM 'http://xml.libexpat.org/e'>\n" 1019 "<!NOTATION n SYSTEM 'http://xml.libexpat.org/n'>\n" 1020 "<!ELEMENT doc EMPTY>\n" 1021 "<!ATTLIST doc a CDATA #IMPLIED>\n" 1022 "<?pi in dtd?>\n" 1023 "<!--comment in dtd-->\n" 1024 "]><doc/>"; 1025 1026 XML_SetDefaultHandler(parser, accumulate_characters); 1027 XML_SetDoctypeDeclHandler(parser, 1028 dummy_start_doctype_handler, 1029 dummy_end_doctype_handler); 1030 XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler); 1031 XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler); 1032 XML_SetElementDeclHandler(parser, dummy_element_decl_handler); 1033 XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); 1034 XML_SetProcessingInstructionHandler(parser, dummy_pi_handler); 1035 XML_SetCommentHandler(parser, dummy_comment_handler); 1036 run_character_check(text, "\n\n\n\n\n\n\n<doc/>"); 1037 } 1038 END_TEST 1039 1040 /* See related SF bug #673791. 1041 When namespace processing is enabled, setting the namespace URI for 1042 a prefix is not allowed; this test ensures that it *is* allowed 1043 when namespace processing is not enabled. 1044 (See Namespaces in XML, section 2.) 1045 */ 1046 START_TEST(test_empty_ns_without_namespaces) 1047 { 1048 const char *text = 1049 "<doc xmlns:prefix='http://www.example.com/'>\n" 1050 " <e xmlns:prefix=''/>\n" 1051 "</doc>"; 1052 1053 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 1054 xml_failure(parser); 1055 } 1056 END_TEST 1057 1058 /* Regression test for SF bug #824420. 1059 Checks that an xmlns:prefix attribute set in an attribute's default 1060 value isn't misinterpreted. 1061 */ 1062 START_TEST(test_ns_in_attribute_default_without_namespaces) 1063 { 1064 const char *text = 1065 "<!DOCTYPE e:element [\n" 1066 " <!ATTLIST e:element\n" 1067 " xmlns:e CDATA 'http://example.com/'>\n" 1068 " ]>\n" 1069 "<e:element/>"; 1070 1071 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 1072 xml_failure(parser); 1073 } 1074 END_TEST 1075 1076 static const char *long_character_data_text = 1077 "<?xml version='1.0' encoding='iso-8859-1'?><s>" 1078 "012345678901234567890123456789012345678901234567890123456789" 1079 "012345678901234567890123456789012345678901234567890123456789" 1080 "012345678901234567890123456789012345678901234567890123456789" 1081 "012345678901234567890123456789012345678901234567890123456789" 1082 "012345678901234567890123456789012345678901234567890123456789" 1083 "012345678901234567890123456789012345678901234567890123456789" 1084 "012345678901234567890123456789012345678901234567890123456789" 1085 "012345678901234567890123456789012345678901234567890123456789" 1086 "012345678901234567890123456789012345678901234567890123456789" 1087 "012345678901234567890123456789012345678901234567890123456789" 1088 "012345678901234567890123456789012345678901234567890123456789" 1089 "012345678901234567890123456789012345678901234567890123456789" 1090 "012345678901234567890123456789012345678901234567890123456789" 1091 "012345678901234567890123456789012345678901234567890123456789" 1092 "012345678901234567890123456789012345678901234567890123456789" 1093 "012345678901234567890123456789012345678901234567890123456789" 1094 "012345678901234567890123456789012345678901234567890123456789" 1095 "012345678901234567890123456789012345678901234567890123456789" 1096 "012345678901234567890123456789012345678901234567890123456789" 1097 "012345678901234567890123456789012345678901234567890123456789" 1098 "</s>"; 1099 1100 static XML_Bool resumable = XML_FALSE; 1101 1102 static void 1103 clearing_aborting_character_handler(void *UNUSED_P(userData), 1104 const XML_Char *UNUSED_P(s), int UNUSED_P(len)) 1105 { 1106 XML_StopParser(parser, resumable); 1107 XML_SetCharacterDataHandler(parser, NULL); 1108 } 1109 1110 /* Regression test for SF bug #1515266: missing check of stopped 1111 parser in doContext() 'for' loop. */ 1112 START_TEST(test_stop_parser_between_char_data_calls) 1113 { 1114 /* The sample data must be big enough that there are two calls to 1115 the character data handler from within the inner "for" loop of 1116 the XML_TOK_DATA_CHARS case in doContent(), and the character 1117 handler must stop the parser and clear the character data 1118 handler. 1119 */ 1120 const char *text = long_character_data_text; 1121 1122 XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler); 1123 resumable = XML_FALSE; 1124 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) 1125 xml_failure(parser); 1126 if (XML_GetErrorCode(parser) != XML_ERROR_ABORTED) 1127 xml_failure(parser); 1128 } 1129 END_TEST 1130 1131 /* Regression test for SF bug #1515266: missing check of stopped 1132 parser in doContext() 'for' loop. */ 1133 START_TEST(test_suspend_parser_between_char_data_calls) 1134 { 1135 /* The sample data must be big enough that there are two calls to 1136 the character data handler from within the inner "for" loop of 1137 the XML_TOK_DATA_CHARS case in doContent(), and the character 1138 handler must stop the parser and clear the character data 1139 handler. 1140 */ 1141 const char *text = long_character_data_text; 1142 1143 XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler); 1144 resumable = XML_TRUE; 1145 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED) 1146 xml_failure(parser); 1147 if (XML_GetErrorCode(parser) != XML_ERROR_NONE) 1148 xml_failure(parser); 1149 } 1150 END_TEST 1151 1152 START_TEST(test_good_cdata_ascii) 1153 { 1154 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>"; 1155 const char *expected = "<greeting>Hello, world!</greeting>"; 1156 1157 CharData storage; 1158 CharData_Init(&storage); 1159 XML_SetUserData(parser, &storage); 1160 XML_SetCharacterDataHandler(parser, accumulate_characters); 1161 1162 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 1163 xml_failure(parser); 1164 CharData_CheckXMLChars(&storage, expected); 1165 } 1166 END_TEST 1167 1168 START_TEST(test_good_cdata_utf16) 1169 { 1170 /* Test data is: 1171 * <?xml version='1.0' encoding='utf-16'?> 1172 * <a><![CDATA[hello]]></a> 1173 */ 1174 const char text[] = 1175 "\0<\0?\0x\0m\0l\0" 1176 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1177 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'" 1178 "\0?\0>\0\n" 1179 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>"; 1180 const char *expected = "hello"; 1181 1182 CharData storage; 1183 CharData_Init(&storage); 1184 XML_SetUserData(parser, &storage); 1185 XML_SetCharacterDataHandler(parser, accumulate_characters); 1186 1187 if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) 1188 xml_failure(parser); 1189 CharData_CheckXMLChars(&storage, expected); 1190 } 1191 END_TEST 1192 1193 START_TEST(test_bad_cdata) 1194 { 1195 struct CaseData { 1196 const char *text; 1197 enum XML_Error expectedError; 1198 }; 1199 1200 struct CaseData cases[] = { 1201 {"<a><", XML_ERROR_UNCLOSED_TOKEN}, 1202 {"<a><!", XML_ERROR_UNCLOSED_TOKEN}, 1203 {"<a><![", XML_ERROR_UNCLOSED_TOKEN}, 1204 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN}, 1205 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN}, 1206 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN}, 1207 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN}, 1208 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN}, 1209 1210 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1211 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1212 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1213 1214 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN}, 1215 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1216 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1217 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN}, 1218 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN}, 1219 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN}, 1220 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN}, 1221 1222 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1223 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1224 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION} 1225 }; 1226 1227 size_t i = 0; 1228 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1229 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES( 1230 parser, cases[i].text, strlen(cases[i].text), XML_TRUE); 1231 const enum XML_Error actualError = XML_GetErrorCode(parser); 1232 1233 assert(actualStatus == XML_STATUS_ERROR); 1234 1235 if (actualError != cases[i].expectedError) { 1236 char message[100]; 1237 sprintf(message, "Expected error %d but got error %d for case %u: \"%s\"\n", 1238 cases[i].expectedError, actualError, (unsigned int)i + 1, cases[i].text); 1239 fail(message); 1240 } 1241 1242 XML_ParserReset(parser, NULL); 1243 } 1244 } 1245 END_TEST 1246 1247 1248 /* 1249 * Namespaces tests. 1250 */ 1251 1252 static void 1253 namespace_setup(void) 1254 { 1255 parser = XML_ParserCreateNS(NULL, ' '); 1256 if (parser == NULL) 1257 fail("Parser not created."); 1258 } 1259 1260 static void 1261 namespace_teardown(void) 1262 { 1263 basic_teardown(); 1264 } 1265 1266 /* Check that an element name and attribute name match the expected values. 1267 The expected values are passed as an array reference of string pointers 1268 provided as the userData argument; the first is the expected 1269 element name, and the second is the expected attribute name. 1270 */ 1271 static void XMLCALL 1272 triplet_start_checker(void *userData, const XML_Char *name, 1273 const XML_Char **atts) 1274 { 1275 char **elemstr = (char **)userData; 1276 char buffer[1024]; 1277 if (strcmp(elemstr[0], name) != 0) { 1278 sprintf(buffer, "unexpected start string: '%s'", name); 1279 fail(buffer); 1280 } 1281 if (strcmp(elemstr[1], atts[0]) != 0) { 1282 sprintf(buffer, "unexpected attribute string: '%s'", atts[0]); 1283 fail(buffer); 1284 } 1285 } 1286 1287 /* Check that the element name passed to the end-element handler matches 1288 the expected value. The expected value is passed as the first element 1289 in an array of strings passed as the userData argument. 1290 */ 1291 static void XMLCALL 1292 triplet_end_checker(void *userData, const XML_Char *name) 1293 { 1294 char **elemstr = (char **)userData; 1295 if (strcmp(elemstr[0], name) != 0) { 1296 char buffer[1024]; 1297 sprintf(buffer, "unexpected end string: '%s'", name); 1298 fail(buffer); 1299 } 1300 } 1301 1302 START_TEST(test_return_ns_triplet) 1303 { 1304 const char *text = 1305 "<foo:e xmlns:foo='http://expat.sf.net/' bar:a='12'\n" 1306 " xmlns:bar='http://expat.sf.net/'></foo:e>"; 1307 const char *elemstr[] = { 1308 "http://expat.sf.net/ e foo", 1309 "http://expat.sf.net/ a bar" 1310 }; 1311 XML_SetReturnNSTriplet(parser, XML_TRUE); 1312 XML_SetUserData(parser, elemstr); 1313 XML_SetElementHandler(parser, triplet_start_checker, triplet_end_checker); 1314 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 1315 xml_failure(parser); 1316 } 1317 END_TEST 1318 1319 static void XMLCALL 1320 overwrite_start_checker(void *userData, const XML_Char *name, 1321 const XML_Char **atts) 1322 { 1323 CharData *storage = (CharData *) userData; 1324 CharData_AppendString(storage, "start "); 1325 CharData_AppendXMLChars(storage, name, -1); 1326 while (*atts != NULL) { 1327 CharData_AppendString(storage, "\nattribute "); 1328 CharData_AppendXMLChars(storage, *atts, -1); 1329 atts += 2; 1330 } 1331 CharData_AppendString(storage, "\n"); 1332 } 1333 1334 static void XMLCALL 1335 overwrite_end_checker(void *userData, const XML_Char *name) 1336 { 1337 CharData *storage = (CharData *) userData; 1338 CharData_AppendString(storage, "end "); 1339 CharData_AppendXMLChars(storage, name, -1); 1340 CharData_AppendString(storage, "\n"); 1341 } 1342 1343 static void 1344 run_ns_tagname_overwrite_test(const char *text, const char *result) 1345 { 1346 CharData storage; 1347 CharData_Init(&storage); 1348 XML_SetUserData(parser, &storage); 1349 XML_SetElementHandler(parser, 1350 overwrite_start_checker, overwrite_end_checker); 1351 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 1352 xml_failure(parser); 1353 CharData_CheckString(&storage, result); 1354 } 1355 1356 /* Regression test for SF bug #566334. */ 1357 START_TEST(test_ns_tagname_overwrite) 1358 { 1359 const char *text = 1360 "<n:e xmlns:n='http://xml.libexpat.org/'>\n" 1361 " <n:f n:attr='foo'/>\n" 1362 " <n:g n:attr2='bar'/>\n" 1363 "</n:e>"; 1364 const char *result = 1365 "start http://xml.libexpat.org/ e\n" 1366 "start http://xml.libexpat.org/ f\n" 1367 "attribute http://xml.libexpat.org/ attr\n" 1368 "end http://xml.libexpat.org/ f\n" 1369 "start http://xml.libexpat.org/ g\n" 1370 "attribute http://xml.libexpat.org/ attr2\n" 1371 "end http://xml.libexpat.org/ g\n" 1372 "end http://xml.libexpat.org/ e\n"; 1373 run_ns_tagname_overwrite_test(text, result); 1374 } 1375 END_TEST 1376 1377 /* Regression test for SF bug #566334. */ 1378 START_TEST(test_ns_tagname_overwrite_triplet) 1379 { 1380 const char *text = 1381 "<n:e xmlns:n='http://xml.libexpat.org/'>\n" 1382 " <n:f n:attr='foo'/>\n" 1383 " <n:g n:attr2='bar'/>\n" 1384 "</n:e>"; 1385 const char *result = 1386 "start http://xml.libexpat.org/ e n\n" 1387 "start http://xml.libexpat.org/ f n\n" 1388 "attribute http://xml.libexpat.org/ attr n\n" 1389 "end http://xml.libexpat.org/ f n\n" 1390 "start http://xml.libexpat.org/ g n\n" 1391 "attribute http://xml.libexpat.org/ attr2 n\n" 1392 "end http://xml.libexpat.org/ g n\n" 1393 "end http://xml.libexpat.org/ e n\n"; 1394 XML_SetReturnNSTriplet(parser, XML_TRUE); 1395 run_ns_tagname_overwrite_test(text, result); 1396 } 1397 END_TEST 1398 1399 1400 /* Regression test for SF bug #620343. */ 1401 static void XMLCALL 1402 start_element_fail(void *UNUSED_P(userData), 1403 const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts)) 1404 { 1405 /* We should never get here. */ 1406 fail("should never reach start_element_fail()"); 1407 } 1408 1409 static void XMLCALL 1410 start_ns_clearing_start_element(void *userData, 1411 const XML_Char *UNUSED_P(prefix), 1412 const XML_Char *UNUSED_P(uri)) 1413 { 1414 XML_SetStartElementHandler((XML_Parser) userData, NULL); 1415 } 1416 1417 START_TEST(test_start_ns_clears_start_element) 1418 { 1419 /* This needs to use separate start/end tags; using the empty tag 1420 syntax doesn't cause the problematic path through Expat to be 1421 taken. 1422 */ 1423 const char *text = "<e xmlns='http://xml.libexpat.org/'></e>"; 1424 1425 XML_SetStartElementHandler(parser, start_element_fail); 1426 XML_SetStartNamespaceDeclHandler(parser, start_ns_clearing_start_element); 1427 XML_UseParserAsHandlerArg(parser); 1428 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 1429 xml_failure(parser); 1430 } 1431 END_TEST 1432 1433 /* Regression test for SF bug #616863. */ 1434 static int XMLCALL 1435 external_entity_handler(XML_Parser parser, 1436 const XML_Char *context, 1437 const XML_Char *UNUSED_P(base), 1438 const XML_Char *UNUSED_P(systemId), 1439 const XML_Char *UNUSED_P(publicId)) 1440 { 1441 intptr_t callno = 1 + (intptr_t)XML_GetUserData(parser); 1442 const char *text; 1443 XML_Parser p2; 1444 1445 if (callno == 1) 1446 text = ("<!ELEMENT doc (e+)>\n" 1447 "<!ATTLIST doc xmlns CDATA #IMPLIED>\n" 1448 "<!ELEMENT e EMPTY>\n"); 1449 else 1450 text = ("<?xml version='1.0' encoding='us-ascii'?>" 1451 "<e/>"); 1452 1453 XML_SetUserData(parser, (void *) callno); 1454 p2 = XML_ExternalEntityParserCreate(parser, context, NULL); 1455 if (_XML_Parse_SINGLE_BYTES(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) { 1456 xml_failure(p2); 1457 return 0; 1458 } 1459 XML_ParserFree(p2); 1460 return 1; 1461 } 1462 1463 START_TEST(test_default_ns_from_ext_subset_and_ext_ge) 1464 { 1465 const char *text = 1466 "<?xml version='1.0'?>\n" 1467 "<!DOCTYPE doc SYSTEM 'http://xml.libexpat.org/doc.dtd' [\n" 1468 " <!ENTITY en SYSTEM 'http://xml.libexpat.org/entity.ent'>\n" 1469 "]>\n" 1470 "<doc xmlns='http://xml.libexpat.org/ns1'>\n" 1471 "&en;\n" 1472 "</doc>"; 1473 1474 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1475 XML_SetExternalEntityRefHandler(parser, external_entity_handler); 1476 /* We actually need to set this handler to tickle this bug. */ 1477 XML_SetStartElementHandler(parser, dummy_start_element); 1478 XML_SetUserData(parser, NULL); 1479 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 1480 xml_failure(parser); 1481 } 1482 END_TEST 1483 1484 /* Regression test #1 for SF bug #673791. */ 1485 START_TEST(test_ns_prefix_with_empty_uri_1) 1486 { 1487 const char *text = 1488 "<doc xmlns:prefix='http://xml.libexpat.org/'>\n" 1489 " <e xmlns:prefix=''/>\n" 1490 "</doc>"; 1491 1492 expect_failure(text, 1493 XML_ERROR_UNDECLARING_PREFIX, 1494 "Did not report re-setting namespace" 1495 " URI with prefix to ''."); 1496 } 1497 END_TEST 1498 1499 /* Regression test #2 for SF bug #673791. */ 1500 START_TEST(test_ns_prefix_with_empty_uri_2) 1501 { 1502 const char *text = 1503 "<?xml version='1.0'?>\n" 1504 "<docelem xmlns:pre=''/>"; 1505 1506 expect_failure(text, 1507 XML_ERROR_UNDECLARING_PREFIX, 1508 "Did not report setting namespace URI with prefix to ''."); 1509 } 1510 END_TEST 1511 1512 /* Regression test #3 for SF bug #673791. */ 1513 START_TEST(test_ns_prefix_with_empty_uri_3) 1514 { 1515 const char *text = 1516 "<!DOCTYPE doc [\n" 1517 " <!ELEMENT doc EMPTY>\n" 1518 " <!ATTLIST doc\n" 1519 " xmlns:prefix CDATA ''>\n" 1520 "]>\n" 1521 "<doc/>"; 1522 1523 expect_failure(text, 1524 XML_ERROR_UNDECLARING_PREFIX, 1525 "Didn't report attr default setting NS w/ prefix to ''."); 1526 } 1527 END_TEST 1528 1529 /* Regression test #4 for SF bug #673791. */ 1530 START_TEST(test_ns_prefix_with_empty_uri_4) 1531 { 1532 const char *text = 1533 "<!DOCTYPE doc [\n" 1534 " <!ELEMENT prefix:doc EMPTY>\n" 1535 " <!ATTLIST prefix:doc\n" 1536 " xmlns:prefix CDATA 'http://xml.libexpat.org/'>\n" 1537 "]>\n" 1538 "<prefix:doc/>"; 1539 /* Packaged info expected by the end element handler; 1540 the weird structuring lets us re-use the triplet_end_checker() 1541 function also used for another test. */ 1542 const char *elemstr[] = { 1543 "http://xml.libexpat.org/ doc prefix" 1544 }; 1545 XML_SetReturnNSTriplet(parser, XML_TRUE); 1546 XML_SetUserData(parser, elemstr); 1547 XML_SetEndElementHandler(parser, triplet_end_checker); 1548 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 1549 xml_failure(parser); 1550 } 1551 END_TEST 1552 1553 START_TEST(test_ns_default_with_empty_uri) 1554 { 1555 const char *text = 1556 "<doc xmlns='http://xml.libexpat.org/'>\n" 1557 " <e xmlns=''/>\n" 1558 "</doc>"; 1559 if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) 1560 xml_failure(parser); 1561 } 1562 END_TEST 1563 1564 /* Regression test for SF bug #692964: two prefixes for one namespace. */ 1565 START_TEST(test_ns_duplicate_attrs_diff_prefixes) 1566 { 1567 const char *text = 1568 "<doc xmlns:a='http://xml.libexpat.org/a'\n" 1569 " xmlns:b='http://xml.libexpat.org/a'\n" 1570 " a:a='v' b:a='v' />"; 1571 expect_failure(text, 1572 XML_ERROR_DUPLICATE_ATTRIBUTE, 1573 "did not report multiple attributes with same URI+name"); 1574 } 1575 END_TEST 1576 1577 /* Regression test for SF bug #695401: unbound prefix. */ 1578 START_TEST(test_ns_unbound_prefix_on_attribute) 1579 { 1580 const char *text = "<doc a:attr=''/>"; 1581 expect_failure(text, 1582 XML_ERROR_UNBOUND_PREFIX, 1583 "did not report unbound prefix on attribute"); 1584 } 1585 END_TEST 1586 1587 /* Regression test for SF bug #695401: unbound prefix. */ 1588 START_TEST(test_ns_unbound_prefix_on_element) 1589 { 1590 const char *text = "<a:doc/>"; 1591 expect_failure(text, 1592 XML_ERROR_UNBOUND_PREFIX, 1593 "did not report unbound prefix on element"); 1594 } 1595 END_TEST 1596 1597 static Suite * 1598 make_suite(void) 1599 { 1600 Suite *s = suite_create("basic"); 1601 TCase *tc_basic = tcase_create("basic tests"); 1602 TCase *tc_namespace = tcase_create("XML namespaces"); 1603 1604 suite_add_tcase(s, tc_basic); 1605 tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); 1606 tcase_add_test(tc_basic, test_nul_byte); 1607 tcase_add_test(tc_basic, test_u0000_char); 1608 tcase_add_test(tc_basic, test_bom_utf8); 1609 tcase_add_test(tc_basic, test_bom_utf16_be); 1610 tcase_add_test(tc_basic, test_bom_utf16_le); 1611 tcase_add_test(tc_basic, test_illegal_utf8); 1612 tcase_add_test(tc_basic, test_utf8_auto_align); 1613 tcase_add_test(tc_basic, test_utf16); 1614 tcase_add_test(tc_basic, test_utf16_le_epilog_newline); 1615 tcase_add_test(tc_basic, test_latin1_umlauts); 1616 /* Regression test for SF bug #491986. */ 1617 tcase_add_test(tc_basic, test_danish_latin1); 1618 /* Regression test for SF bug #514281. */ 1619 tcase_add_test(tc_basic, test_french_charref_hexidecimal); 1620 tcase_add_test(tc_basic, test_french_charref_decimal); 1621 tcase_add_test(tc_basic, test_french_latin1); 1622 tcase_add_test(tc_basic, test_french_utf8); 1623 tcase_add_test(tc_basic, test_utf8_false_rejection); 1624 tcase_add_test(tc_basic, test_line_number_after_parse); 1625 tcase_add_test(tc_basic, test_column_number_after_parse); 1626 tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers); 1627 tcase_add_test(tc_basic, test_line_number_after_error); 1628 tcase_add_test(tc_basic, test_column_number_after_error); 1629 tcase_add_test(tc_basic, test_really_long_lines); 1630 tcase_add_test(tc_basic, test_end_element_events); 1631 tcase_add_test(tc_basic, test_attr_whitespace_normalization); 1632 tcase_add_test(tc_basic, test_xmldecl_misplaced); 1633 tcase_add_test(tc_basic, test_unknown_encoding_internal_entity); 1634 tcase_add_test(tc_basic, 1635 test_wfc_undeclared_entity_unread_external_subset); 1636 tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset); 1637 tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone); 1638 tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset); 1639 tcase_add_test(tc_basic, 1640 test_wfc_undeclared_entity_with_external_subset_standalone); 1641 tcase_add_test(tc_basic, test_wfc_no_recursive_entity_refs); 1642 tcase_add_test(tc_basic, test_ext_entity_set_encoding); 1643 tcase_add_test(tc_basic, test_dtd_default_handling); 1644 tcase_add_test(tc_basic, test_empty_ns_without_namespaces); 1645 tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces); 1646 tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls); 1647 tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls); 1648 tcase_add_test(tc_basic, test_good_cdata_ascii); 1649 tcase_add_test(tc_basic, test_good_cdata_utf16); 1650 tcase_add_test(tc_basic, test_bad_cdata); 1651 1652 suite_add_tcase(s, tc_namespace); 1653 tcase_add_checked_fixture(tc_namespace, 1654 namespace_setup, namespace_teardown); 1655 tcase_add_test(tc_namespace, test_return_ns_triplet); 1656 tcase_add_test(tc_namespace, test_ns_tagname_overwrite); 1657 tcase_add_test(tc_namespace, test_ns_tagname_overwrite_triplet); 1658 tcase_add_test(tc_namespace, test_start_ns_clears_start_element); 1659 tcase_add_test(tc_namespace, test_default_ns_from_ext_subset_and_ext_ge); 1660 tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_1); 1661 tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_2); 1662 tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_3); 1663 tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_4); 1664 tcase_add_test(tc_namespace, test_ns_default_with_empty_uri); 1665 tcase_add_test(tc_namespace, test_ns_duplicate_attrs_diff_prefixes); 1666 tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_attribute); 1667 tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_element); 1668 1669 return s; 1670 } 1671 1672 1673 int 1674 main(int argc, char *argv[]) 1675 { 1676 int i, nf; 1677 int verbosity = CK_NORMAL; 1678 Suite *s = make_suite(); 1679 SRunner *sr = srunner_create(s); 1680 1681 /* run the tests for internal helper functions */ 1682 testhelper_is_whitespace_normalized(); 1683 1684 for (i = 1; i < argc; ++i) { 1685 char *opt = argv[i]; 1686 if (strcmp(opt, "-v") == 0 || strcmp(opt, "--verbose") == 0) 1687 verbosity = CK_VERBOSE; 1688 else if (strcmp(opt, "-q") == 0 || strcmp(opt, "--quiet") == 0) 1689 verbosity = CK_SILENT; 1690 else { 1691 fprintf(stderr, "runtests: unknown option '%s'\n", opt); 1692 return 2; 1693 } 1694 } 1695 if (verbosity != CK_SILENT) 1696 printf("Expat version: %s\n", XML_ExpatVersion()); 1697 srunner_run_all(sr, verbosity); 1698 nf = srunner_ntests_failed(sr); 1699 srunner_free(sr); 1700 1701 return (nf == 0) ? EXIT_SUCCESS : EXIT_FAILURE; 1702 } 1703