1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2004-2013, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File reapits.c 9 * 10 *********************************************************************************/ 11 /*C API TEST FOR Regular Expressions */ 12 /** 13 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't 14 * try to test the full functionality. It just calls each function and verifies that it 15 * works on a basic level. 16 * 17 * More complete testing of regular expression functionality is done with the C++ tests. 18 **/ 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 23 24 #include <stdlib.h> 25 #include <string.h> 26 #include "unicode/uloc.h" 27 #include "unicode/uregex.h" 28 #include "unicode/ustring.h" 29 #include "unicode/utext.h" 30 #include "cintltst.h" 31 32 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 33 log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} 34 35 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 36 log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}} 37 38 /* 39 * TEST_SETUP and TEST_TEARDOWN 40 * macros to handle the boilerplate around setting up regex test cases. 41 * parameteres to setup: 42 * pattern: The regex pattern, a (char *) null terminated C string. 43 * testString: The string data, also a (char *) C string. 44 * flags: Regex flags to set when compiling the pattern 45 * 46 * Put arbitrary test code between SETUP and TEARDOWN. 47 * 're" is the compiled, ready-to-go regular expression. 
48 */ 49 #define TEST_SETUP(pattern, testString, flags) { \ 50 UChar *srcString = NULL; \ 51 status = U_ZERO_ERROR; \ 52 re = uregex_openC(pattern, flags, NULL, &status); \ 53 TEST_ASSERT_SUCCESS(status); \ 54 srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \ 55 u_uastrncpy(srcString, testString, strlen(testString)+1); \ 56 uregex_setText(re, srcString, -1, &status); \ 57 TEST_ASSERT_SUCCESS(status); \ 58 if (U_SUCCESS(status)) { 59 60 #define TEST_TEARDOWN \ 61 } \ 62 TEST_ASSERT_SUCCESS(status); \ 63 uregex_close(re); \ 64 free(srcString); \ 65 } 66 67 68 /** 69 * @param expected utf-8 array of bytes to be expected 70 */ 71 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) { 72 char buf_inside_macro[120]; 73 int32_t len = (int32_t)strlen(expected); 74 UBool success; 75 if (nulTerm) { 76 u_austrncpy(buf_inside_macro, (actual), len+1); 77 buf_inside_macro[len+2] = 0; 78 success = (strcmp((expected), buf_inside_macro) == 0); 79 } else { 80 u_austrncpy(buf_inside_macro, (actual), len); 81 buf_inside_macro[len+1] = 0; 82 success = (strncmp((expected), buf_inside_macro, len) == 0); 83 } 84 if (success == FALSE) { 85 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", 86 file, line, (expected), buf_inside_macro); 87 } 88 } 89 90 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__) 91 92 93 static UBool equals_utf8_utext(const char *utf8, UText *utext) { 94 int32_t u8i = 0; 95 UChar32 u8c = 0; 96 UChar32 utc = 0; 97 UBool stringsEqual = TRUE; 98 utext_setNativeIndex(utext, 0); 99 for (;;) { 100 U8_NEXT_UNSAFE(utf8, u8i, u8c); 101 utc = utext_next32(utext); 102 if (u8c == 0 && utc == U_SENTINEL) { 103 break; 104 } 105 if (u8c != utc || u8c == 0) { 106 stringsEqual = FALSE; 107 break; 108 } 109 } 110 return stringsEqual; 111 } 112 113 114 static void test_assert_utext(const char *expected, UText *actual, 
const char *file, int line) { 115 utext_setNativeIndex(actual, 0); 116 if (!equals_utf8_utext(expected, actual)) { 117 UChar32 c; 118 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected); 119 c = utext_next32From(actual, 0); 120 while (c != U_SENTINEL) { 121 if (0x20<c && c <0x7e) { 122 log_err("%c", c); 123 } else { 124 log_err("%#x", c); 125 } 126 c = UTEXT_NEXT32(actual); 127 } 128 log_err("\"\n"); 129 } 130 } 131 132 /* 133 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual) 134 * Note: Expected is a UTF-8 encoded string, _not_ the system code page. 135 */ 136 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__) 137 138 static UBool testUTextEqual(UText *uta, UText *utb) { 139 UChar32 ca = 0; 140 UChar32 cb = 0; 141 utext_setNativeIndex(uta, 0); 142 utext_setNativeIndex(utb, 0); 143 do { 144 ca = utext_next32(uta); 145 cb = utext_next32(utb); 146 if (ca != cb) { 147 break; 148 } 149 } while (ca != U_SENTINEL); 150 return ca == cb; 151 } 152 153 154 155 156 static void TestRegexCAPI(void); 157 static void TestBug4315(void); 158 static void TestUTextAPI(void); 159 static void TestRefreshInput(void); 160 static void TestBug8421(void); 161 162 void addURegexTest(TestNode** root); 163 164 void addURegexTest(TestNode** root) 165 { 166 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); 167 addTest(root, &TestBug4315, "regex/TestBug4315"); 168 addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); 169 addTest(root, &TestRefreshInput, "regex/TestRefreshInput"); 170 addTest(root, &TestBug8421, "regex/TestBug8421"); 171 } 172 173 /* 174 * Call back function and context struct used for testing 175 * regular expression user callbacks. This test is mostly the same as 176 * the corresponding C++ test in intltest. 
177 */ 178 typedef struct callBackContext { 179 int32_t maxCalls; 180 int32_t numCalls; 181 int32_t lastSteps; 182 } callBackContext; 183 184 static UBool U_EXPORT2 U_CALLCONV 185 TestCallbackFn(const void *context, int32_t steps) { 186 callBackContext *info = (callBackContext *)context; 187 if (info->lastSteps+1 != steps) { 188 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps); 189 } 190 info->lastSteps = steps; 191 info->numCalls++; 192 return (info->numCalls < info->maxCalls); 193 } 194 195 /* 196 * Regular Expression C API Tests 197 */ 198 static void TestRegexCAPI(void) { 199 UErrorCode status = U_ZERO_ERROR; 200 URegularExpression *re; 201 UChar pat[200]; 202 UChar *minus1; 203 204 memset(&minus1, -1, sizeof(minus1)); 205 206 /* Mimimalist open/close */ 207 u_uastrncpy(pat, "abc*", sizeof(pat)/2); 208 re = uregex_open(pat, -1, 0, 0, &status); 209 if (U_FAILURE(status)) { 210 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 211 return; 212 } 213 uregex_close(re); 214 215 /* Open with all flag values set */ 216 status = U_ZERO_ERROR; 217 re = uregex_open(pat, -1, 218 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL, 219 0, &status); 220 TEST_ASSERT_SUCCESS(status); 221 uregex_close(re); 222 223 /* Open with an invalid flag */ 224 status = U_ZERO_ERROR; 225 re = uregex_open(pat, -1, 0x40000000, 0, &status); 226 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 227 uregex_close(re); 228 229 /* Open with an unimplemented flag */ 230 status = U_ZERO_ERROR; 231 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status); 232 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED); 233 uregex_close(re); 234 235 /* openC with an invalid parameter */ 236 status = U_ZERO_ERROR; 237 re = uregex_openC(NULL, 238 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, 
&status); 239 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 240 241 /* openC with an invalid parameter */ 242 status = U_USELESS_COLLATOR_ERROR; 243 re = uregex_openC(NULL, 244 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 245 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL); 246 247 /* openC open from a C string */ 248 { 249 const UChar *p; 250 int32_t len; 251 status = U_ZERO_ERROR; 252 re = uregex_openC("abc*", 0, 0, &status); 253 TEST_ASSERT_SUCCESS(status); 254 p = uregex_pattern(re, &len, &status); 255 TEST_ASSERT_SUCCESS(status); 256 257 /* The TEST_ASSERT_SUCCESS above should change too... */ 258 if(U_SUCCESS(status)) { 259 u_uastrncpy(pat, "abc*", sizeof(pat)/2); 260 TEST_ASSERT(u_strcmp(pat, p) == 0); 261 TEST_ASSERT(len==(int32_t)strlen("abc*")); 262 } 263 264 uregex_close(re); 265 266 /* TODO: Open with ParseError parameter */ 267 } 268 269 /* 270 * clone 271 */ 272 { 273 URegularExpression *clone1; 274 URegularExpression *clone2; 275 URegularExpression *clone3; 276 UChar testString1[30]; 277 UChar testString2[30]; 278 UBool result; 279 280 281 status = U_ZERO_ERROR; 282 re = uregex_openC("abc*", 0, 0, &status); 283 TEST_ASSERT_SUCCESS(status); 284 clone1 = uregex_clone(re, &status); 285 TEST_ASSERT_SUCCESS(status); 286 TEST_ASSERT(clone1 != NULL); 287 288 status = U_ZERO_ERROR; 289 clone2 = uregex_clone(re, &status); 290 TEST_ASSERT_SUCCESS(status); 291 TEST_ASSERT(clone2 != NULL); 292 uregex_close(re); 293 294 status = U_ZERO_ERROR; 295 clone3 = uregex_clone(clone2, &status); 296 TEST_ASSERT_SUCCESS(status); 297 TEST_ASSERT(clone3 != NULL); 298 299 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); 300 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); 301 302 status = U_ZERO_ERROR; 303 uregex_setText(clone1, testString1, -1, &status); 304 TEST_ASSERT_SUCCESS(status); 305 result = uregex_lookingAt(clone1, 0, &status); 306 TEST_ASSERT_SUCCESS(status); 307 
TEST_ASSERT(result==TRUE); 308 309 status = U_ZERO_ERROR; 310 uregex_setText(clone2, testString2, -1, &status); 311 TEST_ASSERT_SUCCESS(status); 312 result = uregex_lookingAt(clone2, 0, &status); 313 TEST_ASSERT_SUCCESS(status); 314 TEST_ASSERT(result==FALSE); 315 result = uregex_find(clone2, 0, &status); 316 TEST_ASSERT_SUCCESS(status); 317 TEST_ASSERT(result==TRUE); 318 319 uregex_close(clone1); 320 uregex_close(clone2); 321 uregex_close(clone3); 322 323 } 324 325 /* 326 * pattern() 327 */ 328 { 329 const UChar *resultPat; 330 int32_t resultLen; 331 u_uastrncpy(pat, "hello", sizeof(pat)/2); 332 status = U_ZERO_ERROR; 333 re = uregex_open(pat, -1, 0, NULL, &status); 334 resultPat = uregex_pattern(re, &resultLen, &status); 335 TEST_ASSERT_SUCCESS(status); 336 337 /* The TEST_ASSERT_SUCCESS above should change too... */ 338 if (U_SUCCESS(status)) { 339 TEST_ASSERT(resultLen == -1); 340 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 341 } 342 343 uregex_close(re); 344 345 status = U_ZERO_ERROR; 346 re = uregex_open(pat, 3, 0, NULL, &status); 347 resultPat = uregex_pattern(re, &resultLen, &status); 348 TEST_ASSERT_SUCCESS(status); 349 TEST_ASSERT_SUCCESS(status); 350 351 /* The TEST_ASSERT_SUCCESS above should change too... 
*/ 352 if (U_SUCCESS(status)) { 353 TEST_ASSERT(resultLen == 3); 354 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 355 TEST_ASSERT(u_strlen(resultPat) == 3); 356 } 357 358 uregex_close(re); 359 } 360 361 /* 362 * flags() 363 */ 364 { 365 int32_t t; 366 367 status = U_ZERO_ERROR; 368 re = uregex_open(pat, -1, 0, NULL, &status); 369 t = uregex_flags(re, &status); 370 TEST_ASSERT_SUCCESS(status); 371 TEST_ASSERT(t == 0); 372 uregex_close(re); 373 374 status = U_ZERO_ERROR; 375 re = uregex_open(pat, -1, 0, NULL, &status); 376 t = uregex_flags(re, &status); 377 TEST_ASSERT_SUCCESS(status); 378 TEST_ASSERT(t == 0); 379 uregex_close(re); 380 381 status = U_ZERO_ERROR; 382 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status); 383 t = uregex_flags(re, &status); 384 TEST_ASSERT_SUCCESS(status); 385 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL)); 386 uregex_close(re); 387 } 388 389 /* 390 * setText() and lookingAt() 391 */ 392 { 393 UChar text1[50]; 394 UChar text2[50]; 395 UBool result; 396 397 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); 398 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); 399 status = U_ZERO_ERROR; 400 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 401 re = uregex_open(pat, -1, 0, NULL, &status); 402 TEST_ASSERT_SUCCESS(status); 403 404 /* Operation before doing a setText should fail... 
*/ 405 status = U_ZERO_ERROR; 406 uregex_lookingAt(re, 0, &status); 407 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 408 409 status = U_ZERO_ERROR; 410 uregex_setText(re, text1, -1, &status); 411 result = uregex_lookingAt(re, 0, &status); 412 TEST_ASSERT(result == TRUE); 413 TEST_ASSERT_SUCCESS(status); 414 415 status = U_ZERO_ERROR; 416 uregex_setText(re, text2, -1, &status); 417 result = uregex_lookingAt(re, 0, &status); 418 TEST_ASSERT(result == FALSE); 419 TEST_ASSERT_SUCCESS(status); 420 421 status = U_ZERO_ERROR; 422 uregex_setText(re, text1, -1, &status); 423 result = uregex_lookingAt(re, 0, &status); 424 TEST_ASSERT(result == TRUE); 425 TEST_ASSERT_SUCCESS(status); 426 427 status = U_ZERO_ERROR; 428 uregex_setText(re, text1, 5, &status); 429 result = uregex_lookingAt(re, 0, &status); 430 TEST_ASSERT(result == FALSE); 431 TEST_ASSERT_SUCCESS(status); 432 433 status = U_ZERO_ERROR; 434 uregex_setText(re, text1, 6, &status); 435 result = uregex_lookingAt(re, 0, &status); 436 TEST_ASSERT(result == TRUE); 437 TEST_ASSERT_SUCCESS(status); 438 439 uregex_close(re); 440 } 441 442 443 /* 444 * getText() 445 */ 446 { 447 UChar text1[50]; 448 UChar text2[50]; 449 const UChar *result; 450 int32_t textLength; 451 452 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); 453 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); 454 status = U_ZERO_ERROR; 455 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 456 re = uregex_open(pat, -1, 0, NULL, &status); 457 458 uregex_setText(re, text1, -1, &status); 459 result = uregex_getText(re, &textLength, &status); 460 TEST_ASSERT(result == text1); 461 TEST_ASSERT(textLength == -1); 462 TEST_ASSERT_SUCCESS(status); 463 464 status = U_ZERO_ERROR; 465 uregex_setText(re, text2, 7, &status); 466 result = uregex_getText(re, &textLength, &status); 467 TEST_ASSERT(result == text2); 468 TEST_ASSERT(textLength == 7); 469 TEST_ASSERT_SUCCESS(status); 470 471 status = U_ZERO_ERROR; 472 uregex_setText(re, text2, 4, &status); 473 result = uregex_getText(re, 
&textLength, &status); 474 TEST_ASSERT(result == text2); 475 TEST_ASSERT(textLength == 4); 476 TEST_ASSERT_SUCCESS(status); 477 uregex_close(re); 478 } 479 480 /* 481 * matches() 482 */ 483 { 484 UChar text1[50]; 485 UBool result; 486 int len; 487 UChar nullString[] = {0,0,0}; 488 489 u_uastrncpy(text1, "abcccde", sizeof(text1)/2); 490 status = U_ZERO_ERROR; 491 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 492 re = uregex_open(pat, -1, 0, NULL, &status); 493 494 uregex_setText(re, text1, -1, &status); 495 result = uregex_matches(re, 0, &status); 496 TEST_ASSERT(result == FALSE); 497 TEST_ASSERT_SUCCESS(status); 498 499 status = U_ZERO_ERROR; 500 uregex_setText(re, text1, 6, &status); 501 result = uregex_matches(re, 0, &status); 502 TEST_ASSERT(result == TRUE); 503 TEST_ASSERT_SUCCESS(status); 504 505 status = U_ZERO_ERROR; 506 uregex_setText(re, text1, 6, &status); 507 result = uregex_matches(re, 1, &status); 508 TEST_ASSERT(result == FALSE); 509 TEST_ASSERT_SUCCESS(status); 510 uregex_close(re); 511 512 status = U_ZERO_ERROR; 513 re = uregex_openC(".?", 0, NULL, &status); 514 uregex_setText(re, text1, -1, &status); 515 len = u_strlen(text1); 516 result = uregex_matches(re, len, &status); 517 TEST_ASSERT(result == TRUE); 518 TEST_ASSERT_SUCCESS(status); 519 520 status = U_ZERO_ERROR; 521 uregex_setText(re, nullString, -1, &status); 522 TEST_ASSERT_SUCCESS(status); 523 result = uregex_matches(re, 0, &status); 524 TEST_ASSERT(result == TRUE); 525 TEST_ASSERT_SUCCESS(status); 526 uregex_close(re); 527 } 528 529 530 /* 531 * lookingAt() Used in setText test. 
532 */ 533 534 535 /* 536 * find(), findNext, start, end, reset 537 */ 538 { 539 UChar text1[50]; 540 UBool result; 541 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); 542 status = U_ZERO_ERROR; 543 re = uregex_openC("rx", 0, NULL, &status); 544 545 uregex_setText(re, text1, -1, &status); 546 result = uregex_find(re, 0, &status); 547 TEST_ASSERT(result == TRUE); 548 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 549 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 550 TEST_ASSERT_SUCCESS(status); 551 552 result = uregex_find(re, 9, &status); 553 TEST_ASSERT(result == TRUE); 554 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 555 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 556 TEST_ASSERT_SUCCESS(status); 557 558 result = uregex_find(re, 14, &status); 559 TEST_ASSERT(result == FALSE); 560 TEST_ASSERT_SUCCESS(status); 561 562 status = U_ZERO_ERROR; 563 uregex_reset(re, 0, &status); 564 565 result = uregex_findNext(re, &status); 566 TEST_ASSERT(result == TRUE); 567 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 568 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 569 TEST_ASSERT_SUCCESS(status); 570 571 result = uregex_findNext(re, &status); 572 TEST_ASSERT(result == TRUE); 573 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 574 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 575 TEST_ASSERT_SUCCESS(status); 576 577 status = U_ZERO_ERROR; 578 uregex_reset(re, 12, &status); 579 580 result = uregex_findNext(re, &status); 581 TEST_ASSERT(result == TRUE); 582 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 583 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 584 TEST_ASSERT_SUCCESS(status); 585 586 result = uregex_findNext(re, &status); 587 TEST_ASSERT(result == FALSE); 588 TEST_ASSERT_SUCCESS(status); 589 590 uregex_close(re); 591 } 592 593 /* 594 * groupCount 595 */ 596 { 597 int32_t result; 598 599 status = U_ZERO_ERROR; 600 re = uregex_openC("abc", 0, NULL, &status); 601 result = uregex_groupCount(re, &status); 602 TEST_ASSERT_SUCCESS(status); 603 
TEST_ASSERT(result == 0); 604 uregex_close(re); 605 606 status = U_ZERO_ERROR; 607 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status); 608 result = uregex_groupCount(re, &status); 609 TEST_ASSERT_SUCCESS(status); 610 TEST_ASSERT(result == 3); 611 uregex_close(re); 612 613 } 614 615 616 /* 617 * group() 618 */ 619 { 620 UChar text1[80]; 621 UChar buf[80]; 622 UBool result; 623 int32_t resultSz; 624 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 625 626 status = U_ZERO_ERROR; 627 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 628 TEST_ASSERT_SUCCESS(status); 629 630 631 uregex_setText(re, text1, -1, &status); 632 result = uregex_find(re, 0, &status); 633 TEST_ASSERT(result==TRUE); 634 635 /* Capture Group 0, the full match. Should succeed. */ 636 status = U_ZERO_ERROR; 637 resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status); 638 TEST_ASSERT_SUCCESS(status); 639 TEST_ASSERT_STRING("abc interior def", buf, TRUE); 640 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 641 642 /* Capture group #1. Should succeed. */ 643 status = U_ZERO_ERROR; 644 resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status); 645 TEST_ASSERT_SUCCESS(status); 646 TEST_ASSERT_STRING(" interior ", buf, TRUE); 647 TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); 648 649 /* Capture group out of range. Error. 
*/ 650 status = U_ZERO_ERROR; 651 uregex_group(re, 2, buf, sizeof(buf)/2, &status); 652 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 653 654 /* NULL buffer, pure pre-flight */ 655 status = U_ZERO_ERROR; 656 resultSz = uregex_group(re, 0, NULL, 0, &status); 657 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 658 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 659 660 /* Too small buffer, truncated string */ 661 status = U_ZERO_ERROR; 662 memset(buf, -1, sizeof(buf)); 663 resultSz = uregex_group(re, 0, buf, 5, &status); 664 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 665 TEST_ASSERT_STRING("abc i", buf, FALSE); 666 TEST_ASSERT(buf[5] == (UChar)0xffff); 667 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 668 669 /* Output string just fits buffer, no NUL term. */ 670 status = U_ZERO_ERROR; 671 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status); 672 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 673 TEST_ASSERT_STRING("abc interior def", buf, FALSE); 674 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 675 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff); 676 677 uregex_close(re); 678 679 } 680 681 /* 682 * Regions 683 */ 684 685 686 /* SetRegion(), getRegion() do something */ 687 TEST_SETUP(".*", "0123456789ABCDEF", 0) 688 UChar resultString[40]; 689 TEST_ASSERT(uregex_regionStart(re, &status) == 0); 690 TEST_ASSERT(uregex_regionEnd(re, &status) == 16); 691 uregex_setRegion(re, 3, 6, &status); 692 TEST_ASSERT(uregex_regionStart(re, &status) == 3); 693 TEST_ASSERT(uregex_regionEnd(re, &status) == 6); 694 TEST_ASSERT(uregex_findNext(re, &status)); 695 TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3) 696 TEST_ASSERT_STRING("345", resultString, TRUE); 697 TEST_TEARDOWN; 698 699 /* find(start=-1) uses regions */ 700 TEST_SETUP(".*", "0123456789ABCDEF", 0); 701 uregex_setRegion(re, 4, 6, &status); 702 TEST_ASSERT(uregex_find(re, -1, 
&status) == TRUE); 703 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 704 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 705 TEST_TEARDOWN; 706 707 /* find (start >=0) does not use regions */ 708 TEST_SETUP(".*", "0123456789ABCDEF", 0); 709 uregex_setRegion(re, 4, 6, &status); 710 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 711 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 712 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 713 TEST_TEARDOWN; 714 715 /* findNext() obeys regions */ 716 TEST_SETUP(".", "0123456789ABCDEF", 0); 717 uregex_setRegion(re, 4, 6, &status); 718 TEST_ASSERT(uregex_findNext(re,&status) == TRUE); 719 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 720 TEST_ASSERT(uregex_findNext(re, &status) == TRUE); 721 TEST_ASSERT(uregex_start(re, 0, &status) == 5); 722 TEST_ASSERT(uregex_findNext(re, &status) == FALSE); 723 TEST_TEARDOWN; 724 725 /* matches(start=-1) uses regions */ 726 /* Also, verify that non-greedy *? succeeds in finding the full match. */ 727 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 728 uregex_setRegion(re, 4, 6, &status); 729 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE); 730 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 731 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 732 TEST_TEARDOWN; 733 734 /* matches (start >=0) does not use regions */ 735 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 736 uregex_setRegion(re, 4, 6, &status); 737 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE); 738 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 739 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 740 TEST_TEARDOWN; 741 742 /* lookingAt(start=-1) uses regions */ 743 /* Also, verify that non-greedy *? finds the first (shortest) match. 
*/ 744 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 745 uregex_setRegion(re, 4, 6, &status); 746 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE); 747 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 748 TEST_ASSERT(uregex_end(re, 0, &status) == 4); 749 TEST_TEARDOWN; 750 751 /* lookingAt (start >=0) does not use regions */ 752 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 753 uregex_setRegion(re, 4, 6, &status); 754 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE); 755 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 756 TEST_ASSERT(uregex_end(re, 0, &status) == 0); 757 TEST_TEARDOWN; 758 759 /* hitEnd() */ 760 TEST_SETUP("[a-f]*", "abcdefghij", 0); 761 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 762 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE); 763 TEST_TEARDOWN; 764 765 TEST_SETUP("[a-f]*", "abcdef", 0); 766 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 767 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE); 768 TEST_TEARDOWN; 769 770 /* requireEnd */ 771 TEST_SETUP("abcd", "abcd", 0); 772 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 773 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE); 774 TEST_TEARDOWN; 775 776 TEST_SETUP("abcd$", "abcd", 0); 777 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 778 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE); 779 TEST_TEARDOWN; 780 781 /* anchoringBounds */ 782 TEST_SETUP("abc$", "abcdef", 0); 783 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE); 784 uregex_useAnchoringBounds(re, FALSE, &status); 785 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE); 786 787 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); 788 uregex_useAnchoringBounds(re, TRUE, &status); 789 uregex_setRegion(re, 0, 3, &status); 790 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); 791 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 792 TEST_TEARDOWN; 793 794 /* Transparent Bounds */ 795 TEST_SETUP("abc(?=def)", "abcdef", 0); 796 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE); 797 
uregex_useTransparentBounds(re, TRUE, &status); 798 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE); 799 800 uregex_useTransparentBounds(re, FALSE, &status); 801 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */ 802 uregex_setRegion(re, 0, 3, &status); 803 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */ 804 uregex_useTransparentBounds(re, TRUE, &status); 805 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */ 806 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 807 TEST_TEARDOWN; 808 809 810 /* 811 * replaceFirst() 812 */ 813 { 814 UChar text1[80]; 815 UChar text2[80]; 816 UChar replText[80]; 817 UChar buf[80]; 818 int32_t resultSz; 819 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 820 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 821 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); 822 823 status = U_ZERO_ERROR; 824 re = uregex_openC("x(.*?)x", 0, NULL, &status); 825 TEST_ASSERT_SUCCESS(status); 826 827 /* Normal case, with match */ 828 uregex_setText(re, text1, -1, &status); 829 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); 830 TEST_ASSERT_SUCCESS(status); 831 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); 832 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 833 834 /* No match. Text should copy to output with no changes. */ 835 status = U_ZERO_ERROR; 836 uregex_setText(re, text2, -1, &status); 837 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); 838 TEST_ASSERT_SUCCESS(status); 839 TEST_ASSERT_STRING("No match here.", buf, TRUE); 840 TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); 841 842 /* Match, output just fills buffer, no termination warning. 
*/ 843 status = U_ZERO_ERROR; 844 uregex_setText(re, text1, -1, &status); 845 memset(buf, -1, sizeof(buf)); 846 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 847 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 848 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 849 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 850 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 851 852 /* Do the replaceFirst again, without first resetting anything. 853 * Should give the same results. 854 */ 855 status = U_ZERO_ERROR; 856 memset(buf, -1, sizeof(buf)); 857 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 858 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 859 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 860 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 861 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 862 863 /* NULL buffer, zero buffer length */ 864 status = U_ZERO_ERROR; 865 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status); 866 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 867 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 868 869 /* Buffer too small by one */ 870 status = U_ZERO_ERROR; 871 memset(buf, -1, sizeof(buf)); 872 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status); 873 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 874 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE); 875 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 876 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 877 878 uregex_close(re); 879 } 880 881 882 /* 883 * replaceAll() 884 */ 885 { 886 UChar text1[80]; /* "Replace xaax x1x x...x." 
*/ 887 UChar text2[80]; /* "No match Here" */ 888 UChar replText[80]; /* "<$1>" */ 889 UChar replText2[80]; /* "<<$1>>" */ 890 const char * pattern = "x(.*?)x"; 891 const char * expectedResult = "Replace <aa> <1> <...>."; 892 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>."; 893 UChar buf[80]; 894 int32_t resultSize; 895 int32_t expectedResultSize; 896 int32_t expectedResultSize2; 897 int32_t i; 898 899 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 900 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 901 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); 902 u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2); 903 expectedResultSize = strlen(expectedResult); 904 expectedResultSize2 = strlen(expectedResult2); 905 906 status = U_ZERO_ERROR; 907 re = uregex_openC(pattern, 0, NULL, &status); 908 TEST_ASSERT_SUCCESS(status); 909 910 /* Normal case, with match */ 911 uregex_setText(re, text1, -1, &status); 912 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); 913 TEST_ASSERT_SUCCESS(status); 914 TEST_ASSERT_STRING(expectedResult, buf, TRUE); 915 TEST_ASSERT(resultSize == expectedResultSize); 916 917 /* No match. Text should copy to output with no changes. */ 918 status = U_ZERO_ERROR; 919 uregex_setText(re, text2, -1, &status); 920 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); 921 TEST_ASSERT_SUCCESS(status); 922 TEST_ASSERT_STRING("No match here.", buf, TRUE); 923 TEST_ASSERT(resultSize == u_strlen(text2)); 924 925 /* Match, output just fills buffer, no termination warning. 
*/ 926 status = U_ZERO_ERROR; 927 uregex_setText(re, text1, -1, &status); 928 memset(buf, -1, sizeof(buf)); 929 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status); 930 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 931 TEST_ASSERT_STRING(expectedResult, buf, FALSE); 932 TEST_ASSERT(resultSize == expectedResultSize); 933 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 934 935 /* Do the replaceFirst again, without first resetting anything. 936 * Should give the same results. 937 */ 938 status = U_ZERO_ERROR; 939 memset(buf, -1, sizeof(buf)); 940 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status); 941 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 942 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); 943 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 944 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 945 946 /* NULL buffer, zero buffer length */ 947 status = U_ZERO_ERROR; 948 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status); 949 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 950 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 951 952 /* Buffer too small. Try every size, which will tickle edge cases 953 * in uregex_appendReplacement (used by replaceAll) */ 954 for (i=0; i<expectedResultSize; i++) { 955 char expected[80]; 956 status = U_ZERO_ERROR; 957 memset(buf, -1, sizeof(buf)); 958 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status); 959 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 960 strcpy(expected, expectedResult); 961 expected[i] = 0; 962 TEST_ASSERT_STRING(expected, buf, FALSE); 963 TEST_ASSERT(resultSize == expectedResultSize); 964 TEST_ASSERT(buf[i] == (UChar)0xffff); 965 } 966 967 /* Buffer too small. 
Same as previous test, except this time the replacement 968 * text is longer than the match capture group, making the length of the complete 969 * replacement longer than the original string. 970 */ 971 for (i=0; i<expectedResultSize2; i++) { 972 char expected[80]; 973 status = U_ZERO_ERROR; 974 memset(buf, -1, sizeof(buf)); 975 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status); 976 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 977 strcpy(expected, expectedResult2); 978 expected[i] = 0; 979 TEST_ASSERT_STRING(expected, buf, FALSE); 980 TEST_ASSERT(resultSize == expectedResultSize2); 981 TEST_ASSERT(buf[i] == (UChar)0xffff); 982 } 983 984 985 uregex_close(re); 986 } 987 988 989 /* 990 * appendReplacement() 991 */ 992 { 993 UChar text[100]; 994 UChar repl[100]; 995 UChar buf[100]; 996 UChar *bufPtr; 997 int32_t bufCap; 998 999 1000 status = U_ZERO_ERROR; 1001 re = uregex_openC(".*", 0, 0, &status); 1002 TEST_ASSERT_SUCCESS(status); 1003 1004 u_uastrncpy(text, "whatever", sizeof(text)/2); 1005 u_uastrncpy(repl, "some other", sizeof(repl)/2); 1006 uregex_setText(re, text, -1, &status); 1007 1008 /* match covers whole target string */ 1009 uregex_find(re, 0, &status); 1010 TEST_ASSERT_SUCCESS(status); 1011 bufPtr = buf; 1012 bufCap = sizeof(buf) / 2; 1013 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1014 TEST_ASSERT_SUCCESS(status); 1015 TEST_ASSERT_STRING("some other", buf, TRUE); 1016 1017 /* Match has \u \U escapes */ 1018 uregex_find(re, 0, &status); 1019 TEST_ASSERT_SUCCESS(status); 1020 bufPtr = buf; 1021 bufCap = sizeof(buf) / 2; 1022 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); 1023 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1024 TEST_ASSERT_SUCCESS(status); 1025 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 1026 1027 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. 
*/ 1028 status = U_ZERO_ERROR; 1029 uregex_find(re, 0, &status); 1030 TEST_ASSERT_SUCCESS(status); 1031 bufPtr = buf; 1032 status = U_BUFFER_OVERFLOW_ERROR; 1033 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status); 1034 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1035 1036 uregex_close(re); 1037 } 1038 1039 1040 /* 1041 * appendTail(). Checked in ReplaceFirst(), replaceAll(). 1042 */ 1043 1044 /* 1045 * split() 1046 */ 1047 { 1048 UChar textToSplit[80]; 1049 UChar text2[80]; 1050 UChar buf[200]; 1051 UChar *fields[10]; 1052 int32_t numFields; 1053 int32_t requiredCapacity; 1054 int32_t spaceNeeded; 1055 int32_t sz; 1056 1057 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); 1058 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1059 1060 status = U_ZERO_ERROR; 1061 re = uregex_openC(":", 0, NULL, &status); 1062 1063 1064 /* Simple split */ 1065 1066 uregex_setText(re, textToSplit, -1, &status); 1067 TEST_ASSERT_SUCCESS(status); 1068 1069 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1070 if (U_SUCCESS(status)) { 1071 memset(fields, -1, sizeof(fields)); 1072 numFields = 1073 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); 1074 TEST_ASSERT_SUCCESS(status); 1075 1076 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1077 if(U_SUCCESS(status)) { 1078 TEST_ASSERT(numFields == 3); 1079 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1080 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1081 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1082 TEST_ASSERT(fields[3] == NULL); 1083 1084 spaceNeeded = u_strlen(textToSplit) - 1085 (numFields - 1) + /* Field delimiters do not appear in output */ 1086 numFields; /* Each field gets a NUL terminator */ 1087 1088 TEST_ASSERT(spaceNeeded == requiredCapacity); 1089 } 1090 } 1091 1092 uregex_close(re); 1093 1094 1095 /* Split with too few output strings available */ 1096 status = U_ZERO_ERROR; 1097 re = uregex_openC(":", 0, NULL, &status); 1098 uregex_setText(re, textToSplit, -1, &status); 1099 TEST_ASSERT_SUCCESS(status); 1100 1101 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1102 if(U_SUCCESS(status)) { 1103 memset(fields, -1, sizeof(fields)); 1104 numFields = 1105 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); 1106 TEST_ASSERT_SUCCESS(status); 1107 1108 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1109 if(U_SUCCESS(status)) { 1110 TEST_ASSERT(numFields == 2); 1111 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1112 TEST_ASSERT_STRING(" second: third", fields[1], TRUE); 1113 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1114 1115 spaceNeeded = u_strlen(textToSplit) - 1116 (numFields - 1) + /* Field delimiters do not appear in output */ 1117 numFields; /* Each field gets a NUL terminator */ 1118 1119 TEST_ASSERT(spaceNeeded == requiredCapacity); 1120 1121 /* Split with a range of output buffer sizes. 
*/ 1122 spaceNeeded = u_strlen(textToSplit) - 1123 (numFields - 1) + /* Field delimiters do not appear in output */ 1124 numFields; /* Each field gets a NUL terminator */ 1125 1126 for (sz=0; sz < spaceNeeded+1; sz++) { 1127 memset(fields, -1, sizeof(fields)); 1128 status = U_ZERO_ERROR; 1129 numFields = 1130 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status); 1131 if (sz >= spaceNeeded) { 1132 TEST_ASSERT_SUCCESS(status); 1133 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1134 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1135 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1136 } else { 1137 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1138 } 1139 TEST_ASSERT(numFields == 3); 1140 TEST_ASSERT(fields[3] == NULL); 1141 TEST_ASSERT(spaceNeeded == requiredCapacity); 1142 } 1143 } 1144 } 1145 1146 uregex_close(re); 1147 } 1148 1149 1150 1151 1152 /* Split(), part 2. Patterns with capture groups. The capture group text 1153 * comes out as additional fields. */ 1154 { 1155 UChar textToSplit[80]; 1156 UChar buf[200]; 1157 UChar *fields[10]; 1158 int32_t numFields; 1159 int32_t requiredCapacity; 1160 int32_t spaceNeeded; 1161 int32_t sz; 1162 1163 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); 1164 1165 status = U_ZERO_ERROR; 1166 re = uregex_openC("<(.*?)>", 0, NULL, &status); 1167 1168 uregex_setText(re, textToSplit, -1, &status); 1169 TEST_ASSERT_SUCCESS(status); 1170 1171 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1172 if(U_SUCCESS(status)) { 1173 memset(fields, -1, sizeof(fields)); 1174 numFields = 1175 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); 1176 TEST_ASSERT_SUCCESS(status); 1177 1178 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1179 if(U_SUCCESS(status)) { 1180 TEST_ASSERT(numFields == 5); 1181 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1182 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1183 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1184 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1185 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1186 TEST_ASSERT(fields[5] == NULL); 1187 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1188 TEST_ASSERT(spaceNeeded == requiredCapacity); 1189 } 1190 } 1191 1192 /* Split with too few output strings available (2) */ 1193 status = U_ZERO_ERROR; 1194 memset(fields, -1, sizeof(fields)); 1195 numFields = 1196 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); 1197 TEST_ASSERT_SUCCESS(status); 1198 1199 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1200 if(U_SUCCESS(status)) { 1201 TEST_ASSERT(numFields == 2); 1202 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1203 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE); 1204 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1205 1206 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */ 1207 TEST_ASSERT(spaceNeeded == requiredCapacity); 1208 } 1209 1210 /* Split with too few output strings available (3) */ 1211 status = U_ZERO_ERROR; 1212 memset(fields, -1, sizeof(fields)); 1213 numFields = 1214 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status); 1215 TEST_ASSERT_SUCCESS(status); 1216 1217 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1218 if(U_SUCCESS(status)) { 1219 TEST_ASSERT(numFields == 3); 1220 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1221 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1222 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE); 1223 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*))); 1224 1225 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." 
at NUL positions */ 1226 TEST_ASSERT(spaceNeeded == requiredCapacity); 1227 } 1228 1229 /* Split with just enough output strings available (5) */ 1230 status = U_ZERO_ERROR; 1231 memset(fields, -1, sizeof(fields)); 1232 numFields = 1233 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status); 1234 TEST_ASSERT_SUCCESS(status); 1235 1236 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1237 if(U_SUCCESS(status)) { 1238 TEST_ASSERT(numFields == 5); 1239 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1240 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1241 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1242 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1243 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1244 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*))); 1245 1246 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1247 TEST_ASSERT(spaceNeeded == requiredCapacity); 1248 } 1249 1250 /* Split, end of text is a field delimiter. */ 1251 status = U_ZERO_ERROR; 1252 sz = strlen("first <tag-a> second<tag-b>"); 1253 uregex_setText(re, textToSplit, sz, &status); 1254 TEST_ASSERT_SUCCESS(status); 1255 1256 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1257 if(U_SUCCESS(status)) { 1258 memset(fields, -1, sizeof(fields)); 1259 numFields = 1260 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status); 1261 TEST_ASSERT_SUCCESS(status); 1262 1263 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1264 if(U_SUCCESS(status)) { 1265 TEST_ASSERT(numFields == 5); 1266 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1267 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1268 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1269 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1270 TEST_ASSERT_STRING("", fields[4], TRUE); 1271 TEST_ASSERT(fields[5] == NULL); 1272 TEST_ASSERT(fields[8] == NULL); 1273 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); 1274 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */ 1275 TEST_ASSERT(spaceNeeded == requiredCapacity); 1276 } 1277 } 1278 1279 uregex_close(re); 1280 } 1281 1282 /* 1283 * set/getTimeLimit 1284 */ 1285 TEST_SETUP("abc$", "abcdef", 0); 1286 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0); 1287 uregex_setTimeLimit(re, 1000, &status); 1288 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1289 TEST_ASSERT_SUCCESS(status); 1290 uregex_setTimeLimit(re, -1, &status); 1291 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1292 status = U_ZERO_ERROR; 1293 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1294 TEST_TEARDOWN; 1295 1296 /* 1297 * set/get Stack Limit 1298 */ 1299 TEST_SETUP("abc$", "abcdef", 0); 1300 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000); 1301 uregex_setStackLimit(re, 40000, &status); 1302 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1303 TEST_ASSERT_SUCCESS(status); 1304 uregex_setStackLimit(re, -1, &status); 1305 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1306 status = U_ZERO_ERROR; 1307 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1308 TEST_TEARDOWN; 1309 1310 1311 /* 1312 * Get/Set callback functions 1313 * This test is copied from intltest regex/Callbacks 1314 * The pattern and test data will run long enough to cause the callback 1315 * to be invoked. The nested '+' operators give exponential time 1316 * behavior with increasing string length. 
1317 */ 1318 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0) 1319 callBackContext cbInfo = {4, 0, 0}; 1320 const void *pContext = &cbInfo; 1321 URegexMatchCallback *returnedFn = &TestCallbackFn; 1322 1323 /* Getting the callback fn when it hasn't been set must return NULL */ 1324 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1325 TEST_ASSERT_SUCCESS(status); 1326 TEST_ASSERT(returnedFn == NULL); 1327 TEST_ASSERT(pContext == NULL); 1328 1329 /* Set thecallback and do a match. */ 1330 /* The callback function should record that it has been called. */ 1331 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status); 1332 TEST_ASSERT_SUCCESS(status); 1333 TEST_ASSERT(cbInfo.numCalls == 0); 1334 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE); 1335 TEST_ASSERT_SUCCESS(status); 1336 TEST_ASSERT(cbInfo.numCalls > 0); 1337 1338 /* Getting the callback should return the values that were set above. */ 1339 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1340 TEST_ASSERT(returnedFn == &TestCallbackFn); 1341 TEST_ASSERT(pContext == &cbInfo); 1342 1343 TEST_TEARDOWN; 1344 } 1345 1346 1347

/**
 * Regression test for bug 4315: uregex_split() called with a NULL destination
 * buffer and a capacity of zero.
 *
 * Step 1 performs the split with no output buffer at all.  The call is
 * expected to fail with U_BUFFER_OVERFLOW_ERROR while still reporting the
 * number of fields (3) and the buffer length that would have been required.
 *
 * Step 2 allocates a buffer of the reported length (plus one) and repeats the
 * split.  It must succeed, report the same required length, and produce the
 * three expected fields followed by a NULL entry in the field array.
 */
static void TestBug4315(void) {
    UErrorCode          theICUError = U_ZERO_ERROR;
    URegularExpression *theRegEx;
    UChar              *textBuff;
    const char         *thePattern;
    UChar               theString[100];
    UChar              *destFields[24];
    int32_t             neededLength1 = 0;   /* required capacity reported by the NULL-buffer split */
    int32_t             neededLength2 = 0;   /* required capacity reported by the real split        */
    int32_t             wordCount = 0;
    /* Derive the capacity from the array so the two can never disagree. */
    int32_t             destFieldsSize = (int32_t)(sizeof(destFields) / sizeof(destFields[0]));

    thePattern = "ck ";
    u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");

    /* open a regex */
    theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
    TEST_ASSERT_SUCCESS(theICUError);

    /* set the input string */
    uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
    TEST_ASSERT_SUCCESS(theICUError);

    /* split */
    /* Explicitly pass NULL and 0 to force the overflow error -> this is the
     * call in which the original bug showed up. */
    wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
                             destFieldsSize, &theICUError);

    TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
    TEST_ASSERT(wordCount == 3);

    if (theICUError == U_BUFFER_OVERFLOW_ERROR) {
        theICUError = U_ZERO_ERROR;
        textBuff = (UChar *)malloc(sizeof(UChar) * (size_t)(neededLength1 + 1));
        if (textBuff == NULL) {
            /* Do not hand a NULL buffer with a non-zero capacity to
             * uregex_split(), nor read fields that were never filled in. */
            log_err("Failure at file %s, line %d, out of memory\n", __FILE__, __LINE__);
        } else {
            wordCount = uregex_split(theRegEx, textBuff, neededLength1 + 1, &neededLength2,
                                     destFields, destFieldsSize, &theICUError);
            TEST_ASSERT(wordCount == 3);
            TEST_ASSERT_SUCCESS(theICUError);
            TEST_ASSERT(neededLength1 == neededLength2);
            TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
            TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
            TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
            TEST_ASSERT(destFields[3] == NULL);
            free(textBuff);
        }
    }
    uregex_close(theRegEx);
}

1399 /* Based on TestRegexCAPI() */ 1400 static void TestUTextAPI(void) { 1401 UErrorCode status = U_ZERO_ERROR; 1402 URegularExpression *re; 1403 UText patternText = UTEXT_INITIALIZER; 1404 UChar pat[200]; 1405 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 }; 1406 1407 /* Mimimalist open/close */ 1408 utext_openUTF8(&patternText, patternTextUTF8, -1, &status); 1409 re = uregex_openUText(&patternText, 0, 0, &status); 1410 if (U_FAILURE(status)) { 1411 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 1412 utext_close(&patternText); 1413 return; 1414 } 1415 uregex_close(re); 1416 1417 /* Open with all flag values set */ 1418 status = U_ZERO_ERROR; 1419 re = uregex_openUText(&patternText, 1420 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 1421 0, &status); 1422 TEST_ASSERT_SUCCESS(status); 1423 uregex_close(re); 1424 1425 /* Open with an invalid flag */ 1426 status = U_ZERO_ERROR; 
1427 re = uregex_openUText(&patternText, 0x40000000, 0, &status); 1428 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 1429 uregex_close(re); 1430 1431 /* open with an invalid parameter */ 1432 status = U_ZERO_ERROR; 1433 re = uregex_openUText(NULL, 1434 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 1435 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 1436 1437 /* 1438 * clone 1439 */ 1440 { 1441 URegularExpression *clone1; 1442 URegularExpression *clone2; 1443 URegularExpression *clone3; 1444 UChar testString1[30]; 1445 UChar testString2[30]; 1446 UBool result; 1447 1448 1449 status = U_ZERO_ERROR; 1450 re = uregex_openUText(&patternText, 0, 0, &status); 1451 TEST_ASSERT_SUCCESS(status); 1452 clone1 = uregex_clone(re, &status); 1453 TEST_ASSERT_SUCCESS(status); 1454 TEST_ASSERT(clone1 != NULL); 1455 1456 status = U_ZERO_ERROR; 1457 clone2 = uregex_clone(re, &status); 1458 TEST_ASSERT_SUCCESS(status); 1459 TEST_ASSERT(clone2 != NULL); 1460 uregex_close(re); 1461 1462 status = U_ZERO_ERROR; 1463 clone3 = uregex_clone(clone2, &status); 1464 TEST_ASSERT_SUCCESS(status); 1465 TEST_ASSERT(clone3 != NULL); 1466 1467 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); 1468 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); 1469 1470 status = U_ZERO_ERROR; 1471 uregex_setText(clone1, testString1, -1, &status); 1472 TEST_ASSERT_SUCCESS(status); 1473 result = uregex_lookingAt(clone1, 0, &status); 1474 TEST_ASSERT_SUCCESS(status); 1475 TEST_ASSERT(result==TRUE); 1476 1477 status = U_ZERO_ERROR; 1478 uregex_setText(clone2, testString2, -1, &status); 1479 TEST_ASSERT_SUCCESS(status); 1480 result = uregex_lookingAt(clone2, 0, &status); 1481 TEST_ASSERT_SUCCESS(status); 1482 TEST_ASSERT(result==FALSE); 1483 result = uregex_find(clone2, 0, &status); 1484 TEST_ASSERT_SUCCESS(status); 1485 TEST_ASSERT(result==TRUE); 1486 1487 uregex_close(clone1); 1488 uregex_close(clone2); 1489 uregex_close(clone3); 1490 1491 
} 1492 1493 /* 1494 * pattern() and patternText() 1495 */ 1496 { 1497 const UChar *resultPat; 1498 int32_t resultLen; 1499 UText *resultText; 1500 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */ 1501 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ 1502 u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */ 1503 status = U_ZERO_ERROR; 1504 1505 utext_openUTF8(&patternText, str_hello, -1, &status); 1506 re = uregex_open(pat, -1, 0, NULL, &status); 1507 resultPat = uregex_pattern(re, &resultLen, &status); 1508 TEST_ASSERT_SUCCESS(status); 1509 1510 /* The TEST_ASSERT_SUCCESS above should change too... */ 1511 if (U_SUCCESS(status)) { 1512 TEST_ASSERT(resultLen == -1); 1513 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 1514 } 1515 1516 resultText = uregex_patternUText(re, &status); 1517 TEST_ASSERT_SUCCESS(status); 1518 TEST_ASSERT_UTEXT(str_hello, resultText); 1519 1520 uregex_close(re); 1521 1522 status = U_ZERO_ERROR; 1523 re = uregex_open(pat, 3, 0, NULL, &status); 1524 resultPat = uregex_pattern(re, &resultLen, &status); 1525 TEST_ASSERT_SUCCESS(status); 1526 1527 /* The TEST_ASSERT_SUCCESS above should change too... 
*/ 1528 if (U_SUCCESS(status)) { 1529 TEST_ASSERT(resultLen == 3); 1530 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 1531 TEST_ASSERT(u_strlen(resultPat) == 3); 1532 } 1533 1534 resultText = uregex_patternUText(re, &status); 1535 TEST_ASSERT_SUCCESS(status); 1536 TEST_ASSERT_UTEXT(str_hel, resultText); 1537 1538 uregex_close(re); 1539 } 1540 1541 /* 1542 * setUText() and lookingAt() 1543 */ 1544 { 1545 UText text1 = UTEXT_INITIALIZER; 1546 UText text2 = UTEXT_INITIALIZER; 1547 UBool result; 1548 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1549 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1550 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1551 status = U_ZERO_ERROR; 1552 utext_openUTF8(&text1, str_abcccd, -1, &status); 1553 utext_openUTF8(&text2, str_abcccxd, -1, &status); 1554 1555 utext_openUTF8(&patternText, str_abcd, -1, &status); 1556 re = uregex_openUText(&patternText, 0, NULL, &status); 1557 TEST_ASSERT_SUCCESS(status); 1558 1559 /* Operation before doing a setText should fail... 
*/ 1560 status = U_ZERO_ERROR; 1561 uregex_lookingAt(re, 0, &status); 1562 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 1563 1564 status = U_ZERO_ERROR; 1565 uregex_setUText(re, &text1, &status); 1566 result = uregex_lookingAt(re, 0, &status); 1567 TEST_ASSERT(result == TRUE); 1568 TEST_ASSERT_SUCCESS(status); 1569 1570 status = U_ZERO_ERROR; 1571 uregex_setUText(re, &text2, &status); 1572 result = uregex_lookingAt(re, 0, &status); 1573 TEST_ASSERT(result == FALSE); 1574 TEST_ASSERT_SUCCESS(status); 1575 1576 status = U_ZERO_ERROR; 1577 uregex_setUText(re, &text1, &status); 1578 result = uregex_lookingAt(re, 0, &status); 1579 TEST_ASSERT(result == TRUE); 1580 TEST_ASSERT_SUCCESS(status); 1581 1582 uregex_close(re); 1583 utext_close(&text1); 1584 utext_close(&text2); 1585 } 1586 1587 1588 /* 1589 * getText() and getUText() 1590 */ 1591 { 1592 UText text1 = UTEXT_INITIALIZER; 1593 UText text2 = UTEXT_INITIALIZER; 1594 UChar text2Chars[20]; 1595 UText *resultText; 1596 const UChar *result; 1597 int32_t textLength; 1598 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1599 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1600 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1601 1602 1603 status = U_ZERO_ERROR; 1604 utext_openUTF8(&text1, str_abcccd, -1, &status); 1605 u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2); 1606 utext_openUChars(&text2, text2Chars, -1, &status); 1607 1608 utext_openUTF8(&patternText, str_abcd, -1, &status); 1609 re = uregex_openUText(&patternText, 0, NULL, &status); 1610 1611 /* First set a UText */ 1612 uregex_setUText(re, &text1, &status); 1613 resultText = uregex_getUText(re, NULL, &status); 1614 TEST_ASSERT_SUCCESS(status); 1615 TEST_ASSERT(resultText != &text1); 1616 utext_setNativeIndex(resultText, 0); 1617 utext_setNativeIndex(&text1, 0); 1618 TEST_ASSERT(testUTextEqual(resultText, &text1)); 1619 utext_close(resultText); 
1620 1621 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */ 1622 (void)result; /* Suppress set but not used warning. */ 1623 TEST_ASSERT(textLength == -1 || textLength == 6); 1624 resultText = uregex_getUText(re, NULL, &status); 1625 TEST_ASSERT_SUCCESS(status); 1626 TEST_ASSERT(resultText != &text1); 1627 utext_setNativeIndex(resultText, 0); 1628 utext_setNativeIndex(&text1, 0); 1629 TEST_ASSERT(testUTextEqual(resultText, &text1)); 1630 utext_close(resultText); 1631 1632 /* Then set a UChar * */ 1633 uregex_setText(re, text2Chars, 7, &status); 1634 resultText = uregex_getUText(re, NULL, &status); 1635 TEST_ASSERT_SUCCESS(status); 1636 utext_setNativeIndex(resultText, 0); 1637 utext_setNativeIndex(&text2, 0); 1638 TEST_ASSERT(testUTextEqual(resultText, &text2)); 1639 utext_close(resultText); 1640 result = uregex_getText(re, &textLength, &status); 1641 TEST_ASSERT(textLength == 7); 1642 1643 uregex_close(re); 1644 utext_close(&text1); 1645 utext_close(&text2); 1646 } 1647 1648 /* 1649 * matches() 1650 */ 1651 { 1652 UText text1 = UTEXT_INITIALIZER; 1653 UBool result; 1654 UText nullText = UTEXT_INITIALIZER; 1655 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */ 1656 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */ 1657 1658 status = U_ZERO_ERROR; 1659 utext_openUTF8(&text1, str_abcccde, -1, &status); 1660 utext_openUTF8(&patternText, str_abcd, -1, &status); 1661 re = uregex_openUText(&patternText, 0, NULL, &status); 1662 1663 uregex_setUText(re, &text1, &status); 1664 result = uregex_matches(re, 0, &status); 1665 TEST_ASSERT(result == FALSE); 1666 TEST_ASSERT_SUCCESS(status); 1667 uregex_close(re); 1668 1669 status = U_ZERO_ERROR; 1670 re = uregex_openC(".?", 0, NULL, &status); 1671 uregex_setUText(re, &text1, &status); 1672 result = uregex_matches(re, 7, &status); 1673 TEST_ASSERT(result == TRUE); 1674 TEST_ASSERT_SUCCESS(status); 1675 1676 status = U_ZERO_ERROR; 
1677 utext_openUTF8(&nullText, "", -1, &status); 1678 uregex_setUText(re, &nullText, &status); 1679 TEST_ASSERT_SUCCESS(status); 1680 result = uregex_matches(re, 0, &status); 1681 TEST_ASSERT(result == TRUE); 1682 TEST_ASSERT_SUCCESS(status); 1683 1684 uregex_close(re); 1685 utext_close(&text1); 1686 utext_close(&nullText); 1687 } 1688 1689 1690 /* 1691 * lookingAt() Used in setText test. 1692 */ 1693 1694 1695 /* 1696 * find(), findNext, start, end, reset 1697 */ 1698 { 1699 UChar text1[50]; 1700 UBool result; 1701 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); 1702 status = U_ZERO_ERROR; 1703 re = uregex_openC("rx", 0, NULL, &status); 1704 1705 uregex_setText(re, text1, -1, &status); 1706 result = uregex_find(re, 0, &status); 1707 TEST_ASSERT(result == TRUE); 1708 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1709 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1710 TEST_ASSERT_SUCCESS(status); 1711 1712 result = uregex_find(re, 9, &status); 1713 TEST_ASSERT(result == TRUE); 1714 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 1715 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 1716 TEST_ASSERT_SUCCESS(status); 1717 1718 result = uregex_find(re, 14, &status); 1719 TEST_ASSERT(result == FALSE); 1720 TEST_ASSERT_SUCCESS(status); 1721 1722 status = U_ZERO_ERROR; 1723 uregex_reset(re, 0, &status); 1724 1725 result = uregex_findNext(re, &status); 1726 TEST_ASSERT(result == TRUE); 1727 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1728 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1729 TEST_ASSERT_SUCCESS(status); 1730 1731 result = uregex_findNext(re, &status); 1732 TEST_ASSERT(result == TRUE); 1733 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 1734 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 1735 TEST_ASSERT_SUCCESS(status); 1736 1737 status = U_ZERO_ERROR; 1738 uregex_reset(re, 12, &status); 1739 1740 result = uregex_findNext(re, &status); 1741 TEST_ASSERT(result == TRUE); 1742 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 1743 
TEST_ASSERT(uregex_end(re, 0, &status) == 15); 1744 TEST_ASSERT_SUCCESS(status); 1745 1746 result = uregex_findNext(re, &status); 1747 TEST_ASSERT(result == FALSE); 1748 TEST_ASSERT_SUCCESS(status); 1749 1750 uregex_close(re); 1751 } 1752 1753 /* 1754 * group() 1755 */ 1756 { 1757 UChar text1[80]; 1758 UText *actual; 1759 UBool result; 1760 1761 const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */ 1762 const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */ 1763 1764 1765 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 1766 1767 status = U_ZERO_ERROR; 1768 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 1769 TEST_ASSERT_SUCCESS(status); 1770 1771 uregex_setText(re, text1, -1, &status); 1772 result = uregex_find(re, 0, &status); 1773 TEST_ASSERT(result==TRUE); 1774 1775 /* Capture Group 0, the full match. Should succeed. */ 1776 status = U_ZERO_ERROR; 1777 actual = uregex_groupUTextDeep(re, 0, NULL, &status); 1778 TEST_ASSERT_SUCCESS(status); 1779 TEST_ASSERT_UTEXT(str_abcinteriordef, actual); 1780 utext_close(actual); 1781 1782 /* Capture Group 0 with shallow clone API. Should succeed. */ 1783 status = U_ZERO_ERROR; 1784 { 1785 int64_t group_len; 1786 int32_t len16; 1787 UErrorCode shallowStatus = U_ZERO_ERROR; 1788 int64_t nativeIndex; 1789 UChar *groupChars; 1790 UText groupText = UTEXT_INITIALIZER; 1791 1792 actual = uregex_groupUText(re, 0, NULL, &group_len, &status); 1793 TEST_ASSERT_SUCCESS(status); 1794 1795 nativeIndex = utext_getNativeIndex(actual); 1796 /* Following returns U_INDEX_OUTOFBOUNDS_ERROR... 
looks like a bug in ucstrFuncs UTextFuncs [utext.cpp] */ 1797 /* len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus); */ 1798 len16 = (int32_t)group_len; 1799 1800 groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1)); 1801 utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus); 1802 1803 utext_openUChars(&groupText, groupChars, len16, &shallowStatus); 1804 1805 TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText); 1806 utext_close(&groupText); 1807 free(groupChars); 1808 } 1809 utext_close(actual); 1810 1811 /* Capture group #1. Should succeed. */ 1812 status = U_ZERO_ERROR; 1813 actual = uregex_groupUTextDeep(re, 1, NULL, &status); 1814 TEST_ASSERT_SUCCESS(status); 1815 TEST_ASSERT_UTEXT(str_interior, actual); 1816 utext_close(actual); 1817 1818 /* Capture group out of range. Error. */ 1819 status = U_ZERO_ERROR; 1820 actual = uregex_groupUTextDeep(re, 2, NULL, &status); 1821 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1822 TEST_ASSERT(utext_nativeLength(actual) == 0); 1823 utext_close(actual); 1824 1825 uregex_close(re); 1826 1827 } 1828 1829 /* 1830 * replaceFirst() 1831 */ 1832 { 1833 UChar text1[80]; 1834 UChar text2[80]; 1835 UText replText = UTEXT_INITIALIZER; 1836 UText *result; 1837 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */ 1838 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. 
*/ 1839 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */ 1840 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1841 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */ 1842 status = U_ZERO_ERROR; 1843 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 1844 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1845 utext_openUTF8(&replText, str_1x, -1, &status); 1846 1847 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1848 TEST_ASSERT_SUCCESS(status); 1849 1850 /* Normal case, with match */ 1851 uregex_setText(re, text1, -1, &status); 1852 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1853 TEST_ASSERT_SUCCESS(status); 1854 TEST_ASSERT_UTEXT(str_Replxxx, result); 1855 utext_close(result); 1856 1857 /* No match. Text should copy to output with no changes. 
*/ 1858 uregex_setText(re, text2, -1, &status); 1859 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1860 TEST_ASSERT_SUCCESS(status); 1861 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1862 utext_close(result); 1863 1864 /* Unicode escapes */ 1865 uregex_setText(re, text1, -1, &status); 1866 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status); 1867 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1868 TEST_ASSERT_SUCCESS(status); 1869 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result); 1870 utext_close(result); 1871 1872 uregex_close(re); 1873 utext_close(&replText); 1874 } 1875 1876 1877 /* 1878 * replaceAll() 1879 */ 1880 { 1881 UChar text1[80]; 1882 UChar text2[80]; 1883 UText replText = UTEXT_INITIALIZER; 1884 UText *result; 1885 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1886 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */ 1887 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ 1888 status = U_ZERO_ERROR; 1889 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 1890 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1891 utext_openUTF8(&replText, str_1, -1, &status); 1892 1893 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1894 TEST_ASSERT_SUCCESS(status); 1895 1896 /* Normal case, with match */ 1897 uregex_setText(re, text1, -1, &status); 1898 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1899 TEST_ASSERT_SUCCESS(status); 1900 TEST_ASSERT_UTEXT(str_Replaceaa1, result); 1901 utext_close(result); 1902 1903 /* No match. Text should copy to output with no changes. 
*/ 1904 uregex_setText(re, text2, -1, &status); 1905 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1906 TEST_ASSERT_SUCCESS(status); 1907 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1908 utext_close(result); 1909 1910 uregex_close(re); 1911 utext_close(&replText); 1912 } 1913 1914 1915 /* 1916 * appendReplacement() 1917 */ 1918 { 1919 UChar text[100]; 1920 UChar repl[100]; 1921 UChar buf[100]; 1922 UChar *bufPtr; 1923 int32_t bufCap; 1924 1925 status = U_ZERO_ERROR; 1926 re = uregex_openC(".*", 0, 0, &status); 1927 TEST_ASSERT_SUCCESS(status); 1928 1929 u_uastrncpy(text, "whatever", sizeof(text)/2); 1930 u_uastrncpy(repl, "some other", sizeof(repl)/2); 1931 uregex_setText(re, text, -1, &status); 1932 1933 /* match covers whole target string */ 1934 uregex_find(re, 0, &status); 1935 TEST_ASSERT_SUCCESS(status); 1936 bufPtr = buf; 1937 bufCap = sizeof(buf) / 2; 1938 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1939 TEST_ASSERT_SUCCESS(status); 1940 TEST_ASSERT_STRING("some other", buf, TRUE); 1941 1942 /* Match has \u \U escapes */ 1943 uregex_find(re, 0, &status); 1944 TEST_ASSERT_SUCCESS(status); 1945 bufPtr = buf; 1946 bufCap = sizeof(buf) / 2; 1947 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); 1948 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1949 TEST_ASSERT_SUCCESS(status); 1950 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 1951 1952 uregex_close(re); 1953 } 1954 1955 1956 /* 1957 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll(). 
1958 */ 1959 1960 /* 1961 * splitUText() 1962 */ 1963 { 1964 UChar textToSplit[80]; 1965 UChar text2[80]; 1966 UText *fields[10]; 1967 int32_t numFields; 1968 int32_t i; 1969 1970 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); 1971 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1972 1973 status = U_ZERO_ERROR; 1974 re = uregex_openC(":", 0, NULL, &status); 1975 1976 1977 /* Simple split */ 1978 1979 uregex_setText(re, textToSplit, -1, &status); 1980 TEST_ASSERT_SUCCESS(status); 1981 1982 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1983 if (U_SUCCESS(status)) { 1984 memset(fields, 0, sizeof(fields)); 1985 numFields = uregex_splitUText(re, fields, 10, &status); 1986 TEST_ASSERT_SUCCESS(status); 1987 1988 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1989 if(U_SUCCESS(status)) { 1990 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */ 1991 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */ 1992 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */ 1993 TEST_ASSERT(numFields == 3); 1994 TEST_ASSERT_UTEXT(str_first, fields[0]); 1995 TEST_ASSERT_UTEXT(str_second, fields[1]); 1996 TEST_ASSERT_UTEXT(str_third, fields[2]); 1997 TEST_ASSERT(fields[3] == NULL); 1998 } 1999 for(i = 0; i < numFields; i++) { 2000 utext_close(fields[i]); 2001 } 2002 } 2003 2004 uregex_close(re); 2005 2006 2007 /* Split with too few output strings available */ 2008 status = U_ZERO_ERROR; 2009 re = uregex_openC(":", 0, NULL, &status); 2010 uregex_setText(re, textToSplit, -1, &status); 2011 TEST_ASSERT_SUCCESS(status); 2012 2013 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2014 if(U_SUCCESS(status)) { 2015 fields[0] = NULL; 2016 fields[1] = NULL; 2017 fields[2] = &patternText; 2018 numFields = uregex_splitUText(re, fields, 2, &status); 2019 TEST_ASSERT_SUCCESS(status); 2020 2021 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2022 if(U_SUCCESS(status)) { 2023 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2024 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */ 2025 TEST_ASSERT(numFields == 2); 2026 TEST_ASSERT_UTEXT(str_first, fields[0]); 2027 TEST_ASSERT_UTEXT(str_secondthird, fields[1]); 2028 TEST_ASSERT(fields[2] == &patternText); 2029 } 2030 for(i = 0; i < numFields; i++) { 2031 utext_close(fields[i]); 2032 } 2033 } 2034 2035 uregex_close(re); 2036 } 2037 2038 /* splitUText(), part 2. Patterns with capture groups. The capture group text 2039 * comes out as additional fields. */ 2040 { 2041 UChar textToSplit[80]; 2042 UText *fields[10]; 2043 int32_t numFields; 2044 int32_t i; 2045 2046 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); 2047 2048 status = U_ZERO_ERROR; 2049 re = uregex_openC("<(.*?)>", 0, NULL, &status); 2050 2051 uregex_setText(re, textToSplit, -1, &status); 2052 TEST_ASSERT_SUCCESS(status); 2053 2054 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2055 if(U_SUCCESS(status)) { 2056 memset(fields, 0, sizeof(fields)); 2057 numFields = uregex_splitUText(re, fields, 10, &status); 2058 TEST_ASSERT_SUCCESS(status); 2059 2060 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2061 if(U_SUCCESS(status)) { 2062 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2063 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2064 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2065 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2066 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2067 2068 TEST_ASSERT(numFields == 5); 2069 TEST_ASSERT_UTEXT(str_first, fields[0]); 2070 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2071 TEST_ASSERT_UTEXT(str_second, fields[2]); 2072 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2073 TEST_ASSERT_UTEXT(str_third, fields[4]); 2074 TEST_ASSERT(fields[5] == NULL); 2075 } 2076 for(i = 0; i < numFields; i++) { 2077 utext_close(fields[i]); 2078 } 2079 } 2080 2081 /* Split with too few output strings available (2) */ 2082 status = U_ZERO_ERROR; 2083 fields[0] = NULL; 2084 fields[1] = NULL; 2085 fields[2] = &patternText; 2086 numFields = uregex_splitUText(re, fields, 2, &status); 2087 TEST_ASSERT_SUCCESS(status); 2088 2089 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2090 if(U_SUCCESS(status)) { 2091 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2092 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2093 TEST_ASSERT(numFields == 2); 2094 TEST_ASSERT_UTEXT(str_first, fields[0]); 2095 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]); 2096 TEST_ASSERT(fields[2] == &patternText); 2097 } 2098 for(i = 0; i < numFields; i++) { 2099 utext_close(fields[i]); 2100 } 2101 2102 2103 /* Split with too few output strings available (3) */ 2104 status = U_ZERO_ERROR; 2105 fields[0] = NULL; 2106 fields[1] = NULL; 2107 fields[2] = NULL; 2108 fields[3] = &patternText; 2109 numFields = uregex_splitUText(re, fields, 3, &status); 2110 TEST_ASSERT_SUCCESS(status); 2111 2112 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2113 if(U_SUCCESS(status)) { 2114 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2115 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2116 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2117 TEST_ASSERT(numFields == 3); 2118 TEST_ASSERT_UTEXT(str_first, fields[0]); 2119 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2120 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]); 2121 TEST_ASSERT(fields[3] == &patternText); 2122 } 2123 for(i = 0; i < numFields; i++) { 2124 utext_close(fields[i]); 2125 } 2126 2127 /* Split with just enough output strings available (5) */ 2128 status = U_ZERO_ERROR; 2129 fields[0] = NULL; 2130 fields[1] = NULL; 2131 fields[2] = NULL; 2132 fields[3] = NULL; 2133 fields[4] = NULL; 2134 fields[5] = &patternText; 2135 numFields = uregex_splitUText(re, fields, 5, &status); 2136 TEST_ASSERT_SUCCESS(status); 2137 2138 
/* The TEST_ASSERT_SUCCESS call above should change too... */ 2139 if(U_SUCCESS(status)) { 2140 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2141 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2142 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2143 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2144 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2145 2146 TEST_ASSERT(numFields == 5); 2147 TEST_ASSERT_UTEXT(str_first, fields[0]); 2148 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2149 TEST_ASSERT_UTEXT(str_second, fields[2]); 2150 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2151 TEST_ASSERT_UTEXT(str_third, fields[4]); 2152 TEST_ASSERT(fields[5] == &patternText); 2153 } 2154 for(i = 0; i < numFields; i++) { 2155 utext_close(fields[i]); 2156 } 2157 2158 /* Split, end of text is a field delimiter. */ 2159 status = U_ZERO_ERROR; 2160 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status); 2161 TEST_ASSERT_SUCCESS(status); 2162 2163 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2164 if(U_SUCCESS(status)) { 2165 memset(fields, 0, sizeof(fields)); 2166 fields[9] = &patternText; 2167 numFields = uregex_splitUText(re, fields, 9, &status); 2168 TEST_ASSERT_SUCCESS(status); 2169 2170 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2171 if(U_SUCCESS(status)) { 2172 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2173 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2174 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2175 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2176 const char str_empty[] = { 0x00 }; 2177 2178 TEST_ASSERT(numFields == 5); 2179 TEST_ASSERT_UTEXT(str_first, fields[0]); 2180 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2181 TEST_ASSERT_UTEXT(str_second, fields[2]); 2182 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2183 TEST_ASSERT_UTEXT(str_empty, fields[4]); 2184 TEST_ASSERT(fields[5] == NULL); 2185 TEST_ASSERT(fields[8] == NULL); 2186 TEST_ASSERT(fields[9] == &patternText); 2187 } 2188 for(i = 0; i < numFields; i++) { 2189 utext_close(fields[i]); 2190 } 2191 } 2192 2193 uregex_close(re); 2194 } 2195 utext_close(&patternText); 2196 } 2197 2198 2199 static void TestRefreshInput(void) { 2200 /* 2201 * RefreshInput changes out the input of a URegularExpression without 2202 * changing anything else in the match state. Used with Java JNI, 2203 * when Java moves the underlying string storage. This test 2204 * runs a find() loop, moving the text after the first match. 2205 * The right number of matches should still be found. 
2206 */ 2207 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */ 2208 UChar movedStr[] = { 0, 0, 0, 0, 0, 0}; 2209 UErrorCode status = U_ZERO_ERROR; 2210 URegularExpression *re; 2211 UText ut1 = UTEXT_INITIALIZER; 2212 UText ut2 = UTEXT_INITIALIZER; 2213 2214 re = uregex_openC("[ABC]", 0, 0, &status); 2215 TEST_ASSERT_SUCCESS(status); 2216 2217 utext_openUChars(&ut1, testStr, -1, &status); 2218 TEST_ASSERT_SUCCESS(status); 2219 uregex_setUText(re, &ut1, &status); 2220 TEST_ASSERT_SUCCESS(status); 2221 2222 /* Find the first match "A" in the original string */ 2223 TEST_ASSERT(uregex_findNext(re, &status)); 2224 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 2225 2226 /* Move the string, kill the original string. */ 2227 u_strcpy(movedStr, testStr); 2228 u_memset(testStr, 0, u_strlen(testStr)); 2229 utext_openUChars(&ut2, movedStr, -1, &status); 2230 TEST_ASSERT_SUCCESS(status); 2231 uregex_refreshUText(re, &ut2, &status); 2232 TEST_ASSERT_SUCCESS(status); 2233 2234 /* Find the following two matches, now working in the moved string. */ 2235 TEST_ASSERT(uregex_findNext(re, &status)); 2236 TEST_ASSERT(uregex_start(re, 0, &status) == 2); 2237 TEST_ASSERT(uregex_findNext(re, &status)); 2238 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 2239 TEST_ASSERT(FALSE == uregex_findNext(re, &status)); 2240 2241 uregex_close(re); 2242 } 2243 2244 2245 static void TestBug8421(void) { 2246 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched 2247 * was failing. 
2248 */ 2249 URegularExpression *re; 2250 UErrorCode status = U_ZERO_ERROR; 2251 int32_t limit = -1; 2252 2253 re = uregex_openC("abc", 0, 0, &status); 2254 TEST_ASSERT_SUCCESS(status); 2255 2256 limit = uregex_getTimeLimit(re, &status); 2257 TEST_ASSERT_SUCCESS(status); 2258 TEST_ASSERT(limit == 0); 2259 2260 uregex_setTimeLimit(re, 100, &status); 2261 TEST_ASSERT_SUCCESS(status); 2262 limit = uregex_getTimeLimit(re, &status); 2263 TEST_ASSERT_SUCCESS(status); 2264 TEST_ASSERT(limit == 100); 2265 2266 uregex_close(re); 2267 } 2268 2269 2270 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 2271