1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 2004-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 /******************************************************************************** 9 * 10 * File reapits.c 11 * 12 *********************************************************************************/ 13 /*C API TEST FOR Regular Expressions */ 14 /** 15 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't 16 * try to test the full functionality. It just calls each function and verifies that it 17 * works on a basic level. 18 * 19 * More complete testing of regular expression functionality is done with the C++ tests. 20 **/ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 25 26 #include <stdlib.h> 27 #include <string.h> 28 #include "unicode/uloc.h" 29 #include "unicode/uregex.h" 30 #include "unicode/ustring.h" 31 #include "unicode/utext.h" 32 #include "cintltst.h" 33 #include "cmemory.h" 34 35 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 36 log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} 37 38 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 39 log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}} 40 41 /* 42 * TEST_SETUP and TEST_TEARDOWN 43 * macros to handle the boilerplate around setting up regex test cases. 44 * parameteres to setup: 45 * pattern: The regex pattern, a (char *) null terminated C string. 46 * testString: The string data, also a (char *) C string. 47 * flags: Regex flags to set when compiling the pattern 48 * 49 * Put arbitrary test code between SETUP and TEARDOWN. 50 * 're" is the compiled, ready-to-go regular expression. 51 */ 52 #define TEST_SETUP(pattern, testString, flags) { \ 53 UChar *srcString = NULL; \ 54 status = U_ZERO_ERROR; \ 55 re = uregex_openC(pattern, flags, NULL, &status); \ 56 TEST_ASSERT_SUCCESS(status); \ 57 srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \ 58 u_uastrncpy(srcString, testString, strlen(testString)+1); \ 59 uregex_setText(re, srcString, -1, &status); \ 60 TEST_ASSERT_SUCCESS(status); \ 61 if (U_SUCCESS(status)) { 62 63 #define TEST_TEARDOWN \ 64 } \ 65 TEST_ASSERT_SUCCESS(status); \ 66 uregex_close(re); \ 67 free(srcString); \ 68 } 69 70 71 /** 72 * @param expected utf-8 array of bytes to be expected 73 */ 74 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) { 75 char buf_inside_macro[120]; 76 int32_t len = (int32_t)strlen(expected); 77 UBool success; 78 if (nulTerm) { 79 u_austrncpy(buf_inside_macro, (actual), len+1); 80 buf_inside_macro[len+2] = 0; 81 success = (strcmp((expected), buf_inside_macro) == 0); 82 } else { 83 u_austrncpy(buf_inside_macro, (actual), len); 84 buf_inside_macro[len+1] = 0; 85 success = (strncmp((expected), buf_inside_macro, len) == 0); 86 } 87 if (success == FALSE) { 88 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", 89 file, line, (expected), buf_inside_macro); 90 } 91 } 92 93 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__) 94 95 96 static UBool equals_utf8_utext(const char *utf8, UText *utext) { 97 int32_t u8i = 0; 98 UChar32 u8c = 0; 99 UChar32 utc = 0; 100 UBool stringsEqual = TRUE; 101 utext_setNativeIndex(utext, 0); 102 for (;;) { 103 U8_NEXT_UNSAFE(utf8, u8i, u8c); 104 utc = utext_next32(utext); 105 if (u8c == 0 && utc == U_SENTINEL) { 106 break; 107 } 108 if (u8c != utc || u8c == 0) { 109 stringsEqual = FALSE; 110 break; 111 } 112 } 113 return stringsEqual; 114 } 115 116 117 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) { 118 utext_setNativeIndex(actual, 0); 119 if (!equals_utf8_utext(expected, actual)) { 120 UChar32 c; 121 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected); 122 c = utext_next32From(actual, 0); 123 while (c != U_SENTINEL) { 124 if (0x20<c && c <0x7e) { 125 log_err("%c", c); 126 } else { 127 log_err("%#x", c); 128 } 129 c = UTEXT_NEXT32(actual); 130 } 131 log_err("\"\n"); 132 } 133 } 134 135 /* 136 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual) 137 * Note: Expected is a UTF-8 encoded string, _not_ the system code page. 138 */ 139 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__) 140 141 static UBool testUTextEqual(UText *uta, UText *utb) { 142 UChar32 ca = 0; 143 UChar32 cb = 0; 144 utext_setNativeIndex(uta, 0); 145 utext_setNativeIndex(utb, 0); 146 do { 147 ca = utext_next32(uta); 148 cb = utext_next32(utb); 149 if (ca != cb) { 150 break; 151 } 152 } while (ca != U_SENTINEL); 153 return ca == cb; 154 } 155 156 157 158 159 static void TestRegexCAPI(void); 160 static void TestBug4315(void); 161 static void TestUTextAPI(void); 162 static void TestRefreshInput(void); 163 static void TestBug8421(void); 164 static void TestBug10815(void); 165 166 void addURegexTest(TestNode** root); 167 168 void addURegexTest(TestNode** root) 169 { 170 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); 171 addTest(root, &TestBug4315, "regex/TestBug4315"); 172 addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); 173 addTest(root, &TestRefreshInput, "regex/TestRefreshInput"); 174 addTest(root, &TestBug8421, "regex/TestBug8421"); 175 addTest(root, &TestBug10815, "regex/TestBug10815"); 176 } 177 178 /* 179 * Call back function and context struct used for testing 180 * regular expression user callbacks. This test is mostly the same as 181 * the corresponding C++ test in intltest. 182 */ 183 typedef struct callBackContext { 184 int32_t maxCalls; 185 int32_t numCalls; 186 int32_t lastSteps; 187 } callBackContext; 188 189 static UBool U_EXPORT2 U_CALLCONV 190 TestCallbackFn(const void *context, int32_t steps) { 191 callBackContext *info = (callBackContext *)context; 192 if (info->lastSteps+1 != steps) { 193 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps); 194 } 195 info->lastSteps = steps; 196 info->numCalls++; 197 return (info->numCalls < info->maxCalls); 198 } 199 200 /* 201 * Regular Expression C API Tests 202 */ 203 static void TestRegexCAPI(void) { 204 UErrorCode status = U_ZERO_ERROR; 205 URegularExpression *re; 206 UChar pat[200]; 207 UChar *minus1; 208 209 memset(&minus1, -1, sizeof(minus1)); 210 211 /* Mimimalist open/close */ 212 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat)); 213 re = uregex_open(pat, -1, 0, 0, &status); 214 if (U_FAILURE(status)) { 215 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 216 return; 217 } 218 uregex_close(re); 219 220 /* Open with all flag values set */ 221 status = U_ZERO_ERROR; 222 re = uregex_open(pat, -1, 223 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL, 224 0, &status); 225 TEST_ASSERT_SUCCESS(status); 226 uregex_close(re); 227 228 /* Open with an invalid flag */ 229 status = U_ZERO_ERROR; 230 re = uregex_open(pat, -1, 0x40000000, 0, &status); 231 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 232 uregex_close(re); 233 234 /* Open with an unimplemented flag */ 235 status = U_ZERO_ERROR; 236 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status); 237 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED); 238 uregex_close(re); 239 240 /* openC with an invalid parameter */ 241 status = U_ZERO_ERROR; 242 re = uregex_openC(NULL, 243 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 244 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 245 246 /* openC with an invalid parameter */ 247 status = U_USELESS_COLLATOR_ERROR; 248 re = uregex_openC(NULL, 249 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 250 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL); 251 252 /* openC open from a C string */ 253 { 254 const UChar *p; 255 int32_t len; 256 status = U_ZERO_ERROR; 257 re = uregex_openC("abc*", 0, 0, &status); 258 TEST_ASSERT_SUCCESS(status); 259 p = uregex_pattern(re, &len, &status); 260 TEST_ASSERT_SUCCESS(status); 261 262 /* The TEST_ASSERT_SUCCESS above should change too... */ 263 if(U_SUCCESS(status)) { 264 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat)); 265 TEST_ASSERT(u_strcmp(pat, p) == 0); 266 TEST_ASSERT(len==(int32_t)strlen("abc*")); 267 } 268 269 uregex_close(re); 270 271 /* TODO: Open with ParseError parameter */ 272 } 273 274 /* 275 * clone 276 */ 277 { 278 URegularExpression *clone1; 279 URegularExpression *clone2; 280 URegularExpression *clone3; 281 UChar testString1[30]; 282 UChar testString2[30]; 283 UBool result; 284 285 286 status = U_ZERO_ERROR; 287 re = uregex_openC("abc*", 0, 0, &status); 288 TEST_ASSERT_SUCCESS(status); 289 clone1 = uregex_clone(re, &status); 290 TEST_ASSERT_SUCCESS(status); 291 TEST_ASSERT(clone1 != NULL); 292 293 status = U_ZERO_ERROR; 294 clone2 = uregex_clone(re, &status); 295 TEST_ASSERT_SUCCESS(status); 296 TEST_ASSERT(clone2 != NULL); 297 uregex_close(re); 298 299 status = U_ZERO_ERROR; 300 clone3 = uregex_clone(clone2, &status); 301 TEST_ASSERT_SUCCESS(status); 302 TEST_ASSERT(clone3 != NULL); 303 304 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat)); 305 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat)); 306 307 status = U_ZERO_ERROR; 308 uregex_setText(clone1, testString1, -1, &status); 309 TEST_ASSERT_SUCCESS(status); 310 result = uregex_lookingAt(clone1, 0, &status); 311 TEST_ASSERT_SUCCESS(status); 312 TEST_ASSERT(result==TRUE); 313 314 status = U_ZERO_ERROR; 315 uregex_setText(clone2, testString2, -1, &status); 316 TEST_ASSERT_SUCCESS(status); 317 result = uregex_lookingAt(clone2, 0, &status); 318 TEST_ASSERT_SUCCESS(status); 319 TEST_ASSERT(result==FALSE); 320 result = uregex_find(clone2, 0, &status); 321 TEST_ASSERT_SUCCESS(status); 322 TEST_ASSERT(result==TRUE); 323 324 uregex_close(clone1); 325 uregex_close(clone2); 326 uregex_close(clone3); 327 328 } 329 330 /* 331 * pattern() 332 */ 333 { 334 const UChar *resultPat; 335 int32_t resultLen; 336 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); 337 status = U_ZERO_ERROR; 338 re = uregex_open(pat, -1, 0, NULL, &status); 339 resultPat = uregex_pattern(re, &resultLen, &status); 340 TEST_ASSERT_SUCCESS(status); 341 342 /* The TEST_ASSERT_SUCCESS above should change too... */ 343 if (U_SUCCESS(status)) { 344 TEST_ASSERT(resultLen == -1); 345 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 346 } 347 348 uregex_close(re); 349 350 status = U_ZERO_ERROR; 351 re = uregex_open(pat, 3, 0, NULL, &status); 352 resultPat = uregex_pattern(re, &resultLen, &status); 353 TEST_ASSERT_SUCCESS(status); 354 TEST_ASSERT_SUCCESS(status); 355 356 /* The TEST_ASSERT_SUCCESS above should change too... */ 357 if (U_SUCCESS(status)) { 358 TEST_ASSERT(resultLen == 3); 359 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 360 TEST_ASSERT(u_strlen(resultPat) == 3); 361 } 362 363 uregex_close(re); 364 } 365 366 /* 367 * flags() 368 */ 369 { 370 int32_t t; 371 372 status = U_ZERO_ERROR; 373 re = uregex_open(pat, -1, 0, NULL, &status); 374 t = uregex_flags(re, &status); 375 TEST_ASSERT_SUCCESS(status); 376 TEST_ASSERT(t == 0); 377 uregex_close(re); 378 379 status = U_ZERO_ERROR; 380 re = uregex_open(pat, -1, 0, NULL, &status); 381 t = uregex_flags(re, &status); 382 TEST_ASSERT_SUCCESS(status); 383 TEST_ASSERT(t == 0); 384 uregex_close(re); 385 386 status = U_ZERO_ERROR; 387 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status); 388 t = uregex_flags(re, &status); 389 TEST_ASSERT_SUCCESS(status); 390 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL)); 391 uregex_close(re); 392 } 393 394 /* 395 * setText() and lookingAt() 396 */ 397 { 398 UChar text1[50]; 399 UChar text2[50]; 400 UBool result; 401 402 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1)); 403 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2)); 404 status = U_ZERO_ERROR; 405 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); 406 re = uregex_open(pat, -1, 0, NULL, &status); 407 TEST_ASSERT_SUCCESS(status); 408 409 /* Operation before doing a setText should fail... */ 410 status = U_ZERO_ERROR; 411 uregex_lookingAt(re, 0, &status); 412 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 413 414 status = U_ZERO_ERROR; 415 uregex_setText(re, text1, -1, &status); 416 result = uregex_lookingAt(re, 0, &status); 417 TEST_ASSERT(result == TRUE); 418 TEST_ASSERT_SUCCESS(status); 419 420 status = U_ZERO_ERROR; 421 uregex_setText(re, text2, -1, &status); 422 result = uregex_lookingAt(re, 0, &status); 423 TEST_ASSERT(result == FALSE); 424 TEST_ASSERT_SUCCESS(status); 425 426 status = U_ZERO_ERROR; 427 uregex_setText(re, text1, -1, &status); 428 result = uregex_lookingAt(re, 0, &status); 429 TEST_ASSERT(result == TRUE); 430 TEST_ASSERT_SUCCESS(status); 431 432 status = U_ZERO_ERROR; 433 uregex_setText(re, text1, 5, &status); 434 result = uregex_lookingAt(re, 0, &status); 435 TEST_ASSERT(result == FALSE); 436 TEST_ASSERT_SUCCESS(status); 437 438 status = U_ZERO_ERROR; 439 uregex_setText(re, text1, 6, &status); 440 result = uregex_lookingAt(re, 0, &status); 441 TEST_ASSERT(result == TRUE); 442 TEST_ASSERT_SUCCESS(status); 443 444 uregex_close(re); 445 } 446 447 448 /* 449 * getText() 450 */ 451 { 452 UChar text1[50]; 453 UChar text2[50]; 454 const UChar *result; 455 int32_t textLength; 456 457 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1)); 458 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2)); 459 status = U_ZERO_ERROR; 460 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); 461 re = uregex_open(pat, -1, 0, NULL, &status); 462 463 uregex_setText(re, text1, -1, &status); 464 result = uregex_getText(re, &textLength, &status); 465 TEST_ASSERT(result == text1); 466 TEST_ASSERT(textLength == -1); 467 TEST_ASSERT_SUCCESS(status); 468 469 status = U_ZERO_ERROR; 470 uregex_setText(re, text2, 7, &status); 471 result = uregex_getText(re, &textLength, &status); 472 TEST_ASSERT(result == text2); 473 TEST_ASSERT(textLength == 7); 474 TEST_ASSERT_SUCCESS(status); 475 476 status = U_ZERO_ERROR; 477 uregex_setText(re, text2, 4, &status); 478 result = uregex_getText(re, &textLength, &status); 479 TEST_ASSERT(result == text2); 480 TEST_ASSERT(textLength == 4); 481 TEST_ASSERT_SUCCESS(status); 482 uregex_close(re); 483 } 484 485 /* 486 * matches() 487 */ 488 { 489 UChar text1[50]; 490 UBool result; 491 int len; 492 UChar nullString[] = {0,0,0}; 493 494 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1)); 495 status = U_ZERO_ERROR; 496 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); 497 re = uregex_open(pat, -1, 0, NULL, &status); 498 499 uregex_setText(re, text1, -1, &status); 500 result = uregex_matches(re, 0, &status); 501 TEST_ASSERT(result == FALSE); 502 TEST_ASSERT_SUCCESS(status); 503 504 status = U_ZERO_ERROR; 505 uregex_setText(re, text1, 6, &status); 506 result = uregex_matches(re, 0, &status); 507 TEST_ASSERT(result == TRUE); 508 TEST_ASSERT_SUCCESS(status); 509 510 status = U_ZERO_ERROR; 511 uregex_setText(re, text1, 6, &status); 512 result = uregex_matches(re, 1, &status); 513 TEST_ASSERT(result == FALSE); 514 TEST_ASSERT_SUCCESS(status); 515 uregex_close(re); 516 517 status = U_ZERO_ERROR; 518 re = uregex_openC(".?", 0, NULL, &status); 519 uregex_setText(re, text1, -1, &status); 520 len = u_strlen(text1); 521 result = uregex_matches(re, len, &status); 522 TEST_ASSERT(result == TRUE); 523 TEST_ASSERT_SUCCESS(status); 524 525 status = U_ZERO_ERROR; 526 uregex_setText(re, nullString, -1, &status); 527 TEST_ASSERT_SUCCESS(status); 528 result = uregex_matches(re, 0, &status); 529 TEST_ASSERT(result == TRUE); 530 TEST_ASSERT_SUCCESS(status); 531 uregex_close(re); 532 } 533 534 535 /* 536 * lookingAt() Used in setText test. 537 */ 538 539 540 /* 541 * find(), findNext, start, end, reset 542 */ 543 { 544 UChar text1[50]; 545 UBool result; 546 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1)); 547 status = U_ZERO_ERROR; 548 re = uregex_openC("rx", 0, NULL, &status); 549 550 uregex_setText(re, text1, -1, &status); 551 result = uregex_find(re, 0, &status); 552 TEST_ASSERT(result == TRUE); 553 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 554 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 555 TEST_ASSERT_SUCCESS(status); 556 557 result = uregex_find(re, 9, &status); 558 TEST_ASSERT(result == TRUE); 559 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 560 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 561 TEST_ASSERT_SUCCESS(status); 562 563 result = uregex_find(re, 14, &status); 564 TEST_ASSERT(result == FALSE); 565 TEST_ASSERT_SUCCESS(status); 566 567 status = U_ZERO_ERROR; 568 uregex_reset(re, 0, &status); 569 570 result = uregex_findNext(re, &status); 571 TEST_ASSERT(result == TRUE); 572 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 573 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 574 TEST_ASSERT_SUCCESS(status); 575 576 result = uregex_findNext(re, &status); 577 TEST_ASSERT(result == TRUE); 578 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 579 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 580 TEST_ASSERT_SUCCESS(status); 581 582 status = U_ZERO_ERROR; 583 uregex_reset(re, 12, &status); 584 585 result = uregex_findNext(re, &status); 586 TEST_ASSERT(result == TRUE); 587 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 588 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 589 TEST_ASSERT_SUCCESS(status); 590 591 result = uregex_findNext(re, &status); 592 TEST_ASSERT(result == FALSE); 593 TEST_ASSERT_SUCCESS(status); 594 595 uregex_close(re); 596 } 597 598 /* 599 * groupCount 600 */ 601 { 602 int32_t result; 603 604 status = U_ZERO_ERROR; 605 re = uregex_openC("abc", 0, NULL, &status); 606 result = uregex_groupCount(re, &status); 607 TEST_ASSERT_SUCCESS(status); 608 TEST_ASSERT(result == 0); 609 uregex_close(re); 610 611 status = U_ZERO_ERROR; 612 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status); 613 result = uregex_groupCount(re, &status); 614 TEST_ASSERT_SUCCESS(status); 615 TEST_ASSERT(result == 3); 616 uregex_close(re); 617 618 } 619 620 621 /* 622 * group() 623 */ 624 { 625 UChar text1[80]; 626 UChar buf[80]; 627 UBool result; 628 int32_t resultSz; 629 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1)); 630 631 status = U_ZERO_ERROR; 632 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 633 TEST_ASSERT_SUCCESS(status); 634 635 636 uregex_setText(re, text1, -1, &status); 637 result = uregex_find(re, 0, &status); 638 TEST_ASSERT(result==TRUE); 639 640 /* Capture Group 0, the full match. Should succeed. */ 641 status = U_ZERO_ERROR; 642 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status); 643 TEST_ASSERT_SUCCESS(status); 644 TEST_ASSERT_STRING("abc interior def", buf, TRUE); 645 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 646 647 /* Capture group #1. Should succeed. */ 648 status = U_ZERO_ERROR; 649 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status); 650 TEST_ASSERT_SUCCESS(status); 651 TEST_ASSERT_STRING(" interior ", buf, TRUE); 652 TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); 653 654 /* Capture group out of range. Error. */ 655 status = U_ZERO_ERROR; 656 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status); 657 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 658 659 /* NULL buffer, pure pre-flight */ 660 status = U_ZERO_ERROR; 661 resultSz = uregex_group(re, 0, NULL, 0, &status); 662 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 663 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 664 665 /* Too small buffer, truncated string */ 666 status = U_ZERO_ERROR; 667 memset(buf, -1, sizeof(buf)); 668 resultSz = uregex_group(re, 0, buf, 5, &status); 669 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 670 TEST_ASSERT_STRING("abc i", buf, FALSE); 671 TEST_ASSERT(buf[5] == (UChar)0xffff); 672 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 673 674 /* Output string just fits buffer, no NUL term. */ 675 status = U_ZERO_ERROR; 676 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status); 677 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 678 TEST_ASSERT_STRING("abc interior def", buf, FALSE); 679 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 680 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff); 681 682 uregex_close(re); 683 684 } 685 686 /* 687 * Regions 688 */ 689 690 691 /* SetRegion(), getRegion() do something */ 692 TEST_SETUP(".*", "0123456789ABCDEF", 0) 693 UChar resultString[40]; 694 TEST_ASSERT(uregex_regionStart(re, &status) == 0); 695 TEST_ASSERT(uregex_regionEnd(re, &status) == 16); 696 uregex_setRegion(re, 3, 6, &status); 697 TEST_ASSERT(uregex_regionStart(re, &status) == 3); 698 TEST_ASSERT(uregex_regionEnd(re, &status) == 6); 699 TEST_ASSERT(uregex_findNext(re, &status)); 700 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3) 701 TEST_ASSERT_STRING("345", resultString, TRUE); 702 TEST_TEARDOWN; 703 704 /* find(start=-1) uses regions */ 705 TEST_SETUP(".*", "0123456789ABCDEF", 0); 706 uregex_setRegion(re, 4, 6, &status); 707 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); 708 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 709 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 710 TEST_TEARDOWN; 711 712 /* find (start >=0) does not use regions */ 713 TEST_SETUP(".*", "0123456789ABCDEF", 0); 714 uregex_setRegion(re, 4, 6, &status); 715 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 716 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 717 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 718 TEST_TEARDOWN; 719 720 /* findNext() obeys regions */ 721 TEST_SETUP(".", "0123456789ABCDEF", 0); 722 uregex_setRegion(re, 4, 6, &status); 723 TEST_ASSERT(uregex_findNext(re,&status) == TRUE); 724 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 725 TEST_ASSERT(uregex_findNext(re, &status) == TRUE); 726 TEST_ASSERT(uregex_start(re, 0, &status) == 5); 727 TEST_ASSERT(uregex_findNext(re, &status) == FALSE); 728 TEST_TEARDOWN; 729 730 /* matches(start=-1) uses regions */ 731 /* Also, verify that non-greedy *? succeeds in finding the full match. */ 732 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 733 uregex_setRegion(re, 4, 6, &status); 734 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE); 735 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 736 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 737 TEST_TEARDOWN; 738 739 /* matches (start >=0) does not use regions */ 740 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 741 uregex_setRegion(re, 4, 6, &status); 742 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE); 743 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 744 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 745 TEST_TEARDOWN; 746 747 /* lookingAt(start=-1) uses regions */ 748 /* Also, verify that non-greedy *? finds the first (shortest) match. */ 749 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 750 uregex_setRegion(re, 4, 6, &status); 751 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE); 752 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 753 TEST_ASSERT(uregex_end(re, 0, &status) == 4); 754 TEST_TEARDOWN; 755 756 /* lookingAt (start >=0) does not use regions */ 757 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 758 uregex_setRegion(re, 4, 6, &status); 759 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE); 760 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 761 TEST_ASSERT(uregex_end(re, 0, &status) == 0); 762 TEST_TEARDOWN; 763 764 /* hitEnd() */ 765 TEST_SETUP("[a-f]*", "abcdefghij", 0); 766 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 767 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE); 768 TEST_TEARDOWN; 769 770 TEST_SETUP("[a-f]*", "abcdef", 0); 771 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 772 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE); 773 TEST_TEARDOWN; 774 775 /* requireEnd */ 776 TEST_SETUP("abcd", "abcd", 0); 777 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 778 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE); 779 TEST_TEARDOWN; 780 781 TEST_SETUP("abcd$", "abcd", 0); 782 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 783 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE); 784 TEST_TEARDOWN; 785 786 /* anchoringBounds */ 787 TEST_SETUP("abc$", "abcdef", 0); 788 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE); 789 uregex_useAnchoringBounds(re, FALSE, &status); 790 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE); 791 792 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); 793 uregex_useAnchoringBounds(re, TRUE, &status); 794 uregex_setRegion(re, 0, 3, &status); 795 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); 796 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 797 TEST_TEARDOWN; 798 799 /* Transparent Bounds */ 800 TEST_SETUP("abc(?=def)", "abcdef", 0); 801 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE); 802 uregex_useTransparentBounds(re, TRUE, &status); 803 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE); 804 805 uregex_useTransparentBounds(re, FALSE, &status); 806 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */ 807 uregex_setRegion(re, 0, 3, &status); 808 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */ 809 uregex_useTransparentBounds(re, TRUE, &status); 810 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */ 811 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 812 TEST_TEARDOWN; 813 814 815 /* 816 * replaceFirst() 817 */ 818 { 819 UChar text1[80]; 820 UChar text2[80]; 821 UChar replText[80]; 822 UChar buf[80]; 823 int32_t resultSz; 824 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 825 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 826 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText)); 827 828 status = U_ZERO_ERROR; 829 re = uregex_openC("x(.*?)x", 0, NULL, &status); 830 TEST_ASSERT_SUCCESS(status); 831 832 /* Normal case, with match */ 833 uregex_setText(re, text1, -1, &status); 834 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 835 TEST_ASSERT_SUCCESS(status); 836 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); 837 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 838 839 /* No match. Text should copy to output with no changes. */ 840 status = U_ZERO_ERROR; 841 uregex_setText(re, text2, -1, &status); 842 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 843 TEST_ASSERT_SUCCESS(status); 844 TEST_ASSERT_STRING("No match here.", buf, TRUE); 845 TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); 846 847 /* Match, output just fills buffer, no termination warning. */ 848 status = U_ZERO_ERROR; 849 uregex_setText(re, text1, -1, &status); 850 memset(buf, -1, sizeof(buf)); 851 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 852 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 853 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 854 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 855 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 856 857 /* Do the replaceFirst again, without first resetting anything. 858 * Should give the same results. 859 */ 860 status = U_ZERO_ERROR; 861 memset(buf, -1, sizeof(buf)); 862 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 863 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 864 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 865 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 866 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 867 868 /* NULL buffer, zero buffer length */ 869 status = U_ZERO_ERROR; 870 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status); 871 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 872 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 873 874 /* Buffer too small by one */ 875 status = U_ZERO_ERROR; 876 memset(buf, -1, sizeof(buf)); 877 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status); 878 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 879 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE); 880 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 881 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 882 883 uregex_close(re); 884 } 885 886 887 /* 888 * replaceAll() 889 */ 890 { 891 UChar text1[80]; /* "Replace xaax x1x x...x." */ 892 UChar text2[80]; /* "No match Here" */ 893 UChar replText[80]; /* "<$1>" */ 894 UChar replText2[80]; /* "<<$1>>" */ 895 const char * pattern = "x(.*?)x"; 896 const char * expectedResult = "Replace <aa> <1> <...>."; 897 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>."; 898 UChar buf[80]; 899 int32_t resultSize; 900 int32_t expectedResultSize; 901 int32_t expectedResultSize2; 902 int32_t i; 903 904 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 905 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 906 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText)); 907 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2)); 908 expectedResultSize = strlen(expectedResult); 909 expectedResultSize2 = strlen(expectedResult2); 910 911 status = U_ZERO_ERROR; 912 re = uregex_openC(pattern, 0, NULL, &status); 913 TEST_ASSERT_SUCCESS(status); 914 915 /* Normal case, with match */ 916 uregex_setText(re, text1, -1, &status); 917 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 918 TEST_ASSERT_SUCCESS(status); 919 TEST_ASSERT_STRING(expectedResult, buf, TRUE); 920 TEST_ASSERT(resultSize == expectedResultSize); 921 922 /* No match. Text should copy to output with no changes. */ 923 status = U_ZERO_ERROR; 924 uregex_setText(re, text2, -1, &status); 925 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 926 TEST_ASSERT_SUCCESS(status); 927 TEST_ASSERT_STRING("No match here.", buf, TRUE); 928 TEST_ASSERT(resultSize == u_strlen(text2)); 929 930 /* Match, output just fills buffer, no termination warning. */ 931 status = U_ZERO_ERROR; 932 uregex_setText(re, text1, -1, &status); 933 memset(buf, -1, sizeof(buf)); 934 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status); 935 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 936 TEST_ASSERT_STRING(expectedResult, buf, FALSE); 937 TEST_ASSERT(resultSize == expectedResultSize); 938 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 939 940 /* Do the replaceFirst again, without first resetting anything. 941 * Should give the same results. 942 */ 943 status = U_ZERO_ERROR; 944 memset(buf, -1, sizeof(buf)); 945 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status); 946 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 947 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); 948 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 949 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 950 951 /* NULL buffer, zero buffer length */ 952 status = U_ZERO_ERROR; 953 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status); 954 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 955 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 956 957 /* Buffer too small. Try every size, which will tickle edge cases 958 * in uregex_appendReplacement (used by replaceAll) */ 959 for (i=0; i<expectedResultSize; i++) { 960 char expected[80]; 961 status = U_ZERO_ERROR; 962 memset(buf, -1, sizeof(buf)); 963 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status); 964 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 965 strcpy(expected, expectedResult); 966 expected[i] = 0; 967 TEST_ASSERT_STRING(expected, buf, FALSE); 968 TEST_ASSERT(resultSize == expectedResultSize); 969 TEST_ASSERT(buf[i] == (UChar)0xffff); 970 } 971 972 /* Buffer too small. Same as previous test, except this time the replacement 973 * text is longer than the match capture group, making the length of the complete 974 * replacement longer than the original string. 975 */ 976 for (i=0; i<expectedResultSize2; i++) { 977 char expected[80]; 978 status = U_ZERO_ERROR; 979 memset(buf, -1, sizeof(buf)); 980 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status); 981 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 982 strcpy(expected, expectedResult2); 983 expected[i] = 0; 984 TEST_ASSERT_STRING(expected, buf, FALSE); 985 TEST_ASSERT(resultSize == expectedResultSize2); 986 TEST_ASSERT(buf[i] == (UChar)0xffff); 987 } 988 989 990 uregex_close(re); 991 } 992 993 994 /* 995 * appendReplacement() 996 */ 997 { 998 UChar text[100]; 999 UChar repl[100]; 1000 UChar buf[100]; 1001 UChar *bufPtr; 1002 int32_t bufCap; 1003 1004 1005 status = U_ZERO_ERROR; 1006 re = uregex_openC(".*", 0, 0, &status); 1007 TEST_ASSERT_SUCCESS(status); 1008 1009 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text)); 1010 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl)); 1011 uregex_setText(re, text, -1, &status); 1012 1013 /* match covers whole target string */ 1014 uregex_find(re, 0, &status); 1015 TEST_ASSERT_SUCCESS(status); 1016 bufPtr = buf; 1017 bufCap = UPRV_LENGTHOF(buf); 1018 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1019 TEST_ASSERT_SUCCESS(status); 1020 TEST_ASSERT_STRING("some other", buf, TRUE); 1021 1022 /* Match has \u \U escapes */ 1023 uregex_find(re, 0, &status); 1024 TEST_ASSERT_SUCCESS(status); 1025 bufPtr = buf; 1026 bufCap = UPRV_LENGTHOF(buf); 1027 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl)); 1028 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1029 TEST_ASSERT_SUCCESS(status); 1030 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 1031 1032 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */ 1033 status = U_ZERO_ERROR; 1034 uregex_find(re, 0, &status); 1035 TEST_ASSERT_SUCCESS(status); 1036 bufPtr = buf; 1037 status = U_BUFFER_OVERFLOW_ERROR; 1038 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status); 1039 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1040 1041 uregex_close(re); 1042 } 1043 1044 1045 /* 1046 * appendTail(). Checked in ReplaceFirst(), replaceAll(). 1047 */ 1048 1049 /* 1050 * split() 1051 */ 1052 { 1053 UChar textToSplit[80]; 1054 UChar text2[80]; 1055 UChar buf[200]; 1056 UChar *fields[10]; 1057 int32_t numFields; 1058 int32_t requiredCapacity; 1059 int32_t spaceNeeded; 1060 int32_t sz; 1061 1062 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit)); 1063 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1064 1065 status = U_ZERO_ERROR; 1066 re = uregex_openC(":", 0, NULL, &status); 1067 1068 1069 /* Simple split */ 1070 1071 uregex_setText(re, textToSplit, -1, &status); 1072 TEST_ASSERT_SUCCESS(status); 1073 1074 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1075 if (U_SUCCESS(status)) { 1076 memset(fields, -1, sizeof(fields)); 1077 numFields = 1078 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status); 1079 TEST_ASSERT_SUCCESS(status); 1080 1081 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1082 if(U_SUCCESS(status)) { 1083 TEST_ASSERT(numFields == 3); 1084 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1085 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1086 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1087 TEST_ASSERT(fields[3] == NULL); 1088 1089 spaceNeeded = u_strlen(textToSplit) - 1090 (numFields - 1) + /* Field delimiters do not appear in output */ 1091 numFields; /* Each field gets a NUL terminator */ 1092 1093 TEST_ASSERT(spaceNeeded == requiredCapacity); 1094 } 1095 } 1096 1097 uregex_close(re); 1098 1099 1100 /* Split with too few output strings available */ 1101 status = U_ZERO_ERROR; 1102 re = uregex_openC(":", 0, NULL, &status); 1103 uregex_setText(re, textToSplit, -1, &status); 1104 TEST_ASSERT_SUCCESS(status); 1105 1106 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1107 if(U_SUCCESS(status)) { 1108 memset(fields, -1, sizeof(fields)); 1109 numFields = 1110 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status); 1111 TEST_ASSERT_SUCCESS(status); 1112 1113 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1114 if(U_SUCCESS(status)) { 1115 TEST_ASSERT(numFields == 2); 1116 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1117 TEST_ASSERT_STRING(" second: third", fields[1], TRUE); 1118 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1119 1120 spaceNeeded = u_strlen(textToSplit) - 1121 (numFields - 1) + /* Field delimiters do not appear in output */ 1122 numFields; /* Each field gets a NUL terminator */ 1123 1124 TEST_ASSERT(spaceNeeded == requiredCapacity); 1125 1126 /* Split with a range of output buffer sizes. */ 1127 spaceNeeded = u_strlen(textToSplit) - 1128 (numFields - 1) + /* Field delimiters do not appear in output */ 1129 numFields; /* Each field gets a NUL terminator */ 1130 1131 for (sz=0; sz < spaceNeeded+1; sz++) { 1132 memset(fields, -1, sizeof(fields)); 1133 status = U_ZERO_ERROR; 1134 numFields = 1135 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status); 1136 if (sz >= spaceNeeded) { 1137 TEST_ASSERT_SUCCESS(status); 1138 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1139 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1140 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1141 } else { 1142 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1143 } 1144 TEST_ASSERT(numFields == 3); 1145 TEST_ASSERT(fields[3] == NULL); 1146 TEST_ASSERT(spaceNeeded == requiredCapacity); 1147 } 1148 } 1149 } 1150 1151 uregex_close(re); 1152 } 1153 1154 1155 1156 1157 /* Split(), part 2. Patterns with capture groups. The capture group text 1158 * comes out as additional fields. */ 1159 { 1160 UChar textToSplit[80]; 1161 UChar buf[200]; 1162 UChar *fields[10]; 1163 int32_t numFields; 1164 int32_t requiredCapacity; 1165 int32_t spaceNeeded; 1166 int32_t sz; 1167 1168 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit)); 1169 1170 status = U_ZERO_ERROR; 1171 re = uregex_openC("<(.*?)>", 0, NULL, &status); 1172 1173 uregex_setText(re, textToSplit, -1, &status); 1174 TEST_ASSERT_SUCCESS(status); 1175 1176 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1177 if(U_SUCCESS(status)) { 1178 memset(fields, -1, sizeof(fields)); 1179 numFields = 1180 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status); 1181 TEST_ASSERT_SUCCESS(status); 1182 1183 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1184 if(U_SUCCESS(status)) { 1185 TEST_ASSERT(numFields == 5); 1186 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1187 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1188 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1189 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1190 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1191 TEST_ASSERT(fields[5] == NULL); 1192 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1193 TEST_ASSERT(spaceNeeded == requiredCapacity); 1194 } 1195 } 1196 1197 /* Split with too few output strings available (2) */ 1198 status = U_ZERO_ERROR; 1199 memset(fields, -1, sizeof(fields)); 1200 numFields = 1201 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status); 1202 TEST_ASSERT_SUCCESS(status); 1203 1204 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1205 if(U_SUCCESS(status)) { 1206 TEST_ASSERT(numFields == 2); 1207 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1208 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE); 1209 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1210 1211 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */ 1212 TEST_ASSERT(spaceNeeded == requiredCapacity); 1213 } 1214 1215 /* Split with too few output strings available (3) */ 1216 status = U_ZERO_ERROR; 1217 memset(fields, -1, sizeof(fields)); 1218 numFields = 1219 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status); 1220 TEST_ASSERT_SUCCESS(status); 1221 1222 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1223 if(U_SUCCESS(status)) { 1224 TEST_ASSERT(numFields == 3); 1225 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1226 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1227 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE); 1228 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*))); 1229 1230 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */ 1231 TEST_ASSERT(spaceNeeded == requiredCapacity); 1232 } 1233 1234 /* Split with just enough output strings available (5) */ 1235 status = U_ZERO_ERROR; 1236 memset(fields, -1, sizeof(fields)); 1237 numFields = 1238 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status); 1239 TEST_ASSERT_SUCCESS(status); 1240 1241 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1242 if(U_SUCCESS(status)) { 1243 TEST_ASSERT(numFields == 5); 1244 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1245 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1246 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1247 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1248 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1249 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*))); 1250 1251 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1252 TEST_ASSERT(spaceNeeded == requiredCapacity); 1253 } 1254 1255 /* Split, end of text is a field delimiter. */ 1256 status = U_ZERO_ERROR; 1257 sz = strlen("first <tag-a> second<tag-b>"); 1258 uregex_setText(re, textToSplit, sz, &status); 1259 TEST_ASSERT_SUCCESS(status); 1260 1261 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1262 if(U_SUCCESS(status)) { 1263 memset(fields, -1, sizeof(fields)); 1264 numFields = 1265 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status); 1266 TEST_ASSERT_SUCCESS(status); 1267 1268 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1269 if(U_SUCCESS(status)) { 1270 TEST_ASSERT(numFields == 5); 1271 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1272 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1273 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1274 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1275 TEST_ASSERT_STRING("", fields[4], TRUE); 1276 TEST_ASSERT(fields[5] == NULL); 1277 TEST_ASSERT(fields[8] == NULL); 1278 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); 1279 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */ 1280 TEST_ASSERT(spaceNeeded == requiredCapacity); 1281 } 1282 } 1283 1284 uregex_close(re); 1285 } 1286 1287 /* 1288 * set/getTimeLimit 1289 */ 1290 TEST_SETUP("abc$", "abcdef", 0); 1291 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0); 1292 uregex_setTimeLimit(re, 1000, &status); 1293 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1294 TEST_ASSERT_SUCCESS(status); 1295 uregex_setTimeLimit(re, -1, &status); 1296 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1297 status = U_ZERO_ERROR; 1298 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1299 TEST_TEARDOWN; 1300 1301 /* 1302 * set/get Stack Limit 1303 */ 1304 TEST_SETUP("abc$", "abcdef", 0); 1305 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000); 1306 uregex_setStackLimit(re, 40000, &status); 1307 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1308 TEST_ASSERT_SUCCESS(status); 1309 uregex_setStackLimit(re, -1, &status); 1310 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1311 status = U_ZERO_ERROR; 1312 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1313 TEST_TEARDOWN; 1314 1315 1316 /* 1317 * Get/Set callback functions 1318 * This test is copied from intltest regex/Callbacks 1319 * The pattern and test data will run long enough to cause the callback 1320 * to be invoked. The nested '+' operators give exponential time 1321 * behavior with increasing string length. 1322 */ 1323 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0) 1324 callBackContext cbInfo = {4, 0, 0}; 1325 const void *pContext = &cbInfo; 1326 URegexMatchCallback *returnedFn = &TestCallbackFn; 1327 1328 /* Getting the callback fn when it hasn't been set must return NULL */ 1329 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1330 TEST_ASSERT_SUCCESS(status); 1331 TEST_ASSERT(returnedFn == NULL); 1332 TEST_ASSERT(pContext == NULL); 1333 1334 /* Set thecallback and do a match. */ 1335 /* The callback function should record that it has been called. */ 1336 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status); 1337 TEST_ASSERT_SUCCESS(status); 1338 TEST_ASSERT(cbInfo.numCalls == 0); 1339 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE); 1340 TEST_ASSERT_SUCCESS(status); 1341 TEST_ASSERT(cbInfo.numCalls > 0); 1342 1343 /* Getting the callback should return the values that were set above. */ 1344 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1345 TEST_ASSERT(returnedFn == &TestCallbackFn); 1346 TEST_ASSERT(pContext == &cbInfo); 1347 1348 TEST_TEARDOWN; 1349 } 1350 1351 1352 1353 static void TestBug4315(void) { 1354 UErrorCode theICUError = U_ZERO_ERROR; 1355 URegularExpression *theRegEx; 1356 UChar *textBuff; 1357 const char *thePattern; 1358 UChar theString[100]; 1359 UChar *destFields[24]; 1360 int32_t neededLength1; 1361 int32_t neededLength2; 1362 1363 int32_t wordCount = 0; 1364 int32_t destFieldsSize = 24; 1365 1366 thePattern = "ck "; 1367 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle."); 1368 1369 /* open a regex */ 1370 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError); 1371 TEST_ASSERT_SUCCESS(theICUError); 1372 1373 /* set the input string */ 1374 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError); 1375 TEST_ASSERT_SUCCESS(theICUError); 1376 1377 /* split */ 1378 /*explicitly pass NULL and 0 to force the overflow error -> this is where the 1379 * error occurs! */ 1380 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields, 1381 destFieldsSize, &theICUError); 1382 1383 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR); 1384 TEST_ASSERT(wordCount==3); 1385 1386 if(theICUError == U_BUFFER_OVERFLOW_ERROR) 1387 { 1388 theICUError = U_ZERO_ERROR; 1389 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1)); 1390 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2, 1391 destFields, destFieldsSize, &theICUError); 1392 TEST_ASSERT(wordCount==3); 1393 TEST_ASSERT_SUCCESS(theICUError); 1394 TEST_ASSERT(neededLength1 == neededLength2); 1395 TEST_ASSERT_STRING("The qui", destFields[0], TRUE); 1396 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE); 1397 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE); 1398 TEST_ASSERT(destFields[3] == NULL); 1399 free(textBuff); 1400 } 1401 uregex_close(theRegEx); 1402 } 1403 1404 /* Based on TestRegexCAPI() */ 1405 static void TestUTextAPI(void) { 1406 UErrorCode status = U_ZERO_ERROR; 1407 URegularExpression *re; 1408 UText patternText = UTEXT_INITIALIZER; 1409 UChar pat[200]; 1410 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 }; 1411 1412 /* Mimimalist open/close */ 1413 utext_openUTF8(&patternText, patternTextUTF8, -1, &status); 1414 re = uregex_openUText(&patternText, 0, 0, &status); 1415 if (U_FAILURE(status)) { 1416 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 1417 utext_close(&patternText); 1418 return; 1419 } 1420 uregex_close(re); 1421 1422 /* Open with all flag values set */ 1423 status = U_ZERO_ERROR; 1424 re = uregex_openUText(&patternText, 1425 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 1426 0, &status); 1427 TEST_ASSERT_SUCCESS(status); 1428 uregex_close(re); 1429 1430 /* Open with an invalid flag */ 1431 status = U_ZERO_ERROR; 1432 re = uregex_openUText(&patternText, 0x40000000, 0, &status); 1433 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 1434 uregex_close(re); 1435 1436 /* open with an invalid parameter */ 1437 status = U_ZERO_ERROR; 1438 re = uregex_openUText(NULL, 1439 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 1440 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 1441 1442 /* 1443 * clone 1444 */ 1445 { 1446 URegularExpression *clone1; 1447 URegularExpression *clone2; 1448 URegularExpression *clone3; 1449 UChar testString1[30]; 1450 UChar testString2[30]; 1451 UBool result; 1452 1453 1454 status = U_ZERO_ERROR; 1455 re = uregex_openUText(&patternText, 0, 0, &status); 1456 TEST_ASSERT_SUCCESS(status); 1457 clone1 = uregex_clone(re, &status); 1458 TEST_ASSERT_SUCCESS(status); 1459 TEST_ASSERT(clone1 != NULL); 1460 1461 status = U_ZERO_ERROR; 1462 clone2 = uregex_clone(re, &status); 1463 TEST_ASSERT_SUCCESS(status); 1464 TEST_ASSERT(clone2 != NULL); 1465 uregex_close(re); 1466 1467 status = U_ZERO_ERROR; 1468 clone3 = uregex_clone(clone2, &status); 1469 TEST_ASSERT_SUCCESS(status); 1470 TEST_ASSERT(clone3 != NULL); 1471 1472 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat)); 1473 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat)); 1474 1475 status = U_ZERO_ERROR; 1476 uregex_setText(clone1, testString1, -1, &status); 1477 TEST_ASSERT_SUCCESS(status); 1478 result = uregex_lookingAt(clone1, 0, &status); 1479 TEST_ASSERT_SUCCESS(status); 1480 TEST_ASSERT(result==TRUE); 1481 1482 status = U_ZERO_ERROR; 1483 uregex_setText(clone2, testString2, -1, &status); 1484 TEST_ASSERT_SUCCESS(status); 1485 result = uregex_lookingAt(clone2, 0, &status); 1486 TEST_ASSERT_SUCCESS(status); 1487 TEST_ASSERT(result==FALSE); 1488 result = uregex_find(clone2, 0, &status); 1489 TEST_ASSERT_SUCCESS(status); 1490 TEST_ASSERT(result==TRUE); 1491 1492 uregex_close(clone1); 1493 uregex_close(clone2); 1494 uregex_close(clone3); 1495 1496 } 1497 1498 /* 1499 * pattern() and patternText() 1500 */ 1501 { 1502 const UChar *resultPat; 1503 int32_t resultLen; 1504 UText *resultText; 1505 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */ 1506 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ 1507 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */ 1508 status = U_ZERO_ERROR; 1509 1510 utext_openUTF8(&patternText, str_hello, -1, &status); 1511 re = uregex_open(pat, -1, 0, NULL, &status); 1512 resultPat = uregex_pattern(re, &resultLen, &status); 1513 TEST_ASSERT_SUCCESS(status); 1514 1515 /* The TEST_ASSERT_SUCCESS above should change too... */ 1516 if (U_SUCCESS(status)) { 1517 TEST_ASSERT(resultLen == -1); 1518 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 1519 } 1520 1521 resultText = uregex_patternUText(re, &status); 1522 TEST_ASSERT_SUCCESS(status); 1523 TEST_ASSERT_UTEXT(str_hello, resultText); 1524 1525 uregex_close(re); 1526 1527 status = U_ZERO_ERROR; 1528 re = uregex_open(pat, 3, 0, NULL, &status); 1529 resultPat = uregex_pattern(re, &resultLen, &status); 1530 TEST_ASSERT_SUCCESS(status); 1531 1532 /* The TEST_ASSERT_SUCCESS above should change too... */ 1533 if (U_SUCCESS(status)) { 1534 TEST_ASSERT(resultLen == 3); 1535 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 1536 TEST_ASSERT(u_strlen(resultPat) == 3); 1537 } 1538 1539 resultText = uregex_patternUText(re, &status); 1540 TEST_ASSERT_SUCCESS(status); 1541 TEST_ASSERT_UTEXT(str_hel, resultText); 1542 1543 uregex_close(re); 1544 } 1545 1546 /* 1547 * setUText() and lookingAt() 1548 */ 1549 { 1550 UText text1 = UTEXT_INITIALIZER; 1551 UText text2 = UTEXT_INITIALIZER; 1552 UBool result; 1553 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1554 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1555 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1556 status = U_ZERO_ERROR; 1557 utext_openUTF8(&text1, str_abcccd, -1, &status); 1558 utext_openUTF8(&text2, str_abcccxd, -1, &status); 1559 1560 utext_openUTF8(&patternText, str_abcd, -1, &status); 1561 re = uregex_openUText(&patternText, 0, NULL, &status); 1562 TEST_ASSERT_SUCCESS(status); 1563 1564 /* Operation before doing a setText should fail... */ 1565 status = U_ZERO_ERROR; 1566 uregex_lookingAt(re, 0, &status); 1567 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 1568 1569 status = U_ZERO_ERROR; 1570 uregex_setUText(re, &text1, &status); 1571 result = uregex_lookingAt(re, 0, &status); 1572 TEST_ASSERT(result == TRUE); 1573 TEST_ASSERT_SUCCESS(status); 1574 1575 status = U_ZERO_ERROR; 1576 uregex_setUText(re, &text2, &status); 1577 result = uregex_lookingAt(re, 0, &status); 1578 TEST_ASSERT(result == FALSE); 1579 TEST_ASSERT_SUCCESS(status); 1580 1581 status = U_ZERO_ERROR; 1582 uregex_setUText(re, &text1, &status); 1583 result = uregex_lookingAt(re, 0, &status); 1584 TEST_ASSERT(result == TRUE); 1585 TEST_ASSERT_SUCCESS(status); 1586 1587 uregex_close(re); 1588 utext_close(&text1); 1589 utext_close(&text2); 1590 } 1591 1592 1593 /* 1594 * getText() and getUText() 1595 */ 1596 { 1597 UText text1 = UTEXT_INITIALIZER; 1598 UText text2 = UTEXT_INITIALIZER; 1599 UChar text2Chars[20]; 1600 UText *resultText; 1601 const UChar *result; 1602 int32_t textLength; 1603 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1604 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1605 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1606 1607 1608 status = U_ZERO_ERROR; 1609 utext_openUTF8(&text1, str_abcccd, -1, &status); 1610 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars)); 1611 utext_openUChars(&text2, text2Chars, -1, &status); 1612 1613 utext_openUTF8(&patternText, str_abcd, -1, &status); 1614 re = uregex_openUText(&patternText, 0, NULL, &status); 1615 1616 /* First set a UText */ 1617 uregex_setUText(re, &text1, &status); 1618 resultText = uregex_getUText(re, NULL, &status); 1619 TEST_ASSERT_SUCCESS(status); 1620 TEST_ASSERT(resultText != &text1); 1621 utext_setNativeIndex(resultText, 0); 1622 utext_setNativeIndex(&text1, 0); 1623 TEST_ASSERT(testUTextEqual(resultText, &text1)); 1624 utext_close(resultText); 1625 1626 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */ 1627 (void)result; /* Suppress set but not used warning. */ 1628 TEST_ASSERT(textLength == -1 || textLength == 6); 1629 resultText = uregex_getUText(re, NULL, &status); 1630 TEST_ASSERT_SUCCESS(status); 1631 TEST_ASSERT(resultText != &text1); 1632 utext_setNativeIndex(resultText, 0); 1633 utext_setNativeIndex(&text1, 0); 1634 TEST_ASSERT(testUTextEqual(resultText, &text1)); 1635 utext_close(resultText); 1636 1637 /* Then set a UChar * */ 1638 uregex_setText(re, text2Chars, 7, &status); 1639 resultText = uregex_getUText(re, NULL, &status); 1640 TEST_ASSERT_SUCCESS(status); 1641 utext_setNativeIndex(resultText, 0); 1642 utext_setNativeIndex(&text2, 0); 1643 TEST_ASSERT(testUTextEqual(resultText, &text2)); 1644 utext_close(resultText); 1645 result = uregex_getText(re, &textLength, &status); 1646 TEST_ASSERT(textLength == 7); 1647 1648 uregex_close(re); 1649 utext_close(&text1); 1650 utext_close(&text2); 1651 } 1652 1653 /* 1654 * matches() 1655 */ 1656 { 1657 UText text1 = UTEXT_INITIALIZER; 1658 UBool result; 1659 UText nullText = UTEXT_INITIALIZER; 1660 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */ 1661 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */ 1662 1663 status = U_ZERO_ERROR; 1664 utext_openUTF8(&text1, str_abcccde, -1, &status); 1665 utext_openUTF8(&patternText, str_abcd, -1, &status); 1666 re = uregex_openUText(&patternText, 0, NULL, &status); 1667 1668 uregex_setUText(re, &text1, &status); 1669 result = uregex_matches(re, 0, &status); 1670 TEST_ASSERT(result == FALSE); 1671 TEST_ASSERT_SUCCESS(status); 1672 uregex_close(re); 1673 1674 status = U_ZERO_ERROR; 1675 re = uregex_openC(".?", 0, NULL, &status); 1676 uregex_setUText(re, &text1, &status); 1677 result = uregex_matches(re, 7, &status); 1678 TEST_ASSERT(result == TRUE); 1679 TEST_ASSERT_SUCCESS(status); 1680 1681 status = U_ZERO_ERROR; 1682 utext_openUTF8(&nullText, "", -1, &status); 1683 uregex_setUText(re, &nullText, &status); 1684 TEST_ASSERT_SUCCESS(status); 1685 result = uregex_matches(re, 0, &status); 1686 TEST_ASSERT(result == TRUE); 1687 TEST_ASSERT_SUCCESS(status); 1688 1689 uregex_close(re); 1690 utext_close(&text1); 1691 utext_close(&nullText); 1692 } 1693 1694 1695 /* 1696 * lookingAt() Used in setText test. 1697 */ 1698 1699 1700 /* 1701 * find(), findNext, start, end, reset 1702 */ 1703 { 1704 UChar text1[50]; 1705 UBool result; 1706 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1)); 1707 status = U_ZERO_ERROR; 1708 re = uregex_openC("rx", 0, NULL, &status); 1709 1710 uregex_setText(re, text1, -1, &status); 1711 result = uregex_find(re, 0, &status); 1712 TEST_ASSERT(result == TRUE); 1713 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1714 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1715 TEST_ASSERT_SUCCESS(status); 1716 1717 result = uregex_find(re, 9, &status); 1718 TEST_ASSERT(result == TRUE); 1719 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 1720 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 1721 TEST_ASSERT_SUCCESS(status); 1722 1723 result = uregex_find(re, 14, &status); 1724 TEST_ASSERT(result == FALSE); 1725 TEST_ASSERT_SUCCESS(status); 1726 1727 status = U_ZERO_ERROR; 1728 uregex_reset(re, 0, &status); 1729 1730 result = uregex_findNext(re, &status); 1731 TEST_ASSERT(result == TRUE); 1732 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1733 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1734 TEST_ASSERT_SUCCESS(status); 1735 1736 result = uregex_findNext(re, &status); 1737 TEST_ASSERT(result == TRUE); 1738 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 1739 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 1740 TEST_ASSERT_SUCCESS(status); 1741 1742 status = U_ZERO_ERROR; 1743 uregex_reset(re, 12, &status); 1744 1745 result = uregex_findNext(re, &status); 1746 TEST_ASSERT(result == TRUE); 1747 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 1748 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 1749 TEST_ASSERT_SUCCESS(status); 1750 1751 result = uregex_findNext(re, &status); 1752 TEST_ASSERT(result == FALSE); 1753 TEST_ASSERT_SUCCESS(status); 1754 1755 uregex_close(re); 1756 } 1757 1758 /* 1759 * groupUText() 1760 */ 1761 { 1762 UChar text1[80]; 1763 UText *actual; 1764 UBool result; 1765 int64_t groupLen = 0; 1766 UChar groupBuf[20]; 1767 1768 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1)); 1769 1770 status = U_ZERO_ERROR; 1771 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 1772 TEST_ASSERT_SUCCESS(status); 1773 1774 uregex_setText(re, text1, -1, &status); 1775 result = uregex_find(re, 0, &status); 1776 TEST_ASSERT(result==TRUE); 1777 1778 /* Capture Group 0 with shallow clone API. Should succeed. */ 1779 status = U_ZERO_ERROR; 1780 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status); 1781 TEST_ASSERT_SUCCESS(status); 1782 1783 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */ 1784 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */ 1785 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status); 1786 1787 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE); 1788 utext_close(actual); 1789 1790 /* Capture group #1. Should succeed. */ 1791 status = U_ZERO_ERROR; 1792 1793 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status); 1794 TEST_ASSERT_SUCCESS(status); 1795 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */ 1796 /* (within the string text1) */ 1797 TEST_ASSERT(10 == groupLen); /* length of " interior " */ 1798 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status); 1799 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE); 1800 1801 utext_close(actual); 1802 1803 /* Capture group out of range. Error. */ 1804 status = U_ZERO_ERROR; 1805 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status); 1806 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1807 utext_close(actual); 1808 1809 uregex_close(re); 1810 } 1811 1812 /* 1813 * replaceFirst() 1814 */ 1815 { 1816 UChar text1[80]; 1817 UChar text2[80]; 1818 UText replText = UTEXT_INITIALIZER; 1819 UText *result; 1820 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */ 1821 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ 1822 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 1823 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */ 1824 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1825 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */ 1826 status = U_ZERO_ERROR; 1827 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 1828 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1829 utext_openUTF8(&replText, str_1x, -1, &status); 1830 1831 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1832 TEST_ASSERT_SUCCESS(status); 1833 1834 /* Normal case, with match */ 1835 uregex_setText(re, text1, -1, &status); 1836 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1837 TEST_ASSERT_SUCCESS(status); 1838 TEST_ASSERT_UTEXT(str_Replxxx, result); 1839 utext_close(result); 1840 1841 /* No match. Text should copy to output with no changes. */ 1842 uregex_setText(re, text2, -1, &status); 1843 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1844 TEST_ASSERT_SUCCESS(status); 1845 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1846 utext_close(result); 1847 1848 /* Unicode escapes */ 1849 uregex_setText(re, text1, -1, &status); 1850 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status); 1851 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1852 TEST_ASSERT_SUCCESS(status); 1853 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result); 1854 utext_close(result); 1855 1856 uregex_close(re); 1857 utext_close(&replText); 1858 } 1859 1860 1861 /* 1862 * replaceAll() 1863 */ 1864 { 1865 UChar text1[80]; 1866 UChar text2[80]; 1867 UText replText = UTEXT_INITIALIZER; 1868 UText *result; 1869 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1870 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */ 1871 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ 1872 status = U_ZERO_ERROR; 1873 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 1874 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1875 utext_openUTF8(&replText, str_1, -1, &status); 1876 1877 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1878 TEST_ASSERT_SUCCESS(status); 1879 1880 /* Normal case, with match */ 1881 uregex_setText(re, text1, -1, &status); 1882 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1883 TEST_ASSERT_SUCCESS(status); 1884 TEST_ASSERT_UTEXT(str_Replaceaa1, result); 1885 utext_close(result); 1886 1887 /* No match. Text should copy to output with no changes. */ 1888 uregex_setText(re, text2, -1, &status); 1889 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1890 TEST_ASSERT_SUCCESS(status); 1891 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1892 utext_close(result); 1893 1894 uregex_close(re); 1895 utext_close(&replText); 1896 } 1897 1898 1899 /* 1900 * appendReplacement() 1901 */ 1902 { 1903 UChar text[100]; 1904 UChar repl[100]; 1905 UChar buf[100]; 1906 UChar *bufPtr; 1907 int32_t bufCap; 1908 1909 status = U_ZERO_ERROR; 1910 re = uregex_openC(".*", 0, 0, &status); 1911 TEST_ASSERT_SUCCESS(status); 1912 1913 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text)); 1914 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl)); 1915 uregex_setText(re, text, -1, &status); 1916 1917 /* match covers whole target string */ 1918 uregex_find(re, 0, &status); 1919 TEST_ASSERT_SUCCESS(status); 1920 bufPtr = buf; 1921 bufCap = UPRV_LENGTHOF(buf); 1922 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1923 TEST_ASSERT_SUCCESS(status); 1924 TEST_ASSERT_STRING("some other", buf, TRUE); 1925 1926 /* Match has \u \U escapes */ 1927 uregex_find(re, 0, &status); 1928 TEST_ASSERT_SUCCESS(status); 1929 bufPtr = buf; 1930 bufCap = UPRV_LENGTHOF(buf); 1931 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl)); 1932 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1933 TEST_ASSERT_SUCCESS(status); 1934 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 1935 1936 uregex_close(re); 1937 } 1938 1939 1940 /* 1941 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll(). 1942 */ 1943 1944 /* 1945 * splitUText() 1946 */ 1947 { 1948 UChar textToSplit[80]; 1949 UChar text2[80]; 1950 UText *fields[10]; 1951 int32_t numFields; 1952 int32_t i; 1953 1954 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit)); 1955 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1956 1957 status = U_ZERO_ERROR; 1958 re = uregex_openC(":", 0, NULL, &status); 1959 1960 1961 /* Simple split */ 1962 1963 uregex_setText(re, textToSplit, -1, &status); 1964 TEST_ASSERT_SUCCESS(status); 1965 1966 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1967 if (U_SUCCESS(status)) { 1968 memset(fields, 0, sizeof(fields)); 1969 numFields = uregex_splitUText(re, fields, 10, &status); 1970 TEST_ASSERT_SUCCESS(status); 1971 1972 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1973 if(U_SUCCESS(status)) { 1974 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */ 1975 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */ 1976 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */ 1977 TEST_ASSERT(numFields == 3); 1978 TEST_ASSERT_UTEXT(str_first, fields[0]); 1979 TEST_ASSERT_UTEXT(str_second, fields[1]); 1980 TEST_ASSERT_UTEXT(str_third, fields[2]); 1981 TEST_ASSERT(fields[3] == NULL); 1982 } 1983 for(i = 0; i < numFields; i++) { 1984 utext_close(fields[i]); 1985 } 1986 } 1987 1988 uregex_close(re); 1989 1990 1991 /* Split with too few output strings available */ 1992 status = U_ZERO_ERROR; 1993 re = uregex_openC(":", 0, NULL, &status); 1994 uregex_setText(re, textToSplit, -1, &status); 1995 TEST_ASSERT_SUCCESS(status); 1996 1997 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1998 if(U_SUCCESS(status)) { 1999 fields[0] = NULL; 2000 fields[1] = NULL; 2001 fields[2] = &patternText; 2002 numFields = uregex_splitUText(re, fields, 2, &status); 2003 TEST_ASSERT_SUCCESS(status); 2004 2005 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2006 if(U_SUCCESS(status)) { 2007 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2008 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */ 2009 TEST_ASSERT(numFields == 2); 2010 TEST_ASSERT_UTEXT(str_first, fields[0]); 2011 TEST_ASSERT_UTEXT(str_secondthird, fields[1]); 2012 TEST_ASSERT(fields[2] == &patternText); 2013 } 2014 for(i = 0; i < numFields; i++) { 2015 utext_close(fields[i]); 2016 } 2017 } 2018 2019 uregex_close(re); 2020 } 2021 2022 /* splitUText(), part 2. Patterns with capture groups. The capture group text 2023 * comes out as additional fields. */ 2024 { 2025 UChar textToSplit[80]; 2026 UText *fields[10]; 2027 int32_t numFields; 2028 int32_t i; 2029 2030 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit)); 2031 2032 status = U_ZERO_ERROR; 2033 re = uregex_openC("<(.*?)>", 0, NULL, &status); 2034 2035 uregex_setText(re, textToSplit, -1, &status); 2036 TEST_ASSERT_SUCCESS(status); 2037 2038 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2039 if(U_SUCCESS(status)) { 2040 memset(fields, 0, sizeof(fields)); 2041 numFields = uregex_splitUText(re, fields, 10, &status); 2042 TEST_ASSERT_SUCCESS(status); 2043 2044 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2045 if(U_SUCCESS(status)) { 2046 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2047 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2048 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2049 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2050 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2051 2052 TEST_ASSERT(numFields == 5); 2053 TEST_ASSERT_UTEXT(str_first, fields[0]); 2054 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2055 TEST_ASSERT_UTEXT(str_second, fields[2]); 2056 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2057 TEST_ASSERT_UTEXT(str_third, fields[4]); 2058 TEST_ASSERT(fields[5] == NULL); 2059 } 2060 for(i = 0; i < numFields; i++) { 2061 utext_close(fields[i]); 2062 } 2063 } 2064 2065 /* Split with too few output strings available (2) */ 2066 status = U_ZERO_ERROR; 2067 fields[0] = NULL; 2068 fields[1] = NULL; 2069 fields[2] = &patternText; 2070 numFields = uregex_splitUText(re, fields, 2, &status); 2071 TEST_ASSERT_SUCCESS(status); 2072 2073 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2074 if(U_SUCCESS(status)) { 2075 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2076 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2077 TEST_ASSERT(numFields == 2); 2078 TEST_ASSERT_UTEXT(str_first, fields[0]); 2079 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]); 2080 TEST_ASSERT(fields[2] == &patternText); 2081 } 2082 for(i = 0; i < numFields; i++) { 2083 utext_close(fields[i]); 2084 } 2085 2086 2087 /* Split with too few output strings available (3) */ 2088 status = U_ZERO_ERROR; 2089 fields[0] = NULL; 2090 fields[1] = NULL; 2091 fields[2] = NULL; 2092 fields[3] = &patternText; 2093 numFields = uregex_splitUText(re, fields, 3, &status); 2094 TEST_ASSERT_SUCCESS(status); 2095 2096 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2097 if(U_SUCCESS(status)) { 2098 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2099 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2100 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2101 TEST_ASSERT(numFields == 3); 2102 TEST_ASSERT_UTEXT(str_first, fields[0]); 2103 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2104 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]); 2105 TEST_ASSERT(fields[3] == &patternText); 2106 } 2107 for(i = 0; i < numFields; i++) { 2108 utext_close(fields[i]); 2109 } 2110 2111 /* Split with just enough output strings available (5) */ 2112 status = U_ZERO_ERROR; 2113 fields[0] = NULL; 2114 fields[1] = NULL; 2115 fields[2] = NULL; 2116 fields[3] = NULL; 2117 fields[4] = NULL; 2118 fields[5] = &patternText; 2119 numFields = uregex_splitUText(re, fields, 5, &status); 2120 TEST_ASSERT_SUCCESS(status); 2121 2122 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2123 if(U_SUCCESS(status)) { 2124 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2125 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2126 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2127 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2128 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2129 2130 TEST_ASSERT(numFields == 5); 2131 TEST_ASSERT_UTEXT(str_first, fields[0]); 2132 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2133 TEST_ASSERT_UTEXT(str_second, fields[2]); 2134 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2135 TEST_ASSERT_UTEXT(str_third, fields[4]); 2136 TEST_ASSERT(fields[5] == &patternText); 2137 } 2138 for(i = 0; i < numFields; i++) { 2139 utext_close(fields[i]); 2140 } 2141 2142 /* Split, end of text is a field delimiter. */ 2143 status = U_ZERO_ERROR; 2144 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status); 2145 TEST_ASSERT_SUCCESS(status); 2146 2147 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2148 if(U_SUCCESS(status)) { 2149 memset(fields, 0, sizeof(fields)); 2150 fields[9] = &patternText; 2151 numFields = uregex_splitUText(re, fields, 9, &status); 2152 TEST_ASSERT_SUCCESS(status); 2153 2154 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2155 if(U_SUCCESS(status)) { 2156 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2157 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2158 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2159 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2160 const char str_empty[] = { 0x00 }; 2161 2162 TEST_ASSERT(numFields == 5); 2163 TEST_ASSERT_UTEXT(str_first, fields[0]); 2164 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2165 TEST_ASSERT_UTEXT(str_second, fields[2]); 2166 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2167 TEST_ASSERT_UTEXT(str_empty, fields[4]); 2168 TEST_ASSERT(fields[5] == NULL); 2169 TEST_ASSERT(fields[8] == NULL); 2170 TEST_ASSERT(fields[9] == &patternText); 2171 } 2172 for(i = 0; i < numFields; i++) { 2173 utext_close(fields[i]); 2174 } 2175 } 2176 2177 uregex_close(re); 2178 } 2179 utext_close(&patternText); 2180 } 2181 2182 2183 static void TestRefreshInput(void) { 2184 /* 2185 * RefreshInput changes out the input of a URegularExpression without 2186 * changing anything else in the match state. Used with Java JNI, 2187 * when Java moves the underlying string storage. This test 2188 * runs a find() loop, moving the text after the first match. 2189 * The right number of matches should still be found. 2190 */ 2191 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */ 2192 UChar movedStr[] = { 0, 0, 0, 0, 0, 0}; 2193 UErrorCode status = U_ZERO_ERROR; 2194 URegularExpression *re; 2195 UText ut1 = UTEXT_INITIALIZER; 2196 UText ut2 = UTEXT_INITIALIZER; 2197 2198 re = uregex_openC("[ABC]", 0, 0, &status); 2199 TEST_ASSERT_SUCCESS(status); 2200 2201 utext_openUChars(&ut1, testStr, -1, &status); 2202 TEST_ASSERT_SUCCESS(status); 2203 uregex_setUText(re, &ut1, &status); 2204 TEST_ASSERT_SUCCESS(status); 2205 2206 /* Find the first match "A" in the original string */ 2207 TEST_ASSERT(uregex_findNext(re, &status)); 2208 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 2209 2210 /* Move the string, kill the original string. */ 2211 u_strcpy(movedStr, testStr); 2212 u_memset(testStr, 0, u_strlen(testStr)); 2213 utext_openUChars(&ut2, movedStr, -1, &status); 2214 TEST_ASSERT_SUCCESS(status); 2215 uregex_refreshUText(re, &ut2, &status); 2216 TEST_ASSERT_SUCCESS(status); 2217 2218 /* Find the following two matches, now working in the moved string. */ 2219 TEST_ASSERT(uregex_findNext(re, &status)); 2220 TEST_ASSERT(uregex_start(re, 0, &status) == 2); 2221 TEST_ASSERT(uregex_findNext(re, &status)); 2222 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 2223 TEST_ASSERT(FALSE == uregex_findNext(re, &status)); 2224 2225 uregex_close(re); 2226 } 2227 2228 2229 static void TestBug8421(void) { 2230 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched 2231 * was failing. 2232 */ 2233 URegularExpression *re; 2234 UErrorCode status = U_ZERO_ERROR; 2235 int32_t limit = -1; 2236 2237 re = uregex_openC("abc", 0, 0, &status); 2238 TEST_ASSERT_SUCCESS(status); 2239 2240 limit = uregex_getTimeLimit(re, &status); 2241 TEST_ASSERT_SUCCESS(status); 2242 TEST_ASSERT(limit == 0); 2243 2244 uregex_setTimeLimit(re, 100, &status); 2245 TEST_ASSERT_SUCCESS(status); 2246 limit = uregex_getTimeLimit(re, &status); 2247 TEST_ASSERT_SUCCESS(status); 2248 TEST_ASSERT(limit == 100); 2249 2250 uregex_close(re); 2251 } 2252 2253 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) { 2254 return FALSE; 2255 } 2256 2257 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) { 2258 return FALSE; 2259 } 2260 2261 static void TestBug10815() { 2262 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER 2263 * when the callback function specified by uregex_setMatchCallback() returns FALSE 2264 */ 2265 URegularExpression *re; 2266 UErrorCode status = U_ZERO_ERROR; 2267 UChar text[100]; 2268 2269 2270 // findNext() with a find progress callback function. 2271 2272 re = uregex_openC(".z", 0, 0, &status); 2273 TEST_ASSERT_SUCCESS(status); 2274 2275 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text)); 2276 uregex_setText(re, text, -1, &status); 2277 TEST_ASSERT_SUCCESS(status); 2278 2279 uregex_setFindProgressCallback(re, FindCallback, NULL, &status); 2280 TEST_ASSERT_SUCCESS(status); 2281 2282 uregex_findNext(re, &status); 2283 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); 2284 2285 uregex_close(re); 2286 2287 // findNext() with a match progress callback function. 2288 2289 status = U_ZERO_ERROR; 2290 re = uregex_openC("((xxx)*)*y", 0, 0, &status); 2291 TEST_ASSERT_SUCCESS(status); 2292 2293 // Pattern + this text gives an exponential time match. Without the callback to stop the match, 2294 // it will appear to be stuck in a (near) infinite loop. 2295 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text)); 2296 uregex_setText(re, text, -1, &status); 2297 TEST_ASSERT_SUCCESS(status); 2298 2299 uregex_setMatchCallback(re, MatchCallback, NULL, &status); 2300 TEST_ASSERT_SUCCESS(status); 2301 2302 uregex_findNext(re, &status); 2303 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); 2304 2305 uregex_close(re); 2306 } 2307 2308 2309 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 2310