1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2004-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File reapits.c 9 * 10 *********************************************************************************/ 11 /*C API TEST FOR Regular Expressions */ 12 /** 13 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't 14 * try to test the full functionality. It just calls each function and verifies that it 15 * works on a basic level. 16 * 17 * More complete testing of regular expression functionality is done with the C++ tests. 18 **/ 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 23 24 #include <stdlib.h> 25 #include <string.h> 26 #include "unicode/uloc.h" 27 #include "unicode/uregex.h" 28 #include "unicode/ustring.h" 29 #include "unicode/utext.h" 30 #include "cintltst.h" 31 32 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 33 log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} 34 35 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 36 log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}} 37 38 /* 39 * TEST_SETUP and TEST_TEARDOWN 40 * macros to handle the boilerplate around setting up regex test cases. 41 * parameteres to setup: 42 * pattern: The regex pattern, a (char *) null terminated C string. 43 * testString: The string data, also a (char *) C string. 44 * flags: Regex flags to set when compiling the pattern 45 * 46 * Put arbitrary test code between SETUP and TEARDOWN. 47 * 're" is the compiled, ready-to-go regular expression. 48 */ 49 #define TEST_SETUP(pattern, testString, flags) { \ 50 UChar *srcString = NULL; \ 51 status = U_ZERO_ERROR; \ 52 re = uregex_openC(pattern, flags, NULL, &status); \ 53 TEST_ASSERT_SUCCESS(status); \ 54 srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \ 55 u_uastrncpy(srcString, testString, strlen(testString)+1); \ 56 uregex_setText(re, srcString, -1, &status); \ 57 TEST_ASSERT_SUCCESS(status); \ 58 if (U_SUCCESS(status)) { 59 60 #define TEST_TEARDOWN \ 61 } \ 62 TEST_ASSERT_SUCCESS(status); \ 63 uregex_close(re); \ 64 free(srcString); \ 65 } 66 67 68 /** 69 * @param expected utf-8 array of bytes to be expected 70 */ 71 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) { 72 char buf_inside_macro[120]; 73 int32_t len = (int32_t)strlen(expected); 74 UBool success; 75 if (nulTerm) { 76 u_austrncpy(buf_inside_macro, (actual), len+1); 77 buf_inside_macro[len+2] = 0; 78 success = (strcmp((expected), buf_inside_macro) == 0); 79 } else { 80 u_austrncpy(buf_inside_macro, (actual), len); 81 buf_inside_macro[len+1] = 0; 82 success = (strncmp((expected), buf_inside_macro, len) == 0); 83 } 84 if (success == FALSE) { 85 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", 86 file, line, (expected), buf_inside_macro); 87 } 88 } 89 90 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__) 91 92 93 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) { 94 UErrorCode status = U_ZERO_ERROR; 95 UText expectedText = UTEXT_INITIALIZER; 96 utext_openUTF8(&expectedText, expected, -1, &status); 97 utext_setNativeIndex(actual, 0); 98 if (utext_compare(&expectedText, -1, actual, -1) != 0) { 99 UChar32 c; 100 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected); 101 c = utext_next32From(actual, 0); 102 while (c != U_SENTINEL) { 103 if (0x20<c && c <0x7e) { 104 log_err("%c", c); 105 } else { 106 log_err("%#x", c); 107 } 108 c = UTEXT_NEXT32(actual); 109 } 110 log_err("\"\n"); 111 } 112 utext_close(&expectedText); 113 } 114 115 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__) 116 117 118 119 static void TestRegexCAPI(void); 120 static void TestBug4315(void); 121 static void TestUTextAPI(void); 122 /* BEGIN android-added 123 Removed this function after Android upgrade to ICU4.6. 124 */ 125 static void TestRefreshInput(void); 126 /* END android-added */ 127 128 void addURegexTest(TestNode** root); 129 130 void addURegexTest(TestNode** root) 131 { 132 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); 133 addTest(root, &TestBug4315, "regex/TestBug4315"); 134 addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); 135 /* BEGIN android-added 136 Removed this after Android upgrade to ICU4.6. 137 */ 138 addTest(root, &TestRefreshInput, "regex/TestRefreshInput"); 139 /* END android-added */ 140 } 141 142 /* 143 * Call back function and context struct used for testing 144 * regular expression user callbacks. This test is mostly the same as 145 * the corresponding C++ test in intltest. 146 */ 147 typedef struct callBackContext { 148 int32_t maxCalls; 149 int32_t numCalls; 150 int32_t lastSteps; 151 } callBackContext; 152 153 static UBool U_EXPORT2 U_CALLCONV 154 TestCallbackFn(const void *context, int32_t steps) { 155 callBackContext *info = (callBackContext *)context; 156 if (info->lastSteps+1 != steps) { 157 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps); 158 } 159 info->lastSteps = steps; 160 info->numCalls++; 161 return (info->numCalls < info->maxCalls); 162 } 163 164 /* 165 * Regular Expression C API Tests 166 */ 167 static void TestRegexCAPI(void) { 168 UErrorCode status = U_ZERO_ERROR; 169 URegularExpression *re; 170 UChar pat[200]; 171 UChar *minus1; 172 173 memset(&minus1, -1, sizeof(minus1)); 174 175 /* Mimimalist open/close */ 176 u_uastrncpy(pat, "abc*", sizeof(pat)/2); 177 re = uregex_open(pat, -1, 0, 0, &status); 178 if (U_FAILURE(status)) { 179 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 180 return; 181 } 182 uregex_close(re); 183 184 /* Open with all flag values set */ 185 status = U_ZERO_ERROR; 186 re = uregex_open(pat, -1, 187 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 188 0, &status); 189 TEST_ASSERT_SUCCESS(status); 190 uregex_close(re); 191 192 /* Open with an invalid flag */ 193 status = U_ZERO_ERROR; 194 re = uregex_open(pat, -1, 0x40000000, 0, &status); 195 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 196 uregex_close(re); 197 198 /* Open with an unimplemented flag */ 199 status = U_ZERO_ERROR; 200 re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status); 201 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED); 202 uregex_close(re); 203 204 /* openC with an invalid parameter */ 205 status = U_ZERO_ERROR; 206 re = uregex_openC(NULL, 207 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 208 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 209 210 /* openC with an invalid parameter */ 211 status = U_USELESS_COLLATOR_ERROR; 212 re = uregex_openC(NULL, 213 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 214 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL); 215 216 /* openC open from a C string */ 217 { 218 const UChar *p; 219 int32_t len; 220 status = U_ZERO_ERROR; 221 re = uregex_openC("abc*", 0, 0, &status); 222 TEST_ASSERT_SUCCESS(status); 223 p = uregex_pattern(re, &len, &status); 224 TEST_ASSERT_SUCCESS(status); 225 226 /* The TEST_ASSERT_SUCCESS above should change too... */ 227 if(U_SUCCESS(status)) { 228 u_uastrncpy(pat, "abc*", sizeof(pat)/2); 229 TEST_ASSERT(u_strcmp(pat, p) == 0); 230 TEST_ASSERT(len==(int32_t)strlen("abc*")); 231 } 232 233 uregex_close(re); 234 235 /* TODO: Open with ParseError parameter */ 236 } 237 238 /* 239 * clone 240 */ 241 { 242 URegularExpression *clone1; 243 URegularExpression *clone2; 244 URegularExpression *clone3; 245 UChar testString1[30]; 246 UChar testString2[30]; 247 UBool result; 248 249 250 status = U_ZERO_ERROR; 251 re = uregex_openC("abc*", 0, 0, &status); 252 TEST_ASSERT_SUCCESS(status); 253 clone1 = uregex_clone(re, &status); 254 TEST_ASSERT_SUCCESS(status); 255 TEST_ASSERT(clone1 != NULL); 256 257 status = U_ZERO_ERROR; 258 clone2 = uregex_clone(re, &status); 259 TEST_ASSERT_SUCCESS(status); 260 TEST_ASSERT(clone2 != NULL); 261 uregex_close(re); 262 263 status = U_ZERO_ERROR; 264 clone3 = uregex_clone(clone2, &status); 265 TEST_ASSERT_SUCCESS(status); 266 TEST_ASSERT(clone3 != NULL); 267 268 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); 269 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); 270 271 status = U_ZERO_ERROR; 272 uregex_setText(clone1, testString1, -1, &status); 273 TEST_ASSERT_SUCCESS(status); 274 result = uregex_lookingAt(clone1, 0, &status); 275 TEST_ASSERT_SUCCESS(status); 276 TEST_ASSERT(result==TRUE); 277 278 status = U_ZERO_ERROR; 279 uregex_setText(clone2, testString2, -1, &status); 280 TEST_ASSERT_SUCCESS(status); 281 result = uregex_lookingAt(clone2, 0, &status); 282 TEST_ASSERT_SUCCESS(status); 283 TEST_ASSERT(result==FALSE); 284 result = uregex_find(clone2, 0, &status); 285 TEST_ASSERT_SUCCESS(status); 286 TEST_ASSERT(result==TRUE); 287 288 uregex_close(clone1); 289 uregex_close(clone2); 290 uregex_close(clone3); 291 292 } 293 294 /* 295 * pattern() 296 */ 297 { 298 const UChar *resultPat; 299 int32_t resultLen; 300 u_uastrncpy(pat, "hello", sizeof(pat)/2); 301 status = U_ZERO_ERROR; 302 re = uregex_open(pat, -1, 0, NULL, &status); 303 resultPat = uregex_pattern(re, &resultLen, &status); 304 TEST_ASSERT_SUCCESS(status); 305 306 /* The TEST_ASSERT_SUCCESS above should change too... */ 307 if (U_SUCCESS(status)) { 308 TEST_ASSERT(resultLen == -1); 309 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 310 } 311 312 uregex_close(re); 313 314 status = U_ZERO_ERROR; 315 re = uregex_open(pat, 3, 0, NULL, &status); 316 resultPat = uregex_pattern(re, &resultLen, &status); 317 TEST_ASSERT_SUCCESS(status); 318 TEST_ASSERT_SUCCESS(status); 319 320 /* The TEST_ASSERT_SUCCESS above should change too... */ 321 if (U_SUCCESS(status)) { 322 TEST_ASSERT(resultLen == 3); 323 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 324 TEST_ASSERT(u_strlen(resultPat) == 3); 325 } 326 327 uregex_close(re); 328 } 329 330 /* 331 * flags() 332 */ 333 { 334 int32_t t; 335 336 status = U_ZERO_ERROR; 337 re = uregex_open(pat, -1, 0, NULL, &status); 338 t = uregex_flags(re, &status); 339 TEST_ASSERT_SUCCESS(status); 340 TEST_ASSERT(t == 0); 341 uregex_close(re); 342 343 status = U_ZERO_ERROR; 344 re = uregex_open(pat, -1, 0, NULL, &status); 345 t = uregex_flags(re, &status); 346 TEST_ASSERT_SUCCESS(status); 347 TEST_ASSERT(t == 0); 348 uregex_close(re); 349 350 status = U_ZERO_ERROR; 351 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status); 352 t = uregex_flags(re, &status); 353 TEST_ASSERT_SUCCESS(status); 354 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL)); 355 uregex_close(re); 356 } 357 358 /* 359 * setText() and lookingAt() 360 */ 361 { 362 UChar text1[50]; 363 UChar text2[50]; 364 UBool result; 365 366 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); 367 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); 368 status = U_ZERO_ERROR; 369 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 370 re = uregex_open(pat, -1, 0, NULL, &status); 371 TEST_ASSERT_SUCCESS(status); 372 373 /* Operation before doing a setText should fail... */ 374 status = U_ZERO_ERROR; 375 uregex_lookingAt(re, 0, &status); 376 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 377 378 status = U_ZERO_ERROR; 379 uregex_setText(re, text1, -1, &status); 380 result = uregex_lookingAt(re, 0, &status); 381 TEST_ASSERT(result == TRUE); 382 TEST_ASSERT_SUCCESS(status); 383 384 status = U_ZERO_ERROR; 385 uregex_setText(re, text2, -1, &status); 386 result = uregex_lookingAt(re, 0, &status); 387 TEST_ASSERT(result == FALSE); 388 TEST_ASSERT_SUCCESS(status); 389 390 status = U_ZERO_ERROR; 391 uregex_setText(re, text1, -1, &status); 392 result = uregex_lookingAt(re, 0, &status); 393 TEST_ASSERT(result == TRUE); 394 TEST_ASSERT_SUCCESS(status); 395 396 status = U_ZERO_ERROR; 397 uregex_setText(re, text1, 5, &status); 398 result = uregex_lookingAt(re, 0, &status); 399 TEST_ASSERT(result == FALSE); 400 TEST_ASSERT_SUCCESS(status); 401 402 status = U_ZERO_ERROR; 403 uregex_setText(re, text1, 6, &status); 404 result = uregex_lookingAt(re, 0, &status); 405 TEST_ASSERT(result == TRUE); 406 TEST_ASSERT_SUCCESS(status); 407 408 uregex_close(re); 409 } 410 411 412 /* 413 * getText() 414 */ 415 { 416 UChar text1[50]; 417 UChar text2[50]; 418 const UChar *result; 419 int32_t textLength; 420 421 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); 422 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); 423 status = U_ZERO_ERROR; 424 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 425 re = uregex_open(pat, -1, 0, NULL, &status); 426 427 uregex_setText(re, text1, -1, &status); 428 result = uregex_getText(re, &textLength, &status); 429 TEST_ASSERT(result == text1); 430 TEST_ASSERT(textLength == -1); 431 TEST_ASSERT_SUCCESS(status); 432 433 status = U_ZERO_ERROR; 434 uregex_setText(re, text2, 7, &status); 435 result = uregex_getText(re, &textLength, &status); 436 TEST_ASSERT(result == text2); 437 TEST_ASSERT(textLength == 7); 438 TEST_ASSERT_SUCCESS(status); 439 440 status = U_ZERO_ERROR; 441 uregex_setText(re, text2, 4, &status); 442 result = uregex_getText(re, &textLength, &status); 443 TEST_ASSERT(result == text2); 444 TEST_ASSERT(textLength == 4); 445 TEST_ASSERT_SUCCESS(status); 446 uregex_close(re); 447 } 448 449 /* 450 * matches() 451 */ 452 { 453 UChar text1[50]; 454 UBool result; 455 int len; 456 UChar nullString[] = {0,0,0}; 457 458 u_uastrncpy(text1, "abcccde", sizeof(text1)/2); 459 status = U_ZERO_ERROR; 460 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 461 re = uregex_open(pat, -1, 0, NULL, &status); 462 463 uregex_setText(re, text1, -1, &status); 464 result = uregex_matches(re, 0, &status); 465 TEST_ASSERT(result == FALSE); 466 TEST_ASSERT_SUCCESS(status); 467 468 status = U_ZERO_ERROR; 469 uregex_setText(re, text1, 6, &status); 470 result = uregex_matches(re, 0, &status); 471 TEST_ASSERT(result == TRUE); 472 TEST_ASSERT_SUCCESS(status); 473 474 status = U_ZERO_ERROR; 475 uregex_setText(re, text1, 6, &status); 476 result = uregex_matches(re, 1, &status); 477 TEST_ASSERT(result == FALSE); 478 TEST_ASSERT_SUCCESS(status); 479 uregex_close(re); 480 481 status = U_ZERO_ERROR; 482 re = uregex_openC(".?", 0, NULL, &status); 483 uregex_setText(re, text1, -1, &status); 484 len = u_strlen(text1); 485 result = uregex_matches(re, len, &status); 486 TEST_ASSERT(result == TRUE); 487 TEST_ASSERT_SUCCESS(status); 488 489 status = U_ZERO_ERROR; 490 uregex_setText(re, nullString, -1, &status); 491 TEST_ASSERT_SUCCESS(status); 492 result = uregex_matches(re, 0, &status); 493 TEST_ASSERT(result == TRUE); 494 TEST_ASSERT_SUCCESS(status); 495 uregex_close(re); 496 } 497 498 499 /* 500 * lookingAt() Used in setText test. 501 */ 502 503 504 /* 505 * find(), findNext, start, end, reset 506 */ 507 { 508 UChar text1[50]; 509 UBool result; 510 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); 511 status = U_ZERO_ERROR; 512 re = uregex_openC("rx", 0, NULL, &status); 513 514 uregex_setText(re, text1, -1, &status); 515 result = uregex_find(re, 0, &status); 516 TEST_ASSERT(result == TRUE); 517 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 518 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 519 TEST_ASSERT_SUCCESS(status); 520 521 result = uregex_find(re, 9, &status); 522 TEST_ASSERT(result == TRUE); 523 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 524 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 525 TEST_ASSERT_SUCCESS(status); 526 527 result = uregex_find(re, 14, &status); 528 TEST_ASSERT(result == FALSE); 529 TEST_ASSERT_SUCCESS(status); 530 531 status = U_ZERO_ERROR; 532 uregex_reset(re, 0, &status); 533 534 result = uregex_findNext(re, &status); 535 TEST_ASSERT(result == TRUE); 536 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 537 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 538 TEST_ASSERT_SUCCESS(status); 539 540 result = uregex_findNext(re, &status); 541 TEST_ASSERT(result == TRUE); 542 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 543 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 544 TEST_ASSERT_SUCCESS(status); 545 546 status = U_ZERO_ERROR; 547 uregex_reset(re, 12, &status); 548 549 result = uregex_findNext(re, &status); 550 TEST_ASSERT(result == TRUE); 551 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 552 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 553 TEST_ASSERT_SUCCESS(status); 554 555 result = uregex_findNext(re, &status); 556 TEST_ASSERT(result == FALSE); 557 TEST_ASSERT_SUCCESS(status); 558 559 uregex_close(re); 560 } 561 562 /* 563 * groupCount 564 */ 565 { 566 int32_t result; 567 568 status = U_ZERO_ERROR; 569 re = uregex_openC("abc", 0, NULL, &status); 570 result = uregex_groupCount(re, &status); 571 TEST_ASSERT_SUCCESS(status); 572 TEST_ASSERT(result == 0); 573 uregex_close(re); 574 575 status = U_ZERO_ERROR; 576 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status); 577 result = uregex_groupCount(re, &status); 578 TEST_ASSERT_SUCCESS(status); 579 TEST_ASSERT(result == 3); 580 uregex_close(re); 581 582 } 583 584 585 /* 586 * group() 587 */ 588 { 589 UChar text1[80]; 590 UChar buf[80]; 591 UBool result; 592 int32_t resultSz; 593 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 594 595 status = U_ZERO_ERROR; 596 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 597 TEST_ASSERT_SUCCESS(status); 598 599 600 uregex_setText(re, text1, -1, &status); 601 result = uregex_find(re, 0, &status); 602 TEST_ASSERT(result==TRUE); 603 604 /* Capture Group 0, the full match. Should succeed. */ 605 status = U_ZERO_ERROR; 606 resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status); 607 TEST_ASSERT_SUCCESS(status); 608 TEST_ASSERT_STRING("abc interior def", buf, TRUE); 609 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 610 611 /* Capture group #1. Should succeed. */ 612 status = U_ZERO_ERROR; 613 resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status); 614 TEST_ASSERT_SUCCESS(status); 615 TEST_ASSERT_STRING(" interior ", buf, TRUE); 616 TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); 617 618 /* Capture group out of range. Error. */ 619 status = U_ZERO_ERROR; 620 uregex_group(re, 2, buf, sizeof(buf)/2, &status); 621 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 622 623 /* NULL buffer, pure pre-flight */ 624 status = U_ZERO_ERROR; 625 resultSz = uregex_group(re, 0, NULL, 0, &status); 626 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 627 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 628 629 /* Too small buffer, truncated string */ 630 status = U_ZERO_ERROR; 631 memset(buf, -1, sizeof(buf)); 632 resultSz = uregex_group(re, 0, buf, 5, &status); 633 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 634 TEST_ASSERT_STRING("abc i", buf, FALSE); 635 TEST_ASSERT(buf[5] == (UChar)0xffff); 636 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 637 638 /* Output string just fits buffer, no NUL term. */ 639 status = U_ZERO_ERROR; 640 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status); 641 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 642 TEST_ASSERT_STRING("abc interior def", buf, FALSE); 643 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 644 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff); 645 646 uregex_close(re); 647 648 } 649 650 /* 651 * Regions 652 */ 653 654 655 /* SetRegion(), getRegion() do something */ 656 TEST_SETUP(".*", "0123456789ABCDEF", 0) 657 UChar resultString[40]; 658 TEST_ASSERT(uregex_regionStart(re, &status) == 0); 659 TEST_ASSERT(uregex_regionEnd(re, &status) == 16); 660 uregex_setRegion(re, 3, 6, &status); 661 TEST_ASSERT(uregex_regionStart(re, &status) == 3); 662 TEST_ASSERT(uregex_regionEnd(re, &status) == 6); 663 TEST_ASSERT(uregex_findNext(re, &status)); 664 TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3) 665 TEST_ASSERT_STRING("345", resultString, TRUE); 666 TEST_TEARDOWN; 667 668 /* find(start=-1) uses regions */ 669 TEST_SETUP(".*", "0123456789ABCDEF", 0); 670 uregex_setRegion(re, 4, 6, &status); 671 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); 672 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 673 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 674 TEST_TEARDOWN; 675 676 /* find (start >=0) does not use regions */ 677 TEST_SETUP(".*", "0123456789ABCDEF", 0); 678 uregex_setRegion(re, 4, 6, &status); 679 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 680 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 681 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 682 TEST_TEARDOWN; 683 684 /* findNext() obeys regions */ 685 TEST_SETUP(".", "0123456789ABCDEF", 0); 686 uregex_setRegion(re, 4, 6, &status); 687 TEST_ASSERT(uregex_findNext(re,&status) == TRUE); 688 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 689 TEST_ASSERT(uregex_findNext(re, &status) == TRUE); 690 TEST_ASSERT(uregex_start(re, 0, &status) == 5); 691 TEST_ASSERT(uregex_findNext(re, &status) == FALSE); 692 TEST_TEARDOWN; 693 694 /* matches(start=-1) uses regions */ 695 /* Also, verify that non-greedy *? succeeds in finding the full match. */ 696 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 697 uregex_setRegion(re, 4, 6, &status); 698 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE); 699 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 700 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 701 TEST_TEARDOWN; 702 703 /* matches (start >=0) does not use regions */ 704 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 705 uregex_setRegion(re, 4, 6, &status); 706 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE); 707 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 708 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 709 TEST_TEARDOWN; 710 711 /* lookingAt(start=-1) uses regions */ 712 /* Also, verify that non-greedy *? finds the first (shortest) match. */ 713 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 714 uregex_setRegion(re, 4, 6, &status); 715 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE); 716 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 717 TEST_ASSERT(uregex_end(re, 0, &status) == 4); 718 TEST_TEARDOWN; 719 720 /* lookingAt (start >=0) does not use regions */ 721 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 722 uregex_setRegion(re, 4, 6, &status); 723 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE); 724 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 725 TEST_ASSERT(uregex_end(re, 0, &status) == 0); 726 TEST_TEARDOWN; 727 728 /* hitEnd() */ 729 TEST_SETUP("[a-f]*", "abcdefghij", 0); 730 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 731 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE); 732 TEST_TEARDOWN; 733 734 TEST_SETUP("[a-f]*", "abcdef", 0); 735 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 736 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE); 737 TEST_TEARDOWN; 738 739 /* requireEnd */ 740 TEST_SETUP("abcd", "abcd", 0); 741 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 742 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE); 743 TEST_TEARDOWN; 744 745 TEST_SETUP("abcd$", "abcd", 0); 746 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 747 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE); 748 TEST_TEARDOWN; 749 750 /* anchoringBounds */ 751 TEST_SETUP("abc$", "abcdef", 0); 752 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE); 753 uregex_useAnchoringBounds(re, FALSE, &status); 754 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE); 755 756 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); 757 uregex_useAnchoringBounds(re, TRUE, &status); 758 uregex_setRegion(re, 0, 3, &status); 759 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); 760 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 761 TEST_TEARDOWN; 762 763 /* Transparent Bounds */ 764 TEST_SETUP("abc(?=def)", "abcdef", 0); 765 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE); 766 uregex_useTransparentBounds(re, TRUE, &status); 767 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE); 768 769 uregex_useTransparentBounds(re, FALSE, &status); 770 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */ 771 uregex_setRegion(re, 0, 3, &status); 772 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */ 773 uregex_useTransparentBounds(re, TRUE, &status); 774 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */ 775 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 776 TEST_TEARDOWN; 777 778 779 /* 780 * replaceFirst() 781 */ 782 { 783 UChar text1[80]; 784 UChar text2[80]; 785 UChar replText[80]; 786 UChar buf[80]; 787 int32_t resultSz; 788 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 789 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 790 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); 791 792 status = U_ZERO_ERROR; 793 re = uregex_openC("x(.*?)x", 0, NULL, &status); 794 TEST_ASSERT_SUCCESS(status); 795 796 /* Normal case, with match */ 797 uregex_setText(re, text1, -1, &status); 798 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); 799 TEST_ASSERT_SUCCESS(status); 800 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); 801 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 802 803 /* No match. Text should copy to output with no changes. */ 804 status = U_ZERO_ERROR; 805 uregex_setText(re, text2, -1, &status); 806 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); 807 TEST_ASSERT_SUCCESS(status); 808 TEST_ASSERT_STRING("No match here.", buf, TRUE); 809 TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); 810 811 /* Match, output just fills buffer, no termination warning. */ 812 status = U_ZERO_ERROR; 813 uregex_setText(re, text1, -1, &status); 814 memset(buf, -1, sizeof(buf)); 815 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 816 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 817 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 818 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 819 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 820 821 /* Do the replaceFirst again, without first resetting anything. 822 * Should give the same results. 823 */ 824 status = U_ZERO_ERROR; 825 memset(buf, -1, sizeof(buf)); 826 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 827 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 828 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 829 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 830 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 831 832 /* NULL buffer, zero buffer length */ 833 status = U_ZERO_ERROR; 834 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status); 835 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 836 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 837 838 /* Buffer too small by one */ 839 status = U_ZERO_ERROR; 840 memset(buf, -1, sizeof(buf)); 841 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status); 842 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 843 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE); 844 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 845 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 846 847 uregex_close(re); 848 } 849 850 851 /* 852 * replaceAll() 853 */ 854 { 855 UChar text1[80]; /* "Replace xaax x1x x...x." */ 856 UChar text2[80]; /* "No match Here" */ 857 UChar replText[80]; /* "<$1>" */ 858 UChar replText2[80]; /* "<<$1>>" */ 859 const char * pattern = "x(.*?)x"; 860 const char * expectedResult = "Replace <aa> <1> <...>."; 861 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>."; 862 UChar buf[80]; 863 int32_t resultSize; 864 int32_t expectedResultSize; 865 int32_t expectedResultSize2; 866 int32_t i; 867 868 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 869 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 870 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); 871 u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2); 872 expectedResultSize = strlen(expectedResult); 873 expectedResultSize2 = strlen(expectedResult2); 874 875 status = U_ZERO_ERROR; 876 re = uregex_openC(pattern, 0, NULL, &status); 877 TEST_ASSERT_SUCCESS(status); 878 879 /* Normal case, with match */ 880 uregex_setText(re, text1, -1, &status); 881 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); 882 TEST_ASSERT_SUCCESS(status); 883 TEST_ASSERT_STRING(expectedResult, buf, TRUE); 884 TEST_ASSERT(resultSize == expectedResultSize); 885 886 /* No match. Text should copy to output with no changes. */ 887 status = U_ZERO_ERROR; 888 uregex_setText(re, text2, -1, &status); 889 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); 890 TEST_ASSERT_SUCCESS(status); 891 TEST_ASSERT_STRING("No match here.", buf, TRUE); 892 TEST_ASSERT(resultSize == u_strlen(text2)); 893 894 /* Match, output just fills buffer, no termination warning. */ 895 status = U_ZERO_ERROR; 896 uregex_setText(re, text1, -1, &status); 897 memset(buf, -1, sizeof(buf)); 898 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status); 899 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 900 TEST_ASSERT_STRING(expectedResult, buf, FALSE); 901 TEST_ASSERT(resultSize == expectedResultSize); 902 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 903 904 /* Do the replaceFirst again, without first resetting anything. 905 * Should give the same results. 906 */ 907 status = U_ZERO_ERROR; 908 memset(buf, -1, sizeof(buf)); 909 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status); 910 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 911 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); 912 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 913 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 914 915 /* NULL buffer, zero buffer length */ 916 status = U_ZERO_ERROR; 917 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status); 918 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 919 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 920 921 /* Buffer too small. Try every size, which will tickle edge cases 922 * in uregex_appendReplacement (used by replaceAll) */ 923 for (i=0; i<expectedResultSize; i++) { 924 char expected[80]; 925 status = U_ZERO_ERROR; 926 memset(buf, -1, sizeof(buf)); 927 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status); 928 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 929 strcpy(expected, expectedResult); 930 expected[i] = 0; 931 TEST_ASSERT_STRING(expected, buf, FALSE); 932 TEST_ASSERT(resultSize == expectedResultSize); 933 TEST_ASSERT(buf[i] == (UChar)0xffff); 934 } 935 936 /* Buffer too small. Same as previous test, except this time the replacement 937 * text is longer than the match capture group, making the length of the complete 938 * replacement longer than the original string. 939 */ 940 for (i=0; i<expectedResultSize2; i++) { 941 char expected[80]; 942 status = U_ZERO_ERROR; 943 memset(buf, -1, sizeof(buf)); 944 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status); 945 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 946 strcpy(expected, expectedResult2); 947 expected[i] = 0; 948 TEST_ASSERT_STRING(expected, buf, FALSE); 949 TEST_ASSERT(resultSize == expectedResultSize2); 950 TEST_ASSERT(buf[i] == (UChar)0xffff); 951 } 952 953 954 uregex_close(re); 955 } 956 957 958 /* 959 * appendReplacement() 960 */ 961 { 962 UChar text[100]; 963 UChar repl[100]; 964 UChar buf[100]; 965 UChar *bufPtr; 966 int32_t bufCap; 967 968 969 status = U_ZERO_ERROR; 970 re = uregex_openC(".*", 0, 0, &status); 971 TEST_ASSERT_SUCCESS(status); 972 973 u_uastrncpy(text, "whatever", sizeof(text)/2); 974 u_uastrncpy(repl, "some other", sizeof(repl)/2); 975 uregex_setText(re, text, -1, &status); 976 977 /* match covers whole target string */ 978 uregex_find(re, 0, &status); 979 TEST_ASSERT_SUCCESS(status); 980 bufPtr = buf; 981 bufCap = sizeof(buf) / 2; 982 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 983 TEST_ASSERT_SUCCESS(status); 984 TEST_ASSERT_STRING("some other", buf, TRUE); 985 986 /* Match has \u \U escapes */ 987 uregex_find(re, 0, &status); 988 TEST_ASSERT_SUCCESS(status); 989 bufPtr = buf; 990 bufCap = sizeof(buf) / 2; 991 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); 992 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 993 TEST_ASSERT_SUCCESS(status); 994 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 995 996 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */ 997 status = U_ZERO_ERROR; 998 uregex_find(re, 0, &status); 999 TEST_ASSERT_SUCCESS(status); 1000 bufPtr = buf; 1001 status = U_BUFFER_OVERFLOW_ERROR; 1002 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status); 1003 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1004 1005 uregex_close(re); 1006 } 1007 1008 1009 /* 1010 * appendTail(). Checked in ReplaceFirst(), replaceAll(). 1011 */ 1012 1013 /* 1014 * split() 1015 */ 1016 { 1017 UChar textToSplit[80]; 1018 UChar text2[80]; 1019 UChar buf[200]; 1020 UChar *fields[10]; 1021 int32_t numFields; 1022 int32_t requiredCapacity; 1023 int32_t spaceNeeded; 1024 int32_t sz; 1025 1026 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); 1027 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1028 1029 status = U_ZERO_ERROR; 1030 re = uregex_openC(":", 0, NULL, &status); 1031 1032 1033 /* Simple split */ 1034 1035 uregex_setText(re, textToSplit, -1, &status); 1036 TEST_ASSERT_SUCCESS(status); 1037 1038 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1039 if (U_SUCCESS(status)) { 1040 memset(fields, -1, sizeof(fields)); 1041 numFields = 1042 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); 1043 TEST_ASSERT_SUCCESS(status); 1044 1045 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1046 if(U_SUCCESS(status)) { 1047 TEST_ASSERT(numFields == 3); 1048 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1049 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1050 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1051 TEST_ASSERT(fields[3] == NULL); 1052 1053 spaceNeeded = u_strlen(textToSplit) - 1054 (numFields - 1) + /* Field delimiters do not appear in output */ 1055 numFields; /* Each field gets a NUL terminator */ 1056 1057 TEST_ASSERT(spaceNeeded == requiredCapacity); 1058 } 1059 } 1060 1061 uregex_close(re); 1062 1063 1064 /* Split with too few output strings available */ 1065 status = U_ZERO_ERROR; 1066 re = uregex_openC(":", 0, NULL, &status); 1067 uregex_setText(re, textToSplit, -1, &status); 1068 TEST_ASSERT_SUCCESS(status); 1069 1070 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1071 if(U_SUCCESS(status)) { 1072 memset(fields, -1, sizeof(fields)); 1073 numFields = 1074 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); 1075 TEST_ASSERT_SUCCESS(status); 1076 1077 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1078 if(U_SUCCESS(status)) { 1079 TEST_ASSERT(numFields == 2); 1080 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1081 TEST_ASSERT_STRING(" second: third", fields[1], TRUE); 1082 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1083 1084 spaceNeeded = u_strlen(textToSplit) - 1085 (numFields - 1) + /* Field delimiters do not appear in output */ 1086 numFields; /* Each field gets a NUL terminator */ 1087 1088 TEST_ASSERT(spaceNeeded == requiredCapacity); 1089 1090 /* Split with a range of output buffer sizes. */ 1091 spaceNeeded = u_strlen(textToSplit) - 1092 (numFields - 1) + /* Field delimiters do not appear in output */ 1093 numFields; /* Each field gets a NUL terminator */ 1094 1095 for (sz=0; sz < spaceNeeded+1; sz++) { 1096 memset(fields, -1, sizeof(fields)); 1097 status = U_ZERO_ERROR; 1098 numFields = 1099 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status); 1100 if (sz >= spaceNeeded) { 1101 TEST_ASSERT_SUCCESS(status); 1102 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1103 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1104 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1105 } else { 1106 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1107 } 1108 TEST_ASSERT(numFields == 3); 1109 TEST_ASSERT(fields[3] == NULL); 1110 TEST_ASSERT(spaceNeeded == requiredCapacity); 1111 } 1112 } 1113 } 1114 1115 uregex_close(re); 1116 } 1117 1118 1119 1120 1121 /* Split(), part 2. Patterns with capture groups. The capture group text 1122 * comes out as additional fields. */ 1123 { 1124 UChar textToSplit[80]; 1125 UChar buf[200]; 1126 UChar *fields[10]; 1127 int32_t numFields; 1128 int32_t requiredCapacity; 1129 int32_t spaceNeeded; 1130 int32_t sz; 1131 1132 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); 1133 1134 status = U_ZERO_ERROR; 1135 re = uregex_openC("<(.*?)>", 0, NULL, &status); 1136 1137 uregex_setText(re, textToSplit, -1, &status); 1138 TEST_ASSERT_SUCCESS(status); 1139 1140 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1141 if(U_SUCCESS(status)) { 1142 memset(fields, -1, sizeof(fields)); 1143 numFields = 1144 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); 1145 TEST_ASSERT_SUCCESS(status); 1146 1147 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1148 if(U_SUCCESS(status)) { 1149 TEST_ASSERT(numFields == 5); 1150 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1151 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1152 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1153 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1154 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1155 TEST_ASSERT(fields[5] == NULL); 1156 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1157 TEST_ASSERT(spaceNeeded == requiredCapacity); 1158 } 1159 } 1160 1161 /* Split with too few output strings available (2) */ 1162 status = U_ZERO_ERROR; 1163 memset(fields, -1, sizeof(fields)); 1164 numFields = 1165 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); 1166 TEST_ASSERT_SUCCESS(status); 1167 1168 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1169 if(U_SUCCESS(status)) { 1170 TEST_ASSERT(numFields == 2); 1171 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1172 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE); 1173 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1174 1175 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */ 1176 TEST_ASSERT(spaceNeeded == requiredCapacity); 1177 } 1178 1179 /* Split with too few output strings available (3) */ 1180 status = U_ZERO_ERROR; 1181 memset(fields, -1, sizeof(fields)); 1182 numFields = 1183 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status); 1184 TEST_ASSERT_SUCCESS(status); 1185 1186 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1187 if(U_SUCCESS(status)) { 1188 TEST_ASSERT(numFields == 3); 1189 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1190 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1191 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE); 1192 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*))); 1193 1194 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */ 1195 TEST_ASSERT(spaceNeeded == requiredCapacity); 1196 } 1197 1198 /* Split with just enough output strings available (5) */ 1199 status = U_ZERO_ERROR; 1200 memset(fields, -1, sizeof(fields)); 1201 numFields = 1202 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status); 1203 TEST_ASSERT_SUCCESS(status); 1204 1205 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1206 if(U_SUCCESS(status)) { 1207 TEST_ASSERT(numFields == 5); 1208 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1209 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1210 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1211 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1212 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1213 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*))); 1214 1215 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1216 TEST_ASSERT(spaceNeeded == requiredCapacity); 1217 } 1218 1219 /* Split, end of text is a field delimiter. */ 1220 status = U_ZERO_ERROR; 1221 sz = strlen("first <tag-a> second<tag-b>"); 1222 uregex_setText(re, textToSplit, sz, &status); 1223 TEST_ASSERT_SUCCESS(status); 1224 1225 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1226 if(U_SUCCESS(status)) { 1227 memset(fields, -1, sizeof(fields)); 1228 numFields = 1229 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status); 1230 TEST_ASSERT_SUCCESS(status); 1231 1232 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1233 if(U_SUCCESS(status)) { 1234 TEST_ASSERT(numFields == 4); 1235 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1236 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1237 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1238 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1239 TEST_ASSERT(fields[4] == NULL); 1240 TEST_ASSERT(fields[8] == NULL); 1241 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); 1242 spaceNeeded = strlen("first .tag-a. second.tag-b."); /* "." at NUL positions */ 1243 TEST_ASSERT(spaceNeeded == requiredCapacity); 1244 } 1245 } 1246 1247 uregex_close(re); 1248 } 1249 1250 /* 1251 * set/getTimeLimit 1252 */ 1253 TEST_SETUP("abc$", "abcdef", 0); 1254 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0); 1255 uregex_setTimeLimit(re, 1000, &status); 1256 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1257 TEST_ASSERT_SUCCESS(status); 1258 uregex_setTimeLimit(re, -1, &status); 1259 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1260 status = U_ZERO_ERROR; 1261 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1262 TEST_TEARDOWN; 1263 1264 /* 1265 * set/get Stack Limit 1266 */ 1267 TEST_SETUP("abc$", "abcdef", 0); 1268 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000); 1269 uregex_setStackLimit(re, 40000, &status); 1270 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1271 TEST_ASSERT_SUCCESS(status); 1272 uregex_setStackLimit(re, -1, &status); 1273 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1274 status = U_ZERO_ERROR; 1275 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1276 TEST_TEARDOWN; 1277 1278 1279 /* 1280 * Get/Set callback functions 1281 * This test is copied from intltest regex/Callbacks 1282 * The pattern and test data will run long enough to cause the callback 1283 * to be invoked. The nested '+' operators give exponential time 1284 * behavior with increasing string length. 1285 */ 1286 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0) 1287 callBackContext cbInfo = {4, 0, 0}; 1288 const void *pContext = &cbInfo; 1289 URegexMatchCallback *returnedFn = &TestCallbackFn; 1290 1291 /* Getting the callback fn when it hasn't been set must return NULL */ 1292 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1293 TEST_ASSERT_SUCCESS(status); 1294 TEST_ASSERT(returnedFn == NULL); 1295 TEST_ASSERT(pContext == NULL); 1296 1297 /* Set thecallback and do a match. */ 1298 /* The callback function should record that it has been called. */ 1299 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status); 1300 TEST_ASSERT_SUCCESS(status); 1301 TEST_ASSERT(cbInfo.numCalls == 0); 1302 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE); 1303 TEST_ASSERT_SUCCESS(status); 1304 TEST_ASSERT(cbInfo.numCalls > 0); 1305 1306 /* Getting the callback should return the values that were set above. */ 1307 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1308 TEST_ASSERT(returnedFn == &TestCallbackFn); 1309 TEST_ASSERT(pContext == &cbInfo); 1310 1311 TEST_TEARDOWN; 1312 } 1313 1314 1315 1316 static void TestBug4315(void) { 1317 UErrorCode theICUError = U_ZERO_ERROR; 1318 URegularExpression *theRegEx; 1319 UChar *textBuff; 1320 const char *thePattern; 1321 UChar theString[100]; 1322 UChar *destFields[24]; 1323 int32_t neededLength1; 1324 int32_t neededLength2; 1325 1326 int32_t wordCount = 0; 1327 int32_t destFieldsSize = 24; 1328 1329 thePattern = "ck "; 1330 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle."); 1331 1332 /* open a regex */ 1333 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError); 1334 TEST_ASSERT_SUCCESS(theICUError); 1335 1336 /* set the input string */ 1337 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError); 1338 TEST_ASSERT_SUCCESS(theICUError); 1339 1340 /* split */ 1341 /*explicitly pass NULL and 0 to force the overflow error -> this is where the 1342 * error occurs! */ 1343 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields, 1344 destFieldsSize, &theICUError); 1345 1346 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR); 1347 TEST_ASSERT(wordCount==3); 1348 1349 if(theICUError == U_BUFFER_OVERFLOW_ERROR) 1350 { 1351 theICUError = U_ZERO_ERROR; 1352 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1)); 1353 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2, 1354 destFields, destFieldsSize, &theICUError); 1355 TEST_ASSERT(wordCount==3); 1356 TEST_ASSERT_SUCCESS(theICUError); 1357 TEST_ASSERT(neededLength1 == neededLength2); 1358 TEST_ASSERT_STRING("The qui", destFields[0], TRUE); 1359 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE); 1360 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE); 1361 TEST_ASSERT(destFields[3] == NULL); 1362 free(textBuff); 1363 } 1364 uregex_close(theRegEx); 1365 } 1366 1367 /* Based on TestRegexCAPI() */ 1368 static void TestUTextAPI(void) { 1369 UErrorCode status = U_ZERO_ERROR; 1370 URegularExpression *re; 1371 UText patternText = UTEXT_INITIALIZER; 1372 UChar pat[200]; 1373 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 }; 1374 1375 /* Mimimalist open/close */ 1376 utext_openUTF8(&patternText, patternTextUTF8, -1, &status); 1377 re = uregex_openUText(&patternText, 0, 0, &status); 1378 if (U_FAILURE(status)) { 1379 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 1380 utext_close(&patternText); 1381 return; 1382 } 1383 uregex_close(re); 1384 1385 /* Open with all flag values set */ 1386 status = U_ZERO_ERROR; 1387 re = uregex_openUText(&patternText, 1388 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 1389 0, &status); 1390 TEST_ASSERT_SUCCESS(status); 1391 uregex_close(re); 1392 1393 /* Open with an invalid flag */ 1394 status = U_ZERO_ERROR; 1395 re = uregex_openUText(&patternText, 0x40000000, 0, &status); 1396 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 1397 uregex_close(re); 1398 1399 /* open with an invalid parameter */ 1400 status = U_ZERO_ERROR; 1401 re = uregex_openUText(NULL, 1402 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 1403 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 1404 1405 /* 1406 * clone 1407 */ 1408 { 1409 URegularExpression *clone1; 1410 URegularExpression *clone2; 1411 URegularExpression *clone3; 1412 UChar testString1[30]; 1413 UChar testString2[30]; 1414 UBool result; 1415 1416 1417 status = U_ZERO_ERROR; 1418 re = uregex_openUText(&patternText, 0, 0, &status); 1419 TEST_ASSERT_SUCCESS(status); 1420 clone1 = uregex_clone(re, &status); 1421 TEST_ASSERT_SUCCESS(status); 1422 TEST_ASSERT(clone1 != NULL); 1423 1424 status = U_ZERO_ERROR; 1425 clone2 = uregex_clone(re, &status); 1426 TEST_ASSERT_SUCCESS(status); 1427 TEST_ASSERT(clone2 != NULL); 1428 uregex_close(re); 1429 1430 status = U_ZERO_ERROR; 1431 clone3 = uregex_clone(clone2, &status); 1432 TEST_ASSERT_SUCCESS(status); 1433 TEST_ASSERT(clone3 != NULL); 1434 1435 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); 1436 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); 1437 1438 status = U_ZERO_ERROR; 1439 uregex_setText(clone1, testString1, -1, &status); 1440 TEST_ASSERT_SUCCESS(status); 1441 result = uregex_lookingAt(clone1, 0, &status); 1442 TEST_ASSERT_SUCCESS(status); 1443 TEST_ASSERT(result==TRUE); 1444 1445 status = U_ZERO_ERROR; 1446 uregex_setText(clone2, testString2, -1, &status); 1447 TEST_ASSERT_SUCCESS(status); 1448 result = uregex_lookingAt(clone2, 0, &status); 1449 TEST_ASSERT_SUCCESS(status); 1450 TEST_ASSERT(result==FALSE); 1451 result = uregex_find(clone2, 0, &status); 1452 TEST_ASSERT_SUCCESS(status); 1453 TEST_ASSERT(result==TRUE); 1454 1455 uregex_close(clone1); 1456 uregex_close(clone2); 1457 uregex_close(clone3); 1458 1459 } 1460 1461 /* 1462 * pattern() and patternText() 1463 */ 1464 { 1465 const UChar *resultPat; 1466 int32_t resultLen; 1467 UText *resultText; 1468 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */ 1469 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ 1470 u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */ 1471 status = U_ZERO_ERROR; 1472 1473 utext_openUTF8(&patternText, str_hello, -1, &status); 1474 re = uregex_open(pat, -1, 0, NULL, &status); 1475 resultPat = uregex_pattern(re, &resultLen, &status); 1476 TEST_ASSERT_SUCCESS(status); 1477 1478 /* The TEST_ASSERT_SUCCESS above should change too... */ 1479 if (U_SUCCESS(status)) { 1480 TEST_ASSERT(resultLen == -1); 1481 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 1482 } 1483 1484 resultText = uregex_patternUText(re, &status); 1485 TEST_ASSERT_SUCCESS(status); 1486 TEST_ASSERT_UTEXT(str_hello, resultText); 1487 1488 uregex_close(re); 1489 1490 status = U_ZERO_ERROR; 1491 re = uregex_open(pat, 3, 0, NULL, &status); 1492 resultPat = uregex_pattern(re, &resultLen, &status); 1493 TEST_ASSERT_SUCCESS(status); 1494 1495 /* The TEST_ASSERT_SUCCESS above should change too... */ 1496 if (U_SUCCESS(status)) { 1497 TEST_ASSERT(resultLen == 3); 1498 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 1499 TEST_ASSERT(u_strlen(resultPat) == 3); 1500 } 1501 1502 resultText = uregex_patternUText(re, &status); 1503 TEST_ASSERT_SUCCESS(status); 1504 TEST_ASSERT_UTEXT(str_hel, resultText); 1505 1506 uregex_close(re); 1507 } 1508 1509 /* 1510 * setUText() and lookingAt() 1511 */ 1512 { 1513 UText text1 = UTEXT_INITIALIZER; 1514 UText text2 = UTEXT_INITIALIZER; 1515 UBool result; 1516 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1517 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1518 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1519 status = U_ZERO_ERROR; 1520 utext_openUTF8(&text1, str_abcccd, -1, &status); 1521 utext_openUTF8(&text2, str_abcccxd, -1, &status); 1522 1523 utext_openUTF8(&patternText, str_abcd, -1, &status); 1524 re = uregex_openUText(&patternText, 0, NULL, &status); 1525 TEST_ASSERT_SUCCESS(status); 1526 1527 /* Operation before doing a setText should fail... */ 1528 status = U_ZERO_ERROR; 1529 uregex_lookingAt(re, 0, &status); 1530 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 1531 1532 status = U_ZERO_ERROR; 1533 uregex_setUText(re, &text1, &status); 1534 result = uregex_lookingAt(re, 0, &status); 1535 TEST_ASSERT(result == TRUE); 1536 TEST_ASSERT_SUCCESS(status); 1537 1538 status = U_ZERO_ERROR; 1539 uregex_setUText(re, &text2, &status); 1540 result = uregex_lookingAt(re, 0, &status); 1541 TEST_ASSERT(result == FALSE); 1542 TEST_ASSERT_SUCCESS(status); 1543 1544 status = U_ZERO_ERROR; 1545 uregex_setUText(re, &text1, &status); 1546 result = uregex_lookingAt(re, 0, &status); 1547 TEST_ASSERT(result == TRUE); 1548 TEST_ASSERT_SUCCESS(status); 1549 1550 uregex_close(re); 1551 utext_close(&text1); 1552 utext_close(&text2); 1553 } 1554 1555 1556 /* 1557 * getText() and getUText() 1558 */ 1559 { 1560 UText text1 = UTEXT_INITIALIZER; 1561 UText text2 = UTEXT_INITIALIZER; 1562 UChar text2Chars[20]; 1563 UText *resultText; 1564 const UChar *result; 1565 int32_t textLength; 1566 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1567 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1568 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1569 1570 1571 status = U_ZERO_ERROR; 1572 utext_openUTF8(&text1, str_abcccd, -1, &status); 1573 u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2); 1574 utext_openUChars(&text2, text2Chars, -1, &status); 1575 1576 utext_openUTF8(&patternText, str_abcd, -1, &status); 1577 re = uregex_openUText(&patternText, 0, NULL, &status); 1578 1579 /* First set a UText */ 1580 uregex_setUText(re, &text1, &status); 1581 resultText = uregex_getUText(re, NULL, &status); 1582 TEST_ASSERT_SUCCESS(status); 1583 TEST_ASSERT(resultText != &text1); 1584 utext_setNativeIndex(resultText, 0); 1585 utext_setNativeIndex(&text1, 0); 1586 TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); 1587 utext_close(resultText); 1588 1589 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */ 1590 TEST_ASSERT(textLength == -1 || textLength == 6); 1591 resultText = uregex_getUText(re, NULL, &status); 1592 TEST_ASSERT_SUCCESS(status); 1593 TEST_ASSERT(resultText != &text1); 1594 utext_setNativeIndex(resultText, 0); 1595 utext_setNativeIndex(&text1, 0); 1596 TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); 1597 utext_close(resultText); 1598 1599 /* Then set a UChar * */ 1600 uregex_setText(re, text2Chars, 7, &status); 1601 resultText = uregex_getUText(re, NULL, &status); 1602 TEST_ASSERT_SUCCESS(status); 1603 utext_setNativeIndex(resultText, 0); 1604 utext_setNativeIndex(&text2, 0); 1605 TEST_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0); 1606 utext_close(resultText); 1607 result = uregex_getText(re, &textLength, &status); 1608 TEST_ASSERT(textLength == 7); 1609 1610 uregex_close(re); 1611 utext_close(&text1); 1612 utext_close(&text2); 1613 } 1614 1615 /* 1616 * matches() 1617 */ 1618 { 1619 UText text1 = UTEXT_INITIALIZER; 1620 UBool result; 1621 UText nullText = UTEXT_INITIALIZER; 1622 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */ 1623 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */ 1624 1625 status = U_ZERO_ERROR; 1626 utext_openUTF8(&text1, str_abcccde, -1, &status); 1627 utext_openUTF8(&patternText, str_abcd, -1, &status); 1628 re = uregex_openUText(&patternText, 0, NULL, &status); 1629 1630 uregex_setUText(re, &text1, &status); 1631 result = uregex_matches(re, 0, &status); 1632 TEST_ASSERT(result == FALSE); 1633 TEST_ASSERT_SUCCESS(status); 1634 uregex_close(re); 1635 1636 status = U_ZERO_ERROR; 1637 re = uregex_openC(".?", 0, NULL, &status); 1638 uregex_setUText(re, &text1, &status); 1639 result = uregex_matches(re, 7, &status); 1640 TEST_ASSERT(result == TRUE); 1641 TEST_ASSERT_SUCCESS(status); 1642 1643 status = U_ZERO_ERROR; 1644 utext_openUTF8(&nullText, "", -1, &status); 1645 uregex_setUText(re, &nullText, &status); 1646 TEST_ASSERT_SUCCESS(status); 1647 result = uregex_matches(re, 0, &status); 1648 TEST_ASSERT(result == TRUE); 1649 TEST_ASSERT_SUCCESS(status); 1650 1651 uregex_close(re); 1652 utext_close(&text1); 1653 utext_close(&nullText); 1654 } 1655 1656 1657 /* 1658 * lookingAt() Used in setText test. 1659 */ 1660 1661 1662 /* 1663 * find(), findNext, start, end, reset 1664 */ 1665 { 1666 UChar text1[50]; 1667 UBool result; 1668 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); 1669 status = U_ZERO_ERROR; 1670 re = uregex_openC("rx", 0, NULL, &status); 1671 1672 uregex_setText(re, text1, -1, &status); 1673 result = uregex_find(re, 0, &status); 1674 TEST_ASSERT(result == TRUE); 1675 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1676 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1677 TEST_ASSERT_SUCCESS(status); 1678 1679 result = uregex_find(re, 9, &status); 1680 TEST_ASSERT(result == TRUE); 1681 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 1682 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 1683 TEST_ASSERT_SUCCESS(status); 1684 1685 result = uregex_find(re, 14, &status); 1686 TEST_ASSERT(result == FALSE); 1687 TEST_ASSERT_SUCCESS(status); 1688 1689 status = U_ZERO_ERROR; 1690 uregex_reset(re, 0, &status); 1691 1692 result = uregex_findNext(re, &status); 1693 TEST_ASSERT(result == TRUE); 1694 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1695 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1696 TEST_ASSERT_SUCCESS(status); 1697 1698 result = uregex_findNext(re, &status); 1699 TEST_ASSERT(result == TRUE); 1700 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 1701 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 1702 TEST_ASSERT_SUCCESS(status); 1703 1704 status = U_ZERO_ERROR; 1705 uregex_reset(re, 12, &status); 1706 1707 result = uregex_findNext(re, &status); 1708 TEST_ASSERT(result == TRUE); 1709 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 1710 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 1711 TEST_ASSERT_SUCCESS(status); 1712 1713 result = uregex_findNext(re, &status); 1714 TEST_ASSERT(result == FALSE); 1715 TEST_ASSERT_SUCCESS(status); 1716 1717 uregex_close(re); 1718 } 1719 1720 /* 1721 * group() 1722 */ 1723 { 1724 UChar text1[80]; 1725 UText *actual; 1726 UBool result; 1727 1728 const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */ 1729 const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */ 1730 1731 1732 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 1733 1734 status = U_ZERO_ERROR; 1735 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 1736 TEST_ASSERT_SUCCESS(status); 1737 1738 uregex_setText(re, text1, -1, &status); 1739 result = uregex_find(re, 0, &status); 1740 TEST_ASSERT(result==TRUE); 1741 1742 /* Capture Group 0, the full match. Should succeed. */ 1743 status = U_ZERO_ERROR; 1744 actual = uregex_groupUText(re, 0, NULL, &status); 1745 TEST_ASSERT_SUCCESS(status); 1746 TEST_ASSERT_UTEXT(str_abcinteriordef, actual); 1747 utext_close(actual); 1748 1749 /* Capture group #1. Should succeed. */ 1750 status = U_ZERO_ERROR; 1751 actual = uregex_groupUText(re, 1, NULL, &status); 1752 TEST_ASSERT_SUCCESS(status); 1753 TEST_ASSERT_UTEXT(str_interior, actual); 1754 utext_close(actual); 1755 1756 /* Capture group out of range. Error. */ 1757 status = U_ZERO_ERROR; 1758 actual = uregex_groupUText(re, 2, NULL, &status); 1759 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1760 TEST_ASSERT(utext_nativeLength(actual) == 0); 1761 utext_close(actual); 1762 1763 uregex_close(re); 1764 1765 } 1766 1767 /* 1768 * replaceFirst() 1769 */ 1770 { 1771 UChar text1[80]; 1772 UChar text2[80]; 1773 UText replText = UTEXT_INITIALIZER; 1774 UText *result; 1775 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */ 1776 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ 1777 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */ 1778 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1779 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */ 1780 status = U_ZERO_ERROR; 1781 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 1782 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1783 utext_openUTF8(&replText, str_1x, -1, &status); 1784 1785 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1786 TEST_ASSERT_SUCCESS(status); 1787 1788 /* Normal case, with match */ 1789 uregex_setText(re, text1, -1, &status); 1790 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1791 TEST_ASSERT_SUCCESS(status); 1792 TEST_ASSERT_UTEXT(str_Replxxx, result); 1793 utext_close(result); 1794 1795 /* No match. Text should copy to output with no changes. */ 1796 uregex_setText(re, text2, -1, &status); 1797 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1798 TEST_ASSERT_SUCCESS(status); 1799 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1800 utext_close(result); 1801 1802 /* Unicode escapes */ 1803 uregex_setText(re, text1, -1, &status); 1804 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status); 1805 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1806 TEST_ASSERT_SUCCESS(status); 1807 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result); 1808 utext_close(result); 1809 1810 uregex_close(re); 1811 utext_close(&replText); 1812 } 1813 1814 1815 /* 1816 * replaceAll() 1817 */ 1818 { 1819 UChar text1[80]; 1820 UChar text2[80]; 1821 UText replText = UTEXT_INITIALIZER; 1822 UText *result; 1823 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1824 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */ 1825 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ 1826 status = U_ZERO_ERROR; 1827 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 1828 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1829 utext_openUTF8(&replText, str_1, -1, &status); 1830 1831 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1832 TEST_ASSERT_SUCCESS(status); 1833 1834 /* Normal case, with match */ 1835 uregex_setText(re, text1, -1, &status); 1836 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1837 TEST_ASSERT_SUCCESS(status); 1838 TEST_ASSERT_UTEXT(str_Replaceaa1, result); 1839 utext_close(result); 1840 1841 /* No match. Text should copy to output with no changes. */ 1842 uregex_setText(re, text2, -1, &status); 1843 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1844 TEST_ASSERT_SUCCESS(status); 1845 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1846 utext_close(result); 1847 1848 uregex_close(re); 1849 utext_close(&replText); 1850 } 1851 1852 1853 /* 1854 * appendReplacement() 1855 */ 1856 { 1857 UChar text[100]; 1858 UChar repl[100]; 1859 UChar buf[100]; 1860 UChar *bufPtr; 1861 int32_t bufCap; 1862 1863 status = U_ZERO_ERROR; 1864 re = uregex_openC(".*", 0, 0, &status); 1865 TEST_ASSERT_SUCCESS(status); 1866 1867 u_uastrncpy(text, "whatever", sizeof(text)/2); 1868 u_uastrncpy(repl, "some other", sizeof(repl)/2); 1869 uregex_setText(re, text, -1, &status); 1870 1871 /* match covers whole target string */ 1872 uregex_find(re, 0, &status); 1873 TEST_ASSERT_SUCCESS(status); 1874 bufPtr = buf; 1875 bufCap = sizeof(buf) / 2; 1876 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1877 TEST_ASSERT_SUCCESS(status); 1878 TEST_ASSERT_STRING("some other", buf, TRUE); 1879 1880 /* Match has \u \U escapes */ 1881 uregex_find(re, 0, &status); 1882 TEST_ASSERT_SUCCESS(status); 1883 bufPtr = buf; 1884 bufCap = sizeof(buf) / 2; 1885 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); 1886 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1887 TEST_ASSERT_SUCCESS(status); 1888 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 1889 1890 uregex_close(re); 1891 } 1892 1893 1894 /* 1895 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll(). 1896 */ 1897 1898 /* 1899 * splitUText() 1900 */ 1901 { 1902 UChar textToSplit[80]; 1903 UChar text2[80]; 1904 UText *fields[10]; 1905 int32_t numFields; 1906 int32_t i; 1907 1908 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); 1909 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1910 1911 status = U_ZERO_ERROR; 1912 re = uregex_openC(":", 0, NULL, &status); 1913 1914 1915 /* Simple split */ 1916 1917 uregex_setText(re, textToSplit, -1, &status); 1918 TEST_ASSERT_SUCCESS(status); 1919 1920 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1921 if (U_SUCCESS(status)) { 1922 memset(fields, 0, sizeof(fields)); 1923 numFields = uregex_splitUText(re, fields, 10, &status); 1924 TEST_ASSERT_SUCCESS(status); 1925 1926 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1927 if(U_SUCCESS(status)) { 1928 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */ 1929 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */ 1930 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */ 1931 TEST_ASSERT(numFields == 3); 1932 TEST_ASSERT_UTEXT(str_first, fields[0]); 1933 TEST_ASSERT_UTEXT(str_second, fields[1]); 1934 TEST_ASSERT_UTEXT(str_third, fields[2]); 1935 TEST_ASSERT(fields[3] == NULL); 1936 } 1937 for(i = 0; i < numFields; i++) { 1938 utext_close(fields[i]); 1939 } 1940 } 1941 1942 uregex_close(re); 1943 1944 1945 /* Split with too few output strings available */ 1946 status = U_ZERO_ERROR; 1947 re = uregex_openC(":", 0, NULL, &status); 1948 uregex_setText(re, textToSplit, -1, &status); 1949 TEST_ASSERT_SUCCESS(status); 1950 1951 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1952 if(U_SUCCESS(status)) { 1953 fields[0] = NULL; 1954 fields[1] = NULL; 1955 fields[2] = &patternText; 1956 numFields = uregex_splitUText(re, fields, 2, &status); 1957 TEST_ASSERT_SUCCESS(status); 1958 1959 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1960 if(U_SUCCESS(status)) { 1961 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 1962 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */ 1963 TEST_ASSERT(numFields == 2); 1964 TEST_ASSERT_UTEXT(str_first, fields[0]); 1965 TEST_ASSERT_UTEXT(str_secondthird, fields[1]); 1966 TEST_ASSERT(fields[2] == &patternText); 1967 } 1968 for(i = 0; i < numFields; i++) { 1969 utext_close(fields[i]); 1970 } 1971 } 1972 1973 uregex_close(re); 1974 } 1975 1976 /* splitUText(), part 2. Patterns with capture groups. The capture group text 1977 * comes out as additional fields. */ 1978 { 1979 UChar textToSplit[80]; 1980 UText *fields[10]; 1981 int32_t numFields; 1982 int32_t i; 1983 1984 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); 1985 1986 status = U_ZERO_ERROR; 1987 re = uregex_openC("<(.*?)>", 0, NULL, &status); 1988 1989 uregex_setText(re, textToSplit, -1, &status); 1990 TEST_ASSERT_SUCCESS(status); 1991 1992 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1993 if(U_SUCCESS(status)) { 1994 memset(fields, 0, sizeof(fields)); 1995 numFields = uregex_splitUText(re, fields, 10, &status); 1996 TEST_ASSERT_SUCCESS(status); 1997 1998 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1999 if(U_SUCCESS(status)) { 2000 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2001 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2002 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2003 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2004 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2005 2006 TEST_ASSERT(numFields == 5); 2007 TEST_ASSERT_UTEXT(str_first, fields[0]); 2008 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2009 TEST_ASSERT_UTEXT(str_second, fields[2]); 2010 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2011 TEST_ASSERT_UTEXT(str_third, fields[4]); 2012 TEST_ASSERT(fields[5] == NULL); 2013 } 2014 for(i = 0; i < numFields; i++) { 2015 utext_close(fields[i]); 2016 } 2017 } 2018 2019 /* Split with too few output strings available (2) */ 2020 status = U_ZERO_ERROR; 2021 fields[0] = NULL; 2022 fields[1] = NULL; 2023 fields[2] = &patternText; 2024 numFields = uregex_splitUText(re, fields, 2, &status); 2025 TEST_ASSERT_SUCCESS(status); 2026 2027 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2028 if(U_SUCCESS(status)) { 2029 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2030 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2031 TEST_ASSERT(numFields == 2); 2032 TEST_ASSERT_UTEXT(str_first, fields[0]); 2033 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]); 2034 TEST_ASSERT(fields[2] == &patternText); 2035 } 2036 for(i = 0; i < numFields; i++) { 2037 utext_close(fields[i]); 2038 } 2039 2040 2041 /* Split with too few output strings available (3) */ 2042 status = U_ZERO_ERROR; 2043 fields[0] = NULL; 2044 fields[1] = NULL; 2045 fields[2] = NULL; 2046 fields[3] = &patternText; 2047 numFields = uregex_splitUText(re, fields, 3, &status); 2048 TEST_ASSERT_SUCCESS(status); 2049 2050 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2051 if(U_SUCCESS(status)) { 2052 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2053 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2054 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2055 TEST_ASSERT(numFields == 3); 2056 TEST_ASSERT_UTEXT(str_first, fields[0]); 2057 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2058 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]); 2059 TEST_ASSERT(fields[3] == &patternText); 2060 } 2061 for(i = 0; i < numFields; i++) { 2062 utext_close(fields[i]); 2063 } 2064 2065 /* Split with just enough output strings available (5) */ 2066 status = U_ZERO_ERROR; 2067 fields[0] = NULL; 2068 fields[1] = NULL; 2069 fields[2] = NULL; 2070 fields[3] = NULL; 2071 fields[4] = NULL; 2072 fields[5] = &patternText; 2073 numFields = uregex_splitUText(re, fields, 5, &status); 2074 TEST_ASSERT_SUCCESS(status); 2075 2076 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2077 if(U_SUCCESS(status)) { 2078 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2079 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2080 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2081 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2082 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2083 2084 TEST_ASSERT(numFields == 5); 2085 TEST_ASSERT_UTEXT(str_first, fields[0]); 2086 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2087 TEST_ASSERT_UTEXT(str_second, fields[2]); 2088 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2089 TEST_ASSERT_UTEXT(str_third, fields[4]); 2090 TEST_ASSERT(fields[5] == &patternText); 2091 } 2092 for(i = 0; i < numFields; i++) { 2093 utext_close(fields[i]); 2094 } 2095 2096 /* Split, end of text is a field delimiter. */ 2097 status = U_ZERO_ERROR; 2098 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status); 2099 TEST_ASSERT_SUCCESS(status); 2100 2101 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2102 if(U_SUCCESS(status)) { 2103 memset(fields, 0, sizeof(fields)); 2104 fields[9] = &patternText; 2105 numFields = uregex_splitUText(re, fields, 9, &status); 2106 TEST_ASSERT_SUCCESS(status); 2107 2108 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2109 if(U_SUCCESS(status)) { 2110 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2111 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2112 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2113 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2114 2115 TEST_ASSERT(numFields == 4); 2116 TEST_ASSERT_UTEXT(str_first, fields[0]); 2117 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2118 TEST_ASSERT_UTEXT(str_second, fields[2]); 2119 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2120 TEST_ASSERT(fields[4] == NULL); 2121 TEST_ASSERT(fields[8] == NULL); 2122 TEST_ASSERT(fields[9] == &patternText); 2123 } 2124 for(i = 0; i < numFields; i++) { 2125 utext_close(fields[i]); 2126 } 2127 } 2128 2129 uregex_close(re); 2130 } 2131 utext_close(&patternText); 2132 } 2133 2134 /* BEGIN android-added 2135 Removed this function after Android upgrade to ICU4.6. 2136 */ 2137 static void TestRefreshInput(void) { 2138 /* 2139 * RefreshInput changes out the input of a URegularExpression without 2140 * changing anything else in the match state. Used with Java JNI, 2141 * when Java moves the underlying string storage. This test 2142 * runs a find() loop, moving the text after the first match. 2143 * The right number of matches should still be found. 2144 */ 2145 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */ 2146 UChar movedStr[] = { 0, 0, 0, 0, 0, 0}; 2147 UErrorCode status = U_ZERO_ERROR; 2148 URegularExpression *re; 2149 UText ut1 = UTEXT_INITIALIZER; 2150 UText ut2 = UTEXT_INITIALIZER; 2151 2152 re = uregex_openC("[ABC]", 0, 0, &status); 2153 TEST_ASSERT_SUCCESS(status); 2154 2155 utext_openUChars(&ut1, testStr, -1, &status); 2156 TEST_ASSERT_SUCCESS(status); 2157 uregex_setUText(re, &ut1, &status); 2158 TEST_ASSERT_SUCCESS(status); 2159 2160 /* Find the first match "A" in the original string */ 2161 TEST_ASSERT(uregex_findNext(re, &status)); 2162 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 2163 2164 /* Move the string, kill the original string. */ 2165 u_strcpy(movedStr, testStr); 2166 u_memset(testStr, 0, u_strlen(testStr)); 2167 utext_openUChars(&ut2, movedStr, -1, &status); 2168 TEST_ASSERT_SUCCESS(status); 2169 uregex_refreshUText(re, &ut2, &status); 2170 TEST_ASSERT_SUCCESS(status); 2171 2172 /* Find the following two matches, now working in the moved string. */ 2173 TEST_ASSERT(uregex_findNext(re, &status)); 2174 TEST_ASSERT(uregex_start(re, 0, &status) == 2); 2175 TEST_ASSERT(uregex_findNext(re, &status)); 2176 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 2177 TEST_ASSERT(FALSE == uregex_findNext(re, &status)); 2178 2179 uregex_close(re); 2180 } 2181 /* END android-addedd */ 2182 2183 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 2184