1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2004-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File reapits.c 9 * 10 *********************************************************************************/ 11 /*C API TEST FOR Regular Expressions */ 12 /** 13 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't 14 * try to test the full functionality. It just calls each function and verifies that it 15 * works on a basic level. 16 * 17 * More complete testing of regular expression functionality is done with the C++ tests. 18 **/ 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 23 24 #include <stdlib.h> 25 #include <string.h> 26 #include "unicode/uloc.h" 27 #include "unicode/uregex.h" 28 #include "unicode/ustring.h" 29 #include "unicode/utext.h" 30 #include "cintltst.h" 31 32 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 33 log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} 34 35 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 36 log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}} 37 38 /* 39 * TEST_SETUP and TEST_TEARDOWN 40 * macros to handle the boilerplate around setting up regex test cases. 41 * parameteres to setup: 42 * pattern: The regex pattern, a (char *) null terminated C string. 43 * testString: The string data, also a (char *) C string. 44 * flags: Regex flags to set when compiling the pattern 45 * 46 * Put arbitrary test code between SETUP and TEARDOWN. 47 * 're" is the compiled, ready-to-go regular expression. 
48 */ 49 #define TEST_SETUP(pattern, testString, flags) { \ 50 UChar *srcString = NULL; \ 51 status = U_ZERO_ERROR; \ 52 re = uregex_openC(pattern, flags, NULL, &status); \ 53 TEST_ASSERT_SUCCESS(status); \ 54 srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \ 55 u_uastrncpy(srcString, testString, strlen(testString)+1); \ 56 uregex_setText(re, srcString, -1, &status); \ 57 TEST_ASSERT_SUCCESS(status); \ 58 if (U_SUCCESS(status)) { 59 60 #define TEST_TEARDOWN \ 61 } \ 62 TEST_ASSERT_SUCCESS(status); \ 63 uregex_close(re); \ 64 free(srcString); \ 65 } 66 67 68 /** 69 * @param expected utf-8 array of bytes to be expected 70 */ 71 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) { 72 char buf_inside_macro[120]; 73 int32_t len = (int32_t)strlen(expected); 74 UBool success; 75 if (nulTerm) { 76 u_austrncpy(buf_inside_macro, (actual), len+1); 77 buf_inside_macro[len+2] = 0; 78 success = (strcmp((expected), buf_inside_macro) == 0); 79 } else { 80 u_austrncpy(buf_inside_macro, (actual), len); 81 buf_inside_macro[len+1] = 0; 82 success = (strncmp((expected), buf_inside_macro, len) == 0); 83 } 84 if (success == FALSE) { 85 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", 86 file, line, (expected), buf_inside_macro); 87 } 88 } 89 90 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__) 91 92 93 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) { 94 UErrorCode status = U_ZERO_ERROR; 95 UText expectedText = UTEXT_INITIALIZER; 96 utext_openUTF8(&expectedText, expected, -1, &status); 97 utext_setNativeIndex(actual, 0); 98 if (utext_compare(&expectedText, -1, actual, -1) != 0) { 99 UChar32 c; 100 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected); 101 c = utext_next32From(actual, 0); 102 while (c != U_SENTINEL) { 103 if (0x20<c && c 
<0x7e) { 104 log_err("%c", c); 105 } else { 106 log_err("%#x", c); 107 } 108 c = UTEXT_NEXT32(actual); 109 } 110 log_err("\"\n"); 111 } 112 utext_close(&expectedText); 113 } 114 115 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__) 116 117 118 119 static void TestRegexCAPI(void); 120 static void TestBug4315(void); 121 static void TestUTextAPI(void); 122 123 void addURegexTest(TestNode** root); 124 125 void addURegexTest(TestNode** root) 126 { 127 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); 128 addTest(root, &TestBug4315, "regex/TestBug4315"); 129 addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); 130 } 131 132 /* 133 * Call back function and context struct used for testing 134 * regular expression user callbacks. This test is mostly the same as 135 * the corresponding C++ test in intltest. 136 */ 137 typedef struct callBackContext { 138 int32_t maxCalls; 139 int32_t numCalls; 140 int32_t lastSteps; 141 } callBackContext; 142 143 static UBool U_EXPORT2 U_CALLCONV 144 TestCallbackFn(const void *context, int32_t steps) { 145 callBackContext *info = (callBackContext *)context; 146 if (info->lastSteps+1 != steps) { 147 log_err("incorrect steps in callback. 
Expected %d, got %d\n", info->lastSteps+1, steps); 148 } 149 info->lastSteps = steps; 150 info->numCalls++; 151 return (info->numCalls < info->maxCalls); 152 } 153 154 /* 155 * Regular Expression C API Tests 156 */ 157 static void TestRegexCAPI(void) { 158 UErrorCode status = U_ZERO_ERROR; 159 URegularExpression *re; 160 UChar pat[200]; 161 UChar *minus1; 162 163 memset(&minus1, -1, sizeof(minus1)); 164 165 /* Mimimalist open/close */ 166 u_uastrncpy(pat, "abc*", sizeof(pat)/2); 167 re = uregex_open(pat, -1, 0, 0, &status); 168 if (U_FAILURE(status)) { 169 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 170 return; 171 } 172 uregex_close(re); 173 174 /* Open with all flag values set */ 175 status = U_ZERO_ERROR; 176 re = uregex_open(pat, -1, 177 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 178 0, &status); 179 TEST_ASSERT_SUCCESS(status); 180 uregex_close(re); 181 182 /* Open with an invalid flag */ 183 status = U_ZERO_ERROR; 184 re = uregex_open(pat, -1, 0x40000000, 0, &status); 185 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 186 uregex_close(re); 187 188 /* Open with an unimplemented flag */ 189 status = U_ZERO_ERROR; 190 re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status); 191 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED); 192 uregex_close(re); 193 194 /* openC with an invalid parameter */ 195 status = U_ZERO_ERROR; 196 re = uregex_openC(NULL, 197 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 198 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 199 200 /* openC with an invalid parameter */ 201 status = U_USELESS_COLLATOR_ERROR; 202 re = uregex_openC(NULL, 203 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 204 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL); 205 206 /* openC 
open from a C string */ 207 { 208 const UChar *p; 209 int32_t len; 210 status = U_ZERO_ERROR; 211 re = uregex_openC("abc*", 0, 0, &status); 212 TEST_ASSERT_SUCCESS(status); 213 p = uregex_pattern(re, &len, &status); 214 TEST_ASSERT_SUCCESS(status); 215 216 /* The TEST_ASSERT_SUCCESS above should change too... */ 217 if(U_SUCCESS(status)) { 218 u_uastrncpy(pat, "abc*", sizeof(pat)/2); 219 TEST_ASSERT(u_strcmp(pat, p) == 0); 220 TEST_ASSERT(len==(int32_t)strlen("abc*")); 221 } 222 223 uregex_close(re); 224 225 /* TODO: Open with ParseError parameter */ 226 } 227 228 /* 229 * clone 230 */ 231 { 232 URegularExpression *clone1; 233 URegularExpression *clone2; 234 URegularExpression *clone3; 235 UChar testString1[30]; 236 UChar testString2[30]; 237 UBool result; 238 239 240 status = U_ZERO_ERROR; 241 re = uregex_openC("abc*", 0, 0, &status); 242 TEST_ASSERT_SUCCESS(status); 243 clone1 = uregex_clone(re, &status); 244 TEST_ASSERT_SUCCESS(status); 245 TEST_ASSERT(clone1 != NULL); 246 247 status = U_ZERO_ERROR; 248 clone2 = uregex_clone(re, &status); 249 TEST_ASSERT_SUCCESS(status); 250 TEST_ASSERT(clone2 != NULL); 251 uregex_close(re); 252 253 status = U_ZERO_ERROR; 254 clone3 = uregex_clone(clone2, &status); 255 TEST_ASSERT_SUCCESS(status); 256 TEST_ASSERT(clone3 != NULL); 257 258 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); 259 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); 260 261 status = U_ZERO_ERROR; 262 uregex_setText(clone1, testString1, -1, &status); 263 TEST_ASSERT_SUCCESS(status); 264 result = uregex_lookingAt(clone1, 0, &status); 265 TEST_ASSERT_SUCCESS(status); 266 TEST_ASSERT(result==TRUE); 267 268 status = U_ZERO_ERROR; 269 uregex_setText(clone2, testString2, -1, &status); 270 TEST_ASSERT_SUCCESS(status); 271 result = uregex_lookingAt(clone2, 0, &status); 272 TEST_ASSERT_SUCCESS(status); 273 TEST_ASSERT(result==FALSE); 274 result = uregex_find(clone2, 0, &status); 275 TEST_ASSERT_SUCCESS(status); 276 TEST_ASSERT(result==TRUE); 277 278 
uregex_close(clone1); 279 uregex_close(clone2); 280 uregex_close(clone3); 281 282 } 283 284 /* 285 * pattern() 286 */ 287 { 288 const UChar *resultPat; 289 int32_t resultLen; 290 u_uastrncpy(pat, "hello", sizeof(pat)/2); 291 status = U_ZERO_ERROR; 292 re = uregex_open(pat, -1, 0, NULL, &status); 293 resultPat = uregex_pattern(re, &resultLen, &status); 294 TEST_ASSERT_SUCCESS(status); 295 296 /* The TEST_ASSERT_SUCCESS above should change too... */ 297 if (U_SUCCESS(status)) { 298 TEST_ASSERT(resultLen == -1); 299 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 300 } 301 302 uregex_close(re); 303 304 status = U_ZERO_ERROR; 305 re = uregex_open(pat, 3, 0, NULL, &status); 306 resultPat = uregex_pattern(re, &resultLen, &status); 307 TEST_ASSERT_SUCCESS(status); 308 TEST_ASSERT_SUCCESS(status); 309 310 /* The TEST_ASSERT_SUCCESS above should change too... */ 311 if (U_SUCCESS(status)) { 312 TEST_ASSERT(resultLen == 3); 313 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 314 TEST_ASSERT(u_strlen(resultPat) == 3); 315 } 316 317 uregex_close(re); 318 } 319 320 /* 321 * flags() 322 */ 323 { 324 int32_t t; 325 326 status = U_ZERO_ERROR; 327 re = uregex_open(pat, -1, 0, NULL, &status); 328 t = uregex_flags(re, &status); 329 TEST_ASSERT_SUCCESS(status); 330 TEST_ASSERT(t == 0); 331 uregex_close(re); 332 333 status = U_ZERO_ERROR; 334 re = uregex_open(pat, -1, 0, NULL, &status); 335 t = uregex_flags(re, &status); 336 TEST_ASSERT_SUCCESS(status); 337 TEST_ASSERT(t == 0); 338 uregex_close(re); 339 340 status = U_ZERO_ERROR; 341 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status); 342 t = uregex_flags(re, &status); 343 TEST_ASSERT_SUCCESS(status); 344 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL)); 345 uregex_close(re); 346 } 347 348 /* 349 * setText() and lookingAt() 350 */ 351 { 352 UChar text1[50]; 353 UChar text2[50]; 354 UBool result; 355 356 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); 357 u_uastrncpy(text2, "abcccxd", 
sizeof(text2)/2); 358 status = U_ZERO_ERROR; 359 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 360 re = uregex_open(pat, -1, 0, NULL, &status); 361 TEST_ASSERT_SUCCESS(status); 362 363 /* Operation before doing a setText should fail... */ 364 status = U_ZERO_ERROR; 365 uregex_lookingAt(re, 0, &status); 366 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 367 368 status = U_ZERO_ERROR; 369 uregex_setText(re, text1, -1, &status); 370 result = uregex_lookingAt(re, 0, &status); 371 TEST_ASSERT(result == TRUE); 372 TEST_ASSERT_SUCCESS(status); 373 374 status = U_ZERO_ERROR; 375 uregex_setText(re, text2, -1, &status); 376 result = uregex_lookingAt(re, 0, &status); 377 TEST_ASSERT(result == FALSE); 378 TEST_ASSERT_SUCCESS(status); 379 380 status = U_ZERO_ERROR; 381 uregex_setText(re, text1, -1, &status); 382 result = uregex_lookingAt(re, 0, &status); 383 TEST_ASSERT(result == TRUE); 384 TEST_ASSERT_SUCCESS(status); 385 386 status = U_ZERO_ERROR; 387 uregex_setText(re, text1, 5, &status); 388 result = uregex_lookingAt(re, 0, &status); 389 TEST_ASSERT(result == FALSE); 390 TEST_ASSERT_SUCCESS(status); 391 392 status = U_ZERO_ERROR; 393 uregex_setText(re, text1, 6, &status); 394 result = uregex_lookingAt(re, 0, &status); 395 TEST_ASSERT(result == TRUE); 396 TEST_ASSERT_SUCCESS(status); 397 398 uregex_close(re); 399 } 400 401 402 /* 403 * getText() 404 */ 405 { 406 UChar text1[50]; 407 UChar text2[50]; 408 const UChar *result; 409 int32_t textLength; 410 411 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); 412 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); 413 status = U_ZERO_ERROR; 414 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 415 re = uregex_open(pat, -1, 0, NULL, &status); 416 417 uregex_setText(re, text1, -1, &status); 418 result = uregex_getText(re, &textLength, &status); 419 TEST_ASSERT(result == text1); 420 TEST_ASSERT(textLength == -1); 421 TEST_ASSERT_SUCCESS(status); 422 423 status = U_ZERO_ERROR; 424 uregex_setText(re, text2, 7, &status); 425 result = uregex_getText(re, 
&textLength, &status); 426 TEST_ASSERT(result == text2); 427 TEST_ASSERT(textLength == 7); 428 TEST_ASSERT_SUCCESS(status); 429 430 status = U_ZERO_ERROR; 431 uregex_setText(re, text2, 4, &status); 432 result = uregex_getText(re, &textLength, &status); 433 TEST_ASSERT(result == text2); 434 TEST_ASSERT(textLength == 4); 435 TEST_ASSERT_SUCCESS(status); 436 uregex_close(re); 437 } 438 439 /* 440 * matches() 441 */ 442 { 443 UChar text1[50]; 444 UBool result; 445 int len; 446 UChar nullString[] = {0,0,0}; 447 448 u_uastrncpy(text1, "abcccde", sizeof(text1)/2); 449 status = U_ZERO_ERROR; 450 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 451 re = uregex_open(pat, -1, 0, NULL, &status); 452 453 uregex_setText(re, text1, -1, &status); 454 result = uregex_matches(re, 0, &status); 455 TEST_ASSERT(result == FALSE); 456 TEST_ASSERT_SUCCESS(status); 457 458 status = U_ZERO_ERROR; 459 uregex_setText(re, text1, 6, &status); 460 result = uregex_matches(re, 0, &status); 461 TEST_ASSERT(result == TRUE); 462 TEST_ASSERT_SUCCESS(status); 463 464 status = U_ZERO_ERROR; 465 uregex_setText(re, text1, 6, &status); 466 result = uregex_matches(re, 1, &status); 467 TEST_ASSERT(result == FALSE); 468 TEST_ASSERT_SUCCESS(status); 469 uregex_close(re); 470 471 status = U_ZERO_ERROR; 472 re = uregex_openC(".?", 0, NULL, &status); 473 uregex_setText(re, text1, -1, &status); 474 len = u_strlen(text1); 475 result = uregex_matches(re, len, &status); 476 TEST_ASSERT(result == TRUE); 477 TEST_ASSERT_SUCCESS(status); 478 479 status = U_ZERO_ERROR; 480 uregex_setText(re, nullString, -1, &status); 481 TEST_ASSERT_SUCCESS(status); 482 result = uregex_matches(re, 0, &status); 483 TEST_ASSERT(result == TRUE); 484 TEST_ASSERT_SUCCESS(status); 485 uregex_close(re); 486 } 487 488 489 /* 490 * lookingAt() Used in setText test. 
491 */ 492 493 494 /* 495 * find(), findNext, start, end, reset 496 */ 497 { 498 UChar text1[50]; 499 UBool result; 500 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); 501 status = U_ZERO_ERROR; 502 re = uregex_openC("rx", 0, NULL, &status); 503 504 uregex_setText(re, text1, -1, &status); 505 result = uregex_find(re, 0, &status); 506 TEST_ASSERT(result == TRUE); 507 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 508 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 509 TEST_ASSERT_SUCCESS(status); 510 511 result = uregex_find(re, 9, &status); 512 TEST_ASSERT(result == TRUE); 513 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 514 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 515 TEST_ASSERT_SUCCESS(status); 516 517 result = uregex_find(re, 14, &status); 518 TEST_ASSERT(result == FALSE); 519 TEST_ASSERT_SUCCESS(status); 520 521 status = U_ZERO_ERROR; 522 uregex_reset(re, 0, &status); 523 524 result = uregex_findNext(re, &status); 525 TEST_ASSERT(result == TRUE); 526 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 527 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 528 TEST_ASSERT_SUCCESS(status); 529 530 result = uregex_findNext(re, &status); 531 TEST_ASSERT(result == TRUE); 532 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 533 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 534 TEST_ASSERT_SUCCESS(status); 535 536 status = U_ZERO_ERROR; 537 uregex_reset(re, 12, &status); 538 539 result = uregex_findNext(re, &status); 540 TEST_ASSERT(result == TRUE); 541 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 542 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 543 TEST_ASSERT_SUCCESS(status); 544 545 result = uregex_findNext(re, &status); 546 TEST_ASSERT(result == FALSE); 547 TEST_ASSERT_SUCCESS(status); 548 549 uregex_close(re); 550 } 551 552 /* 553 * groupCount 554 */ 555 { 556 int32_t result; 557 558 status = U_ZERO_ERROR; 559 re = uregex_openC("abc", 0, NULL, &status); 560 result = uregex_groupCount(re, &status); 561 TEST_ASSERT_SUCCESS(status); 562 
TEST_ASSERT(result == 0); 563 uregex_close(re); 564 565 status = U_ZERO_ERROR; 566 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status); 567 result = uregex_groupCount(re, &status); 568 TEST_ASSERT_SUCCESS(status); 569 TEST_ASSERT(result == 3); 570 uregex_close(re); 571 572 } 573 574 575 /* 576 * group() 577 */ 578 { 579 UChar text1[80]; 580 UChar buf[80]; 581 UBool result; 582 int32_t resultSz; 583 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 584 585 status = U_ZERO_ERROR; 586 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 587 TEST_ASSERT_SUCCESS(status); 588 589 590 uregex_setText(re, text1, -1, &status); 591 result = uregex_find(re, 0, &status); 592 TEST_ASSERT(result==TRUE); 593 594 /* Capture Group 0, the full match. Should succeed. */ 595 status = U_ZERO_ERROR; 596 resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status); 597 TEST_ASSERT_SUCCESS(status); 598 TEST_ASSERT_STRING("abc interior def", buf, TRUE); 599 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 600 601 /* Capture group #1. Should succeed. */ 602 status = U_ZERO_ERROR; 603 resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status); 604 TEST_ASSERT_SUCCESS(status); 605 TEST_ASSERT_STRING(" interior ", buf, TRUE); 606 TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); 607 608 /* Capture group out of range. Error. 
*/ 609 status = U_ZERO_ERROR; 610 uregex_group(re, 2, buf, sizeof(buf)/2, &status); 611 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 612 613 /* NULL buffer, pure pre-flight */ 614 status = U_ZERO_ERROR; 615 resultSz = uregex_group(re, 0, NULL, 0, &status); 616 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 617 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 618 619 /* Too small buffer, truncated string */ 620 status = U_ZERO_ERROR; 621 memset(buf, -1, sizeof(buf)); 622 resultSz = uregex_group(re, 0, buf, 5, &status); 623 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 624 TEST_ASSERT_STRING("abc i", buf, FALSE); 625 TEST_ASSERT(buf[5] == (UChar)0xffff); 626 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 627 628 /* Output string just fits buffer, no NUL term. */ 629 status = U_ZERO_ERROR; 630 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status); 631 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 632 TEST_ASSERT_STRING("abc interior def", buf, FALSE); 633 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 634 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff); 635 636 uregex_close(re); 637 638 } 639 640 /* 641 * Regions 642 */ 643 644 645 /* SetRegion(), getRegion() do something */ 646 TEST_SETUP(".*", "0123456789ABCDEF", 0) 647 UChar resultString[40]; 648 TEST_ASSERT(uregex_regionStart(re, &status) == 0); 649 TEST_ASSERT(uregex_regionEnd(re, &status) == 16); 650 uregex_setRegion(re, 3, 6, &status); 651 TEST_ASSERT(uregex_regionStart(re, &status) == 3); 652 TEST_ASSERT(uregex_regionEnd(re, &status) == 6); 653 TEST_ASSERT(uregex_findNext(re, &status)); 654 TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3) 655 TEST_ASSERT_STRING("345", resultString, TRUE); 656 TEST_TEARDOWN; 657 658 /* find(start=-1) uses regions */ 659 TEST_SETUP(".*", "0123456789ABCDEF", 0); 660 uregex_setRegion(re, 4, 6, &status); 661 TEST_ASSERT(uregex_find(re, -1, 
&status) == TRUE); 662 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 663 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 664 TEST_TEARDOWN; 665 666 /* find (start >=0) does not use regions */ 667 TEST_SETUP(".*", "0123456789ABCDEF", 0); 668 uregex_setRegion(re, 4, 6, &status); 669 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 670 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 671 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 672 TEST_TEARDOWN; 673 674 /* findNext() obeys regions */ 675 TEST_SETUP(".", "0123456789ABCDEF", 0); 676 uregex_setRegion(re, 4, 6, &status); 677 TEST_ASSERT(uregex_findNext(re,&status) == TRUE); 678 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 679 TEST_ASSERT(uregex_findNext(re, &status) == TRUE); 680 TEST_ASSERT(uregex_start(re, 0, &status) == 5); 681 TEST_ASSERT(uregex_findNext(re, &status) == FALSE); 682 TEST_TEARDOWN; 683 684 /* matches(start=-1) uses regions */ 685 /* Also, verify that non-greedy *? succeeds in finding the full match. */ 686 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 687 uregex_setRegion(re, 4, 6, &status); 688 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE); 689 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 690 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 691 TEST_TEARDOWN; 692 693 /* matches (start >=0) does not use regions */ 694 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 695 uregex_setRegion(re, 4, 6, &status); 696 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE); 697 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 698 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 699 TEST_TEARDOWN; 700 701 /* lookingAt(start=-1) uses regions */ 702 /* Also, verify that non-greedy *? finds the first (shortest) match. 
*/ 703 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 704 uregex_setRegion(re, 4, 6, &status); 705 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE); 706 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 707 TEST_ASSERT(uregex_end(re, 0, &status) == 4); 708 TEST_TEARDOWN; 709 710 /* lookingAt (start >=0) does not use regions */ 711 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 712 uregex_setRegion(re, 4, 6, &status); 713 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE); 714 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 715 TEST_ASSERT(uregex_end(re, 0, &status) == 0); 716 TEST_TEARDOWN; 717 718 /* hitEnd() */ 719 TEST_SETUP("[a-f]*", "abcdefghij", 0); 720 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 721 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE); 722 TEST_TEARDOWN; 723 724 TEST_SETUP("[a-f]*", "abcdef", 0); 725 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 726 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE); 727 TEST_TEARDOWN; 728 729 /* requireEnd */ 730 TEST_SETUP("abcd", "abcd", 0); 731 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 732 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE); 733 TEST_TEARDOWN; 734 735 TEST_SETUP("abcd$", "abcd", 0); 736 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 737 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE); 738 TEST_TEARDOWN; 739 740 /* anchoringBounds */ 741 TEST_SETUP("abc$", "abcdef", 0); 742 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE); 743 uregex_useAnchoringBounds(re, FALSE, &status); 744 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE); 745 746 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); 747 uregex_useAnchoringBounds(re, TRUE, &status); 748 uregex_setRegion(re, 0, 3, &status); 749 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); 750 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 751 TEST_TEARDOWN; 752 753 /* Transparent Bounds */ 754 TEST_SETUP("abc(?=def)", "abcdef", 0); 755 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE); 756 
uregex_useTransparentBounds(re, TRUE, &status); 757 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE); 758 759 uregex_useTransparentBounds(re, FALSE, &status); 760 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */ 761 uregex_setRegion(re, 0, 3, &status); 762 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */ 763 uregex_useTransparentBounds(re, TRUE, &status); 764 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */ 765 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 766 TEST_TEARDOWN; 767 768 769 /* 770 * replaceFirst() 771 */ 772 { 773 UChar text1[80]; 774 UChar text2[80]; 775 UChar replText[80]; 776 UChar buf[80]; 777 int32_t resultSz; 778 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 779 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 780 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); 781 782 status = U_ZERO_ERROR; 783 re = uregex_openC("x(.*?)x", 0, NULL, &status); 784 TEST_ASSERT_SUCCESS(status); 785 786 /* Normal case, with match */ 787 uregex_setText(re, text1, -1, &status); 788 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); 789 TEST_ASSERT_SUCCESS(status); 790 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); 791 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 792 793 /* No match. Text should copy to output with no changes. */ 794 status = U_ZERO_ERROR; 795 uregex_setText(re, text2, -1, &status); 796 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); 797 TEST_ASSERT_SUCCESS(status); 798 TEST_ASSERT_STRING("No match here.", buf, TRUE); 799 TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); 800 801 /* Match, output just fills buffer, no termination warning. 
*/ 802 status = U_ZERO_ERROR; 803 uregex_setText(re, text1, -1, &status); 804 memset(buf, -1, sizeof(buf)); 805 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 806 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 807 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 808 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 809 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 810 811 /* Do the replaceFirst again, without first resetting anything. 812 * Should give the same results. 813 */ 814 status = U_ZERO_ERROR; 815 memset(buf, -1, sizeof(buf)); 816 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 817 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 818 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 819 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 820 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 821 822 /* NULL buffer, zero buffer length */ 823 status = U_ZERO_ERROR; 824 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status); 825 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 826 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 827 828 /* Buffer too small by one */ 829 status = U_ZERO_ERROR; 830 memset(buf, -1, sizeof(buf)); 831 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status); 832 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 833 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE); 834 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 835 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 836 837 uregex_close(re); 838 } 839 840 841 /* 842 * replaceAll() 843 */ 844 { 845 UChar text1[80]; /* "Replace xaax x1x x...x." 
*/ 846 UChar text2[80]; /* "No match Here" */ 847 UChar replText[80]; /* "<$1>" */ 848 UChar replText2[80]; /* "<<$1>>" */ 849 const char * pattern = "x(.*?)x"; 850 const char * expectedResult = "Replace <aa> <1> <...>."; 851 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>."; 852 UChar buf[80]; 853 int32_t resultSize; 854 int32_t expectedResultSize; 855 int32_t expectedResultSize2; 856 int32_t i; 857 858 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 859 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 860 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); 861 u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2); 862 expectedResultSize = strlen(expectedResult); 863 expectedResultSize2 = strlen(expectedResult2); 864 865 status = U_ZERO_ERROR; 866 re = uregex_openC(pattern, 0, NULL, &status); 867 TEST_ASSERT_SUCCESS(status); 868 869 /* Normal case, with match */ 870 uregex_setText(re, text1, -1, &status); 871 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); 872 TEST_ASSERT_SUCCESS(status); 873 TEST_ASSERT_STRING(expectedResult, buf, TRUE); 874 TEST_ASSERT(resultSize == expectedResultSize); 875 876 /* No match. Text should copy to output with no changes. */ 877 status = U_ZERO_ERROR; 878 uregex_setText(re, text2, -1, &status); 879 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); 880 TEST_ASSERT_SUCCESS(status); 881 TEST_ASSERT_STRING("No match here.", buf, TRUE); 882 TEST_ASSERT(resultSize == u_strlen(text2)); 883 884 /* Match, output just fills buffer, no termination warning. 
*/ 885 status = U_ZERO_ERROR; 886 uregex_setText(re, text1, -1, &status); 887 memset(buf, -1, sizeof(buf)); 888 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status); 889 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 890 TEST_ASSERT_STRING(expectedResult, buf, FALSE); 891 TEST_ASSERT(resultSize == expectedResultSize); 892 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 893 894 /* Do the replaceFirst again, without first resetting anything. 895 * Should give the same results. 896 */ 897 status = U_ZERO_ERROR; 898 memset(buf, -1, sizeof(buf)); 899 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status); 900 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 901 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); 902 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 903 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 904 905 /* NULL buffer, zero buffer length */ 906 status = U_ZERO_ERROR; 907 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status); 908 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 909 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 910 911 /* Buffer too small. Try every size, which will tickle edge cases 912 * in uregex_appendReplacement (used by replaceAll) */ 913 for (i=0; i<expectedResultSize; i++) { 914 char expected[80]; 915 status = U_ZERO_ERROR; 916 memset(buf, -1, sizeof(buf)); 917 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status); 918 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 919 strcpy(expected, expectedResult); 920 expected[i] = 0; 921 TEST_ASSERT_STRING(expected, buf, FALSE); 922 TEST_ASSERT(resultSize == expectedResultSize); 923 TEST_ASSERT(buf[i] == (UChar)0xffff); 924 } 925 926 /* Buffer too small. 
Same as previous test, except this time the replacement 927 * text is longer than the match capture group, making the length of the complete 928 * replacement longer than the original string. 929 */ 930 for (i=0; i<expectedResultSize2; i++) { 931 char expected[80]; 932 status = U_ZERO_ERROR; 933 memset(buf, -1, sizeof(buf)); 934 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status); 935 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 936 strcpy(expected, expectedResult2); 937 expected[i] = 0; 938 TEST_ASSERT_STRING(expected, buf, FALSE); 939 TEST_ASSERT(resultSize == expectedResultSize2); 940 TEST_ASSERT(buf[i] == (UChar)0xffff); 941 } 942 943 944 uregex_close(re); 945 } 946 947 948 /* 949 * appendReplacement() 950 */ 951 { 952 UChar text[100]; 953 UChar repl[100]; 954 UChar buf[100]; 955 UChar *bufPtr; 956 int32_t bufCap; 957 958 959 status = U_ZERO_ERROR; 960 re = uregex_openC(".*", 0, 0, &status); 961 TEST_ASSERT_SUCCESS(status); 962 963 u_uastrncpy(text, "whatever", sizeof(text)/2); 964 u_uastrncpy(repl, "some other", sizeof(repl)/2); 965 uregex_setText(re, text, -1, &status); 966 967 /* match covers whole target string */ 968 uregex_find(re, 0, &status); 969 TEST_ASSERT_SUCCESS(status); 970 bufPtr = buf; 971 bufCap = sizeof(buf) / 2; 972 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 973 TEST_ASSERT_SUCCESS(status); 974 TEST_ASSERT_STRING("some other", buf, TRUE); 975 976 /* Match has \u \U escapes */ 977 uregex_find(re, 0, &status); 978 TEST_ASSERT_SUCCESS(status); 979 bufPtr = buf; 980 bufCap = sizeof(buf) / 2; 981 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); 982 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 983 TEST_ASSERT_SUCCESS(status); 984 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 985 986 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. 
*/ 987 status = U_ZERO_ERROR; 988 uregex_find(re, 0, &status); 989 TEST_ASSERT_SUCCESS(status); 990 bufPtr = buf; 991 status = U_BUFFER_OVERFLOW_ERROR; 992 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status); 993 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 994 995 uregex_close(re); 996 } 997 998 999 /* 1000 * appendTail(). Checked in ReplaceFirst(), replaceAll(). 1001 */ 1002 1003 /* 1004 * split() 1005 */ 1006 { 1007 UChar textToSplit[80]; 1008 UChar text2[80]; 1009 UChar buf[200]; 1010 UChar *fields[10]; 1011 int32_t numFields; 1012 int32_t requiredCapacity; 1013 int32_t spaceNeeded; 1014 int32_t sz; 1015 1016 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); 1017 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1018 1019 status = U_ZERO_ERROR; 1020 re = uregex_openC(":", 0, NULL, &status); 1021 1022 1023 /* Simple split */ 1024 1025 uregex_setText(re, textToSplit, -1, &status); 1026 TEST_ASSERT_SUCCESS(status); 1027 1028 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1029 if (U_SUCCESS(status)) { 1030 memset(fields, -1, sizeof(fields)); 1031 numFields = 1032 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); 1033 TEST_ASSERT_SUCCESS(status); 1034 1035 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1036 if(U_SUCCESS(status)) { 1037 TEST_ASSERT(numFields == 3); 1038 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1039 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1040 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1041 TEST_ASSERT(fields[3] == NULL); 1042 1043 spaceNeeded = u_strlen(textToSplit) - 1044 (numFields - 1) + /* Field delimiters do not appear in output */ 1045 numFields; /* Each field gets a NUL terminator */ 1046 1047 TEST_ASSERT(spaceNeeded == requiredCapacity); 1048 } 1049 } 1050 1051 uregex_close(re); 1052 1053 1054 /* Split with too few output strings available */ 1055 status = U_ZERO_ERROR; 1056 re = uregex_openC(":", 0, NULL, &status); 1057 uregex_setText(re, textToSplit, -1, &status); 1058 TEST_ASSERT_SUCCESS(status); 1059 1060 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1061 if(U_SUCCESS(status)) { 1062 memset(fields, -1, sizeof(fields)); 1063 numFields = 1064 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); 1065 TEST_ASSERT_SUCCESS(status); 1066 1067 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1068 if(U_SUCCESS(status)) { 1069 TEST_ASSERT(numFields == 2); 1070 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1071 TEST_ASSERT_STRING(" second: third", fields[1], TRUE); 1072 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1073 1074 spaceNeeded = u_strlen(textToSplit) - 1075 (numFields - 1) + /* Field delimiters do not appear in output */ 1076 numFields; /* Each field gets a NUL terminator */ 1077 1078 TEST_ASSERT(spaceNeeded == requiredCapacity); 1079 1080 /* Split with a range of output buffer sizes. 
*/ 1081 spaceNeeded = u_strlen(textToSplit) - 1082 (numFields - 1) + /* Field delimiters do not appear in output */ 1083 numFields; /* Each field gets a NUL terminator */ 1084 1085 for (sz=0; sz < spaceNeeded+1; sz++) { 1086 memset(fields, -1, sizeof(fields)); 1087 status = U_ZERO_ERROR; 1088 numFields = 1089 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status); 1090 if (sz >= spaceNeeded) { 1091 TEST_ASSERT_SUCCESS(status); 1092 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1093 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1094 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1095 } else { 1096 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1097 } 1098 TEST_ASSERT(numFields == 3); 1099 TEST_ASSERT(fields[3] == NULL); 1100 TEST_ASSERT(spaceNeeded == requiredCapacity); 1101 } 1102 } 1103 } 1104 1105 uregex_close(re); 1106 } 1107 1108 1109 1110 1111 /* Split(), part 2. Patterns with capture groups. The capture group text 1112 * comes out as additional fields. */ 1113 { 1114 UChar textToSplit[80]; 1115 UChar buf[200]; 1116 UChar *fields[10]; 1117 int32_t numFields; 1118 int32_t requiredCapacity; 1119 int32_t spaceNeeded; 1120 int32_t sz; 1121 1122 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); 1123 1124 status = U_ZERO_ERROR; 1125 re = uregex_openC("<(.*?)>", 0, NULL, &status); 1126 1127 uregex_setText(re, textToSplit, -1, &status); 1128 TEST_ASSERT_SUCCESS(status); 1129 1130 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1131 if(U_SUCCESS(status)) { 1132 memset(fields, -1, sizeof(fields)); 1133 numFields = 1134 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); 1135 TEST_ASSERT_SUCCESS(status); 1136 1137 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1138 if(U_SUCCESS(status)) { 1139 TEST_ASSERT(numFields == 5); 1140 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1141 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1142 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1143 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1144 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1145 TEST_ASSERT(fields[5] == NULL); 1146 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1147 TEST_ASSERT(spaceNeeded == requiredCapacity); 1148 } 1149 } 1150 1151 /* Split with too few output strings available (2) */ 1152 status = U_ZERO_ERROR; 1153 memset(fields, -1, sizeof(fields)); 1154 numFields = 1155 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); 1156 TEST_ASSERT_SUCCESS(status); 1157 1158 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1159 if(U_SUCCESS(status)) { 1160 TEST_ASSERT(numFields == 2); 1161 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1162 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE); 1163 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1164 1165 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */ 1166 TEST_ASSERT(spaceNeeded == requiredCapacity); 1167 } 1168 1169 /* Split with too few output strings available (3) */ 1170 status = U_ZERO_ERROR; 1171 memset(fields, -1, sizeof(fields)); 1172 numFields = 1173 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status); 1174 TEST_ASSERT_SUCCESS(status); 1175 1176 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1177 if(U_SUCCESS(status)) { 1178 TEST_ASSERT(numFields == 3); 1179 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1180 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1181 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE); 1182 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*))); 1183 1184 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." 
at NUL positions */ 1185 TEST_ASSERT(spaceNeeded == requiredCapacity); 1186 } 1187 1188 /* Split with just enough output strings available (5) */ 1189 status = U_ZERO_ERROR; 1190 memset(fields, -1, sizeof(fields)); 1191 numFields = 1192 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status); 1193 TEST_ASSERT_SUCCESS(status); 1194 1195 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1196 if(U_SUCCESS(status)) { 1197 TEST_ASSERT(numFields == 5); 1198 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1199 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1200 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1201 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1202 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1203 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*))); 1204 1205 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1206 TEST_ASSERT(spaceNeeded == requiredCapacity); 1207 } 1208 1209 /* Split, end of text is a field delimiter. */ 1210 status = U_ZERO_ERROR; 1211 sz = strlen("first <tag-a> second<tag-b>"); 1212 uregex_setText(re, textToSplit, sz, &status); 1213 TEST_ASSERT_SUCCESS(status); 1214 1215 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1216 if(U_SUCCESS(status)) { 1217 memset(fields, -1, sizeof(fields)); 1218 numFields = 1219 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status); 1220 TEST_ASSERT_SUCCESS(status); 1221 1222 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1223 if(U_SUCCESS(status)) { 1224 TEST_ASSERT(numFields == 4); 1225 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1226 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1227 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1228 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1229 TEST_ASSERT(fields[4] == NULL); 1230 TEST_ASSERT(fields[8] == NULL); 1231 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); 1232 spaceNeeded = strlen("first .tag-a. second.tag-b."); /* "." 
at NUL positions */ 1233 TEST_ASSERT(spaceNeeded == requiredCapacity); 1234 } 1235 } 1236 1237 uregex_close(re); 1238 } 1239 1240 /* 1241 * set/getTimeLimit 1242 */ 1243 TEST_SETUP("abc$", "abcdef", 0); 1244 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0); 1245 uregex_setTimeLimit(re, 1000, &status); 1246 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1247 TEST_ASSERT_SUCCESS(status); 1248 uregex_setTimeLimit(re, -1, &status); 1249 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1250 status = U_ZERO_ERROR; 1251 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1252 TEST_TEARDOWN; 1253 1254 /* 1255 * set/get Stack Limit 1256 */ 1257 TEST_SETUP("abc$", "abcdef", 0); 1258 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000); 1259 uregex_setStackLimit(re, 40000, &status); 1260 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1261 TEST_ASSERT_SUCCESS(status); 1262 uregex_setStackLimit(re, -1, &status); 1263 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1264 status = U_ZERO_ERROR; 1265 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1266 TEST_TEARDOWN; 1267 1268 1269 /* 1270 * Get/Set callback functions 1271 * This test is copied from intltest regex/Callbacks 1272 * The pattern and test data will run long enough to cause the callback 1273 * to be invoked. The nested '+' operators give exponential time 1274 * behavior with increasing string length. 1275 */ 1276 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0) 1277 callBackContext cbInfo = {4, 0, 0}; 1278 const void *pContext = &cbInfo; 1279 URegexMatchCallback *returnedFn = &TestCallbackFn; 1280 1281 /* Getting the callback fn when it hasn't been set must return NULL */ 1282 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1283 TEST_ASSERT_SUCCESS(status); 1284 TEST_ASSERT(returnedFn == NULL); 1285 TEST_ASSERT(pContext == NULL); 1286 1287 /* Set thecallback and do a match. */ 1288 /* The callback function should record that it has been called. 
*/ 1289 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status); 1290 TEST_ASSERT_SUCCESS(status); 1291 TEST_ASSERT(cbInfo.numCalls == 0); 1292 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE); 1293 TEST_ASSERT_SUCCESS(status); 1294 TEST_ASSERT(cbInfo.numCalls > 0); 1295 1296 /* Getting the callback should return the values that were set above. */ 1297 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1298 TEST_ASSERT(returnedFn == &TestCallbackFn); 1299 TEST_ASSERT(pContext == &cbInfo); 1300 1301 TEST_TEARDOWN; 1302 } 1303 1304 1305

/*
 * TestBug4315
 *   Regression test (named for bug 4315) for preflighting uregex_split().
 *
 *   The sentence below is split on the pattern "ck " twice:
 *     1. With a NULL destination buffer and zero capacity.  The call is
 *        expected to set U_BUFFER_OVERFLOW_ERROR, still report 3 fields,
 *        and hand back the required buffer capacity.
 *     2. With a heap buffer sized from that reported capacity (plus one).
 *        The call is expected to succeed, report the same required
 *        capacity, and produce the three expected fields followed by a
 *        NULL entry in the field array.
 */
static void TestBug4315(void) {
    UErrorCode          theICUError   = U_ZERO_ERROR;
    URegularExpression *theRegEx;
    UChar              *textBuff      = NULL;
    const char         *thePattern    = "ck ";
    UChar               theString[100];
    UChar              *destFields[24];
    int32_t             neededLength1 = 0;
    int32_t             neededLength2 = 0;
    int32_t             wordCount     = 0;
    /* Derive the field capacity from the array so the two cannot drift apart. */
    int32_t             destFieldsSize = (int32_t)(sizeof(destFields) / sizeof(destFields[0]));

    u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");

    /* open a regex */
    theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
    TEST_ASSERT_SUCCESS(theICUError);

    /* set the input string */
    uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
    TEST_ASSERT_SUCCESS(theICUError);

    /* split */
    /* Explicitly pass NULL and 0 to force the overflow error -> this is
     * where the error occurs! */
    wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
                             destFieldsSize, &theICUError);

    TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
    TEST_ASSERT(wordCount == 3);

    if (theICUError == U_BUFFER_OVERFLOW_ERROR) {
        theICUError = U_ZERO_ERROR;
        textBuff = (UChar *)malloc(sizeof(UChar) * (size_t)(neededLength1 + 1));
        if (textBuff == NULL) {
            /* Never hand a NULL buffer with a non-zero capacity to uregex_split(). */
            log_err("Failure at file %s, line %d, could not allocate the split buffer\n",
                    __FILE__, __LINE__);
        } else {
            wordCount = uregex_split(theRegEx, textBuff, neededLength1 + 1, &neededLength2,
                                     destFields, destFieldsSize, &theICUError);
            TEST_ASSERT(wordCount == 3);
            TEST_ASSERT_SUCCESS(theICUError);
            TEST_ASSERT(neededLength1 == neededLength2);
            TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
            TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
            TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
            TEST_ASSERT(destFields[3] == NULL);
            free(textBuff);
        }
    }
    uregex_close(theRegEx);
}

1356 1357 /* Based on TestRegexCAPI() */ 1358 static void TestUTextAPI(void) { 1359 UErrorCode status = U_ZERO_ERROR; 1360 URegularExpression *re; 1361 UText patternText = UTEXT_INITIALIZER; 1362 UChar pat[200]; 1363 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 }; 1364 1365 /* Mimimalist open/close */ 1366 utext_openUTF8(&patternText, patternTextUTF8, -1, &status); 1367 re = uregex_openUText(&patternText, 0, 0, &status); 1368 if (U_FAILURE(status)) { 1369 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 1370 utext_close(&patternText); 1371 return; 1372 } 1373 uregex_close(re); 1374 1375 /* Open with all flag values set */ 1376 status = U_ZERO_ERROR; 1377 re = uregex_openUText(&patternText, 1378 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 1379 0, &status); 1380 TEST_ASSERT_SUCCESS(status); 1381 uregex_close(re); 1382 1383 /* Open with an invalid flag */ 1384 status = U_ZERO_ERROR; 1385 re = 
uregex_openUText(&patternText, 0x40000000, 0, &status); 1386 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 1387 uregex_close(re); 1388 1389 /* open with an invalid parameter */ 1390 status = U_ZERO_ERROR; 1391 re = uregex_openUText(NULL, 1392 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 1393 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 1394 1395 /* 1396 * clone 1397 */ 1398 { 1399 URegularExpression *clone1; 1400 URegularExpression *clone2; 1401 URegularExpression *clone3; 1402 UChar testString1[30]; 1403 UChar testString2[30]; 1404 UBool result; 1405 1406 1407 status = U_ZERO_ERROR; 1408 re = uregex_openUText(&patternText, 0, 0, &status); 1409 TEST_ASSERT_SUCCESS(status); 1410 clone1 = uregex_clone(re, &status); 1411 TEST_ASSERT_SUCCESS(status); 1412 TEST_ASSERT(clone1 != NULL); 1413 1414 status = U_ZERO_ERROR; 1415 clone2 = uregex_clone(re, &status); 1416 TEST_ASSERT_SUCCESS(status); 1417 TEST_ASSERT(clone2 != NULL); 1418 uregex_close(re); 1419 1420 status = U_ZERO_ERROR; 1421 clone3 = uregex_clone(clone2, &status); 1422 TEST_ASSERT_SUCCESS(status); 1423 TEST_ASSERT(clone3 != NULL); 1424 1425 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); 1426 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); 1427 1428 status = U_ZERO_ERROR; 1429 uregex_setText(clone1, testString1, -1, &status); 1430 TEST_ASSERT_SUCCESS(status); 1431 result = uregex_lookingAt(clone1, 0, &status); 1432 TEST_ASSERT_SUCCESS(status); 1433 TEST_ASSERT(result==TRUE); 1434 1435 status = U_ZERO_ERROR; 1436 uregex_setText(clone2, testString2, -1, &status); 1437 TEST_ASSERT_SUCCESS(status); 1438 result = uregex_lookingAt(clone2, 0, &status); 1439 TEST_ASSERT_SUCCESS(status); 1440 TEST_ASSERT(result==FALSE); 1441 result = uregex_find(clone2, 0, &status); 1442 TEST_ASSERT_SUCCESS(status); 1443 TEST_ASSERT(result==TRUE); 1444 1445 uregex_close(clone1); 1446 uregex_close(clone2); 1447 uregex_close(clone3); 1448 1449 } 1450 
1451 /* 1452 * pattern() and patternText() 1453 */ 1454 { 1455 const UChar *resultPat; 1456 int32_t resultLen; 1457 UText *resultText; 1458 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */ 1459 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ 1460 u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */ 1461 status = U_ZERO_ERROR; 1462 1463 utext_openUTF8(&patternText, str_hello, -1, &status); 1464 re = uregex_open(pat, -1, 0, NULL, &status); 1465 resultPat = uregex_pattern(re, &resultLen, &status); 1466 TEST_ASSERT_SUCCESS(status); 1467 1468 /* The TEST_ASSERT_SUCCESS above should change too... */ 1469 if (U_SUCCESS(status)) { 1470 TEST_ASSERT(resultLen == -1); 1471 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 1472 } 1473 1474 resultText = uregex_patternUText(re, &status); 1475 TEST_ASSERT_SUCCESS(status); 1476 TEST_ASSERT_UTEXT(str_hello, resultText); 1477 1478 uregex_close(re); 1479 1480 status = U_ZERO_ERROR; 1481 re = uregex_open(pat, 3, 0, NULL, &status); 1482 resultPat = uregex_pattern(re, &resultLen, &status); 1483 TEST_ASSERT_SUCCESS(status); 1484 1485 /* The TEST_ASSERT_SUCCESS above should change too... 
*/ 1486 if (U_SUCCESS(status)) { 1487 TEST_ASSERT(resultLen == 3); 1488 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 1489 TEST_ASSERT(u_strlen(resultPat) == 3); 1490 } 1491 1492 resultText = uregex_patternUText(re, &status); 1493 TEST_ASSERT_SUCCESS(status); 1494 TEST_ASSERT_UTEXT(str_hel, resultText); 1495 1496 uregex_close(re); 1497 } 1498 1499 /* 1500 * setUText() and lookingAt() 1501 */ 1502 { 1503 UText text1 = UTEXT_INITIALIZER; 1504 UText text2 = UTEXT_INITIALIZER; 1505 UBool result; 1506 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1507 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1508 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1509 status = U_ZERO_ERROR; 1510 utext_openUTF8(&text1, str_abcccd, -1, &status); 1511 utext_openUTF8(&text2, str_abcccxd, -1, &status); 1512 1513 utext_openUTF8(&patternText, str_abcd, -1, &status); 1514 re = uregex_openUText(&patternText, 0, NULL, &status); 1515 TEST_ASSERT_SUCCESS(status); 1516 1517 /* Operation before doing a setText should fail... 
*/ 1518 status = U_ZERO_ERROR; 1519 uregex_lookingAt(re, 0, &status); 1520 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 1521 1522 status = U_ZERO_ERROR; 1523 uregex_setUText(re, &text1, &status); 1524 result = uregex_lookingAt(re, 0, &status); 1525 TEST_ASSERT(result == TRUE); 1526 TEST_ASSERT_SUCCESS(status); 1527 1528 status = U_ZERO_ERROR; 1529 uregex_setUText(re, &text2, &status); 1530 result = uregex_lookingAt(re, 0, &status); 1531 TEST_ASSERT(result == FALSE); 1532 TEST_ASSERT_SUCCESS(status); 1533 1534 status = U_ZERO_ERROR; 1535 uregex_setUText(re, &text1, &status); 1536 result = uregex_lookingAt(re, 0, &status); 1537 TEST_ASSERT(result == TRUE); 1538 TEST_ASSERT_SUCCESS(status); 1539 1540 uregex_close(re); 1541 utext_close(&text1); 1542 utext_close(&text2); 1543 } 1544 1545 1546 /* 1547 * getText() and getUText() 1548 */ 1549 { 1550 UText text1 = UTEXT_INITIALIZER; 1551 UText text2 = UTEXT_INITIALIZER; 1552 UChar text2Chars[20]; 1553 UText *resultText; 1554 const UChar *result; 1555 int32_t textLength; 1556 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1557 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1558 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1559 1560 1561 status = U_ZERO_ERROR; 1562 utext_openUTF8(&text1, str_abcccd, -1, &status); 1563 u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2); 1564 utext_openUChars(&text2, text2Chars, -1, &status); 1565 1566 utext_openUTF8(&patternText, str_abcd, -1, &status); 1567 re = uregex_openUText(&patternText, 0, NULL, &status); 1568 1569 /* First set a UText */ 1570 uregex_setUText(re, &text1, &status); 1571 resultText = uregex_getUText(re, NULL, &status); 1572 TEST_ASSERT_SUCCESS(status); 1573 TEST_ASSERT(resultText != &text1); 1574 utext_setNativeIndex(resultText, 0); 1575 utext_setNativeIndex(&text1, 0); 1576 TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); 1577 
utext_close(resultText); 1578 1579 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */ 1580 TEST_ASSERT(textLength == -1 || textLength == 6); 1581 resultText = uregex_getUText(re, NULL, &status); 1582 TEST_ASSERT_SUCCESS(status); 1583 TEST_ASSERT(resultText != &text1); 1584 utext_setNativeIndex(resultText, 0); 1585 utext_setNativeIndex(&text1, 0); 1586 TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); 1587 utext_close(resultText); 1588 1589 /* Then set a UChar * */ 1590 uregex_setText(re, text2Chars, 7, &status); 1591 resultText = uregex_getUText(re, NULL, &status); 1592 TEST_ASSERT_SUCCESS(status); 1593 utext_setNativeIndex(resultText, 0); 1594 utext_setNativeIndex(&text2, 0); 1595 TEST_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0); 1596 utext_close(resultText); 1597 result = uregex_getText(re, &textLength, &status); 1598 TEST_ASSERT(textLength == 7); 1599 1600 uregex_close(re); 1601 utext_close(&text1); 1602 utext_close(&text2); 1603 } 1604 1605 /* 1606 * matches() 1607 */ 1608 { 1609 UText text1 = UTEXT_INITIALIZER; 1610 UBool result; 1611 UText nullText = UTEXT_INITIALIZER; 1612 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */ 1613 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */ 1614 1615 status = U_ZERO_ERROR; 1616 utext_openUTF8(&text1, str_abcccde, -1, &status); 1617 utext_openUTF8(&patternText, str_abcd, -1, &status); 1618 re = uregex_openUText(&patternText, 0, NULL, &status); 1619 1620 uregex_setUText(re, &text1, &status); 1621 result = uregex_matches(re, 0, &status); 1622 TEST_ASSERT(result == FALSE); 1623 TEST_ASSERT_SUCCESS(status); 1624 uregex_close(re); 1625 1626 status = U_ZERO_ERROR; 1627 re = uregex_openC(".?", 0, NULL, &status); 1628 uregex_setUText(re, &text1, &status); 1629 result = uregex_matches(re, 7, &status); 1630 TEST_ASSERT(result == TRUE); 1631 TEST_ASSERT_SUCCESS(status); 1632 1633 status = U_ZERO_ERROR; 1634 
utext_openUTF8(&nullText, "", -1, &status); 1635 uregex_setUText(re, &nullText, &status); 1636 TEST_ASSERT_SUCCESS(status); 1637 result = uregex_matches(re, 0, &status); 1638 TEST_ASSERT(result == TRUE); 1639 TEST_ASSERT_SUCCESS(status); 1640 1641 uregex_close(re); 1642 utext_close(&text1); 1643 utext_close(&nullText); 1644 } 1645 1646 1647 /* 1648 * lookingAt() Used in setText test. 1649 */ 1650 1651 1652 /* 1653 * find(), findNext, start, end, reset 1654 */ 1655 { 1656 UChar text1[50]; 1657 UBool result; 1658 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); 1659 status = U_ZERO_ERROR; 1660 re = uregex_openC("rx", 0, NULL, &status); 1661 1662 uregex_setText(re, text1, -1, &status); 1663 result = uregex_find(re, 0, &status); 1664 TEST_ASSERT(result == TRUE); 1665 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1666 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1667 TEST_ASSERT_SUCCESS(status); 1668 1669 result = uregex_find(re, 9, &status); 1670 TEST_ASSERT(result == TRUE); 1671 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 1672 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 1673 TEST_ASSERT_SUCCESS(status); 1674 1675 result = uregex_find(re, 14, &status); 1676 TEST_ASSERT(result == FALSE); 1677 TEST_ASSERT_SUCCESS(status); 1678 1679 status = U_ZERO_ERROR; 1680 uregex_reset(re, 0, &status); 1681 1682 result = uregex_findNext(re, &status); 1683 TEST_ASSERT(result == TRUE); 1684 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1685 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1686 TEST_ASSERT_SUCCESS(status); 1687 1688 result = uregex_findNext(re, &status); 1689 TEST_ASSERT(result == TRUE); 1690 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 1691 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 1692 TEST_ASSERT_SUCCESS(status); 1693 1694 status = U_ZERO_ERROR; 1695 uregex_reset(re, 12, &status); 1696 1697 result = uregex_findNext(re, &status); 1698 TEST_ASSERT(result == TRUE); 1699 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 1700 
TEST_ASSERT(uregex_end(re, 0, &status) == 15); 1701 TEST_ASSERT_SUCCESS(status); 1702 1703 result = uregex_findNext(re, &status); 1704 TEST_ASSERT(result == FALSE); 1705 TEST_ASSERT_SUCCESS(status); 1706 1707 uregex_close(re); 1708 } 1709 1710 /* 1711 * group() 1712 */ 1713 { 1714 UChar text1[80]; 1715 UText *actual; 1716 UBool result; 1717 1718 const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */ 1719 const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */ 1720 1721 1722 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 1723 1724 status = U_ZERO_ERROR; 1725 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 1726 TEST_ASSERT_SUCCESS(status); 1727 1728 uregex_setText(re, text1, -1, &status); 1729 result = uregex_find(re, 0, &status); 1730 TEST_ASSERT(result==TRUE); 1731 1732 /* Capture Group 0, the full match. Should succeed. */ 1733 status = U_ZERO_ERROR; 1734 actual = uregex_groupUTextDeep(re, 0, NULL, &status); 1735 TEST_ASSERT_SUCCESS(status); 1736 TEST_ASSERT_UTEXT(str_abcinteriordef, actual); 1737 utext_close(actual); 1738 1739 /* Capture Group 0 with shallow clone API. Should succeed. */ 1740 status = U_ZERO_ERROR; 1741 { 1742 int64_t group_len; 1743 int32_t len16; 1744 UErrorCode shallowStatus = U_ZERO_ERROR; 1745 int64_t nativeIndex; 1746 UChar *groupChars; 1747 UText groupText = UTEXT_INITIALIZER; 1748 1749 actual = uregex_groupUText(re, 0, NULL, &group_len, &status); 1750 TEST_ASSERT_SUCCESS(status); 1751 1752 nativeIndex = utext_getNativeIndex(actual); 1753 /* Following returns U_INDEX_OUTOFBOUNDS_ERROR... 
looks like a bug in ucstrFuncs UTextFuncs [utext.cpp] */ 1754 /* len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus); */ 1755 len16 = group_len; 1756 1757 groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1)); 1758 utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus); 1759 1760 utext_openUChars(&groupText, groupChars, len16, &shallowStatus); 1761 1762 TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText); 1763 utext_close(&groupText); 1764 free(groupChars); 1765 } 1766 utext_close(actual); 1767 1768 /* Capture group #1. Should succeed. */ 1769 status = U_ZERO_ERROR; 1770 actual = uregex_groupUTextDeep(re, 1, NULL, &status); 1771 TEST_ASSERT_SUCCESS(status); 1772 TEST_ASSERT_UTEXT(str_interior, actual); 1773 utext_close(actual); 1774 1775 /* Capture group out of range. Error. */ 1776 status = U_ZERO_ERROR; 1777 actual = uregex_groupUTextDeep(re, 2, NULL, &status); 1778 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1779 TEST_ASSERT(utext_nativeLength(actual) == 0); 1780 utext_close(actual); 1781 1782 uregex_close(re); 1783 1784 } 1785 1786 /* 1787 * replaceFirst() 1788 */ 1789 { 1790 UChar text1[80]; 1791 UChar text2[80]; 1792 UText replText = UTEXT_INITIALIZER; 1793 UText *result; 1794 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */ 1795 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. 
*/ 1796 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */ 1797 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1798 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */ 1799 status = U_ZERO_ERROR; 1800 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 1801 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1802 utext_openUTF8(&replText, str_1x, -1, &status); 1803 1804 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1805 TEST_ASSERT_SUCCESS(status); 1806 1807 /* Normal case, with match */ 1808 uregex_setText(re, text1, -1, &status); 1809 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1810 TEST_ASSERT_SUCCESS(status); 1811 TEST_ASSERT_UTEXT(str_Replxxx, result); 1812 utext_close(result); 1813 1814 /* No match. Text should copy to output with no changes. 
*/ 1815 uregex_setText(re, text2, -1, &status); 1816 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1817 TEST_ASSERT_SUCCESS(status); 1818 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1819 utext_close(result); 1820 1821 /* Unicode escapes */ 1822 uregex_setText(re, text1, -1, &status); 1823 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status); 1824 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1825 TEST_ASSERT_SUCCESS(status); 1826 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result); 1827 utext_close(result); 1828 1829 uregex_close(re); 1830 utext_close(&replText); 1831 } 1832 1833 1834 /* 1835 * replaceAll() 1836 */ 1837 { 1838 UChar text1[80]; 1839 UChar text2[80]; 1840 UText replText = UTEXT_INITIALIZER; 1841 UText *result; 1842 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1843 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */ 1844 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ 1845 status = U_ZERO_ERROR; 1846 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 1847 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1848 utext_openUTF8(&replText, str_1, -1, &status); 1849 1850 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1851 TEST_ASSERT_SUCCESS(status); 1852 1853 /* Normal case, with match */ 1854 uregex_setText(re, text1, -1, &status); 1855 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1856 TEST_ASSERT_SUCCESS(status); 1857 TEST_ASSERT_UTEXT(str_Replaceaa1, result); 1858 utext_close(result); 1859 1860 /* No match. Text should copy to output with no changes. 
*/ 1861 uregex_setText(re, text2, -1, &status); 1862 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1863 TEST_ASSERT_SUCCESS(status); 1864 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1865 utext_close(result); 1866 1867 uregex_close(re); 1868 utext_close(&replText); 1869 } 1870 1871 1872 /* 1873 * appendReplacement() 1874 */ 1875 { 1876 UChar text[100]; 1877 UChar repl[100]; 1878 UChar buf[100]; 1879 UChar *bufPtr; 1880 int32_t bufCap; 1881 1882 status = U_ZERO_ERROR; 1883 re = uregex_openC(".*", 0, 0, &status); 1884 TEST_ASSERT_SUCCESS(status); 1885 1886 u_uastrncpy(text, "whatever", sizeof(text)/2); 1887 u_uastrncpy(repl, "some other", sizeof(repl)/2); 1888 uregex_setText(re, text, -1, &status); 1889 1890 /* match covers whole target string */ 1891 uregex_find(re, 0, &status); 1892 TEST_ASSERT_SUCCESS(status); 1893 bufPtr = buf; 1894 bufCap = sizeof(buf) / 2; 1895 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1896 TEST_ASSERT_SUCCESS(status); 1897 TEST_ASSERT_STRING("some other", buf, TRUE); 1898 1899 /* Match has \u \U escapes */ 1900 uregex_find(re, 0, &status); 1901 TEST_ASSERT_SUCCESS(status); 1902 bufPtr = buf; 1903 bufCap = sizeof(buf) / 2; 1904 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); 1905 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1906 TEST_ASSERT_SUCCESS(status); 1907 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 1908 1909 uregex_close(re); 1910 } 1911 1912 1913 /* 1914 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll(). 
1915 */ 1916 1917 /* 1918 * splitUText() 1919 */ 1920 { 1921 UChar textToSplit[80]; 1922 UChar text2[80]; 1923 UText *fields[10]; 1924 int32_t numFields; 1925 int32_t i; 1926 1927 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); 1928 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1929 1930 status = U_ZERO_ERROR; 1931 re = uregex_openC(":", 0, NULL, &status); 1932 1933 1934 /* Simple split */ 1935 1936 uregex_setText(re, textToSplit, -1, &status); 1937 TEST_ASSERT_SUCCESS(status); 1938 1939 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1940 if (U_SUCCESS(status)) { 1941 memset(fields, 0, sizeof(fields)); 1942 numFields = uregex_splitUText(re, fields, 10, &status); 1943 TEST_ASSERT_SUCCESS(status); 1944 1945 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1946 if(U_SUCCESS(status)) { 1947 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */ 1948 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */ 1949 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */ 1950 TEST_ASSERT(numFields == 3); 1951 TEST_ASSERT_UTEXT(str_first, fields[0]); 1952 TEST_ASSERT_UTEXT(str_second, fields[1]); 1953 TEST_ASSERT_UTEXT(str_third, fields[2]); 1954 TEST_ASSERT(fields[3] == NULL); 1955 } 1956 for(i = 0; i < numFields; i++) { 1957 utext_close(fields[i]); 1958 } 1959 } 1960 1961 uregex_close(re); 1962 1963 1964 /* Split with too few output strings available */ 1965 status = U_ZERO_ERROR; 1966 re = uregex_openC(":", 0, NULL, &status); 1967 uregex_setText(re, textToSplit, -1, &status); 1968 TEST_ASSERT_SUCCESS(status); 1969 1970 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1971 if(U_SUCCESS(status)) { 1972 fields[0] = NULL; 1973 fields[1] = NULL; 1974 fields[2] = &patternText; 1975 numFields = uregex_splitUText(re, fields, 2, &status); 1976 TEST_ASSERT_SUCCESS(status); 1977 1978 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1979 if(U_SUCCESS(status)) { 1980 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 1981 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */ 1982 TEST_ASSERT(numFields == 2); 1983 TEST_ASSERT_UTEXT(str_first, fields[0]); 1984 TEST_ASSERT_UTEXT(str_secondthird, fields[1]); 1985 TEST_ASSERT(fields[2] == &patternText); 1986 } 1987 for(i = 0; i < numFields; i++) { 1988 utext_close(fields[i]); 1989 } 1990 } 1991 1992 uregex_close(re); 1993 } 1994 1995 /* splitUText(), part 2. Patterns with capture groups. The capture group text 1996 * comes out as additional fields. */ 1997 { 1998 UChar textToSplit[80]; 1999 UText *fields[10]; 2000 int32_t numFields; 2001 int32_t i; 2002 2003 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); 2004 2005 status = U_ZERO_ERROR; 2006 re = uregex_openC("<(.*?)>", 0, NULL, &status); 2007 2008 uregex_setText(re, textToSplit, -1, &status); 2009 TEST_ASSERT_SUCCESS(status); 2010 2011 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2012 if(U_SUCCESS(status)) { 2013 memset(fields, 0, sizeof(fields)); 2014 numFields = uregex_splitUText(re, fields, 10, &status); 2015 TEST_ASSERT_SUCCESS(status); 2016 2017 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2018 if(U_SUCCESS(status)) { 2019 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2020 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2021 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2022 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2023 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2024 2025 TEST_ASSERT(numFields == 5); 2026 TEST_ASSERT_UTEXT(str_first, fields[0]); 2027 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2028 TEST_ASSERT_UTEXT(str_second, fields[2]); 2029 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2030 TEST_ASSERT_UTEXT(str_third, fields[4]); 2031 TEST_ASSERT(fields[5] == NULL); 2032 } 2033 for(i = 0; i < numFields; i++) { 2034 utext_close(fields[i]); 2035 } 2036 } 2037 2038 /* Split with too few output strings available (2) */ 2039 status = U_ZERO_ERROR; 2040 fields[0] = NULL; 2041 fields[1] = NULL; 2042 fields[2] = &patternText; 2043 numFields = uregex_splitUText(re, fields, 2, &status); 2044 TEST_ASSERT_SUCCESS(status); 2045 2046 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2047 if(U_SUCCESS(status)) { 2048 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2049 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2050 TEST_ASSERT(numFields == 2); 2051 TEST_ASSERT_UTEXT(str_first, fields[0]); 2052 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]); 2053 TEST_ASSERT(fields[2] == &patternText); 2054 } 2055 for(i = 0; i < numFields; i++) { 2056 utext_close(fields[i]); 2057 } 2058 2059 2060 /* Split with too few output strings available (3) */ 2061 status = U_ZERO_ERROR; 2062 fields[0] = NULL; 2063 fields[1] = NULL; 2064 fields[2] = NULL; 2065 fields[3] = &patternText; 2066 numFields = uregex_splitUText(re, fields, 3, &status); 2067 TEST_ASSERT_SUCCESS(status); 2068 2069 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2070 if(U_SUCCESS(status)) { 2071 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2072 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2073 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2074 TEST_ASSERT(numFields == 3); 2075 TEST_ASSERT_UTEXT(str_first, fields[0]); 2076 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2077 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]); 2078 TEST_ASSERT(fields[3] == &patternText); 2079 } 2080 for(i = 0; i < numFields; i++) { 2081 utext_close(fields[i]); 2082 } 2083 2084 /* Split with just enough output strings available (5) */ 2085 status = U_ZERO_ERROR; 2086 fields[0] = NULL; 2087 fields[1] = NULL; 2088 fields[2] = NULL; 2089 fields[3] = NULL; 2090 fields[4] = NULL; 2091 fields[5] = &patternText; 2092 numFields = uregex_splitUText(re, fields, 5, &status); 2093 TEST_ASSERT_SUCCESS(status); 2094 2095 
/* The TEST_ASSERT_SUCCESS call above should change too... */ 2096 if(U_SUCCESS(status)) { 2097 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2098 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2099 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2100 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2101 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2102 2103 TEST_ASSERT(numFields == 5); 2104 TEST_ASSERT_UTEXT(str_first, fields[0]); 2105 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2106 TEST_ASSERT_UTEXT(str_second, fields[2]); 2107 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2108 TEST_ASSERT_UTEXT(str_third, fields[4]); 2109 TEST_ASSERT(fields[5] == &patternText); 2110 } 2111 for(i = 0; i < numFields; i++) { 2112 utext_close(fields[i]); 2113 } 2114 2115 /* Split, end of text is a field delimiter. */ 2116 status = U_ZERO_ERROR; 2117 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status); 2118 TEST_ASSERT_SUCCESS(status); 2119 2120 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2121 if(U_SUCCESS(status)) { 2122 memset(fields, 0, sizeof(fields)); 2123 fields[9] = &patternText; 2124 numFields = uregex_splitUText(re, fields, 9, &status); 2125 TEST_ASSERT_SUCCESS(status); 2126 2127 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2128 if(U_SUCCESS(status)) { 2129 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2130 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2131 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2132 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2133 2134 TEST_ASSERT(numFields == 4); 2135 TEST_ASSERT_UTEXT(str_first, fields[0]); 2136 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2137 TEST_ASSERT_UTEXT(str_second, fields[2]); 2138 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2139 TEST_ASSERT(fields[4] == NULL); 2140 TEST_ASSERT(fields[8] == NULL); 2141 TEST_ASSERT(fields[9] == &patternText); 2142 } 2143 for(i = 0; i < numFields; i++) { 2144 utext_close(fields[i]); 2145 } 2146 } 2147 2148 uregex_close(re); 2149 } 2150 utext_close(&patternText); 2151 } 2152 2153 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 2154