1 Index: source/test/cintltst/usrchtst.c 2 =================================================================== 3 --- source/test/cintltst/usrchtst.c (revision 75773) 4 +++ source/test/cintltst/usrchtst.c (working copy) 5 @@ -1,5 +1,5 @@ 6 /******************************************************************** 7 - * Copyright (c) 2001-2010 International Business Machines 8 + * Copyright (c) 2001-2011 International Business Machines 9 * Corporation and others. All Rights Reserved. 10 ******************************************************************** 11 * File usrchtst.c 12 @@ -2553,7 +2553,173 @@ 13 ucol_close(coll); 14 } 15 16 +/** 17 +* TestUsingSearchCollator 18 +*/ 19 20 +#define ARRAY_LENGTH(array) (sizeof(array)/sizeof(array[0])) 21 + 22 +typedef struct { 23 + const UChar * pattern; 24 + const int32_t * offsets; 25 + int32_t offsetsLen; 26 +} PatternAndOffsets; 27 + 28 +static const UChar scKoText[] = { 29 + 0x0020, 30 +/*01*/ 0xAC00, 0x0020, /* simple LV Hangul */ 31 +/*03*/ 0xAC01, 0x0020, /* simple LVT Hangul */ 32 +/*05*/ 0xAC0F, 0x0020, /* LVTT, last jamo expands for search */ 33 +/*07*/ 0xAFFF, 0x0020, /* LLVVVTT, every jamo expands for search */ 34 +/*09*/ 0x1100, 0x1161, 0x11A8, 0x0020, /* 0xAC01 as conjoining jamo */ 35 +/*13*/ 0x1100, 0x1161, 0x1100, 0x0020, /* 0xAC01 as basic conjoining jamo (per search rules) */ 36 +/*17*/ 0x3131, 0x314F, 0x3131, 0x0020, /* 0xAC01 as compatibility jamo */ 37 +/*21*/ 0x1100, 0x1161, 0x11B6, 0x0020, /* 0xAC0F as conjoining jamo; last expands for search */ 38 +/*25*/ 0x1100, 0x1161, 0x1105, 0x1112, 0x0020, /* 0xAC0F as basic conjoining jamo; last expands for search */ 39 +/*30*/ 0x1101, 0x1170, 0x11B6, 0x0020, /* 0xAFFF as conjoining jamo; all expand for search */ 40 +/*34*/ 0x00E6, 0x0020, /* small letter ae, expands */ 41 +/*36*/ 0x1E4D, 0x0020, /* small letter o with tilde and acute, decomposes */ 42 + 0 43 +}; 44 + 45 +static const UChar scKoPat0[] = { 0xAC01, 0 }; 46 +static const UChar scKoPat1[] = { 0x1100, 0x1161, 0x11A8, 0 }; /* 0xAC01 as conjoining jamo */ 47 +static const UChar scKoPat2[] = { 0xAC0F, 0 }; 48 +static const UChar scKoPat3[] = { 0x1100, 0x1161, 0x1105, 0x1112, 0 }; /* 0xAC0F as basic conjoining jamo */ 49 +static const UChar scKoPat4[] = { 0xAFFF, 0 }; 50 +static const UChar scKoPat5[] = { 0x1101, 0x1170, 0x11B6, 0 }; /* 0xAFFF as conjoining jamo */ 51 + 52 +static const int32_t scKoSrchOff01[] = { 3, 9, 13 }; 53 +static const int32_t scKoSrchOff23[] = { 5, 21, 25 }; 54 +static const int32_t scKoSrchOff45[] = { 7, 30 }; 55 + 56 +static const PatternAndOffsets scKoSrchPatternsOffsets[] = { 57 + { scKoPat0, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) }, 58 + { scKoPat1, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) }, 59 + { scKoPat2, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) }, 60 + { scKoPat3, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) }, 61 + { scKoPat4, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) }, 62 + { scKoPat5, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) }, 63 + { NULL, NULL, 0 } 64 +}; 65 + 66 +static const int32_t scKoStndOff01[] = { 3, 9 }; 67 +static const int32_t scKoStndOff2[] = { 5, 21 }; 68 +static const int32_t scKoStndOff3[] = { 25 }; 69 +static const int32_t scKoStndOff45[] = { 7, 30 }; 70 + 71 +static const PatternAndOffsets scKoStndPatternsOffsets[] = { 72 + { scKoPat0, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) }, 73 + { scKoPat1, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) }, 74 + { scKoPat2, scKoStndOff2, ARRAY_LENGTH(scKoStndOff2) }, 75 + { scKoPat3, scKoStndOff3, ARRAY_LENGTH(scKoStndOff3) }, 76 + { scKoPat4, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) }, 77 + { scKoPat5, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) }, 78 + { NULL, NULL, 0 } 79 +}; 80 + 81 +typedef struct { 82 + const char * locale; 83 + const UChar * text; 84 + const PatternAndOffsets * patternsAndOffsets; 85 +} TUSCItem; 86 + 87 +static const TUSCItem tuscItems[] = { 88 + { "root", scKoText, scKoStndPatternsOffsets }, 89 + { "root@collation=search", scKoText, scKoSrchPatternsOffsets }, 90 + { "ko@collation=search", scKoText, scKoSrchPatternsOffsets }, 91 + { NULL, NULL, NULL } 92 +}; 93 + 94 +static const UChar dummyPat[] = { 0x0061, 0 }; 95 + 96 +static void TestUsingSearchCollator(void) 97 +{ 98 + const TUSCItem * tuscItemPtr; 99 + for (tuscItemPtr = tuscItems; tuscItemPtr->locale != NULL; tuscItemPtr++) { 100 + UErrorCode status = U_ZERO_ERROR; 101 + UCollator* ucol = ucol_open(tuscItemPtr->locale, &status); 102 + if ( U_SUCCESS(status) ) { 103 + UStringSearch* usrch = usearch_openFromCollator(dummyPat, -1, tuscItemPtr->text, -1, ucol, NULL, &status); 104 + if ( U_SUCCESS(status) ) { 105 + const PatternAndOffsets * patternsOffsetsPtr; 106 + for ( patternsOffsetsPtr = tuscItemPtr->patternsAndOffsets; patternsOffsetsPtr->pattern != NULL; patternsOffsetsPtr++) { 107 + usearch_setPattern(usrch, patternsOffsetsPtr->pattern, -1, &status); 108 + if ( U_SUCCESS(status) ) { 109 + int32_t offset; 110 + const int32_t * nextOffsetPtr; 111 + const int32_t * limitOffsetPtr; 112 + 113 + usearch_reset(usrch); 114 + nextOffsetPtr = patternsOffsetsPtr->offsets; 115 + limitOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen; 116 + while (TRUE) { 117 + offset = usearch_next(usrch, &status); 118 + if ( U_FAILURE(status) || offset == USEARCH_DONE ) { 119 + break; 120 + } 121 + if ( nextOffsetPtr < limitOffsetPtr ) { 122 + if (offset != *nextOffsetPtr) { 123 + log_err("error, locale %s, expected usearch_next %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset); 124 + nextOffsetPtr = limitOffsetPtr; 125 + break; 126 + } 127 + nextOffsetPtr++; 128 + } else { 129 + log_err("error, locale %s, usearch_next returned more matches than expected\n", tuscItemPtr->locale ); 130 + } 131 + } 132 + if ( U_FAILURE(status) ) { 133 + log_err("error, locale %s, usearch_next failed: %s\n", tuscItemPtr->locale, u_errorName(status) ); 134 + } else if ( nextOffsetPtr < limitOffsetPtr ) { 135 + log_err("error, locale %s, usearch_next returned fewer matches than expected\n", tuscItemPtr->locale ); 136 + } 137 + 138 + status = U_ZERO_ERROR; 139 + usearch_reset(usrch); 140 + nextOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen; 141 + limitOffsetPtr = patternsOffsetsPtr->offsets; 142 + while (TRUE) { 143 + offset = usearch_previous(usrch, &status); 144 + if ( U_FAILURE(status) || offset == USEARCH_DONE ) { 145 + break; 146 + } 147 + if ( nextOffsetPtr > limitOffsetPtr ) { 148 + nextOffsetPtr--; 149 + if (offset != *nextOffsetPtr) { 150 + log_err("error, locale %s, expected usearch_previous %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset); 151 + nextOffsetPtr = limitOffsetPtr; 152 + break; 153 + } 154 + } else { 155 + log_err("error, locale %s, usearch_previous returned more matches than expected\n", tuscItemPtr->locale ); 156 + } 157 + } 158 + if ( U_FAILURE(status) ) { 159 + log_err("error, locale %s, usearch_previous failed: %s\n", tuscItemPtr->locale, u_errorName(status) ); 160 + } else if ( nextOffsetPtr > limitOffsetPtr ) { 161 + log_err("error, locale %s, usearch_previous returned fewer matches than expected\n", tuscItemPtr->locale ); 162 + } 163 + 164 + } else { 165 + log_err("error, locale %s, usearch_setPattern failed: %s\n", tuscItemPtr->locale, u_errorName(status) ); 166 + } 167 + } 168 + usearch_close(usrch); 169 + } else { 170 + log_err("error, locale %s, usearch_openFromCollator failed: %s\n", tuscItemPtr->locale, u_errorName(status) ); 171 + } 172 + ucol_close(ucol); 173 + } else { 174 + log_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr->locale, u_errorName(status) ); 175 + } 176 + } 177 +} 178 + 179 +/** 180 +* addSearchTest 181 +*/ 182 + 183 void addSearchTest(TestNode** root) 184 { 185 addTest(root, &TestStart, "tscoll/usrchtst/TestStart"); 186 @@ -2608,6 +2774,7 @@ 187 addTest(root, &TestForwardBackward, "tscoll/usrchtst/TestForwardBackward"); 188 addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull"); 189 addTest(root, &TestStrengthIdentical, "tscoll/usrchtst/TestStrengthIdentical"); 190 + addTest(root, &TestUsingSearchCollator, "tscoll/usrchtst/TestUsingSearchCollator"); 191 } 192 193 #endif /* #if !UCONFIG_NO_COLLATION */ 194 Index: source/test/cintltst/citertst.c 195 =================================================================== 196 --- source/test/cintltst/citertst.c (revision 75773) 197 +++ source/test/cintltst/citertst.c (working copy) 198 @@ -1,6 +1,6 @@ 199 /******************************************************************** 200 * COPYRIGHT: 201 - * Copyright (c) 1997-2010, International Business Machines Corporation and 202 + * Copyright (c) 1997-2011, International Business Machines Corporation and 203 * others. All Rights Reserved. 204 ********************************************************************/ 205 /******************************************************************************** 206 @@ -22,6 +22,7 @@ 207 #if !UCONFIG_NO_COLLATION 208 209 #include "unicode/ucol.h" 210 +#include "unicode/ucoleitr.h" 211 #include "unicode/uloc.h" 212 #include "unicode/uchar.h" 213 #include "unicode/ustring.h" 214 @@ -58,6 +59,7 @@ 215 addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow"); 216 addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity"); 217 addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity"); 218 + addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements"); 219 } 220 221 /* The locales we support */ 222 @@ -2017,4 +2019,141 @@ 223 T_FileStream_close(file); 224 } 225 226 +/** 227 +* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with 228 +* normalization on AND jamo tailoring, among other things. 229 +*/ 230 +static const UChar tsceText[] = { /* Nothing in here should be ignorable */ 231 + 0x0020, 0xAC00, /* simple LV Hangul */ 232 + 0x0020, 0xAC01, /* simple LVT Hangul */ 233 + 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */ 234 + 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */ 235 + 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */ 236 + 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */ 237 + 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */ 238 + 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */ 239 + 0x0020, 0x00E6, /* small letter ae, expands */ 240 + 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */ 241 + 0x0020 242 +}; 243 +enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) }; 244 + 245 +static const int32_t rootStandardOffsets[] = { 246 + 0, 1,2, 247 + 2, 3,4,4, 248 + 4, 5,6,6, 249 + 6, 7,8,8, 250 + 8, 9,10,11, 251 + 12, 13,14,15, 252 + 16, 17,18,19, 253 + 20, 21,22,23, 254 + 24, 25,26,26,26, 255 + 26, 27,28,28, 256 + 28, 257 + 29 258 +}; 259 +enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) }; 260 + 261 +static const int32_t rootSearchOffsets[] = { 262 + 0, 1,2, 263 + 2, 3,4,4, 264 + 4, 5,6,6,6, 265 + 6, 7,8,8,8,8,8,8, 266 + 8, 9,10,11, 267 + 12, 13,14,15, 268 + 16, 17,18,19,20, 269 + 20, 21,22,22,23,23,23,24, 270 + 24, 25,26,26,26, 271 + 26, 27,28,28, 272 + 28, 273 + 29 274 +}; 275 +enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) }; 276 + 277 +typedef struct { 278 + const char * locale; 279 + const int32_t * offsets; 280 + int32_t offsetsLen; 281 +} TSCEItem; 282 + 283 +static const TSCEItem tsceItems[] = { 284 + { "root", rootStandardOffsets, kLen_rootStandardOffsets }, 285 + { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets }, 286 + { NULL, NULL, 0 } 287 +}; 288 + 289 +static void TestSearchCollatorElements(void) 290 +{ 291 + const TSCEItem * tsceItemPtr; 292 + for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) { 293 + UErrorCode status = U_ZERO_ERROR; 294 + UCollator* ucol = ucol_open(tsceItemPtr->locale, &status); 295 + if ( U_SUCCESS(status) ) { 296 + UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status); 297 + if ( U_SUCCESS(status) ) { 298 + int32_t offset, element; 299 + const int32_t * nextOffsetPtr; 300 + const int32_t * limitOffsetPtr; 301 + 302 + nextOffsetPtr = tsceItemPtr->offsets; 303 + limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; 304 + do { 305 + offset = ucol_getOffset(uce); 306 + element = ucol_next(uce, &status); 307 + if ( element == 0 ) { 308 + log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale ); 309 + } 310 + if ( nextOffsetPtr < limitOffsetPtr ) { 311 + if (offset != *nextOffsetPtr) { 312 + log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n", 313 + tsceItemPtr->locale, *nextOffsetPtr, offset ); 314 + nextOffsetPtr = limitOffsetPtr; 315 + break; 316 + } 317 + nextOffsetPtr++; 318 + } else { 319 + log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale ); 320 + } 321 + } while ( U_SUCCESS(status) && element != UCOL_NULLORDER ); 322 + if ( nextOffsetPtr < limitOffsetPtr ) { 323 + log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale ); 324 + } 325 + 326 + ucol_setOffset(uce, kLen_tsceText, &status); 327 + status = U_ZERO_ERROR; 328 + nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; 329 + limitOffsetPtr = tsceItemPtr->offsets; 330 + do { 331 + offset = ucol_getOffset(uce); 332 + element = ucol_previous(uce, &status); 333 + if ( element == 0 ) { 334 + log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale ); 335 + } 336 + if ( nextOffsetPtr > limitOffsetPtr ) { 337 + nextOffsetPtr--; 338 + if (offset != *nextOffsetPtr) { 339 + log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n", 340 + tsceItemPtr->locale, *nextOffsetPtr, offset ); 341 + nextOffsetPtr = limitOffsetPtr; 342 + break; 343 + } 344 + } else { 345 + log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale ); 346 + } 347 + } while ( U_SUCCESS(status) && element != UCOL_NULLORDER ); 348 + if ( nextOffsetPtr > limitOffsetPtr ) { 349 + log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale ); 350 + } 351 + 352 + ucol_closeElements(uce); 353 + } else { 354 + log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) ); 355 + } 356 + ucol_close(ucol); 357 + } else { 358 + log_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) ); 359 + } 360 + } 361 +} 362 + 363 #endif /* #if !UCONFIG_NO_COLLATION */ 364 Index: source/test/cintltst/citertst.h 365 =================================================================== 366 --- source/test/cintltst/citertst.h (revision 75773) 367 +++ source/test/cintltst/citertst.h (working copy) 368 @@ -1,6 +1,6 @@ 369 /******************************************************************** 370 * COPYRIGHT: 371 - * Copyright (c) 1997-2008, International Business Machines Corporation and 372 + * Copyright (c) 1997-2008,2011, International Business Machines Corporation and 373 * others. All Rights Reserved. 374 ********************************************************************/ 375 /******************************************************************************** 376 @@ -101,6 +101,11 @@ 377 * Bound checkings. 378 */ 379 static void TestSortKeyValidity(void); 380 +/** 381 +* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with 382 +* normalization on AND jamo tailoring, among other things. 383 +*/ 384 +static void TestSearchCollatorElements(void); 385 386 /*------------------------------------------------------------------------ 387 Internal utilities 388 Index: source/i18n/ucol.cpp 389 =================================================================== 390 --- source/i18n/ucol.cpp (revision 75773) 391 +++ source/i18n/ucol.cpp (working copy) 392 @@ -1,6 +1,6 @@ 393 /* 394 ******************************************************************************* 395 -* Copyright (C) 1996-2010, International Business Machines 396 +* Copyright (C) 1996-2011, International Business Machines 397 * Corporation and others. All Rights Reserved. 398 ******************************************************************************* 399 * file name: ucol.cpp 400 @@ -1444,173 +1444,176 @@ 401 UChar ch = 0; 402 collationSource->offsetReturn = NULL; 403 404 - for (;;) /* Loop handles case when incremental normalize switches */ 405 - { /* to or from the side buffer / original string, and we */ 406 - /* need to start again to get the next character. */ 407 + do { 408 + for (;;) /* Loop handles case when incremental normalize switches */ 409 + { /* to or from the side buffer / original string, and we */ 410 + /* need to start again to get the next character. */ 411 412 - if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0) 413 - { 414 - // The source string is null terminated and we're not working from the side buffer, 415 - // and we're not normalizing. This is the fast path. 416 - // (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.) 417 - ch = *collationSource->pos++; 418 - if (ch != 0) { 419 - break; 420 + if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0) 421 + { 422 + // The source string is null terminated and we're not working from the side buffer, 423 + // and we're not normalizing. This is the fast path. 424 + // (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.) 425 + ch = *collationSource->pos++; 426 + if (ch != 0) { 427 + break; 428 + } 429 + else { 430 + return UCOL_NO_MORE_CES; 431 + } 432 } 433 - else { 434 - return UCOL_NO_MORE_CES; 435 - } 436 - } 437 438 - if (collationSource->flags & UCOL_ITER_HASLEN) { 439 - // Normal path for strings when length is specified. 440 - // (We can't be in side buffer because it is always null terminated.) 441 - if (collationSource->pos >= collationSource->endp) { 442 - // Ran off of the end of the main source string. We're done. 443 - return UCOL_NO_MORE_CES; 444 + if (collationSource->flags & UCOL_ITER_HASLEN) { 445 + // Normal path for strings when length is specified. 446 + // (We can't be in side buffer because it is always null terminated.) 447 + if (collationSource->pos >= collationSource->endp) { 448 + // Ran off of the end of the main source string. We're done. 449 + return UCOL_NO_MORE_CES; 450 + } 451 + ch = *collationSource->pos++; 452 } 453 - ch = *collationSource->pos++; 454 - } 455 - else if(collationSource->flags & UCOL_USE_ITERATOR) { 456 - UChar32 iterCh = collationSource->iterator->next(collationSource->iterator); 457 - if(iterCh == U_SENTINEL) { 458 - return UCOL_NO_MORE_CES; 459 - } 460 - ch = (UChar)iterCh; 461 - } 462 - else 463 - { 464 - // Null terminated string. 465 - ch = *collationSource->pos++; 466 - if (ch == 0) { 467 - // Ran off end of buffer. 468 - if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { 469 - // Ran off end of main string. backing up one character. 470 - collationSource->pos--; 471 + else if(collationSource->flags & UCOL_USE_ITERATOR) { 472 + UChar32 iterCh = collationSource->iterator->next(collationSource->iterator); 473 + if(iterCh == U_SENTINEL) { 474 return UCOL_NO_MORE_CES; 475 } 476 - else 477 - { 478 - // Hit null in the normalize side buffer. 479 - // Usually this means the end of the normalized data, 480 - // except for one odd case: a null followed by combining chars, 481 - // which is the case if we are at the start of the buffer. 482 - if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) { 483 - break; 484 + ch = (UChar)iterCh; 485 + } 486 + else 487 + { 488 + // Null terminated string. 489 + ch = *collationSource->pos++; 490 + if (ch == 0) { 491 + // Ran off end of buffer. 492 + if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { 493 + // Ran off end of main string. backing up one character. 494 + collationSource->pos--; 495 + return UCOL_NO_MORE_CES; 496 } 497 + else 498 + { 499 + // Hit null in the normalize side buffer. 500 + // Usually this means the end of the normalized data, 501 + // except for one odd case: a null followed by combining chars, 502 + // which is the case if we are at the start of the buffer. 503 + if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) { 504 + break; 505 + } 506 507 - // Null marked end of side buffer. 508 - // Revert to the main string and 509 - // loop back to top to try again to get a character. 510 - collationSource->pos = collationSource->fcdPosition; 511 - collationSource->flags = collationSource->origFlags; 512 - continue; 513 + // Null marked end of side buffer. 514 + // Revert to the main string and 515 + // loop back to top to try again to get a character. 516 + collationSource->pos = collationSource->fcdPosition; 517 + collationSource->flags = collationSource->origFlags; 518 + continue; 519 + } 520 } 521 } 522 - } 523 524 - if(collationSource->flags&UCOL_HIRAGANA_Q) { 525 - /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag 526 - * based on whether the previous codepoint was Hiragana or Katakana. 527 - */ 528 - if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) || 529 - ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) { 530 - collationSource->flags |= UCOL_WAS_HIRAGANA; 531 - } else { 532 - collationSource->flags &= ~UCOL_WAS_HIRAGANA; 533 + if(collationSource->flags&UCOL_HIRAGANA_Q) { 534 + /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag 535 + * based on whether the previous codepoint was Hiragana or Katakana. 536 + */ 537 + if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) || 538 + ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) { 539 + collationSource->flags |= UCOL_WAS_HIRAGANA; 540 + } else { 541 + collationSource->flags &= ~UCOL_WAS_HIRAGANA; 542 + } 543 } 544 - } 545 546 - // We've got a character. See if there's any fcd and/or normalization stuff to do. 547 - // Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer. 548 - if ((collationSource->flags & UCOL_ITER_NORM) == 0) { 549 - break; 550 - } 551 + // We've got a character. See if there's any fcd and/or normalization stuff to do. 552 + // Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer. 553 + if ((collationSource->flags & UCOL_ITER_NORM) == 0) { 554 + break; 555 + } 556 557 - if (collationSource->fcdPosition >= collationSource->pos) { 558 - // An earlier FCD check has already covered the current character. 559 - // We can go ahead and process this char. 560 - break; 561 - } 562 - 563 - if (ch < ZERO_CC_LIMIT_ ) { 564 - // Fast fcd safe path. Trailing combining class == 0. This char is OK. 565 - break; 566 - } 567 - 568 - if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { 569 - // We need to peek at the next character in order to tell if we are FCD 570 - if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) { 571 - // We are at the last char of source string. 572 - // It is always OK for FCD check. 573 + if (collationSource->fcdPosition >= collationSource->pos) { 574 + // An earlier FCD check has already covered the current character. 575 + // We can go ahead and process this char. 576 break; 577 } 578 579 - // Not at last char of source string (or we'll check against terminating null). Do the FCD fast test 580 - if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) { 581 + if (ch < ZERO_CC_LIMIT_ ) { 582 + // Fast fcd safe path. Trailing combining class == 0. This char is OK. 583 break; 584 } 585 - } 586 587 + if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { 588 + // We need to peek at the next character in order to tell if we are FCD 589 + if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) { 590 + // We are at the last char of source string. 591 + // It is always OK for FCD check. 592 + break; 593 + } 594 595 - // Need a more complete FCD check and possible normalization. 596 - if (collIterFCD(collationSource)) { 597 - collIterNormalize(collationSource); 598 - } 599 - if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { 600 - // No normalization was needed. Go ahead and process the char we already had. 601 - break; 602 - } 603 + // Not at last char of source string (or we'll check against terminating null). Do the FCD fast test 604 + if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) { 605 + break; 606 + } 607 + } 608 609 - // Some normalization happened. Next loop iteration will pick up a char 610 - // from the normalization buffer. 611 612 - } // end for (;;) 613 + // Need a more complete FCD check and possible normalization. 614 + if (collIterFCD(collationSource)) { 615 + collIterNormalize(collationSource); 616 + } 617 + if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { 618 + // No normalization was needed. Go ahead and process the char we already had. 619 + break; 620 + } 621 622 + // Some normalization happened. Next loop iteration will pick up a char 623 + // from the normalization buffer. 624 625 - if (ch <= 0xFF) { 626 - /* For latin-1 characters we never need to fall back to the UCA table */ 627 - /* because all of the UCA data is replicated in the latinOneMapping array */ 628 - order = coll->latinOneMapping[ch]; 629 - if (order > UCOL_NOT_FOUND) { 630 - order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); 631 + } // end for (;;) 632 + 633 + 634 + if (ch <= 0xFF) { 635 + /* For latin-1 characters we never need to fall back to the UCA table */ 636 + /* because all of the UCA data is replicated in the latinOneMapping array */ 637 + order = coll->latinOneMapping[ch]; 638 + if (order > UCOL_NOT_FOUND) { 639 + order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); 640 + } 641 } 642 - } 643 - else 644 - { 645 - // Always use UCA for Han, Hangul 646 - // (Han extension A is before main Han block) 647 - // **** Han compatibility chars ?? **** 648 - if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && 649 - (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) { 650 - if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) { 651 - // between the two target ranges; do normal lookup 652 - // **** this range is YI, Modifier tone letters, **** 653 - // **** Latin-D, Syloti Nagari, Phagas-pa. **** 654 - // **** Latin-D might be tailored, so we need to **** 655 - // **** do the normal lookup for these guys. **** 656 + else 657 + { 658 + // Always use UCA for Han, Hangul 659 + // (Han extension A is before main Han block) 660 + // **** Han compatibility chars ?? **** 661 + if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && 662 + (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) { 663 + if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) { 664 + // between the two target ranges; do normal lookup 665 + // **** this range is YI, Modifier tone letters, **** 666 + // **** Latin-D, Syloti Nagari, Phagas-pa. **** 667 + // **** Latin-D might be tailored, so we need to **** 668 + // **** do the normal lookup for these guys. **** 669 + order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 670 + } else { 671 + // in one of the target ranges; use UCA 672 + order = UCOL_NOT_FOUND; 673 + } 674 + } else { 675 order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 676 - } else { 677 - // in one of the target ranges; use UCA 678 - order = UCOL_NOT_FOUND; 679 } 680 - } else { 681 - order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 682 - } 683 684 - if(order > UCOL_NOT_FOUND) { /* if a CE is special */ 685 - order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */ 686 - } 687 + if(order > UCOL_NOT_FOUND) { /* if a CE is special */ 688 + order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */ 689 + } 690 691 - if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */ 692 - /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */ 693 - order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); 694 + if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */ 695 + /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */ 696 + order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); 697 698 - if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */ 699 - order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status); 700 + if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */ 701 + order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status); 702 + } 703 } 704 } 705 - } 706 + } while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL ); 707 + 708 if(order == UCOL_NOT_FOUND) { 709 order = getImplicit(ch, collationSource); 710 } 711 @@ -1958,161 +1961,163 @@ 712 else { 713 UChar ch = 0; 714 715 - /* 716 - Loop handles case when incremental normalize switches to or from the 717 - side buffer / original string, and we need to start again to get the 718 - next character. 719 - */ 720 - for (;;) { 721 - if (data->flags & UCOL_ITER_HASLEN) { 722 - /* 723 - Normal path for strings when length is specified. 724 - Not in side buffer because it is always null terminated. 725 - */ 726 - if (data->pos <= data->string) { 727 - /* End of the main source string */ 728 - return UCOL_NO_MORE_CES; 729 - } 730 - data->pos --; 731 - ch = *data->pos; 732 - } 733 - // we are using an iterator to go back. Pray for us! 734 - else if (data->flags & UCOL_USE_ITERATOR) { 735 - UChar32 iterCh = data->iterator->previous(data->iterator); 736 - if(iterCh == U_SENTINEL) { 737 - return UCOL_NO_MORE_CES; 738 - } else { 739 - ch = (UChar)iterCh; 740 - } 741 - } 742 - else { 743 - data->pos --; 744 - ch = *data->pos; 745 - /* we are in the side buffer. */ 746 - if (ch == 0) { 747 + do { 748 + /* 749 + Loop handles case when incremental normalize switches to or from the 750 + side buffer / original string, and we need to start again to get the 751 + next character. 752 + */ 753 + for (;;) { 754 + if (data->flags & UCOL_ITER_HASLEN) { 755 /* 756 - At the start of the normalize side buffer. 757 - Go back to string. 758 - Because pointer points to the last accessed character, 759 - hence we have to increment it by one here. 760 + Normal path for strings when length is specified. 761 + Not in side buffer because it is always null terminated. 762 */ 763 - data->flags = data->origFlags; 764 - data->offsetRepeatValue = 0; 765 - 766 - if (data->fcdPosition == NULL) { 767 - data->pos = data->string; 768 + if (data->pos <= data->string) { 769 + /* End of the main source string */ 770 return UCOL_NO_MORE_CES; 771 } 772 - else { 773 - data->pos = data->fcdPosition + 1; 774 + data->pos --; 775 + ch = *data->pos; 776 + } 777 + // we are using an iterator to go back. Pray for us! 778 + else if (data->flags & UCOL_USE_ITERATOR) { 779 + UChar32 iterCh = data->iterator->previous(data->iterator); 780 + if(iterCh == U_SENTINEL) { 781 + return UCOL_NO_MORE_CES; 782 + } else { 783 + ch = (UChar)iterCh; 784 + } 785 + } 786 + else { 787 + data->pos --; 788 + ch = *data->pos; 789 + /* we are in the side buffer. */ 790 + if (ch == 0) { 791 + /* 792 + At the start of the normalize side buffer. 793 + Go back to string. 794 + Because pointer points to the last accessed character, 795 + hence we have to increment it by one here. 796 + */ 797 + data->flags = data->origFlags; 798 + data->offsetRepeatValue = 0; 799 + 800 + if (data->fcdPosition == NULL) { 801 + data->pos = data->string; 802 + return UCOL_NO_MORE_CES; 803 + } 804 + else { 805 + data->pos = data->fcdPosition + 1; 806 + } 807 + 808 + continue; 809 } 810 - 811 - continue; 812 } 813 - } 814 815 - if(data->flags&UCOL_HIRAGANA_Q) { 816 - if(ch>=0x3040 && ch<=0x309f) { 817 - data->flags |= UCOL_WAS_HIRAGANA; 818 - } else { 819 - data->flags &= ~UCOL_WAS_HIRAGANA; 820 - } 821 - } 822 + if(data->flags&UCOL_HIRAGANA_Q) { 823 + if(ch>=0x3040 && ch<=0x309f) { 824 + data->flags |= UCOL_WAS_HIRAGANA; 825 + } else { 826 + data->flags &= ~UCOL_WAS_HIRAGANA; 827 + } 828 + } 829 830 - /* 831 - * got a character to determine if there's fcd and/or normalization 832 - * stuff to do. 833 - * if the current character is not fcd. 834 - * if current character is at the start of the string 835 - * Trailing combining class == 0. 836 - * Note if pos is in the writablebuffer, norm is always 0 837 - */ 838 - if (ch < ZERO_CC_LIMIT_ || 839 - // this should propel us out of the loop in the iterator case 840 - (data->flags & UCOL_ITER_NORM) == 0 || 841 - (data->fcdPosition != NULL && data->fcdPosition <= data->pos) 842 - || data->string == data->pos) { 843 - break; 844 - } 845 - 846 - if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { 847 - /* if next character is FCD */ 848 - if (data->pos == data->string) { 849 - /* First char of string is always OK for FCD check */ 850 + /* 851 + * got a character to determine if there's fcd and/or normalization 852 + * stuff to do. 853 + * if the current character is not fcd. 854 + * if current character is at the start of the string 855 + * Trailing combining class == 0. 856 + * Note if pos is in the writablebuffer, norm is always 0 857 + */ 858 + if (ch < ZERO_CC_LIMIT_ || 859 + // this should propel us out of the loop in the iterator case 860 + (data->flags & UCOL_ITER_NORM) == 0 || 861 + (data->fcdPosition != NULL && data->fcdPosition <= data->pos) 862 + || data->string == data->pos) { 863 break; 864 } 865 866 - /* Not first char of string, do the FCD fast test */ 867 - if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) { 868 + if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { 869 + /* if next character is FCD */ 870 + if (data->pos == data->string) { 871 + /* First char of string is always OK for FCD check */ 872 + break; 873 + } 874 + 875 + /* Not first char of string, do the FCD fast test */ 876 + if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) { 877 + break; 878 + } 879 + } 880 + 881 + /* Need a more complete FCD check and possible normalization. */ 882 + if (collPrevIterFCD(data)) { 883 + collPrevIterNormalize(data); 884 + } 885 + 886 + if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { 887 + /* No normalization. Go ahead and process the char. */ 888 break; 889 } 890 - } 891 892 - /* Need a more complete FCD check and possible normalization. */ 893 - if (collPrevIterFCD(data)) { 894 - collPrevIterNormalize(data); 895 + /* 896 + Some normalization happened. 897 + Next loop picks up a char from the normalization buffer. 898 + */ 899 } 900 901 - if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { 902 - /* No normalization. Go ahead and process the char. */ 903 - break; 904 - } 905 - 906 - /* 907 - Some normalization happened. 908 - Next loop picks up a char from the normalization buffer. 909 + /* attempt to handle contractions, after removal of the backwards 910 + contraction 911 */ 912 - } 913 - 914 - /* attempt to handle contractions, after removal of the backwards 915 - contraction 916 - */ 917 - if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) { 918 - result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status); 919 - } else { 920 - if (ch <= 0xFF) { 921 - result = coll->latinOneMapping[ch]; 922 - } 923 - else { 924 - // Always use UCA for [3400..9FFF], [AC00..D7AF] 925 - // **** [FA0E..FA2F] ?? **** 926 - if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && 927 - (ch >= 0x3400 && ch <= 0xD7AF)) { 928 - if (ch > 0x9FFF && ch < 0xAC00) { 929 - // between the two target ranges; do normal lookup 930 - // **** this range is YI, Modifier tone letters, **** 931 - // **** Latin-D, Syloti Nagari, Phagas-pa. **** 932 - // **** Latin-D might be tailored, so we need to **** 933 - // **** do the normal lookup for these guys. **** 934 - result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 935 + if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) { 936 + result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status); 937 + } else { 938 + if (ch <= 0xFF) { 939 + result = coll->latinOneMapping[ch]; 940 + } 941 + else { 942 + // Always use UCA for [3400..9FFF], [AC00..D7AF] 943 + // **** [FA0E..FA2F] ?? **** 944 + if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && 945 + (ch >= 0x3400 && ch <= 0xD7AF)) { 946 + if (ch > 0x9FFF && ch < 0xAC00) { 947 + // between the two target ranges; do normal lookup 948 + // **** this range is YI, Modifier tone letters, **** 949 + // **** Latin-D, Syloti Nagari, Phagas-pa. **** 950 + // **** Latin-D might be tailored, so we need to **** 951 + // **** do the normal lookup for these guys. **** 952 + result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 953 + } else { 954 + result = UCOL_NOT_FOUND; 955 + } 956 } else { 957 - result = UCOL_NOT_FOUND; 958 + result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 959 } 960 - } else { 961 - result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); 962 } 963 - } 964 - if (result > UCOL_NOT_FOUND) { 965 - result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status); 966 - } 967 - if (result == UCOL_NOT_FOUND) { // Not found in master list 968 - if (!isAtStartPrevIterate(data) && 969 - ucol_contractionEndCP(ch, data->coll)) 970 - { 971 - result = UCOL_CONTRACTION; 972 - } else { 973 - if(coll->UCA) { 974 - result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); 975 + if (result > UCOL_NOT_FOUND) { 976 + result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status); 977 + } 978 + if (result == UCOL_NOT_FOUND) { // Not found in master list 979 + if (!isAtStartPrevIterate(data) && 980 + ucol_contractionEndCP(ch, data->coll)) 981 + { 982 + result = UCOL_CONTRACTION; 983 + } else { 984 + if(coll->UCA) { 985 + result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); 986 + } 987 } 988 - } 989 990 - if (result > UCOL_NOT_FOUND) { 991 - if(coll->UCA) { 992 - result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status); 993 + if (result > UCOL_NOT_FOUND) { 994 + if(coll->UCA) { 995 + result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status); 996 + } 997 } 998 } 999 } 1000 - } 1001 + } while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL ); 1002 1003 if(result == UCOL_NOT_FOUND) { 1004 result = getPrevImplicit(ch, data); 1005 @@ -3193,6 +3198,7 @@ 1006 // Since Hanguls pass the FCD check, it is 1007 // guaranteed that we won't be in 1008 // the normalization buffer if something like this happens 1009 + 1010 // However, if we are using a uchar iterator and normalization 1011 // is ON, the Hangul that lead us here is going to be in that 1012 // normalization buffer. Here we want to restore the uchar 1013 @@ -3201,6 +3207,7 @@ 1014 source->flags = source->origFlags; // restore the iterator 1015 source->pos = NULL; 1016 } 1017 + 1018 // Move Jamos into normalization buffer 1019 UChar *buffer = source->writableBuffer.getBuffer(4); 1020 int32_t bufferLength; 1021 @@ -3214,8 +3221,9 @@ 1022 } 1023 source->writableBuffer.releaseBuffer(bufferLength); 1024 1025 - source->fcdPosition = source->pos; // Indicate where to continue in main input string 1026 - // after exhausting the writableBuffer 1027 + // Indicate where to continue in main input string after exhausting the writableBuffer 1028 + source->fcdPosition = source->pos; 1029 + 1030 source->pos = source->writableBuffer.getTerminatedBuffer(); 1031 source->origFlags = source->flags; 1032 source->flags |= UCOL_ITER_INNORMBUF; 1033 @@ -3966,13 +3974,10 @@ 1034 // Since Hanguls pass the FCD check, it is 1035 // guaranteed that we won't be in 1036 // the normalization buffer if something like this happens 1037 + 1038 // Move Jamos into normalization buffer 1039 - /* 1040 - Move the Jamos into the 1041 - normalization buffer 1042 - */ 1043 UChar *tempbuffer = source->writableBuffer.getBuffer(5); 1044 - int32_t tempbufferLength; 1045 + int32_t tempbufferLength, jamoOffset; 1046 tempbuffer[0] = 0; 1047 tempbuffer[1] = (UChar)L; 1048 tempbuffer[2] = (UChar)V; 1049 @@ -3984,16 +3989,30 @@ 1050 } 1051 source->writableBuffer.releaseBuffer(tempbufferLength); 1052 1053 - /* 1054 - Indicate where to continue in main input string after exhausting 1055 - the writableBuffer 1056 - */ 1057 + // Indicate where to continue in main input string after exhausting the writableBuffer 1058 if (source->pos == source->string) { 1059 + jamoOffset = 0; 1060 source->fcdPosition = NULL; 1061 } else { 1062 + jamoOffset = source->pos - source->string; 1063 source->fcdPosition = source->pos-1; 1064 } 1065 + 1066 + // Append offsets for the additional chars 1067 + // (not the 0, and not the L whose offsets match the original Hangul) 1068 + int32_t jamoRemaining = tempbufferLength - 2; 1069 + jamoOffset++; // appended offsets should match end of original Hangul 1070 + while (jamoRemaining-- > 0) { 1071 + source->appendOffset(jamoOffset, *status); 1072 + } 1073 1074 + source->offsetRepeatValue = jamoOffset; 1075 + 1076 + source->offsetReturn = source->offsetStore - 1; 1077 + if (source->offsetReturn == source->offsetBuffer) { 1078 + source->offsetStore = source->offsetBuffer; 1079 + } 1080 + 1081 source->pos = source->writableBuffer.getTerminatedBuffer() + tempbufferLength; 1082 source->origFlags = source->flags; 1083 source->flags |= UCOL_ITER_INNORMBUF; 1084