Home | History | Annotate | Download | only in patches
      1 Index: source/test/cintltst/usrchtst.c
      2 ===================================================================
      3 --- source/test/cintltst/usrchtst.c	(revision 75773)
      4 +++ source/test/cintltst/usrchtst.c	(working copy)
      5 @@ -1,5 +1,5 @@
      6  /********************************************************************
      7 - * Copyright (c) 2001-2010 International Business Machines 
      8 + * Copyright (c) 2001-2011 International Business Machines 
      9   * Corporation and others. All Rights Reserved.
     10   ********************************************************************
     11   * File usrchtst.c
     12 @@ -2553,7 +2553,173 @@
     13      ucol_close(coll);
     14  }
     15  
     16 +/**
     17 +* TestUsingSearchCollator
     18 +*/
     19  
     20 +#define ARRAY_LENGTH(array) (sizeof(array)/sizeof(array[0])) /* element count; the argument must be a true array, not a pointer */
     21 +
     22 +typedef struct {                   /* one search pattern and the match offsets expected for it */
     23 +    const UChar *   pattern;       /* NUL-terminated pattern (passed with length -1); NULL marks the end of a table */
     24 +    const int32_t * offsets;       /* expected match offsets, in the order usearch_next should report them */
     25 +    int32_t         offsetsLen;    /* number of entries in offsets */
     26 +} PatternAndOffsets;
     27 +
     28 +static const UChar scKoText[] = {
     29 +       0x0020,
     30 +/*01*/ 0xAC00, 0x0020,                         /* simple LV Hangul */
     31 +/*03*/ 0xAC01, 0x0020,                         /* simple LVT Hangul */
     32 +/*05*/ 0xAC0F, 0x0020,                         /* LVTT, last jamo expands for search */
     33 +/*07*/ 0xAFFF, 0x0020,                         /* LLVVVTT, every jamo expands for search */
     34 +/*09*/ 0x1100, 0x1161, 0x11A8, 0x0020,         /* 0xAC01 as conjoining jamo */
     35 +/*13*/ 0x1100, 0x1161, 0x1100, 0x0020,         /* 0xAC01 as basic conjoining jamo (per search rules) */
     36 +/*17*/ 0x3131, 0x314F, 0x3131, 0x0020,         /* 0xAC01 as compatibility jamo */
     37 +/*21*/ 0x1100, 0x1161, 0x11B6, 0x0020,         /* 0xAC0F as conjoining jamo; last expands for search */
     38 +/*25*/ 0x1100, 0x1161, 0x1105, 0x1112, 0x0020, /* 0xAC0F as basic conjoining jamo; last expands for search */
     39 +/*30*/ 0x1101, 0x1170, 0x11B6, 0x0020,         /* 0xAFFF as conjoining jamo; all expand for search */
     40 +/*34*/ 0x00E6, 0x0020,                         /* small letter ae, expands */
     41 +/*36*/ 0x1E4D, 0x0020,                         /* small letter o with tilde and acute, decomposes */
     42 +       0
     43 +};
     44 +
     45 +static const UChar scKoPat0[] = { 0xAC01, 0 };
     46 +static const UChar scKoPat1[] = { 0x1100, 0x1161, 0x11A8, 0 }; /* 0xAC01 as conjoining jamo */
     47 +static const UChar scKoPat2[] = { 0xAC0F, 0 };
     48 +static const UChar scKoPat3[] = { 0x1100, 0x1161, 0x1105, 0x1112, 0 }; /* 0xAC0F as basic conjoining jamo */
     49 +static const UChar scKoPat4[] = { 0xAFFF, 0 };
     50 +static const UChar scKoPat5[] = { 0x1101, 0x1170, 0x11B6, 0 }; /* 0xAFFF as conjoining jamo */
     51 +
     52 +static const int32_t scKoSrchOff01[] = { 3,  9, 13 }; /* search collators: expected offsets in scKoText for scKoPat0/scKoPat1 */
     53 +static const int32_t scKoSrchOff23[] = { 5, 21, 25 }; /* search collators: expected offsets for scKoPat2/scKoPat3 */
     54 +static const int32_t scKoSrchOff45[] = { 7, 30     }; /* search collators: expected offsets for scKoPat4/scKoPat5 */
     55 +
     56 +static const PatternAndOffsets scKoSrchPatternsOffsets[] = {
     57 +    { scKoPat0, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },
     58 +    { scKoPat1, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },
     59 +    { scKoPat2, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },
     60 +    { scKoPat3, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },
     61 +    { scKoPat4, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },
     62 +    { scKoPat5, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },
     63 +    { NULL,     NULL,          0                           }
     64 +};
     65 +
     66 +static const int32_t scKoStndOff01[] = { 3,  9 }; /* "root" collator: expected offsets in scKoText for scKoPat0/scKoPat1 */
     67 +static const int32_t scKoStndOff2[]  = { 5, 21 }; /* "root" collator: expected offsets for scKoPat2 */
     68 +static const int32_t scKoStndOff3[]  = { 25    }; /* "root" collator: expected offsets for scKoPat3 */
     69 +static const int32_t scKoStndOff45[] = { 7, 30 }; /* "root" collator: expected offsets for scKoPat4/scKoPat5 */
     70 +
     71 +static const PatternAndOffsets scKoStndPatternsOffsets[] = {
     72 +    { scKoPat0, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },
     73 +    { scKoPat1, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },
     74 +    { scKoPat2, scKoStndOff2,  ARRAY_LENGTH(scKoStndOff2)  },
     75 +    { scKoPat3, scKoStndOff3,  ARRAY_LENGTH(scKoStndOff3)  },
     76 +    { scKoPat4, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },
     77 +    { scKoPat5, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },
     78 +    { NULL,     NULL,          0                           }
     79 +};
     80 +
     81 +typedef struct {                   /* one TestUsingSearchCollator case: a collator locale, a text, and its pattern table */
     82 +    const char *  locale;          /* locale ID handed to ucol_open; NULL marks the end of tuscItems */
     83 +    const UChar * text;            /* NUL-terminated text to search (passed with length -1) */
     84 +    const PatternAndOffsets * patternsAndOffsets; /* table ended by an entry whose pattern is NULL */
     85 +} TUSCItem;
     86 +
     87 +static const TUSCItem tuscItems[] = {
     88 +    { "root",                  scKoText, scKoStndPatternsOffsets },
     89 +    { "root@collation=search", scKoText, scKoSrchPatternsOffsets },
     90 +    { "ko@collation=search",   scKoText, scKoSrchPatternsOffsets },
     91 +    { NULL,                    NULL,     NULL                    }
     92 +};
     93 +
     94 +static const UChar dummyPat[] = { 0x0061, 0 };
     95 +/* For every tuscItems entry, runs each pattern through usearch_next and usearch_previous and compares the match offsets with the expected table. */
     96 +static void TestUsingSearchCollator(void)
     97 +{
     98 +    const TUSCItem * tuscItemPtr;
     99 +    for (tuscItemPtr = tuscItems; tuscItemPtr->locale != NULL; tuscItemPtr++) {
    100 +        UErrorCode status = U_ZERO_ERROR;
    101 +        UCollator* ucol = ucol_open(tuscItemPtr->locale, &status);
    102 +        if ( U_SUCCESS(status) ) {
    103 +            UStringSearch* usrch = usearch_openFromCollator(dummyPat, -1, tuscItemPtr->text, -1, ucol, NULL, &status); /* dummyPat is only a placeholder; usearch_setPattern installs each real pattern below */
    104 +            if ( U_SUCCESS(status) ) {
    105 +                const PatternAndOffsets * patternsOffsetsPtr;
    106 +                for ( patternsOffsetsPtr = tuscItemPtr->patternsAndOffsets; patternsOffsetsPtr->pattern != NULL; patternsOffsetsPtr++) {
    107 +                    usearch_setPattern(usrch, patternsOffsetsPtr->pattern, -1, &status);
    108 +                    if ( U_SUCCESS(status) ) {
    109 +                        int32_t offset;
    110 +                        const int32_t * nextOffsetPtr;
    111 +                        const int32_t * limitOffsetPtr;
    112 +                        /* Forward pass: each usearch_next result must equal the next entry of offsets[], first to last. */
    113 +                        usearch_reset(usrch);
    114 +                        nextOffsetPtr = patternsOffsetsPtr->offsets;
    115 +                        limitOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen;
    116 +                        while (TRUE) {
    117 +                            offset = usearch_next(usrch, &status);
    118 +                            if ( U_FAILURE(status) || offset == USEARCH_DONE ) {
    119 +                                break;
    120 +                            }
    121 +                            if ( nextOffsetPtr < limitOffsetPtr ) {
    122 +                                 if (offset != *nextOffsetPtr) {
    123 +                                     log_err("error, locale %s, expected usearch_next %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);
    124 +                                     nextOffsetPtr = limitOffsetPtr; /* mismatch already reported: suppress the "fewer matches" message below */
    125 +                                     break;
    126 +                                 }
    127 +                                 nextOffsetPtr++;
    128 +                            } else {
    129 +                                log_err("error, locale %s, usearch_next returned more matches than expected\n", tuscItemPtr->locale );
    130 +                            }
    131 +                        }
    132 +                        if ( U_FAILURE(status) ) {
    133 +                            log_err("error, locale %s, usearch_next failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
    134 +                        } else if ( nextOffsetPtr < limitOffsetPtr ) {
    135 +                            log_err("error, locale %s, usearch_next returned fewer matches than expected\n", tuscItemPtr->locale );
    136 +                        }
    137 +                        /* Backward pass: clear status, then each usearch_previous result must equal offsets[] walked from last to first. */
    138 +                        status = U_ZERO_ERROR;
    139 +                        usearch_reset(usrch);
    140 +                        nextOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen;
    141 +                        limitOffsetPtr = patternsOffsetsPtr->offsets;
    142 +                        while (TRUE) {
    143 +                            offset = usearch_previous(usrch, &status);
    144 +                            if ( U_FAILURE(status) || offset == USEARCH_DONE ) {
    145 +                                break;
    146 +                            }
    147 +                            if ( nextOffsetPtr > limitOffsetPtr ) {
    148 +                                nextOffsetPtr--;
    149 +                                if (offset != *nextOffsetPtr) {
    150 +                                     log_err("error, locale %s, expected usearch_previous %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);
    151 +                                     nextOffsetPtr = limitOffsetPtr; /* mismatch already reported: suppress the "fewer matches" message below */
    152 +                                      break;
    153 +                                }
    154 +                            } else {
    155 +                                log_err("error, locale %s, usearch_previous returned more matches than expected\n", tuscItemPtr->locale );
    156 +                            }
    157 +                        }
    158 +                        if ( U_FAILURE(status) ) {
    159 +                            log_err("error, locale %s, usearch_previous failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
    160 +                        } else if ( nextOffsetPtr > limitOffsetPtr ) {
    161 +                            log_err("error, locale %s, usearch_previous returned fewer matches than expected\n", tuscItemPtr->locale );
    162 +                        }
    163 +
    164 +                    } else {
    165 +                        log_err("error, locale %s, usearch_setPattern failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
    166 +                    }
    167 +                }
    168 +                usearch_close(usrch);
    169 +            } else {
    170 +                log_err("error, locale %s, usearch_openFromCollator failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
    171 +            }
    172 +            ucol_close(ucol);
    173 +        } else {
    174 +            log_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
    175 +        }
    176 +    }
    177 +}
    178 +
    179 +/**
    180 +* addSearchTest
    181 +*/
    182 +
    183  void addSearchTest(TestNode** root)
    184  {
    185      addTest(root, &TestStart, "tscoll/usrchtst/TestStart");
    186 @@ -2608,6 +2774,7 @@
    187      addTest(root, &TestForwardBackward, "tscoll/usrchtst/TestForwardBackward");
    188  	addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull");
    189      addTest(root, &TestStrengthIdentical, "tscoll/usrchtst/TestStrengthIdentical");
    190 +    addTest(root, &TestUsingSearchCollator, "tscoll/usrchtst/TestUsingSearchCollator");
    191  }
    192  
    193  #endif /* #if !UCONFIG_NO_COLLATION */
    194 Index: source/test/cintltst/citertst.c
    195 ===================================================================
    196 --- source/test/cintltst/citertst.c	(revision 75773)
    197 +++ source/test/cintltst/citertst.c	(working copy)
    198 @@ -1,6 +1,6 @@
    199  /********************************************************************
    200   * COPYRIGHT:
    201 - * Copyright (c) 1997-2010, International Business Machines Corporation and
    202 + * Copyright (c) 1997-2011, International Business Machines Corporation and
    203   * others. All Rights Reserved.
    204   ********************************************************************/
    205  /********************************************************************************
    206 @@ -22,6 +22,7 @@
    207  #if !UCONFIG_NO_COLLATION
    208  
    209  #include "unicode/ucol.h"
    210 +#include "unicode/ucoleitr.h"
    211  #include "unicode/uloc.h"
    212  #include "unicode/uchar.h"
    213  #include "unicode/ustring.h"
    214 @@ -58,6 +59,7 @@
    215      addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
    216      addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
    217      addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
    218 +    addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
    219  }
    220  
    221  /* The locales we support */
    222 @@ -2017,4 +2019,141 @@
    223      T_FileStream_close(file);
    224  }
    225  
    226 +/**
    227 +* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
    228 +* normalization on AND jamo tailoring, among other things.
    229 +*/
    230 +static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
    231 +    0x0020, 0xAC00,                 /* simple LV Hangul */
    232 +    0x0020, 0xAC01,                 /* simple LVT Hangul */
    233 +    0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
    234 +    0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
    235 +    0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
    236 +    0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
    237 +    0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
    238 +    0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
    239 +    0x0020, 0x00E6,                 /* small letter ae, expands */
    240 +    0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
    241 +    0x0020
    242 +};
    243 +enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
    244 +
    245 +static const int32_t rootStandardOffsets[] = {
    246 +    0,  1,2,
    247 +    2,  3,4,4,
    248 +    4,  5,6,6,
    249 +    6,  7,8,8,
    250 +    8,  9,10,11,
    251 +    12, 13,14,15,
    252 +    16, 17,18,19,
    253 +    20, 21,22,23,
    254 +    24, 25,26,26,26,
    255 +    26, 27,28,28,
    256 +    28,
    257 +    29
    258 +};
    259 +enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) };
    260 +
    261 +static const int32_t rootSearchOffsets[] = {
    262 +    0,  1,2,
    263 +    2,  3,4,4,
    264 +    4,  5,6,6,6,
    265 +    6,  7,8,8,8,8,8,8,
    266 +    8,  9,10,11,
    267 +    12, 13,14,15,
    268 +    16, 17,18,19,20,
    269 +    20, 21,22,22,23,23,23,24,
    270 +    24, 25,26,26,26,
    271 +    26, 27,28,28,
    272 +    28,
    273 +    29
    274 +};
    275 +enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) };
    276 +
    277 +typedef struct {                   /* one TestSearchCollatorElements case: a collator locale and its expected iterator offsets */
    278 +    const char *    locale;        /* locale ID handed to ucol_open; NULL marks the end of tsceItems */
    279 +    const int32_t * offsets;       /* expected ucol_getOffset values, one per ucol_next call over tsceText */
    280 +    int32_t         offsetsLen;    /* number of entries in offsets */
    281 +} TSCEItem;
    282 +
    283 +static const TSCEItem tsceItems[] = {
    284 +    { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
    285 +    { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
    286 +    { NULL,                    NULL,                0                        }
    287 +};
    288 +/* For every tsceItems entry, iterates tsceText forwards and then backwards, comparing ucol_getOffset before each step with the expected table. */
    289 +static void TestSearchCollatorElements(void)
    290 +{
    291 +    const TSCEItem * tsceItemPtr;
    292 +    for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
    293 +        UErrorCode status = U_ZERO_ERROR;
    294 +        UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
    295 +        if ( U_SUCCESS(status) ) {
    296 +            UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
    297 +            if ( U_SUCCESS(status) ) {
    298 +                int32_t offset, element;
    299 +                const int32_t * nextOffsetPtr;
    300 +                const int32_t * limitOffsetPtr;
    301 +                /* Forward pass: the offset is sampled before every ucol_next call, including the last one that returns UCOL_NULLORDER. */
    302 +                nextOffsetPtr = tsceItemPtr->offsets;
    303 +                limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
    304 +                do {
    305 +                    offset = ucol_getOffset(uce);
    306 +                    element = ucol_next(uce, &status);
    307 +                    if ( element == 0 ) { /* tsceText is meant to contain nothing ignorable, so a 0 element is an error */
    308 +                        log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
    309 +                    }
    310 +                    if ( nextOffsetPtr < limitOffsetPtr ) {
    311 +                        if (offset != *nextOffsetPtr) {
    312 +                            log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
    313 +                                                            tsceItemPtr->locale, *nextOffsetPtr, offset );
    314 +                            nextOffsetPtr = limitOffsetPtr; /* mismatch already reported: suppress the "fewer elements" message below */
    315 +                            break;
    316 +                        }
    317 +                        nextOffsetPtr++;
    318 +                    } else {
    319 +                        log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
    320 +                    }
    321 +                } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
    322 +                if ( nextOffsetPtr < limitOffsetPtr ) {
    323 +                    log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
    324 +                }
    325 +                /* Backward pass: move to the end of the text, then compare offsets sampled before each ucol_previous with the table read last to first. */
    326 +                ucol_setOffset(uce, kLen_tsceText, &status);
    327 +                status = U_ZERO_ERROR; /* NOTE(review): this also discards any error reported by ucol_setOffset above */
    328 +                nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
    329 +                limitOffsetPtr = tsceItemPtr->offsets;
    330 +                do {
    331 +                    offset = ucol_getOffset(uce);
    332 +                    element = ucol_previous(uce, &status);
    333 +                    if ( element == 0 ) {
    334 +                        log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
    335 +                    }
    336 +                    if ( nextOffsetPtr > limitOffsetPtr ) {
    337 +                        nextOffsetPtr--;
    338 +                        if (offset != *nextOffsetPtr) {
    339 +                            log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
    340 +                                                                tsceItemPtr->locale, *nextOffsetPtr, offset );
    341 +                            nextOffsetPtr = limitOffsetPtr; /* mismatch already reported: suppress the "fewer elements" message below */
    342 +                            break;
    343 +                        }
    344 +                   } else {
    345 +                        log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
    346 +                    }
    347 +                } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
    348 +                if ( nextOffsetPtr > limitOffsetPtr ) {
    349 +                    log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
    350 +                }
    351 +
    352 +                ucol_closeElements(uce);
    353 +            } else {
    354 +                log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
    355 +            }
    356 +            ucol_close(ucol);
    357 +        } else {
    358 +            log_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
    359 +        }
    360 +    }
    361 +}
    362 +
    363  #endif /* #if !UCONFIG_NO_COLLATION */
    364 Index: source/test/cintltst/citertst.h
    365 ===================================================================
    366 --- source/test/cintltst/citertst.h	(revision 75773)
    367 +++ source/test/cintltst/citertst.h	(working copy)
    368 @@ -1,6 +1,6 @@
    369  /********************************************************************
    370   * COPYRIGHT: 
    371 - * Copyright (c) 1997-2008, International Business Machines Corporation and
    372 + * Copyright (c) 1997-2008,2011, International Business Machines Corporation and
    373   * others. All Rights Reserved.
    374   ********************************************************************/
    375  /********************************************************************************
    376 @@ -101,6 +101,11 @@
    377  * Bound checkings.
    378  */
    379  static void TestSortKeyValidity(void);
    380 +/**
    381 +* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
    382 +* normalization on AND jamo tailoring, among other things.
    383 +*/
    384 +static void TestSearchCollatorElements(void);
    385  
    386  /*------------------------------------------------------------------------
    387   Internal utilities
    388 Index: source/i18n/ucol.cpp
    389 ===================================================================
    390 --- source/i18n/ucol.cpp	(revision 75773)
    391 +++ source/i18n/ucol.cpp	(working copy)
    392 @@ -1,6 +1,6 @@
    393  /*
    394  *******************************************************************************
    395 -*   Copyright (C) 1996-2010, International Business Machines
    396 +*   Copyright (C) 1996-2011, International Business Machines
    397  *   Corporation and others.  All Rights Reserved.
    398  *******************************************************************************
    399  *   file name:  ucol.cpp
    400 @@ -1444,173 +1444,176 @@
    401      UChar ch = 0;
    402      collationSource->offsetReturn = NULL;
    403  
    404 -    for (;;)                           /* Loop handles case when incremental normalize switches   */
    405 -    {                                  /*   to or from the side buffer / original string, and we  */
    406 -        /*   need to start again to get the next character.        */
    407 +    do {
    408 +        for (;;)                           /* Loop handles case when incremental normalize switches   */
    409 +        {                                  /*   to or from the side buffer / original string, and we  */
    410 +            /*   need to start again to get the next character.        */
    411  
    412 -        if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)
    413 -        {
    414 -            // The source string is null terminated and we're not working from the side buffer,
    415 -            //   and we're not normalizing.  This is the fast path.
    416 -            //   (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.)
    417 -            ch = *collationSource->pos++;
    418 -            if (ch != 0) {
    419 -                break;
    420 +            if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)
    421 +            {
    422 +                // The source string is null terminated and we're not working from the side buffer,
    423 +                //   and we're not normalizing.  This is the fast path.
    424 +                //   (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.)
    425 +                ch = *collationSource->pos++;
    426 +                if (ch != 0) {
    427 +                    break;
    428 +                }
    429 +                else {
    430 +                    return UCOL_NO_MORE_CES;
    431 +                }
    432              }
    433 -            else {
    434 -                return UCOL_NO_MORE_CES;
    435 -            }
    436 -        }
    437  
    438 -        if (collationSource->flags & UCOL_ITER_HASLEN) {
    439 -            // Normal path for strings when length is specified.
    440 -            //   (We can't be in side buffer because it is always null terminated.)
    441 -            if (collationSource->pos >= collationSource->endp) {
    442 -                // Ran off of the end of the main source string.  We're done.
    443 -                return UCOL_NO_MORE_CES;
    444 +            if (collationSource->flags & UCOL_ITER_HASLEN) {
    445 +                // Normal path for strings when length is specified.
    446 +                //   (We can't be in side buffer because it is always null terminated.)
    447 +                if (collationSource->pos >= collationSource->endp) {
    448 +                    // Ran off of the end of the main source string.  We're done.
    449 +                    return UCOL_NO_MORE_CES;
    450 +                }
    451 +                ch = *collationSource->pos++;
    452              }
    453 -            ch = *collationSource->pos++;
    454 -        }
    455 -        else if(collationSource->flags & UCOL_USE_ITERATOR) {
    456 -            UChar32 iterCh = collationSource->iterator->next(collationSource->iterator);
    457 -            if(iterCh == U_SENTINEL) {
    458 -                return UCOL_NO_MORE_CES;
    459 -            }
    460 -            ch = (UChar)iterCh;
    461 -        }
    462 -        else
    463 -        {
    464 -            // Null terminated string.
    465 -            ch = *collationSource->pos++;
    466 -            if (ch == 0) {
    467 -                // Ran off end of buffer.
    468 -                if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
    469 -                    // Ran off end of main string. backing up one character.
    470 -                    collationSource->pos--;
    471 +            else if(collationSource->flags & UCOL_USE_ITERATOR) {
    472 +                UChar32 iterCh = collationSource->iterator->next(collationSource->iterator);
    473 +                if(iterCh == U_SENTINEL) {
    474                      return UCOL_NO_MORE_CES;
    475                  }
    476 -                else
    477 -                {
    478 -                    // Hit null in the normalize side buffer.
    479 -                    // Usually this means the end of the normalized data,
    480 -                    // except for one odd case: a null followed by combining chars,
    481 -                    //   which is the case if we are at the start of the buffer.
    482 -                    if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) {
    483 -                        break;
    484 +                ch = (UChar)iterCh;
    485 +            }
    486 +            else
    487 +            {
    488 +                // Null terminated string.
    489 +                ch = *collationSource->pos++;
    490 +                if (ch == 0) {
    491 +                    // Ran off end of buffer.
    492 +                    if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
    493 +                        // Ran off end of main string. backing up one character.
    494 +                        collationSource->pos--;
    495 +                        return UCOL_NO_MORE_CES;
    496                      }
    497 +                    else
    498 +                    {
    499 +                        // Hit null in the normalize side buffer.
    500 +                        // Usually this means the end of the normalized data,
    501 +                        // except for one odd case: a null followed by combining chars,
    502 +                        //   which is the case if we are at the start of the buffer.
    503 +                        if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) {
    504 +                            break;
    505 +                        }
    506  
    507 -                    //  Null marked end of side buffer.
    508 -                    //   Revert to the main string and
    509 -                    //   loop back to top to try again to get a character.
    510 -                    collationSource->pos   = collationSource->fcdPosition;
    511 -                    collationSource->flags = collationSource->origFlags;
    512 -                    continue;
    513 +                        //  Null marked end of side buffer.
    514 +                        //   Revert to the main string and
    515 +                        //   loop back to top to try again to get a character.
    516 +                        collationSource->pos   = collationSource->fcdPosition;
    517 +                        collationSource->flags = collationSource->origFlags;
    518 +                        continue;
    519 +                    }
    520                  }
    521              }
    522 -        }
    523  
    524 -        if(collationSource->flags&UCOL_HIRAGANA_Q) {
    525 -            /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
    526 -             * based on whether the previous codepoint was Hiragana or Katakana.
    527 -             */
    528 -            if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) ||
    529 -                    ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {
    530 -                collationSource->flags |= UCOL_WAS_HIRAGANA;
    531 -            } else {
    532 -                collationSource->flags &= ~UCOL_WAS_HIRAGANA;
    533 +            if(collationSource->flags&UCOL_HIRAGANA_Q) {
    534 +                /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
    535 +                 * based on whether the previous codepoint was Hiragana or Katakana.
    536 +                 */
    537 +                if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) ||
    538 +                        ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {
    539 +                    collationSource->flags |= UCOL_WAS_HIRAGANA;
    540 +                } else {
    541 +                    collationSource->flags &= ~UCOL_WAS_HIRAGANA;
    542 +                }
    543              }
    544 -        }
    545  
    546 -        // We've got a character.  See if there's any fcd and/or normalization stuff to do.
    547 -        //    Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer.
    548 -        if ((collationSource->flags & UCOL_ITER_NORM) == 0) {
    549 -            break;
    550 -        }
    551 +            // We've got a character.  See if there's any fcd and/or normalization stuff to do.
    552 +            //    Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer.
    553 +            if ((collationSource->flags & UCOL_ITER_NORM) == 0) {
    554 +                break;
    555 +            }
    556  
    557 -        if (collationSource->fcdPosition >= collationSource->pos) {
    558 -            // An earlier FCD check has already covered the current character.
    559 -            // We can go ahead and process this char.
    560 -            break;
    561 -        }
    562 -
    563 -        if (ch < ZERO_CC_LIMIT_ ) {
    564 -            // Fast fcd safe path.  Trailing combining class == 0.  This char is OK.
    565 -            break;
    566 -        }
    567 -
    568 -        if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
    569 -            // We need to peek at the next character in order to tell if we are FCD
    570 -            if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) {
    571 -                // We are at the last char of source string.
    572 -                //  It is always OK for FCD check.
    573 +            if (collationSource->fcdPosition >= collationSource->pos) {
    574 +                // An earlier FCD check has already covered the current character.
    575 +                // We can go ahead and process this char.
    576                  break;
    577              }
    578  
    579 -            // Not at last char of source string (or we'll check against terminating null).  Do the FCD fast test
    580 -            if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {
    581 +            if (ch < ZERO_CC_LIMIT_ ) {
    582 +                // Fast fcd safe path.  Trailing combining class == 0.  This char is OK.
    583                  break;
    584              }
    585 -        }
    586  
    587 +            if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
    588 +                // We need to peek at the next character in order to tell if we are FCD
    589 +                if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) {
    590 +                    // We are at the last char of source string.
    591 +                    //  It is always OK for FCD check.
    592 +                    break;
    593 +                }
    594  
    595 -        // Need a more complete FCD check and possible normalization.
    596 -        if (collIterFCD(collationSource)) {
    597 -            collIterNormalize(collationSource);
    598 -        }
    599 -        if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
    600 -            //  No normalization was needed.  Go ahead and process the char we already had.
    601 -            break;
    602 -        }
    603 +                // Not at last char of source string (or we'll check against terminating null).  Do the FCD fast test
    604 +                if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {
    605 +                    break;
    606 +                }
    607 +            }
    608  
    609 -        // Some normalization happened.  Next loop iteration will pick up a char
    610 -        //   from the normalization buffer.
    611  
    612 -    }   // end for (;;)
    613 +            // Need a more complete FCD check and possible normalization.
    614 +            if (collIterFCD(collationSource)) {
    615 +                collIterNormalize(collationSource);
    616 +            }
    617 +            if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
    618 +                //  No normalization was needed.  Go ahead and process the char we already had.
    619 +                break;
    620 +            }
    621  
    622 +            // Some normalization happened.  Next loop iteration will pick up a char
    623 +            //   from the normalization buffer.
    624  
    625 -    if (ch <= 0xFF) {
    626 -        /*  For latin-1 characters we never need to fall back to the UCA table        */
    627 -        /*    because all of the UCA data is replicated in the latinOneMapping array  */
    628 -        order = coll->latinOneMapping[ch];
    629 -        if (order > UCOL_NOT_FOUND) {
    630 -            order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);
    631 +        }   // end for (;;)
    632 +
    633 +
    634 +        if (ch <= 0xFF) {
    635 +            /*  For latin-1 characters we never need to fall back to the UCA table        */
    636 +            /*    because all of the UCA data is replicated in the latinOneMapping array  */
    637 +            order = coll->latinOneMapping[ch];
    638 +            if (order > UCOL_NOT_FOUND) {
    639 +                order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);
    640 +            }
    641          }
    642 -    }
    643 -    else
    644 -    {
    645 -        // Always use UCA for Han, Hangul
    646 -        // (Han extension A is before main Han block)
    647 -        // **** Han compatibility chars ?? ****
    648 -        if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
    649 -            (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {
    650 -            if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {
    651 -                // between the two target ranges; do normal lookup
    652 -                // **** this range is YI, Modifier tone letters, ****
    653 -                // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
    654 -                // **** Latin-D might be tailored, so we need to ****
    655 -                // **** do the normal lookup for these guys.     ****
    656 +        else
    657 +        {
    658 +            // Always use UCA for Han, Hangul
    659 +            // (Han extension A is before main Han block)
    660 +            // **** Han compatibility chars ?? ****
    661 +            if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
    662 +                (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {
    663 +                if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {
    664 +                    // between the two target ranges; do normal lookup
    665 +                    // **** this range is YI, Modifier tone letters, ****
    666 +                    // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
    667 +                    // **** Latin-D might be tailored, so we need to ****
    668 +                    // **** do the normal lookup for these guys.     ****
    669 +                    order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
    670 +                } else {
    671 +                    // in one of the target ranges; use UCA
    672 +                    order = UCOL_NOT_FOUND;
    673 +                }
    674 +            } else {
    675                  order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
    676 -            } else {
    677 -                // in one of the target ranges; use UCA
    678 -                order = UCOL_NOT_FOUND;
    679              }
    680 -        } else {
    681 -            order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
    682 -        }
    683  
    684 -        if(order > UCOL_NOT_FOUND) {                                       /* if a CE is special                */
    685 -            order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);    /* and try to get the special CE     */
    686 -        }
    687 +            if(order > UCOL_NOT_FOUND) {                                       /* if a CE is special                */
    688 +                order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);    /* and try to get the special CE     */
    689 +            }
    690  
    691 -        if(order == UCOL_NOT_FOUND && coll->UCA) {   /* We couldn't find a good CE in the tailoring */
    692 -            /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
    693 -            order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
    694 +            if(order == UCOL_NOT_FOUND && coll->UCA) {   /* We couldn't find a good CE in the tailoring */
    695 +                /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
    696 +                order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
    697  
    698 -            if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
    699 -                order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);
    700 +                if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
    701 +                    order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);
    702 +                }
    703              }
    704          }
    705 -    }
    706 +    } while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );
    707 +
    708      if(order == UCOL_NOT_FOUND) {
    709          order = getImplicit(ch, collationSource);
    710      }
    711 @@ -1958,161 +1961,163 @@
    712      else {
    713          UChar ch = 0;
    714  
    715 -        /*
    716 -        Loop handles case when incremental normalize switches to or from the
    717 -        side buffer / original string, and we need to start again to get the
    718 -        next character.
    719 -        */
    720 -        for (;;) {
    721 -            if (data->flags & UCOL_ITER_HASLEN) {
    722 -                /*
    723 -                Normal path for strings when length is specified.
    724 -                Not in side buffer because it is always null terminated.
    725 -                */
    726 -                if (data->pos <= data->string) {
    727 -                    /* End of the main source string */
    728 -                    return UCOL_NO_MORE_CES;
    729 -                }
    730 -                data->pos --;
    731 -                ch = *data->pos;
    732 -            }
    733 -            // we are using an iterator to go back. Pray for us!
    734 -            else if (data->flags & UCOL_USE_ITERATOR) {
    735 -              UChar32 iterCh = data->iterator->previous(data->iterator);
    736 -              if(iterCh == U_SENTINEL) {
    737 -                return UCOL_NO_MORE_CES;
    738 -              } else {
    739 -                ch = (UChar)iterCh;
    740 -              }
    741 -            }
    742 -            else {
    743 -                data->pos --;
    744 -                ch = *data->pos;
    745 -                /* we are in the side buffer. */
    746 -                if (ch == 0) {
    747 +        do {
    748 +            /*
    749 +            Loop handles case when incremental normalize switches to or from the
    750 +            side buffer / original string, and we need to start again to get the
    751 +            next character.
    752 +            */
    753 +            for (;;) {
    754 +                if (data->flags & UCOL_ITER_HASLEN) {
    755                      /*
    756 -                    At the start of the normalize side buffer.
    757 -                    Go back to string.
    758 -                    Because pointer points to the last accessed character,
    759 -                    hence we have to increment it by one here.
    760 +                    Normal path for strings when length is specified.
    761 +                    Not in side buffer because it is always null terminated.
    762                      */
    763 -                    data->flags = data->origFlags;
    764 -                    data->offsetRepeatValue = 0;
    765 - 
    766 -                     if (data->fcdPosition == NULL) {
    767 -                        data->pos = data->string;
    768 +                    if (data->pos <= data->string) {
    769 +                        /* End of the main source string */
    770                          return UCOL_NO_MORE_CES;
    771                      }
    772 -                    else {
    773 -                        data->pos   = data->fcdPosition + 1;
    774 +                    data->pos --;
    775 +                    ch = *data->pos;
    776 +                }
    777 +                // we are using an iterator to go back. Pray for us!
    778 +                else if (data->flags & UCOL_USE_ITERATOR) {
    779 +                  UChar32 iterCh = data->iterator->previous(data->iterator);
    780 +                  if(iterCh == U_SENTINEL) {
    781 +                    return UCOL_NO_MORE_CES;
    782 +                  } else {
    783 +                    ch = (UChar)iterCh;
    784 +                  }
    785 +                }
    786 +                else {
    787 +                    data->pos --;
    788 +                    ch = *data->pos;
    789 +                    /* we are in the side buffer. */
    790 +                    if (ch == 0) {
    791 +                        /*
    792 +                        At the start of the normalize side buffer.
    793 +                        Go back to string.
    794 +                        Because pointer points to the last accessed character,
    795 +                        hence we have to increment it by one here.
    796 +                        */
    797 +                        data->flags = data->origFlags;
    798 +                        data->offsetRepeatValue = 0;
    799 +
    800 +                         if (data->fcdPosition == NULL) {
    801 +                            data->pos = data->string;
    802 +                            return UCOL_NO_MORE_CES;
    803 +                        }
    804 +                        else {
    805 +                            data->pos   = data->fcdPosition + 1;
    806 +                        }
    807 +
    808 +                       continue;
    809                      }
    810 -                    
    811 -                   continue;
    812                  }
    813 -            }
    814  
    815 -            if(data->flags&UCOL_HIRAGANA_Q) {
    816 -              if(ch>=0x3040 && ch<=0x309f) {
    817 -                data->flags |= UCOL_WAS_HIRAGANA;
    818 -              } else {
    819 -                data->flags &= ~UCOL_WAS_HIRAGANA;
    820 -              }
    821 -            }
    822 +                if(data->flags&UCOL_HIRAGANA_Q) {
    823 +                  if(ch>=0x3040 && ch<=0x309f) {
    824 +                    data->flags |= UCOL_WAS_HIRAGANA;
    825 +                  } else {
    826 +                    data->flags &= ~UCOL_WAS_HIRAGANA;
    827 +                  }
    828 +                }
    829  
    830 -            /*
    831 -            * got a character to determine if there's fcd and/or normalization
    832 -            * stuff to do.
    833 -            * if the current character is not fcd.
    834 -            * if current character is at the start of the string
    835 -            * Trailing combining class == 0.
    836 -            * Note if pos is in the writablebuffer, norm is always 0
    837 -            */
    838 -            if (ch < ZERO_CC_LIMIT_ ||
    839 -              // this should propel us out of the loop in the iterator case
    840 -                (data->flags & UCOL_ITER_NORM) == 0 ||
    841 -                (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
    842 -                || data->string == data->pos) {
    843 -                break;
    844 -            }
    845 -
    846 -            if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
    847 -                /* if next character is FCD */
    848 -                if (data->pos == data->string) {
    849 -                    /* First char of string is always OK for FCD check */
    850 +                /*
    851 +                * got a character to determine if there's fcd and/or normalization
    852 +                * stuff to do.
    853 +                * if the current character is not fcd.
    854 +                * if current character is at the start of the string
    855 +                * Trailing combining class == 0.
    856 +                * Note if pos is in the writablebuffer, norm is always 0
    857 +                */
    858 +                if (ch < ZERO_CC_LIMIT_ ||
    859 +                  // this should propel us out of the loop in the iterator case
    860 +                    (data->flags & UCOL_ITER_NORM) == 0 ||
    861 +                    (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
    862 +                    || data->string == data->pos) {
    863                      break;
    864                  }
    865  
    866 -                /* Not first char of string, do the FCD fast test */
    867 -                if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
    868 +                if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
    869 +                    /* if next character is FCD */
    870 +                    if (data->pos == data->string) {
    871 +                        /* First char of string is always OK for FCD check */
    872 +                        break;
    873 +                    }
    874 +
    875 +                    /* Not first char of string, do the FCD fast test */
    876 +                    if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
    877 +                        break;
    878 +                    }
    879 +                }
    880 +
    881 +                /* Need a more complete FCD check and possible normalization. */
    882 +                if (collPrevIterFCD(data)) {
    883 +                    collPrevIterNormalize(data);
    884 +                }
    885 +
    886 +                if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
    887 +                    /*  No normalization. Go ahead and process the char. */
    888                      break;
    889                  }
    890 -            }
    891  
    892 -            /* Need a more complete FCD check and possible normalization. */
    893 -            if (collPrevIterFCD(data)) {
    894 -                collPrevIterNormalize(data);
    895 +                /*
    896 +                Some normalization happened.
    897 +                Next loop picks up a char from the normalization buffer.
    898 +                */
    899              }
    900  
    901 -            if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
    902 -                /*  No normalization. Go ahead and process the char. */
    903 -                break;
    904 -            }
    905 -
    906 -            /*
    907 -            Some normalization happened.
    908 -            Next loop picks up a char from the normalization buffer.
    909 +            /* attempt to handle contractions, after removal of the backwards
    910 +            contraction
    911              */
    912 -        }
    913 -
    914 -        /* attempt to handle contractions, after removal of the backwards
    915 -        contraction
    916 -        */
    917 -        if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
    918 -            result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
    919 -        } else {
    920 -            if (ch <= 0xFF) {
    921 -                result = coll->latinOneMapping[ch];
    922 -            }
    923 -            else {
    924 -                // Always use UCA for [3400..9FFF], [AC00..D7AF]
    925 -                // **** [FA0E..FA2F] ?? ****
    926 -                if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
    927 -                    (ch >= 0x3400 && ch <= 0xD7AF)) {
    928 -                    if (ch > 0x9FFF && ch < 0xAC00) {
    929 -                        // between the two target ranges; do normal lookup
    930 -                        // **** this range is YI, Modifier tone letters, ****
    931 -                        // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
    932 -                        // **** Latin-D might be tailored, so we need to ****
    933 -                        // **** do the normal lookup for these guys.     ****
    934 -                         result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
    935 +            if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
    936 +                result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
    937 +            } else {
    938 +                if (ch <= 0xFF) {
    939 +                    result = coll->latinOneMapping[ch];
    940 +                }
    941 +                else {
    942 +                    // Always use UCA for [3400..9FFF], [AC00..D7AF]
    943 +                    // **** [FA0E..FA2F] ?? ****
    944 +                    if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
    945 +                        (ch >= 0x3400 && ch <= 0xD7AF)) {
    946 +                        if (ch > 0x9FFF && ch < 0xAC00) {
    947 +                            // between the two target ranges; do normal lookup
    948 +                            // **** this range is YI, Modifier tone letters, ****
    949 +                            // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
    950 +                            // **** Latin-D might be tailored, so we need to ****
    951 +                            // **** do the normal lookup for these guys.     ****
    952 +                             result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
    953 +                        } else {
    954 +                            result = UCOL_NOT_FOUND;
    955 +                        }
    956                      } else {
    957 -                        result = UCOL_NOT_FOUND;
    958 +                        result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
    959                      }
    960 -                } else {
    961 -                    result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
    962                  }
    963 -            }
    964 -            if (result > UCOL_NOT_FOUND) {
    965 -                result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
    966 -            }
    967 -            if (result == UCOL_NOT_FOUND) { // Not found in master list
    968 -                if (!isAtStartPrevIterate(data) &&
    969 -                    ucol_contractionEndCP(ch, data->coll))
    970 -                {
    971 -                    result = UCOL_CONTRACTION;
    972 -                } else {
    973 -                    if(coll->UCA) {
    974 -                        result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
    975 +                if (result > UCOL_NOT_FOUND) {
    976 +                    result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
    977 +                }
    978 +                if (result == UCOL_NOT_FOUND) { // Not found in master list
    979 +                    if (!isAtStartPrevIterate(data) &&
    980 +                        ucol_contractionEndCP(ch, data->coll))
    981 +                    {
    982 +                        result = UCOL_CONTRACTION;
    983 +                    } else {
    984 +                        if(coll->UCA) {
    985 +                            result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
    986 +                        }
    987                      }
    988 -                }
    989  
    990 -                if (result > UCOL_NOT_FOUND) {
    991 -                    if(coll->UCA) {
    992 -                        result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
    993 +                    if (result > UCOL_NOT_FOUND) {
    994 +                        if(coll->UCA) {
    995 +                            result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
    996 +                        }
    997                      }
    998                  }
    999              }
   1000 -        }
   1001 +        } while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );
   1002  
   1003          if(result == UCOL_NOT_FOUND) {
   1004              result = getPrevImplicit(ch, data);
   1005 @@ -3193,6 +3198,7 @@
   1006                      // Since Hanguls pass the FCD check, it is
   1007                      // guaranteed that we won't be in
   1008                      // the normalization buffer if something like this happens
   1009 +
   1010                      // However, if we are using a uchar iterator and normalization
   1011                      // is ON, the Hangul that lead us here is going to be in that
   1012                      // normalization buffer. Here we want to restore the uchar
   1013 @@ -3201,6 +3207,7 @@
   1014                          source->flags = source->origFlags; // restore the iterator
   1015                          source->pos = NULL;
   1016                      }
   1017 +
   1018                      // Move Jamos into normalization buffer
   1019                      UChar *buffer = source->writableBuffer.getBuffer(4);
   1020                      int32_t bufferLength;
   1021 @@ -3214,8 +3221,9 @@
   1022                      }
   1023                      source->writableBuffer.releaseBuffer(bufferLength);
   1024  
   1025 -                    source->fcdPosition       = source->pos;   // Indicate where to continue in main input string
   1026 -                    //   after exhausting the writableBuffer
   1027 +                    // Indicate where to continue in main input string after exhausting the writableBuffer
   1028 +                    source->fcdPosition       = source->pos;
   1029 +
   1030                      source->pos   = source->writableBuffer.getTerminatedBuffer();
   1031                      source->origFlags   = source->flags;
   1032                      source->flags       |= UCOL_ITER_INNORMBUF;
   1033 @@ -3966,13 +3974,10 @@
   1034                      // Since Hanguls pass the FCD check, it is
   1035                      // guaranteed that we won't be in
   1036                      // the normalization buffer if something like this happens
   1037 +
   1038                      // Move Jamos into normalization buffer
   1039 -                    /*
   1040 -                    Move the Jamos into the
   1041 -                    normalization buffer
   1042 -                    */
   1043                      UChar *tempbuffer = source->writableBuffer.getBuffer(5);
   1044 -                    int32_t tempbufferLength;
   1045 +                    int32_t tempbufferLength, jamoOffset;
   1046                      tempbuffer[0] = 0;
   1047                      tempbuffer[1] = (UChar)L;
   1048                      tempbuffer[2] = (UChar)V;
   1049 @@ -3984,16 +3989,30 @@
   1050                      }
   1051                      source->writableBuffer.releaseBuffer(tempbufferLength);
   1052  
   1053 -                    /*
   1054 -                    Indicate where to continue in main input string after exhausting
   1055 -                    the writableBuffer
   1056 -                    */
   1057 +                    // Indicate where to continue in main input string after exhausting the writableBuffer
   1058                      if (source->pos  == source->string) {
   1059 +                        jamoOffset = 0;
   1060                          source->fcdPosition = NULL;
   1061                      } else {
   1062 +                        jamoOffset = source->pos - source->string;
   1063                          source->fcdPosition       = source->pos-1;
   1064                      }
   1065 +                    
    1066 +                    // Append offsets for the additional chars
    1067 +                    // (not the 0, and not the L whose offsets match the original Hangul)
   1068 +                    int32_t jamoRemaining = tempbufferLength - 2;
   1069 +                    jamoOffset++; // appended offsets should match end of original Hangul
   1070 +                    while (jamoRemaining-- > 0) {
   1071 +                        source->appendOffset(jamoOffset, *status);
   1072 +                    }
   1073  
   1074 +                    source->offsetRepeatValue = jamoOffset;
   1075 +
   1076 +                    source->offsetReturn = source->offsetStore - 1;
   1077 +                    if (source->offsetReturn == source->offsetBuffer) {
   1078 +                        source->offsetStore = source->offsetBuffer;
   1079 +                    }
   1080 +
   1081                      source->pos               = source->writableBuffer.getTerminatedBuffer() + tempbufferLength;
   1082                      source->origFlags         = source->flags;
   1083                      source->flags            |= UCOL_ITER_INNORMBUF;
   1084