Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1998-2012, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /*
      7 * File utf8tst.c
      8 *
      9 * Modification History:
     10 *
     11 *   Date          Name        Description
     12 *   07/24/2000    Madhu       Creation
     13 *******************************************************************************
     14 */
     15 
     16 #include "unicode/utypes.h"
     17 #include "unicode/utf8.h"
     18 #include "cmemory.h"
     19 #include "cintltst.h"
     20 
     21 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     22 
     23 /* lenient UTF-8 ------------------------------------------------------------ */
     24 
     25 /*
     26  * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate
     27  * code points with their "natural" encoding.
     28  * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of
     29  * single surrogates.
     30  *
     31  * This is not conformant with UTF-8.
     32  *
     33  * Supplementary code points may be encoded as pairs of 3-byte sequences, but
     34  * the macros below do not attempt to assemble such pairs.
     35  */
     36 
     37 #define L8_NEXT(s, i, length, c) { \
     38     (c)=(uint8_t)(s)[(i)++]; \
     39     if((c)>=0x80) { \
     40         if(U8_IS_LEAD(c)) { \
     41             (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (int32_t)(length), c, -2); \
     42         } else { \
     43             (c)=U_SENTINEL; \
     44         } \
     45     } \
     46 }
     47 
     48 #define L8_PREV(s, start, i, c) { \
     49     (c)=(uint8_t)(s)[--(i)]; \
     50     if((c)>=0x80) { \
     51         if((c)<=0xbf) { \
     52             (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -2); \
     53         } else { \
     54             (c)=U_SENTINEL; \
     55         } \
     56     } \
     57 }
     58 
     59 /* -------------------------------------------------------------------------- */
     60 
     61 static void printUChars(const uint8_t *uchars, int16_t len);
     62 
     63 static void TestCodeUnitValues(void);
     64 static void TestCharLength(void);
     65 static void TestGetChar(void);
     66 static void TestNextPrevChar(void);
     67 static void TestNextPrevNonCharacters(void);
     68 static void TestNextPrevCharUnsafe(void);
     69 static void TestFwdBack(void);
     70 static void TestFwdBackUnsafe(void);
     71 static void TestSetChar(void);
     72 static void TestSetCharUnsafe(void);
     73 static void TestAppendChar(void);
     74 static void TestAppend(void);
     75 static void TestSurrogates(void);
     76 
     77 void addUTF8Test(TestNode** root);
     78 
     79 void
     80 addUTF8Test(TestNode** root)
     81 {
     82     addTest(root, &TestCodeUnitValues,          "utf8tst/TestCodeUnitValues");
     83     addTest(root, &TestCharLength,              "utf8tst/TestCharLength");
     84     addTest(root, &TestGetChar,                 "utf8tst/TestGetChar");
     85     addTest(root, &TestNextPrevChar,            "utf8tst/TestNextPrevChar");
     86     addTest(root, &TestNextPrevNonCharacters,   "utf8tst/TestNextPrevNonCharacters");
     87     addTest(root, &TestNextPrevCharUnsafe,      "utf8tst/TestNextPrevCharUnsafe");
     88     addTest(root, &TestFwdBack,                 "utf8tst/TestFwdBack");
     89     addTest(root, &TestFwdBackUnsafe,           "utf8tst/TestFwdBackUnsafe");
     90     addTest(root, &TestSetChar,                 "utf8tst/TestSetChar");
     91     addTest(root, &TestSetCharUnsafe,           "utf8tst/TestSetCharUnsafe");
     92     addTest(root, &TestAppendChar,              "utf8tst/TestAppendChar");
     93     addTest(root, &TestAppend,                  "utf8tst/TestAppend");
     94     addTest(root, &TestSurrogates,              "utf8tst/TestSurrogates");
     95 }
     96 
     97 static void TestCodeUnitValues()
     98 {
     99     static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0xfd, 0x80, 0x81, 0xbc, 0xbe,};
    100 
    101     int16_t i;
    102     for(i=0; i<LENGTHOF(codeunit); i++){
    103         uint8_t c=codeunit[i];
    104         log_verbose("Testing code unit value of %x\n", c);
    105         if(i<4){
    106             if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){
    107                 log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n",
    108                     c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
    109             }
    110         } else if(i< 8){
    111             if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){
    112                 log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n",
    113                     c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
    114             }
    115         } else if(i< 12){
    116             if(!UTF8_IS_TRAIL(c) || UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || !U8_IS_TRAIL(c) || U8_IS_SINGLE(c) || U8_IS_LEAD(c)){
    117                 log_err("ERROR: 0x%02x is a trail byte but results in single: %c lead: %c trail: %c\n",
    118                     c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
    119             }
    120         }
    121     }
    122 }
    123 
    124 static void TestCharLength()
    125 {
    126     static const uint32_t codepoint[]={
    127         1, 0x0061,
    128         1, 0x007f,
    129         2, 0x016f,
    130         2, 0x07ff,
    131         3, 0x0865,
    132         3, 0x20ac,
    133         4, 0x20402,
    134         4, 0x23456,
    135         4, 0x24506,
    136         4, 0x20402,
    137         4, 0x10402,
    138         3, 0xd7ff,
    139         3, 0xe000,
    140 
    141     };
    142 
    143     int16_t i;
    144     UBool multiple;
    145     for(i=0; i<LENGTHOF(codepoint); i=(int16_t)(i+2)){
    146         UChar32 c=codepoint[i+1];
    147         if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uint16_t)codepoint[i]){
    148               log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF8_CHAR_LENGTH(c));
    149         }else{
    150               log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_LENGTH(c));
    151         }
    152         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
    153         if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){
    154               log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c);
    155         }
    156     }
    157 }
    158 
    159 static void TestGetChar()
    160 {
    161     static const uint8_t input[]={
    162     /*  code unit,*/
    163         0x61,
    164         0x7f,
    165         0xe4,
    166         0xba,
    167         0x8c,
    168         0xF0,
    169         0x90,
    170         0x90,
    171         0x81,
    172         0xc0,
    173         0x65,
    174         0x31,
    175         0x9a,
    176         0xc9
    177     };
    178     static const UChar32 result[]={
    179     /*  codepoint-unsafe, codepoint-safe(not strict)  codepoint-safe(strict) */
    180         0x61,             0x61,                       0x61,
    181         0x7f,             0x7f,                       0x7f,
    182         0x4e8c,           0x4e8c,                     0x4e8c,
    183         0x4e8c,           0x4e8c,                     0x4e8c ,
    184         0x4e8c,           0x4e8c,                     0x4e8c,
    185         0x10401,          0x10401,                    0x10401 ,
    186         0x10401,          0x10401,                    0x10401 ,
    187         0x10401,          0x10401,                    0x10401 ,
    188         0x10401,          0x10401,                    0x10401,
    189         0x25,             UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1,
    190         0x65,             0x65,                       0x65,
    191         0x31,             0x31,                       0x31,
    192         0x31,             UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1,
    193         0x240,            UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1
    194     };
    195     uint16_t i=0;
    196     UChar32 c;
    197     uint32_t offset=0;
    198 
    199     for(offset=0; offset<sizeof(input); offset++) {
    200         if (offset < sizeof(input) - 1) {
    201             UTF8_GET_CHAR_UNSAFE(input, offset, c);
    202             if(c != result[i]){
    203                 log_err("ERROR: UTF8_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
    204 
    205             }
    206 
    207             U8_GET_UNSAFE(input, offset, c);
    208             if(c != result[i]){
    209                 log_err("ERROR: U8_GET_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
    210 
    211             }
    212         }
    213 
    214         U8_GET(input, 0, offset, sizeof(input), c);
    215         if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){
    216             log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
    217         }
    218 
    219         UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, FALSE);
    220         if(c != result[i+1]){
    221             log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
    222         }
    223 
    224         UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, TRUE);
    225         if(c != result[i+2]){
    226             log_err("ERROR: UTF8_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
    227         }
    228 
    229          i=(uint16_t)(i+3);
    230     }
    231 }
    232 
    233 static void TestNextPrevChar() {
    234     static const uint8_t input[]={0x61, 0xf0, 0x90, 0x90, 0x81, 0xc0, 0x80, 0xfd, 0xbe, 0xc2, 0x61, 0x81, 0x90, 0x90, 0xf0, 0x00};
    235     static const UChar32 result[]={
    236     /*  next_unsafe    next_safe_ns        next_safe_s          prev_unsafe   prev_safe_ns        prev_safe_s */
    237         0x0061,        0x0061,             0x0061,              0x0000,       0x0000,             0x0000,
    238         0x10401,       0x10401,            0x10401,             0xf0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
    239         0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x2841410,    UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
    240         0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xa1050,      UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
    241         0x81,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x2841,       UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
    242         0x00,          UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,  0x61,         0x61,               0x61,
    243         0x80,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xc2,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
    244         0xfd,          UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,  0x77e,        UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
    245         0xbe,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xfd,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
    246         0xa1,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x00,         UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
    247         0x61,          0x61,               0x61,                0xc0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
    248         0x81,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x10401,      0x10401,            0x10401,
    249         0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x410,        UTF_ERROR_VALUE,    UTF_ERROR_VALUE,
    250         0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x410,        UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
    251         0x0840,        UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xf0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
    252         0x0000,        0x0000,             0x0000,              0x0061,       0x0061,             0x0061
    253     };
    254     static const int32_t movedOffset[]={
    255     /*  next_unsafe   next_safe_ns next_safe_s       prev_unsafe   prev_safe_ns      prev_safe_s */
    256         1,            1,           1,                15,           15,               15,
    257         5,            5,           5,                14,           14 ,              14,
    258         3,            3,           3,                9,            13,               13,
    259         4,            4,           4,                9,            12,               12,
    260         5,            5,           5,                9,            11,               11,
    261         7,            7,           7,                10,           10,               10,
    262         7,            7,           7,                9,            9,                9,
    263         8,            9,           9,                7,            7,                7,
    264         9,            9,           9,                7,            7,                7,
    265         11,           10,          10,               5,            5,                5,
    266         11,           11,          11,               5,            5,                5,
    267         12,           12,          12,               1,            1,                1,
    268         13,           13,          13,               1,            1,                1,
    269         14,           14,          14,               1,            1,                1,
    270         14,           15,          15,               1,            1,                1,
    271         14,           16,          16,               0,            0,                0,
    272     };
    273     /* TODO: remove unused columns for next_unsafe & prev_unsafe, and adjust the test code */
    274 
    275     UChar32 c=0x0000;
    276     uint32_t i=0;
    277     uint32_t offset=0;
    278     int32_t setOffset=0;
    279     for(offset=0; offset<sizeof(input); offset++){
    280          setOffset=offset;
    281          UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, FALSE);
    282          if(setOffset != movedOffset[i+1]){
    283              log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
    284                  offset, movedOffset[i+1], setOffset);
    285          }
    286          if(c != result[i+1]){
    287              log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
    288          }
    289 
    290          setOffset=offset;
    291          U8_NEXT(input, setOffset, sizeof(input), c);
    292          if(setOffset != movedOffset[i+1]){
    293              log_err("ERROR: U8_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
    294                  offset, movedOffset[i+1], setOffset);
    295          }
    296          if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){
    297              log_err("ERROR: U8_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
    298          }
    299 
    300          setOffset=offset;
    301          UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, TRUE);
    302          if(setOffset != movedOffset[i+1]){
    303              log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
    304                  offset, movedOffset[i+2], setOffset);
    305          }
    306          if(c != result[i+2]){
    307              log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
    308          }
    309 
    310          i=i+6;
    311     }
    312 
    313     i=0;
    314     for(offset=sizeof(input); offset > 0; --offset){
    315          setOffset=offset;
    316          UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE);
    317          if(setOffset != movedOffset[i+4]){
    318              log_err("ERROR: UTF8_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
    319                  offset, movedOffset[i+4], setOffset);
    320          }
    321          if(c != result[i+4]){
    322              log_err("ERROR: UTF8_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
    323          }
    324 
    325          setOffset=offset;
    326          U8_PREV(input, 0, setOffset, c);
    327          if(setOffset != movedOffset[i+4]){
    328              log_err("ERROR: U8_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
    329                  offset, movedOffset[i+4], setOffset);
    330          }
    331          if(UTF_IS_ERROR(result[i+4]) ? c >= 0 : c != result[i+4]){
    332              log_err("ERROR: U8_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
    333          }
    334 
    335          setOffset=offset;
    336          UTF8_PREV_CHAR_SAFE(input, 0,  setOffset, c, TRUE);
    337          if(setOffset != movedOffset[i+5]){
    338              log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
    339                  offset, movedOffset[i+5], setOffset);
    340          }
    341          if(c != result[i+5]){
    342              log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c);
    343          }
    344 
    345          i=i+6;
    346     }
    347 }
    348 
    349 static void TestNextPrevNonCharacters() {
    350     /* test non-characters */
    351     static const uint8_t nonChars[]={
    352         0xef, 0xb7, 0x90,       /* U+fdd0 */
    353         0xef, 0xbf, 0xbf,       /* U+feff */
    354         0xf0, 0x9f, 0xbf, 0xbe, /* U+1fffe */
    355         0xf0, 0xbf, 0xbf, 0xbf, /* U+3ffff */
    356         0xf4, 0x8f, 0xbf, 0xbe  /* U+10fffe */
    357     };
    358 
    359     UChar32 ch;
    360     int32_t idx;
    361 
    362     for(idx=0; idx<(int32_t)sizeof(nonChars);) {
    363         U8_NEXT(nonChars, idx, sizeof(nonChars), ch);
    364         if(!U_IS_UNICODE_NONCHAR(ch)) {
    365             log_err("U8_NEXT(before %d) failed to read a non-character\n", idx);
    366         }
    367     }
    368     for(idx=(int32_t)sizeof(nonChars); idx>0;) {
    369         U8_PREV(nonChars, 0, idx, ch);
    370         if(!U_IS_UNICODE_NONCHAR(ch)) {
    371             log_err("U8_PREV(at %d) failed to read a non-character\n", idx);
    372         }
    373     }
    374 }
    375 
    376 static void TestNextPrevCharUnsafe() {
    377     /*
    378      * Use a (mostly) well-formed UTF-8 string and test at code point boundaries.
    379      * The behavior of _UNSAFE macros for ill-formed strings is undefined.
    380      */
    381     static const uint8_t input[]={
    382         0x61,
    383         0xf0, 0x90, 0x90, 0x81,
    384         0xc0, 0x80,  /* non-shortest form */
    385         0xe2, 0x82, 0xac,
    386         0xc2, 0xa1,
    387         0xf4, 0x8f, 0xbf, 0xbf,
    388         0x00
    389     };
    390     static const UChar32 codePoints[]={
    391         0x61,
    392         0x10401,
    393         0,
    394         0x20ac,
    395         0xa1,
    396         0x10ffff,
    397         0
    398     };
    399 
    400     UChar32 c;
    401     int32_t i;
    402     uint32_t offset;
    403     for(i=0, offset=0; offset<sizeof(input); ++i) {
    404         UTF8_NEXT_CHAR_UNSAFE(input, offset, c);
    405         if(c != codePoints[i]){
    406             log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
    407                     offset, codePoints[i], c);
    408         }
    409     }
    410     for(i=0, offset=0; offset<sizeof(input); ++i) {
    411         U8_NEXT_UNSAFE(input, offset, c);
    412         if(c != codePoints[i]){
    413             log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
    414                     offset, codePoints[i], c);
    415         }
    416     }
    417 
    418     for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){
    419          UTF8_PREV_CHAR_UNSAFE(input, offset, c);
    420          if(c != codePoints[i]){
    421              log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
    422                      offset, codePoints[i], c);
    423          }
    424     }
    425     for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){
    426          U8_PREV_UNSAFE(input, offset, c);
    427          if(c != codePoints[i]){
    428              log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
    429                      offset, codePoints[i], c);
    430          }
    431     }
    432 }
    433 
    434 static void TestFwdBack() {
    435     static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00};
    436     static const uint16_t fwd_safe[]   ={1, 5, 6, 7, 9, 10, 11,  12, 13, 14, 15, 16, 17, 18};
    437     static const uint16_t back_safe[]  ={17, 16, 15, 14, 13, 12, 11, 10, 9, 7, 6, 5, 1, 0};
    438 
    439     static const uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1, 5};
    440     static const uint16_t fwd_N_safe[]   ={0, 1, 6, 10, 11, 13, 14, 18}; /*safe macro keeps it at the end of the string */
    441     static const uint16_t back_N_safe[]  ={18, 17, 15, 12, 11, 9, 7, 0};
    442 
    443     uint32_t offsafe=0;
    444 
    445     uint32_t i=0;
    446     while(offsafe < sizeof(input)){
    447         UTF8_FWD_1_SAFE(input, offsafe, sizeof(input));
    448         if(offsafe != fwd_safe[i]){
    449             log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
    450         }
    451         i++;
    452     }
    453 
    454     i=0;
    455     while(offsafe < sizeof(input)){
    456         U8_FWD_1(input, offsafe, sizeof(input));
    457         if(offsafe != fwd_safe[i]){
    458             log_err("ERROR: U8_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
    459         }
    460         i++;
    461     }
    462 
    463     i=0;
    464     offsafe=sizeof(input);
    465     while(offsafe > 0){
    466         UTF8_BACK_1_SAFE(input, 0,  offsafe);
    467         if(offsafe != back_safe[i]){
    468             log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_safe[i], offsafe);
    469         }
    470         i++;
    471     }
    472 
    473     i=0;
    474     offsafe=sizeof(input);
    475     while(offsafe > 0){
    476         U8_BACK_1(input, 0,  offsafe);
    477         if(offsafe != back_safe[i]){
    478             log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i], offsafe);
    479         }
    480         i++;
    481     }
    482 
    483     offsafe=0;
    484     for(i=0; i<LENGTHOF(Nvalue); i++){
    485         UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]);
    486         if(offsafe != fwd_N_safe[i]){
    487             log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
    488         }
    489 
    490     }
    491 
    492     offsafe=0;
    493     for(i=0; i<LENGTHOF(Nvalue); i++){
    494         U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]);
    495         if(offsafe != fwd_N_safe[i]){
    496             log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
    497         }
    498 
    499     }
    500 
    501     offsafe=sizeof(input);
    502     for(i=0; i<LENGTHOF(Nvalue); i++){
    503         UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
    504         if(offsafe != back_N_safe[i]){
    505             log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);
    506         }
    507     }
    508 
    509     offsafe=sizeof(input);
    510     for(i=0; i<LENGTHOF(Nvalue); i++){
    511         U8_BACK_N(input, 0, offsafe, Nvalue[i]);
    512         if(offsafe != back_N_safe[i]){
    513             log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);
    514         }
    515     }
    516 }
    517 
    518 static void TestFwdBackUnsafe() {
    519     /*
    520      * Use a (mostly) well-formed UTF-8 string and test at code point boundaries.
    521      * The behavior of _UNSAFE macros for ill-formed strings is undefined.
    522      */
    523     static const uint8_t input[]={
    524         0x61,
    525         0xf0, 0x90, 0x90, 0x81,
    526         0xc0, 0x80,  /* non-shortest form */
    527         0xe2, 0x82, 0xac,
    528         0xc2, 0xa1,
    529         0xf4, 0x8f, 0xbf, 0xbf,
    530         0x00
    531     };
    532     static const int8_t boundaries[]={ 0, 1, 5, 7, 10, 12, 16, 17 };
    533 
    534     int32_t offset;
    535     int32_t i;
    536     for(i=1, offset=0; offset<LENGTHOF(input); ++i) {
    537         UTF8_FWD_1_UNSAFE(input, offset);
    538         if(offset != boundaries[i]){
    539             log_err("ERROR: UTF8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
    540         }
    541     }
    542     for(i=1, offset=0; offset<LENGTHOF(input); ++i) {
    543         U8_FWD_1_UNSAFE(input, offset);
    544         if(offset != boundaries[i]){
    545             log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
    546         }
    547     }
    548 
    549     for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) {
    550         UTF8_BACK_1_UNSAFE(input, offset);
    551         if(offset != boundaries[i]){
    552             log_err("ERROR: UTF8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
    553         }
    554     }
    555     for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) {
    556         U8_BACK_1_UNSAFE(input, offset);
    557         if(offset != boundaries[i]){
    558             log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
    559         }
    560     }
    561 
    562     for(i=0; i<LENGTHOF(boundaries); ++i) {
    563         offset=0;
    564         UTF8_FWD_N_UNSAFE(input, offset, i);
    565         if(offset != boundaries[i]) {
    566             log_err("ERROR: UTF8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
    567         }
    568     }
    569     for(i=0; i<LENGTHOF(boundaries); ++i) {
    570         offset=0;
    571         U8_FWD_N_UNSAFE(input, offset, i);
    572         if(offset != boundaries[i]) {
    573             log_err("ERROR: U8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
    574         }
    575     }
    576 
    577     for(i=0; i<LENGTHOF(boundaries); ++i) {
    578         int32_t j=LENGTHOF(boundaries)-1-i;
    579         offset=LENGTHOF(input);
    580         UTF8_BACK_N_UNSAFE(input, offset, i);
    581         if(offset != boundaries[j]) {
    582             log_err("ERROR: UTF8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[j], offset);
    583         }
    584     }
    585     for(i=0; i<LENGTHOF(boundaries); ++i) {
    586         int32_t j=LENGTHOF(boundaries)-1-i;
    587         offset=LENGTHOF(input);
    588         U8_BACK_N_UNSAFE(input, offset, i);
    589         if(offset != boundaries[j]) {
    590             log_err("ERROR: U8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[j], offset);
    591         }
    592     }
    593 }
    594 
    595 static void TestSetChar() {
    596     static const uint8_t input[]
    597         = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x00 };
    598     static const int16_t start_safe[]
    599         = {0,    1,    1,    1,    4,    5,    6,    7,    8,    9,    10,   11,   12,   13,  14 };
    600     static const int16_t limit_safe[]
    601         = {0,    1,    4,    4,    4,    5,    6,    7,    8,    9,    10,   11,   12,   13,  14 };
    602 
    603     uint32_t i=0;
    604     int32_t offset=0, setOffset=0;
    605     for(offset=0; offset<=LENGTHOF(input); offset++){
    606         if (offset<LENGTHOF(input)){
    607             setOffset=offset;
    608             UTF8_SET_CHAR_START_SAFE(input, 0, setOffset);
    609             if(setOffset != start_safe[i]){
    610                 log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
    611             }
    612 
    613             setOffset=offset;
    614             U8_SET_CP_START(input, 0, setOffset);
    615             if(setOffset != start_safe[i]){
    616                 log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
    617             }
    618         }
    619 
    620         setOffset=offset;
    621         UTF8_SET_CHAR_LIMIT_SAFE(input,0, setOffset, sizeof(input));
    622         if(setOffset != limit_safe[i]){
    623             log_err("ERROR: UTF8_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
    624         }
    625 
    626         setOffset=offset;
    627         U8_SET_CP_LIMIT(input,0, setOffset, sizeof(input));
    628         if(setOffset != limit_safe[i]){
    629             log_err("ERROR: U8_SET_CP_LIMIT failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
    630         }
    631 
    632         i++;
    633     }
    634 }
    635 
    636 static void TestSetCharUnsafe() {
    637     static const uint8_t input[]
    638         = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x80, 0x80, 0x00 };
    639     static const int16_t start_unsafe[]
    640         = {0,    1,    1,    1,    4,    5,    6,    7,    8,    9,    9,    9,    12,   12,   12,   15 };
    641     static const int16_t limit_unsafe[]
    642         = {0,    1,    4,    4,    4,    5,    6,    7,    9,    9,    10,   10,   10,   15,   15,   15,   16 };
    643 
    644     uint32_t i=0;
    645     int32_t offset=0, setOffset=0;
    646     for(offset=0; offset<=LENGTHOF(input); offset++){
    647         if (offset<LENGTHOF(input)){
    648             setOffset=offset;
    649             UTF8_SET_CHAR_START_UNSAFE(input, setOffset);
    650             if(setOffset != start_unsafe[i]){
    651                 log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);
    652             }
    653 
    654             setOffset=offset;
    655             U8_SET_CP_START_UNSAFE(input, setOffset);
    656             if(setOffset != start_unsafe[i]){
    657                 log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);
    658             }
    659         }
    660 
    661         if (offset != 0) { /* Can't have it go off the end of the array */
    662             setOffset=offset;
    663             UTF8_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
    664             if(setOffset != limit_unsafe[i]){
    665                 log_err("ERROR: UTF8_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[i], setOffset);
    666             }
    667 
    668             setOffset=offset;
    669             U8_SET_CP_LIMIT_UNSAFE(input, setOffset);
    670             if(setOffset != limit_unsafe[i]){
    671                 log_err("ERROR: U8_SET_CP_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[i], setOffset);
    672             }
    673         }
    674 
    675         i++;
    676     }
    677 }
    678 
    679 static void TestAppendChar(){
    680     static const uint8_t s[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00};
    681     static const uint32_t test[]={
    682     /*  append-position(unsafe),  CHAR to be appended */
    683         0,                        0x10401,
    684         2,                        0x0028,
    685         2,                        0x007f,
    686         3,                        0xd801,
    687         1,                        0x20402,
    688         8,                        0x10401,
    689         5,                        0xc0,
    690         5,                        0xc1,
    691         5,                        0xfd,
    692         6,                        0x80,
    693         6,                        0x81,
    694         6,                        0xbf,
    695         7,                        0xfe,
    696 
    697     /*  append-position(safe),    CHAR to be appended */
    698         0,                        0x10401,
    699         2,                        0x0028,
    700         3,                        0x7f,
    701         3,                        0xd801,   /* illegal for UTF-8 starting with Unicode 3.2 */
    702         1,                        0x20402,
    703         9,                        0x10401,
    704         5,                        0xc0,
    705         5,                        0xc1,
    706         5,                        0xfd,
    707         6,                        0x80,
    708         6,                        0x81,
    709         6,                        0xbf,
    710         7,                        0xfe,
    711 
    712     };
    713     static const uint16_t movedOffset[]={
    714     /* offset-moved-to(unsafe) */
    715           4,              /*for append-pos: 0 , CHAR 0x10401*/
    716           3,
    717           3,
    718           6,
    719           5,
    720           12,
    721           7,
    722           7,
    723           7,
    724           8,
    725           8,
    726           8,
    727           9,
    728 
    729     /* offset-moved-to(safe) */
    730           4,              /*for append-pos: 0, CHAR  0x10401*/
    731           3,
    732           4,
    733           6,
    734           5,
    735           11,
    736           7,
    737           7,
    738           7,
    739           8,
    740           8,
    741           8,
    742           9,
    743 
    744     };
    745 
    746     static const uint8_t result[][11]={
    747         /*unsafe*/
    748         {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
    749         {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
    750         {0x61, 0x62, 0x7f, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
    751         {0x61, 0x62, 0x63, 0xed, 0xa0, 0x81, 0x67, 0x68, 0x69, 0x6a, 0x00},
    752         {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
    753         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0xF0, 0x90, 0x90},
    754 
    755         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
    756         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
    757         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},
    758 
    759         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
    760         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
    761         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},
    762 
    763         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
    764         /*safe*/
    765         {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
    766         {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
    767         {0x61, 0x62, 0x63, 0x7f, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
    768         {0x61, 0x62, 0x63, 0xef, 0xbf, 0xbf, 0x67, 0x68, 0x69, 0x6a, 0x00},
    769         {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
    770         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xc2, 0x9f}, /*gets UTF8_ERROR_VALUE_2 which takes 2 bytes 0xc0, 0x9f*/
    771 
    772         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
    773         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
    774         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},
    775 
    776         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
    777         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
    778         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},
    779 
    780         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
    781 
    782     };
    783     uint16_t i, count=0;
    784     uint8_t str[12];
    785     uint32_t offset;
    786 /*    UChar32 c=0;*/
    787     uint16_t size=LENGTHOF(s);
    788     for(i=0; i<LENGTHOF(test); i=(uint16_t)(i+2)){
    789         uprv_memcpy(str, s, size);
    790         offset=test[i];
    791         if(count<13){
    792             UTF8_APPEND_CHAR_UNSAFE(str, offset, test[i+1]);
    793             if(offset != movedOffset[count]){
    794                 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
    795                     count, movedOffset[count], offset);
    796 
    797             }
    798             if(uprv_memcmp(str, result[count], size) !=0){
    799                 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed for count=%d. \nExpected:", count);
    800                 printUChars(result[count], size);
    801                 log_err("\nGot:      ");
    802                 printUChars(str, size);
    803                 log_err("\n");
    804             }
    805         }else{
    806             UTF8_APPEND_CHAR_SAFE(str, offset, size, test[i+1]);
    807             if(offset != movedOffset[count]){
    808                 log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
    809                     count, movedOffset[count], offset);
    810 
    811             }
    812             if(uprv_memcmp(str, result[count], size) !=0){
    813                 log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed for count=%d. \nExpected:", count);
    814                 printUChars(result[count], size);
    815                 log_err("\nGot:     ");
    816                 printUChars(str, size);
    817                 log_err("\n");
    818             }
    819             /*call the API instead of MACRO
    820             uprv_memcpy(str, s, size);
    821             offset=test[i];
    822             c=test[i+1];
    823             if((uint32_t)(c)<=0x7f) {
    824                   (str)[(offset)++]=(uint8_t)(c);
    825             } else {
    826                  (offset)=utf8_appendCharSafeBody(str, (int32_t)(offset), (int32_t)(size), c);
    827             }
    828             if(offset != movedOffset[count]){
    829                 log_err("ERROR: utf8_appendCharSafeBody() failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
    830                     count, movedOffset[count], offset);
    831 
    832             }
    833             if(uprv_memcmp(str, result[count], size) !=0){
    834                 log_err("ERROR: utf8_appendCharSafeBody() failed for count=%d. \nExpected:", count);
    835                 printUChars(result[count], size);
    836                 printf("\nGot:     ");
    837                 printUChars(str, size);
    838                 printf("\n");
    839             }
    840             */
    841         }
    842         count++;
    843     }
    844 
    845 
    846 }
    847 
    848 static void TestAppend() {
    849     static const UChar32 codePoints[]={
    850         0x61, 0xdf, 0x901, 0x3040,
    851         0xac00, 0xd800, 0xdbff, 0xdcde,
    852         0xdffd, 0xe000, 0xffff, 0x10000,
    853         0x12345, 0xe0021, 0x10ffff, 0x110000,
    854         0x234567, 0x7fffffff, -1, -1000,
    855         0, 0x400
    856     };
    857     static const uint8_t expectUnsafe[]={
    858         0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
    859         0xea, 0xb0, 0x80,  0xed, 0xa0, 0x80,  0xed, 0xaf, 0xbf,  0xed, 0xb3, 0x9e,
    860         0xed, 0xbf, 0xbd,  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
    861         0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
    862         /* none from this line */
    863         0,  0xd0, 0x80
    864     }, expectSafe[]={
    865         0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
    866         0xea, 0xb0, 0x80,  /* no surrogates */
    867         /* no surrogates */  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
    868         0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
    869         /* none from this line */
    870         0,  0xd0, 0x80
    871     };
    872 
    873     uint8_t buffer[100];
    874     UChar32 c;
    875     int32_t i, length;
    876     UBool isError, expectIsError, wrongIsError;
    877 
    878     length=0;
    879     for(i=0; i<LENGTHOF(codePoints); ++i) {
    880         c=codePoints[i];
    881         if(c<0 || 0x10ffff<c) {
    882             continue; /* skip non-code points for U8_APPEND_UNSAFE */
    883         }
    884 
    885         U8_APPEND_UNSAFE(buffer, length, c);
    886     }
    887     if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length)) {
    888         log_err("U8_APPEND_UNSAFE did not generate the expected output\n");
    889     }
    890 
    891     length=0;
    892     wrongIsError=FALSE;
    893     for(i=0; i<LENGTHOF(codePoints); ++i) {
    894         c=codePoints[i];
    895         expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c);
    896         isError=FALSE;
    897 
    898         U8_APPEND(buffer, length, LENGTHOF(buffer), c, isError);
    899         wrongIsError|= isError!=expectIsError;
    900     }
    901     if(wrongIsError) {
    902         log_err("U8_APPEND did not set isError correctly\n");
    903     }
    904     if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length)) {
    905         log_err("U8_APPEND did not generate the expected output\n");
    906     }
    907 }
    908 
    909 static void
    910 TestSurrogates() {
    911     static const uint8_t b[]={
    912         0xc3, 0x9f,             /*  00DF */
    913         0xed, 0x9f, 0xbf,       /*  D7FF */
    914         0xed, 0xa0, 0x81,       /*  D801 */
    915         0xed, 0xbf, 0xbe,       /*  DFFE */
    916         0xee, 0x80, 0x80,       /*  E000 */
    917         0xf0, 0x97, 0xbf, 0xbe  /* 17FFE */
    918     };
    919     static const UChar32 cp[]={
    920         0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe
    921     };
    922 
    923     UChar32 cu, cs, cl;
    924     int32_t i, j, k, iu, is, il, length;
    925 
    926     k=0; /* index into cp[] */
    927     length=LENGTHOF(b);
    928     for(i=0; i<length;) {
    929         j=i;
    930         U8_NEXT_UNSAFE(b, j, cu);
    931         iu=j;
    932 
    933         j=i;
    934         U8_NEXT(b, j, length, cs);
    935         is=j;
    936 
    937         j=i;
    938         L8_NEXT(b, j, length, cl);
    939         il=j;
    940 
    941         if(cu!=cp[k]) {
    942             log_err("U8_NEXT_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
    943         }
    944 
    945         /* U8_NEXT() returns <0 for surrogate code points */
    946         if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
    947             log_err("U8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
    948         }
    949 
    950         /* L8_NEXT() returns surrogate code points like U8_NEXT_UNSAFE() */
    951         if(cl!=cu) {
    952             log_err("L8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
    953         }
    954 
    955         if(is!=iu || il!=iu) {
    956             log_err("U8_NEXT(b[%ld]) or L8_NEXT(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
    957         }
    958 
    959         ++k;    /* next code point */
    960         i=iu;   /* advance by one UTF-8 sequence */
    961     }
    962 
    963     while(i>0) {
    964         --k; /* previous code point */
    965 
    966         j=i;
    967         U8_PREV_UNSAFE(b, j, cu);
    968         iu=j;
    969 
    970         j=i;
    971         U8_PREV(b, 0, j, cs);
    972         is=j;
    973 
    974         j=i;
    975         L8_PREV(b, 0, j, cl);
    976         il=j;
    977 
    978         if(cu!=cp[k]) {
    979             log_err("U8_PREV_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
    980         }
    981 
    982         /* U8_PREV() returns <0 for surrogate code points */
    983         if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
    984             log_err("U8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
    985         }
    986 
    987         /* L8_PREV() returns surrogate code points like U8_PREV_UNSAFE() */
    988         if(cl!=cu) {
    989             log_err("L8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
    990         }
    991 
    992         if(is!=iu || il !=iu) {
    993             log_err("U8_PREV(b[%ld]) or L8_PREV(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
    994         }
    995 
    996         i=iu;   /* go back by one UTF-8 sequence */
    997     }
    998 }
    999 
   1000 static void printUChars(const uint8_t *uchars, int16_t len){
   1001     int16_t i=0;
   1002     for(i=0; i<len; i++){
   1003         log_err("0x%02x ", *(uchars+i));
   1004     }
   1005 }
   1006