1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1998-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /* 7 * File utf8tst.c 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 07/24/2000 Madhu Creation 13 ******************************************************************************* 14 */ 15 16 #include "unicode/utypes.h" 17 #include "unicode/utf8.h" 18 #include "cmemory.h" 19 #include "cintltst.h" 20 21 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 22 23 /* lenient UTF-8 ------------------------------------------------------------ */ 24 25 /* 26 * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate 27 * code points with their "natural" encoding. 28 * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of 29 * single surrogates. 30 * 31 * This is not conformant with UTF-8. 32 * 33 * Supplementary code points may be encoded as pairs of 3-byte sequences, but 34 * the macros below do not attempt to assemble such pairs. 35 */ 36 37 #define L8_NEXT(s, i, length, c) { \ 38 (c)=(uint8_t)(s)[(i)++]; \ 39 if((c)>=0x80) { \ 40 if(U8_IS_LEAD(c)) { \ 41 (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (int32_t)(length), c, -2); \ 42 } else { \ 43 (c)=U_SENTINEL; \ 44 } \ 45 } \ 46 } 47 48 #define L8_PREV(s, start, i, c) { \ 49 (c)=(uint8_t)(s)[--(i)]; \ 50 if((c)>=0x80) { \ 51 if((c)<=0xbf) { \ 52 (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -2); \ 53 } else { \ 54 (c)=U_SENTINEL; \ 55 } \ 56 } \ 57 } 58 59 /* -------------------------------------------------------------------------- */ 60 61 static void printUChars(const uint8_t *uchars, int16_t len); 62 63 static void TestCodeUnitValues(void); 64 static void TestCharLength(void); 65 static void TestGetChar(void); 66 static void TestNextPrevChar(void); 67 static void TestNextPrevNonCharacters(void); 68 static void TestNextPrevCharUnsafe(void); 69 static void TestFwdBack(void); 70 static void TestFwdBackUnsafe(void); 71 static void TestSetChar(void); 72 static void TestSetCharUnsafe(void); 73 static void TestAppendChar(void); 74 static void TestAppend(void); 75 static void TestSurrogates(void); 76 77 void addUTF8Test(TestNode** root); 78 79 void 80 addUTF8Test(TestNode** root) 81 { 82 addTest(root, &TestCodeUnitValues, "utf8tst/TestCodeUnitValues"); 83 addTest(root, &TestCharLength, "utf8tst/TestCharLength"); 84 addTest(root, &TestGetChar, "utf8tst/TestGetChar"); 85 addTest(root, &TestNextPrevChar, "utf8tst/TestNextPrevChar"); 86 addTest(root, &TestNextPrevNonCharacters, "utf8tst/TestNextPrevNonCharacters"); 87 addTest(root, &TestNextPrevCharUnsafe, "utf8tst/TestNextPrevCharUnsafe"); 88 addTest(root, &TestFwdBack, "utf8tst/TestFwdBack"); 89 addTest(root, &TestFwdBackUnsafe, "utf8tst/TestFwdBackUnsafe"); 90 addTest(root, &TestSetChar, "utf8tst/TestSetChar"); 91 addTest(root, &TestSetCharUnsafe, "utf8tst/TestSetCharUnsafe"); 92 addTest(root, &TestAppendChar, "utf8tst/TestAppendChar"); 93 addTest(root, &TestAppend, "utf8tst/TestAppend"); 94 addTest(root, &TestSurrogates, "utf8tst/TestSurrogates"); 95 } 96 97 static void TestCodeUnitValues() 98 { 99 static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0xfd, 0x80, 0x81, 0xbc, 0xbe,}; 100 101 int16_t i; 102 for(i=0; i<LENGTHOF(codeunit); i++){ 103 uint8_t c=codeunit[i]; 104 log_verbose("Testing code unit value of %x\n", c); 105 if(i<4){ 106 if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){ 107 log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n", 108 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n'); 109 } 110 } else if(i< 8){ 111 if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){ 112 log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n", 113 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n'); 114 } 115 } else if(i< 12){ 116 if(!UTF8_IS_TRAIL(c) || UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || !U8_IS_TRAIL(c) || U8_IS_SINGLE(c) || U8_IS_LEAD(c)){ 117 log_err("ERROR: 0x%02x is a trail byte but results in single: %c lead: %c trail: %c\n", 118 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n'); 119 } 120 } 121 } 122 } 123 124 static void TestCharLength() 125 { 126 static const uint32_t codepoint[]={ 127 1, 0x0061, 128 1, 0x007f, 129 2, 0x016f, 130 2, 0x07ff, 131 3, 0x0865, 132 3, 0x20ac, 133 4, 0x20402, 134 4, 0x23456, 135 4, 0x24506, 136 4, 0x20402, 137 4, 0x10402, 138 3, 0xd7ff, 139 3, 0xe000, 140 141 }; 142 143 int16_t i; 144 UBool multiple; 145 for(i=0; i<LENGTHOF(codepoint); i=(int16_t)(i+2)){ 146 UChar32 c=codepoint[i+1]; 147 if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uint16_t)codepoint[i]){ 148 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF8_CHAR_LENGTH(c)); 149 }else{ 150 log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_LENGTH(c)); 151 } 152 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE); 153 if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){ 154 log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c); 155 } 156 } 157 } 158 159 static void TestGetChar() 160 { 161 static const uint8_t input[]={ 162 /* code unit,*/ 163 0x61, 164 0x7f, 165 0xe4, 166 0xba, 167 0x8c, 168 0xF0, 169 0x90, 170 0x90, 171 0x81, 172 0xc0, 173 0x65, 174 0x31, 175 0x9a, 176 0xc9 177 }; 178 static const UChar32 result[]={ 179 /* codepoint-unsafe, codepoint-safe(not strict) codepoint-safe(strict) */ 180 0x61, 0x61, 0x61, 181 0x7f, 0x7f, 0x7f, 182 0x4e8c, 0x4e8c, 0x4e8c, 183 0x4e8c, 0x4e8c, 0x4e8c , 184 0x4e8c, 0x4e8c, 0x4e8c, 185 0x10401, 0x10401, 0x10401 , 186 0x10401, 0x10401, 0x10401 , 187 0x10401, 0x10401, 0x10401 , 188 0x10401, 0x10401, 0x10401, 189 0x25, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 190 0x65, 0x65, 0x65, 191 0x31, 0x31, 0x31, 192 0x31, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 193 0x240, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1 194 }; 195 uint16_t i=0; 196 UChar32 c; 197 uint32_t offset=0; 198 199 for(offset=0; offset<sizeof(input); offset++) { 200 if (offset < sizeof(input) - 1) { 201 UTF8_GET_CHAR_UNSAFE(input, offset, c); 202 if(c != result[i]){ 203 log_err("ERROR: UTF8_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c); 204 205 } 206 207 U8_GET_UNSAFE(input, offset, c); 208 if(c != result[i]){ 209 log_err("ERROR: U8_GET_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c); 210 211 } 212 } 213 214 U8_GET(input, 0, offset, sizeof(input), c); 215 if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){ 216 log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c); 217 } 218 219 UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, FALSE); 220 if(c != result[i+1]){ 221 log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c); 222 } 223 224 UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, TRUE); 225 if(c != result[i+2]){ 226 log_err("ERROR: UTF8_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c); 227 } 228 229 i=(uint16_t)(i+3); 230 } 231 } 232 233 static void TestNextPrevChar() { 234 static const uint8_t input[]={0x61, 0xf0, 0x90, 0x90, 0x81, 0xc0, 0x80, 0xfd, 0xbe, 0xc2, 0x61, 0x81, 0x90, 0x90, 0xf0, 0x00}; 235 static const UChar32 result[]={ 236 /* next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s */ 237 0x0061, 0x0061, 0x0061, 0x0000, 0x0000, 0x0000, 238 0x10401, 0x10401, 0x10401, 0xf0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 239 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x2841410, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 240 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xa1050, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 241 0x81, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x2841, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 242 0x00, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x61, 0x61, 0x61, 243 0x80, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xc2, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 244 0xfd, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x77e, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 245 0xbe, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xfd, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 246 0xa1, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x00, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 247 0x61, 0x61, 0x61, 0xc0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 248 0x81, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x10401, 0x10401, 0x10401, 249 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x410, UTF_ERROR_VALUE, UTF_ERROR_VALUE, 250 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x410, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 251 0x0840, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xf0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 252 0x0000, 0x0000, 0x0000, 0x0061, 0x0061, 0x0061 253 }; 254 static const int32_t movedOffset[]={ 255 /* next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s */ 256 1, 1, 1, 15, 15, 15, 257 5, 5, 5, 14, 14 , 14, 258 3, 3, 3, 9, 13, 13, 259 4, 4, 4, 9, 12, 12, 260 5, 5, 5, 9, 11, 11, 261 7, 7, 7, 10, 10, 10, 262 7, 7, 7, 9, 9, 9, 263 8, 9, 9, 7, 7, 7, 264 9, 9, 9, 7, 7, 7, 265 11, 10, 10, 5, 5, 5, 266 11, 11, 11, 5, 5, 5, 267 12, 12, 12, 1, 1, 1, 268 13, 13, 13, 1, 1, 1, 269 14, 14, 14, 1, 1, 1, 270 14, 15, 15, 1, 1, 1, 271 14, 16, 16, 0, 0, 0, 272 }; 273 /* TODO: remove unused columns for next_unsafe & prev_unsafe, and adjust the test code */ 274 275 UChar32 c=0x0000; 276 uint32_t i=0; 277 uint32_t offset=0; 278 int32_t setOffset=0; 279 for(offset=0; offset<sizeof(input); offset++){ 280 setOffset=offset; 281 UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, FALSE); 282 if(setOffset != movedOffset[i+1]){ 283 log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 284 offset, movedOffset[i+1], setOffset); 285 } 286 if(c != result[i+1]){ 287 log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c); 288 } 289 290 setOffset=offset; 291 U8_NEXT(input, setOffset, sizeof(input), c); 292 if(setOffset != movedOffset[i+1]){ 293 log_err("ERROR: U8_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 294 offset, movedOffset[i+1], setOffset); 295 } 296 if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){ 297 log_err("ERROR: U8_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c); 298 } 299 300 setOffset=offset; 301 UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, TRUE); 302 if(setOffset != movedOffset[i+1]){ 303 log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 304 offset, movedOffset[i+2], setOffset); 305 } 306 if(c != result[i+2]){ 307 log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c); 308 } 309 310 i=i+6; 311 } 312 313 i=0; 314 for(offset=sizeof(input); offset > 0; --offset){ 315 setOffset=offset; 316 UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE); 317 if(setOffset != movedOffset[i+4]){ 318 log_err("ERROR: UTF8_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 319 offset, movedOffset[i+4], setOffset); 320 } 321 if(c != result[i+4]){ 322 log_err("ERROR: UTF8_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c); 323 } 324 325 setOffset=offset; 326 U8_PREV(input, 0, setOffset, c); 327 if(setOffset != movedOffset[i+4]){ 328 log_err("ERROR: U8_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 329 offset, movedOffset[i+4], setOffset); 330 } 331 if(UTF_IS_ERROR(result[i+4]) ? c >= 0 : c != result[i+4]){ 332 log_err("ERROR: U8_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c); 333 } 334 335 setOffset=offset; 336 UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, TRUE); 337 if(setOffset != movedOffset[i+5]){ 338 log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 339 offset, movedOffset[i+5], setOffset); 340 } 341 if(c != result[i+5]){ 342 log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c); 343 } 344 345 i=i+6; 346 } 347 } 348 349 static void TestNextPrevNonCharacters() { 350 /* test non-characters */ 351 static const uint8_t nonChars[]={ 352 0xef, 0xb7, 0x90, /* U+fdd0 */ 353 0xef, 0xbf, 0xbf, /* U+feff */ 354 0xf0, 0x9f, 0xbf, 0xbe, /* U+1fffe */ 355 0xf0, 0xbf, 0xbf, 0xbf, /* U+3ffff */ 356 0xf4, 0x8f, 0xbf, 0xbe /* U+10fffe */ 357 }; 358 359 UChar32 ch; 360 int32_t idx; 361 362 for(idx=0; idx<(int32_t)sizeof(nonChars);) { 363 U8_NEXT(nonChars, idx, sizeof(nonChars), ch); 364 if(!U_IS_UNICODE_NONCHAR(ch)) { 365 log_err("U8_NEXT(before %d) failed to read a non-character\n", idx); 366 } 367 } 368 for(idx=(int32_t)sizeof(nonChars); idx>0;) { 369 U8_PREV(nonChars, 0, idx, ch); 370 if(!U_IS_UNICODE_NONCHAR(ch)) { 371 log_err("U8_PREV(at %d) failed to read a non-character\n", idx); 372 } 373 } 374 } 375 376 static void TestNextPrevCharUnsafe() { 377 /* 378 * Use a (mostly) well-formed UTF-8 string and test at code point boundaries. 379 * The behavior of _UNSAFE macros for ill-formed strings is undefined. 380 */ 381 static const uint8_t input[]={ 382 0x61, 383 0xf0, 0x90, 0x90, 0x81, 384 0xc0, 0x80, /* non-shortest form */ 385 0xe2, 0x82, 0xac, 386 0xc2, 0xa1, 387 0xf4, 0x8f, 0xbf, 0xbf, 388 0x00 389 }; 390 static const UChar32 codePoints[]={ 391 0x61, 392 0x10401, 393 0, 394 0x20ac, 395 0xa1, 396 0x10ffff, 397 0 398 }; 399 400 UChar32 c; 401 int32_t i; 402 uint32_t offset; 403 for(i=0, offset=0; offset<sizeof(input); ++i) { 404 UTF8_NEXT_CHAR_UNSAFE(input, offset, c); 405 if(c != codePoints[i]){ 406 log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", 407 offset, codePoints[i], c); 408 } 409 } 410 for(i=0, offset=0; offset<sizeof(input); ++i) { 411 U8_NEXT_UNSAFE(input, offset, c); 412 if(c != codePoints[i]){ 413 log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", 414 offset, codePoints[i], c); 415 } 416 } 417 418 for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ 419 UTF8_PREV_CHAR_UNSAFE(input, offset, c); 420 if(c != codePoints[i]){ 421 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", 422 offset, codePoints[i], c); 423 } 424 } 425 for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ 426 U8_PREV_UNSAFE(input, offset, c); 427 if(c != codePoints[i]){ 428 log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", 429 offset, codePoints[i], c); 430 } 431 } 432 } 433 434 static void TestFwdBack() { 435 static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00}; 436 static const uint16_t fwd_safe[] ={1, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; 437 static const uint16_t back_safe[] ={17, 16, 15, 14, 13, 12, 11, 10, 9, 7, 6, 5, 1, 0}; 438 439 static const uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1, 5}; 440 static const uint16_t fwd_N_safe[] ={0, 1, 6, 10, 11, 13, 14, 18}; /*safe macro keeps it at the end of the string */ 441 static const uint16_t back_N_safe[] ={18, 17, 15, 12, 11, 9, 7, 0}; 442 443 uint32_t offsafe=0; 444 445 uint32_t i=0; 446 while(offsafe < sizeof(input)){ 447 UTF8_FWD_1_SAFE(input, offsafe, sizeof(input)); 448 if(offsafe != fwd_safe[i]){ 449 log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe); 450 } 451 i++; 452 } 453 454 i=0; 455 while(offsafe < sizeof(input)){ 456 U8_FWD_1(input, offsafe, sizeof(input)); 457 if(offsafe != fwd_safe[i]){ 458 log_err("ERROR: U8_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe); 459 } 460 i++; 461 } 462 463 i=0; 464 offsafe=sizeof(input); 465 while(offsafe > 0){ 466 UTF8_BACK_1_SAFE(input, 0, offsafe); 467 if(offsafe != back_safe[i]){ 468 log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_safe[i], offsafe); 469 } 470 i++; 471 } 472 473 i=0; 474 offsafe=sizeof(input); 475 while(offsafe > 0){ 476 U8_BACK_1(input, 0, offsafe); 477 if(offsafe != back_safe[i]){ 478 log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i], offsafe); 479 } 480 i++; 481 } 482 483 offsafe=0; 484 for(i=0; i<LENGTHOF(Nvalue); i++){ 485 UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]); 486 if(offsafe != fwd_N_safe[i]){ 487 log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe); 488 } 489 490 } 491 492 offsafe=0; 493 for(i=0; i<LENGTHOF(Nvalue); i++){ 494 U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]); 495 if(offsafe != fwd_N_safe[i]){ 496 log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe); 497 } 498 499 } 500 501 offsafe=sizeof(input); 502 for(i=0; i<LENGTHOF(Nvalue); i++){ 503 UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]); 504 if(offsafe != back_N_safe[i]){ 505 log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe); 506 } 507 } 508 509 offsafe=sizeof(input); 510 for(i=0; i<LENGTHOF(Nvalue); i++){ 511 U8_BACK_N(input, 0, offsafe, Nvalue[i]); 512 if(offsafe != back_N_safe[i]){ 513 log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe); 514 } 515 } 516 } 517 518 static void TestFwdBackUnsafe() { 519 /* 520 * Use a (mostly) well-formed UTF-8 string and test at code point boundaries. 521 * The behavior of _UNSAFE macros for ill-formed strings is undefined. 522 */ 523 static const uint8_t input[]={ 524 0x61, 525 0xf0, 0x90, 0x90, 0x81, 526 0xc0, 0x80, /* non-shortest form */ 527 0xe2, 0x82, 0xac, 528 0xc2, 0xa1, 529 0xf4, 0x8f, 0xbf, 0xbf, 530 0x00 531 }; 532 static const int8_t boundaries[]={ 0, 1, 5, 7, 10, 12, 16, 17 }; 533 534 int32_t offset; 535 int32_t i; 536 for(i=1, offset=0; offset<LENGTHOF(input); ++i) { 537 UTF8_FWD_1_UNSAFE(input, offset); 538 if(offset != boundaries[i]){ 539 log_err("ERROR: UTF8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset); 540 } 541 } 542 for(i=1, offset=0; offset<LENGTHOF(input); ++i) { 543 U8_FWD_1_UNSAFE(input, offset); 544 if(offset != boundaries[i]){ 545 log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset); 546 } 547 } 548 549 for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) { 550 UTF8_BACK_1_UNSAFE(input, offset); 551 if(offset != boundaries[i]){ 552 log_err("ERROR: UTF8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset); 553 } 554 } 555 for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) { 556 U8_BACK_1_UNSAFE(input, offset); 557 if(offset != boundaries[i]){ 558 log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset); 559 } 560 } 561 562 for(i=0; i<LENGTHOF(boundaries); ++i) { 563 offset=0; 564 UTF8_FWD_N_UNSAFE(input, offset, i); 565 if(offset != boundaries[i]) { 566 log_err("ERROR: UTF8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset); 567 } 568 } 569 for(i=0; i<LENGTHOF(boundaries); ++i) { 570 offset=0; 571 U8_FWD_N_UNSAFE(input, offset, i); 572 if(offset != boundaries[i]) { 573 log_err("ERROR: U8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset); 574 } 575 } 576 577 for(i=0; i<LENGTHOF(boundaries); ++i) { 578 int32_t j=LENGTHOF(boundaries)-1-i; 579 offset=LENGTHOF(input); 580 UTF8_BACK_N_UNSAFE(input, offset, i); 581 if(offset != boundaries[j]) { 582 log_err("ERROR: UTF8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[j], offset); 583 } 584 } 585 for(i=0; i<LENGTHOF(boundaries); ++i) { 586 int32_t j=LENGTHOF(boundaries)-1-i; 587 offset=LENGTHOF(input); 588 U8_BACK_N_UNSAFE(input, offset, i); 589 if(offset != boundaries[j]) { 590 log_err("ERROR: U8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[j], offset); 591 } 592 } 593 } 594 595 static void TestSetChar() { 596 static const uint8_t input[] 597 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x00 }; 598 static const int16_t start_safe[] 599 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; 600 static const int16_t limit_safe[] 601 = {0, 1, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; 602 603 uint32_t i=0; 604 int32_t offset=0, setOffset=0; 605 for(offset=0; offset<=LENGTHOF(input); offset++){ 606 if (offset<LENGTHOF(input)){ 607 setOffset=offset; 608 UTF8_SET_CHAR_START_SAFE(input, 0, setOffset); 609 if(setOffset != start_safe[i]){ 610 log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset); 611 } 612 613 setOffset=offset; 614 U8_SET_CP_START(input, 0, setOffset); 615 if(setOffset != start_safe[i]){ 616 log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset); 617 } 618 } 619 620 setOffset=offset; 621 UTF8_SET_CHAR_LIMIT_SAFE(input,0, setOffset, sizeof(input)); 622 if(setOffset != limit_safe[i]){ 623 log_err("ERROR: UTF8_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset); 624 } 625 626 setOffset=offset; 627 U8_SET_CP_LIMIT(input,0, setOffset, sizeof(input)); 628 if(setOffset != limit_safe[i]){ 629 log_err("ERROR: U8_SET_CP_LIMIT failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset); 630 } 631 632 i++; 633 } 634 } 635 636 static void TestSetCharUnsafe() { 637 static const uint8_t input[] 638 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x80, 0x80, 0x00 }; 639 static const int16_t start_unsafe[] 640 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9, 12, 12, 12, 15 }; 641 static const int16_t limit_unsafe[] 642 = {0, 1, 4, 4, 4, 5, 6, 7, 9, 9, 10, 10, 10, 15, 15, 15, 16 }; 643 644 uint32_t i=0; 645 int32_t offset=0, setOffset=0; 646 for(offset=0; offset<=LENGTHOF(input); offset++){ 647 if (offset<LENGTHOF(input)){ 648 setOffset=offset; 649 UTF8_SET_CHAR_START_UNSAFE(input, setOffset); 650 if(setOffset != start_unsafe[i]){ 651 log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset); 652 } 653 654 setOffset=offset; 655 U8_SET_CP_START_UNSAFE(input, setOffset); 656 if(setOffset != start_unsafe[i]){ 657 log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset); 658 } 659 } 660 661 if (offset != 0) { /* Can't have it go off the end of the array */ 662 setOffset=offset; 663 UTF8_SET_CHAR_LIMIT_UNSAFE(input, setOffset); 664 if(setOffset != limit_unsafe[i]){ 665 log_err("ERROR: UTF8_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[i], setOffset); 666 } 667 668 setOffset=offset; 669 U8_SET_CP_LIMIT_UNSAFE(input, setOffset); 670 if(setOffset != limit_unsafe[i]){ 671 log_err("ERROR: U8_SET_CP_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[i], setOffset); 672 } 673 } 674 675 i++; 676 } 677 } 678 679 static void TestAppendChar(){ 680 static const uint8_t s[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00}; 681 static const uint32_t test[]={ 682 /* append-position(unsafe), CHAR to be appended */ 683 0, 0x10401, 684 2, 0x0028, 685 2, 0x007f, 686 3, 0xd801, 687 1, 0x20402, 688 8, 0x10401, 689 5, 0xc0, 690 5, 0xc1, 691 5, 0xfd, 692 6, 0x80, 693 6, 0x81, 694 6, 0xbf, 695 7, 0xfe, 696 697 /* append-position(safe), CHAR to be appended */ 698 0, 0x10401, 699 2, 0x0028, 700 3, 0x7f, 701 3, 0xd801, /* illegal for UTF-8 starting with Unicode 3.2 */ 702 1, 0x20402, 703 9, 0x10401, 704 5, 0xc0, 705 5, 0xc1, 706 5, 0xfd, 707 6, 0x80, 708 6, 0x81, 709 6, 0xbf, 710 7, 0xfe, 711 712 }; 713 static const uint16_t movedOffset[]={ 714 /* offset-moved-to(unsafe) */ 715 4, /*for append-pos: 0 , CHAR 0x10401*/ 716 3, 717 3, 718 6, 719 5, 720 12, 721 7, 722 7, 723 7, 724 8, 725 8, 726 8, 727 9, 728 729 /* offset-moved-to(safe) */ 730 4, /*for append-pos: 0, CHAR 0x10401*/ 731 3, 732 4, 733 6, 734 5, 735 11, 736 7, 737 7, 738 7, 739 8, 740 8, 741 8, 742 9, 743 744 }; 745 746 static const uint8_t result[][11]={ 747 /*unsafe*/ 748 {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00}, 749 {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00}, 750 {0x61, 0x62, 0x7f, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00}, 751 {0x61, 0x62, 0x63, 0xed, 0xa0, 0x81, 0x67, 0x68, 0x69, 0x6a, 0x00}, 752 {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00}, 753 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0xF0, 0x90, 0x90}, 754 755 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00}, 756 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00}, 757 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00}, 758 759 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00}, 760 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00}, 761 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00}, 762 763 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00}, 764 /*safe*/ 765 {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00}, 766 {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00}, 767 {0x61, 0x62, 0x63, 0x7f, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00}, 768 {0x61, 0x62, 0x63, 0xef, 0xbf, 0xbf, 0x67, 0x68, 0x69, 0x6a, 0x00}, 769 {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00}, 770 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xc2, 0x9f}, /*gets UTF8_ERROR_VALUE_2 which takes 2 bytes 0xc0, 0x9f*/ 771 772 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00}, 773 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00}, 774 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00}, 775 776 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00}, 777 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00}, 778 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00}, 779 780 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00}, 781 782 }; 783 uint16_t i, count=0; 784 uint8_t str[12]; 785 uint32_t offset; 786 /* UChar32 c=0;*/ 787 uint16_t size=LENGTHOF(s); 788 for(i=0; i<LENGTHOF(test); i=(uint16_t)(i+2)){ 789 uprv_memcpy(str, s, size); 790 offset=test[i]; 791 if(count<13){ 792 UTF8_APPEND_CHAR_UNSAFE(str, offset, test[i+1]); 793 if(offset != movedOffset[count]){ 794 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n", 795 count, movedOffset[count], offset); 796 797 } 798 if(uprv_memcmp(str, result[count], size) !=0){ 799 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed for count=%d. \nExpected:", count); 800 printUChars(result[count], size); 801 log_err("\nGot: "); 802 printUChars(str, size); 803 log_err("\n"); 804 } 805 }else{ 806 UTF8_APPEND_CHAR_SAFE(str, offset, size, test[i+1]); 807 if(offset != movedOffset[count]){ 808 log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n", 809 count, movedOffset[count], offset); 810 811 } 812 if(uprv_memcmp(str, result[count], size) !=0){ 813 log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed for count=%d. \nExpected:", count); 814 printUChars(result[count], size); 815 log_err("\nGot: "); 816 printUChars(str, size); 817 log_err("\n"); 818 } 819 /*call the API instead of MACRO 820 uprv_memcpy(str, s, size); 821 offset=test[i]; 822 c=test[i+1]; 823 if((uint32_t)(c)<=0x7f) { 824 (str)[(offset)++]=(uint8_t)(c); 825 } else { 826 (offset)=utf8_appendCharSafeBody(str, (int32_t)(offset), (int32_t)(size), c); 827 } 828 if(offset != movedOffset[count]){ 829 log_err("ERROR: utf8_appendCharSafeBody() failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n", 830 count, movedOffset[count], offset); 831 832 } 833 if(uprv_memcmp(str, result[count], size) !=0){ 834 log_err("ERROR: utf8_appendCharSafeBody() failed for count=%d. \nExpected:", count); 835 printUChars(result[count], size); 836 printf("\nGot: "); 837 printUChars(str, size); 838 printf("\n"); 839 } 840 */ 841 } 842 count++; 843 } 844 845 846 } 847 848 static void TestAppend() { 849 static const UChar32 codePoints[]={ 850 0x61, 0xdf, 0x901, 0x3040, 851 0xac00, 0xd800, 0xdbff, 0xdcde, 852 0xdffd, 0xe000, 0xffff, 0x10000, 853 0x12345, 0xe0021, 0x10ffff, 0x110000, 854 0x234567, 0x7fffffff, -1, -1000, 855 0, 0x400 856 }; 857 static const uint8_t expectUnsafe[]={ 858 0x61, 0xc3, 0x9f, 0xe0, 0xa4, 0x81, 0xe3, 0x81, 0x80, 859 0xea, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xed, 0xaf, 0xbf, 0xed, 0xb3, 0x9e, 860 0xed, 0xbf, 0xbd, 0xee, 0x80, 0x80, 0xef, 0xbf, 0xbf, 0xf0, 0x90, 0x80, 0x80, 861 0xf0, 0x92, 0x8d, 0x85, 0xf3, 0xa0, 0x80, 0xa1, 0xf4, 0x8f, 0xbf, 0xbf, /* not 0x110000 */ 862 /* none from this line */ 863 0, 0xd0, 0x80 864 }, expectSafe[]={ 865 0x61, 0xc3, 0x9f, 0xe0, 0xa4, 0x81, 0xe3, 0x81, 0x80, 866 0xea, 0xb0, 0x80, /* no surrogates */ 867 /* no surrogates */ 0xee, 0x80, 0x80, 0xef, 0xbf, 0xbf, 0xf0, 0x90, 0x80, 0x80, 868 0xf0, 0x92, 0x8d, 0x85, 0xf3, 0xa0, 0x80, 0xa1, 0xf4, 0x8f, 0xbf, 0xbf, /* not 0x110000 */ 869 /* none from this line */ 870 0, 0xd0, 0x80 871 }; 872 873 uint8_t buffer[100]; 874 UChar32 c; 875 int32_t i, length; 876 UBool isError, expectIsError, wrongIsError; 877 878 length=0; 879 for(i=0; i<LENGTHOF(codePoints); ++i) { 880 c=codePoints[i]; 881 if(c<0 || 0x10ffff<c) { 882 continue; /* skip non-code points for U8_APPEND_UNSAFE */ 883 } 884 885 U8_APPEND_UNSAFE(buffer, length, c); 886 } 887 if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length)) { 888 log_err("U8_APPEND_UNSAFE did not generate the expected output\n"); 889 } 890 891 length=0; 892 wrongIsError=FALSE; 893 for(i=0; i<LENGTHOF(codePoints); ++i) { 894 c=codePoints[i]; 895 expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c); 896 isError=FALSE; 897 898 U8_APPEND(buffer, length, LENGTHOF(buffer), c, isError); 899 wrongIsError|= isError!=expectIsError; 900 } 901 if(wrongIsError) { 902 log_err("U8_APPEND did not set isError correctly\n"); 903 } 904 if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length)) { 905 log_err("U8_APPEND did not generate the expected output\n"); 906 } 907 } 908 909 static void 910 TestSurrogates() { 911 static const uint8_t b[]={ 912 0xc3, 0x9f, /* 00DF */ 913 0xed, 0x9f, 0xbf, /* D7FF */ 914 0xed, 0xa0, 0x81, /* D801 */ 915 0xed, 0xbf, 0xbe, /* DFFE */ 916 0xee, 0x80, 0x80, /* E000 */ 917 0xf0, 0x97, 0xbf, 0xbe /* 17FFE */ 918 }; 919 static const UChar32 cp[]={ 920 0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe 921 }; 922 923 UChar32 cu, cs, cl; 924 int32_t i, j, k, iu, is, il, length; 925 926 k=0; /* index into cp[] */ 927 length=LENGTHOF(b); 928 for(i=0; i<length;) { 929 j=i; 930 U8_NEXT_UNSAFE(b, j, cu); 931 iu=j; 932 933 j=i; 934 U8_NEXT(b, j, length, cs); 935 is=j; 936 937 j=i; 938 L8_NEXT(b, j, length, cl); 939 il=j; 940 941 if(cu!=cp[k]) { 942 log_err("U8_NEXT_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]); 943 } 944 945 /* U8_NEXT() returns <0 for surrogate code points */ 946 if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) { 947 log_err("U8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu); 948 } 949 950 /* L8_NEXT() returns surrogate code points like U8_NEXT_UNSAFE() */ 951 if(cl!=cu) { 952 log_err("L8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu); 953 } 954 955 if(is!=iu || il!=iu) { 956 log_err("U8_NEXT(b[%ld]) or L8_NEXT(b[%ld]) did not advance the index correctly\n", (long)i, (long)i); 957 } 958 959 ++k; /* next code point */ 960 i=iu; /* advance by one UTF-8 sequence */ 961 } 962 963 while(i>0) { 964 --k; /* previous code point */ 965 966 j=i; 967 U8_PREV_UNSAFE(b, j, cu); 968 iu=j; 969 970 j=i; 971 U8_PREV(b, 0, j, cs); 972 is=j; 973 974 j=i; 975 L8_PREV(b, 0, j, cl); 976 il=j; 977 978 if(cu!=cp[k]) { 979 log_err("U8_PREV_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]); 980 } 981 982 /* U8_PREV() returns <0 for surrogate code points */ 983 if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) { 984 log_err("U8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu); 985 } 986 987 /* L8_PREV() returns surrogate code points like U8_PREV_UNSAFE() */ 988 if(cl!=cu) { 989 log_err("L8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu); 990 } 991 992 if(is!=iu || il !=iu) { 993 log_err("U8_PREV(b[%ld]) or L8_PREV(b[%ld]) did not advance the index correctly\n", (long)i, (long)i); 994 } 995 996 i=iu; /* go back by one UTF-8 sequence */ 997 } 998 } 999 1000 static void printUChars(const uint8_t *uchars, int16_t len){ 1001 int16_t i=0; 1002 for(i=0; i<len; i++){ 1003 log_err("0x%02x ", *(uchars+i)); 1004 } 1005 } 1006