1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1998-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 /* 9 * File test.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 05/01/2000 Madhu Creation 15 ******************************************************************************* 16 */ 17 18 #include "unicode/utypes.h" 19 #include "unicode/ustring.h" 20 #include "unicode/utf16.h" 21 #include "unicode/utf_old.h" 22 #include "cmemory.h" 23 #include "cstring.h" 24 #include "cintltst.h" 25 #include <stdio.h> 26 27 // Obsolete macro from obsolete unicode/utf_old.h, for some old test data. 28 #ifndef UTF_ERROR_VALUE 29 # define UTF_ERROR_VALUE 0xffff 30 #endif 31 32 #if !U_HIDE_OBSOLETE_UTF_OLD_H 33 static void printUChars(const UChar *uchars) { 34 int16_t i=0; 35 for(i=0; i<u_strlen(uchars); i++) { 36 printf("%x ", *(uchars+i)); 37 } 38 } 39 #endif 40 41 static void TestCodeUnitValues(void); 42 static void TestCharLength(void); 43 static void TestGetChar(void); 44 static void TestNextPrevChar(void); 45 static void TestNulTerminated(void); 46 static void TestFwdBack(void); 47 static void TestSetChar(void); 48 static void TestAppendChar(void); 49 static void TestAppend(void); 50 static void TestSurrogate(void); 51 52 void addUTF16Test(TestNode** root); 53 54 void 55 addUTF16Test(TestNode** root) 56 { 57 addTest(root, &TestCodeUnitValues, "utf16tst/TestCodeUnitValues"); 58 addTest(root, &TestCharLength, "utf16tst/TestCharLength"); 59 addTest(root, &TestGetChar, "utf16tst/TestGetChar"); 60 addTest(root, &TestNextPrevChar, "utf16tst/TestNextPrevChar"); 61 addTest(root, &TestNulTerminated, "utf16tst/TestNulTerminated"); 62 addTest(root, &TestFwdBack, "utf16tst/TestFwdBack"); 63 addTest(root, &TestSetChar, "utf16tst/TestSetChar"); 64 addTest(root, &TestAppendChar, "utf16tst/TestAppendChar"); 65 addTest(root, &TestAppend, "utf16tst/TestAppend"); 66 addTest(root, &TestSurrogate, "utf16tst/TestSurrogate"); 67 } 68 69 static void TestCodeUnitValues() 70 { 71 static uint16_t codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0}; 72 73 int16_t i; 74 for(i=0; i<UPRV_LENGTHOF(codeunit); i++){ 75 UChar c=codeunit[i]; 76 log_verbose("Testing code unit value of %x\n", c); 77 if(i<4){ 78 if( 79 #if !U_HIDE_OBSOLETE_UTF_OLD_H 80 !UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) || UTF16_IS_TRAIL(c) || 81 #endif 82 !U16_IS_SINGLE(c) || U16_IS_LEAD(c) || U16_IS_TRAIL(c)) { 83 log_err("ERROR: %x is a single character\n", c); 84 } 85 } 86 if(i >= 4 && i< 8){ 87 if( 88 #if !U_HIDE_OBSOLETE_UTF_OLD_H 89 !UTF16_IS_LEAD(c) || UTF16_IS_SINGLE(c) || UTF16_IS_TRAIL(c) || 90 #endif 91 !U16_IS_LEAD(c) || U16_IS_SINGLE(c) || U16_IS_TRAIL(c)){ 92 log_err("ERROR: %x is a first surrogate\n", c); 93 } 94 } 95 if(i >= 8 && i< 12){ 96 if( 97 #if !U_HIDE_OBSOLETE_UTF_OLD_H 98 !UTF16_IS_TRAIL(c) || UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) || 99 #endif 100 !U16_IS_TRAIL(c) || U16_IS_SINGLE(c) || U16_IS_LEAD(c)) { 101 log_err("ERROR: %x is a second surrogate\n", c); 102 } 103 } 104 } 105 } 106 107 static void TestCharLength() 108 { 109 static uint32_t codepoint[]={ 110 1, 0x0061, 111 1, 0xe065, 112 1, 0x20ac, 113 2, 0x20402, 114 2, 0x23456, 115 2, 0x24506, 116 2, 0x20402, 117 2, 0x10402, 118 1, 0xd7ff, 119 1, 0xe000 120 }; 121 122 int16_t i; 123 #if !U_HIDE_OBSOLETE_UTF_OLD_H 124 UBool multiple; 125 #endif 126 for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){ 127 UChar32 c=codepoint[i+1]; 128 if( 129 #if !U_HIDE_OBSOLETE_UTF_OLD_H 130 UTF16_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || 131 #endif 132 U16_LENGTH(c) != (uint16_t)codepoint[i]) { 133 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c)); 134 }else{ 135 log_verbose("The no: of code units for %lx is %d\n",c, U16_LENGTH(c)); 136 } 137 #if !U_HIDE_OBSOLETE_UTF_OLD_H 138 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE); 139 if(UTF16_NEED_MULTIPLE_UCHAR(c) != multiple){ 140 log_err("ERROR: UTF16_NEED_MULTIPLE_UCHAR failed for %lx\n", c); 141 } 142 #endif 143 } 144 } 145 146 static void TestGetChar() 147 { 148 static UChar input[]={ 149 /* code unit,*/ 150 0xdc00, 151 0x20ac, 152 0xd841, 153 0x61, 154 0xd841, 155 0xdc02, 156 0xd842, 157 0xdc06, 158 0, 159 0xd842, 160 0xd7ff, 161 0xdc41, 162 0xe000, 163 0xd800 164 }; 165 static UChar32 result[]={ 166 /*codepoint-unsafe, codepoint-safe(not strict) codepoint-safe(strict)*/ 167 (UChar32)0xfca10000, 0xdc00, UTF_ERROR_VALUE, 168 0x20ac, 0x20ac, 0x20ac, 169 0x12861, 0xd841, UTF_ERROR_VALUE, 170 0x61, 0x61, 0x61, 171 0x20402, 0x20402, 0x20402, 172 0x20402, 0x20402, 0x20402, 173 0x20806, 0x20806, 0x20806, 174 0x20806, 0x20806, 0x20806, 175 0x00, 0x00, 0x00, 176 0x203ff, 0xd842, UTF_ERROR_VALUE, 177 0xd7ff, 0xd7ff, 0xd7ff, 178 0xfc41, 0xdc41, UTF_ERROR_VALUE, 179 0xe000, 0xe000, 0xe000, 180 0x11734, 0xd800, UTF_ERROR_VALUE 181 }; 182 uint16_t i=0; 183 UChar32 c, expected; 184 uint16_t offset=0; 185 for(offset=0; offset<UPRV_LENGTHOF(input); offset++) { 186 if(0<offset && offset<UPRV_LENGTHOF(input)-1){ 187 #if !U_HIDE_OBSOLETE_UTF_OLD_H 188 UTF16_GET_CHAR_UNSAFE(input, offset, c); 189 if(c != result[i]){ 190 log_err("ERROR: UTF16_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c); 191 } 192 #endif 193 U16_GET_UNSAFE(input, offset, c); 194 if(c != result[i]){ 195 log_err("ERROR: U16_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c); 196 } 197 } 198 expected=result[i+1]; 199 #if !U_HIDE_OBSOLETE_UTF_OLD_H 200 UTF16_GET_CHAR_SAFE(input, 0, offset, UPRV_LENGTHOF(input), c, FALSE); 201 if(c != expected) { 202 log_err("ERROR: UTF16_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c); 203 } 204 #endif 205 U16_GET(input, 0, offset, UPRV_LENGTHOF(input), c); 206 if(c != expected) { 207 log_err("ERROR: U16_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c); 208 } 209 210 U16_GET_OR_FFFD(input, 0, offset, UPRV_LENGTHOF(input), c); 211 if(U_IS_SURROGATE(expected)) { expected=0xfffd; } 212 if(c != expected) { 213 log_err("ERROR: U16_GET_OR_FFFD failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c); 214 } 215 #if !U_HIDE_OBSOLETE_UTF_OLD_H 216 UTF16_GET_CHAR_SAFE(input, 0, offset, UPRV_LENGTHOF(input), c, TRUE); 217 if(c != result[i+2]){ 218 log_err("ERROR: UTF16_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c); 219 } 220 #endif 221 i=(uint16_t)(i+3); 222 } 223 } 224 225 static void TestNextPrevChar(){ 226 227 static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000}; 228 static UChar32 result[]={ 229 /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/ 230 0x0061, 0x0061, 0x0061, 0x0000, 0x0000, 0x0000, 231 0x10000, 0x10000, 0x10000, 0x120400, 0xdc00, UTF_ERROR_VALUE, 232 0xdc00, 0xdc00, UTF_ERROR_VALUE, 0x20441, 0x20441, 0x20441, 233 0x10ffff, 0x10ffff, 0x10ffff, 0xd841, 0xd841, UTF_ERROR_VALUE, 234 0xdfff, 0xdfff, UTF_ERROR_VALUE, 0xd7ff, 0xd7ff, 0xd7ff, 235 0x0062, 0x0062, 0x0062, 0xd841, 0xd841, UTF_ERROR_VALUE, 236 0x1ffff, 0xd841, UTF_ERROR_VALUE, 0x0062, 0x0062, 0x0062, 237 0xd7ff, 0xd7ff, 0xd7ff, 0x10ffff, 0x10ffff, 0x10ffff, 238 0x20441, 0x20441, 0x20441, 0xdbff, 0xdbff, UTF_ERROR_VALUE, 239 0xdc41, 0xdc41, UTF_ERROR_VALUE, 0x10000, 0x10000, 0x10000, 240 0xdc00, 0xdc00, UTF_ERROR_VALUE, 0xd800, 0xd800, UTF_ERROR_VALUE, 241 0x0000, 0x0000, 0x0000, 0x0061, 0x0061, 0x0061 242 }; 243 static uint16_t movedOffset[]={ 244 /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/ 245 1, 1, 1, 11, 11, 11, 246 3, 3, 3, 9, 10 , 10, 247 3, 3, 3, 8, 8, 8, 248 5, 5, 4, 8, 8, 8, 249 5, 5, 5, 7, 7, 7, 250 6, 6, 6, 6, 6, 6, 251 8, 7, 7, 5, 5, 5, 252 8, 8, 8, 3, 3, 3, 253 10, 10, 10, 3, 3, 3, 254 10, 10, 10, 1, 1, 1, 255 11, 11, 11, 1, 1, 1, 256 12, 12, 12, 0, 0, 0, 257 }; 258 259 260 UChar32 c=0x0000, expected; 261 uint16_t i=0; 262 uint16_t offset=0, setOffset=0; 263 for(offset=0; offset<UPRV_LENGTHOF(input); offset++){ 264 setOffset=offset; 265 #if !U_HIDE_OBSOLETE_UTF_OLD_H 266 UTF16_NEXT_CHAR_UNSAFE(input, setOffset, c); 267 if(setOffset != movedOffset[i]){ 268 log_err("ERROR: UTF16_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 269 offset, movedOffset[i], setOffset); 270 } 271 if(c != result[i]){ 272 log_err("ERROR: UTF16_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c); 273 } 274 #endif 275 setOffset=offset; 276 U16_NEXT_UNSAFE(input, setOffset, c); 277 if(setOffset != movedOffset[i]){ 278 log_err("ERROR: U16_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 279 offset, movedOffset[i], setOffset); 280 } 281 if(c != result[i]){ 282 log_err("ERROR: U16_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c); 283 } 284 expected=result[i+1]; 285 #if !U_HIDE_OBSOLETE_UTF_OLD_H 286 setOffset=offset; 287 UTF16_NEXT_CHAR_SAFE(input, setOffset, UPRV_LENGTHOF(input), c, FALSE); 288 if(setOffset != movedOffset[i+1]){ 289 log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 290 offset, movedOffset[i+1], setOffset); 291 } 292 if(c != expected) { 293 log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c); 294 } 295 #endif 296 setOffset=offset; 297 U16_NEXT(input, setOffset, UPRV_LENGTHOF(input), c); 298 if(setOffset != movedOffset[i+1]){ 299 log_err("ERROR: U16_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 300 offset, movedOffset[i+1], setOffset); 301 } 302 if(c != expected){ 303 log_err("ERROR: U16_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c); 304 } 305 306 setOffset=offset; 307 U16_NEXT_OR_FFFD(input, setOffset, UPRV_LENGTHOF(input), c); 308 if(setOffset != movedOffset[i+1]){ 309 log_err("ERROR: U16_NEXT_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 310 offset, movedOffset[i+1], setOffset); 311 } 312 if(U_IS_SURROGATE(expected)) { expected=0xfffd; } 313 if(c != expected){ 314 log_err("ERROR: U16_NEXT_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c); 315 } 316 #if !U_HIDE_OBSOLETE_UTF_OLD_H 317 setOffset=offset; 318 UTF16_NEXT_CHAR_SAFE(input, setOffset, UPRV_LENGTHOF(input), c, TRUE); 319 if(setOffset != movedOffset[i+1]){ 320 log_err("ERROR: UTF16_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 321 offset, movedOffset[i+2], setOffset); 322 } 323 if(c != result[i+2]){ 324 log_err("ERROR: UTF16_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c); 325 } 326 #endif 327 i=(uint16_t)(i+6); 328 } 329 i=0; 330 for(offset=(uint16_t)UPRV_LENGTHOF(input); offset > 0; --offset){ 331 setOffset=offset; 332 #if !U_HIDE_OBSOLETE_UTF_OLD_H 333 UTF16_PREV_CHAR_UNSAFE(input, setOffset, c); 334 if(setOffset != movedOffset[i+3]){ 335 log_err("ERROR: UTF16_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 336 offset, movedOffset[i+3], setOffset); 337 } 338 if(c != result[i+3]){ 339 log_err("ERROR: UTF16_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c); 340 } 341 #endif 342 setOffset=offset; 343 U16_PREV_UNSAFE(input, setOffset, c); 344 if(setOffset != movedOffset[i+3]){ 345 log_err("ERROR: U16_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 346 offset, movedOffset[i+3], setOffset); 347 } 348 if(c != result[i+3]){ 349 log_err("ERROR: U16_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c); 350 } 351 #if !U_HIDE_OBSOLETE_UTF_OLD_H 352 setOffset=offset; 353 UTF16_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE); 354 if(setOffset != movedOffset[i+4]){ 355 log_err("ERROR: UTF16_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 356 offset, movedOffset[i+4], setOffset); 357 } 358 if(c != result[i+4]){ 359 log_err("ERROR: UTF16_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c); 360 } 361 #endif 362 setOffset=offset; 363 U16_PREV(input, 0, setOffset, c); 364 if(setOffset != movedOffset[i+4]){ 365 log_err("ERROR: U16_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 366 offset, movedOffset[i+4], setOffset); 367 } 368 expected = result[i+4]; 369 if(c != expected) { 370 log_err("ERROR: U16_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c); 371 } 372 373 setOffset=offset; 374 U16_PREV_OR_FFFD(input, 0, setOffset, c); 375 if(setOffset != movedOffset[i+4]){ 376 log_err("ERROR: U16_PREV_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 377 offset, movedOffset[i+4], setOffset); 378 } 379 if(U_IS_SURROGATE(expected)) { expected=0xfffd; } 380 if(c != expected) { 381 log_err("ERROR: U16_PREV_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c); 382 } 383 #if !U_HIDE_OBSOLETE_UTF_OLD_H 384 setOffset=offset; 385 UTF16_PREV_CHAR_SAFE(input, 0, setOffset, c, TRUE); 386 if(setOffset != movedOffset[i+5]){ 387 log_err("ERROR: UTF16_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", 388 offset, movedOffset[i+5], setOffset); 389 } 390 if(c != result[i+5]){ 391 log_err("ERROR: UTF16_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c); 392 } 393 #endif 394 i=(uint16_t)(i+6); 395 } 396 397 } 398 399 /* keep this in sync with utf8tst.c's TestNulTerminated() */ 400 static void TestNulTerminated() { 401 static const UChar input[]={ 402 /* 0 */ 0x61, 403 /* 1 */ 0xd801, 0xdc01, 404 /* 3 */ 0xdc01, 405 /* 4 */ 0x62, 406 /* 5 */ 0xd801, 407 /* 6 */ 0x00 408 /* 7 */ 409 }; 410 static const UChar32 result[]={ 411 0x61, 412 0x10401, 413 0xdc01, 414 0x62, 415 0xd801, 416 0 417 }; 418 419 UChar32 c, c2, expected; 420 int32_t i0, i=0, j, k, expectedIndex; 421 int32_t cpIndex=0; 422 do { 423 i0=i; 424 U16_NEXT(input, i, -1, c); 425 expected=result[cpIndex]; 426 if(c!=expected) { 427 log_err("U16_NEXT(from %d)=U+%04x != U+%04x\n", i0, c, expected); 428 } 429 j=i0; 430 U16_NEXT_OR_FFFD(input, j, -1, c); 431 if(U_IS_SURROGATE(expected)) { expected=0xfffd; } 432 if(c!=expected) { 433 log_err("U16_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x\n", i0, c, expected); 434 } 435 if(j!=i) { 436 log_err("U16_NEXT_OR_FFFD() moved to index %d but U16_NEXT() moved to %d\n", j, i); 437 } 438 j=i0; 439 U16_FWD_1(input, j, -1); 440 if(j!=i) { 441 log_err("U16_FWD_1() moved to index %d but U16_NEXT() moved to %d\n", j, i); 442 } 443 ++cpIndex; 444 /* 445 * Move by this many code points from the start. 446 * U16_FWD_N() stops at the end of the string, that is, at the NUL if necessary. 447 */ 448 expectedIndex= (c==0) ? i-1 : i; 449 k=0; 450 U16_FWD_N(input, k, -1, cpIndex); 451 if(k!=expectedIndex) { 452 log_err("U16_FWD_N(code points from 0) moved to index %d but expected %d\n", k, expectedIndex); 453 } 454 } while(c!=0); 455 456 i=0; 457 do { 458 j=i0=i; 459 U16_NEXT(input, i, -1, c); 460 do { 461 U16_GET(input, 0, j, -1, c2); 462 if(c2!=c) { 463 log_err("U16_NEXT(from %d)=U+%04x != U+%04x=U16_GET(at %d)\n", i0, c, c2, j); 464 } 465 U16_GET_OR_FFFD(input, 0, j, -1, c2); 466 expected= U_IS_SURROGATE(c) ? 0xfffd : c; 467 if(c2!=expected) { 468 log_err("U16_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x=U16_GET_OR_FFFD(at %d)\n", i0, expected, c2, j); 469 } 470 /* U16_SET_CP_LIMIT moves from a non-lead byte to the limit of the code point */ 471 k=j+1; 472 U16_SET_CP_LIMIT(input, 0, k, -1); 473 if(k!=i) { 474 log_err("U16_NEXT() moved to %d but U16_SET_CP_LIMIT(%d) moved to %d\n", i, j+1, k); 475 } 476 } while(++j<i); 477 } while(c!=0); 478 } 479 480 static void TestFwdBack(){ 481 static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000}; 482 static uint16_t fwd_unsafe[] ={1, 3, 5, 6, 8, 10, 11, 12}; 483 static uint16_t fwd_safe[] ={1, 3, 5, 6, 7, 8, 10, 11, 12}; 484 static uint16_t back_unsafe[]={11, 9, 8, 7, 6, 5, 3, 1, 0}; 485 static uint16_t back_safe[] ={11, 10, 8, 7, 6, 5, 3, 1, 0}; 486 487 static uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1}; 488 static uint16_t fwd_N_unsafe[] ={0, 1, 5, 10, 11}; 489 static uint16_t fwd_N_safe[] ={0, 1, 5, 8, 10, 12, 12}; /*safe macro keeps it at the end of the string */ 490 static uint16_t back_N_unsafe[]={12, 11, 8, 5, 3}; 491 static uint16_t back_N_safe[] ={12, 11, 8, 5, 3, 0, 0}; 492 493 uint16_t offunsafe=0, offsafe=0; 494 uint16_t i=0; 495 #if !U_HIDE_OBSOLETE_UTF_OLD_H 496 while(offunsafe < UPRV_LENGTHOF(input)){ 497 UTF16_FWD_1_UNSAFE(input, offunsafe); 498 if(offunsafe != fwd_unsafe[i]){ 499 log_err("ERROR: Forward_unsafe offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe); 500 } 501 i++; 502 } 503 #endif 504 offunsafe=0, offsafe=0; 505 i=0; 506 while(offunsafe < UPRV_LENGTHOF(input)){ 507 U16_FWD_1_UNSAFE(input, offunsafe); 508 if(offunsafe != fwd_unsafe[i]){ 509 log_err("ERROR: U16_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe); 510 } 511 i++; 512 } 513 #if !U_HIDE_OBSOLETE_UTF_OLD_H 514 offunsafe=0, offsafe=0; 515 i=0; 516 while(offsafe < UPRV_LENGTHOF(input)){ 517 UTF16_FWD_1_SAFE(input, offsafe, UPRV_LENGTHOF(input)); 518 if(offsafe != fwd_safe[i]){ 519 log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe); 520 } 521 i++; 522 } 523 #endif 524 offunsafe=0, offsafe=0; 525 i=0; 526 while(offsafe < UPRV_LENGTHOF(input)){ 527 U16_FWD_1(input, offsafe, UPRV_LENGTHOF(input)); 528 if(offsafe != fwd_safe[i]){ 529 log_err("ERROR: U16_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe); 530 } 531 i++; 532 } 533 #if !U_HIDE_OBSOLETE_UTF_OLD_H 534 offunsafe=UPRV_LENGTHOF(input); 535 offsafe=UPRV_LENGTHOF(input); 536 i=0; 537 while(offunsafe > 0){ 538 UTF16_BACK_1_UNSAFE(input, offunsafe); 539 if(offunsafe != back_unsafe[i]){ 540 log_err("ERROR: Backward_unsafe offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe); 541 } 542 i++; 543 } 544 #endif 545 offunsafe=UPRV_LENGTHOF(input); 546 offsafe=UPRV_LENGTHOF(input); 547 i=0; 548 while(offunsafe > 0){ 549 U16_BACK_1_UNSAFE(input, offunsafe); 550 if(offunsafe != back_unsafe[i]){ 551 log_err("ERROR: U16_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe); 552 } 553 i++; 554 } 555 #if !U_HIDE_OBSOLETE_UTF_OLD_H 556 offunsafe=UPRV_LENGTHOF(input); 557 offsafe=UPRV_LENGTHOF(input); 558 i=0; 559 while(offsafe > 0){ 560 UTF16_BACK_1_SAFE(input,0, offsafe); 561 if(offsafe != back_safe[i]){ 562 log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_unsafe[i], offsafe); 563 } 564 i++; 565 } 566 #endif 567 offunsafe=UPRV_LENGTHOF(input); 568 offsafe=UPRV_LENGTHOF(input); 569 i=0; 570 while(offsafe > 0){ 571 U16_BACK_1(input,0, offsafe); 572 if(offsafe != back_safe[i]){ 573 log_err("ERROR: U16_BACK_1 offset expected:%d, Got:%d\n", back_unsafe[i], offsafe); 574 } 575 i++; 576 } 577 578 offunsafe=0; 579 offsafe=0; 580 #if !U_HIDE_OBSOLETE_UTF_OLD_H 581 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){ /*didn't want it to fail(we assume 0<i<length)*/ 582 UTF16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]); 583 if(offunsafe != fwd_N_unsafe[i]){ 584 log_err("ERROR: Forward_N_unsafe offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe); 585 } 586 } 587 #endif 588 offunsafe=0; 589 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){ /*didn't want it to fail(we assume 0<i<length)*/ 590 U16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]); 591 if(offunsafe != fwd_N_unsafe[i]){ 592 log_err("ERROR: U16_FWD_N_UNSAFE offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe); 593 } 594 } 595 #if !U_HIDE_OBSOLETE_UTF_OLD_H 596 offsafe=0; 597 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ 598 UTF16_FWD_N_SAFE(input, offsafe, UPRV_LENGTHOF(input), Nvalue[i]); 599 if(offsafe != fwd_N_safe[i]){ 600 log_err("ERROR: Forward_N_safe offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe); 601 } 602 } 603 #endif 604 offsafe=0; 605 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ 606 U16_FWD_N(input, offsafe, UPRV_LENGTHOF(input), Nvalue[i]); 607 if(offsafe != fwd_N_safe[i]){ 608 log_err("ERROR: U16_FWD_N offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe); 609 } 610 } 611 #if !U_HIDE_OBSOLETE_UTF_OLD_H 612 offunsafe=UPRV_LENGTHOF(input); 613 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){ 614 UTF16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]); 615 if(offunsafe != back_N_unsafe[i]){ 616 log_err("ERROR: backward_N_unsafe offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe); 617 } 618 } 619 #endif 620 offunsafe=UPRV_LENGTHOF(input); 621 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){ 622 U16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]); 623 if(offunsafe != back_N_unsafe[i]){ 624 log_err("ERROR: U16_BACK_N_UNSAFE offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe); 625 } 626 } 627 #if !U_HIDE_OBSOLETE_UTF_OLD_H 628 offsafe=UPRV_LENGTHOF(input); 629 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ 630 UTF16_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]); 631 if(offsafe != back_N_safe[i]){ 632 log_err("ERROR: backward_N_safe offset expected:%d, Got:%d\n", back_N_safe[i], offsafe); 633 } 634 } 635 #endif 636 offsafe=UPRV_LENGTHOF(input); 637 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ 638 U16_BACK_N(input, 0, offsafe, Nvalue[i]); 639 if(offsafe != back_N_safe[i]){ 640 log_err("ERROR: U16_BACK_N offset expected:%d, Got:%d\n", back_N_safe[i], offsafe); 641 } 642 } 643 } 644 645 static void TestSetChar(){ 646 static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000}; 647 static uint16_t start_unsafe[]={0, 1, 1, 3, 3, 5, 6, 7, 8, 8, 9, 11}; 648 static uint16_t start_safe[] ={0, 1, 1, 3, 3, 5, 6, 7, 8, 8, 10, 11}; 649 static uint16_t limit_unsafe[]={0, 1, 3, 3, 5, 5, 6, 8, 8, 10, 10, 11}; 650 static uint16_t limit_safe[] ={0, 1, 3, 3, 5, 5, 6, 7, 8, 10, 10, 11}; 651 652 uint16_t i=0; 653 uint16_t offset=0, setOffset=0; 654 for(offset=0; offset<UPRV_LENGTHOF(input); offset++){ 655 #if !U_HIDE_OBSOLETE_UTF_OLD_H 656 setOffset=offset; 657 UTF16_SET_CHAR_START_UNSAFE(input, setOffset); 658 if(setOffset != start_unsafe[i]){ 659 log_err("ERROR: UTF16_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_unsafe[i], setOffset); 660 } 661 #endif 662 setOffset=offset; 663 U16_SET_CP_START_UNSAFE(input, setOffset); 664 if(setOffset != start_unsafe[i]){ 665 log_err("ERROR: U16_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_unsafe[i], setOffset); 666 } 667 #if !U_HIDE_OBSOLETE_UTF_OLD_H 668 setOffset=offset; 669 UTF16_SET_CHAR_START_SAFE(input, 0, setOffset); 670 if(setOffset != start_safe[i]){ 671 log_err("ERROR: UTF16_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_safe[i], setOffset); 672 } 673 #endif 674 setOffset=offset; 675 U16_SET_CP_START(input, 0, setOffset); 676 if(setOffset != start_safe[i]){ 677 log_err("ERROR: U16_SET_CHAR_START failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_safe[i], setOffset); 678 } 679 680 if (offset > 0) { 681 #if !U_HIDE_OBSOLETE_UTF_OLD_H 682 setOffset=offset; 683 UTF16_SET_CHAR_LIMIT_UNSAFE(input, setOffset); 684 if(setOffset != limit_unsafe[i]){ 685 log_err("ERROR: UTF16_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_unsafe[i], setOffset); 686 } 687 #endif 688 setOffset=offset; 689 U16_SET_CP_LIMIT_UNSAFE(input, setOffset); 690 if(setOffset != limit_unsafe[i]){ 691 log_err("ERROR: U16_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_unsafe[i], setOffset); 692 } 693 } 694 695 setOffset=offset; 696 U16_SET_CP_LIMIT(input,0, setOffset, UPRV_LENGTHOF(input)); 697 if(setOffset != limit_safe[i]){ 698 log_err("ERROR: U16_SET_CHAR_LIMIT failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_safe[i], setOffset); 699 } 700 701 i++; 702 } 703 } 704 705 static void TestAppendChar(){ 706 #if !U_HIDE_OBSOLETE_UTF_OLD_H 707 static UChar s[5]={0x0061, 0x0062, 0x0063, 0x0064, 0x0000}; 708 static uint32_t test[]={ 709 /*append-position(unsafe), CHAR to be appended */ 710 0, 0x20441, 711 2, 0x0028, 712 2, 0xdc00, 713 3, 0xd800, 714 1, 0x20402, 715 716 /*append-position(safe), CHAR to be appended */ 717 0, 0x20441, 718 2, 0xdc00, 719 3, 0xd800, 720 1, 0x20402, 721 3, 0x20402, 722 3, 0x10402, 723 2, 0x10402, 724 725 }; 726 static uint16_t movedOffset[]={ 727 /*offset-moved-to(unsafe)*/ 728 2, /*for append-pos: 0 , CHAR 0x20441*/ 729 3, 730 3, 731 4, 732 3, 733 /*offse-moved-to(safe)*/ 734 2, /*for append-pos: 0, CHAR 0x20441*/ 735 3, 736 4, 737 3, 738 4, 739 4, 740 4 741 }; 742 743 static UChar result[][5]={ 744 /*unsafe*/ 745 {0xd841, 0xdc41, 0x0063, 0x0064, 0x0000}, 746 {0x0061, 0x0062, 0x0028, 0x0064, 0x0000}, 747 {0x0061, 0x0062, 0xdc00, 0x0064, 0x0000}, 748 {0x0061, 0x0062, 0x0063, 0xd800, 0x0000}, 749 {0x0061, 0xd841, 0xdc02, 0x0064, 0x0000}, 750 751 /*safe*/ 752 {0xd841, 0xdc41, 0x0063, 0x0064, 0x0000}, 753 {0x0061, 0x0062, 0xdc00, 0x0064, 0x0000}, 754 {0x0061, 0x0062, 0x0063, 0xd800, 0x0000}, 755 {0x0061, 0xd841, 0xdc02, 0x0064, 0x0000}, 756 {0x0061, 0x0062, 0x0063, UTF_ERROR_VALUE, 0x0000}, 757 {0x0061, 0x0062, 0x0063, UTF_ERROR_VALUE, 0x0000}, 758 {0x0061, 0x0062, 0xd801, 0xdc02, 0x0000}, 759 760 761 }; 762 uint16_t i, count=0; 763 UChar *str=(UChar*)malloc(sizeof(UChar) * (u_strlen(s)+1)); 764 uint16_t offset; 765 for(i=0; i<UPRV_LENGTHOF(test); i=(uint16_t)(i+2)){ 766 if(count<5){ 767 u_strcpy(str, s); 768 offset=(uint16_t)test[i]; 769 UTF16_APPEND_CHAR_UNSAFE(str, offset, test[i+1]); 770 if(offset != movedOffset[count]){ 771 log_err("ERROR: UTF16_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n", 772 count, movedOffset[count], offset); 773 774 } 775 if(u_strcmp(str, result[count]) !=0){ 776 log_err("ERROR: UTF16_APPEND_CHAR_UNSAFE failed for count=%d. Expected:", count); 777 printUChars(result[count]); 778 printf("\nGot:"); 779 printUChars(str); 780 printf("\n"); 781 } 782 }else{ 783 u_strcpy(str, s); 784 offset=(uint16_t)test[i]; 785 UTF16_APPEND_CHAR_SAFE(str, offset, (uint16_t)u_strlen(str), test[i+1]); 786 if(offset != movedOffset[count]){ 787 log_err("ERROR: UTF16_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n", 788 count, movedOffset[count], offset); 789 790 } 791 if(u_strcmp(str, result[count]) !=0){ 792 log_err("ERROR: UTF16_APPEND_CHAR_SAFE failed for count=%d. Expected:", count); 793 printUChars(result[count]); 794 printf("\nGot:"); 795 printUChars(str); 796 printf("\n"); 797 } 798 } 799 count++; 800 } 801 free(str); 802 #endif 803 } 804 805 static void TestAppend() { 806 static const UChar32 codePoints[]={ 807 0x61, 0xdf, 0x901, 0x3040, 808 0xac00, 0xd800, 0xdbff, 0xdcde, 809 0xdffd, 0xe000, 0xffff, 0x10000, 810 0x12345, 0xe0021, 0x10ffff, 0x110000, 811 0x234567, 0x7fffffff, -1, -1000, 812 0, 0x400 813 }; 814 static const UChar expectUnsafe[]={ 815 0x61, 0xdf, 0x901, 0x3040, 816 0xac00, 0xd800, 0xdbff, 0xdcde, 817 0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00, 818 0xd808, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */ 819 /* none from this line */ 820 0, 0x400 821 }, expectSafe[]={ 822 0x61, 0xdf, 0x901, 0x3040, 823 0xac00, 0xd800, 0xdbff, 0xdcde, 824 0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00, 825 0xd808, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */ 826 /* none from this line */ 827 0, 0x400 828 }; 829 830 UChar buffer[100]; 831 UChar32 c; 832 int32_t i, length; 833 UBool isError, expectIsError, wrongIsError; 834 835 length=0; 836 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) { 837 c=codePoints[i]; 838 if(c<0 || 0x10ffff<c) { 839 continue; /* skip non-code points for U16_APPEND_UNSAFE */ 840 } 841 842 U16_APPEND_UNSAFE(buffer, length, c); 843 } 844 if(length!=UPRV_LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length*U_SIZEOF_UCHAR)) { 845 log_err("U16_APPEND_UNSAFE did not generate the expected output\n"); 846 } 847 848 length=0; 849 wrongIsError=FALSE; 850 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) { 851 c=codePoints[i]; 852 expectIsError= c<0 || 0x10ffff<c; /* || U_IS_SURROGATE(c); */ /* surrogates in UTF-32 shouldn't be used, but it's okay to pass them around internally. */ 853 isError=FALSE; 854 855 U16_APPEND(buffer, length, UPRV_LENGTHOF(buffer), c, isError); 856 wrongIsError|= isError!=expectIsError; 857 } 858 if(wrongIsError) { 859 log_err("U16_APPEND did not set isError correctly\n"); 860 } 861 if(length!=UPRV_LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length*U_SIZEOF_UCHAR)) { 862 log_err("U16_APPEND did not generate the expected output\n"); 863 } 864 } 865 866 static void TestSurrogate(){ 867 static UChar32 s[] = {0x10000, 0x10ffff, 0x50000, 0x100000, 0x1abcd}; 868 int i = 0; 869 while (i < 5) { 870 UChar first = U16_LEAD(s[i]); 871 UChar second = U16_TRAIL(s[i]); 872 /* algorithm from the Unicode consortium */ 873 UChar firstresult = (UChar)(((s[i] - 0x10000) / 0x400) + 0xD800); 874 UChar secondresult = (UChar)(((s[i] - 0x10000) % 0x400) + 0xDC00); 875 876 if ( 877 #if !U_HIDE_OBSOLETE_UTF_OLD_H 878 first != UTF16_LEAD(s[i]) || first != UTF_FIRST_SURROGATE(s[i]) || 879 #endif 880 first != firstresult) { 881 log_err("Failure in first surrogate in 0x%x expected to be 0x%x\n", 882 s[i], firstresult); 883 } 884 if ( 885 #if !U_HIDE_OBSOLETE_UTF_OLD_H 886 second != UTF16_TRAIL(s[i]) || second != UTF_SECOND_SURROGATE(s[i]) || 887 #endif 888 second != secondresult) { 889 log_err("Failure in second surrogate in 0x%x expected to be 0x%x\n", 890 s[i], secondresult); 891 } 892 i ++; 893 } 894 } 895