1 #include <stdint.h> 2 #include <inttypes.h> 3 #include <stdlib.h> 4 #include <string.h> 5 #include <stdio.h> 6 #include "opcodes.h" 7 8 #ifndef M3 9 #define M3 0 10 #endif 11 12 /* The abstracted result of an CU14 insn */ 13 typedef struct { 14 uint64_t addr1; // target 15 uint64_t len1; 16 uint64_t addr2; // source 17 uint64_t len2; 18 uint32_t cc; 19 } cu14_t; 20 21 /* Define various input buffers. */ 22 23 /* 1-byte UTF-8 character */ 24 uint8_t pattern1[] = { 25 0x00, 0x01, 0x02, 0x03 26 }; 27 28 /* 2-byte UTF-8 character */ 29 uint8_t pattern2[] = { 30 0xc2, 0x80, 31 0xc2, 0x81, 32 0xc2, 0x82, 33 0xc2, 0x83, 34 }; 35 36 /* 3-byte UTF-8 character */ 37 uint8_t pattern3[] = { 38 0xe1, 0x80, 0x80, 39 0xe1, 0x80, 0x81, 40 0xe1, 0x80, 0x82, 41 0xe1, 0x80, 0x83, 42 }; 43 44 /* 4-byte UTF-8 character */ 45 uint8_t pattern4[] = { 46 0xf4, 0x80, 0x80, 0x80, 47 0xf4, 0x80, 0x80, 0x81, 48 0xf4, 0x80, 0x80, 0x82, 49 0xf4, 0x80, 0x80, 0x83, 50 }; 51 52 53 /* Mixed bytes */ 54 uint8_t mixed[] = { 55 0x01, // 1 byte 56 0xc3, 0x80, // 2 bytes 57 0x12, // 1 byte 58 0xe1, 0x90, 0x93, // 3 bytes 59 0x23, // 1 byte 60 0xf4, 0x80, 0x90, 0x8a, // 4 bytes 61 0x34, // 1 byte 62 0xc4, 0x8c, // 2 bytes 63 0xe1, 0x91, 0x94, // 3 bytes 64 0xc5, 0x8a, // 2 bytes 65 0xf4, 0x80, 0x90, 0x8a, // 4 bytes 66 0xc5, 0x8a, // 2 bytes 67 0xe1, 0x91, 0x94, // 3 bytes 68 0xf4, 0x80, 0x90, 0x8a, // 4 bytes 69 0xe1, 0x91, 0x94, // 3 bytes 70 }; 71 72 /* This is the buffer for the converted bytes. */ 73 uint32_t buff[500]; /* Large so we con'don't have to worry about it */ 74 75 76 static cu14_t 77 do_cu14(uint32_t *dst, uint64_t dst_len, uint8_t *src, uint64_t src_len) 78 { 79 int cc = 42; 80 cu14_t regs; 81 82 /* build up the register pairs */ 83 register uint8_t *source asm("4") = src; 84 register uint64_t source_len asm("5") = src_len; 85 register uint32_t *dest asm("2") = dst; 86 register uint64_t dest_len asm("3") = dst_len; 87 88 asm volatile( 89 CU14(M3,2,4) 90 "ipm %2\n\t" 91 "srl %2,28\n\t" 92 : "+d"(dest), "+d"(source), "=d"(cc), 93 "+d"(source_len), "+d"(dest_len) 94 : 95 : "memory", "cc"); 96 97 /* Capture register contents at end of cu14 */ 98 regs.addr1 = (uint64_t)dest; 99 regs.len1 = dest_len; 100 regs.addr2 = (uint64_t)source; 101 regs.len2 = source_len; 102 regs.cc = cc; 103 104 return regs; 105 } 106 107 void 108 run_test(uint32_t *dst, uint64_t dst_len, uint8_t *src, uint64_t src_len) 109 { 110 int i; 111 cu14_t result; 112 113 printf("UTF8: "); 114 if (src_len == 0) 115 printf(" <none>"); 116 else { 117 for(i = 0; i < src_len; ++i) 118 printf(" %02x", src[i]); 119 } 120 printf("\n"); 121 122 result = do_cu14(dst, dst_len, src, src_len); 123 124 // Write out the converted bytes, if any 125 printf("UTF32: "); 126 if (dst_len - result.len1 == 0) 127 printf(" <none>"); 128 else { 129 uint64_t num_bytes = dst_len - result.len1; 130 131 /* The number of bytes that were written must be divisible by 4 */ 132 if (num_bytes % 4 != 0) 133 fprintf(stderr, "*** number of bytes is not a multiple of 4\n"); 134 135 for (i = 0; i < num_bytes / 4; i++) { 136 printf(" %08x", dst[i]); 137 } 138 } 139 printf("\n"); 140 141 printf(" cc = %d\n", result.cc); 142 if (dst != NULL) 143 printf(" dst address difference: %"PRId64, result.addr1 - (uint64_t)dst); 144 printf(" dst len: %"PRId64"\n", result.len1); 145 146 if (src != NULL) 147 printf(" src address difference: %"PRId64, result.addr2 - (uint64_t)src); 148 printf(" src len: %"PRId64"\n", result.len2); 149 } 150 151 // Test conversion of a one-byte character 152 void convert_1_byte(void) 153 { 154 int i; 155 156 printf("===== Conversion of a one-byte character =====\n"); 157 158 printf("\n----- Valid characters -----\n"); 159 uint8_t valid[] = { 160 0x00, 0x7f, // corner cases 161 0x01, 0x10, 0x7e, 0x5d // misc 162 }; 163 run_test(buff, sizeof buff, valid, sizeof valid); 164 165 // As conversion stops upon encountering an invalid character, we 166 // need to test each invalid character separately, to make sure it 167 // is recognized as invalid. 168 169 printf("\n----- Invalid characters -----\n"); 170 uint8_t always_invalid[] = { 171 0x80, 0xbf, // corner cases 172 0xf8, 0xff, // corner cases 173 0x81, 0xbe, 0x95, 0xab // misc 174 }; 175 for (i = 0; i < sizeof always_invalid; ++i) { 176 uint8_t invalid_char[1]; 177 invalid_char[0] = always_invalid[i]; 178 run_test(buff, sizeof buff, invalid_char, sizeof invalid_char); 179 } 180 181 // In case of m3 == 0 we get cc=0 indicating exhaustion of source 182 printf("\n----- Invalid characters if m3 == 1 -----\n"); 183 uint8_t invalid_if_m3[] = { // contains all such invalid characters 184 0xc0, 0xc1, 185 0xf5, 0xf6, 0xf7 186 }; 187 for (i = 0; i < sizeof invalid_if_m3; ++i) { 188 uint8_t invalid_char[1]; 189 invalid_char[0] = invalid_if_m3[i]; 190 run_test(buff, sizeof buff, invalid_char, sizeof invalid_char); 191 } 192 193 printf("\n----- 1st char valid, 2nd char invalid -----\n"); 194 uint8_t valid_invalid[] = { 195 0x10, // valid 196 0xaa // invalid 197 }; 198 run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid); 199 } 200 201 // Test conversion of a two-byte character 202 void convert_2_bytes(void) 203 { 204 int i; 205 206 printf("\n===== Conversion of a two-byte character =====\n"); 207 208 printf("\n----- Valid characters -----\n"); 209 uint8_t valid[] = { 210 0xc2, 0x80, // corner case 211 0xc2, 0xbf, // corner case 212 0xdf, 0x80, // corner case 213 0xdf, 0xbf, // corner case 214 0xc3, 0xbe, 0xda, 0xbc // misc 215 }; 216 run_test(buff, sizeof buff, valid, sizeof valid); 217 218 printf("\n----- Valid characters if m3 == 0 -----\n"); 219 // First char is 0xc0 or 0xc1 220 uint8_t valid_if_not_m3[] = { 221 0xc0, 0x80, 222 0xc0, 0xbf, 223 0xc1, 0x80, 224 0xc0, 0xbf 225 }; 226 run_test(buff, sizeof buff, valid_if_not_m3, sizeof valid_if_not_m3); 227 228 // Test for invalid two-byte characters where the 1st byte is valid 229 // The 2nd byte is invalid if not in range 0x80..0xbf, inclusive 230 231 // As conversion stops upon encountering an invalid character, we 232 // need to test each invalid character separately, to make sure it 233 // is recognized as invalid. 234 235 printf("\n----- Invalid characters if m3 == 1 -----\n"); 236 uint8_t always_invalid[] = { 237 0xc2, 0x00, 238 0xc2, 0x7f, 239 0xc2, 0xc0, 240 0xc2, 0xff 241 }; 242 for (i = 0; i < sizeof always_invalid; i += 2) { 243 uint8_t invalid_char[2]; 244 invalid_char[0] = always_invalid[i]; 245 invalid_char[1] = always_invalid[i+1]; 246 run_test(buff, sizeof buff, invalid_char, sizeof invalid_char); 247 } 248 249 /* Nb: for a two-byte character we need not test the case where 250 invalidity of the character (cc=2) takes precedence over exhaustion 251 of the 1st operand (cc=1). Invalidity of the character has already 252 been tested when testing the 1st byte. */ 253 254 printf("\n----- 1st char valid, 2nd char invalid -----\n"); 255 uint8_t valid_invalid[] = { 256 0xc3, 0x81, // valid 257 0xc4, 0x00 // invalid 258 }; 259 run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid); 260 } 261 262 // Test conversion of a three-byte character 263 void 264 convert_3_bytes(void) 265 { 266 int i; 267 268 printf("\n===== Conversion of a three-byte character =====\n"); 269 270 /* Exhaustively test the 1st byte E0 - EF, and the interval boundaries for 271 the 2nd and 3rd bytes */ 272 printf("\n----- Valid characters -----\n"); 273 uint8_t e0[] = { 274 0xe0, 0xa0, 0x80, 275 0xe0, 0xbf, 0x80, 276 0xe0, 0xa0, 0xbf, 277 0xe0, 0xbf, 0xbf, 278 0xe0, 0xaa, 0xbb, // random e0 .. .. 279 }; 280 run_test(buff, sizeof buff, e0, sizeof e0); 281 282 uint8_t ed[] = { 283 0xed, 0x80, 0x80, 284 0xed, 0x9f, 0x80, 285 0xed, 0x80, 0xbf, 286 0xed, 0x9f, 0xbf, 287 0xed, 0x8a, 0xbb, // random ed .. .. 288 }; 289 run_test(buff, sizeof buff, ed, sizeof ed); 290 291 for (i = 0; i <= 0xf; ++i) { 292 uint8_t exxx_1[3] = { 0x0, 0x80, 0x80 }; 293 uint8_t exxx_2[3] = { 0x0, 0xbf, 0x80 }; 294 uint8_t exxx_3[3] = { 0x0, 0x80, 0xbf }; 295 uint8_t exxx_4[3] = { 0x0, 0xbf, 0xbf }; 296 297 if (i == 0x00) continue; // special case e0 298 if (i == 0x0d) continue; // special case ed 299 300 exxx_1[0] = 0xe0 | i; 301 exxx_2[0] = 0xe0 | i; 302 exxx_3[0] = 0xe0 | i; 303 exxx_4[0] = 0xe0 | i; 304 run_test(buff, sizeof buff, exxx_1, sizeof exxx_1); 305 run_test(buff, sizeof buff, exxx_2, sizeof exxx_2); 306 run_test(buff, sizeof buff, exxx_3, sizeof exxx_3); 307 run_test(buff, sizeof buff, exxx_4, sizeof exxx_4); 308 }; 309 310 printf("\n----- Invalid characters (2nd byte is invalid) -----\n"); 311 // Test for invalid three-byte characters where the 1st byte is valid 312 // The 2nd byte is invalid. 313 314 // As conversion stops upon encountering an invalid character, we 315 // need to test each invalid character separately, to make sure it 316 // is recognized as invalid. 317 318 e0[0] = 0xe0; // valid 319 e0[1] = 0x9f; // invalid because outside [0xa0 .. 0xbf] 320 e0[2] = 0x80; // valid 321 run_test(buff, sizeof buff, e0, sizeof e0); 322 e0[1] = 0xc0; // invalid because outside [0xa0 .. 0xbf] 323 run_test(buff, sizeof buff, e0, sizeof e0); 324 325 ed[0] = 0xed; // valid 326 ed[1] = 0x7f; // invalid because outside [0x80 .. 0x9f] 327 ed[2] = 0x80; // valid 328 run_test(buff, sizeof buff, ed, sizeof ed); 329 ed[1] = 0xa0; // invalid because outside [0x80 .. 0x9f] 330 run_test(buff, sizeof buff, ed, sizeof ed); 331 332 for (i = 0; i <= 0xf; ++i) { 333 uint8_t exxx_1[3] = { 0x0, 0x7f, 0x80 }; 334 uint8_t exxx_2[3] = { 0x0, 0xc0, 0x80 }; 335 336 if (i == 0x00) continue; // special case e0 337 if (i == 0x0d) continue; // special case ed 338 339 exxx_1[0] = 0xe0 | i; 340 exxx_2[0] = 0xe0 | i; 341 run_test(buff, sizeof buff, exxx_1, sizeof exxx_1); 342 run_test(buff, sizeof buff, exxx_2, sizeof exxx_2); 343 }; 344 345 printf("\n----- Invalid characters (3rd byte is invalid) -----\n"); 346 // For all 1st bytes 0xe0 .. 0xef the 3rd bytes must be in [0x80 .. 0xbf] 347 // No need to special case 0xe0 and 0xed 348 for (i = 0; i <= 0xf; ++i) { 349 uint8_t exxx_1[3] = { 0x0, 0xab, 0x7f }; 350 uint8_t exxx_2[3] = { 0x0, 0xab, 0xc0 }; 351 352 exxx_1[0] = 0xe0 | i; 353 exxx_2[0] = 0xe0 | i; 354 run_test(buff, sizeof buff, exxx_1, sizeof exxx_1); 355 run_test(buff, sizeof buff, exxx_2, sizeof exxx_2); 356 }; 357 358 printf("\n----- Invalid 2nd char AND output exhausted -----\n"); 359 /* The character is invalid in its 2nd byte AND the output buffer is 360 exhausted (2 bytes are needed) */ 361 uint8_t pat1[] = { 362 0xe0, 0x00, 0x80 363 }; 364 run_test(buff, 1, pat1, 3); 365 366 printf("\n----- Invalid 3rd char AND output exhausted -----\n"); 367 /* The character is invalid in its 3rd byte AND the output buffer is 368 exhausted (2 bytes are needed) */ 369 uint8_t pat2[] = { 370 0xe4, 0x84, 0x00 371 }; 372 run_test(buff, 1, pat2, 3); 373 374 printf("\n----- 1st char valid, 2nd char invalid -----\n"); 375 uint8_t valid_invalid[] = { 376 0xe1, 0x90, 0x90, // valid 377 0xe1, 0x00, 0x90 // invalid 378 }; 379 run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid); 380 } 381 382 // Test conversion of a four-byte character 383 void 384 convert_4_bytes(void) 385 { 386 int i, j; 387 388 printf("\n===== Conversion of a four-byte character =====\n"); 389 390 printf("\n----- Valid characters -----\n"); 391 for (i = 0; i <= 4; ++i) { 392 uint8_t valid[4]; 393 394 valid[0] = 0xf0 | i; 395 396 for (j = 0; j <= 1; ++j) { 397 // Byte 2 398 if (i == 0) { 399 valid[1] = j == 0 ? 0x90 : 0xbf; // 0xf0 400 } else if (i == 4) { 401 valid[1] = j == 0 ? 0x80 : 0x8f; // 0xf4 402 } else { 403 valid[1] = j == 0 ? 0x80 : 0xbf; // 0xf1 .. 0xf3 404 } 405 // Byte 3 and byte 4 have same interval 0x80 .. 0xbf 406 valid[2] = 0x80; 407 valid[3] = 0x80; 408 run_test(buff, sizeof buff, valid, sizeof valid); 409 valid[2] = 0x80; 410 valid[3] = 0xbf; 411 run_test(buff, sizeof buff, valid, sizeof valid); 412 valid[2] = 0xbf; 413 valid[3] = 0x80; 414 run_test(buff, sizeof buff, valid, sizeof valid); 415 valid[2] = 0xbf; 416 valid[3] = 0xbf; 417 run_test(buff, sizeof buff, valid, sizeof valid); 418 } 419 } 420 421 printf("\n----- Valid characters if m3 == 0 -----\n"); 422 // First char is 0xf5 .. 0xf7 423 uint8_t valid_if_not_m3[] = { 424 0xf5, 0x00, 0x00, 0x00, 425 0xf6, 0x11, 0x22, 0x33, 426 0xf7, 0x44, 0x55, 0x66, 427 }; 428 run_test(buff, sizeof buff, valid_if_not_m3, sizeof valid_if_not_m3); 429 430 // As conversion stops upon encountering an invalid character, we 431 // need to test each invalid character separately, to make sure it 432 // is recognized as invalid. 433 434 printf("\n----- Invalid characters (2nd byte is invalid) -----\n"); 435 // Test for invalid four-byte characters where the 2nd byte is invalid. 436 // All other bytes are valid 437 uint8_t f0[4], f4[4]; 438 439 f0[0] = 0xf0; // valid 440 f0[1] = 0x8f; // invalid because outside [0x90 .. 0xbf] 441 f0[2] = 0x80; // valid 442 f0[3] = 0x80; // valid 443 run_test(buff, sizeof buff, f0, sizeof f0); 444 f0[1] = 0xc0; // invalid because outside [0x90 .. 0xbf] 445 run_test(buff, sizeof buff, f0, sizeof f0); 446 447 f4[0] = 0xf4; // valid 448 f4[1] = 0x7f; // invalid because outside [0x80 .. 0x8f] 449 f4[2] = 0x80; // valid 450 f4[3] = 0x80; // valid 451 run_test(buff, sizeof buff, f4, sizeof f4); 452 f4[1] = 0x90; // invalid because outside [0x80 .. 0x9f] 453 run_test(buff, sizeof buff, f4, sizeof f4); 454 455 for (i = 0; i <= 0x4; ++i) { 456 uint8_t fxxx_1[4] = { 0x0, 0x7f, 0x80, 0x80 }; 457 uint8_t fxxx_2[4] = { 0x0, 0xc0, 0x80, 0x80 }; 458 459 if (i == 0) continue; // special case f0 460 if (i == 4) continue; // special case f4 461 462 fxxx_1[0] = 0xf0 | i; 463 fxxx_2[0] = 0xf0 | i; 464 run_test(buff, sizeof buff, fxxx_1, sizeof fxxx_1); 465 run_test(buff, sizeof buff, fxxx_2, sizeof fxxx_2); 466 }; 467 468 printf("\n----- Invalid characters (3rd byte is invalid) -----\n"); 469 // Test for invalid four-byte characters where the 3rd byte is invalid. 470 // All other bytes are valid 471 for (i = 0; i <= 0x4; ++i) { 472 uint8_t fxxx[4] = { 0x0, 0x0, 0x0, 0x80 }; 473 474 fxxx[0] = 0xf0 | i; 475 fxxx[1] = (i == 0) ? 0x94 : 0x84; 476 fxxx[2] = 0x7f; 477 run_test(buff, sizeof buff, fxxx, sizeof fxxx); 478 fxxx[2] = 0xc0; 479 run_test(buff, sizeof buff, fxxx, sizeof fxxx); 480 }; 481 482 printf("\n----- Invalid characters (4th byte is invalid) -----\n"); 483 // Test for invalid four-byte characters where the 3rd byte is invalid. 484 // All other bytes are valid 485 for (i = 0; i <= 0x4; ++i) { 486 uint8_t fxxx[4] = { 0x0, 0x0, 0x80, 0x0 }; 487 488 fxxx[0] = 0xf0 | i; 489 fxxx[1] = (i == 0) ? 0x94 : 0x84; 490 fxxx[3] = 0x7f; 491 run_test(buff, sizeof buff, fxxx, sizeof fxxx); 492 fxxx[3] = 0xc0; 493 run_test(buff, sizeof buff, fxxx, sizeof fxxx); 494 }; 495 496 printf("\n----- Invalid 2nd char AND output exhausted -----\n"); 497 /* The character is invalid in its 2nd byte AND the output buffer is 498 exhausted (4 bytes are needed) */ 499 uint8_t pat1[] = { 500 0xf0, 0x00, 0x80, 0x80 501 }; 502 run_test(buff, 1, pat1, 4); 503 504 printf("\n----- Invalid 3rd char AND output exhausted -----\n"); 505 /* The character is invalid in its 3rd byte AND the output buffer is 506 exhausted (4 bytes are needed) */ 507 uint8_t pat2[] = { 508 0xf0, 0xaa, 0x00, 0x80 509 }; 510 run_test(buff, 3, pat2, 4); 511 512 printf("\n----- Invalid 4th char AND output exhausted -----\n"); 513 /* The character is invalid in its 4th byte AND the output buffer is 514 exhausted (4 bytes are needed) */ 515 uint8_t pat3[] = { 516 0xf0, 0xaa, 0xaa, 0x00 517 }; 518 run_test(buff, 3, pat3, 4); 519 520 printf("\n----- 1st char valid, 2nd char invalid -----\n"); 521 uint8_t valid_invalid[] = { 522 0xf0, 0xaa, 0xaa, 0xaa, // valid 523 0xf0, 0x00, 0x00, 0x00 // invalid 524 }; 525 run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid); 526 } 527 528 529 int main() 530 { 531 convert_1_byte(); 532 convert_2_bytes(); 533 convert_3_bytes(); 534 convert_4_bytes(); 535 536 /* Length == 0, no memory should be read or written */ 537 printf("\n------------- test1 ----------------\n"); 538 run_test(NULL, 0, NULL, 0); 539 540 /* Test exhaustion of source length (source bytes are valid) */ 541 printf("\n------------- test2.1 ----------------\n"); 542 543 /* No character will be written to BUFF, i.e. loop in jitted code 544 is not iterated */ 545 run_test(buff, sizeof buff, NULL, 0); 546 run_test(buff, sizeof buff, pattern1, 0); 547 run_test(buff, sizeof buff, pattern2, 0); 548 run_test(buff, sizeof buff, pattern2, 1); 549 run_test(buff, sizeof buff, pattern3, 0); 550 run_test(buff, sizeof buff, pattern3, 1); 551 run_test(buff, sizeof buff, pattern3, 2); 552 run_test(buff, sizeof buff, pattern4, 0); 553 run_test(buff, sizeof buff, pattern4, 1); 554 run_test(buff, sizeof buff, pattern4, 2); 555 run_test(buff, sizeof buff, pattern4, 3); 556 557 printf("\n------------- test2.2 ----------------\n"); 558 /* At least one character will be written to BUFF, i.e. loop in jitted 559 code is iterated */ 560 run_test(buff, sizeof buff, pattern1, 2); 561 run_test(buff, sizeof buff, pattern2, 5); 562 run_test(buff, sizeof buff, pattern3, 6); 563 run_test(buff, sizeof buff, pattern4, 9); 564 565 /* Test exhaustion of destination length (source bytes are valid) */ 566 printf("\n------------- test3.1 ----------------\n"); 567 568 /* No character will be written to BUFF, i.e. loop in jitted code 569 is not iterated */ 570 571 /* Want to write 2 or 4 bytes at a time */ 572 run_test(NULL, 0, pattern1, sizeof pattern1); // 2-byte result 573 run_test(NULL, 0, pattern2, sizeof pattern2); // 2-byte result 574 run_test(NULL, 1, pattern2, sizeof pattern2); // 2-byte result 575 run_test(NULL, 0, pattern3, sizeof pattern3); // 2-byte result 576 run_test(NULL, 1, pattern3, sizeof pattern3); // 2-byte result 577 run_test(NULL, 0, pattern4, sizeof pattern4); // 4-byte result 578 run_test(NULL, 1, pattern4, sizeof pattern4); // 4-byte result 579 run_test(NULL, 2, pattern4, sizeof pattern4); // 4-byte result 580 run_test(NULL, 3, pattern4, sizeof pattern4); // 4-byte result 581 582 printf("\n------------- test3.2 ----------------\n"); 583 /* At least one character will be written to BUFF, i.e. loop in jitted 584 code is iterated */ 585 run_test(buff, 4, pattern1, sizeof pattern1); 586 run_test(buff, 5, pattern1, sizeof pattern2); 587 run_test(buff, 6, pattern1, sizeof pattern3); 588 run_test(buff, 7, pattern1, sizeof pattern4); 589 590 /* Convert buffer with mixed characters */ 591 printf("\n------------- test4 ----------------\n"); 592 run_test(buff, sizeof buff, mixed, sizeof mixed); 593 594 return 0; 595 } 596