/* Contrary to what the next comment says, this is now an amd64 CPU
   test. */

/*
 *  x86 CPU test
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <math.h>
#include <stdarg.h>
#include <assert.h>


//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////

/*
 * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
 * MD5 Message-Digest Algorithm (RFC 1321).
 *
 * Homepage:
 * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
 *
 * Author:
 * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
 *
 * This software was written by Alexander Peslyak in 2001.  No copyright is
 * claimed, and the software is hereby placed in the public domain.
 * In case this attempt to disclaim copyright and place the software in the
 * public domain is deemed null and void, then the software is
 * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
 * general public under the following terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 * There's ABSOLUTELY NO WARRANTY, express or implied.
 *
 * (This is a heavily cut-down "BSD license".)
 *
 * This differs from Colin Plumb's older public domain implementation in that
 * no exactly 32-bit integer data type is required (any 32-bit or wider
 * unsigned integer data type will do), there's no compile-time endianness
 * configuration, and the function prototypes match OpenSSL's.  No code from
 * Colin Plumb's implementation has been reused; this comment merely compares
 * the properties of the two independent implementations.
 *
 * The primary goals of this implementation are portability and ease of use.
 * It is meant to be fast, but not as fast as possible.  Some known
 * optimizations are not included to reduce source code size and avoid
 * compile-time configuration.
 */

#include <string.h>

// BEGIN #include "md5.h"
/* Any 32-bit or wider unsigned integer data type will do */
typedef unsigned int MD5_u32plus;

/*
 * Running MD5 state.
 *   lo, hi     - message length counters maintained by MD5_Update():
 *                lo holds the low 29 bits of the byte count, hi the rest.
 *   a, b, c, d - the four chaining variables.
 *   buffer     - bytes of a not yet complete 64-byte block.
 *   block      - the current block decoded into 16 host-order words.
 */
typedef struct {
	MD5_u32plus lo, hi;
	MD5_u32plus a, b, c, d;
	unsigned char buffer[64];
	MD5_u32plus block[16];
} MD5_CTX;

void MD5_Init(MD5_CTX *ctx);
void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);
void MD5_Final(unsigned char *result, MD5_CTX *ctx);
// END #include "md5.h"

/*
 * The basic MD5 functions.
 *
 * F and G are optimized compared to their RFC 1321 definitions for
 * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
 * implementation.
 */
#define F(x, y, z)			((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z)			((y) ^ ((z) & ((x) ^ (y))))
#define H(x, y, z)			(((x) ^ (y)) ^ (z))
#define H2(x, y, z)			((x) ^ ((y) ^ (z)))
#define I(x, y, z)			((y) ^ ((x) | ~(z)))

/*
 * The MD5 transformation for all four rounds.
 *
 * Expands to three statements and evaluates (a) several times, so it must
 * only be used as a full statement with side-effect-free lvalues.
 */
#define STEP(f, a, b, c, d, x, t, s) \
	(a) += f((b), (c), (d)) + (x) + (t); \
	(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
	(a) += (b);

/*
 * SET reads 4 input bytes in little-endian byte order and stores them in a
 * properly aligned word in host byte order; GET re-reads a word stored by an
 * earlier SET.
 *
 * The upstream code had a fast path for little-endian architectures that
 * tolerate unaligned accesses, reading the input through a cast to
 * (MD5_u32plus *).  Upstream itself notes that this may violate the C strict
 * aliasing rules unless the MD5 routines live in their own translation unit.
 * Here they share a translation unit with their callers, so only the
 * portable byte-wise form is used.  It yields the same words on every host.
 */
#define SET(n) \
	(ctx->block[(n)] = \
	(MD5_u32plus)ptr[(n) * 4] | \
	((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
	((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
	((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
#define GET(n) \
	(ctx->block[(n)])

/*
 * This processes one or more 64-byte data blocks, but does NOT update the bit
 * counters.  There are no alignment requirements.
 *
 * size must be a non-zero multiple of 64: the loop below is a do/while that
 * subtracts 64 per iteration and stops when the count reaches zero.
 *
 * Returns a pointer to the first byte after the processed data.
 */
static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
{
	const unsigned char *ptr;
	MD5_u32plus a, b, c, d;
	MD5_u32plus saved_a, saved_b, saved_c, saved_d;

	ptr = (const unsigned char *)data;

	a = ctx->a;
	b = ctx->b;
	c = ctx->c;
	d = ctx->d;

	do {
		saved_a = a;
		saved_b = b;
		saved_c = c;
		saved_d = d;

/* Round 1 */
		STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
		STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
		STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
		STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
		STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
		STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
		STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
		STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
		STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
		STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
		STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
		STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
		STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
		STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
		STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
		STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)

/* Round 2 */
		STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
		STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
		STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
		STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
		STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
		STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
		STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
		STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
		STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
		STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
		STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
		STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
		STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
		STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
		STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
		STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)

/* Round 3 */
		STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
		STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
		STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
		STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
		STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
		STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
		STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
		STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
		STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
		STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
		STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
		STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
		STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
		STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
		STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
		STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)

/* Round 4 */
		STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
		STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
		STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
		STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
		STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
		STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
		STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
		STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
		STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
		STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
		STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
		STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
		STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
		STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
		STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
		STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)

		a += saved_a;
		b += saved_b;
		c += saved_c;
		d += saved_d;

		ptr += 64;
	} while (size -= 64);

	ctx->a = a;
	ctx->b = b;
	ctx->c = c;
	ctx->d = d;

	return ptr;
}

/*
 * Resets ctx to the MD5 initial chaining values and a zero message length.
 */
void MD5_Init(MD5_CTX *ctx)
{
	ctx->a = 0x67452301;
	ctx->b = 0xefcdab89;
	ctx->c = 0x98badcfe;
	ctx->d = 0x10325476;

	ctx->lo = 0;
	ctx->hi = 0;
}

/*
 * Feeds size bytes starting at data into the hash.  May be called any number
 * of times between MD5_Init() and MD5_Final().
 *
 * A zero-length update is a no-op and data is not touched in that case, so
 * MD5_Update(ctx, NULL, 0) is safe.  Without the early return the call would
 * reach memcpy() with a null pointer, which is undefined even for length 0.
 */
void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
{
	MD5_u32plus saved_lo;
	unsigned long used, available;

	if (size == 0)
		return;

	/* lo keeps 29 bits of the byte count; carries propagate into hi. */
	saved_lo = ctx->lo;
	if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
		ctx->hi++;
	ctx->hi += size >> 29;

	/* Number of bytes already waiting in ctx->buffer. */
	used = saved_lo & 0x3f;

	if (used) {
		available = 64 - used;

		if (size < available) {
			memcpy(&ctx->buffer[used], data, size);
			return;
		}

		/* Complete the pending block and hash it. */
		memcpy(&ctx->buffer[used], data, available);
		data = (const unsigned char *)data + available;
		size -= available;
		body(ctx, ctx->buffer, 64);
	}

	/* Hash all whole blocks straight from the caller's data. */
	if (size >= 64) {
		data = body(ctx, data, size & ~(unsigned long)0x3f);
		size &= 0x3f;
	}

	/* Keep the tail (possibly empty) for the next call. */
	memcpy(ctx->buffer, data, size);
}

/*
 * Stores the low 32 bits of src at dst[0..3] in little-endian byte order.
 * Expands to four statements; use only as a full statement.
 */
#define OUT(dst, src) \
	(dst)[0] = (unsigned char)(src); \
	(dst)[1] = (unsigned char)((src) >> 8); \
	(dst)[2] = (unsigned char)((src) >> 16); \
	(dst)[3] = (unsigned char)((src) >> 24);

/*
 * Pads the message, appends its length, and writes the 16-byte digest to
 * result.  ctx is zeroed afterwards and must be re-initialised with
 * MD5_Init() before it is used again.
 */
void MD5_Final(unsigned char *result, MD5_CTX *ctx)
{
	unsigned long used, available;

	used = ctx->lo & 0x3f;

	ctx->buffer[used++] = 0x80;

	available = 64 - used;

	/* Not enough room for the 8-byte length: flush a padding block first. */
	if (available < 8) {
		memset(&ctx->buffer[used], 0, available);
		body(ctx, ctx->buffer, 64);
		used = 0;
		available = 64;
	}

	memset(&ctx->buffer[used], 0, available - 8);

	/* Convert the byte count in lo to a bit count before storing it. */
	ctx->lo <<= 3;
	OUT(&ctx->buffer[56], ctx->lo)
	OUT(&ctx->buffer[60], ctx->hi)

	body(ctx, ctx->buffer, 64);

	OUT(&result[0], ctx->a)
	OUT(&result[4], ctx->b)
	OUT(&result[8], ctx->c)
	OUT(&result[12], ctx->d)

	memset(ctx, 0, sizeof(*ctx));
}


//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////

/* Hash state shared by xxprintf_start(), xxprintf() and xxprintf_done(). */
static MD5_CTX md5ctx;

/* Starts a new hashed-output session; call before the first xxprintf(). */
void xxprintf_start(void)
{
   MD5_Init(&md5ctx);
}

/*
 * Finishes the session and prints the digest of everything passed to
 * xxprintf() as "final MD5 = <32 lowercase hex digits>\n" on stdout.
 */
void xxprintf_done(void)
{
   static const char hexchar[] = "0123456789abcdef";
   unsigned char result[16];
   int i;

   memset(result, 0, sizeof(result));
   MD5_Final(&result[0], &md5ctx);
   printf("final MD5 = ");
   for (i = 0; i < 16; i++) {
      printf("%c%c", hexchar[0xF & (result[i] >> 4)],
                     hexchar[0xF & (result[i] >> 0)]);
   }
   printf("\n");
}

/*
 * printf-style sink: formats the arguments and feeds the resulting text into
 * the running MD5 instead of printing it.
 *
 * A single call may produce at most sizeof(buf) - 2 characters.  Anything
 * longer, or a vsnprintf() failure, trips an assertion rather than being
 * silently truncated.
 */
__attribute__((format(__printf__, 1, 2)))
void xxprintf (const char *format, ...)
{
   char buf[128];
   va_list vargs;
   int n;

   memset(buf, 0, sizeof(buf));

   va_start(vargs, format);
   n = vsnprintf(buf, sizeof(buf)-1, format, vargs);
   va_end(vargs);

   /* Check the sign first so the size_t comparison below is well defined. */
   assert(n >= 0);
   assert((size_t)n < sizeof(buf)-1);
   assert(buf[sizeof(buf)-1] == 0);
   assert(buf[sizeof(buf)-2] == 0);

   MD5_Update(&md5ctx, buf, strlen(buf));
}

//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////

*/ 390 #define TEST_INTEGER_VERBOSE 1 391 392 typedef long long int int64; 393 394 //#define LINUX_VM86_IOPL_FIX 395 //#define TEST_P4_FLAGS 396 397 #define xglue(x, y) x ## y 398 #define glue(x, y) xglue(x, y) 399 #define stringify(s) tostring(s) 400 #define tostring(s) #s 401 402 #define CC_C 0x0001 403 #define CC_P 0x0004 404 #define CC_A 0x0010 405 #define CC_Z 0x0040 406 #define CC_S 0x0080 407 #define CC_O 0x0800 408 409 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A) 410 411 #define OP add 412 #include "fb_test_amd64.h" 413 414 #define OP sub 415 #include "fb_test_amd64.h" 416 417 #define OP xor 418 #include "fb_test_amd64.h" 419 420 #define OP and 421 #include "fb_test_amd64.h" 422 423 #define OP or 424 #include "fb_test_amd64.h" 425 426 #define OP cmp 427 #include "fb_test_amd64.h" 428 429 #define OP adc 430 #define OP_CC 431 #include "fb_test_amd64.h" 432 433 #define OP sbb 434 #define OP_CC 435 #include "fb_test_amd64.h" 436 437 #define OP adcx 438 #define NSH 439 #define OP_CC 440 #include "fb_test_amd64.h" 441 442 #define OP adox 443 #define NSH 444 #define OP_CC 445 #include "fb_test_amd64.h" 446 447 #define OP inc 448 #define OP_CC 449 #define OP1 450 #include "fb_test_amd64.h" 451 452 #define OP dec 453 #define OP_CC 454 #define OP1 455 #include "fb_test_amd64.h" 456 457 #define OP neg 458 #define OP_CC 459 #define OP1 460 #include "fb_test_amd64.h" 461 462 #define OP not 463 #define OP_CC 464 #define OP1 465 #include "fb_test_amd64.h" 466 467 #undef CC_MASK 468 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O) 469 470 #define OP shl 471 #include "fb_test_amd64_shift.h" 472 473 #define OP shr 474 #include "fb_test_amd64_shift.h" 475 476 #define OP sar 477 #include "fb_test_amd64_shift.h" 478 479 #define OP rol 480 #include "fb_test_amd64_shift.h" 481 482 #define OP ror 483 #include "fb_test_amd64_shift.h" 484 485 #define OP rcr 486 #define OP_CC 487 #include "fb_test_amd64_shift.h" 488 489 #define OP rcl 490 #define OP_CC 491 #include 
"fb_test_amd64_shift.h" 492 493 /* XXX: should be more precise ? */ 494 #undef CC_MASK 495 #define CC_MASK (CC_C) 496 497 /* lea test (modrm support) */ 498 #define TEST_LEA(STR)\ 499 {\ 500 asm("leaq " STR ", %0"\ 501 : "=r" (res)\ 502 : "a" (rax), "b" (rbx), "c" (rcx), "d" (rdx), "S" (rsi), "D" (rdi));\ 503 xxprintf("lea %s = %016llx\n", STR, res);\ 504 } 505 506 #define TEST_LEA16(STR)\ 507 {\ 508 asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\ 509 : "=wq" (res)\ 510 : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\ 511 xxprintf("lea %s = %08x\n", STR, res);\ 512 } 513 514 515 void test_lea(void) 516 { 517 int64 rax, rbx, rcx, rdx, rsi, rdi, res; 518 rax = 0x0001; 519 rbx = 0x0002; 520 rcx = 0x0004; 521 rdx = 0x0008; 522 rsi = 0x0010; 523 rdi = 0x0020; 524 525 TEST_LEA("0x4000"); 526 527 TEST_LEA("(%%rax)"); 528 TEST_LEA("(%%rbx)"); 529 TEST_LEA("(%%rcx)"); 530 TEST_LEA("(%%rdx)"); 531 TEST_LEA("(%%rsi)"); 532 TEST_LEA("(%%rdi)"); 533 534 TEST_LEA("0x40(%%rax)"); 535 TEST_LEA("0x40(%%rbx)"); 536 TEST_LEA("0x40(%%rcx)"); 537 TEST_LEA("0x40(%%rdx)"); 538 TEST_LEA("0x40(%%rsi)"); 539 TEST_LEA("0x40(%%rdi)"); 540 541 TEST_LEA("0x4000(%%rax)"); 542 TEST_LEA("0x4000(%%rbx)"); 543 TEST_LEA("0x4000(%%rcx)"); 544 TEST_LEA("0x4000(%%rdx)"); 545 TEST_LEA("0x4000(%%rsi)"); 546 TEST_LEA("0x4000(%%rdi)"); 547 548 TEST_LEA("(%%rax, %%rcx)"); 549 TEST_LEA("(%%rbx, %%rdx)"); 550 TEST_LEA("(%%rcx, %%rcx)"); 551 TEST_LEA("(%%rdx, %%rcx)"); 552 TEST_LEA("(%%rsi, %%rcx)"); 553 TEST_LEA("(%%rdi, %%rcx)"); 554 555 TEST_LEA("0x40(%%rax, %%rcx)"); 556 TEST_LEA("0x4000(%%rbx, %%rdx)"); 557 558 TEST_LEA("(%%rcx, %%rcx, 2)"); 559 TEST_LEA("(%%rdx, %%rcx, 4)"); 560 TEST_LEA("(%%rsi, %%rcx, 8)"); 561 562 TEST_LEA("(,%%rax, 2)"); 563 TEST_LEA("(,%%rbx, 4)"); 564 TEST_LEA("(,%%rcx, 8)"); 565 566 TEST_LEA("0x40(,%%rax, 2)"); 567 TEST_LEA("0x40(,%%rbx, 4)"); 568 TEST_LEA("0x40(,%%rcx, 8)"); 569 570 571 TEST_LEA("-10(%%rcx, %%rcx, 2)"); 572 TEST_LEA("-10(%%rdx, %%rcx, 
4)"); 573 TEST_LEA("-10(%%rsi, %%rcx, 8)"); 574 575 TEST_LEA("0x4000(%%rcx, %%rcx, 2)"); 576 TEST_LEA("0x4000(%%rdx, %%rcx, 4)"); 577 TEST_LEA("0x4000(%%rsi, %%rcx, 8)"); 578 } 579 580 #define TEST_JCC(JCC, v1, v2)\ 581 { int one = 1; \ 582 int res;\ 583 asm("movl $1, %0\n\t"\ 584 "cmpl %2, %1\n\t"\ 585 "j" JCC " 1f\n\t"\ 586 "movl $0, %0\n\t"\ 587 "1:\n\t"\ 588 : "=r" (res)\ 589 : "r" (v1), "r" (v2));\ 590 xxprintf("%-10s %d\n", "j" JCC, res);\ 591 \ 592 asm("movl $0, %0\n\t"\ 593 "cmpl %2, %1\n\t"\ 594 "set" JCC " %b0\n\t"\ 595 : "=r" (res)\ 596 : "r" (v1), "r" (v2));\ 597 xxprintf("%-10s %d\n", "set" JCC, res);\ 598 {\ 599 asm("movl $0x12345678, %0\n\t"\ 600 "cmpl %2, %1\n\t"\ 601 "cmov" JCC "l %3, %0\n\t"\ 602 : "=r" (res)\ 603 : "r" (v1), "r" (v2), "m" (one));\ 604 xxprintf("%-10s R=0x%08x\n", "cmov" JCC "l", res);\ 605 asm("movl $0x12345678, %0\n\t"\ 606 "cmpl %2, %1\n\t"\ 607 "cmov" JCC "w %w3, %w0\n\t"\ 608 : "=r" (res)\ 609 : "r" (v1), "r" (v2), "r" (one));\ 610 xxprintf("%-10s R=0x%08x\n", "cmov" JCC "w", res);\ 611 } \ 612 } 613 614 /* various jump tests */ 615 void test_jcc(void) 616 { 617 TEST_JCC("ne", 1, 1); 618 TEST_JCC("ne", 1, 0); 619 620 TEST_JCC("e", 1, 1); 621 TEST_JCC("e", 1, 0); 622 623 TEST_JCC("l", 1, 1); 624 TEST_JCC("l", 1, 0); 625 TEST_JCC("l", 1, -1); 626 627 TEST_JCC("le", 1, 1); 628 TEST_JCC("le", 1, 0); 629 TEST_JCC("le", 1, -1); 630 631 TEST_JCC("ge", 1, 1); 632 TEST_JCC("ge", 1, 0); 633 TEST_JCC("ge", -1, 1); 634 635 TEST_JCC("g", 1, 1); 636 TEST_JCC("g", 1, 0); 637 TEST_JCC("g", 1, -1); 638 639 TEST_JCC("b", 1, 1); 640 TEST_JCC("b", 1, 0); 641 TEST_JCC("b", 1, -1); 642 643 TEST_JCC("be", 1, 1); 644 TEST_JCC("be", 1, 0); 645 TEST_JCC("be", 1, -1); 646 647 TEST_JCC("ae", 1, 1); 648 TEST_JCC("ae", 1, 0); 649 TEST_JCC("ae", 1, -1); 650 651 TEST_JCC("a", 1, 1); 652 TEST_JCC("a", 1, 0); 653 TEST_JCC("a", 1, -1); 654 655 656 TEST_JCC("p", 1, 1); 657 TEST_JCC("p", 1, 0); 658 659 TEST_JCC("np", 1, 1); 660 TEST_JCC("np", 1, 0); 661 662 
TEST_JCC("o", 0x7fffffff, 0); 663 TEST_JCC("o", 0x7fffffff, -1); 664 665 TEST_JCC("no", 0x7fffffff, 0); 666 TEST_JCC("no", 0x7fffffff, -1); 667 668 TEST_JCC("s", 0, 1); 669 TEST_JCC("s", 0, -1); 670 TEST_JCC("s", 0, 0); 671 672 TEST_JCC("ns", 0, 1); 673 TEST_JCC("ns", 0, -1); 674 TEST_JCC("ns", 0, 0); 675 } 676 677 #undef CC_MASK 678 #ifdef TEST_P4_FLAGS 679 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A) 680 #else 681 #define CC_MASK (CC_O | CC_C) 682 #endif 683 684 #define OP mul 685 #include "fb_test_amd64_muldiv.h" 686 687 #define OP imul 688 #include "fb_test_amd64_muldiv.h" 689 690 void test_imulw2(int64 op0, int64 op1) 691 { 692 int64 res, s1, s0, flags; 693 s0 = op0; 694 s1 = op1; 695 res = s0; 696 flags = 0; 697 asm ("pushq %4\n\t" 698 "popfq\n\t" 699 "imulw %w2, %w0\n\t" 700 "pushfq\n\t" 701 "popq %1\n\t" 702 : "=q" (res), "=g" (flags) 703 : "q" (s1), "0" (res), "1" (flags)); 704 xxprintf("%-10s A=%016llx B=%016llx R=%016llx CC=%04llx\n", 705 "imulw", s0, s1, res, flags & CC_MASK); 706 } 707 708 void test_imull2(int64 op0, int64 op1) 709 { 710 int res, s1; 711 int64 s0, flags; 712 s0 = op0; 713 s1 = op1; 714 res = s0; 715 flags = 0; 716 asm ("pushq %4\n\t" 717 "popfq\n\t" 718 "imull %2, %0\n\t" 719 "pushfq\n\t" 720 "popq %1\n\t" 721 : "=q" (res), "=g" (flags) 722 : "q" (s1), "0" (res), "1" (flags)); 723 xxprintf("%-10s A=%016llx B=%08x R=%08x CC=%04llx\n", 724 "imull", s0, s1, res, flags & CC_MASK); 725 } 726 727 #define TEST_IMUL_IM(size, size1, op0, op1)\ 728 {\ 729 int64 res, flags;\ 730 flags = 0;\ 731 res = 0;\ 732 asm ("pushq %3\n\t"\ 733 "popfq\n\t"\ 734 "imul" size " $" #op0 ", %" size1 "2, %" size1 "0\n\t" \ 735 "pushfq\n\t"\ 736 "popq %1\n\t"\ 737 : "=r" (res), "=g" (flags)\ 738 : "r" (op1), "1" (flags), "0" (res));\ 739 xxprintf("%-10s A=%08x B=%08x R=%016llx CC=%04llx\n",\ 740 "imul" size, op0, op1, res, flags & CC_MASK);\ 741 } 742 743 #define TEST_IMUL_IM_L(op0, op1)\ 744 {\ 745 int64 flags = 0;\ 746 int res = 0;\ 747 int res64 = 
0;\ 748 asm ("pushq %3\n\t"\ 749 "popfq\n\t"\ 750 "imul $" #op0 ", %2, %0\n\t" \ 751 "pushfq\n\t"\ 752 "popq %1\n\t"\ 753 : "=r" (res64), "=g" (flags)\ 754 : "r" (op1), "1" (flags), "0" (res));\ 755 xxprintf("%-10s A=%08x B=%08x R=%08x CC=%04llx\n",\ 756 "imull", op0, op1, res, flags & CC_MASK);\ 757 } 758 759 760 #undef CC_MASK 761 #define CC_MASK (0) 762 763 #define OP div 764 #include "fb_test_amd64_muldiv.h" 765 766 #define OP idiv 767 #include "fb_test_amd64_muldiv.h" 768 769 void test_mul(void) 770 { 771 test_imulb(0x1234561d, 4); 772 test_imulb(3, -4); 773 test_imulb(0x80, 0x80); 774 test_imulb(0x10, 0x10); 775 776 test_imulw(0, 0, 0); 777 test_imulw(0, 0xFF, 0xFF); 778 test_imulw(0, 0xFF, 0x100); 779 test_imulw(0, 0x1234001d, 45); 780 test_imulw(0, 23, -45); 781 test_imulw(0, 0x8000, 0x8000); 782 test_imulw(0, 0x100, 0x100); 783 784 test_imull(0, 0, 0); 785 test_imull(0, 0xFFFF, 0xFFFF); 786 test_imull(0, 0xFFFF, 0x10000); 787 test_imull(0, 0x1234001d, 45); 788 test_imull(0, 23, -45); 789 test_imull(0, 0x80000000, 0x80000000); 790 test_imull(0, 0x10000, 0x10000); 791 792 test_mulb(0x1234561d, 4); 793 test_mulb(3, -4); 794 test_mulb(0x80, 0x80); 795 test_mulb(0x10, 0x10); 796 797 test_mulw(0, 0x1234001d, 45); 798 test_mulw(0, 23, -45); 799 test_mulw(0, 0x8000, 0x8000); 800 test_mulw(0, 0x100, 0x100); 801 802 test_mull(0, 0x1234001d, 45); 803 test_mull(0, 23, -45); 804 test_mull(0, 0x80000000, 0x80000000); 805 test_mull(0, 0x10000, 0x10000); 806 807 test_imulw2(0x1234001d, 45); 808 test_imulw2(23, -45); 809 test_imulw2(0x8000, 0x8000); 810 test_imulw2(0x100, 0x100); 811 812 test_imull2(0x1234001d, 45); 813 test_imull2(23, -45); 814 test_imull2(0x80000000, 0x80000000); 815 test_imull2(0x10000, 0x10000); 816 817 TEST_IMUL_IM("w", "w", 45, 0x1234); 818 TEST_IMUL_IM("w", "w", -45, 23); 819 TEST_IMUL_IM("w", "w", 0x8000, 0x80000000); 820 TEST_IMUL_IM("w", "w", 0x7fff, 0x1000); 821 822 TEST_IMUL_IM_L(45, 0x1234); 823 TEST_IMUL_IM_L(-45, 23); 824 
TEST_IMUL_IM_L(0x8000, 0x80000000); 825 TEST_IMUL_IM_L(0x7fff, 0x1000); 826 827 test_idivb(0x12341678, 0x127e); 828 test_idivb(0x43210123, -5); 829 test_idivb(0x12340004, -1); 830 831 test_idivw(0, 0x12345678, 12347); 832 test_idivw(0, -23223, -45); 833 test_idivw(0, 0x12348000, -1); 834 test_idivw(0x12343, 0x12345678, 0x81238567); 835 836 test_idivl(0, 0x12345678, 12347); 837 test_idivl(0, -233223, -45); 838 test_idivl(0, 0x80000000, -1); 839 test_idivl(0x12343, 0x12345678, 0x81234567); 840 841 test_idivq(0, 0x12345678, 12347); 842 test_idivq(0, -233223, -45); 843 test_idivq(0, 0x80000000, -1); 844 test_idivq(0x12343, 0x12345678, 0x81234567); 845 846 test_divb(0x12341678, 0x127e); 847 test_divb(0x43210123, -5); 848 test_divb(0x12340004, -1); 849 850 test_divw(0, 0x12345678, 12347); 851 test_divw(0, -23223, -45); 852 test_divw(0, 0x12348000, -1); 853 test_divw(0x12343, 0x12345678, 0x81238567); 854 855 test_divl(0, 0x12345678, 12347); 856 test_divl(0, -233223, -45); 857 test_divl(0, 0x80000000, -1); 858 test_divl(0x12343, 0x12345678, 0x81234567); 859 860 test_divq(0, 0x12345678, 12347); 861 test_divq(0, -233223, -45); 862 test_divq(0, 0x80000000, -1); 863 test_divq(0x12343, 0x12345678, 0x81234567); 864 } 865 866 #define TEST_BSX(op, size, op0)\ 867 {\ 868 int res, val, resz;\ 869 val = op0;\ 870 asm("xorl %1, %1\n"\ 871 "movl $0x12345678, %0\n"\ 872 #op " %" size "2, %" size "0 ; setz %b1" \ 873 : "=r" (res), "=q" (resz)\ 874 : "r" (val));\ 875 xxprintf("%-10s A=%08x R=%08x %d\n", #op, val, res, resz);\ 876 } 877 878 void test_bsx(void) 879 { 880 TEST_BSX(bsrw, "w", 0); 881 TEST_BSX(bsrw, "w", 0x12340128); 882 TEST_BSX(bsrl, "", 0); 883 TEST_BSX(bsrl, "", 0x00340128); 884 TEST_BSX(bsfw, "w", 0); 885 TEST_BSX(bsfw, "w", 0x12340128); 886 TEST_BSX(bsfl, "", 0); 887 TEST_BSX(bsfl, "", 0x00340128); 888 } 889 890 /**********************************************/ 891 892 void test_fops(double a, double b) 893 { 894 xxprintf("a=%f b=%f a+b=%f\n", a, b, a + b); 895 
xxprintf("a=%f b=%f a-b=%f\n", a, b, a - b); 896 xxprintf("a=%f b=%f a*b=%f\n", a, b, a * b); 897 xxprintf("a=%f b=%f a/b=%f\n", a, b, a / b); 898 xxprintf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b)); 899 xxprintf("a=%f sqrt(a)=%f\n", a, sqrt(a)); 900 xxprintf("a=%f sin(a)=%f\n", a, sin(a)); 901 xxprintf("a=%f cos(a)=%f\n", a, cos(a)); 902 xxprintf("a=%f tan(a)=%f\n", a, tan(a)); 903 xxprintf("a=%f log(a)=%f\n", a, log(a)); 904 xxprintf("a=%f exp(a)=%f\n", a, exp(a)); 905 xxprintf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b)); 906 /* just to test some op combining */ 907 xxprintf("a=%f asin(sin(a))=%f\n", a, asin(sin(a))); 908 xxprintf("a=%f acos(cos(a))=%f\n", a, acos(cos(a))); 909 xxprintf("a=%f atan(tan(a))=%f\n", a, atan(tan(a))); 910 } 911 912 void test_fcmp(double a, double b) 913 { 914 xxprintf("(%f<%f)=%d\n", 915 a, b, a < b); 916 xxprintf("(%f<=%f)=%d\n", 917 a, b, a <= b); 918 xxprintf("(%f==%f)=%d\n", 919 a, b, a == b); 920 xxprintf("(%f>%f)=%d\n", 921 a, b, a > b); 922 xxprintf("(%f<=%f)=%d\n", 923 a, b, a >= b); 924 { 925 unsigned long long int rflags; 926 /* test f(u)comi instruction */ 927 asm("fcomi %2, %1\n" 928 "pushfq\n" 929 "popq %0\n" 930 : "=r" (rflags) 931 : "t" (a), "u" (b)); 932 xxprintf("fcomi(%f %f)=%016llx\n", a, b, rflags & (CC_Z | CC_P | CC_C)); 933 } 934 } 935 936 void test_fcvt(double a) 937 { 938 float fa; 939 long double la; 940 int16_t fpuc; 941 int i; 942 int64 lla; 943 int ia; 944 int16_t wa; 945 double ra; 946 947 fa = a; 948 la = a; 949 xxprintf("(float)%f = %f\n", a, fa); 950 xxprintf("(long double)%f = %Lf\n", a, la); 951 xxprintf("a=%016llx\n", *(unsigned long long int *) &a); 952 xxprintf("la=%016llx %04x\n", *(unsigned long long int *) &la, 953 *(unsigned short *) ((char *)(&la) + 8)); 954 955 /* test all roundings */ 956 asm volatile ("fstcw %0" : "=m" (fpuc)); 957 for(i=0;i<4;i++) { 958 short zz = (fpuc & ~0x0c00) | (i << 10); 959 asm volatile ("fldcw %0" : : "m" (zz)); 960 asm volatile ("fists %0" : "=m" (wa) : 
"t" (a)); 961 asm volatile ("fistl %0" : "=m" (ia) : "t" (a)); 962 asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st"); 963 asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a)); 964 asm volatile ("fldcw %0" : : "m" (fpuc)); 965 xxprintf("(short)a = %d\n", wa); 966 xxprintf("(int)a = %d\n", ia); 967 xxprintf("(int64_t)a = %lld\n", lla); 968 xxprintf("rint(a) = %f\n", ra); 969 } 970 } 971 972 #define TEST(N) \ 973 asm("fld" #N : "=t" (a)); \ 974 xxprintf("fld" #N "= %f\n", a); 975 976 void test_fconst(void) 977 { 978 double a; 979 TEST(1); 980 TEST(l2t); 981 TEST(l2e); 982 TEST(pi); 983 TEST(lg2); 984 TEST(ln2); 985 TEST(z); 986 } 987 988 void test_fbcd(double a) 989 { 990 unsigned short bcd[5]; 991 double b; 992 993 asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st"); 994 asm("fbld %1" : "=t" (b) : "m" (bcd[0])); 995 xxprintf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n", 996 a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b); 997 } 998 999 #define TEST_ENV(env, save, restore)\ 1000 {\ 1001 memset((env), 0xaa, sizeof(*(env)));\ 1002 for(i=0;i<5;i++)\ 1003 asm volatile ("fldl %0" : : "m" (dtab[i]));\ 1004 asm(save " %0\n" : : "m" (*(env)));\ 1005 asm(restore " %0\n": : "m" (*(env)));\ 1006 for(i=0;i<5;i++)\ 1007 asm volatile ("fstpl %0" : "=m" (rtab[i]));\ 1008 for(i=0;i<5;i++)\ 1009 xxprintf("res[%d]=%f\n", i, rtab[i]);\ 1010 xxprintf("fpuc=%04x fpus=%04x fptag=%04x\n",\ 1011 (env)->fpuc,\ 1012 (env)->fpus & 0xff00,\ 1013 (env)->fptag);\ 1014 } 1015 1016 void test_fenv(void) 1017 { 1018 struct __attribute__((packed)) { 1019 uint16_t fpuc; 1020 uint16_t dummy1; 1021 uint16_t fpus; 1022 uint16_t dummy2; 1023 uint16_t fptag; 1024 uint16_t dummy3; 1025 uint32_t ignored[4]; 1026 long double fpregs[8]; 1027 } float_env32; 1028 double dtab[8]; 1029 double rtab[8]; 1030 int i; 1031 1032 for(i=0;i<8;i++) 1033 dtab[i] = i + 1; 1034 1035 TEST_ENV(&float_env32, "fnstenv", "fldenv"); 1036 TEST_ENV(&float_env32, "fnsave", "frstor"); 1037 1038 /* test for ffree */ 1039 for(i=0;i<5;i++) 
1040 asm volatile ("fldl %0" : : "m" (dtab[i])); 1041 asm volatile("ffree %st(2)"); 1042 asm volatile ("fnstenv %0\n" : : "m" (float_env32)); 1043 asm volatile ("fninit"); 1044 xxprintf("fptag=%04x\n", float_env32.fptag); 1045 } 1046 1047 1048 #define TEST_FCMOV(a, b, rflags, CC)\ 1049 {\ 1050 double res;\ 1051 asm("pushq %3\n"\ 1052 "popfq\n"\ 1053 "fcmov" CC " %2, %0\n"\ 1054 : "=t" (res)\ 1055 : "0" (a), "u" (b), "g" (rflags));\ 1056 xxprintf("fcmov%s rflags=0x%04llx-> %f\n", \ 1057 CC, rflags, res);\ 1058 } 1059 1060 void test_fcmov(void) 1061 { 1062 double a, b; 1063 int64 rflags, i; 1064 1065 a = 1.0; 1066 b = 2.0; 1067 for(i = 0; i < 4; i++) { 1068 rflags = 0; 1069 if (i & 1) 1070 rflags |= CC_C; 1071 if (i & 2) 1072 rflags |= CC_Z; 1073 TEST_FCMOV(a, b, rflags, "b"); 1074 TEST_FCMOV(a, b, rflags, "e"); 1075 TEST_FCMOV(a, b, rflags, "be"); 1076 TEST_FCMOV(a, b, rflags, "nb"); 1077 TEST_FCMOV(a, b, rflags, "ne"); 1078 TEST_FCMOV(a, b, rflags, "nbe"); 1079 } 1080 TEST_FCMOV(a, b, (int64)0, "u"); 1081 TEST_FCMOV(a, b, (int64)CC_P, "u"); 1082 TEST_FCMOV(a, b, (int64)0, "nu"); 1083 TEST_FCMOV(a, b, (int64)CC_P, "nu"); 1084 } 1085 1086 void test_floats(void) 1087 { 1088 test_fops(2, 3); 1089 test_fops(1.4, -5); 1090 test_fcmp(2, -1); 1091 test_fcmp(2, 2); 1092 test_fcmp(2, 3); 1093 test_fcvt(0.5); 1094 test_fcvt(-0.5); 1095 test_fcvt(1.0/7.0); 1096 test_fcvt(-1.0/9.0); 1097 test_fcvt(32768); 1098 test_fcvt(-1e20); 1099 test_fconst(); 1100 // REINSTATE (maybe): test_fbcd(1234567890123456); 1101 // REINSTATE (maybe): test_fbcd(-123451234567890); 1102 // REINSTATE: test_fenv(); 1103 // REINSTATE: test_fcmov(); 1104 } 1105 1106 /**********************************************/ 1107 1108 #define TEST_XCHG(op, size, opconst)\ 1109 {\ 1110 int op0, op1;\ 1111 op0 = 0x12345678;\ 1112 op1 = 0xfbca7654;\ 1113 asm(#op " %" size "0, %" size "1" \ 1114 : "=q" (op0), opconst (op1) \ 1115 : "0" (op0), "1" (op1));\ 1116 xxprintf("%-10s A=%08x B=%08x\n",\ 1117 #op, op0, op1);\ 1118 
} 1119 1120 #define TEST_CMPXCHG(op, size, opconst, eax)\ 1121 {\ 1122 int op0, op1;\ 1123 op0 = 0x12345678;\ 1124 op1 = 0xfbca7654;\ 1125 asm(#op " %" size "0, %" size "1" \ 1126 : "=q" (op0), opconst (op1) \ 1127 : "0" (op0), "1" (op1), "a" (eax));\ 1128 xxprintf("%-10s EAX=%08x A=%08x C=%08x\n",\ 1129 #op, eax, op0, op1);\ 1130 } 1131 1132 1133 /**********************************************/ 1134 /* segmentation tests */ 1135 1136 extern char func_lret32; 1137 extern char func_iret32; 1138 1139 uint8_t str_buffer[4096]; 1140 1141 #define TEST_STRING1(OP, size, DF, REP)\ 1142 {\ 1143 int64 rsi, rdi, rax, rcx, rflags;\ 1144 \ 1145 rsi = (long)(str_buffer + sizeof(str_buffer) / 2);\ 1146 rdi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\ 1147 rax = 0x12345678;\ 1148 rcx = 17;\ 1149 \ 1150 asm volatile ("pushq $0\n\t"\ 1151 "popfq\n\t"\ 1152 DF "\n\t"\ 1153 REP #OP size "\n\t"\ 1154 "cld\n\t"\ 1155 "pushfq\n\t"\ 1156 "popq %4\n\t"\ 1157 : "=S" (rsi), "=D" (rdi), "=a" (rax), "=c" (rcx), "=g" (rflags)\ 1158 : "0" (rsi), "1" (rdi), "2" (rax), "3" (rcx));\ 1159 xxprintf("%-10s ESI=%016llx EDI=%016llx EAX=%016llx ECX=%016llx EFL=%04llx\n",\ 1160 REP #OP size, rsi, rdi, rax, rcx,\ 1161 rflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\ 1162 } 1163 1164 #define TEST_STRING(OP, REP)\ 1165 TEST_STRING1(OP, "b", "", REP);\ 1166 TEST_STRING1(OP, "w", "", REP);\ 1167 TEST_STRING1(OP, "l", "", REP);\ 1168 TEST_STRING1(OP, "b", "std", REP);\ 1169 TEST_STRING1(OP, "w", "std", REP);\ 1170 TEST_STRING1(OP, "l", "std", REP) 1171 1172 void test_string(void) 1173 { 1174 int64 i; 1175 for(i = 0;i < sizeof(str_buffer); i++) 1176 str_buffer[i] = i + 0x56; 1177 TEST_STRING(stos, ""); 1178 TEST_STRING(stos, "rep "); 1179 TEST_STRING(lods, ""); /* to verify stos */ 1180 // TEST_STRING(lods, "rep "); 1181 TEST_STRING(movs, ""); 1182 TEST_STRING(movs, "rep "); 1183 TEST_STRING(lods, ""); /* to verify stos */ 1184 1185 /* XXX: better tests */ 1186 TEST_STRING(scas, ""); 1187 
TEST_STRING(scas, "repz "); 1188 TEST_STRING(scas, "repnz "); 1189 // REINSTATE? TEST_STRING(cmps, ""); 1190 TEST_STRING(cmps, "repz "); 1191 // REINSTATE? TEST_STRING(cmps, "repnz "); 1192 } 1193 1194 int main(int argc, char **argv) 1195 { 1196 // The three commented out test cases produce different results at different 1197 // compiler optimisation levels. This suggests to me that their inline 1198 // assembly is incorrect. I don't have time to investigate now, though. So 1199 // they are disabled. 1200 xxprintf_start(); 1201 test_adc(); 1202 test_adcx(); 1203 test_add(); 1204 test_adox(); 1205 test_and(); 1206 // test_bsx(); 1207 test_cmp(); 1208 test_dec(); 1209 test_fcmov(); 1210 test_fconst(); 1211 test_fenv(); 1212 test_floats(); 1213 test_inc(); 1214 // test_jcc(); 1215 test_lea(); 1216 test_mul(); 1217 test_neg(); 1218 test_not(); 1219 test_or(); 1220 test_rcl(); 1221 test_rcr(); 1222 test_rol(); 1223 test_ror(); 1224 test_sar(); 1225 test_sbb(); 1226 test_shl(); 1227 test_shr(); 1228 // test_string(); 1229 test_sub(); 1230 test_xor(); 1231 xxprintf_done(); 1232 // the expected MD5SUM is 66802c845574c7c69f30d29ef85f7ca3 1233 return 0; 1234 } 1235