Home | History | Annotate | Download | only in amd64
      1 
      2 /* Contrary to what the next comment says, this is now an amd64 CPU
      3    test. */
      4 
      5 /*
      6  *  x86 CPU test
      7  *
      8  *  Copyright (c) 2003 Fabrice Bellard
      9  *
     10  *  This program is free software; you can redistribute it and/or modify
     11  *  it under the terms of the GNU General Public License as published by
     12  *  the Free Software Foundation; either version 2 of the License, or
     13  *  (at your option) any later version.
     14  *
     15  *  This program is distributed in the hope that it will be useful,
     16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     18  *  GNU General Public License for more details.
     19  *
     20  *  You should have received a copy of the GNU General Public License
     21  *  along with this program; if not, write to the Free Software
     22  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     23  */
     24 
     25 #include <stdlib.h>
     26 #include <stdio.h>
     27 #include <string.h>
     28 #include <inttypes.h>
     29 #include <math.h>
     30 #include <stdarg.h>
     31 #include <assert.h>
     32 
     33 
     34 //////////////////////////////////////////////////////////////////
     35 //////////////////////////////////////////////////////////////////
     36 
     37 /*
     38  * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
     39  * MD5 Message-Digest Algorithm (RFC 1321).
     40  *
     41  * Homepage:
     42  * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
     43  *
     44  * Author:
     45  * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
     46  *
     47  * This software was written by Alexander Peslyak in 2001.  No copyright is
     48  * claimed, and the software is hereby placed in the public domain.
     49  * In case this attempt to disclaim copyright and place the software in the
     50  * public domain is deemed null and void, then the software is
     51  * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
     52  * general public under the following terms:
     53  *
     54  * Redistribution and use in source and binary forms, with or without
     55  * modification, are permitted.
     56  *
     57  * There's ABSOLUTELY NO WARRANTY, express or implied.
     58  *
     59  * (This is a heavily cut-down "BSD license".)
     60  *
     61  * This differs from Colin Plumb's older public domain implementation in that
     62  * no exactly 32-bit integer data type is required (any 32-bit or wider
     63  * unsigned integer data type will do), there's no compile-time endianness
     64  * configuration, and the function prototypes match OpenSSL's.  No code from
     65  * Colin Plumb's implementation has been reused; this comment merely compares
     66  * the properties of the two independent implementations.
     67  *
     68  * The primary goals of this implementation are portability and ease of use.
     69  * It is meant to be fast, but not as fast as possible.  Some known
     70  * optimizations are not included to reduce source code size and avoid
     71  * compile-time configuration.
     72  */
     73 
     74 #include <string.h>
     75 
     76 // BEGIN #include "md5.h"
     77 /* Any 32-bit or wider unsigned integer data type will do */
     78 typedef unsigned int MD5_u32plus;
     79 
        /* Running state of one MD5 computation. */
     80 typedef struct {
        	/* Count of message bytes seen so far: MD5_Update() keeps the low
        	 * 29 bits in lo and accumulates the carry and upper bits in hi. */
     81 	MD5_u32plus lo, hi;
        	/* Chaining values, seeded by MD5_Init() and advanced by body(). */
     82 	MD5_u32plus a, b, c, d;
        	/* Holds the bytes of a not-yet-complete 64-byte block. */
     83 	unsigned char buffer[64];
        	/* Decoded input words; only touched by the portable SET/GET macros. */
     84 	MD5_u32plus block[16];
     85 } MD5_CTX;
     86 
        /* Start a new digest computation in *ctx. */
     87 void MD5_Init(MD5_CTX *ctx);
        /* Feed `size` bytes starting at `data` into the digest. */
     88 void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);
        /* Write the 16-byte digest to `result` and wipe *ctx. */
     89 void MD5_Final(unsigned char *result, MD5_CTX *ctx);
     90 // END  #include "md5.h"
     91 
     92 /*
     93  * The basic MD5 functions.
     94  *
     95  * F and G are optimized compared to their RFC 1321 definitions for
     96  * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
     97  * implementation.
         *
         * H and H2 both XOR their three arguments together; they differ only in
         * how the two XOR operations are grouped.
     98  */
     99 #define F(x, y, z)			((z) ^ ((x) & ((y) ^ (z))))
    100 #define G(x, y, z)			((y) ^ ((z) & ((x) ^ (y))))
    101 #define H(x, y, z)			(((x) ^ (y)) ^ (z))
    102 #define H2(x, y, z)			((x) ^ ((y) ^ (z)))
    103 #define I(x, y, z)			((y) ^ ((x) | ~(z)))
    104 
    105 /*
    106  * The MD5 transformation for all four rounds.
         *
         * STEP adds f(b, c, d) + x + t to a, rotates a left by s bits and then
         * adds b.  The "& 0xffffffff" discards any bits above bit 31 before the
         * right shift, so the rotate stays a 32-bit rotate even when MD5_u32plus
         * is wider than 32 bits.
         *
         * The macro expands to three separate statements that are not wrapped in
         * do { } while (0), and it evaluates (a) and (b) more than once: use it
         * only as a stand-alone statement with side-effect-free arguments.
    107  */
    108 #define STEP(f, a, b, c, d, x, t, s) \
    109 	(a) += f((b), (c), (d)) + (x) + (t); \
    110 	(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
    111 	(a) += (b);
    112 
    113 /*
    114  * SET reads 4 input bytes in little-endian byte order and stores them in a
    115  * properly aligned word in host byte order.
    116  *
    117  * The check for little-endian architectures that tolerate unaligned memory
    118  * accesses is just an optimization.  Nothing will break if it fails to detect
    119  * a suitable architecture.
    120  *
    121  * Unfortunately, this optimization may be a C strict aliasing rules violation
    122  * if the caller's data buffer has effective type that cannot be aliased by
    123  * MD5_u32plus.  In practice, this problem may occur if these MD5 routines are
    124  * inlined into a calling function, or with future and dangerously advanced
    125  * link-time optimizations.  For the time being, keeping these MD5 routines in
    126  * their own translation unit avoids the problem.
         *
         * NOTE(review): in this file the MD5 routines are compiled in the same
         * translation unit as their caller (xxprintf), so the mitigation described
         * in the previous paragraph does not hold here.
         *
         * Both macros pick up identifiers from the place where they are expanded:
         * `ptr` (the current input position) in either variant, and additionally
         * `ctx` in the portable variant.  In the x86/VAX variant SET and GET are
         * the same expression: a direct load through a pointer cast that also
         * drops the const qualifier of `ptr`.  In the portable variant SET decodes
         * the word into ctx->block[n] and GET re-reads that slot.
    127  */
    128 #if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
    129 #define SET(n) \
    130 	(*(MD5_u32plus *)&ptr[(n) * 4])
    131 #define GET(n) \
    132 	SET(n)
    133 #else
    134 #define SET(n) \
    135 	(ctx->block[(n)] = \
    136 	(MD5_u32plus)ptr[(n) * 4] | \
    137 	((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
    138 	((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
    139 	((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
    140 #define GET(n) \
    141 	(ctx->block[(n)])
    142 #endif
    143 
    144 /*
    145  * This processes one or more 64-byte data blocks, but does NOT update the bit
    146  * counters.  There are no alignment requirements.
         *
         * ctx  - supplies the chaining values a..d on entry and receives the
         *        updated values on return (the portable SET macro also writes
         *        ctx->block).
         * data - first byte of the input to digest.
         * size - number of input bytes.  The loop below runs one block and only
         *        then tests "size -= 64", so this must be a non-zero multiple of
         *        64; any other value makes the unsigned counter wrap instead of
         *        reaching zero.
         *
         * Returns a pointer to the first byte after the last block consumed.
    147  */
    148 static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
    149 {
    150 	const unsigned char *ptr;
    151 	MD5_u32plus a, b, c, d;
    152 	MD5_u32plus saved_a, saved_b, saved_c, saved_d;
    153 
    154 	ptr = (const unsigned char *)data;
    155 
        	/* Work on local copies; they are written back after the loop. */
    156 	a = ctx->a;
    157 	b = ctx->b;
    158 	c = ctx->c;
    159 	d = ctx->d;
    160 
    161 	do {
        		/* Remember the values entering this block; they are added
        		 * back in after round 4. */
    162 		saved_a = a;
    163 		saved_b = b;
    164 		saved_c = c;
    165 		saved_d = d;
    166 
        /* Round 1 uses SET, which decodes each input word; the later rounds
         * re-read the same words through GET. */
    167 /* Round 1 */
    168 		STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
    169 		STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
    170 		STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
    171 		STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
    172 		STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
    173 		STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
    174 		STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
    175 		STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
    176 		STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
    177 		STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
    178 		STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
    179 		STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
    180 		STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
    181 		STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
    182 		STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
    183 		STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
    184 
    185 /* Round 2 */
    186 		STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
    187 		STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
    188 		STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
    189 		STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
    190 		STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
    191 		STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
    192 		STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
    193 		STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
    194 		STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
    195 		STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
    196 		STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
    197 		STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
    198 		STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
    199 		STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
    200 		STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
    201 		STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
    202 
    203 /* Round 3 */
    204 		STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
    205 		STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
    206 		STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
    207 		STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
    208 		STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
    209 		STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
    210 		STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
    211 		STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
    212 		STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
    213 		STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
    214 		STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
    215 		STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
    216 		STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
    217 		STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
    218 		STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
    219 		STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
    220 
    221 /* Round 4 */
    222 		STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
    223 		STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
    224 		STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
    225 		STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
    226 		STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
    227 		STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
    228 		STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
    229 		STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
    230 		STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
    231 		STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
    232 		STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
    233 		STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
    234 		STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
    235 		STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
    236 		STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
    237 		STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
    238 
        		/* Fold the block's result into the values it started from. */
    239 		a += saved_a;
    240 		b += saved_b;
    241 		c += saved_c;
    242 		d += saved_d;
    243 
    244 		ptr += 64;
    245 	} while (size -= 64);
    246 
    247 	ctx->a = a;
    248 	ctx->b = b;
    249 	ctx->c = c;
    250 	ctx->d = d;
    251 
    252 	return ptr;
    253 }
    254 
    255 void MD5_Init(MD5_CTX *ctx)
    256 {
    257 	ctx->a = 0x67452301;
    258 	ctx->b = 0xefcdab89;
    259 	ctx->c = 0x98badcfe;
    260 	ctx->d = 0x10325476;
    261 
    262 	ctx->lo = 0;
    263 	ctx->hi = 0;
    264 }
    265 
    266 void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
    267 {
    268 	MD5_u32plus saved_lo;
    269 	unsigned long used, available;
    270 
    271 	saved_lo = ctx->lo;
    272 	if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
    273 		ctx->hi++;
    274 	ctx->hi += size >> 29;
    275 
    276 	used = saved_lo & 0x3f;
    277 
    278 	if (used) {
    279 		available = 64 - used;
    280 
    281 		if (size < available) {
    282 			memcpy(&ctx->buffer[used], data, size);
    283 			return;
    284 		}
    285 
    286 		memcpy(&ctx->buffer[used], data, available);
    287 		data = (const unsigned char *)data + available;
    288 		size -= available;
    289 		body(ctx, ctx->buffer, 64);
    290 	}
    291 
    292 	if (size >= 64) {
    293 		data = body(ctx, data, size & ~(unsigned long)0x3f);
    294 		size &= 0x3f;
    295 	}
    296 
    297 	memcpy(ctx->buffer, data, size);
    298 }
    299 
        /*
         * OUT stores the low 32 bits of `src` into the four bytes dst[0..3],
         * least significant byte first.
         *
         * It expands to four separate statements (each already terminated by a
         * semicolon) and evaluates both arguments four times, so it must be used
         * as a stand-alone statement with side-effect-free arguments.
         */
    300 #define OUT(dst, src) \
    301 	(dst)[0] = (unsigned char)(src); \
    302 	(dst)[1] = (unsigned char)((src) >> 8); \
    303 	(dst)[2] = (unsigned char)((src) >> 16); \
    304 	(dst)[3] = (unsigned char)((src) >> 24);
    305 
    306 void MD5_Final(unsigned char *result, MD5_CTX *ctx)
    307 {
    308 	unsigned long used, available;
    309 
    310 	used = ctx->lo & 0x3f;
    311 
    312 	ctx->buffer[used++] = 0x80;
    313 
    314 	available = 64 - used;
    315 
    316 	if (available < 8) {
    317 		memset(&ctx->buffer[used], 0, available);
    318 		body(ctx, ctx->buffer, 64);
    319 		used = 0;
    320 		available = 64;
    321 	}
    322 
    323 	memset(&ctx->buffer[used], 0, available - 8);
    324 
    325 	ctx->lo <<= 3;
    326 	OUT(&ctx->buffer[56], ctx->lo)
    327 	OUT(&ctx->buffer[60], ctx->hi)
    328 
    329 	body(ctx, ctx->buffer, 64);
    330 
    331 	OUT(&result[0], ctx->a)
    332 	OUT(&result[4], ctx->b)
    333 	OUT(&result[8], ctx->c)
    334 	OUT(&result[12], ctx->d)
    335 
    336 	memset(ctx, 0, sizeof(*ctx));
    337 }
    338 
    339 
    340 //////////////////////////////////////////////////////////////////
    341 //////////////////////////////////////////////////////////////////
    342 
    343 static MD5_CTX md5ctx;   /* digest state shared by the xxprintf_* functions */
    344 
    345 void xxprintf_start(void)
    346 {
           /* Begin a fresh digest; call before the first xxprintf(). */
    347    MD5_Init(&md5ctx);
    348 }
    349 
    350 void xxprintf_done(void)
    351 {
    352    const char hexchar[16] = "0123456789abcdef";
    353    unsigned char result[100];
    354    memset(result, 0, sizeof(result));
    355    MD5_Final(&result[0], &md5ctx);
    356    printf("final MD5 = ");
    357    int i;
    358    for (i = 0; i < 16; i++) {
    359       printf("%c%c", hexchar[0xF & (result[i] >> 4)],
    360                      hexchar[0xF & (result[i] >> 0)]);
    361    }
    362    printf("\n");
    363 }
    364 
    365 __attribute__((format(__printf__, 1, 2)))
    366 void xxprintf (const char *format, ...)
    367 {
    368    char buf[128];
    369    memset(buf, 0, sizeof(buf));
    370 
    371    va_list vargs;
    372    va_start(vargs, format);
    373    int n = vsnprintf(buf, sizeof(buf)-1, format, vargs);
    374    va_end(vargs);
    375 
    376    assert(n < sizeof(buf)-1);
    377    assert(buf[sizeof(buf)-1] == 0);
    378    assert(buf[sizeof(buf)-2] == 0);
    379 
    380    MD5_Update(&md5ctx, buf, strlen(buf));
    381    if (0) printf("QQQ %s", buf);
    382 }
    383 
    384 //////////////////////////////////////////////////////////////////
    385 //////////////////////////////////////////////////////////////////
    386 
    387 
    388 /* Setting this to 1 creates a very comprehensive test of
    389    integer condition codes. */
    390 #define TEST_INTEGER_VERBOSE 1
    391 
    392 typedef  long long int  int64;
    393 
    394 //#define LINUX_VM86_IOPL_FIX
    395 //#define TEST_P4_FLAGS
    396 
    397 #define xglue(x, y) x ## y
    398 #define glue(x, y) xglue(x, y)
    399 #define stringify(s)	tostring(s)
    400 #define tostring(s)	#s
    401 
    402 #define CC_C   	0x0001   /* x86 flags register: carry flag (bit 0) */
    403 #define CC_P 	0x0004   /* parity flag (bit 2) */
    404 #define CC_A	0x0010   /* auxiliary-carry flag (bit 4) */
    405 #define CC_Z	0x0040   /* zero flag (bit 6) */
    406 #define CC_S    0x0080   /* sign flag (bit 7) */
    407 #define CC_O    0x0800   /* overflow flag (bit 11) */
    408 
        /* Flag bits that are kept when a saved flags value is printed.  The
           macro is #undef'd and redefined several times further down, so its
           value depends on where in the file it is used. */
    409 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
    410 
    411 #define OP add
    412 #include "fb_test_amd64.h"
    413 
    414 #define OP sub
    415 #include "fb_test_amd64.h"
    416 
    417 #define OP xor
    418 #include "fb_test_amd64.h"
    419 
    420 #define OP and
    421 #include "fb_test_amd64.h"
    422 
    423 #define OP or
    424 #include "fb_test_amd64.h"
    425 
    426 #define OP cmp
    427 #include "fb_test_amd64.h"
    428 
    429 #define OP adc
    430 #define OP_CC
    431 #include "fb_test_amd64.h"
    432 
    433 #define OP sbb
    434 #define OP_CC
    435 #include "fb_test_amd64.h"
    436 
    437 #define OP adcx
    438 #define NSH
    439 #define OP_CC
    440 #include "fb_test_amd64.h"
    441 
    442 #define OP adox
    443 #define NSH
    444 #define OP_CC
    445 #include "fb_test_amd64.h"
    446 
    447 #define OP inc
    448 #define OP_CC
    449 #define OP1
    450 #include "fb_test_amd64.h"
    451 
    452 #define OP dec
    453 #define OP_CC
    454 #define OP1
    455 #include "fb_test_amd64.h"
    456 
    457 #define OP neg
    458 #define OP_CC
    459 #define OP1
    460 #include "fb_test_amd64.h"
    461 
    462 #define OP not
    463 #define OP_CC
    464 #define OP1
    465 #include "fb_test_amd64.h"
    466 
    467 #undef CC_MASK
    468 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O)
    469 
    470 #define OP shl
    471 #include "fb_test_amd64_shift.h"
    472 
    473 #define OP shr
    474 #include "fb_test_amd64_shift.h"
    475 
    476 #define OP sar
    477 #include "fb_test_amd64_shift.h"
    478 
    479 #define OP rol
    480 #include "fb_test_amd64_shift.h"
    481 
    482 #define OP ror
    483 #include "fb_test_amd64_shift.h"
    484 
    485 #define OP rcr
    486 #define OP_CC
    487 #include "fb_test_amd64_shift.h"
    488 
    489 #define OP rcl
    490 #define OP_CC
    491 #include "fb_test_amd64_shift.h"
    492 
    493 /* XXX: should be more precise ? */
    494 #undef CC_MASK
    495 #define CC_MASK (CC_C)
    496 
    497 /* lea test (modrm support)
           TEST_LEA(STR) runs "leaq STR, <reg>" with the expansion site's
           variables rax, rbx, rcx, rdx, rsi and rdi pinned to the registers of
           the same name, stores the computed address in `res` and logs it.
           STR is pasted into both the asm template and the log text, which is
           why registers are spelled with a doubled percent sign. */
    498 #define TEST_LEA(STR)\
    499 {\
    500     asm("leaq " STR ", %0"\
    501         : "=r" (res)\
    502         : "a" (rax), "b" (rbx), "c" (rcx), "d" (rdx), "S" (rsi), "D" (rdi));\
    503     xxprintf("lea %s = %016llx\n", STR, res);\
    504 }
    505 
        /* NOTE(review): TEST_LEA16 is not invoked anywhere in the part of the
           file reviewed here.  It switches the assembler to .code16/.code32 and
           refers to variables eax..edi, which test_lea() does not declare; it
           looks like a leftover from a 32-bit version of this test - confirm
           before using it. */
    506 #define TEST_LEA16(STR)\
    507 {\
    508     asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
    509         : "=wq" (res)\
    510         : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
    511     xxprintf("lea %s = %08x\n", STR, res);\
    512 }
    513 
    514 
    515 void test_lea(void)
    516 {
            /* Logs the result of leaq for a series of addressing modes.  Each
               register variable holds a distinct power of two, so the logged sum
               shows which registers and scale factor took part.  The variable
               names are fixed by the TEST_LEA macro. */
    517     int64 rax, rbx, rcx, rdx, rsi, rdi, res;
    518     rax = 0x0001;
    519     rbx = 0x0002;
    520     rcx = 0x0004;
    521     rdx = 0x0008;
    522     rsi = 0x0010;
    523     rdi = 0x0020;
    524 
            /* absolute displacement only */
    525     TEST_LEA("0x4000");
    526 
            /* base register only */
    527     TEST_LEA("(%%rax)");
    528     TEST_LEA("(%%rbx)");
    529     TEST_LEA("(%%rcx)");
    530     TEST_LEA("(%%rdx)");
    531     TEST_LEA("(%%rsi)");
    532     TEST_LEA("(%%rdi)");
    533 
            /* small displacement + base */
    534     TEST_LEA("0x40(%%rax)");
    535     TEST_LEA("0x40(%%rbx)");
    536     TEST_LEA("0x40(%%rcx)");
    537     TEST_LEA("0x40(%%rdx)");
    538     TEST_LEA("0x40(%%rsi)");
    539     TEST_LEA("0x40(%%rdi)");
    540 
            /* larger displacement + base */
    541     TEST_LEA("0x4000(%%rax)");
    542     TEST_LEA("0x4000(%%rbx)");
    543     TEST_LEA("0x4000(%%rcx)");
    544     TEST_LEA("0x4000(%%rdx)");
    545     TEST_LEA("0x4000(%%rsi)");
    546     TEST_LEA("0x4000(%%rdi)");
    547 
            /* base + index */
    548     TEST_LEA("(%%rax, %%rcx)");
    549     TEST_LEA("(%%rbx, %%rdx)");
    550     TEST_LEA("(%%rcx, %%rcx)");
    551     TEST_LEA("(%%rdx, %%rcx)");
    552     TEST_LEA("(%%rsi, %%rcx)");
    553     TEST_LEA("(%%rdi, %%rcx)");
    554 
            /* displacement + base + index */
    555     TEST_LEA("0x40(%%rax, %%rcx)");
    556     TEST_LEA("0x4000(%%rbx, %%rdx)");
    557 
            /* base + scaled index */
    558     TEST_LEA("(%%rcx, %%rcx, 2)");
    559     TEST_LEA("(%%rdx, %%rcx, 4)");
    560     TEST_LEA("(%%rsi, %%rcx, 8)");
    561 
            /* scaled index without base */
    562     TEST_LEA("(,%%rax, 2)");
    563     TEST_LEA("(,%%rbx, 4)");
    564     TEST_LEA("(,%%rcx, 8)");
    565 
            /* displacement + scaled index without base */
    566     TEST_LEA("0x40(,%%rax, 2)");
    567     TEST_LEA("0x40(,%%rbx, 4)");
    568     TEST_LEA("0x40(,%%rcx, 8)");
    569 
    570 
            /* negative displacement + base + scaled index */
    571     TEST_LEA("-10(%%rcx, %%rcx, 2)");
    572     TEST_LEA("-10(%%rdx, %%rcx, 4)");
    573     TEST_LEA("-10(%%rsi, %%rcx, 8)");
    574 
            /* larger displacement + base + scaled index */
    575     TEST_LEA("0x4000(%%rcx, %%rcx, 2)");
    576     TEST_LEA("0x4000(%%rdx, %%rcx, 4)");
    577     TEST_LEA("0x4000(%%rsi, %%rcx, 8)");
    578 }
    579 
    580 #define TEST_JCC(JCC, v1, v2)\
    581 {   int one = 1; \
    582     int res;\
    583     asm("movl $1, %0\n\t"\
    584         "cmpl %2, %1\n\t"\
    585         "j" JCC " 1f\n\t"\
    586         "movl $0, %0\n\t"\
    587         "1:\n\t"\
    588         : "=r" (res)\
    589         : "r" (v1), "r" (v2));\
    590     xxprintf("%-10s %d\n", "j" JCC, res);\
    591 \
    592     asm("movl $0, %0\n\t"\
    593         "cmpl %2, %1\n\t"\
    594         "set" JCC " %b0\n\t"\
    595         : "=r" (res)\
    596         : "r" (v1), "r" (v2));\
    597     xxprintf("%-10s %d\n", "set" JCC, res);\
    598  {\
    599     asm("movl $0x12345678, %0\n\t"\
    600         "cmpl %2, %1\n\t"\
    601         "cmov" JCC "l %3, %0\n\t"\
    602         : "=r" (res)\
    603         : "r" (v1), "r" (v2), "m" (one));\
    604         xxprintf("%-10s R=0x%08x\n", "cmov" JCC "l", res);\
    605     asm("movl $0x12345678, %0\n\t"\
    606         "cmpl %2, %1\n\t"\
    607         "cmov" JCC "w %w3, %w0\n\t"\
    608         : "=r" (res)\
    609         : "r" (v1), "r" (v2), "r" (one));\
    610         xxprintf("%-10s R=0x%08x\n", "cmov" JCC "w", res);\
    611  } \
    612 }
    613 
    614 /* various jump tests
           Runs TEST_JCC (jCC, setCC and both cmovCC forms) for each condition
           code, with operand pairs chosen so that the condition is seen both
           holding and not holding.  The first value is the left-hand side of
           the comparison, the second the right-hand side. */
    615 void test_jcc(void)
    616 {
            /* equality */
    617     TEST_JCC("ne", 1, 1);
    618     TEST_JCC("ne", 1, 0);
    619 
    620     TEST_JCC("e", 1, 1);
    621     TEST_JCC("e", 1, 0);
    622 
            /* signed ordering */
    623     TEST_JCC("l", 1, 1);
    624     TEST_JCC("l", 1, 0);
    625     TEST_JCC("l", 1, -1);
    626 
    627     TEST_JCC("le", 1, 1);
    628     TEST_JCC("le", 1, 0);
    629     TEST_JCC("le", 1, -1);
    630 
    631     TEST_JCC("ge", 1, 1);
    632     TEST_JCC("ge", 1, 0);
    633     TEST_JCC("ge", -1, 1);
    634 
    635     TEST_JCC("g", 1, 1);
    636     TEST_JCC("g", 1, 0);
    637     TEST_JCC("g", 1, -1);
    638 
            /* unsigned ordering (-1 acts as the largest unsigned value) */
    639     TEST_JCC("b", 1, 1);
    640     TEST_JCC("b", 1, 0);
    641     TEST_JCC("b", 1, -1);
    642 
    643     TEST_JCC("be", 1, 1);
    644     TEST_JCC("be", 1, 0);
    645     TEST_JCC("be", 1, -1);
    646 
    647     TEST_JCC("ae", 1, 1);
    648     TEST_JCC("ae", 1, 0);
    649     TEST_JCC("ae", 1, -1);
    650 
    651     TEST_JCC("a", 1, 1);
    652     TEST_JCC("a", 1, 0);
    653     TEST_JCC("a", 1, -1);
    654 
    655 
            /* parity */
    656     TEST_JCC("p", 1, 1);
    657     TEST_JCC("p", 1, 0);
    658 
    659     TEST_JCC("np", 1, 1);
    660     TEST_JCC("np", 1, 0);
    661 
            /* overflow: 0x7fffffff - (-1) does not fit in 32 signed bits */
    662     TEST_JCC("o", 0x7fffffff, 0);
    663     TEST_JCC("o", 0x7fffffff, -1);
    664 
    665     TEST_JCC("no", 0x7fffffff, 0);
    666     TEST_JCC("no", 0x7fffffff, -1);
    667 
            /* sign of the difference */
    668     TEST_JCC("s", 0, 1);
    669     TEST_JCC("s", 0, -1);
    670     TEST_JCC("s", 0, 0);
    671 
    672     TEST_JCC("ns", 0, 1);
    673     TEST_JCC("ns", 0, -1);
    674     TEST_JCC("ns", 0, 0);
    675 }
    676 
    677 #undef CC_MASK
    678 #ifdef TEST_P4_FLAGS
    679 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
    680 #else
    681 #define CC_MASK (CC_O | CC_C)
    682 #endif
    683 
    684 #define OP mul
    685 #include "fb_test_amd64_muldiv.h"
    686 
    687 #define OP imul
    688 #include "fb_test_amd64_muldiv.h"
    689 
    690 void test_imulw2(int64 op0, int64 op1)
    691 {
    692     int64 res, s1, s0, flags;
    693     s0 = op0;
    694     s1 = op1;
    695     res = s0;
    696     flags = 0;
    697     asm ("pushq %4\n\t"
    698          "popfq\n\t"
    699          "imulw %w2, %w0\n\t"
    700          "pushfq\n\t"
    701          "popq %1\n\t"
    702          : "=q" (res), "=g" (flags)
    703          : "q" (s1), "0" (res), "1" (flags));
    704     xxprintf("%-10s A=%016llx B=%016llx R=%016llx CC=%04llx\n",
    705            "imulw", s0, s1, res, flags & CC_MASK);
    706 }
    707 
    708 void test_imull2(int64 op0, int64 op1)
    709 {
    710     int res, s1;
    711     int64 s0, flags;
    712     s0 = op0;
    713     s1 = op1;
    714     res = s0;
    715     flags = 0;
    716     asm ("pushq %4\n\t"
    717          "popfq\n\t"
    718          "imull %2, %0\n\t"
    719          "pushfq\n\t"
    720          "popq %1\n\t"
    721          : "=q" (res), "=g" (flags)
    722          : "q" (s1), "0" (res), "1" (flags));
    723     xxprintf("%-10s A=%016llx B=%08x R=%08x CC=%04llx\n",
    724            "imull", s0, s1, res, flags & CC_MASK);
    725 }
    726 
        /*
         * TEST_IMUL_IM(size, size1, op0, op1): three-operand imul with an
         * immediate.  `size` is the instruction suffix and `size1` the operand
         * size modifier, both string literals; op0 is stringized into the asm
         * text as the immediate and op1 is supplied in a register.  The flags
         * register is loaded with 0 before the multiply; afterwards the product
         * and the flags (filtered through CC_MASK) are logged.
         */
    727 #define TEST_IMUL_IM(size, size1, op0, op1)\
    728 {\
    729     int64 res, flags;\
    730     flags = 0;\
    731     res = 0;\
    732     asm ("pushq %3\n\t"\
    733          "popfq\n\t"\
    734          "imul" size " $" #op0 ", %" size1 "2, %" size1 "0\n\t" \
    735          "pushfq\n\t"\
    736          "popq %1\n\t"\
    737          : "=r" (res), "=g" (flags)\
    738          : "r" (op1), "1" (flags), "0" (res));\
    739     xxprintf("%-10s A=%08x B=%08x R=%016llx CC=%04llx\n",\
    740            "imul" size, op0, op1, res, flags & CC_MASK);\
    741 }
    742 
        /*
         * TEST_IMUL_IM_L(op0, op1): three-operand imul with immediate op0 and
         * register operand op1, without an explicit size suffix.  The flags
         * register is loaded with 0 first; the flags after the multiply are
         * logged filtered through CC_MASK.
         *
         * NOTE(review): the asm delivers the product in `res64`, but the log
         * line prints `res`, which is never written after its initialisation to
         * 0 - so the R= field is always 0 and only the flags are really being
         * checked.  This looks unintended.  It is left as is because the logged
         * text feeds the final MD5; fixing it changes the program's output.
         */
    743 #define TEST_IMUL_IM_L(op0, op1)\
    744 {\
    745     int64 flags = 0;\
    746     int res = 0;\
    747     int res64 = 0;\
    748     asm ("pushq %3\n\t"\
    749          "popfq\n\t"\
    750          "imul $" #op0 ", %2, %0\n\t" \
    751          "pushfq\n\t"\
    752          "popq %1\n\t"\
    753          : "=r" (res64), "=g" (flags)\
    754          : "r" (op1), "1" (flags), "0" (res));\
    755     xxprintf("%-10s A=%08x B=%08x R=%08x CC=%04llx\n",\
    756            "imull", op0, op1, res, flags & CC_MASK);\
    757 }
    758 
    759 
    760 #undef CC_MASK
    761 #define CC_MASK (0)
    762 
    763 #define OP div
    764 #include "fb_test_amd64_muldiv.h"
    765 
    766 #define OP idiv
    767 #include "fb_test_amd64_muldiv.h"
    768 
    769 void test_mul(void)
    770 {
            /* Drives all multiply and divide tests with fixed operands.
               The test_imul?/test_mul?/test_idiv?/test_div? helpers (b/w/l/q
               suffix) are not defined in the part of the file reviewed here;
               presumably they are generated by the fb_test_amd64_muldiv.h
               includes above - confirm there what each argument means. */

            /* signed multiply, 8/16/32-bit helpers */
    771     test_imulb(0x1234561d, 4);
    772     test_imulb(3, -4);
    773     test_imulb(0x80, 0x80);
    774     test_imulb(0x10, 0x10);
    775 
    776     test_imulw(0, 0, 0);
    777     test_imulw(0, 0xFF, 0xFF);
    778     test_imulw(0, 0xFF, 0x100);
    779     test_imulw(0, 0x1234001d, 45);
    780     test_imulw(0, 23, -45);
    781     test_imulw(0, 0x8000, 0x8000);
    782     test_imulw(0, 0x100, 0x100);
    783 
    784     test_imull(0, 0, 0);
    785     test_imull(0, 0xFFFF, 0xFFFF);
    786     test_imull(0, 0xFFFF, 0x10000);
    787     test_imull(0, 0x1234001d, 45);
    788     test_imull(0, 23, -45);
    789     test_imull(0, 0x80000000, 0x80000000);
    790     test_imull(0, 0x10000, 0x10000);
    791 
            /* unsigned multiply, 8/16/32-bit helpers */
    792     test_mulb(0x1234561d, 4);
    793     test_mulb(3, -4);
    794     test_mulb(0x80, 0x80);
    795     test_mulb(0x10, 0x10);
    796 
    797     test_mulw(0, 0x1234001d, 45);
    798     test_mulw(0, 23, -45);
    799     test_mulw(0, 0x8000, 0x8000);
    800     test_mulw(0, 0x100, 0x100);
    801 
    802     test_mull(0, 0x1234001d, 45);
    803     test_mull(0, 23, -45);
    804     test_mull(0, 0x80000000, 0x80000000);
    805     test_mull(0, 0x10000, 0x10000);
    806 
            /* two-operand imul (register, register) */
    807     test_imulw2(0x1234001d, 45);
    808     test_imulw2(23, -45);
    809     test_imulw2(0x8000, 0x8000);
    810     test_imulw2(0x100, 0x100);
    811 
    812     test_imull2(0x1234001d, 45);
    813     test_imull2(23, -45);
    814     test_imull2(0x80000000, 0x80000000);
    815     test_imull2(0x10000, 0x10000);
    816 
            /* three-operand imul with an immediate */
    817     TEST_IMUL_IM("w", "w", 45, 0x1234);
    818     TEST_IMUL_IM("w", "w", -45, 23);
    819     TEST_IMUL_IM("w", "w", 0x8000, 0x80000000);
    820     TEST_IMUL_IM("w", "w", 0x7fff, 0x1000);
    821 
    822     TEST_IMUL_IM_L(45, 0x1234);
    823     TEST_IMUL_IM_L(-45, 23);
    824     TEST_IMUL_IM_L(0x8000, 0x80000000);
    825     TEST_IMUL_IM_L(0x7fff, 0x1000);
    826 
            /* signed divide, 8/16/32/64-bit helpers */
    827     test_idivb(0x12341678, 0x127e);
    828     test_idivb(0x43210123, -5);
    829     test_idivb(0x12340004, -1);
    830 
    831     test_idivw(0, 0x12345678, 12347);
    832     test_idivw(0, -23223, -45);
    833     test_idivw(0, 0x12348000, -1);
    834     test_idivw(0x12343, 0x12345678, 0x81238567);
    835 
    836     test_idivl(0, 0x12345678, 12347);
    837     test_idivl(0, -233223, -45);
    838     test_idivl(0, 0x80000000, -1);
    839     test_idivl(0x12343, 0x12345678, 0x81234567);
    840 
    841     test_idivq(0, 0x12345678, 12347);
    842     test_idivq(0, -233223, -45);
    843     test_idivq(0, 0x80000000, -1);
    844     test_idivq(0x12343, 0x12345678, 0x81234567);
    845 
            /* unsigned divide, 8/16/32/64-bit helpers */
    846     test_divb(0x12341678, 0x127e);
    847     test_divb(0x43210123, -5);
    848     test_divb(0x12340004, -1);
    849 
    850     test_divw(0, 0x12345678, 12347);
    851     test_divw(0, -23223, -45);
    852     test_divw(0, 0x12348000, -1);
    853     test_divw(0x12343, 0x12345678, 0x81238567);
    854 
    855     test_divl(0, 0x12345678, 12347);
    856     test_divl(0, -233223, -45);
    857     test_divl(0, 0x80000000, -1);
    858     test_divl(0x12343, 0x12345678, 0x81234567);
    859 
    860     test_divq(0, 0x12345678, 12347);
    861     test_divq(0, -233223, -45);
    862     test_divq(0, 0x80000000, -1);
    863     test_divq(0x12343, 0x12345678, 0x81234567);
    864 }
    865 
    866 #define TEST_BSX(op, size, op0)\
    867 {\
    868     int res, val, resz;\
    869     val = op0;\
    870     asm("xorl %1, %1\n"\
    871         "movl $0x12345678, %0\n"\
    872         #op " %" size "2, %" size "0 ; setz %b1" \
    873         : "=r" (res), "=q" (resz)\
    874         : "r" (val));\
    875     xxprintf("%-10s A=%08x R=%08x %d\n", #op, val, res, resz);\
    876 }
    877 
    878 void test_bsx(void)
    879 {
            /* Bit scan reverse (bsr) and forward (bsf) in 16- and 32-bit form,
               each once with a zero input and once with a non-zero input. */
    880     TEST_BSX(bsrw, "w", 0);
    881     TEST_BSX(bsrw, "w", 0x12340128);
    882     TEST_BSX(bsrl, "", 0);
    883     TEST_BSX(bsrl, "", 0x00340128);
    884     TEST_BSX(bsfw, "w", 0);
    885     TEST_BSX(bsfw, "w", 0x12340128);
    886     TEST_BSX(bsfl, "", 0);
    887     TEST_BSX(bsfl, "", 0x00340128);
    888 }
    889 
    890 /**********************************************/
    891 
    892 void test_fops(double a, double b)
    893 {
            /* Logs the results of the basic arithmetic operators and of a set
               of <math.h> functions applied to a (and b where two operands are
               needed).  All values are formatted with %f. */
    894     xxprintf("a=%f b=%f a+b=%f\n", a, b, a + b);
    895     xxprintf("a=%f b=%f a-b=%f\n", a, b, a - b);
    896     xxprintf("a=%f b=%f a*b=%f\n", a, b, a * b);
    897     xxprintf("a=%f b=%f a/b=%f\n", a, b, a / b);
    898     xxprintf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b));
    899     xxprintf("a=%f sqrt(a)=%f\n", a, sqrt(a));
    900     xxprintf("a=%f sin(a)=%f\n", a, sin(a));
    901     xxprintf("a=%f cos(a)=%f\n", a, cos(a));
    902     xxprintf("a=%f tan(a)=%f\n", a, tan(a));
    903     xxprintf("a=%f log(a)=%f\n", a, log(a));
    904     xxprintf("a=%f exp(a)=%f\n", a, exp(a));
    905     xxprintf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));
    906     /* just to test some op combining */
    907     xxprintf("a=%f asin(sin(a))=%f\n", a, asin(sin(a)));
    908     xxprintf("a=%f acos(cos(a))=%f\n", a, acos(cos(a)));
    909     xxprintf("a=%f atan(tan(a))=%f\n", a, atan(tan(a)));
    910 }
    911 
    912 void test_fcmp(double a, double b)
    913 {
            /* Logs the outcome of the C comparison operators on a and b, then
               the Z, P and C flags produced by the fcomi instruction. */
    914     xxprintf("(%f<%f)=%d\n",
    915            a, b, a < b);
    916     xxprintf("(%f<=%f)=%d\n",
    917            a, b, a <= b);
    918     xxprintf("(%f==%f)=%d\n",
    919            a, b, a == b);
    920     xxprintf("(%f>%f)=%d\n",
    921            a, b, a > b);
            /* NOTE(review): the label below says "<=" but the value logged is
               a >= b - apparently a copy-and-paste slip.  The text is left
               untouched because every logged byte feeds the final MD5, so
               correcting the label changes the program's output. */
    922     xxprintf("(%f<=%f)=%d\n",
    923            a, b, a >= b);
    924     {
    925         unsigned long long int rflags;
    926         /* test f(u)comi instruction */
                /* a is placed on top of the x87 stack ("t"), b in the next
                   slot ("u"); the flags are then fetched with pushfq/popq. */
    927         asm("fcomi %2, %1\n"
    928             "pushfq\n"
    929             "popq %0\n"
    930             : "=r" (rflags)
    931             : "t" (a), "u" (b));
    932         xxprintf("fcomi(%f %f)=%016llx\n", a, b, rflags & (CC_Z | CC_P | CC_C));
    933     }
    934 }
    935 
    936 void test_fcvt(double a)
    937 {
    938     float fa;
    939     long double la;
    940     int16_t fpuc;
    941     int i;
    942     int64 lla;
    943     int ia;
    944     int16_t wa;
    945     double ra;
    946 
    947     fa = a;
    948     la = a;
    949     xxprintf("(float)%f = %f\n", a, fa);
    950     xxprintf("(long double)%f = %Lf\n", a, la);
    951     xxprintf("a=%016llx\n", *(unsigned long long int *) &a);
    952     xxprintf("la=%016llx %04x\n", *(unsigned long long int *) &la,
    953              *(unsigned short *) ((char *)(&la) + 8));
    954 
    955     /* test all roundings */
    956     asm volatile ("fstcw %0" : "=m" (fpuc));
    957     for(i=0;i<4;i++) {
    958         short zz = (fpuc & ~0x0c00) | (i << 10);
    959         asm volatile ("fldcw %0" : : "m" (zz));
    960         asm volatile ("fists %0" : "=m" (wa) : "t" (a));
    961         asm volatile ("fistl %0" : "=m" (ia) : "t" (a));
    962         asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st");
    963         asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a));
    964         asm volatile ("fldcw %0" : : "m" (fpuc));
    965         xxprintf("(short)a = %d\n", wa);
    966         xxprintf("(int)a = %d\n", ia);
    967         xxprintf("(int64_t)a = %lld\n", lla);
    968         xxprintf("rint(a) = %f\n", ra);
    969     }
    970 }
    971 
    972 #define TEST(N) \
    973     asm("fld" #N : "=t" (a)); \
    974     xxprintf("fld" #N "= %f\n", a);
    975 
/*
 * Print the value produced by each x87 constant-load instruction
 * (fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz), one line per
 * instruction, via the TEST() macro.
 */
void test_fconst(void)
{
    double a;   /* written and printed by every TEST() expansion */
    TEST(1);
    TEST(l2t);
    TEST(l2e);
    TEST(pi);
    TEST(lg2);
    TEST(ln2);
    TEST(z);
}
    987 
    988 void test_fbcd(double a)
    989 {
    990     unsigned short bcd[5];
    991     double b;
    992 
    993     asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st");
    994     asm("fbld %1" : "=t" (b) : "m" (bcd[0]));
    995     xxprintf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n",
    996            a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b);
    997 }
    998 
    999 #define TEST_ENV(env, save, restore)\
   1000 {\
   1001     memset((env), 0xaa, sizeof(*(env)));\
   1002     for(i=0;i<5;i++)\
   1003         asm volatile ("fldl %0" : : "m" (dtab[i]));\
   1004     asm(save " %0\n" : : "m" (*(env)));\
   1005     asm(restore " %0\n": : "m" (*(env)));\
   1006     for(i=0;i<5;i++)\
   1007         asm volatile ("fstpl %0" : "=m" (rtab[i]));\
   1008     for(i=0;i<5;i++)\
   1009         xxprintf("res[%d]=%f\n", i, rtab[i]);\
   1010     xxprintf("fpuc=%04x fpus=%04x fptag=%04x\n",\
   1011            (env)->fpuc,\
   1012            (env)->fpus & 0xff00,\
   1013            (env)->fptag);\
   1014 }
   1015 
   1016 void test_fenv(void)
   1017 {
   1018     struct __attribute__((packed)) {
   1019         uint16_t fpuc;
   1020         uint16_t dummy1;
   1021         uint16_t fpus;
   1022         uint16_t dummy2;
   1023         uint16_t fptag;
   1024         uint16_t dummy3;
   1025         uint32_t ignored[4];
   1026         long double fpregs[8];
   1027     } float_env32;
   1028     double dtab[8];
   1029     double rtab[8];
   1030     int i;
   1031 
   1032     for(i=0;i<8;i++)
   1033         dtab[i] = i + 1;
   1034 
   1035     TEST_ENV(&float_env32, "fnstenv", "fldenv");
   1036     TEST_ENV(&float_env32, "fnsave", "frstor");
   1037 
   1038     /* test for ffree */
   1039     for(i=0;i<5;i++)
   1040         asm volatile ("fldl %0" : : "m" (dtab[i]));
   1041     asm volatile("ffree %st(2)");
   1042     asm volatile ("fnstenv %0\n" : : "m" (float_env32));
   1043     asm volatile ("fninit");
   1044     xxprintf("fptag=%04x\n", float_env32.fptag);
   1045 }
   1046 
   1047 
   1048 #define TEST_FCMOV(a, b, rflags, CC)\
   1049 {\
   1050     double res;\
   1051     asm("pushq %3\n"\
   1052         "popfq\n"\
   1053         "fcmov" CC " %2, %0\n"\
   1054         : "=t" (res)\
   1055         : "0" (a), "u" (b), "g" (rflags));\
   1056     xxprintf("fcmov%s rflags=0x%04llx-> %f\n", \
   1057            CC, rflags, res);\
   1058 }
   1059 
   1060 void test_fcmov(void)
   1061 {
   1062     double a, b;
   1063     int64 rflags, i;
   1064 
   1065     a = 1.0;
   1066     b = 2.0;
   1067     for(i = 0; i < 4; i++) {
   1068         rflags = 0;
   1069         if (i & 1)
   1070             rflags |= CC_C;
   1071         if (i & 2)
   1072             rflags |= CC_Z;
   1073         TEST_FCMOV(a, b, rflags, "b");
   1074         TEST_FCMOV(a, b, rflags, "e");
   1075         TEST_FCMOV(a, b, rflags, "be");
   1076         TEST_FCMOV(a, b, rflags, "nb");
   1077         TEST_FCMOV(a, b, rflags, "ne");
   1078         TEST_FCMOV(a, b, rflags, "nbe");
   1079     }
   1080     TEST_FCMOV(a, b, (int64)0, "u");
   1081     TEST_FCMOV(a, b, (int64)CC_P, "u");
   1082     TEST_FCMOV(a, b, (int64)0, "nu");
   1083     TEST_FCMOV(a, b, (int64)CC_P, "nu");
   1084 }
   1085 
   1086 void test_floats(void)
   1087 {
   1088     test_fops(2, 3);
   1089     test_fops(1.4, -5);
   1090     test_fcmp(2, -1);
   1091     test_fcmp(2, 2);
   1092     test_fcmp(2, 3);
   1093     test_fcvt(0.5);
   1094     test_fcvt(-0.5);
   1095     test_fcvt(1.0/7.0);
   1096     test_fcvt(-1.0/9.0);
   1097     test_fcvt(32768);
   1098     test_fcvt(-1e20);
   1099     test_fconst();
   1100     // REINSTATE (maybe): test_fbcd(1234567890123456);
   1101     // REINSTATE (maybe): test_fbcd(-123451234567890);
   1102     // REINSTATE: test_fenv();
   1103     // REINSTATE: test_fcmov();
   1104 }
   1105 
   1106 /**********************************************/
   1107 
   1108 #define TEST_XCHG(op, size, opconst)\
   1109 {\
   1110     int op0, op1;\
   1111     op0 = 0x12345678;\
   1112     op1 = 0xfbca7654;\
   1113     asm(#op " %" size "0, %" size "1" \
   1114         : "=q" (op0), opconst (op1) \
   1115         : "0" (op0), "1" (op1));\
   1116     xxprintf("%-10s A=%08x B=%08x\n",\
   1117            #op, op0, op1);\
   1118 }
   1119 
   1120 #define TEST_CMPXCHG(op, size, opconst, eax)\
   1121 {\
   1122     int op0, op1;\
   1123     op0 = 0x12345678;\
   1124     op1 = 0xfbca7654;\
   1125     asm(#op " %" size "0, %" size "1" \
   1126         : "=q" (op0), opconst (op1) \
   1127         : "0" (op0), "1" (op1), "a" (eax));\
   1128     xxprintf("%-10s EAX=%08x A=%08x C=%08x\n",\
   1129            #op, eax, op0, op1);\
   1130 }
   1131 
   1132 
   1133 /**********************************************/
   1134 /* segmentation tests */
   1135 
/* NOTE(review): neither symbol is referenced in the visible part of this
   file; presumably left over from the segmentation tests named in the
   comment above -- confirm before removing. */
extern char func_lret32;
extern char func_iret32;

/* Scratch memory for the string-instruction tests: TEST_STRING1 points
   RSI/RDI at (and 16 bytes past) its midpoint, and test_string() fills it
   with a byte pattern first. */
uint8_t str_buffer[4096];
   1140 
   1141 #define TEST_STRING1(OP, size, DF, REP)\
   1142 {\
   1143     int64 rsi, rdi, rax, rcx, rflags;\
   1144 \
   1145     rsi = (long)(str_buffer + sizeof(str_buffer) / 2);\
   1146     rdi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\
   1147     rax = 0x12345678;\
   1148     rcx = 17;\
   1149 \
   1150     asm volatile ("pushq $0\n\t"\
   1151                   "popfq\n\t"\
   1152                   DF "\n\t"\
   1153                   REP #OP size "\n\t"\
   1154                   "cld\n\t"\
   1155                   "pushfq\n\t"\
   1156                   "popq %4\n\t"\
   1157                   : "=S" (rsi), "=D" (rdi), "=a" (rax), "=c" (rcx), "=g" (rflags)\
   1158                   : "0" (rsi), "1" (rdi), "2" (rax), "3" (rcx));\
   1159     xxprintf("%-10s ESI=%016llx EDI=%016llx EAX=%016llx ECX=%016llx EFL=%04llx\n",\
   1160            REP #OP size, rsi, rdi, rax, rcx,\
   1161            rflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
   1162 }
   1163 
   1164 #define TEST_STRING(OP, REP)\
   1165     TEST_STRING1(OP, "b", "", REP);\
   1166     TEST_STRING1(OP, "w", "", REP);\
   1167     TEST_STRING1(OP, "l", "", REP);\
   1168     TEST_STRING1(OP, "b", "std", REP);\
   1169     TEST_STRING1(OP, "w", "std", REP);\
   1170     TEST_STRING1(OP, "l", "std", REP)
   1171 
   1172 void test_string(void)
   1173 {
   1174     int64 i;
   1175     for(i = 0;i < sizeof(str_buffer); i++)
   1176         str_buffer[i] = i + 0x56;
   1177    TEST_STRING(stos, "");
   1178    TEST_STRING(stos, "rep ");
   1179    TEST_STRING(lods, ""); /* to verify stos */
   1180    //  TEST_STRING(lods, "rep ");
   1181    TEST_STRING(movs, "");
   1182    TEST_STRING(movs, "rep ");
   1183      TEST_STRING(lods, ""); /* to verify stos */
   1184 
   1185    /* XXX: better tests */
   1186    TEST_STRING(scas, "");
   1187    TEST_STRING(scas, "repz ");
   1188    TEST_STRING(scas, "repnz ");
   1189    // REINSTATE?  TEST_STRING(cmps, "");
   1190    TEST_STRING(cmps, "repz ");
   1191    // REINSTATE?  TEST_STRING(cmps, "repnz ");
   1192 }
   1193 
/*
 * Test driver: brackets the enabled test groups between xxprintf_start()
 * and xxprintf_done() and always returns 0.  argc and argv are not used.
 */
int main(int argc, char **argv)
{
    // The three commented out test cases (test_bsx, test_jcc, test_string)
    // produce different results at different compiler optimisation levels.
    // This suggests to me that their inline assembly is incorrect.  I don't
    // have time to investigate now, though.  So they are disabled.
    xxprintf_start();
    test_adc();
    test_adcx();
    test_add();
    test_adox();
    test_and();
    // test_bsx();
    test_cmp();
    test_dec();
    test_fcmov();
    test_fconst();
    test_fenv();
    test_floats();
    test_inc();
    // test_jcc();
    test_lea();
    test_mul();
    test_neg();
    test_not();
    test_or();
    test_rcl();
    test_rcr();
    test_rol();
    test_ror();
    test_sar();
    test_sbb();
    test_shl();
    test_shr();
    // test_string();
    test_sub();
    test_xor();
    xxprintf_done();
    // the expected MD5SUM is 66802c845574c7c69f30d29ef85f7ca3
    // (presumably computed over everything passed to xxprintf -- any change
    // to a printed string or to the set of enabled tests would alter it)
    return 0;
}
   1235