/* Common scaffolding for this test program: short type aliases, a
   deterministic pseudo-random generator, and the CRC-32 machinery that
   is used to condense the (very large) test output into one word. */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/* Set to 1 to make send() echo every line it folds into the CRC. */
#define VERBOSE 0

typedef  unsigned int            UInt;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  signed int              Int;
typedef  unsigned short          UShort;
typedef  unsigned long           UWord;
typedef  char                    HChar;

/* Return the next value of a fixed-seed pseudo-random sequence.
   The state lives in function-local statics, so the sequence is the
   same on every run of the program (and the function is not
   reentrant). */
unsigned myrandom(void)
{
   /* Simple multiply-with-carry random generator. */
   static unsigned m_w = 11;
   static unsigned m_z = 13;

   m_z = 36969 * (m_z & 65535) + (m_z >> 16);
   m_w = 18000 * (m_w & 65535) + (m_w >> 16);

   return (m_z << 16) + m_w;
}

/////////////////////////////////////////////////////////////////
// BEGIN crc32 stuff                                           //
/////////////////////////////////////////////////////////////////

/* Lookup table for the byte-at-a-time, most-significant-bit-first
   CRC update performed by UPDATE_CRC below. */
static const UInt crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};

/* Fold the byte |cha| into the running CRC held in the lvalue
   |crcVar|.  The body is wrapped in do/while(0) and every argument is
   parenthesised so the macro behaves as one statement regardless of
   the expressions passed in.  |crcVar| is evaluated more than once and
   therefore must not have side effects. */
#define UPDATE_CRC(crcVar,cha)                                \
   do {                                                       \
      (crcVar) = ((crcVar) << 8) ^                            \
                 crc32Table[((crcVar) >> 24) ^                \
                            ((UChar)(cha))];                  \
   } while (0)

/* Fold |nBytes| bytes starting at |bytes| into the CRC value |crcIn|
   and return the updated value.  The input buffer is not modified.
   With nBytes == 0 the result is |crcIn| unchanged. */
static UInt crcBytes ( const UChar* bytes, UWord nBytes, UInt crcIn )
{
   UInt  crc = crcIn;
   UWord i;
   for (i = 0; i < nBytes; i++) {
      UPDATE_CRC(crc, bytes[i]);
   }
   return crc;
}

/* Produce the final CRC from a running value: the bitwise complement. */
static UInt crcFinalise ( UInt crc ) {
   return ~crc;
}

////////

/* Running CRC over everything passed to send() so far. */
static UInt theCRC = 0xFFFFFFFF;

/* Staging buffer: a line of output is formatted here and then handed
   to send(). */
static HChar outBuf[1024];
// take output that's in outBuf, length as specified, and
// update the running crc.
140 static void send ( int nbytes ) 141 { 142 assert( ((unsigned int)nbytes) < sizeof(outBuf)-1); 143 assert(outBuf[nbytes] == 0); 144 theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC ); 145 if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf); 146 } 147 148 149 ///////////////////////////////////////////////////////////////// 150 // END crc32 stuff // 151 ///////////////////////////////////////////////////////////////// 152 153 #if 0 154 155 // full version 156 #define NVALS 76 157 158 static ULong val[NVALS] 159 = { 0x00ULL, 0x01ULL, 0x02ULL, 0x03ULL, 160 0x3FULL, 0x40ULL, 0x41ULL, 161 0x7EULL, 0x7FULL, 0x80ULL, 0x81ULL, 0x82ULL, 162 0xBFULL, 0xC0ULL, 0xC1ULL, 163 0xFCULL, 0xFDULL, 0xFEULL, 0xFFULL, 164 165 0xFF00ULL, 0xFF01ULL, 0xFF02ULL, 0xFF03ULL, 166 0xFF3FULL, 0xFF40ULL, 0xFF41ULL, 167 0xFF7EULL, 0xFF7FULL, 0xFF80ULL, 0xFF81ULL, 0xFF82ULL, 168 0xFFBFULL, 0xFFC0ULL, 0xFFC1ULL, 169 0xFFFCULL, 0xFFFDULL, 0xFFFEULL, 0xFFFFULL, 170 171 0xFFFFFF00ULL, 0xFFFFFF01ULL, 0xFFFFFF02ULL, 0xFFFFFF03ULL, 172 0xFFFFFF3FULL, 0xFFFFFF40ULL, 0xFFFFFF41ULL, 173 0xFFFFFF7EULL, 0xFFFFFF7FULL, 0xFFFFFF80ULL, 0xFFFFFF81ULL, 0xFFFFFF82ULL, 174 0xFFFFFFBFULL, 0xFFFFFFC0ULL, 0xFFFFFFC1ULL, 175 0xFFFFFFFCULL, 0xFFFFFFFDULL, 0xFFFFFFFEULL, 0xFFFFFFFFULL, 176 177 0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL, 0xFFFFFFFFFFFFFF02ULL, 178 0xFFFFFFFFFFFFFF03ULL, 179 0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL, 0xFFFFFFFFFFFFFF41ULL, 180 0xFFFFFFFFFFFFFF7EULL, 0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL, 181 0xFFFFFFFFFFFFFF81ULL, 0xFFFFFFFFFFFFFF82ULL, 182 0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL, 0xFFFFFFFFFFFFFFC1ULL, 183 0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFEULL, 184 0xFFFFFFFFFFFFFFFFULL 185 }; 186 187 #else 188 189 // shortened version, for use as valgrind regtest 190 #define NVALS 36 191 192 static ULong val[NVALS] 193 = { 0x00ULL, 0x01ULL, 194 0x3FULL, 0x40ULL, 195 0x7FULL, 0x80ULL, 196 0xBFULL, 0xC0ULL, 197 0xFFULL, 198 199 0xFF00ULL, 0xFF01ULL, 200 
0xFF3FULL, 0xFF40ULL, 201 0xFF7FULL, 0xFF80ULL, 202 0xFFBFULL, 0xFFC0ULL, 203 0xFFFFULL, 204 205 0xFFFFFF00ULL, 0xFFFFFF01ULL, 206 0xFFFFFF3FULL, 0xFFFFFF40ULL, 207 0xFFFFFF7EULL, 0xFFFFFF7FULL, 208 0xFFFFFFBFULL, 0xFFFFFFC0ULL, 209 0xFFFFFFFFULL, 210 211 0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL, 212 0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL, 213 0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL, 214 0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL, 215 0xFFFFFFFFFFFFFFFFULL 216 }; 217 218 #endif 219 220 ///////////////////////////////////// 221 222 #define CC_C 0x0001 223 #define CC_P 0x0004 224 #define CC_A 0x0010 225 #define CC_Z 0x0040 226 #define CC_S 0x0080 227 #define CC_O 0x0800 228 229 #define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O) 230 231 #define GEN_do_locked_G_E(_name,_eax) \ 232 \ 233 __attribute__((noinline)) void do_locked_G_E_##_name ( void ) \ 234 { \ 235 volatile Long e_val, g_val, e_val_before; \ 236 Long o, s, z, a, c, p, v1, v2, flags_in; \ 237 Long block[4]; \ 238 \ 239 for (v1 = 0; v1 < NVALS; v1++) { \ 240 for (v2 = 0; v2 < NVALS; v2++) { \ 241 \ 242 for (o = 0; o < 2; o++) { \ 243 for (s = 0; s < 2; s++) { \ 244 for (z = 0; z < 2; z++) { \ 245 for (a = 0; a < 2; a++) { \ 246 for (c = 0; c < 2; c++) { \ 247 for (p = 0; p < 2; p++) { \ 248 \ 249 flags_in = (o ? CC_O : 0) \ 250 | (s ? CC_S : 0) \ 251 | (z ? CC_Z : 0) \ 252 | (a ? CC_A : 0) \ 253 | (c ? CC_C : 0) \ 254 | (p ? 
CC_P : 0); \ 255 \ 256 g_val = val[v1]; \ 257 e_val = val[v2]; \ 258 e_val_before = e_val; \ 259 \ 260 block[0] = flags_in; \ 261 block[1] = g_val; \ 262 block[2] = (long)&e_val; \ 263 block[3] = 0; \ 264 __asm__ __volatile__( \ 265 "movq 0(%0), %%rax\n\t" \ 266 "pushq %%rax\n\t" \ 267 "popfq\n\t" \ 268 "movq 8(%0), %%rax\n\t" \ 269 "movq 16(%0), %%rbx\n\t" \ 270 "lock; " #_name " %%" #_eax ",(%%rbx)\n\t" \ 271 "pushfq\n\t" \ 272 "popq %%rax\n\t" \ 273 "movq %%rax, 24(%0)\n\t" \ 274 : : "r"(&block[0]) : "rax","rbx","cc","memory" \ 275 ); \ 276 \ 277 send( \ 278 sprintf(outBuf, \ 279 "%s G=%016llx E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \ 280 #_name, g_val, e_val_before, flags_in, \ 281 e_val, block[3] & CC_MASK)); \ 282 \ 283 }}}}}} \ 284 \ 285 }} \ 286 } 287 288 GEN_do_locked_G_E(addb,al) 289 GEN_do_locked_G_E(addw,ax) 290 GEN_do_locked_G_E(addl,eax) 291 GEN_do_locked_G_E(addq,rax) 292 293 GEN_do_locked_G_E(orb, al) 294 GEN_do_locked_G_E(orw, ax) 295 GEN_do_locked_G_E(orl, eax) 296 GEN_do_locked_G_E(orq, rax) 297 298 GEN_do_locked_G_E(adcb,al) 299 GEN_do_locked_G_E(adcw,ax) 300 GEN_do_locked_G_E(adcl,eax) 301 GEN_do_locked_G_E(adcq,rax) 302 303 GEN_do_locked_G_E(sbbb,al) 304 GEN_do_locked_G_E(sbbw,ax) 305 GEN_do_locked_G_E(sbbl,eax) 306 GEN_do_locked_G_E(sbbq,rax) 307 308 GEN_do_locked_G_E(andb,al) 309 GEN_do_locked_G_E(andw,ax) 310 GEN_do_locked_G_E(andl,eax) 311 GEN_do_locked_G_E(andq,rax) 312 313 GEN_do_locked_G_E(subb,al) 314 GEN_do_locked_G_E(subw,ax) 315 GEN_do_locked_G_E(subl,eax) 316 GEN_do_locked_G_E(subq,rax) 317 318 GEN_do_locked_G_E(xorb,al) 319 GEN_do_locked_G_E(xorw,ax) 320 GEN_do_locked_G_E(xorl,eax) 321 GEN_do_locked_G_E(xorq,rax) 322 323 324 325 326 #define GEN_do_locked_imm_E(_name,_eax,_imm) \ 327 \ 328 __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void ) \ 329 { \ 330 volatile Long e_val, e_val_before; \ 331 Long o, s, z, a, c, p, v2, flags_in; \ 332 Long block[3]; \ 333 \ 334 for (v2 = 0; v2 < NVALS; v2++) { \ 
335 \ 336 for (o = 0; o < 2; o++) { \ 337 for (s = 0; s < 2; s++) { \ 338 for (z = 0; z < 2; z++) { \ 339 for (a = 0; a < 2; a++) { \ 340 for (c = 0; c < 2; c++) { \ 341 for (p = 0; p < 2; p++) { \ 342 \ 343 flags_in = (o ? CC_O : 0) \ 344 | (s ? CC_S : 0) \ 345 | (z ? CC_Z : 0) \ 346 | (a ? CC_A : 0) \ 347 | (c ? CC_C : 0) \ 348 | (p ? CC_P : 0); \ 349 \ 350 e_val = val[v2]; \ 351 e_val_before = e_val; \ 352 \ 353 block[0] = flags_in; \ 354 block[1] = (long)&e_val; \ 355 block[2] = 0; \ 356 __asm__ __volatile__( \ 357 "movq 0(%0), %%rax\n\t" \ 358 "pushq %%rax\n\t" \ 359 "popfq\n\t" \ 360 "movq 8(%0), %%rbx\n\t" \ 361 "lock; " #_name " $" #_imm ",(%%rbx)\n\t" \ 362 "pushfq\n\t" \ 363 "popq %%rax\n\t" \ 364 "movq %%rax, 16(%0)\n\t" \ 365 : : "r"(&block[0]) : "rax","rbx","cc","memory" \ 366 ); \ 367 \ 368 send( \ 369 sprintf(outBuf, \ 370 "%s I=%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \ 371 #_name, #_imm, e_val_before, flags_in, \ 372 e_val, block[2] & CC_MASK)); \ 373 \ 374 }}}}}} \ 375 \ 376 } \ 377 } 378 379 GEN_do_locked_imm_E(addb,al,0x7F) 380 GEN_do_locked_imm_E(addb,al,0xF1) 381 GEN_do_locked_imm_E(addw,ax,0x7E) 382 GEN_do_locked_imm_E(addw,ax,0x9325) 383 GEN_do_locked_imm_E(addl,eax,0x7D) 384 GEN_do_locked_imm_E(addl,eax,0x31415927) 385 GEN_do_locked_imm_E(addq,rax,0x7D) 386 GEN_do_locked_imm_E(addq,rax,0x31415927) 387 388 GEN_do_locked_imm_E(orb,al,0x7F) 389 GEN_do_locked_imm_E(orb,al,0xF1) 390 GEN_do_locked_imm_E(orw,ax,0x7E) 391 GEN_do_locked_imm_E(orw,ax,0x9325) 392 GEN_do_locked_imm_E(orl,eax,0x7D) 393 GEN_do_locked_imm_E(orl,eax,0x31415927) 394 GEN_do_locked_imm_E(orq,rax,0x7D) 395 GEN_do_locked_imm_E(orq,rax,0x31415927) 396 397 GEN_do_locked_imm_E(adcb,al,0x7F) 398 GEN_do_locked_imm_E(adcb,al,0xF1) 399 GEN_do_locked_imm_E(adcw,ax,0x7E) 400 GEN_do_locked_imm_E(adcw,ax,0x9325) 401 GEN_do_locked_imm_E(adcl,eax,0x7D) 402 GEN_do_locked_imm_E(adcl,eax,0x31415927) 403 GEN_do_locked_imm_E(adcq,rax,0x7D) 404 
GEN_do_locked_imm_E(adcq,rax,0x31415927) 405 406 GEN_do_locked_imm_E(sbbb,al,0x7F) 407 GEN_do_locked_imm_E(sbbb,al,0xF1) 408 GEN_do_locked_imm_E(sbbw,ax,0x7E) 409 GEN_do_locked_imm_E(sbbw,ax,0x9325) 410 GEN_do_locked_imm_E(sbbl,eax,0x7D) 411 GEN_do_locked_imm_E(sbbl,eax,0x31415927) 412 GEN_do_locked_imm_E(sbbq,rax,0x7D) 413 GEN_do_locked_imm_E(sbbq,rax,0x31415927) 414 415 GEN_do_locked_imm_E(andb,al,0x7F) 416 GEN_do_locked_imm_E(andb,al,0xF1) 417 GEN_do_locked_imm_E(andw,ax,0x7E) 418 GEN_do_locked_imm_E(andw,ax,0x9325) 419 GEN_do_locked_imm_E(andl,eax,0x7D) 420 GEN_do_locked_imm_E(andl,eax,0x31415927) 421 GEN_do_locked_imm_E(andq,rax,0x7D) 422 GEN_do_locked_imm_E(andq,rax,0x31415927) 423 424 GEN_do_locked_imm_E(subb,al,0x7F) 425 GEN_do_locked_imm_E(subb,al,0xF1) 426 GEN_do_locked_imm_E(subw,ax,0x7E) 427 GEN_do_locked_imm_E(subw,ax,0x9325) 428 GEN_do_locked_imm_E(subl,eax,0x7D) 429 GEN_do_locked_imm_E(subl,eax,0x31415927) 430 GEN_do_locked_imm_E(subq,rax,0x7D) 431 GEN_do_locked_imm_E(subq,rax,0x31415927) 432 433 GEN_do_locked_imm_E(xorb,al,0x7F) 434 GEN_do_locked_imm_E(xorb,al,0xF1) 435 GEN_do_locked_imm_E(xorw,ax,0x7E) 436 GEN_do_locked_imm_E(xorw,ax,0x9325) 437 GEN_do_locked_imm_E(xorl,eax,0x7D) 438 GEN_do_locked_imm_E(xorl,eax,0x31415927) 439 GEN_do_locked_imm_E(xorq,rax,0x7D) 440 GEN_do_locked_imm_E(xorq,rax,0x31415927) 441 442 #define GEN_do_locked_unary_E(_name,_eax) \ 443 \ 444 __attribute__((noinline)) void do_locked_unary_E_##_name ( void ) \ 445 { \ 446 volatile Long e_val, e_val_before; \ 447 Long o, s, z, a, c, p, v2, flags_in; \ 448 Long block[3]; \ 449 \ 450 for (v2 = 0; v2 < NVALS; v2++) { \ 451 \ 452 for (o = 0; o < 2; o++) { \ 453 for (s = 0; s < 2; s++) { \ 454 for (z = 0; z < 2; z++) { \ 455 for (a = 0; a < 2; a++) { \ 456 for (c = 0; c < 2; c++) { \ 457 for (p = 0; p < 2; p++) { \ 458 \ 459 flags_in = (o ? CC_O : 0) \ 460 | (s ? CC_S : 0) \ 461 | (z ? CC_Z : 0) \ 462 | (a ? CC_A : 0) \ 463 | (c ? CC_C : 0) \ 464 | (p ? 
CC_P : 0); \ 465 \ 466 e_val = val[v2]; \ 467 e_val_before = e_val; \ 468 \ 469 block[0] = flags_in; \ 470 block[1] = (long)&e_val; \ 471 block[2] = 0; \ 472 __asm__ __volatile__( \ 473 "movq 0(%0), %%rax\n\t" \ 474 "pushq %%rax\n\t" \ 475 "popfq\n\t" \ 476 "movq 8(%0), %%rbx\n\t" \ 477 "lock; " #_name " (%%rbx)\n\t" \ 478 "pushfq\n\t" \ 479 "popq %%rax\n\t" \ 480 "movq %%rax, 16(%0)\n\t" \ 481 : : "r"(&block[0]) : "rax","rbx","cc","memory" \ 482 ); \ 483 \ 484 send( \ 485 sprintf(outBuf, \ 486 "%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \ 487 #_name, e_val_before, flags_in, \ 488 e_val, block[2] & CC_MASK)); \ 489 \ 490 }}}}}} \ 491 \ 492 } \ 493 } 494 495 GEN_do_locked_unary_E(decb,al) 496 GEN_do_locked_unary_E(decw,ax) 497 GEN_do_locked_unary_E(decl,eax) 498 GEN_do_locked_unary_E(decq,rax) 499 500 GEN_do_locked_unary_E(incb,al) 501 GEN_do_locked_unary_E(incw,ax) 502 GEN_do_locked_unary_E(incl,eax) 503 GEN_do_locked_unary_E(incq,rax) 504 505 GEN_do_locked_unary_E(negb,al) 506 GEN_do_locked_unary_E(negw,ax) 507 GEN_do_locked_unary_E(negl,eax) 508 GEN_do_locked_unary_E(negq,rax) 509 510 GEN_do_locked_unary_E(notb,al) 511 GEN_do_locked_unary_E(notw,ax) 512 GEN_do_locked_unary_E(notl,eax) 513 GEN_do_locked_unary_E(notq,rax) 514 515 516 ///////////////////////////////////////////////////////////////// 517 518 ULong btsq_mem ( UChar* base, int bitno ) 519 { 520 ULong res; 521 __asm__ 522 __volatile__("lock; btsq\t%2, %0\n\t" 523 "setc %%dl\n\t" 524 "movzbq %%dl,%1\n" 525 : "=m" (*base), "=r" (res) 526 : "r" ((ULong)bitno) : "rdx","cc","memory" ); 527 /* Pretty meaningless to dereference base here, but that's what you 528 have to do to get a btsl insn which refers to memory starting at 529 base. 
*/ 530 return res; 531 } 532 ULong btsl_mem ( UChar* base, int bitno ) 533 { 534 ULong res; 535 __asm__ 536 __volatile__("lock; btsl\t%2, %0\n\t" 537 "setc %%dl\n\t" 538 "movzbq %%dl,%1\n" 539 : "=m" (*base), "=r" (res) 540 : "r" ((UInt)bitno)); 541 return res; 542 } 543 ULong btsw_mem ( UChar* base, int bitno ) 544 { 545 ULong res; 546 __asm__ 547 __volatile__("lock; btsw\t%w2, %0\n\t" 548 "setc %%dl\n\t" 549 "movzbq %%dl,%1\n" 550 : "=m" (*base), "=r" (res) 551 : "r" ((ULong)bitno)); 552 return res; 553 } 554 555 ULong btrq_mem ( UChar* base, int bitno ) 556 { 557 ULong res; 558 __asm__ 559 __volatile__("lock; btrq\t%2, %0\n\t" 560 "setc %%dl\n\t" 561 "movzbq %%dl,%1\n" 562 : "=m" (*base), "=r" (res) 563 : "r" ((ULong)bitno)); 564 return res; 565 } 566 ULong btrl_mem ( UChar* base, int bitno ) 567 { 568 ULong res; 569 __asm__ 570 __volatile__("lock; btrl\t%2, %0\n\t" 571 "setc %%dl\n\t" 572 "movzbq %%dl,%1\n" 573 : "=m" (*base), "=r" (res) 574 : "r" ((UInt)bitno)); 575 return res; 576 } 577 ULong btrw_mem ( UChar* base, int bitno ) 578 { 579 ULong res; 580 __asm__ 581 __volatile__("lock; btrw\t%w2, %0\n\t" 582 "setc %%dl\n\t" 583 "movzbq %%dl,%1\n" 584 : "=m" (*base), "=r" (res) 585 : "r" ((ULong)bitno)); 586 return res; 587 } 588 589 ULong btcq_mem ( UChar* base, int bitno ) 590 { 591 ULong res; 592 __asm__ 593 __volatile__("lock; btcq\t%2, %0\n\t" 594 "setc %%dl\n\t" 595 "movzbq %%dl,%1\n" 596 : "=m" (*base), "=r" (res) 597 : "r" ((ULong)bitno)); 598 return res; 599 } 600 ULong btcl_mem ( UChar* base, int bitno ) 601 { 602 ULong res; 603 __asm__ 604 __volatile__("lock; btcl\t%2, %0\n\t" 605 "setc %%dl\n\t" 606 "movzbq %%dl,%1\n" 607 : "=m" (*base), "=r" (res) 608 : "r" ((UInt)bitno)); 609 return res; 610 } 611 ULong btcw_mem ( UChar* base, int bitno ) 612 { 613 ULong res; 614 __asm__ 615 __volatile__("lock; btcw\t%w2, %0\n\t" 616 "setc %%dl\n\t" 617 "movzbq %%dl,%1\n" 618 : "=m" (*base), "=r" (res) 619 : "r" ((ULong)bitno)); 620 return res; 621 } 622 623 ULong 
btq_mem ( UChar* base, int bitno ) 624 { 625 ULong res; 626 __asm__ 627 __volatile__("btq\t%2, %0\n\t" 628 "setc %%dl\n\t" 629 "movzbq %%dl,%1\n" 630 : "=m" (*base), "=r" (res) 631 : "r" ((ULong)bitno) 632 : "cc", "memory"); 633 return res; 634 } 635 ULong btl_mem ( UChar* base, int bitno ) 636 { 637 ULong res; 638 __asm__ 639 __volatile__("btl\t%2, %0\n\t" 640 "setc %%dl\n\t" 641 "movzbq %%dl,%1\n" 642 : "=m" (*base), "=r" (res) 643 : "r" ((UInt)bitno) 644 : "cc", "memory"); 645 return res; 646 } 647 ULong btw_mem ( UChar* base, int bitno ) 648 { 649 ULong res; 650 __asm__ 651 __volatile__("btw\t%w2, %0\n\t" 652 "setc %%dl\n\t" 653 "movzbq %%dl,%1\n" 654 : "=m" (*base), "=r" (res) 655 : "r" ((ULong)bitno)); 656 return res; 657 } 658 659 ULong rol1 ( ULong x ) 660 { 661 return (x << 1) | (x >> 63); 662 } 663 664 void do_bt_G_E_tests ( void ) 665 { 666 ULong n, bitoff, op; 667 ULong c; 668 UChar* block; 669 ULong carrydep, res;; 670 671 /*------------------------ MEM-Q -----------------------*/ 672 673 carrydep = 0; 674 block = calloc(200,1); 675 block += 100; 676 /* Valid bit offsets are -800 .. 799 inclusive. */ 677 678 for (n = 0; n < 10000; n++) { 679 bitoff = (myrandom() % 1600) - 800; 680 op = myrandom() % 4; 681 c = 2; 682 switch (op) { 683 case 0: c = btsq_mem(block, bitoff); break; 684 case 1: c = btrq_mem(block, bitoff); break; 685 case 2: c = btcq_mem(block, bitoff); break; 686 case 3: c = btq_mem(block, bitoff); break; 687 } 688 c &= 255; 689 assert(c == 0 || c == 1); 690 carrydep = c ? 
(rol1(carrydep) ^ (Long)bitoff) : carrydep; 691 } 692 693 /* Compute final result */ 694 block -= 100; 695 res = 0; 696 for (n = 0; n < 200; n++) { 697 UChar ch = block[n]; 698 /* printf("%d ", (int)block[n]); */ 699 res = rol1(res) ^ (ULong)ch; 700 } 701 702 send( sprintf(outBuf, 703 "bt{s,r,c}q: final res 0x%llx, carrydep 0x%llx\n", 704 res, carrydep)); 705 free(block); 706 707 /*------------------------ MEM-L -----------------------*/ 708 709 carrydep = 0; 710 block = calloc(200,1); 711 block += 100; 712 /* Valid bit offsets are -800 .. 799 inclusive. */ 713 714 for (n = 0; n < 10000; n++) { 715 bitoff = (myrandom() % 1600) - 800; 716 op = myrandom() % 4; 717 c = 2; 718 switch (op) { 719 case 0: c = btsl_mem(block, bitoff); break; 720 case 1: c = btrl_mem(block, bitoff); break; 721 case 2: c = btcl_mem(block, bitoff); break; 722 case 3: c = btl_mem(block, bitoff); break; 723 } 724 c &= 255; 725 assert(c == 0 || c == 1); 726 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep; 727 } 728 729 /* Compute final result */ 730 block -= 100; 731 res = 0; 732 for (n = 0; n < 200; n++) { 733 UChar ch = block[n]; 734 /* printf("%d ", (int)block[n]); */ 735 res = rol1(res) ^ (ULong)ch; 736 } 737 738 send( sprintf(outBuf, 739 "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n", 740 res, carrydep)); 741 free(block); 742 743 /*------------------------ MEM-W -----------------------*/ 744 745 carrydep = 0; 746 block = calloc(200,1); 747 block += 100; 748 /* Valid bit offsets are -800 .. 799 inclusive. */ 749 750 for (n = 0; n < 10000; n++) { 751 bitoff = (myrandom() % 1600) - 800; 752 op = myrandom() % 4; 753 c = 2; 754 switch (op) { 755 case 0: c = btsw_mem(block, bitoff); break; 756 case 1: c = btrw_mem(block, bitoff); break; 757 case 2: c = btcw_mem(block, bitoff); break; 758 case 3: c = btw_mem(block, bitoff); break; 759 } 760 c &= 255; 761 assert(c == 0 || c == 1); 762 carrydep = c ? 
(rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep; 763 } 764 765 /* Compute final result */ 766 block -= 100; 767 res = 0; 768 for (n = 0; n < 200; n++) { 769 UChar ch = block[n]; 770 /* printf("%d ", (int)block[n]); */ 771 res = rol1(res) ^ (ULong)ch; 772 } 773 774 send(sprintf(outBuf, 775 "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n", 776 res, carrydep)); 777 free(block); 778 } 779 780 781 ///////////////////////////////////////////////////////////////// 782 783 /* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and 784 also reconstruct the original bits 0, 1, 2, 3 by looking at the 785 carry flag. Returned result has mashed bits 0-3 at the bottom and 786 the reconstructed original bits 0-3 as 4-7. */ 787 788 ULong mash_mem_Q ( ULong* origp ) 789 { 790 ULong reconstructed, mashed; 791 __asm__ __volatile__ ( 792 "movq %2, %%rdx\n\t" 793 "" 794 "movq $0, %%rax\n\t" 795 "\n\t" 796 "btq $0, (%%rdx)\n\t" 797 "setb %%cl\n\t" 798 "movzbq %%cl, %%rcx\n\t" 799 "orq %%rcx, %%rax\n\t" 800 "\n\t" 801 "lock; btsq $1, (%%rdx)\n\t" 802 "setb %%cl\n\t" 803 "movzbq %%cl, %%rcx\n\t" 804 "shlq $1, %%rcx\n\t" 805 "orq %%rcx, %%rax\n\t" 806 "\n\t" 807 "lock; btrq $2, (%%rdx)\n\t" 808 "setb %%cl\n\t" 809 "movzbq %%cl, %%rcx\n\t" 810 "shlq $2, %%rcx\n\t" 811 "orq %%rcx, %%rax\n\t" 812 "\n\t" 813 "lock; btcq $3, (%%rdx)\n\t" 814 "setb %%cl\n\t" 815 "movzbq %%cl, %%rcx\n\t" 816 "shlq $3, %%rcx\n\t" 817 "orq %%rcx, %%rax\n\t" 818 "\n\t" 819 "movq %%rax, %0\n\t" 820 "movq (%%rdx), %1" 821 : "=r" (reconstructed), "=r" (mashed) 822 : "r" (origp) 823 : "rax", "rcx", "rdx", "cc"); 824 return (mashed & 0xF) | ((reconstructed & 0xF) << 4); 825 } 826 827 ULong mash_mem_L ( UInt* origp ) 828 { 829 ULong reconstructed; UInt mashed; 830 __asm__ __volatile__ ( 831 "movq %2, %%rdx\n\t" 832 "" 833 "movq $0, %%rax\n\t" 834 "\n\t" 835 "btl $0, (%%rdx)\n\t" 836 "setb %%cl\n\t" 837 "movzbq %%cl, %%rcx\n\t" 838 "orq %%rcx, %%rax\n\t" 839 "\n\t" 840 "lock; btsl $1, (%%rdx)\n\t" 841 "setb 
%%cl\n\t" 842 "movzbq %%cl, %%rcx\n\t" 843 "shlq $1, %%rcx\n\t" 844 "orq %%rcx, %%rax\n\t" 845 "\n\t" 846 "lock; btrl $2, (%%rdx)\n\t" 847 "setb %%cl\n\t" 848 "movzbq %%cl, %%rcx\n\t" 849 "shlq $2, %%rcx\n\t" 850 "orq %%rcx, %%rax\n\t" 851 "\n\t" 852 "lock; btcl $3, (%%rdx)\n\t" 853 "setb %%cl\n\t" 854 "movzbq %%cl, %%rcx\n\t" 855 "shlq $3, %%rcx\n\t" 856 "orq %%rcx, %%rax\n\t" 857 "\n\t" 858 "movq %%rax, %0\n\t" 859 "movl (%%rdx), %1" 860 : "=r" (reconstructed), "=r" (mashed) 861 : "r" (origp) 862 : "rax", "rcx", "rdx", "cc"); 863 return (mashed & 0xF) | ((reconstructed & 0xF) << 4); 864 } 865 866 ULong mash_mem_W ( UShort* origp ) 867 { 868 ULong reconstructed, mashed; 869 __asm__ __volatile__ ( 870 "movq %2, %%rdx\n\t" 871 "" 872 "movq $0, %%rax\n\t" 873 "\n\t" 874 "btw $0, (%%rdx)\n\t" 875 "setb %%cl\n\t" 876 "movzbq %%cl, %%rcx\n\t" 877 "orq %%rcx, %%rax\n\t" 878 "\n\t" 879 "lock; btsw $1, (%%rdx)\n\t" 880 "setb %%cl\n\t" 881 "movzbq %%cl, %%rcx\n\t" 882 "shlq $1, %%rcx\n\t" 883 "orq %%rcx, %%rax\n\t" 884 "\n\t" 885 "lock; btrw $2, (%%rdx)\n\t" 886 "setb %%cl\n\t" 887 "movzbq %%cl, %%rcx\n\t" 888 "shlq $2, %%rcx\n\t" 889 "orq %%rcx, %%rax\n\t" 890 "\n\t" 891 "lock; btcw $3, (%%rdx)\n\t" 892 "setb %%cl\n\t" 893 "movzbq %%cl, %%rcx\n\t" 894 "shlq $3, %%rcx\n\t" 895 "orq %%rcx, %%rax\n\t" 896 "\n\t" 897 "movq %%rax, %0\n\t" 898 "movzwq (%%rdx), %1" 899 : "=r" (reconstructed), "=r" (mashed) 900 : "r" (origp) 901 : "rax", "rcx", "rdx", "cc"); 902 return (mashed & 0xF) | ((reconstructed & 0xF) << 4); 903 } 904 905 906 void do_bt_imm_E_tests( void ) 907 { 908 ULong i; 909 ULong* iiq = malloc(sizeof(ULong)); 910 UInt* iil = malloc(sizeof(UInt)); 911 UShort* iiw = malloc(sizeof(UShort)); 912 for (i = 0; i < 0x10; i++) { 913 *iiq = i; 914 *iil = i; 915 *iiw = i; 916 send(sprintf(outBuf,"0x%llx -> 0x%02llx 0x%02llx 0x%02llx\n", i, 917 mash_mem_Q(iiq), mash_mem_L(iil), mash_mem_W(iiw))); 918 } 919 free(iiq); 920 free(iil); 921 free(iiw); 922 } 923 924 925 
///////////////////////////////////////////////////////////////// 926 927 int main ( void ) 928 { 929 do_locked_G_E_addb(); 930 do_locked_G_E_addw(); 931 do_locked_G_E_addl(); 932 do_locked_G_E_addq(); 933 934 do_locked_G_E_orb(); 935 do_locked_G_E_orw(); 936 do_locked_G_E_orl(); 937 do_locked_G_E_orq(); 938 939 do_locked_G_E_adcb(); 940 do_locked_G_E_adcw(); 941 do_locked_G_E_adcl(); 942 do_locked_G_E_adcq(); 943 944 do_locked_G_E_sbbb(); 945 do_locked_G_E_sbbw(); 946 do_locked_G_E_sbbl(); 947 do_locked_G_E_sbbq(); 948 949 do_locked_G_E_andb(); 950 do_locked_G_E_andw(); 951 do_locked_G_E_andl(); 952 do_locked_G_E_andq(); 953 954 do_locked_G_E_subb(); 955 do_locked_G_E_subw(); 956 do_locked_G_E_subl(); 957 do_locked_G_E_subq(); 958 959 do_locked_G_E_xorb(); 960 do_locked_G_E_xorw(); 961 do_locked_G_E_xorl(); 962 do_locked_G_E_xorq(); 963 // 4 * 7 964 965 do_locked_imm_E_addb_0x7F(); 966 do_locked_imm_E_addb_0xF1(); 967 do_locked_imm_E_addw_0x7E(); 968 do_locked_imm_E_addw_0x9325(); 969 do_locked_imm_E_addl_0x7D(); 970 do_locked_imm_E_addl_0x31415927(); 971 do_locked_imm_E_addq_0x7D(); 972 do_locked_imm_E_addq_0x31415927(); 973 974 do_locked_imm_E_orb_0x7F(); 975 do_locked_imm_E_orb_0xF1(); 976 do_locked_imm_E_orw_0x7E(); 977 do_locked_imm_E_orw_0x9325(); 978 do_locked_imm_E_orl_0x7D(); 979 do_locked_imm_E_orl_0x31415927(); 980 do_locked_imm_E_orq_0x7D(); 981 do_locked_imm_E_orq_0x31415927(); 982 983 do_locked_imm_E_adcb_0x7F(); 984 do_locked_imm_E_adcb_0xF1(); 985 do_locked_imm_E_adcw_0x7E(); 986 do_locked_imm_E_adcw_0x9325(); 987 do_locked_imm_E_adcl_0x7D(); 988 do_locked_imm_E_adcl_0x31415927(); 989 do_locked_imm_E_adcq_0x7D(); 990 do_locked_imm_E_adcq_0x31415927(); 991 992 do_locked_imm_E_sbbb_0x7F(); 993 do_locked_imm_E_sbbb_0xF1(); 994 do_locked_imm_E_sbbw_0x7E(); 995 do_locked_imm_E_sbbw_0x9325(); 996 do_locked_imm_E_sbbl_0x7D(); 997 do_locked_imm_E_sbbl_0x31415927(); 998 do_locked_imm_E_sbbq_0x7D(); 999 do_locked_imm_E_sbbq_0x31415927(); 1000 1001 
do_locked_imm_E_andb_0x7F(); 1002 do_locked_imm_E_andb_0xF1(); 1003 do_locked_imm_E_andw_0x7E(); 1004 do_locked_imm_E_andw_0x9325(); 1005 do_locked_imm_E_andl_0x7D(); 1006 do_locked_imm_E_andl_0x31415927(); 1007 do_locked_imm_E_andq_0x7D(); 1008 do_locked_imm_E_andq_0x31415927(); 1009 1010 do_locked_imm_E_subb_0x7F(); 1011 do_locked_imm_E_subb_0xF1(); 1012 do_locked_imm_E_subw_0x7E(); 1013 do_locked_imm_E_subw_0x9325(); 1014 do_locked_imm_E_subl_0x7D(); 1015 do_locked_imm_E_subl_0x31415927(); 1016 do_locked_imm_E_subq_0x7D(); 1017 do_locked_imm_E_subq_0x31415927(); 1018 1019 do_locked_imm_E_xorb_0x7F(); 1020 do_locked_imm_E_xorb_0xF1(); 1021 do_locked_imm_E_xorw_0x7E(); 1022 do_locked_imm_E_xorw_0x9325(); 1023 do_locked_imm_E_xorl_0x7D(); 1024 do_locked_imm_E_xorl_0x31415927(); 1025 do_locked_imm_E_xorq_0x7D(); 1026 do_locked_imm_E_xorq_0x31415927(); 1027 // 4 * 7 + 8 * 7 == 84 1028 1029 do_locked_unary_E_decb(); 1030 do_locked_unary_E_decw(); 1031 do_locked_unary_E_decl(); 1032 do_locked_unary_E_decq(); 1033 1034 do_locked_unary_E_incb(); 1035 do_locked_unary_E_incw(); 1036 do_locked_unary_E_incl(); 1037 do_locked_unary_E_incq(); 1038 1039 do_locked_unary_E_negb(); 1040 do_locked_unary_E_negw(); 1041 do_locked_unary_E_negl(); 1042 do_locked_unary_E_negq(); 1043 1044 do_locked_unary_E_notb(); 1045 do_locked_unary_E_notw(); 1046 do_locked_unary_E_notl(); 1047 do_locked_unary_E_notq(); 1048 // 100 1049 1050 do_bt_G_E_tests(); 1051 // 109 1052 do_bt_imm_E_tests(); 1053 // 118 1054 1055 // So there should be 118 lock-prefixed instructions in the 1056 // disassembly of this compilation unit. 
1057 // confirm with 1058 // objdump -d ./amd64locked | grep lock | grep -v do_lock | grep -v elf64 | wc 1059 1060 1061 { UInt crcExpd = 0xDF0656F1; 1062 theCRC = crcFinalise( theCRC ); 1063 if (theCRC == crcExpd) { 1064 printf("amd64locked: PASS: CRCs actual 0x%08X expected 0x%08X\n", 1065 theCRC, crcExpd); 1066 } else { 1067 printf("amd64locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n", 1068 theCRC, crcExpd); 1069 printf("amd64locked: set #define VERBOSE 1 to diagnose\n"); 1070 } 1071 } 1072 1073 return 0; 1074 } 1075