/* Regression test for lock-prefixed x86 read-modify-write instructions.

   Each test executes an instruction (add/or/adc/sbb/and/sub/xor,
   dec/inc/neg/not, bt/bts/btr/btc) on a memory operand for many input
   values and input flag settings, formats the observed result as a
   line of text, and folds that text into a running CRC.  main() then
   compares the final CRC against a hard-wired expected value and
   prints PASS or FAIL.

   The inline assembly is 32-bit x86 only: it uses pushl/popfl, the
   eax/ebx registers, and stores pointers in 'int' slots. */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/* Set to 1 to print every result line (and the CRC after it). */
#define VERBOSE 0

typedef  unsigned int            UInt;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  signed int              Int;
typedef  unsigned short          UShort;
typedef  unsigned long           UWord;
typedef  char                    HChar;

/////////////////////////////////////////////////////////////////
// BEGIN crc32 stuff                                           //
/////////////////////////////////////////////////////////////////

/* Byte-indexed lookup table used by UPDATE_CRC. */
static const UInt crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};

/* Fold one byte 'cha' into the 32-bit CRC held in the lvalue 'crcVar'.
   Wrapped in do/while(0) so it behaves as a single statement, and the
   arguments are parenthesised so any expression may be passed. */
#define UPDATE_CRC(crcVar,cha)                        \
   do {                                               \
      (crcVar) = ((crcVar) << 8) ^                    \
                 crc32Table[((crcVar) >> 24) ^        \
                            ((UChar)(cha))];          \
   } while (0)

/* Fold 'nBytes' bytes starting at 'bytes' into the CRC 'crcIn' and
   return the updated CRC.  nBytes == 0 returns crcIn unchanged. */
static UInt crcBytes ( const UChar* bytes, UWord nBytes, UInt crcIn )
{
   UInt  crc = crcIn;
   UWord i;
   for (i = 0; i < nBytes; i++) {
      UPDATE_CRC(crc, bytes[i]);
   }
   return crc;
}

/* Produce the final CRC value from the running value (bitwise NOT). */
static UInt crcFinalise ( UInt crc ) {
   return ~crc;
}

////////

/* Running CRC over everything passed to send(). */
static UInt theCRC = 0xFFFFFFFF;

/* Scratch buffer the tests format each result line into. */
static HChar outBuf[1024];

// take output that's in outBuf, length as specified, and
// update the running crc.
//
// 'nbytes' is the value returned by the snprintf call that filled
// outBuf.  A negative value (formatting error) or a value that does
// not fit in the buffer (truncated output) fails the first assert.
static void send ( int nbytes )
{
   assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
   assert(outBuf[nbytes] == 0);
   theCRC = crcBytes( (const UChar*)&outBuf[0], (UWord)nbytes, theCRC );
   if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
}


/////////////////////////////////////////////////////////////////
// END crc32 stuff                                             //
/////////////////////////////////////////////////////////////////

/* Operand values fed to the arithmetic/logic tests. */
#if 0

// full version
#define NVALS 57

static unsigned int val[NVALS]
    = { 0x00, 0x01, 0x02, 0x03,
        0x3F, 0x40, 0x41,
        0x7E, 0x7F, 0x80, 0x81, 0x82,
        0xBF, 0xC0, 0xC1,
        0xFC, 0xFD, 0xFE, 0xFF,

        0xFF00, 0xFF01, 0xFF02, 0xFF03,
        0xFF3F, 0xFF40, 0xFF41,
        0xFF7E, 0xFF7F, 0xFF80, 0xFF81, 0xFF82,
        0xFFBF, 0xFFC0, 0xFFC1,
        0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF,

        0xFFFFFF00, 0xFFFFFF01, 0xFFFFFF02, 0xFFFFFF03,
        0xFFFFFF3F, 0xFFFFFF40, 0xFFFFFF41,
        0xFFFFFF7E, 0xFFFFFF7F, 0xFFFFFF80, 0xFFFFFF81, 0xFFFFFF82,
        0xFFFFFFBF, 0xFFFFFFC0, 0xFFFFFFC1,
        0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF
      };

#else

// shortened version, for use as valgrind regtest
#define NVALS 27

static unsigned int val[NVALS]
    = { 0x00, 0x01,
        0x3F, 0x40,
        0x7F, 0x80,
        0xBF, 0xC0,
        0xFF,

        0xFF00, 0xFF01,
        0xFF3F, 0xFF40,
        0xFF7F, 0xFF80,
        0xFFBF, 0xFFC0,
        0xFFFF,

        0xFFFFFF00, 0xFFFFFF01,
        0xFFFFFF3F, 0xFFFFFF40,
        0xFFFFFF7F, 0xFFFFFF80,
        0xFFFFFFBF, 0xFFFFFFC0,
        0xFFFFFFFF
      };

#endif

/////////////////////////////////////

/* Bits of the x86 flags word that the tests set before, and record
   after, each instruction. */
#define CC_C    0x0001
#define CC_P    0x0004
#define CC_A    0x0010
#define CC_Z    0x0040
#define CC_S    0x0080
#define CC_O    0x0800

#define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O)

/* Generate do_locked_G_E_<_name>(): test "lock; <_name> %<_eax>,(mem)".

   For every pair (G, E) drawn from val[] and every combination of the
   six CC_* input flags, the generated function loads the flags with
   popfl, puts G in %eax and the address of E in %ebx, executes the
   locked instruction, reads the flags back with pushfl, and sends one
   formatted result line to the running CRC.

   Layout of block[] as seen by the asm:
      [0] flags in   [1] G value   [2] address of E   [3] flags out */
#define GEN_do_locked_G_E(_name,_eax)                               \
                                                                    \
  __attribute__((noinline)) void do_locked_G_E_##_name ( void )     \
  {                                                                 \
    volatile int e_val, g_val, e_val_before;                        \
    int o, s, z, a, c, p, v1, v2, flags_in;                         \
    int block[4];                                                   \
                                                                    \
    for (v1 = 0; v1 < NVALS; v1++) {                                \
    for (v2 = 0; v2 < NVALS; v2++) {                                \
                                                                    \
    for (o = 0; o < 2; o++) {                                       \
    for (s = 0; s < 2; s++) {                                       \
    for (z = 0; z < 2; z++) {                                       \
    for (a = 0; a < 2; a++) {                                       \
    for (c = 0; c < 2; c++) {                                       \
    for (p = 0; p < 2; p++) {                                       \
                                                                    \
      flags_in = (o ? CC_O : 0)                                     \
               | (s ? CC_S : 0)                                     \
               | (z ? CC_Z : 0)                                     \
               | (a ? CC_A : 0)                                     \
               | (c ? CC_C : 0)                                     \
               | (p ? CC_P : 0);                                    \
                                                                    \
      g_val = val[v1];                                              \
      e_val = val[v2];                                              \
      e_val_before = e_val;                                         \
                                                                    \
      block[0] = flags_in;                                          \
      block[1] = g_val;                                             \
      block[2] = (int)(long)&e_val;                                 \
      block[3] = 0;                                                 \
      __asm__ __volatile__(                                         \
          "movl 0(%0), %%eax\n\t"                                   \
          "pushl %%eax\n\t"                                         \
          "popfl\n\t"                                               \
          "movl 4(%0), %%eax\n\t"                                   \
          "movl 8(%0), %%ebx\n\t"                                   \
          "lock; " #_name " %%" #_eax ",(%%ebx)\n\t"                \
          "pushfl\n\t"                                              \
          "popl %%eax\n\t"                                          \
          "movl %%eax, 12(%0)\n\t"                                  \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"            \
      );                                                            \
                                                                    \
      send(                                                         \
         snprintf(outBuf, sizeof(outBuf),                           \
                  "%s G=%08x E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \
                  #_name, g_val, e_val_before, flags_in,            \
                  e_val, block[3] & CC_MASK) );                     \
                                                                    \
    }}}}}}                                                          \
                                                                    \
    }}                                                              \
  }

GEN_do_locked_G_E(addb,al)
GEN_do_locked_G_E(addw,ax)
GEN_do_locked_G_E(addl,eax)

GEN_do_locked_G_E(orb, al)
GEN_do_locked_G_E(orw, ax)
GEN_do_locked_G_E(orl, eax)

GEN_do_locked_G_E(adcb,al)
GEN_do_locked_G_E(adcw,ax)
GEN_do_locked_G_E(adcl,eax)

GEN_do_locked_G_E(sbbb,al)
GEN_do_locked_G_E(sbbw,ax)
GEN_do_locked_G_E(sbbl,eax)

GEN_do_locked_G_E(andb,al)
GEN_do_locked_G_E(andw,ax)
GEN_do_locked_G_E(andl,eax)

GEN_do_locked_G_E(subb,al)
GEN_do_locked_G_E(subw,ax)
GEN_do_locked_G_E(subl,eax)

GEN_do_locked_G_E(xorb,al)
GEN_do_locked_G_E(xorw,ax)
GEN_do_locked_G_E(xorl,eax)




/* Generate do_locked_imm_E_<_name>_<_imm>(): test
   "lock; <_name> $<_imm>,(mem)".

   Same scheme as GEN_do_locked_G_E but with an immediate source
   operand, so only E ranges over val[].  The _eax argument is not
   used by the expansion.

   Layout of block[] as seen by the asm:
      [0] flags in   [1] address of E   [2] flags out */
#define GEN_do_locked_imm_E(_name,_eax,_imm)                        \
                                                                    \
  __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void )  \
  {                                                                 \
    volatile int e_val, e_val_before;                               \
    int o, s, z, a, c, p, v2, flags_in;                             \
    int block[3];                                                   \
                                                                    \
    for (v2 = 0; v2 < NVALS; v2++) {                                \
                                                                    \
    for (o = 0; o < 2; o++) {                                       \
    for (s = 0; s < 2; s++) {                                       \
    for (z = 0; z < 2; z++) {                                       \
    for (a = 0; a < 2; a++) {                                       \
    for (c = 0; c < 2; c++) {                                       \
    for (p = 0; p < 2; p++) {                                       \
                                                                    \
      flags_in = (o ? CC_O : 0)                                     \
               | (s ? CC_S : 0)                                     \
               | (z ? CC_Z : 0)                                     \
               | (a ? CC_A : 0)                                     \
               | (c ? CC_C : 0)                                     \
               | (p ? CC_P : 0);                                    \
                                                                    \
      e_val = val[v2];                                              \
      e_val_before = e_val;                                         \
                                                                    \
      block[0] = flags_in;                                          \
      block[1] = (int)(long)&e_val;                                 \
      block[2] = 0;                                                 \
      __asm__ __volatile__(                                         \
          "movl 0(%0), %%eax\n\t"                                   \
          "pushl %%eax\n\t"                                         \
          "popfl\n\t"                                               \
          "movl 4(%0), %%ebx\n\t"                                   \
          "lock; " #_name " $" #_imm ",(%%ebx)\n\t"                 \
          "pushfl\n\t"                                              \
          "popl %%eax\n\t"                                          \
          "movl %%eax, 8(%0)\n\t"                                   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"            \
      );                                                            \
                                                                    \
      send(                                                         \
         snprintf(outBuf, sizeof(outBuf),                           \
                  "%s I=%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \
                  #_name, #_imm, e_val_before, flags_in,            \
                  e_val, block[2] & CC_MASK) );                     \
                                                                    \
    }}}}}}                                                          \
                                                                    \
    }                                                               \
  }

GEN_do_locked_imm_E(addb,al,0x7F)
GEN_do_locked_imm_E(addb,al,0xF1)
GEN_do_locked_imm_E(addw,ax,0x7E)
GEN_do_locked_imm_E(addw,ax,0x9325)
GEN_do_locked_imm_E(addl,eax,0x7D)
GEN_do_locked_imm_E(addl,eax,0x31415927)

GEN_do_locked_imm_E(orb,al,0x7F)
GEN_do_locked_imm_E(orb,al,0xF1)
GEN_do_locked_imm_E(orw,ax,0x7E)
GEN_do_locked_imm_E(orw,ax,0x9325)
GEN_do_locked_imm_E(orl,eax,0x7D)
GEN_do_locked_imm_E(orl,eax,0x31415927)

GEN_do_locked_imm_E(adcb,al,0x7F)
GEN_do_locked_imm_E(adcb,al,0xF1)
GEN_do_locked_imm_E(adcw,ax,0x7E)
GEN_do_locked_imm_E(adcw,ax,0x9325)
GEN_do_locked_imm_E(adcl,eax,0x7D)
GEN_do_locked_imm_E(adcl,eax,0x31415927)

GEN_do_locked_imm_E(sbbb,al,0x7F)
GEN_do_locked_imm_E(sbbb,al,0xF1)
GEN_do_locked_imm_E(sbbw,ax,0x7E)
GEN_do_locked_imm_E(sbbw,ax,0x9325)
GEN_do_locked_imm_E(sbbl,eax,0x7D)
GEN_do_locked_imm_E(sbbl,eax,0x31415927)

GEN_do_locked_imm_E(andb,al,0x7F)
GEN_do_locked_imm_E(andb,al,0xF1)
GEN_do_locked_imm_E(andw,ax,0x7E)
GEN_do_locked_imm_E(andw,ax,0x9325)
GEN_do_locked_imm_E(andl,eax,0x7D)
GEN_do_locked_imm_E(andl,eax,0x31415927)

GEN_do_locked_imm_E(subb,al,0x7F)
GEN_do_locked_imm_E(subb,al,0xF1)
GEN_do_locked_imm_E(subw,ax,0x7E)
GEN_do_locked_imm_E(subw,ax,0x9325)
GEN_do_locked_imm_E(subl,eax,0x7D)
GEN_do_locked_imm_E(subl,eax,0x31415927)

GEN_do_locked_imm_E(xorb,al,0x7F)
GEN_do_locked_imm_E(xorb,al,0xF1)
GEN_do_locked_imm_E(xorw,ax,0x7E)
GEN_do_locked_imm_E(xorw,ax,0x9325)
GEN_do_locked_imm_E(xorl,eax,0x7D)
GEN_do_locked_imm_E(xorl,eax,0x31415927)

/* Generate do_locked_unary_E_<_name>(): test "lock; <_name> (mem)".

   Same scheme as GEN_do_locked_imm_E but for single-operand
   instructions.  The _eax argument is not used by the expansion.

   Layout of block[] as seen by the asm:
      [0] flags in   [1] address of E   [2] flags out */
#define GEN_do_locked_unary_E(_name,_eax)                           \
                                                                    \
  __attribute__((noinline)) void do_locked_unary_E_##_name ( void ) \
  {                                                                 \
    volatile int e_val, e_val_before;                               \
    int o, s, z, a, c, p, v2, flags_in;                             \
    int block[3];                                                   \
                                                                    \
    for (v2 = 0; v2 < NVALS; v2++) {                                \
                                                                    \
    for (o = 0; o < 2; o++) {                                       \
    for (s = 0; s < 2; s++) {                                       \
    for (z = 0; z < 2; z++) {                                       \
    for (a = 0; a < 2; a++) {                                       \
    for (c = 0; c < 2; c++) {                                       \
    for (p = 0; p < 2; p++) {                                       \
                                                                    \
      flags_in = (o ? CC_O : 0)                                     \
               | (s ? CC_S : 0)                                     \
               | (z ? CC_Z : 0)                                     \
               | (a ? CC_A : 0)                                     \
               | (c ? CC_C : 0)                                     \
               | (p ? CC_P : 0);                                    \
                                                                    \
      e_val = val[v2];                                              \
      e_val_before = e_val;                                         \
                                                                    \
      block[0] = flags_in;                                          \
      block[1] = (int)(long)&e_val;                                 \
      block[2] = 0;                                                 \
      __asm__ __volatile__(                                         \
          "movl 0(%0), %%eax\n\t"                                   \
          "pushl %%eax\n\t"                                         \
          "popfl\n\t"                                               \
          "movl 4(%0), %%ebx\n\t"                                   \
          "lock; " #_name " (%%ebx)\n\t"                            \
          "pushfl\n\t"                                              \
          "popl %%eax\n\t"                                          \
          "movl %%eax, 8(%0)\n\t"                                   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"            \
      );                                                            \
                                                                    \
      send(                                                         \
         snprintf(outBuf, sizeof(outBuf),                           \
                  "%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",     \
                  #_name, e_val_before, flags_in,                   \
                  e_val, block[2] & CC_MASK));                      \
                                                                    \
    }}}}}}                                                          \
                                                                    \
    }                                                               \
  }

GEN_do_locked_unary_E(decb,al)
GEN_do_locked_unary_E(decw,ax)
GEN_do_locked_unary_E(decl,eax)

GEN_do_locked_unary_E(incb,al)
GEN_do_locked_unary_E(incw,ax)
GEN_do_locked_unary_E(incl,eax)

GEN_do_locked_unary_E(negb,al)
GEN_do_locked_unary_E(negw,ax)
GEN_do_locked_unary_E(negl,eax)

GEN_do_locked_unary_E(notb,al)
GEN_do_locked_unary_E(notw,ax)
GEN_do_locked_unary_E(notl,eax)


/////////////////////////////////////////////////////////////////

/* Bit-test helpers.  Each one executes the named instruction with a
   register bit offset 'bitno' against memory addressed relative to
   'base', and returns the resulting carry flag (0 or 1).

   With a register bit offset the instruction can touch memory well
   away from *base (the caller uses offsets of -800 .. 799 bits), and
   it changes the flags.  Every helper therefore declares "cc" and
   "memory" clobbers so the compiler does not cache or reorder
   accesses to the surrounding buffer across the asm. */

unsigned int btsl_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btsl\t%2, %0\n\t"
                "setc\t%1"
                : "=m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   /* Pretty meaningless to dereference base here, but that's what you
      have to do to get a btsl insn which refers to memory starting at
      base. */
   return res;
}
unsigned int btsw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btsw\t%w2, %0\n\t"
                "setc\t%1"
                : "=m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}

unsigned int btrl_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btrl\t%2, %0\n\t"
                "setc\t%1"
                : "=m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}
unsigned int btrw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btrw\t%w2, %0\n\t"
                "setc\t%1"
                : "=m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}

unsigned int btcl_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btcl\t%2, %0\n\t"
                "setc\t%1"
                : "=m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}
unsigned int btcw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btcw\t%w2, %0\n\t"
                "setc\t%1"
                : "=m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}

/* btl/btw only test the bit, so they carry no lock prefix. */
unsigned int btl_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("btl\t%2, %0\n\t"
                "setc\t%1"
                : "=m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}
unsigned int btw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("btw\t%w2, %0\n\t"
                "setc\t%1"
                : "=m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}

/* Rotate a 64-bit value left by one bit. */
ULong rol1 ( ULong x )
{
  return (x << 1) | (x >> 63);
}

/* Apply 10000 pseudo-random bts/btr/btc/bt operations (first the 'l'
   forms, then the 'w' forms) at pseudo-random bit offsets to the
   middle of a zeroed 200-byte buffer.  For each pass, send one line
   holding a hash of the final buffer contents and a hash of the
   offsets at which the carry flag came back set.

   The order of the random() calls determines the output, so it must
   not be changed. */
void do_bt_G_E_tests ( void )
{
   UInt   n, op;
   Int    bitoff;
   UInt   c;
   UChar* block;
   ULong  carrydep, res;

   /*------------------------ MEM-L -----------------------*/

   carrydep = 0;
   block = calloc(200,1);
   assert(block != NULL);
   block += 100;
   /* Valid bit offsets are -800 .. 799 inclusive. */

   for (n = 0; n < 10000; n++) {
      bitoff = (Int)((random() % 1600) - 800);
      op = random() % 4;
      c = 2;
      switch (op) {
         case 0: c = btsl_mem(block, bitoff); break;
         case 1: c = btrl_mem(block, bitoff); break;
         case 2: c = btcl_mem(block, bitoff); break;
         case 3: c = btl_mem(block, bitoff); break;
      }
      c &= 255;
      assert(c == 0 || c == 1);
      carrydep = c ? (rol1(carrydep) ^ (Long)bitoff) : carrydep;
   }

   /* Compute final result */
   block -= 100;
   res = 0;
   for (n = 0; n < 200; n++) {
      UChar ch = block[n];
      /* printf("%d ", (int)block[n]); */
      res = rol1(res) ^ (ULong)ch;
   }

   send( snprintf(outBuf, sizeof(outBuf),
                  "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
                  res, carrydep ));
   free(block);

   /*------------------------ MEM-W -----------------------*/

   carrydep = 0;
   block = calloc(200,1);
   assert(block != NULL);
   block += 100;
   /* Valid bit offsets are -800 .. 799 inclusive. */

   for (n = 0; n < 10000; n++) {
      bitoff = (Int)((random() % 1600) - 800);
      op = random() % 4;
      c = 2;
      switch (op) {
         case 0: c = btsw_mem(block, bitoff); break;
         case 1: c = btrw_mem(block, bitoff); break;
         case 2: c = btcw_mem(block, bitoff); break;
         case 3: c = btw_mem(block, bitoff); break;
      }
      c &= 255;
      assert(c == 0 || c == 1);
      carrydep = c ? (rol1(carrydep) ^ (Long)bitoff) : carrydep;
   }

   /* Compute final result */
   block -= 100;
   res = 0;
   for (n = 0; n < 200; n++) {
      UChar ch = block[n];
      /* printf("%d ", (int)block[n]); */
      res = rol1(res) ^ (ULong)ch;
   }

   send( snprintf(outBuf, sizeof(outBuf),
                  "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
                  res, carrydep ));
   free(block);
}


/////////////////////////////////////////////////////////////////

/* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
   also reconstruct the original bits 0, 1, 2, 3 by looking at the
   carry flag.  Returned result has mashed bits 0-3 at the bottom and
   the reconstructed original bits 0-3 as 4-7.

   The asm both reads and modifies *origp but only receives the
   pointer in a register, so it declares a "memory" clobber; without
   it the compiler may treat the caller's store to *origp as dead. */

UInt mash_mem_L ( UInt* origp )
{
  UInt reconstructed, mashed;
  __asm__ __volatile__ (
     "movl %2, %%edx\n\t"
     ""
     "movl $0, %%eax\n\t"
     "\n\t"
     "btl  $0, (%%edx)\n\t"
     "setb %%cl\n\t"
     "movzbl %%cl, %%ecx\n\t"
     "orl %%ecx, %%eax\n\t"
     "\n\t"
     "lock; btsl $1, (%%edx)\n\t"
     "setb %%cl\n\t"
     "movzbl %%cl, %%ecx\n\t"
     "shll $1, %%ecx\n\t"
     "orl %%ecx, %%eax\n\t"
     "\n\t"
     "lock; btrl $2, (%%edx)\n\t"
     "setb %%cl\n\t"
     "movzbl %%cl, %%ecx\n\t"
     "shll $2, %%ecx\n\t"
     "orl %%ecx, %%eax\n\t"
     "\n\t"
     "lock; btcl $3, (%%edx)\n\t"
     "setb %%cl\n\t"
     "movzbl %%cl, %%ecx\n\t"
     "shll $3, %%ecx\n\t"
     "orl %%ecx, %%eax\n\t"
     "\n\t"
     "movl %%eax, %0\n\t"
     "movl (%%edx), %1"

     : "=r" (reconstructed), "=r" (mashed)
     : "r" (origp)
     : "eax", "ecx", "edx", "cc", "memory");
  return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
}

/* 16-bit counterpart of mash_mem_L: same sequence using the 'w' forms
   of the instructions on a UShort. */
UInt mash_mem_W ( UShort* origp )
{
  UInt reconstructed, mashed;
  __asm__ __volatile__ (
     "movl %2, %%edx\n\t"
     ""
     "movl $0, %%eax\n\t"
     "\n\t"
     "btw  $0, (%%edx)\n\t"
     "setb %%cl\n\t"
     "movzbl %%cl, %%ecx\n\t"
     "orl %%ecx, %%eax\n\t"
     "\n\t"
     "lock; btsw $1, (%%edx)\n\t"
     "setb %%cl\n\t"
     "movzbl %%cl, %%ecx\n\t"
     "shll $1, %%ecx\n\t"
     "orl %%ecx, %%eax\n\t"
     "\n\t"
     "lock; btrw $2, (%%edx)\n\t"
     "setb %%cl\n\t"
     "movzbl %%cl, %%ecx\n\t"
     "shll $2, %%ecx\n\t"
     "orl %%ecx, %%eax\n\t"
     "\n\t"
     "lock; btcw $3, (%%edx)\n\t"
     "setb %%cl\n\t"
     "movzbl %%cl, %%ecx\n\t"
     "shll $3, %%ecx\n\t"
     "orl %%ecx, %%eax\n\t"
     "\n\t"
     "movl %%eax, %0\n\t"
     "movzwl (%%edx), %1"

     : "=r" (reconstructed), "=r" (mashed)
     : "r" (origp)
     : "eax", "ecx", "edx", "cc", "memory");
  return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
}


/* Run mash_mem_L and mash_mem_W on each of the values 0 .. 15 and
   send one result line per value. */
void do_bt_imm_E_tests( void )
{
  int i;
  UInt*   iil = malloc(sizeof *iil);
  UShort* iiw = malloc(sizeof *iiw);
  assert(iil != NULL);
  assert(iiw != NULL);
  for (i = 0; i < 0x10; i++) {
     *iil = i;
     *iiw = i;
     send( snprintf(outBuf, sizeof(outBuf), "0x%x -> 0x%02x 0x%02x\n", i,
                    mash_mem_L(iil), mash_mem_W(iiw)));
  }
  free(iil);
  free(iiw);
}



/////////////////////////////////////////////////////////////////

/* Run every test, then compare the accumulated CRC with the expected
   value and print PASS or FAIL.  Always returns 0; the verdict is in
   the printed line. */
int main ( void )
{
   do_locked_G_E_addb();
   do_locked_G_E_addw();
   do_locked_G_E_addl();

   do_locked_G_E_orb();
   do_locked_G_E_orw();
   do_locked_G_E_orl();

   do_locked_G_E_adcb();
   do_locked_G_E_adcw();
   do_locked_G_E_adcl();

   do_locked_G_E_sbbb();
   do_locked_G_E_sbbw();
   do_locked_G_E_sbbl();

   do_locked_G_E_andb();
   do_locked_G_E_andw();
   do_locked_G_E_andl();

   do_locked_G_E_subb();
   do_locked_G_E_subw();
   do_locked_G_E_subl();

   do_locked_G_E_xorb();
   do_locked_G_E_xorw();
   do_locked_G_E_xorl();
   //21
   do_locked_imm_E_addb_0x7F();
   do_locked_imm_E_addb_0xF1();
   do_locked_imm_E_addw_0x7E();
   do_locked_imm_E_addw_0x9325();
   do_locked_imm_E_addl_0x7D();
   do_locked_imm_E_addl_0x31415927();

   do_locked_imm_E_orb_0x7F();
   do_locked_imm_E_orb_0xF1();
   do_locked_imm_E_orw_0x7E();
   do_locked_imm_E_orw_0x9325();
   do_locked_imm_E_orl_0x7D();
   do_locked_imm_E_orl_0x31415927();

   do_locked_imm_E_adcb_0x7F();
   do_locked_imm_E_adcb_0xF1();
   do_locked_imm_E_adcw_0x7E();
   do_locked_imm_E_adcw_0x9325();
   do_locked_imm_E_adcl_0x7D();
   do_locked_imm_E_adcl_0x31415927();

   do_locked_imm_E_sbbb_0x7F();
   do_locked_imm_E_sbbb_0xF1();
   do_locked_imm_E_sbbw_0x7E();
   do_locked_imm_E_sbbw_0x9325();
   do_locked_imm_E_sbbl_0x7D();
   do_locked_imm_E_sbbl_0x31415927();

   do_locked_imm_E_andb_0x7F();
   do_locked_imm_E_andb_0xF1();
   do_locked_imm_E_andw_0x7E();
   do_locked_imm_E_andw_0x9325();
   do_locked_imm_E_andl_0x7D();
   do_locked_imm_E_andl_0x31415927();

   do_locked_imm_E_subb_0x7F();
   do_locked_imm_E_subb_0xF1();
   do_locked_imm_E_subw_0x7E();
   do_locked_imm_E_subw_0x9325();
   do_locked_imm_E_subl_0x7D();
   do_locked_imm_E_subl_0x31415927();

   do_locked_imm_E_xorb_0x7F();
   do_locked_imm_E_xorb_0xF1();
   do_locked_imm_E_xorw_0x7E();
   do_locked_imm_E_xorw_0x9325();
   do_locked_imm_E_xorl_0x7D();
   do_locked_imm_E_xorl_0x31415927();
   // 63
   do_locked_unary_E_decb();
   do_locked_unary_E_decw();
   do_locked_unary_E_decl();

   do_locked_unary_E_incb();
   do_locked_unary_E_incw();
   do_locked_unary_E_incl();

   do_locked_unary_E_negb();
   do_locked_unary_E_negw();
   do_locked_unary_E_negl();

   do_locked_unary_E_notb();
   do_locked_unary_E_notw();
   do_locked_unary_E_notl();
   // 75
   do_bt_G_E_tests();
   // 81
   do_bt_imm_E_tests();
   // 87
   // So there should be 87 lock-prefixed instructions in the
   // disassembly of this compilation unit.
   // confirm with
   // objdump -d ./x86locked | grep lock | grep -v do_lock | grep -v elf32 | wc

   { UInt crcExpd = 0x8235DC9C;
     theCRC = crcFinalise( theCRC );
     if (theCRC == crcExpd) {
        printf("x86locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
               theCRC, crcExpd);
     } else {
        printf("x86locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
               theCRC, crcExpd);
        printf("x86locked: set #define VERBOSE 1 to diagnose\n");
     }
   }

   return 0;
}