Home | History | Annotate | Download | only in x86
      1 
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <assert.h>
      5 
      6 #define VERBOSE 0
      7 
      8 typedef  unsigned int            UInt;
      9 typedef  unsigned char           UChar;
     10 typedef  unsigned long long int  ULong;
     11 typedef  signed long long int    Long;
     12 typedef  signed int              Int;
     13 typedef  unsigned short          UShort;
     14 typedef  unsigned long           UWord;
     15 typedef  char                    HChar;
     16 
     17 /////////////////////////////////////////////////////////////////
     18 // BEGIN crc32 stuff                                           //
     19 /////////////////////////////////////////////////////////////////
     20 
     21 static const UInt crc32Table[256] = {
     22 
     23    /*-- Ugly, innit? --*/
     24 
     25    0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
     26    0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
     27    0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
     28    0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
     29    0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
     30    0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
     31    0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
     32    0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
     33    0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
     34    0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
     35    0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
     36    0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
     37    0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
     38    0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
     39    0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
     40    0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
     41    0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
     42    0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
     43    0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
     44    0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
     45    0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
     46    0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
     47    0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
     48    0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
     49    0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
     50    0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
     51    0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
     52    0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
     53    0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
     54    0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
     55    0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
     56    0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
     57    0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
     58    0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
     59    0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
     60    0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
     61    0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
     62    0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
     63    0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
     64    0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
     65    0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
     66    0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
     67    0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
     68    0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
     69    0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
     70    0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
     71    0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
     72    0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
     73    0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
     74    0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
     75    0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
     76    0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
     77    0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
     78    0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
     79    0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
     80    0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
     81    0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
     82    0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
     83    0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
     84    0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
     85    0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
     86    0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
     87    0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
     88    0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
     89 };
     90 
     91 #define UPDATE_CRC(crcVar,cha)                 \
     92 {                                              \
     93    crcVar = (crcVar << 8) ^                    \
     94             crc32Table[(crcVar >> 24) ^        \
     95                        ((UChar)cha)];          \
     96 }
     97 
     98 static UInt crcBytes ( UChar* bytes, UWord nBytes, UInt crcIn )
     99 {
    100    UInt crc = crcIn;
    101    while (nBytes >= 4) {
    102       UPDATE_CRC(crc, bytes[0]);
    103       UPDATE_CRC(crc, bytes[1]);
    104       UPDATE_CRC(crc, bytes[2]);
    105       UPDATE_CRC(crc, bytes[3]);
    106       bytes += 4;
    107       nBytes -= 4;
    108    }
    109    while (nBytes >= 1) {
    110       UPDATE_CRC(crc, bytes[0]);
    111       bytes += 1;
    112       nBytes -= 1;
    113    }
    114    return crc;
    115 }
    116 
    117 static UInt crcFinalise ( UInt crc ) {
    118    return ~crc;
    119 }
    120 
    121 ////////
    122 
    123 static UInt theCRC = 0xFFFFFFFF;
    124 
    125 static HChar outBuf[1024];
    126 // take output that's in outBuf, length as specified, and
    127 // update the running crc.
    128 static void send ( int nbytes )
    129 {
    130    assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
    131    assert(outBuf[nbytes] == 0);
    132    theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC );
    133    if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
    134 }
    135 
    136 
    137 /////////////////////////////////////////////////////////////////
    138 // END crc32 stuff                                             //
    139 /////////////////////////////////////////////////////////////////
    140 
    141 #if 0
    142 
    143 // full version
    144 #define NVALS 57
    145 
    146 static unsigned int val[NVALS]
    147     = { 0x00, 0x01, 0x02, 0x03,
    148         0x3F, 0x40, 0x41,
    149         0x7E, 0x7F, 0x80, 0x81, 0x82,
    150         0xBF, 0xC0, 0xC1,
    151         0xFC, 0xFD, 0xFE, 0xFF,
    152 
    153         0xFF00, 0xFF01, 0xFF02, 0xFF03,
    154         0xFF3F, 0xFF40, 0xFF41,
    155         0xFF7E, 0xFF7F, 0xFF80, 0xFF81, 0xFF82,
    156         0xFFBF, 0xFFC0, 0xFFC1,
    157         0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF,
    158 
    159         0xFFFFFF00, 0xFFFFFF01, 0xFFFFFF02, 0xFFFFFF03,
    160         0xFFFFFF3F, 0xFFFFFF40, 0xFFFFFF41,
    161         0xFFFFFF7E, 0xFFFFFF7F, 0xFFFFFF80, 0xFFFFFF81, 0xFFFFFF82,
    162         0xFFFFFFBF, 0xFFFFFFC0, 0xFFFFFFC1,
    163         0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF
    164       };
    165 
    166 #else
    167 
    168 // shortened version, for use as valgrind regtest
    169 #define NVALS 27
    170 
    171 static unsigned int val[NVALS]
    172     = { 0x00, 0x01,
    173         0x3F, 0x40,
    174         0x7F, 0x80,
    175         0xBF, 0xC0,
    176         0xFF,
    177 
    178         0xFF00, 0xFF01,
    179         0xFF3F, 0xFF40,
    180         0xFF7F, 0xFF80,
    181         0xFFBF, 0xFFC0,
    182         0xFFFF,
    183 
    184         0xFFFFFF00, 0xFFFFFF01,
    185         0xFFFFFF3F, 0xFFFFFF40,
    186         0xFFFFFF7F, 0xFFFFFF80,
    187         0xFFFFFFBF, 0xFFFFFFC0,
    188         0xFFFFFFFF
    189       };
    190 
    191 #endif
    192 
    193 /////////////////////////////////////
    194 
    195 #define CC_C    0x0001
    196 #define CC_P    0x0004
    197 #define CC_A    0x0010
    198 #define CC_Z    0x0040
    199 #define CC_S    0x0080
    200 #define CC_O    0x0800
    201 
    202 #define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O)
    203 
    204 #define GEN_do_locked_G_E(_name,_eax)   \
    205   \
    206   __attribute__((noinline)) void do_locked_G_E_##_name ( void )  \
    207   {   \
    208     volatile int e_val, g_val, e_val_before;   \
    209     int o, s, z, a, c, p, v1, v2, flags_in;   \
    210     int block[4];   \
    211     \
    212     for (v1 = 0; v1 < NVALS; v1++) {   \
    213     for (v2 = 0; v2 < NVALS; v2++) {   \
    214     \
    215     for (o = 0; o < 2; o++) {   \
    216     for (s = 0; s < 2; s++) {   \
    217     for (z = 0; z < 2; z++) {   \
    218     for (a = 0; a < 2; a++) {   \
    219     for (c = 0; c < 2; c++) {   \
    220     for (p = 0; p < 2; p++) {   \
    221       \
    222       flags_in = (o ? CC_O : 0)   \
    223                | (s ? CC_S : 0)   \
    224                | (z ? CC_Z : 0)   \
    225                | (a ? CC_A : 0)   \
    226                | (c ? CC_C : 0)   \
    227                | (p ? CC_P : 0);   \
    228       \
    229       g_val = val[v1];   \
    230       e_val = val[v2];   \
    231       e_val_before = e_val;   \
    232       \
    233       block[0] = flags_in;   \
    234       block[1] = g_val;   \
    235       block[2] = (int)(long)&e_val;   \
    236       block[3] = 0;   \
    237       __asm__ __volatile__(   \
    238           "movl 0(%0), %%eax\n\t"   \
    239           "pushl %%eax\n\t"   \
    240           "popfl\n\t"   \
    241           "movl 4(%0), %%eax\n\t"   \
    242           "movl 8(%0), %%ebx\n\t"   \
    243           "lock; " #_name " %%" #_eax ",(%%ebx)\n\t"   \
    244           "pushfl\n\t"   \
    245           "popl %%eax\n\t"   \
    246           "movl %%eax, 12(%0)\n\t"   \
    247           : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
    248       );   \
    249       \
    250       send( \
    251          sprintf(outBuf,                                        \
    252                  "%s G=%08x E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \
    253                  #_name, g_val, e_val_before, flags_in,   \
    254                  e_val, block[3] & CC_MASK) );            \
    255       \
    256     }}}}}}   \
    257     \
    258     }}   \
    259   }
    260 
    261 GEN_do_locked_G_E(addb,al)
    262 GEN_do_locked_G_E(addw,ax)
    263 GEN_do_locked_G_E(addl,eax)
    264 
    265 GEN_do_locked_G_E(orb, al)
    266 GEN_do_locked_G_E(orw, ax)
    267 GEN_do_locked_G_E(orl, eax)
    268 
    269 GEN_do_locked_G_E(adcb,al)
    270 GEN_do_locked_G_E(adcw,ax)
    271 GEN_do_locked_G_E(adcl,eax)
    272 
    273 GEN_do_locked_G_E(sbbb,al)
    274 GEN_do_locked_G_E(sbbw,ax)
    275 GEN_do_locked_G_E(sbbl,eax)
    276 
    277 GEN_do_locked_G_E(andb,al)
    278 GEN_do_locked_G_E(andw,ax)
    279 GEN_do_locked_G_E(andl,eax)
    280 
    281 GEN_do_locked_G_E(subb,al)
    282 GEN_do_locked_G_E(subw,ax)
    283 GEN_do_locked_G_E(subl,eax)
    284 
    285 GEN_do_locked_G_E(xorb,al)
    286 GEN_do_locked_G_E(xorw,ax)
    287 GEN_do_locked_G_E(xorl,eax)
    288 
    289 
    290 
    291 
    292 #define GEN_do_locked_imm_E(_name,_eax,_imm)        \
    293   \
    294   __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void )  \
    295   {   \
    296     volatile int e_val, e_val_before;   \
    297     int o, s, z, a, c, p, v2, flags_in;   \
    298     int block[3];   \
    299     \
    300     for (v2 = 0; v2 < NVALS; v2++) {   \
    301     \
    302     for (o = 0; o < 2; o++) {   \
    303     for (s = 0; s < 2; s++) {   \
    304     for (z = 0; z < 2; z++) {   \
    305     for (a = 0; a < 2; a++) {   \
    306     for (c = 0; c < 2; c++) {   \
    307     for (p = 0; p < 2; p++) {   \
    308       \
    309       flags_in = (o ? CC_O : 0)   \
    310                | (s ? CC_S : 0)   \
    311                | (z ? CC_Z : 0)   \
    312                | (a ? CC_A : 0)   \
    313                | (c ? CC_C : 0)   \
    314                | (p ? CC_P : 0);   \
    315       \
    316       e_val = val[v2];   \
    317       e_val_before = e_val;   \
    318       \
    319       block[0] = flags_in;   \
    320       block[1] = (int)(long)&e_val;   \
    321       block[2] = 0;   \
    322       __asm__ __volatile__(   \
    323           "movl 0(%0), %%eax\n\t"   \
    324           "pushl %%eax\n\t"   \
    325           "popfl\n\t"   \
    326           "movl 4(%0), %%ebx\n\t"   \
    327           "lock; " #_name " $" #_imm ",(%%ebx)\n\t"   \
    328           "pushfl\n\t"   \
    329           "popl %%eax\n\t"   \
    330           "movl %%eax, 8(%0)\n\t"   \
    331           : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
    332       );   \
    333       \
    334       send( \
    335         sprintf(outBuf, \
    336              "%s I=%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",       \
    337              #_name, #_imm, e_val_before, flags_in,         \
    338                 e_val, block[2] & CC_MASK) );               \
    339       \
    340     }}}}}}   \
    341     \
    342     }   \
    343   }
    344 
    345 GEN_do_locked_imm_E(addb,al,0x7F)
    346 GEN_do_locked_imm_E(addb,al,0xF1)
    347 GEN_do_locked_imm_E(addw,ax,0x7E)
    348 GEN_do_locked_imm_E(addw,ax,0x9325)
    349 GEN_do_locked_imm_E(addl,eax,0x7D)
    350 GEN_do_locked_imm_E(addl,eax,0x31415927)
    351 
    352 GEN_do_locked_imm_E(orb,al,0x7F)
    353 GEN_do_locked_imm_E(orb,al,0xF1)
    354 GEN_do_locked_imm_E(orw,ax,0x7E)
    355 GEN_do_locked_imm_E(orw,ax,0x9325)
    356 GEN_do_locked_imm_E(orl,eax,0x7D)
    357 GEN_do_locked_imm_E(orl,eax,0x31415927)
    358 
    359 GEN_do_locked_imm_E(adcb,al,0x7F)
    360 GEN_do_locked_imm_E(adcb,al,0xF1)
    361 GEN_do_locked_imm_E(adcw,ax,0x7E)
    362 GEN_do_locked_imm_E(adcw,ax,0x9325)
    363 GEN_do_locked_imm_E(adcl,eax,0x7D)
    364 GEN_do_locked_imm_E(adcl,eax,0x31415927)
    365 
    366 GEN_do_locked_imm_E(sbbb,al,0x7F)
    367 GEN_do_locked_imm_E(sbbb,al,0xF1)
    368 GEN_do_locked_imm_E(sbbw,ax,0x7E)
    369 GEN_do_locked_imm_E(sbbw,ax,0x9325)
    370 GEN_do_locked_imm_E(sbbl,eax,0x7D)
    371 GEN_do_locked_imm_E(sbbl,eax,0x31415927)
    372 
    373 GEN_do_locked_imm_E(andb,al,0x7F)
    374 GEN_do_locked_imm_E(andb,al,0xF1)
    375 GEN_do_locked_imm_E(andw,ax,0x7E)
    376 GEN_do_locked_imm_E(andw,ax,0x9325)
    377 GEN_do_locked_imm_E(andl,eax,0x7D)
    378 GEN_do_locked_imm_E(andl,eax,0x31415927)
    379 
    380 GEN_do_locked_imm_E(subb,al,0x7F)
    381 GEN_do_locked_imm_E(subb,al,0xF1)
    382 GEN_do_locked_imm_E(subw,ax,0x7E)
    383 GEN_do_locked_imm_E(subw,ax,0x9325)
    384 GEN_do_locked_imm_E(subl,eax,0x7D)
    385 GEN_do_locked_imm_E(subl,eax,0x31415927)
    386 
    387 GEN_do_locked_imm_E(xorb,al,0x7F)
    388 GEN_do_locked_imm_E(xorb,al,0xF1)
    389 GEN_do_locked_imm_E(xorw,ax,0x7E)
    390 GEN_do_locked_imm_E(xorw,ax,0x9325)
    391 GEN_do_locked_imm_E(xorl,eax,0x7D)
    392 GEN_do_locked_imm_E(xorl,eax,0x31415927)
    393 
    394 #define GEN_do_locked_unary_E(_name,_eax)        \
    395   \
    396   __attribute__((noinline)) void do_locked_unary_E_##_name ( void )  \
    397   {   \
    398     volatile int e_val, e_val_before;   \
    399     int o, s, z, a, c, p, v2, flags_in;   \
    400     int block[3];   \
    401     \
    402     for (v2 = 0; v2 < NVALS; v2++) {   \
    403     \
    404     for (o = 0; o < 2; o++) {   \
    405     for (s = 0; s < 2; s++) {   \
    406     for (z = 0; z < 2; z++) {   \
    407     for (a = 0; a < 2; a++) {   \
    408     for (c = 0; c < 2; c++) {   \
    409     for (p = 0; p < 2; p++) {   \
    410       \
    411       flags_in = (o ? CC_O : 0)   \
    412                | (s ? CC_S : 0)   \
    413                | (z ? CC_Z : 0)   \
    414                | (a ? CC_A : 0)   \
    415                | (c ? CC_C : 0)   \
    416                | (p ? CC_P : 0);   \
    417       \
    418       e_val = val[v2];   \
    419       e_val_before = e_val;   \
    420       \
    421       block[0] = flags_in;   \
    422       block[1] = (int)(long)&e_val;   \
    423       block[2] = 0;   \
    424       __asm__ __volatile__(   \
    425           "movl 0(%0), %%eax\n\t"   \
    426           "pushl %%eax\n\t"   \
    427           "popfl\n\t"   \
    428           "movl 4(%0), %%ebx\n\t"   \
    429           "lock; " #_name " (%%ebx)\n\t"   \
    430           "pushfl\n\t"   \
    431           "popl %%eax\n\t"   \
    432           "movl %%eax, 8(%0)\n\t"   \
    433           : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
    434       );   \
    435       \
    436       send( \
    437          sprintf(outBuf, \
    438                 "%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",   \
    439              #_name, e_val_before, flags_in,         \
    440                 e_val, block[2] & CC_MASK));         \
    441       \
    442     }}}}}}   \
    443     \
    444     }   \
    445   }
    446 
    447 GEN_do_locked_unary_E(decb,al)
    448 GEN_do_locked_unary_E(decw,ax)
    449 GEN_do_locked_unary_E(decl,eax)
    450 
    451 GEN_do_locked_unary_E(incb,al)
    452 GEN_do_locked_unary_E(incw,ax)
    453 GEN_do_locked_unary_E(incl,eax)
    454 
    455 GEN_do_locked_unary_E(negb,al)
    456 GEN_do_locked_unary_E(negw,ax)
    457 GEN_do_locked_unary_E(negl,eax)
    458 
    459 GEN_do_locked_unary_E(notb,al)
    460 GEN_do_locked_unary_E(notw,ax)
    461 GEN_do_locked_unary_E(notl,eax)
    462 
    463 
    464 /////////////////////////////////////////////////////////////////
    465 
    466 unsigned int btsl_mem ( UChar* base, int bitno )
    467 {
    468    unsigned char res;
    469    __asm__
    470    __volatile__("lock; btsl\t%2, %0\n\t"
    471                 "setc\t%1"
    472                 : "=m" (*base), "=q" (res)
    473                 : "r" (bitno));
    474    /* Pretty meaningless to dereference base here, but that's what you
    475       have to do to get a btsl insn which refers to memory starting at
    476       base. */
    477    return res;
    478 }
    479 unsigned int btsw_mem ( UChar* base, int bitno )
    480 {
    481    unsigned char res;
    482    __asm__
    483    __volatile__("lock; btsw\t%w2, %0\n\t"
    484                 "setc\t%1"
    485                 : "=m" (*base), "=q" (res)
    486                 : "r" (bitno));
    487    return res;
    488 }
    489 
    490 unsigned int btrl_mem ( UChar* base, int bitno )
    491 {
    492    unsigned char res;
    493    __asm__
    494    __volatile__("lock; btrl\t%2, %0\n\t"
    495                 "setc\t%1"
    496                 : "=m" (*base), "=q" (res)
    497                 : "r" (bitno));
    498    return res;
    499 }
    500 unsigned int btrw_mem ( UChar* base, int bitno )
    501 {
    502    unsigned char res;
    503    __asm__
    504    __volatile__("lock; btrw\t%w2, %0\n\t"
    505                 "setc\t%1"
    506                 : "=m" (*base), "=q" (res)
    507                 : "r" (bitno));
    508    return res;
    509 }
    510 
    511 unsigned int btcl_mem ( UChar* base, int bitno )
    512 {
    513    unsigned char res;
    514    __asm__
    515    __volatile__("lock; btcl\t%2, %0\n\t"
    516                 "setc\t%1"
    517                 : "=m" (*base), "=q" (res)
    518                 : "r" (bitno));
    519    return res;
    520 }
    521 unsigned int btcw_mem ( UChar* base, int bitno )
    522 {
    523    unsigned char res;
    524    __asm__
    525    __volatile__("lock; btcw\t%w2, %0\n\t"
    526                 "setc\t%1"
    527                 : "=m" (*base), "=q" (res)
    528                 : "r" (bitno));
    529    return res;
    530 }
    531 
    532 unsigned int btl_mem ( UChar* base, int bitno )
    533 {
    534    unsigned char res;
    535    __asm__
    536    __volatile__("btl\t%2, %0\n\t"
    537                 "setc\t%1"
    538                 : "=m" (*base), "=q" (res)
    539                 : "r" (bitno)
    540                 : "cc", "memory");
    541    return res;
    542 }
    543 unsigned int btw_mem ( UChar* base, int bitno )
    544 {
    545    unsigned char res;
    546    __asm__
    547    __volatile__("btw\t%w2, %0\n\t"
    548                 "setc\t%1"
    549                 : "=m" (*base), "=q" (res)
    550                 : "r" (bitno));
    551    return res;
    552 }
    553 
    554 ULong rol1 ( ULong x )
    555 {
    556   return (x << 1) | (x >> 63);
    557 }
    558 
    559 void do_bt_G_E_tests ( void )
    560 {
    561    UInt   n, bitoff, op;
    562    UInt   c;
    563    UChar* block;
    564    ULong  carrydep, res;;
    565 
    566    /*------------------------ MEM-L -----------------------*/
    567 
    568    carrydep = 0;
    569    block = calloc(200,1);
    570    block += 100;
    571    /* Valid bit offsets are -800 .. 799 inclusive. */
    572 
    573    for (n = 0; n < 10000; n++) {
    574       bitoff = (random() % 1600) - 800;
    575       op = random() % 4;
    576       c = 2;
    577       switch (op) {
    578          case 0: c = btsl_mem(block, bitoff); break;
    579          case 1: c = btrl_mem(block, bitoff); break;
    580          case 2: c = btcl_mem(block, bitoff); break;
    581          case 3: c = btl_mem(block, bitoff); break;
    582       }
    583       c &= 255;
    584       assert(c == 0 || c == 1);
    585       carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
    586    }
    587 
    588    /* Compute final result */
    589    block -= 100;
    590    res = 0;
    591    for (n = 0; n < 200; n++) {
    592       UChar ch = block[n];
    593       /* printf("%d ", (int)block[n]); */
    594       res = rol1(res) ^ (ULong)ch;
    595    }
    596 
    597    send( sprintf(outBuf,
    598                  "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
    599                  res, carrydep ));
    600    free(block);
    601 
    602    /*------------------------ MEM-W -----------------------*/
    603 
    604    carrydep = 0;
    605    block = calloc(200,1);
    606    block += 100;
    607    /* Valid bit offsets are -800 .. 799 inclusive. */
    608 
    609    for (n = 0; n < 10000; n++) {
    610       bitoff = (random() % 1600) - 800;
    611       op = random() % 4;
    612       c = 2;
    613       switch (op) {
    614          case 0: c = btsw_mem(block, bitoff); break;
    615          case 1: c = btrw_mem(block, bitoff); break;
    616          case 2: c = btcw_mem(block, bitoff); break;
    617          case 3: c = btw_mem(block, bitoff); break;
    618       }
    619       c &= 255;
    620       assert(c == 0 || c == 1);
    621       carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
    622    }
    623 
    624    /* Compute final result */
    625    block -= 100;
    626    res = 0;
    627    for (n = 0; n < 200; n++) {
    628       UChar ch = block[n];
    629       /* printf("%d ", (int)block[n]); */
    630       res = rol1(res) ^ (ULong)ch;
    631    }
    632 
    633    send( sprintf(outBuf,
    634                  "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
    635                  res, carrydep ));
    636    free(block);
    637 }
    638 
    639 
    640 /////////////////////////////////////////////////////////////////
    641 
    642 /* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
    643    also reconstruct the original bits 0, 1, 2, 3 by looking at the
    644    carry flag.  Returned result has mashed bits 0-3 at the bottom and
    645    the reconstructed original bits 0-3 as 4-7. */
    646 
    647 UInt mash_mem_L ( UInt* origp )
    648 {
    649   UInt reconstructed, mashed;
    650   __asm__ __volatile__ (
    651      "movl %2, %%edx\n\t"
    652      ""
    653      "movl $0, %%eax\n\t"
    654      "\n\t"
    655      "btl  $0, (%%edx)\n\t"
    656      "setb %%cl\n\t"
    657      "movzbl %%cl, %%ecx\n\t"
    658      "orl %%ecx, %%eax\n\t"
    659      "\n\t"
    660      "lock; btsl $1, (%%edx)\n\t"
    661      "setb %%cl\n\t"
    662      "movzbl %%cl, %%ecx\n\t"
    663      "shll $1, %%ecx\n\t"
    664      "orl %%ecx, %%eax\n\t"
    665      "\n\t"
    666      "lock; btrl $2, (%%edx)\n\t"
    667      "setb %%cl\n\t"
    668      "movzbl %%cl, %%ecx\n\t"
    669      "shll $2, %%ecx\n\t"
    670      "orl %%ecx, %%eax\n\t"
    671      "\n\t"
    672      "lock; btcl $3, (%%edx)\n\t"
    673      "setb %%cl\n\t"
    674      "movzbl %%cl, %%ecx\n\t"
    675      "shll $3, %%ecx\n\t"
    676      "orl %%ecx, %%eax\n\t"
    677      "\n\t"
    678      "movl %%eax, %0\n\t"
    679      "movl (%%edx), %1"
    680 
    681      : "=r" (reconstructed), "=r" (mashed)
    682      : "r" (origp)
    683      : "eax", "ecx", "edx", "cc");
    684   return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
    685 }
    686 
    687 UInt mash_mem_W ( UShort* origp )
    688 {
    689   UInt reconstructed, mashed;
    690   __asm__ __volatile__ (
    691      "movl %2, %%edx\n\t"
    692      ""
    693      "movl $0, %%eax\n\t"
    694      "\n\t"
    695      "btw  $0, (%%edx)\n\t"
    696      "setb %%cl\n\t"
    697      "movzbl %%cl, %%ecx\n\t"
    698      "orl %%ecx, %%eax\n\t"
    699      "\n\t"
    700      "lock; btsw $1, (%%edx)\n\t"
    701      "setb %%cl\n\t"
    702      "movzbl %%cl, %%ecx\n\t"
    703      "shll $1, %%ecx\n\t"
    704      "orl %%ecx, %%eax\n\t"
    705      "\n\t"
    706      "lock; btrw $2, (%%edx)\n\t"
    707      "setb %%cl\n\t"
    708      "movzbl %%cl, %%ecx\n\t"
    709      "shll $2, %%ecx\n\t"
    710      "orl %%ecx, %%eax\n\t"
    711      "\n\t"
    712      "lock; btcw $3, (%%edx)\n\t"
    713      "setb %%cl\n\t"
    714      "movzbl %%cl, %%ecx\n\t"
    715      "shll $3, %%ecx\n\t"
    716      "orl %%ecx, %%eax\n\t"
    717      "\n\t"
    718      "movl %%eax, %0\n\t"
    719      "movzwl (%%edx), %1"
    720 
    721      : "=r" (reconstructed), "=r" (mashed)
    722      : "r" (origp)
    723      : "eax", "ecx", "edx", "cc");
    724   return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
    725 }
    726 
    727 
    728 void do_bt_imm_E_tests( void )
    729 {
    730   int i;
    731   UInt*   iil = malloc(sizeof(UInt));
    732   UShort* iiw = malloc(sizeof(UShort));
    733   for (i = 0; i < 0x10; i++) {
    734     *iil = i;
    735     *iiw = i;
    736     send( sprintf(outBuf, "0x%x -> 0x%02x 0x%02x\n", i,
    737                   mash_mem_L(iil), mash_mem_W(iiw)));
    738   }
    739   free(iil);
    740   free(iiw);
    741 }
    742 
    743 
    744 
    745 /////////////////////////////////////////////////////////////////
    746 
    747 int main ( void )
    748 {
    749   do_locked_G_E_addb();
    750   do_locked_G_E_addw();
    751   do_locked_G_E_addl();
    752 
    753   do_locked_G_E_orb();
    754   do_locked_G_E_orw();
    755   do_locked_G_E_orl();
    756 
    757   do_locked_G_E_adcb();
    758   do_locked_G_E_adcw();
    759   do_locked_G_E_adcl();
    760 
    761   do_locked_G_E_sbbb();
    762   do_locked_G_E_sbbw();
    763   do_locked_G_E_sbbl();
    764 
    765   do_locked_G_E_andb();
    766   do_locked_G_E_andw();
    767   do_locked_G_E_andl();
    768 
    769   do_locked_G_E_subb();
    770   do_locked_G_E_subw();
    771   do_locked_G_E_subl();
    772 
    773   do_locked_G_E_xorb();
    774   do_locked_G_E_xorw();
    775   do_locked_G_E_xorl();
    776   //21
    777   do_locked_imm_E_addb_0x7F();
    778   do_locked_imm_E_addb_0xF1();
    779   do_locked_imm_E_addw_0x7E();
    780   do_locked_imm_E_addw_0x9325();
    781   do_locked_imm_E_addl_0x7D();
    782   do_locked_imm_E_addl_0x31415927();
    783 
    784   do_locked_imm_E_orb_0x7F();
    785   do_locked_imm_E_orb_0xF1();
    786   do_locked_imm_E_orw_0x7E();
    787   do_locked_imm_E_orw_0x9325();
    788   do_locked_imm_E_orl_0x7D();
    789   do_locked_imm_E_orl_0x31415927();
    790 
    791   do_locked_imm_E_adcb_0x7F();
    792   do_locked_imm_E_adcb_0xF1();
    793   do_locked_imm_E_adcw_0x7E();
    794   do_locked_imm_E_adcw_0x9325();
    795   do_locked_imm_E_adcl_0x7D();
    796   do_locked_imm_E_adcl_0x31415927();
    797 
    798   do_locked_imm_E_sbbb_0x7F();
    799   do_locked_imm_E_sbbb_0xF1();
    800   do_locked_imm_E_sbbw_0x7E();
    801   do_locked_imm_E_sbbw_0x9325();
    802   do_locked_imm_E_sbbl_0x7D();
    803   do_locked_imm_E_sbbl_0x31415927();
    804 
    805   do_locked_imm_E_andb_0x7F();
    806   do_locked_imm_E_andb_0xF1();
    807   do_locked_imm_E_andw_0x7E();
    808   do_locked_imm_E_andw_0x9325();
    809   do_locked_imm_E_andl_0x7D();
    810   do_locked_imm_E_andl_0x31415927();
    811 
    812   do_locked_imm_E_subb_0x7F();
    813   do_locked_imm_E_subb_0xF1();
    814   do_locked_imm_E_subw_0x7E();
    815   do_locked_imm_E_subw_0x9325();
    816   do_locked_imm_E_subl_0x7D();
    817   do_locked_imm_E_subl_0x31415927();
    818 
    819   do_locked_imm_E_xorb_0x7F();
    820   do_locked_imm_E_xorb_0xF1();
    821   do_locked_imm_E_xorw_0x7E();
    822   do_locked_imm_E_xorw_0x9325();
    823   do_locked_imm_E_xorl_0x7D();
    824   do_locked_imm_E_xorl_0x31415927();
    825   // 63
    826   do_locked_unary_E_decb();
    827   do_locked_unary_E_decw();
    828   do_locked_unary_E_decl();
    829 
    830   do_locked_unary_E_incb();
    831   do_locked_unary_E_incw();
    832   do_locked_unary_E_incl();
    833 
    834   do_locked_unary_E_negb();
    835   do_locked_unary_E_negw();
    836   do_locked_unary_E_negl();
    837 
    838   do_locked_unary_E_notb();
    839   do_locked_unary_E_notw();
    840   do_locked_unary_E_notl();
    841   // 75
    842   do_bt_G_E_tests();
    843   // 81
    844   do_bt_imm_E_tests();
    845   // 87
    846   // So there should be 87 lock-prefixed instructions in the
    847   // disassembly of this compilation unit.
    848   // confirm with
    849   // objdump -d ./x86locked | grep lock | grep -v do_lock | grep -v elf32 | wc
    850 
    851   { UInt crcExpd = 0x8235DC9C;
    852     theCRC = crcFinalise( theCRC );
    853     if (theCRC == crcExpd) {
    854        printf("x86locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
    855               theCRC, crcExpd);
    856     } else {
    857        printf("x86locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
    858               theCRC, crcExpd);
    859        printf("x86locked: set #define VERBOSE 1 to diagnose\n");
    860     }
    861   }
    862 
    863   return 0;
    864 }
    865