Home | History | Annotate | Download | only in x86
      1 
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <assert.h>
      5 
/* Set to a non-zero value to make send() print each formatted result
   line together with the running CRC; with 0 only the final PASS/FAIL
   line is printed. */
#define VERBOSE 0

/* Short aliases for the basic integer types used in this file. */
typedef  unsigned int            UInt;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  signed int              Int;
typedef  unsigned short          UShort;
typedef  unsigned long           UWord;   /* used as the byte count in crcBytes() */
typedef  char                    HChar;   /* element type of the text buffer outBuf */
     16 
     17 unsigned myrandom(void)
     18 {
     19    /* Simple multiply-with-carry random generator. */
     20    static unsigned m_w = 11;
     21    static unsigned m_z = 13;
     22 
     23    m_z = 36969 * (m_z & 65535) + (m_z >> 16);
     24    m_w = 18000 * (m_w & 65535) + (m_w >> 16);
     25 
     26    return (m_z << 16) + m_w;
     27 }
     28 
     29 /////////////////////////////////////////////////////////////////
     30 // BEGIN crc32 stuff                                           //
     31 /////////////////////////////////////////////////////////////////
     32 
/* 256-entry lookup table used by UPDATE_CRC: the index is the top byte
   of the running CRC xor'd with the next input byte.
   NOTE(review): entry 1 is 0x04c11db7, which looks like the usual
   CRC-32 polynomial processed most-significant-bit first -- the table
   contents have not been re-derived here, confirm before relying on it. */
static const UInt crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};
    102 
    103 #define UPDATE_CRC(crcVar,cha)                 \
    104 {                                              \
    105    crcVar = (crcVar << 8) ^                    \
    106             crc32Table[(crcVar >> 24) ^        \
    107                        ((UChar)cha)];          \
    108 }
    109 
    110 static UInt crcBytes ( UChar* bytes, UWord nBytes, UInt crcIn )
    111 {
    112    UInt crc = crcIn;
    113    while (nBytes >= 4) {
    114       UPDATE_CRC(crc, bytes[0]);
    115       UPDATE_CRC(crc, bytes[1]);
    116       UPDATE_CRC(crc, bytes[2]);
    117       UPDATE_CRC(crc, bytes[3]);
    118       bytes += 4;
    119       nBytes -= 4;
    120    }
    121    while (nBytes >= 1) {
    122       UPDATE_CRC(crc, bytes[0]);
    123       bytes += 1;
    124       nBytes -= 1;
    125    }
    126    return crc;
    127 }
    128 
    129 static UInt crcFinalise ( UInt crc ) {
    130    return ~crc;
    131 }
    132 
    133 ////////
    134 
    135 static UInt theCRC = 0xFFFFFFFF;
    136 
    137 static HChar outBuf[1024];
    138 // take output that's in outBuf, length as specified, and
    139 // update the running crc.
    140 static void send ( int nbytes )
    141 {
    142    assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
    143    assert(outBuf[nbytes] == 0);
    144    theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC );
    145    if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
    146 }
    147 
    148 
    149 /////////////////////////////////////////////////////////////////
    150 // END crc32 stuff                                             //
    151 /////////////////////////////////////////////////////////////////
    152 
/* Operand values fed to the locked-instruction tests.  Both variants
   use the same pattern: a group of small values, the same group with
   0xFF00 added, and the same group with 0xFFFFFF00 added.  The "#if 0"
   branch holds the longer list (NVALS 57) and is currently disabled;
   the "#else" branch (NVALS 27) is the one that gets compiled. */
#if 0

// full version
#define NVALS 57

static unsigned int val[NVALS]
    = { 0x00, 0x01, 0x02, 0x03,
        0x3F, 0x40, 0x41,
        0x7E, 0x7F, 0x80, 0x81, 0x82,
        0xBF, 0xC0, 0xC1,
        0xFC, 0xFD, 0xFE, 0xFF,

        0xFF00, 0xFF01, 0xFF02, 0xFF03,
        0xFF3F, 0xFF40, 0xFF41,
        0xFF7E, 0xFF7F, 0xFF80, 0xFF81, 0xFF82,
        0xFFBF, 0xFFC0, 0xFFC1,
        0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF,

        0xFFFFFF00, 0xFFFFFF01, 0xFFFFFF02, 0xFFFFFF03,
        0xFFFFFF3F, 0xFFFFFF40, 0xFFFFFF41,
        0xFFFFFF7E, 0xFFFFFF7F, 0xFFFFFF80, 0xFFFFFF81, 0xFFFFFF82,
        0xFFFFFFBF, 0xFFFFFFC0, 0xFFFFFFC1,
        0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF
      };

#else

// shortened version, for use as valgrind regtest
#define NVALS 27

static unsigned int val[NVALS]
    = { 0x00, 0x01,
        0x3F, 0x40,
        0x7F, 0x80,
        0xBF, 0xC0,
        0xFF,

        0xFF00, 0xFF01,
        0xFF3F, 0xFF40,
        0xFF7F, 0xFF80,
        0xFFBF, 0xFFC0,
        0xFFFF,

        0xFFFFFF00, 0xFFFFFF01,
        0xFFFFFF3F, 0xFFFFFF40,
        0xFFFFFF7F, 0xFFFFFF80,
        0xFFFFFFBF, 0xFFFFFFC0,
        0xFFFFFFFF
      };

#endif
    204 
    205 /////////////////////////////////////
    206 
/* Bit masks within the flags word that the tests load with popfl and
   read back with pushfl.  The names presumably stand for the x86
   carry, parity, auxiliary-carry, zero, sign and overflow flags. */
#define CC_C    0x0001
#define CC_P    0x0004
#define CC_A    0x0010
#define CC_Z    0x0040
#define CC_S    0x0080
#define CC_O    0x0800

/* All six flag bits above; applied to the flags read back after each
   tested instruction so that only these bits are reported. */
#define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O)
    215 
/* GEN_do_locked_G_E(_name,_eax) defines the function
   do_locked_G_E_<_name>(), which exercises "lock; <_name> %<_eax>,(mem)".

     _name  instruction mnemonic including its size suffix (e.g. addb).
     _eax   the part of eax used as the register operand (al/ax/eax in
            the instantiations in this file).

   For every ordered pair of entries in val[] (register value G, memory
   value E) and for each of the 64 combinations of the O/S/Z/A/C/P
   flags, the generated function:
     - fills block[]: [0] flags to load, [1] G, [2] address of e_val,
       [3] slot for the resulting flags;
     - in the asm, loads block[0] into the flags register via
       pushl/popfl, loads G into eax and the address into ebx, runs the
       locked instruction on (%ebx), then stores the flags obtained
       with pushfl/popl into block[3];
     - formats the inputs, the new memory value and the resulting flags
       (masked with CC_MASK) into outBuf and passes the line to send().

   The address of e_val is stored in an int via (int)(long), and the asm
   uses 32-bit registers throughout, so this is 32-bit x86 code.  The
   function is marked noinline; presumably this keeps exactly one copy
   of each locked instruction in the object file (main() documents an
   expected count) -- TODO confirm. */
#define GEN_do_locked_G_E(_name,_eax)   \
  \
  __attribute__((noinline)) void do_locked_G_E_##_name ( void )  \
  {   \
    volatile int e_val, g_val, e_val_before;   \
    int o, s, z, a, c, p, v1, v2, flags_in;   \
    int block[4];   \
    \
    for (v1 = 0; v1 < NVALS; v1++) {   \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      g_val = val[v1];   \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      block[0] = flags_in;   \
      block[1] = g_val;   \
      block[2] = (int)(long)&e_val;   \
      block[3] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%eax\n\t"   \
          "movl 8(%0), %%ebx\n\t"   \
          "lock; " #_name " %%" #_eax ",(%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 12(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
         sprintf(outBuf,                                        \
                 "%s G=%08x E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \
                 #_name, g_val, e_val_before, flags_in,   \
                 e_val, block[3] & CC_MASK) );            \
      \
    }}}}}}   \
    \
    }}   \
  }
    272 
/* Instantiate do_locked_G_E_<op>() for seven operations (add, or, adc,
   sbb, and, sub, xor), each in byte, word and long form, using the
   matching part of eax (al/ax/eax) as the register operand: 21
   functions in total, each containing one locked instruction. */
GEN_do_locked_G_E(addb,al)
GEN_do_locked_G_E(addw,ax)
GEN_do_locked_G_E(addl,eax)

GEN_do_locked_G_E(orb, al)
GEN_do_locked_G_E(orw, ax)
GEN_do_locked_G_E(orl, eax)

GEN_do_locked_G_E(adcb,al)
GEN_do_locked_G_E(adcw,ax)
GEN_do_locked_G_E(adcl,eax)

GEN_do_locked_G_E(sbbb,al)
GEN_do_locked_G_E(sbbw,ax)
GEN_do_locked_G_E(sbbl,eax)

GEN_do_locked_G_E(andb,al)
GEN_do_locked_G_E(andw,ax)
GEN_do_locked_G_E(andl,eax)

GEN_do_locked_G_E(subb,al)
GEN_do_locked_G_E(subw,ax)
GEN_do_locked_G_E(subl,eax)

GEN_do_locked_G_E(xorb,al)
GEN_do_locked_G_E(xorw,ax)
GEN_do_locked_G_E(xorl,eax)
    300 
    301 
    302 
    303 
/* GEN_do_locked_imm_E(_name,_eax,_imm) defines the function
   do_locked_imm_E_<_name>_<_imm>(), which exercises
   "lock; <_name> $<_imm>,(mem)".

     _name  instruction mnemonic including its size suffix.
     _eax   not referenced anywhere in the macro body.
     _imm   the immediate; it is pasted into the function name and
            stringified into both the asm and the report line, so it
            must be a single token such as 0x7F.

   For every entry of val[] (memory value E) and each of the 64
   combinations of the O/S/Z/A/C/P flags, the generated function:
     - fills block[]: [0] flags to load, [1] address of e_val, [2] slot
       for the resulting flags;
     - in the asm, loads block[0] into the flags register via
       pushl/popfl, loads the address into ebx, runs the locked
       instruction on (%ebx), then stores the flags obtained with
       pushfl/popl into block[2];
     - formats the immediate, the old and new memory value and the
       flags (masked with CC_MASK) into outBuf and passes the line to
       send().

   The address of e_val is stored in an int via (int)(long) and the asm
   uses 32-bit registers, so this is 32-bit x86 code. */
#define GEN_do_locked_imm_E(_name,_eax,_imm)        \
  \
  __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void )  \
  {   \
    volatile int e_val, e_val_before;   \
    int o, s, z, a, c, p, v2, flags_in;   \
    int block[3];   \
    \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      block[0] = flags_in;   \
      block[1] = (int)(long)&e_val;   \
      block[2] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%ebx\n\t"   \
          "lock; " #_name " $" #_imm ",(%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 8(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
        sprintf(outBuf, \
             "%s I=%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",       \
             #_name, #_imm, e_val_before, flags_in,         \
                e_val, block[2] & CC_MASK) );               \
      \
    }}}}}}   \
    \
    }   \
  }
    356 
/* Instantiate do_locked_imm_E_<op>_<imm>() for seven operations (add,
   or, adc, sbb, and, sub, xor).  Each operation gets two immediates per
   operand size (byte, word, long): 6 functions per operation, 42 in
   total, each containing one locked instruction. */
GEN_do_locked_imm_E(addb,al,0x7F)
GEN_do_locked_imm_E(addb,al,0xF1)
GEN_do_locked_imm_E(addw,ax,0x7E)
GEN_do_locked_imm_E(addw,ax,0x9325)
GEN_do_locked_imm_E(addl,eax,0x7D)
GEN_do_locked_imm_E(addl,eax,0x31415927)

GEN_do_locked_imm_E(orb,al,0x7F)
GEN_do_locked_imm_E(orb,al,0xF1)
GEN_do_locked_imm_E(orw,ax,0x7E)
GEN_do_locked_imm_E(orw,ax,0x9325)
GEN_do_locked_imm_E(orl,eax,0x7D)
GEN_do_locked_imm_E(orl,eax,0x31415927)

GEN_do_locked_imm_E(adcb,al,0x7F)
GEN_do_locked_imm_E(adcb,al,0xF1)
GEN_do_locked_imm_E(adcw,ax,0x7E)
GEN_do_locked_imm_E(adcw,ax,0x9325)
GEN_do_locked_imm_E(adcl,eax,0x7D)
GEN_do_locked_imm_E(adcl,eax,0x31415927)

GEN_do_locked_imm_E(sbbb,al,0x7F)
GEN_do_locked_imm_E(sbbb,al,0xF1)
GEN_do_locked_imm_E(sbbw,ax,0x7E)
GEN_do_locked_imm_E(sbbw,ax,0x9325)
GEN_do_locked_imm_E(sbbl,eax,0x7D)
GEN_do_locked_imm_E(sbbl,eax,0x31415927)

GEN_do_locked_imm_E(andb,al,0x7F)
GEN_do_locked_imm_E(andb,al,0xF1)
GEN_do_locked_imm_E(andw,ax,0x7E)
GEN_do_locked_imm_E(andw,ax,0x9325)
GEN_do_locked_imm_E(andl,eax,0x7D)
GEN_do_locked_imm_E(andl,eax,0x31415927)

GEN_do_locked_imm_E(subb,al,0x7F)
GEN_do_locked_imm_E(subb,al,0xF1)
GEN_do_locked_imm_E(subw,ax,0x7E)
GEN_do_locked_imm_E(subw,ax,0x9325)
GEN_do_locked_imm_E(subl,eax,0x7D)
GEN_do_locked_imm_E(subl,eax,0x31415927)

GEN_do_locked_imm_E(xorb,al,0x7F)
GEN_do_locked_imm_E(xorb,al,0xF1)
GEN_do_locked_imm_E(xorw,ax,0x7E)
GEN_do_locked_imm_E(xorw,ax,0x9325)
GEN_do_locked_imm_E(xorl,eax,0x7D)
GEN_do_locked_imm_E(xorl,eax,0x31415927)
    405 
/* GEN_do_locked_unary_E(_name,_eax) defines the function
   do_locked_unary_E_<_name>(), which exercises "lock; <_name> (mem)".

     _name  instruction mnemonic including its size suffix.
     _eax   not referenced anywhere in the macro body.

   For every entry of val[] (memory value E) and each of the 64
   combinations of the O/S/Z/A/C/P flags, the generated function:
     - fills block[]: [0] flags to load, [1] address of e_val, [2] slot
       for the resulting flags;
     - in the asm, loads block[0] into the flags register via
       pushl/popfl, loads the address into ebx, runs the locked
       instruction on (%ebx), then stores the flags obtained with
       pushfl/popl into block[2];
     - formats the old and new memory value and the flags (masked with
       CC_MASK) into outBuf and passes the line to send().

   The address of e_val is stored in an int via (int)(long) and the asm
   uses 32-bit registers, so this is 32-bit x86 code. */
#define GEN_do_locked_unary_E(_name,_eax)        \
  \
  __attribute__((noinline)) void do_locked_unary_E_##_name ( void )  \
  {   \
    volatile int e_val, e_val_before;   \
    int o, s, z, a, c, p, v2, flags_in;   \
    int block[3];   \
    \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      block[0] = flags_in;   \
      block[1] = (int)(long)&e_val;   \
      block[2] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%ebx\n\t"   \
          "lock; " #_name " (%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 8(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
         sprintf(outBuf, \
                "%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",   \
             #_name, e_val_before, flags_in,         \
                e_val, block[2] & CC_MASK));         \
      \
    }}}}}}   \
    \
    }   \
  }
    458 
/* Instantiate do_locked_unary_E_<op>() for dec, inc, neg and not, each
   in byte, word and long form: 12 functions in total, each containing
   one locked instruction. */
GEN_do_locked_unary_E(decb,al)
GEN_do_locked_unary_E(decw,ax)
GEN_do_locked_unary_E(decl,eax)

GEN_do_locked_unary_E(incb,al)
GEN_do_locked_unary_E(incw,ax)
GEN_do_locked_unary_E(incl,eax)

GEN_do_locked_unary_E(negb,al)
GEN_do_locked_unary_E(negw,ax)
GEN_do_locked_unary_E(negl,eax)

GEN_do_locked_unary_E(notb,al)
GEN_do_locked_unary_E(notw,ax)
GEN_do_locked_unary_E(notl,eax)
    474 
    475 
    476 /////////////////////////////////////////////////////////////////
    477 
    478 unsigned int btsl_mem ( UChar* base, int bitno )
    479 {
    480    unsigned char res;
    481    __asm__
    482    __volatile__("lock; btsl\t%2, %0\n\t"
    483                 "setc\t%1"
    484                 : "=m" (*base), "=q" (res)
    485                 : "r" (bitno));
    486    /* Pretty meaningless to dereference base here, but that's what you
    487       have to do to get a btsl insn which refers to memory starting at
    488       base. */
    489    return res;
    490 }
    491 unsigned int btsw_mem ( UChar* base, int bitno )
    492 {
    493    unsigned char res;
    494    __asm__
    495    __volatile__("lock; btsw\t%w2, %0\n\t"
    496                 "setc\t%1"
    497                 : "=m" (*base), "=q" (res)
    498                 : "r" (bitno));
    499    return res;
    500 }
    501 
    502 unsigned int btrl_mem ( UChar* base, int bitno )
    503 {
    504    unsigned char res;
    505    __asm__
    506    __volatile__("lock; btrl\t%2, %0\n\t"
    507                 "setc\t%1"
    508                 : "=m" (*base), "=q" (res)
    509                 : "r" (bitno));
    510    return res;
    511 }
    512 unsigned int btrw_mem ( UChar* base, int bitno )
    513 {
    514    unsigned char res;
    515    __asm__
    516    __volatile__("lock; btrw\t%w2, %0\n\t"
    517                 "setc\t%1"
    518                 : "=m" (*base), "=q" (res)
    519                 : "r" (bitno));
    520    return res;
    521 }
    522 
    523 unsigned int btcl_mem ( UChar* base, int bitno )
    524 {
    525    unsigned char res;
    526    __asm__
    527    __volatile__("lock; btcl\t%2, %0\n\t"
    528                 "setc\t%1"
    529                 : "=m" (*base), "=q" (res)
    530                 : "r" (bitno));
    531    return res;
    532 }
    533 unsigned int btcw_mem ( UChar* base, int bitno )
    534 {
    535    unsigned char res;
    536    __asm__
    537    __volatile__("lock; btcw\t%w2, %0\n\t"
    538                 "setc\t%1"
    539                 : "=m" (*base), "=q" (res)
    540                 : "r" (bitno));
    541    return res;
    542 }
    543 
    544 unsigned int btl_mem ( UChar* base, int bitno )
    545 {
    546    unsigned char res;
    547    __asm__
    548    __volatile__("btl\t%2, %0\n\t"
    549                 "setc\t%1"
    550                 : "=m" (*base), "=q" (res)
    551                 : "r" (bitno)
    552                 : "cc", "memory");
    553    return res;
    554 }
    555 unsigned int btw_mem ( UChar* base, int bitno )
    556 {
    557    unsigned char res;
    558    __asm__
    559    __volatile__("btw\t%w2, %0\n\t"
    560                 "setc\t%1"
    561                 : "=m" (*base), "=q" (res)
    562                 : "r" (bitno));
    563    return res;
    564 }
    565 
    566 ULong rol1 ( ULong x )
    567 {
    568   return (x << 1) | (x >> 63);
    569 }
    570 
    571 void do_bt_G_E_tests ( void )
    572 {
    573    UInt   n, bitoff, op;
    574    UInt   c;
    575    UChar* block;
    576    ULong  carrydep, res;;
    577 
    578    /*------------------------ MEM-L -----------------------*/
    579 
    580    carrydep = 0;
    581    block = calloc(200,1);
    582    block += 100;
    583    /* Valid bit offsets are -800 .. 799 inclusive. */
    584 
    585    for (n = 0; n < 10000; n++) {
    586       bitoff = (myrandom() % 1600) - 800;
    587       op = myrandom() % 4;
    588       c = 2;
    589       switch (op) {
    590          case 0: c = btsl_mem(block, bitoff); break;
    591          case 1: c = btrl_mem(block, bitoff); break;
    592          case 2: c = btcl_mem(block, bitoff); break;
    593          case 3: c = btl_mem(block, bitoff); break;
    594       }
    595       c &= 255;
    596       assert(c == 0 || c == 1);
    597       carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
    598    }
    599 
    600    /* Compute final result */
    601    block -= 100;
    602    res = 0;
    603    for (n = 0; n < 200; n++) {
    604       UChar ch = block[n];
    605       /* printf("%d ", (int)block[n]); */
    606       res = rol1(res) ^ (ULong)ch;
    607    }
    608 
    609    send( sprintf(outBuf,
    610                  "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
    611                  res, carrydep ));
    612    free(block);
    613 
    614    /*------------------------ MEM-W -----------------------*/
    615 
    616    carrydep = 0;
    617    block = calloc(200,1);
    618    block += 100;
    619    /* Valid bit offsets are -800 .. 799 inclusive. */
    620 
    621    for (n = 0; n < 10000; n++) {
    622       bitoff = (myrandom() % 1600) - 800;
    623       op = myrandom() % 4;
    624       c = 2;
    625       switch (op) {
    626          case 0: c = btsw_mem(block, bitoff); break;
    627          case 1: c = btrw_mem(block, bitoff); break;
    628          case 2: c = btcw_mem(block, bitoff); break;
    629          case 3: c = btw_mem(block, bitoff); break;
    630       }
    631       c &= 255;
    632       assert(c == 0 || c == 1);
    633       carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
    634    }
    635 
    636    /* Compute final result */
    637    block -= 100;
    638    res = 0;
    639    for (n = 0; n < 200; n++) {
    640       UChar ch = block[n];
    641       /* printf("%d ", (int)block[n]); */
    642       res = rol1(res) ^ (ULong)ch;
    643    }
    644 
    645    send( sprintf(outBuf,
    646                  "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
    647                  res, carrydep ));
    648    free(block);
    649 }
    650 
    651 
    652 /////////////////////////////////////////////////////////////////
    653 
    654 /* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
    655    also reconstruct the original bits 0, 1, 2, 3 by looking at the
    656    carry flag.  Returned result has mashed bits 0-3 at the bottom and
    657    the reconstructed original bits 0-3 as 4-7. */
    658 
    659 UInt mash_mem_L ( UInt* origp )
    660 {
    661   UInt reconstructed, mashed;
    662   __asm__ __volatile__ (
    663      "movl %2, %%edx\n\t"
    664      ""
    665      "movl $0, %%eax\n\t"
    666      "\n\t"
    667      "btl  $0, (%%edx)\n\t"
    668      "setb %%cl\n\t"
    669      "movzbl %%cl, %%ecx\n\t"
    670      "orl %%ecx, %%eax\n\t"
    671      "\n\t"
    672      "lock; btsl $1, (%%edx)\n\t"
    673      "setb %%cl\n\t"
    674      "movzbl %%cl, %%ecx\n\t"
    675      "shll $1, %%ecx\n\t"
    676      "orl %%ecx, %%eax\n\t"
    677      "\n\t"
    678      "lock; btrl $2, (%%edx)\n\t"
    679      "setb %%cl\n\t"
    680      "movzbl %%cl, %%ecx\n\t"
    681      "shll $2, %%ecx\n\t"
    682      "orl %%ecx, %%eax\n\t"
    683      "\n\t"
    684      "lock; btcl $3, (%%edx)\n\t"
    685      "setb %%cl\n\t"
    686      "movzbl %%cl, %%ecx\n\t"
    687      "shll $3, %%ecx\n\t"
    688      "orl %%ecx, %%eax\n\t"
    689      "\n\t"
    690      "movl %%eax, %0\n\t"
    691      "movl (%%edx), %1"
    692 
    693      : "=r" (reconstructed), "=r" (mashed)
    694      : "r" (origp)
    695      : "eax", "ecx", "edx", "cc");
    696   return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
    697 }
    698 
    699 UInt mash_mem_W ( UShort* origp )
    700 {
    701   UInt reconstructed, mashed;
    702   __asm__ __volatile__ (
    703      "movl %2, %%edx\n\t"
    704      ""
    705      "movl $0, %%eax\n\t"
    706      "\n\t"
    707      "btw  $0, (%%edx)\n\t"
    708      "setb %%cl\n\t"
    709      "movzbl %%cl, %%ecx\n\t"
    710      "orl %%ecx, %%eax\n\t"
    711      "\n\t"
    712      "lock; btsw $1, (%%edx)\n\t"
    713      "setb %%cl\n\t"
    714      "movzbl %%cl, %%ecx\n\t"
    715      "shll $1, %%ecx\n\t"
    716      "orl %%ecx, %%eax\n\t"
    717      "\n\t"
    718      "lock; btrw $2, (%%edx)\n\t"
    719      "setb %%cl\n\t"
    720      "movzbl %%cl, %%ecx\n\t"
    721      "shll $2, %%ecx\n\t"
    722      "orl %%ecx, %%eax\n\t"
    723      "\n\t"
    724      "lock; btcw $3, (%%edx)\n\t"
    725      "setb %%cl\n\t"
    726      "movzbl %%cl, %%ecx\n\t"
    727      "shll $3, %%ecx\n\t"
    728      "orl %%ecx, %%eax\n\t"
    729      "\n\t"
    730      "movl %%eax, %0\n\t"
    731      "movzwl (%%edx), %1"
    732 
    733      : "=r" (reconstructed), "=r" (mashed)
    734      : "r" (origp)
    735      : "eax", "ecx", "edx", "cc");
    736   return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
    737 }
    738 
    739 
    740 void do_bt_imm_E_tests( void )
    741 {
    742   int i;
    743   UInt*   iil = malloc(sizeof(UInt));
    744   UShort* iiw = malloc(sizeof(UShort));
    745   for (i = 0; i < 0x10; i++) {
    746     *iil = i;
    747     *iiw = i;
    748     send( sprintf(outBuf, "0x%x -> 0x%02x 0x%02x\n", i,
    749                   mash_mem_L(iil), mash_mem_W(iiw)));
    750   }
    751   free(iil);
    752   free(iiw);
    753 }
    754 
    755 
    756 
    757 /////////////////////////////////////////////////////////////////
    758 
    759 int main ( void )
    760 {
    761   do_locked_G_E_addb();
    762   do_locked_G_E_addw();
    763   do_locked_G_E_addl();
    764 
    765   do_locked_G_E_orb();
    766   do_locked_G_E_orw();
    767   do_locked_G_E_orl();
    768 
    769   do_locked_G_E_adcb();
    770   do_locked_G_E_adcw();
    771   do_locked_G_E_adcl();
    772 
    773   do_locked_G_E_sbbb();
    774   do_locked_G_E_sbbw();
    775   do_locked_G_E_sbbl();
    776 
    777   do_locked_G_E_andb();
    778   do_locked_G_E_andw();
    779   do_locked_G_E_andl();
    780 
    781   do_locked_G_E_subb();
    782   do_locked_G_E_subw();
    783   do_locked_G_E_subl();
    784 
    785   do_locked_G_E_xorb();
    786   do_locked_G_E_xorw();
    787   do_locked_G_E_xorl();
    788   //21
    789   do_locked_imm_E_addb_0x7F();
    790   do_locked_imm_E_addb_0xF1();
    791   do_locked_imm_E_addw_0x7E();
    792   do_locked_imm_E_addw_0x9325();
    793   do_locked_imm_E_addl_0x7D();
    794   do_locked_imm_E_addl_0x31415927();
    795 
    796   do_locked_imm_E_orb_0x7F();
    797   do_locked_imm_E_orb_0xF1();
    798   do_locked_imm_E_orw_0x7E();
    799   do_locked_imm_E_orw_0x9325();
    800   do_locked_imm_E_orl_0x7D();
    801   do_locked_imm_E_orl_0x31415927();
    802 
    803   do_locked_imm_E_adcb_0x7F();
    804   do_locked_imm_E_adcb_0xF1();
    805   do_locked_imm_E_adcw_0x7E();
    806   do_locked_imm_E_adcw_0x9325();
    807   do_locked_imm_E_adcl_0x7D();
    808   do_locked_imm_E_adcl_0x31415927();
    809 
    810   do_locked_imm_E_sbbb_0x7F();
    811   do_locked_imm_E_sbbb_0xF1();
    812   do_locked_imm_E_sbbw_0x7E();
    813   do_locked_imm_E_sbbw_0x9325();
    814   do_locked_imm_E_sbbl_0x7D();
    815   do_locked_imm_E_sbbl_0x31415927();
    816 
    817   do_locked_imm_E_andb_0x7F();
    818   do_locked_imm_E_andb_0xF1();
    819   do_locked_imm_E_andw_0x7E();
    820   do_locked_imm_E_andw_0x9325();
    821   do_locked_imm_E_andl_0x7D();
    822   do_locked_imm_E_andl_0x31415927();
    823 
    824   do_locked_imm_E_subb_0x7F();
    825   do_locked_imm_E_subb_0xF1();
    826   do_locked_imm_E_subw_0x7E();
    827   do_locked_imm_E_subw_0x9325();
    828   do_locked_imm_E_subl_0x7D();
    829   do_locked_imm_E_subl_0x31415927();
    830 
    831   do_locked_imm_E_xorb_0x7F();
    832   do_locked_imm_E_xorb_0xF1();
    833   do_locked_imm_E_xorw_0x7E();
    834   do_locked_imm_E_xorw_0x9325();
    835   do_locked_imm_E_xorl_0x7D();
    836   do_locked_imm_E_xorl_0x31415927();
    837   // 63
    838   do_locked_unary_E_decb();
    839   do_locked_unary_E_decw();
    840   do_locked_unary_E_decl();
    841 
    842   do_locked_unary_E_incb();
    843   do_locked_unary_E_incw();
    844   do_locked_unary_E_incl();
    845 
    846   do_locked_unary_E_negb();
    847   do_locked_unary_E_negw();
    848   do_locked_unary_E_negl();
    849 
    850   do_locked_unary_E_notb();
    851   do_locked_unary_E_notw();
    852   do_locked_unary_E_notl();
    853   // 75
    854   do_bt_G_E_tests();
    855   // 81
    856   do_bt_imm_E_tests();
    857   // 87
    858   // So there should be 87 lock-prefixed instructions in the
    859   // disassembly of this compilation unit.
    860   // confirm with
    861   // objdump -d ./x86locked | grep lock | grep -v do_lock | grep -v elf32 | wc
    862 
    863   { UInt crcExpd = 0xB2D75045;
    864     theCRC = crcFinalise( theCRC );
    865     if (theCRC == crcExpd) {
    866        printf("x86locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
    867               theCRC, crcExpd);
    868     } else {
    869        printf("x86locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
    870               theCRC, crcExpd);
    871        printf("x86locked: set #define VERBOSE 1 to diagnose\n");
    872     }
    873   }
    874 
    875   return 0;
    876 }
    877