/* amd64 tests for the BMI1/BMI2 instructions: andn, mulx, sarx, shlx, shrx,
   rorx, blsi, blsmsk, blsr, bextr, bzhi, pdep, pext. */
      1 
      2 #include <stdio.h>
      3 
      4 typedef  unsigned long long int  ULong;
      5 typedef  unsigned int            UInt;
      6 
      7 __attribute__((noinline))
      8 void do_andn64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg1, ULong arg2 )
      9 {
     10   ULong tem, flag;
     11   __asm__ __volatile__(
     12     "movabsq $0x5555555555555555, %0" "\n\t"
     13     "andn %2, %3, %0"         "\n\t"
     14     "pushfq"		      "\n\t"
     15     "popq %1"                 "\n"
     16     : "=&r" (tem), "=r" (flag) : "r" (arg1), "r" (arg2) : "cc"
     17   );
     18   *res = tem;
     19   *flags = flag & 0x8d5;
     20   __asm__ __volatile__(
     21     "movabsq $0x5555555555555555, %0" "\n\t"
     22     "andn %2, %3, %0"         "\n\t"
     23     "pushfq"		      "\n\t"
     24     "popq %1"                 "\n"
     25     : "=&r" (tem), "=r" (flag) : "m" (arg1), "r" (arg2) : "cc"
     26   );
     27   if (*res != tem || *flags != (flag & 0x8d5))
     28      printf ("Difference between r and m variants\n");
     29 }
     30 
     31 __attribute__((noinline))
     32 void do_andn32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg1, UInt arg2 )
     33 {
     34   ULong tem, flag;
     35   __asm__ __volatile__(
     36     "movabsq $0x5555555555555555, %0" "\n\t"
     37     "andn %2, %3, %k0"        "\n\t"
     38     "pushfq"		      "\n\t"
     39     "popq %1"                 "\n"
     40     : "=&r" (tem), "=r" (flag) : "r" (arg1), "r" (arg2) : "cc"
     41   );
     42   *res = tem;
     43   *flags = flag & 0x8d5;
     44   __asm__ __volatile__(
     45     "movabsq $0x5555555555555555, %0" "\n\t"
     46     "andn %2, %3, %k0"        "\n\t"
     47     "pushfq"		      "\n\t"
     48     "popq %1"                 "\n"
     49     : "=&r" (tem), "=r" (flag) : "m" (arg1), "r" (arg2) : "cc"
     50   );
     51   if (*res != tem || *flags != (flag & 0x8d5))
     52      printf ("Difference between r and m variants\n");
     53 }
     54 
     55 
     56 __attribute__((noinline))
     57 void do_mulx64 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2,
     58                  ULong arg1, ULong arg2 )
     59 {
     60   ULong tem1, tem2, flag1, flag2, flag3, flag4;
     61   __asm__ __volatile__(
     62     "movabsq $0x5555555555555555, %0" "\n\t"
     63     "movabsq $0x5555555555555555, %1" "\n\t"
     64     "movq %4, %%rdx"          "\n\t"
     65     "pushfq"                  "\n\t"
     66     "xorq $0x8d5, (%%rsp)"    "\n\t"
     67     "movq (%%rsp), %2"        "\n\t"
     68     "popfq"                   "\n\t"
     69     "mulx %5, %1, %0"         "\n\t"
     70     "pushfq"                  "\n\t"
     71     "movq (%%rsp), %3"        "\n\t"
     72     "xorq $0x8d5, (%%rsp)"    "\n\t"
     73     "popfq"                   "\n"
     74     : "=&r" (tem1), "=&r" (tem2), "=&r" (flag1), "=r" (flag2)
     75     : "g" (arg1), "r" (arg2) : "cc", "rdx"
     76   );
     77   *res1 = tem1;
     78   *res2 = tem2;
     79   __asm__ __volatile__(
     80     "movabsq $0x5555555555555555, %0" "\n\t"
     81     "movabsq $0x5555555555555555, %1" "\n\t"
     82     "movq %4, %%rdx"          "\n\t"
     83     "pushfq"                  "\n\t"
     84     "popq %2"                 "\n\t"
     85     "mulx %5, %1, %0"         "\n\t"
     86     "pushfq"                  "\n\t"
     87     "popq %3"                 "\n"
     88     : "=&r" (tem1), "=&r" (tem2), "=&r" (flag3), "=r" (flag4)
     89     : "g" (arg1), "m" (arg2) : "cc", "rdx"
     90   );
     91   if (*res1 != tem1 || *res2 != tem2)
     92      printf ("Difference between r and m variants\n");
     93   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
     94      printf ("Flags changed\n");
     95 }
     96 
     97 __attribute__((noinline))
     98 void do_mulx32 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2,
     99                  UInt arg1, UInt arg2 )
    100 {
    101   ULong tem1, tem2, flag1, flag2, flag3, flag4;
    102   __asm__ __volatile__(
    103     "movabsq $0x5555555555555555, %0" "\n\t"
    104     "movabsq $0x5555555555555555, %1" "\n\t"
    105     "movl %4, %%edx"          "\n\t"
    106     "pushfq"                  "\n\t"
    107     "xorq $0x8d5, (%%rsp)"    "\n\t"
    108     "movq (%%rsp), %2"        "\n\t"
    109     "popfq"                    "\n\t"
    110     "mulx %5, %k1, %k0"       "\n\t"
    111     "pushfq"                  "\n\t"
    112     "movq (%%rsp), %3"        "\n\t"
    113     "xorq $0x8d5, (%%rsp)"    "\n\t"
    114     "popfq"                   "\n"
    115     : "=&r" (tem1), "=&r" (tem2), "=&r" (flag1), "=r" (flag2)
    116     : "g" (arg1), "r" (arg2) : "cc", "rdx"
    117   );
    118   *res1 = tem1;
    119   *res2 = tem2;
    120   __asm__ __volatile__(
    121     "movabsq $0x5555555555555555, %0" "\n\t"
    122     "movabsq $0x5555555555555555, %1" "\n\t"
    123     "movl %4, %%edx"          "\n\t"
    124     "pushfq"                  "\n\t"
    125     "popq %2"                 "\n\t"
    126     "mulx %5, %k1, %k0"       "\n\t"
    127     "pushfq"                  "\n\t"
    128     "popq %3"                 "\n"
    129     : "=&r" (tem1), "=&r" (tem2), "=&r" (flag3), "=r" (flag4)
    130     : "g" (arg1), "m" (arg2) : "cc", "rdx"
    131   );
    132   if (*res1 != tem1 || *res2 != tem2)
    133      printf ("Difference between r and m variants\n");
    134   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    135      printf ("Flags changed\n");
    136 }
    137 
    138 
    139 __attribute__((noinline))
    140 void do_sarx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
    141 {
    142   ULong tem, flag1, flag2, flag3, flag4;
    143   __asm__ __volatile__(
    144     "movabsq $0x5555555555555555, %0" "\n\t"
    145     "pushfq"                  "\n\t"
    146     "xorq $0x8d5, (%%rsp)"    "\n\t"
    147     "movq (%%rsp), %1"        "\n\t"
    148     "popfq"                   "\n\t"
    149     "sarx %3, %4, %0"         "\n\t"
    150     "pushfq"                  "\n\t"
    151     "movq (%%rsp), %2"        "\n\t"
    152     "xorq $0x8d5, (%%rsp)"    "\n\t"
    153     "popfq"                   "\n"
    154     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
    155     : "r" (arg1), "r" (arg2) : "cc"
    156   );
    157   *res = tem;
    158   __asm__ __volatile__(
    159     "movabsq $0x5555555555555555, %0" "\n\t"
    160     "pushfq"                  "\n\t"
    161     "xorq $0x8d5, (%%rsp)"    "\n\t"
    162     "movq (%%rsp), %1"        "\n\t"
    163     "popfq"                   "\n\t"
    164     "sarx %3, %4, %0"         "\n\t"
    165     "pushfq"                  "\n\t"
    166     "movq (%%rsp), %2"        "\n\t"
    167     "xorq $0x8d5, (%%rsp)"    "\n\t"
    168     "popfq"                   "\n"
    169     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
    170     : "r" (arg1), "m" (arg2) : "cc"
    171   );
    172   if (*res != tem)
    173      printf ("Difference between r and m variants\n");
    174   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    175      printf ("Flags changed\n");
    176 }
    177 
    178 __attribute__((noinline))
    179 void do_sarx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
    180 {
    181   ULong tem, flag1, flag2, flag3, flag4;
    182   __asm__ __volatile__(
    183     "movabsq $0x5555555555555555, %0" "\n\t"
    184     "pushfq"                  "\n\t"
    185     "xorq $0x8d5, (%%rsp)"    "\n\t"
    186     "movq (%%rsp), %1"        "\n\t"
    187     "popfq"                   "\n\t"
    188     "sarx %3, %4, %k0"        "\n\t"
    189     "pushfq"                  "\n\t"
    190     "movq (%%rsp), %2"        "\n\t"
    191     "xorq $0x8d5, (%%rsp)"    "\n\t"
    192     "popfq"                   "\n"
    193     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
    194     : "r" (arg1), "r" (arg2) : "cc"
    195   );
    196   *res = tem;
    197   __asm__ __volatile__(
    198     "movabsq $0x5555555555555555, %0" "\n\t"
    199     "pushfq"                  "\n\t"
    200     "xorq $0x8d5, (%%rsp)"    "\n\t"
    201     "movq (%%rsp), %1"        "\n\t"
    202     "popfq"                   "\n\t"
    203     "sarx %3, %4, %k0"        "\n\t"
    204     "pushfq"                  "\n\t"
    205     "movq (%%rsp), %2"        "\n\t"
    206     "xorq $0x8d5, (%%rsp)"    "\n\t"
    207     "popfq"                   "\n"
    208     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
    209     : "r" (arg1), "m" (arg2) : "cc"
    210   );
    211   if (*res != tem)
    212      printf ("Difference between r and m variants\n");
    213   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    214      printf ("Flags changed\n");
    215 }
    216 
    217 
    218 __attribute__((noinline))
    219 void do_shlx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
    220 {
    221   ULong tem, flag1, flag2, flag3, flag4;
    222   __asm__ __volatile__(
    223     "movabsq $0x5555555555555555, %0" "\n\t"
    224     "pushfq"                  "\n\t"
    225     "xorq $0x8d5, (%%rsp)"    "\n\t"
    226     "movq (%%rsp), %1"        "\n\t"
    227     "popfq"                   "\n\t"
    228     "shlx %3, %4, %0"         "\n\t"
    229     "pushfq"                  "\n\t"
    230     "movq (%%rsp), %2"        "\n\t"
    231     "xorq $0x8d5, (%%rsp)"    "\n\t"
    232     "popfq"                   "\n"
    233     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
    234     : "r" (arg1), "r" (arg2) : "cc"
    235   );
    236   *res = tem;
    237   __asm__ __volatile__(
    238     "movabsq $0x5555555555555555, %0" "\n\t"
    239     "pushfq"                  "\n\t"
    240     "xorq $0x8d5, (%%rsp)"    "\n\t"
    241     "movq (%%rsp), %1"        "\n\t"
    242     "popfq"                   "\n\t"
    243     "shlx %3, %4, %0"         "\n\t"
    244     "pushfq"                  "\n\t"
    245     "movq (%%rsp), %2"        "\n\t"
    246     "xorq $0x8d5, (%%rsp)"    "\n\t"
    247     "popfq"                   "\n"
    248     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
    249     : "r" (arg1), "m" (arg2) : "cc"
    250   );
    251   if (*res != tem)
    252      printf ("Difference between r and m variants\n");
    253   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    254      printf ("Flags changed\n");
    255 }
    256 
    257 __attribute__((noinline))
    258 void do_shlx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
    259 {
    260   ULong tem, flag1, flag2, flag3, flag4;
    261   __asm__ __volatile__(
    262     "movabsq $0x5555555555555555, %0" "\n\t"
    263     "pushfq"                  "\n\t"
    264     "xorq $0x8d5, (%%rsp)"    "\n\t"
    265     "movq (%%rsp), %1"        "\n\t"
    266     "popfq"                   "\n\t"
    267     "shlx %3, %4, %k0"        "\n\t"
    268     "pushfq"                  "\n\t"
    269     "movq (%%rsp), %2"        "\n\t"
    270     "xorq $0x8d5, (%%rsp)"    "\n\t"
    271     "popfq"                   "\n"
    272     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
    273     : "r" (arg1), "r" (arg2) : "cc"
    274   );
    275   *res = tem;
    276   __asm__ __volatile__(
    277     "movabsq $0x5555555555555555, %0" "\n\t"
    278     "pushfq"                  "\n\t"
    279     "xorq $0x8d5, (%%rsp)"    "\n\t"
    280     "movq (%%rsp), %1"        "\n\t"
    281     "popfq"                   "\n\t"
    282     "shlx %3, %4, %k0"        "\n\t"
    283     "pushfq"                  "\n\t"
    284     "movq (%%rsp), %2"        "\n\t"
    285     "xorq $0x8d5, (%%rsp)"    "\n\t"
    286     "popfq"                   "\n"
    287     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
    288     : "r" (arg1), "m" (arg2) : "cc"
    289   );
    290   if (*res != tem)
    291      printf ("Difference between r and m variants\n");
    292   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    293      printf ("Flags changed\n");
    294 }
    295 
    296 
    297 __attribute__((noinline))
    298 void do_shrx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
    299 {
    300   ULong tem, flag1, flag2, flag3, flag4;
    301   __asm__ __volatile__(
    302     "movabsq $0x5555555555555555, %0" "\n\t"
    303     "pushfq"                  "\n\t"
    304     "xorq $0x8d5, (%%rsp)"    "\n\t"
    305     "movq (%%rsp), %1"        "\n\t"
    306     "popfq"                   "\n\t"
    307     "shrx %3, %4, %0"         "\n\t"
    308     "pushfq"                  "\n\t"
    309     "movq (%%rsp), %2"        "\n\t"
    310     "xorq $0x8d5, (%%rsp)"    "\n\t"
    311     "popfq"                   "\n"
    312     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
    313     : "r" (arg1), "r" (arg2) : "cc"
    314   );
    315   *res = tem;
    316   __asm__ __volatile__(
    317     "movabsq $0x5555555555555555, %0" "\n\t"
    318     "pushfq"                  "\n\t"
    319     "xorq $0x8d5, (%%rsp)"    "\n\t"
    320     "movq (%%rsp), %1"        "\n\t"
    321     "popfq"                   "\n\t"
    322     "shrx %3, %4, %0"         "\n\t"
    323     "pushfq"                  "\n\t"
    324     "movq (%%rsp), %2"        "\n\t"
    325     "xorq $0x8d5, (%%rsp)"    "\n\t"
    326     "popfq"                   "\n"
    327     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
    328     : "r" (arg1), "m" (arg2) : "cc"
    329   );
    330   if (*res != tem)
    331      printf ("Difference between r and m variants\n");
    332   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    333      printf ("Flags changed\n");
    334 }
    335 
    336 __attribute__((noinline))
    337 void do_shrx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
    338 {
    339   ULong tem, flag1, flag2, flag3, flag4;
    340   __asm__ __volatile__(
    341     "movabsq $0x5555555555555555, %0" "\n\t"
    342     "pushfq"                  "\n\t"
    343     "xorq $0x8d5, (%%rsp)"    "\n\t"
    344     "movq (%%rsp), %1"        "\n\t"
    345     "popfq"                   "\n\t"
    346     "shrx %3, %4, %k0"        "\n\t"
    347     "pushfq"                  "\n\t"
    348     "movq (%%rsp), %2"        "\n\t"
    349     "xorq $0x8d5, (%%rsp)"    "\n\t"
    350     "popfq"                   "\n"
    351     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
    352     : "r" (arg1), "r" (arg2) : "cc"
    353   );
    354   *res = tem;
    355   __asm__ __volatile__(
    356     "movabsq $0x5555555555555555, %0" "\n\t"
    357     "pushfq"                  "\n\t"
    358     "xorq $0x8d5, (%%rsp)"    "\n\t"
    359     "movq (%%rsp), %1"        "\n\t"
    360     "popfq"                   "\n\t"
    361     "shrx %3, %4, %k0"        "\n\t"
    362     "pushfq"                  "\n\t"
    363     "movq (%%rsp), %2"        "\n\t"
    364     "xorq $0x8d5, (%%rsp)"    "\n\t"
    365     "popfq"                   "\n"
    366     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
    367     : "r" (arg1), "m" (arg2) : "cc"
    368   );
    369   if (*res != tem)
    370      printf ("Difference between r and m variants\n");
    371   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    372      printf ("Flags changed\n");
    373 }
    374 
    375 
    376 __attribute__((noinline))
    377 void do_rorx64 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2, ULong arg )
    378 {
    379   ULong tem, flag1, flag2, flag3, flag4;
    380   __asm__ __volatile__(
    381     "movabsq $0x5555555555555555, %0" "\n\t"
    382     "pushfq"                  "\n\t"
    383     "xorq $0x8d5, (%%rsp)"    "\n\t"
    384     "movq (%%rsp), %1"        "\n\t"
    385     "popfq"                   "\n\t"
    386     "rorx $12, %3, %0"        "\n\t"
    387     "pushfq"                  "\n\t"
    388     "movq (%%rsp), %2"        "\n\t"
    389     "xorq $0x8d5, (%%rsp)"    "\n\t"
    390     "popfq"                   "\n"
    391     : "=&r" (tem), "=&r" (flag1), "=r" (flag2) : "r" (arg) : "cc"
    392   );
    393   *res1 = tem;
    394   __asm__ __volatile__(
    395     "movabsq $0x5555555555555555, %0" "\n\t"
    396     "pushfq"                  "\n\t"
    397     "xorq $0x8d5, (%%rsp)"    "\n\t"
    398     "movq (%%rsp), %1"        "\n\t"
    399     "popfq"                   "\n\t"
    400     "rorx $67, %3, %0"        "\n\t"
    401     "pushfq"                  "\n\t"
    402     "movq (%%rsp), %2"        "\n\t"
    403     "xorq $0x8d5, (%%rsp)"    "\n\t"
    404     "popfq"                   "\n"
    405     : "=&r" (tem), "=&r" (flag3), "=r" (flag4) : "m" (arg) : "cc"
    406   );
    407   *res2 = tem;
    408   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    409      printf ("Flags changed\n");
    410 }
    411 
    412 __attribute__((noinline))
    413 void do_rorx32 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2, UInt arg )
    414 {
    415   ULong tem, flag1, flag2, flag3, flag4;
    416   __asm__ __volatile__(
    417     "movabsq $0x5555555555555555, %0" "\n\t"
    418     "pushfq"                  "\n\t"
    419     "xorq $0x8d5, (%%rsp)"    "\n\t"
    420     "movq (%%rsp), %1"        "\n\t"
    421     "popfq"                   "\n\t"
    422     "rorx $12, %3, %k0"       "\n\t"
    423     "pushfq"                  "\n\t"
    424     "movq (%%rsp), %2"        "\n\t"
    425     "xorq $0x8d5, (%%rsp)"    "\n\t"
    426     "popfq"                   "\n"
    427     : "=&r" (tem), "=&r" (flag1), "=r" (flag2) : "r" (arg) : "cc"
    428   );
    429   *res1 = tem;
    430   __asm__ __volatile__(
    431     "movabsq $0x5555555555555555, %0" "\n\t"
    432     "pushfq"                  "\n\t"
    433     "xorq $0x8d5, (%%rsp)"    "\n\t"
    434     "movq (%%rsp), %1"        "\n\t"
    435     "popfq"                   "\n\t"
    436     "rorx $67, %3, %k0"       "\n\t"
    437     "pushfq"                  "\n\t"
    438     "movq (%%rsp), %2"        "\n\t"
    439     "xorq $0x8d5, (%%rsp)"    "\n\t"
    440     "popfq"                   "\n"
    441     : "=&r" (tem), "=&r" (flag3), "=r" (flag4) : "m" (arg) : "cc"
    442   );
    443   *res2 = tem;
    444   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    445      printf ("Flags changed\n");
    446 }
    447 
    448 
    449 __attribute__((noinline))
    450 void do_blsi64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
    451 {
    452   ULong tem, flag;
    453   __asm__ __volatile__(
    454     "movabsq $0x5555555555555555, %0" "\n\t"
    455     "blsi %2, %0"             "\n\t"
    456     "pushfq"		      "\n\t"
    457     "popq %1"                 "\n"
    458     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
    459   );
    460   *res = tem;
    461   *flags = flag & 0x8d5;
    462   __asm__ __volatile__(
    463     "movabsq $0x5555555555555555, %0" "\n\t"
    464     "blsi %2, %0"             "\n\t"
    465     "pushfq"		      "\n\t"
    466     "popq %1"                 "\n"
    467     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
    468   );
    469   if (*res != tem || *flags != (flag & 0x8d5))
    470      printf ("Difference between r and m variants\n");
    471 }
    472 
    473 __attribute__((noinline))
    474 void do_blsi32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
    475 {
    476   ULong tem, flag;
    477   __asm__ __volatile__(
    478     "movabsq $0x5555555555555555, %0" "\n\t"
    479     "blsi %2, %k0"            "\n\t"
    480     "pushfq"		      "\n\t"
    481     "popq %1"                 "\n"
    482     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
    483   );
    484   *res = tem;
    485   *flags = flag & 0x8d5;
    486   __asm__ __volatile__(
    487     "movabsq $0x5555555555555555, %0" "\n\t"
    488     "blsi %2, %k0"            "\n\t"
    489     "pushfq"		      "\n\t"
    490     "popq %1"                 "\n"
    491     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
    492   );
    493   if (*res != tem || *flags != (flag & 0x8d5))
    494      printf ("Difference between r and m variants\n");
    495 }
    496 
    497 
    498 __attribute__((noinline))
    499 void do_blsmsk64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
    500 {
    501   ULong tem, flag;
    502   __asm__ __volatile__(
    503     "movabsq $0x5555555555555555, %0" "\n\t"
    504     "blsmsk %2, %0"           "\n\t"
    505     "pushfq"		      "\n\t"
    506     "popq %1"                 "\n"
    507     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
    508   );
    509   *res = tem;
    510   *flags = flag & 0x8d5;
    511   __asm__ __volatile__(
    512     "movabsq $0x5555555555555555, %0" "\n\t"
    513     "blsmsk %2, %0"           "\n\t"
    514     "pushfq"		      "\n\t"
    515     "popq %1"                 "\n"
    516     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
    517   );
    518   if (*res != tem || *flags != (flag & 0x8d5))
    519      printf ("Difference between r and m variants\n");
    520 }
    521 
    522 __attribute__((noinline))
    523 void do_blsmsk32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
    524 {
    525   ULong tem, flag;
    526   __asm__ __volatile__(
    527     "movabsq $0x5555555555555555, %0" "\n\t"
    528     "blsmsk %2, %k0"          "\n\t"
    529     "pushfq"		      "\n\t"
    530     "popq %1"                 "\n"
    531     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
    532   );
    533   *res = tem;
    534   *flags = flag & 0x8d5;
    535   __asm__ __volatile__(
    536     "movabsq $0x5555555555555555, %0" "\n\t"
    537     "blsmsk %2, %k0"          "\n\t"
    538     "pushfq"		      "\n\t"
    539     "popq %1"                 "\n"
    540     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
    541   );
    542   if (*res != tem || *flags != (flag & 0x8d5))
    543      printf ("Difference between r and m variants\n");
    544 }
    545 
    546 
    547 __attribute__((noinline))
    548 void do_blsr64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
    549 {
    550   ULong tem, flag;
    551   __asm__ __volatile__(
    552     "movabsq $0x5555555555555555, %0" "\n\t"
    553     "blsr %2, %0"             "\n\t"
    554     "pushfq"		      "\n\t"
    555     "popq %1"                 "\n"
    556     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
    557   );
    558   *res = tem;
    559   *flags = flag & 0x8d5;
    560   __asm__ __volatile__(
    561     "movabsq $0x5555555555555555, %0" "\n\t"
    562     "blsr %2, %0"             "\n\t"
    563     "pushfq"		      "\n\t"
    564     "popq %1"                 "\n"
    565     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
    566   );
    567   if (*res != tem || *flags != (flag & 0x8d5))
    568      printf ("Difference between r and m variants\n");
    569 }
    570 
    571 __attribute__((noinline))
    572 void do_blsr32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
    573 {
    574   ULong tem, flag;
    575   __asm__ __volatile__(
    576     "movabsq $0x5555555555555555, %0" "\n\t"
    577     "blsr %2, %k0"            "\n\t"
    578     "pushfq"		      "\n\t"
    579     "popq %1"                 "\n"
    580     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
    581   );
    582   *res = tem;
    583   *flags = flag & 0x8d5;
    584   __asm__ __volatile__(
    585     "movabsq $0x5555555555555555, %0" "\n\t"
    586     "blsr %2, %k0"            "\n\t"
    587     "pushfq"		      "\n\t"
    588     "popq %1"                 "\n"
    589     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
    590   );
    591   if (*res != tem || *flags != (flag & 0x8d5))
    592      printf ("Difference between r and m variants\n");
    593 }
    594 
    595 
    596 __attribute__((noinline))
    597 void do_bextr64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
    598                   ULong arg1, ULong arg2 )
    599 {
    600   ULong tem, flag;
    601   __asm__ __volatile__(
    602     "movabsq $0x5555555555555555, %0" "\n\t"
    603     "bextr %2, %3, %0"        "\n\t"
    604     "pushfq"		      "\n\t"
    605     "popq %1"                 "\n"
    606     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
    607   );
    608   *res = tem;
    609   *flags = flag & 0x8d5;
    610   __asm__ __volatile__(
    611     "movabsq $0x5555555555555555, %0" "\n\t"
    612     "bextr %2, %3, %0"        "\n\t"
    613     "pushfq"		      "\n\t"
    614     "popq %1"                 "\n"
    615     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
    616   );
    617   if (*res != tem || *flags != (flag & 0x8d5))
    618      printf ("Difference between r and m variants\n");
    619 }
    620 
    621 __attribute__((noinline))
    622 void do_bextr32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
    623                   UInt arg1, UInt arg2 )
    624 {
    625   ULong tem, flag;
    626   __asm__ __volatile__(
    627     "movabsq $0x5555555555555555, %0" "\n\t"
    628     "bextr %2, %3, %k0"       "\n\t"
    629     "pushfq"		      "\n\t"
    630     "popq %1"                 "\n"
    631     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
    632   );
    633   *res = tem;
    634   *flags = flag & 0x8d5;
    635   __asm__ __volatile__(
    636     "movabsq $0x5555555555555555, %0" "\n\t"
    637     "bextr %2, %3, %k0"       "\n\t"
    638     "pushfq"		      "\n\t"
    639     "popq %1"                 "\n"
    640     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
    641   );
    642   if (*res != tem || *flags != (flag & 0x8d5))
    643      printf ("Difference between r and m variants\n");
    644 }
    645 
    646 
    647 __attribute__((noinline))
    648 void do_bzhi64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
    649                  ULong arg1, ULong arg2 )
    650 {
    651   ULong tem, flag;
    652   __asm__ __volatile__(
    653     "movabsq $0x5555555555555555, %0" "\n\t"
    654     "bzhi %2, %3, %0"         "\n\t"
    655     "pushfq"		      "\n\t"
    656     "popq %1"                 "\n"
    657     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
    658   );
    659   *res = tem;
    660   *flags = flag & 0x8d5;
    661   __asm__ __volatile__(
    662     "movabsq $0x5555555555555555, %0" "\n\t"
    663     "bzhi %2, %3, %0"         "\n\t"
    664     "pushfq"		      "\n\t"
    665     "popq %1"                 "\n"
    666     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
    667   );
    668   if (*res != tem || *flags != (flag & 0x8d5))
    669      printf ("Difference between r and m variants\n");
    670 }
    671 
    672 __attribute__((noinline))
    673 void do_bzhi32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
    674                  UInt arg1, UInt arg2 )
    675 {
    676   ULong tem, flag;
    677   __asm__ __volatile__(
    678     "movabsq $0x5555555555555555, %0" "\n\t"
    679     "bzhi %2, %3, %k0"        "\n\t"
    680     "pushfq"		      "\n\t"
    681     "popq %1"                 "\n"
    682     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
    683   );
    684   *res = tem;
    685   *flags = flag & 0x8d5;
    686   __asm__ __volatile__(
    687     "movabsq $0x5555555555555555, %0" "\n\t"
    688     "bzhi %2, %3, %k0"        "\n\t"
    689     "pushfq"		      "\n\t"
    690     "popq %1"                 "\n"
    691     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
    692   );
    693   if (*res != tem || *flags != (flag & 0x8d5))
    694      printf ("Difference between r and m variants\n");
    695 }
    696 
    697 
    698 __attribute__((noinline))
    699 void do_pdep64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
    700 {
    701   ULong tem, flag1, flag2, flag3, flag4;
    702   __asm__ __volatile__(
    703     "movabsq $0x5555555555555555, %0" "\n\t"
    704     "pushfq"                  "\n\t"
    705     "xorq $0x8d5, (%%rsp)"    "\n\t"
    706     "movq (%%rsp), %1"        "\n\t"
    707     "popfq"                   "\n\t"
    708     "pdep %3, %4, %0"         "\n\t"
    709     "pushfq"                  "\n\t"
    710     "movq (%%rsp), %2"        "\n\t"
    711     "xorq $0x8d5, (%%rsp)"    "\n\t"
    712     "popfq"                   "\n"
    713     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
    714     : "r" (arg1), "r" (arg2) : "cc"
    715   );
    716   *res = tem;
    717   __asm__ __volatile__(
    718     "movabsq $0x5555555555555555, %0" "\n\t"
    719     "pushfq"                  "\n\t"
    720     "xorq $0x8d5, (%%rsp)"    "\n\t"
    721     "movq (%%rsp), %1"        "\n\t"
    722     "popfq"                   "\n\t"
    723     "pdep %3, %4, %0"         "\n\t"
    724     "pushfq"                  "\n\t"
    725     "movq (%%rsp), %2"        "\n\t"
    726     "xorq $0x8d5, (%%rsp)"    "\n\t"
    727     "popfq"                   "\n"
    728     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
    729     : "m" (arg1), "r" (arg2) : "cc"
    730   );
    731   if (*res != tem)
    732      printf ("Difference between r and m variants\n");
    733   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    734      printf ("Flags changed\n");
    735 }
    736 
    737 __attribute__((noinline))
    738 void do_pdep32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
    739 {
    740   ULong tem, flag1, flag2, flag3, flag4;
    741   __asm__ __volatile__(
    742     "movabsq $0x5555555555555555, %0" "\n\t"
    743     "pushfq"                  "\n\t"
    744     "xorq $0x8d5, (%%rsp)"    "\n\t"
    745     "movq (%%rsp), %1"        "\n\t"
    746     "popfq"                   "\n\t"
    747     "pdep %3, %4, %k0"        "\n\t"
    748     "pushfq"                  "\n\t"
    749     "movq (%%rsp), %2"        "\n\t"
    750     "xorq $0x8d5, (%%rsp)"    "\n\t"
    751     "popfq"                   "\n"
    752     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
    753     : "r" (arg1), "r" (arg2) : "cc"
    754   );
    755   *res = tem;
    756   __asm__ __volatile__(
    757     "movabsq $0x5555555555555555, %0" "\n\t"
    758     "pushfq"                  "\n\t"
    759     "xorq $0x8d5, (%%rsp)"    "\n\t"
    760     "movq (%%rsp), %1"        "\n\t"
    761     "popfq"                   "\n\t"
    762     "pdep %3, %4, %k0"        "\n\t"
    763     "pushfq"                  "\n\t"
    764     "movq (%%rsp), %2"        "\n\t"
    765     "xorq $0x8d5, (%%rsp)"    "\n\t"
    766     "popfq"                   "\n"
    767     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
    768     : "m" (arg1), "r" (arg2) : "cc"
    769   );
    770   if (*res != tem)
    771      printf ("Difference between r and m variants\n");
    772   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    773      printf ("Flags changed\n");
    774 }
    775 
    776 
    777 __attribute__((noinline))
    778 void do_pext64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
    779 {
    780   ULong tem, flag1, flag2, flag3, flag4;
    781   __asm__ __volatile__(
    782     "movabsq $0x5555555555555555, %0" "\n\t"
    783     "pushfq"                  "\n\t"
    784     "xorq $0x8d5, (%%rsp)"    "\n\t"
    785     "movq (%%rsp), %1"        "\n\t"
    786     "popfq"                   "\n\t"
    787     "pext %3, %4, %0"         "\n\t"
    788     "pushfq"                  "\n\t"
    789     "movq (%%rsp), %2"        "\n\t"
    790     "xorq $0x8d5, (%%rsp)"    "\n\t"
    791     "popfq"                   "\n"
    792     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
    793     : "r" (arg1), "r" (arg2) : "cc"
    794   );
    795   *res = tem;
    796   __asm__ __volatile__(
    797     "movabsq $0x5555555555555555, %0" "\n\t"
    798     "pushfq"                  "\n\t"
    799     "xorq $0x8d5, (%%rsp)"    "\n\t"
    800     "movq (%%rsp), %1"        "\n\t"
    801     "popfq"                   "\n\t"
    802     "pext %3, %4, %0"         "\n\t"
    803     "pushfq"                  "\n\t"
    804     "movq (%%rsp), %2"        "\n\t"
    805     "xorq $0x8d5, (%%rsp)"    "\n\t"
    806     "popfq"                   "\n"
    807     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
    808     : "m" (arg1), "r" (arg2) : "cc"
    809   );
    810   if (*res != tem)
    811      printf ("Difference between r and m variants\n");
    812   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    813      printf ("Flags changed\n");
    814 }
    815 
    816 __attribute__((noinline))
    817 void do_pext32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
    818 {
    819   ULong tem, flag1, flag2, flag3, flag4;
    820   __asm__ __volatile__(
    821     "movabsq $0x5555555555555555, %0" "\n\t"
    822     "pushfq"                  "\n\t"
    823     "xorq $0x8d5, (%%rsp)"    "\n\t"
    824     "movq (%%rsp), %1"        "\n\t"
    825     "popfq"                   "\n\t"
    826     "pext %3, %4, %k0"        "\n\t"
    827     "pushfq"                  "\n\t"
    828     "movq (%%rsp), %2"        "\n\t"
    829     "xorq $0x8d5, (%%rsp)"    "\n\t"
    830     "popfq"                   "\n"
    831     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
    832     : "r" (arg1), "r" (arg2) : "cc"
    833   );
    834   *res = tem;
    835   __asm__ __volatile__(
    836     "movabsq $0x5555555555555555, %0" "\n\t"
    837     "pushfq"                  "\n\t"
    838     "xorq $0x8d5, (%%rsp)"    "\n\t"
    839     "movq (%%rsp), %1"        "\n\t"
    840     "popfq"                   "\n\t"
    841     "pext %3, %4, %k0"        "\n\t"
    842     "pushfq"                  "\n\t"
    843     "movq (%%rsp), %2"        "\n\t"
    844     "xorq $0x8d5, (%%rsp)"    "\n\t"
    845     "popfq"                   "\n"
    846     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
    847     : "m" (arg1), "r" (arg2) : "cc"
    848   );
    849   if (*res != tem)
    850      printf ("Difference between r and m variants\n");
    851   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
    852      printf ("Flags changed\n");
    853 }
    854 
    855 
    856 int main ( void )
    857 {
    858    ULong w1, w2;
    859 
    860    w1 = 0xFEDC192837475675ULL;
    861    w2 = 0x57657438291CDEF0ULL;
    862    while (1) {
    863       ULong res;
    864       UInt  flags;
    865       do_andn64(&flags, &res, w1, w2);
    866       printf("andn64 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
    867       if (w1 == 0) break;
    868       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    869       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
    870    }
    871 
    872    w1 = 0xFEDC192837475675ULL;
    873    w2 = 0x57657438291CDEF0ULL;
    874    while (1) {
    875       ULong res;
    876       UInt  flags;
    877       do_andn32(&flags, &res, w1, w2);
    878       printf("andn32 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
    879       if (w1 == 0) break;
    880       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    881       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
    882    }
    883 
    884    w1 = 0xFEDC192837475675ULL;
    885    w2 = 0x57657438291CDEF0ULL;
    886    while (1) {
    887       ULong res1, res2;
    888       do_mulx64(&res1, &res2, w1, w2);
    889       printf("mulx64 %016llx %016llx -> %016llx %016llx\n", w1, w2, res1, res2);
    890       if (w1 == 0) break;
    891       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    892       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
    893    }
    894 
    895    w1 = 0xFEDC192837475675ULL;
    896    w2 = 0x57657438291CDEF0ULL;
    897    while (1) {
    898       ULong res1, res2;
    899       do_mulx32(&res1, &res2, w1, w2);
    900       printf("mulx32 %016llx %016llx -> %016llx %016llx\n", w1, w2, res1, res2);
    901       if (w1 == 0) break;
    902       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    903       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
    904    }
    905 
    906    w1 = 0xFEDC192837475675ULL;
    907    w2 = 0x57657438291CDEF0ULL;
    908    while (1) {
    909       ULong res;
    910       do_sarx64(&res, w1, w2);
    911       printf("sarx64 %016llx %016llx -> %016llx\n", w1, w2, res);
    912       if (w1 == 0) break;
    913       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    914       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
    915    }
    916 
    917    w1 = 0xFEDC192837475675ULL;
    918    w2 = 0x57657438291CDEF0ULL;
    919    while (1) {
    920       ULong res;
    921       do_sarx32(&res, w1, w2);
    922       printf("sarx32 %016llx %016llx -> %016llx\n", w1, w2, res);
    923       if (w1 == 0) break;
    924       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    925       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
    926    }
    927 
    928    w1 = 0xFEDC192837475675ULL;
    929    w2 = 0x57657438291CDEF0ULL;
    930    while (1) {
    931       ULong res;
    932       do_shlx64(&res, w1, w2);
    933       printf("shlx64 %016llx %016llx -> %016llx\n", w1, w2, res);
    934       if (w1 == 0) break;
    935       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    936       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
    937    }
    938 
    939    w1 = 0xFEDC192837475675ULL;
    940    w2 = 0x57657438291CDEF0ULL;
    941    while (1) {
    942       ULong res;
    943       do_shlx32(&res, w1, w2);
    944       printf("shlx32 %016llx %016llx -> %016llx\n", w1, w2, res);
    945       if (w1 == 0) break;
    946       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    947       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
    948    }
    949 
    950    w1 = 0xFEDC192837475675ULL;
    951    w2 = 0x57657438291CDEF0ULL;
    952    while (1) {
    953       ULong res;
    954       do_shrx64(&res, w1, w2);
    955       printf("shrx64 %016llx %016llx -> %016llx\n", w1, w2, res);
    956       if (w1 == 0) break;
    957       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    958       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
    959    }
    960 
    961    w1 = 0xFEDC192837475675ULL;
    962    w2 = 0x57657438291CDEF0ULL;
    963    while (1) {
    964       ULong res;
    965       do_shrx32(&res, w1, w2);
    966       printf("shrx32 %016llx %016llx -> %016llx\n", w1, w2, res);
    967       if (w1 == 0) break;
    968       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    969       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
    970    }
    971 
    972    w1 = 0xFEDC192837475675ULL;
    973    while (1) {
    974       ULong res1, res2;
    975       do_rorx64(&res1, &res2, w1);
    976       printf("rorx64 %016llx -> %016llx %016llx\n", w1, res1, res2);
    977       if (w1 == 0) break;
    978       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    979    }
    980 
    981    w1 = 0xFEDC192837475675ULL;
    982    while (1) {
    983       ULong res1, res2;
    984       do_rorx32(&res1, &res2, w1);
    985       printf("rorx32 %016llx -> %016llx %016llx\n", w1, res1, res2);
    986       if (w1 == 0) break;
    987       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    988    }
    989 
    990    w1 = 0xFEDC192837475675ULL;
    991    while (1) {
    992       ULong res;
    993       UInt  flags;
    994       do_blsi64(&flags, &res, w1);
    995       printf("blsi64 %016llx -> %016llx %04x\n", w1, res, flags);
    996       if (w1 == 0) break;
    997       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
    998    }
    999 
   1000    w1 = 0xFEDC192837475675ULL;
   1001    while (1) {
   1002       ULong res;
   1003       UInt  flags;
   1004       do_blsi32(&flags, &res, w1);
   1005       printf("blsi32 %016llx -> %016llx %04x\n", w1, res, flags);
   1006       if (w1 == 0) break;
   1007       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1008    }
   1009 
   1010    w1 = 0xFEDC192837475675ULL;
   1011    while (1) {
   1012       ULong res;
   1013       UInt  flags;
   1014       do_blsmsk64(&flags, &res, w1);
   1015       printf("blsmsk64 %016llx -> %016llx %04x\n", w1, res, flags);
   1016       if (w1 == 0) break;
   1017       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1018    }
   1019 
   1020    w1 = 0xFEDC192837475675ULL;
   1021    while (1) {
   1022       ULong res;
   1023       UInt  flags;
   1024       do_blsmsk32(&flags, &res, w1);
   1025       printf("blsmsk32 %016llx -> %016llx %04x\n", w1, res, flags);
   1026       if (w1 == 0) break;
   1027       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1028    }
   1029 
   1030    w1 = 0xFEDC192837475675ULL;
   1031    while (1) {
   1032       ULong res;
   1033       UInt  flags;
   1034       do_blsr64(&flags, &res, w1);
   1035       printf("blsr64 %016llx -> %016llx %04x\n", w1, res, flags);
   1036       if (w1 == 0) break;
   1037       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1038    }
   1039 
   1040    w1 = 0xFEDC192837475675ULL;
   1041    while (1) {
   1042       ULong res;
   1043       UInt  flags;
   1044       do_blsr32(&flags, &res, w1);
   1045       printf("blsr32 %016llx -> %016llx %04x\n", w1, res, flags);
   1046       if (w1 == 0) break;
   1047       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1048    }
   1049 
   1050    w1 = 0xFEDC192837475675ULL;
   1051    w2 = 0x57657438291CDEF0ULL;
   1052    while (1) {
   1053       ULong res;
   1054       UInt  flags;
   1055       do_bextr64(&flags, &res, w1, w2);
   1056       printf("bextr64 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
   1057       if (w1 == 0) break;
   1058       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1059       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
   1060    }
   1061 
   1062    w1 = 0xFEDC192837475675ULL;
   1063    w2 = 0x57657438291CDEF0ULL;
   1064    while (1) {
   1065       ULong res;
   1066       UInt  flags;
   1067       do_bextr32(&flags, &res, w1, w2);
   1068       printf("bextr32 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
   1069       if (w1 == 0) break;
   1070       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1071       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
   1072    }
   1073 
   1074    w1 = 0xFEDC192837475675ULL;
   1075    w2 = 0x57657438291CDEF0ULL;
   1076    while (1) {
   1077       ULong res;
   1078       UInt  flags;
   1079       do_bzhi64(&flags, &res, w1, w2);
   1080       printf("bzhi64 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
   1081       if (w1 == 0) break;
   1082       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1083       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
   1084    }
   1085 
   1086    w1 = 0xFEDC192837475675ULL;
   1087    w2 = 0x57657438291CDEF0ULL;
   1088    while (1) {
   1089       ULong res;
   1090       UInt  flags;
   1091       do_bzhi32(&flags, &res, w1, w2);
   1092       printf("bzhi32 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
   1093       if (w1 == 0) break;
   1094       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1095       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
   1096    }
   1097 
   1098    w1 = 0xFEDC192837475675ULL;
   1099    w2 = 0x57657438291CDEF0ULL;
   1100    while (1) {
   1101       ULong res;
   1102       do_pdep64(&res, w1, w2);
   1103       printf("pdep64 %016llx %016llx -> %016llx\n", w1, w2, res);
   1104       if (w1 == 0) break;
   1105       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1106       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
   1107    }
   1108 
   1109    w1 = 0xFEDC192837475675ULL;
   1110    w2 = 0x57657438291CDEF0ULL;
   1111    while (1) {
   1112       ULong res;
   1113       do_pdep32(&res, w1, w2);
   1114       printf("pdep32 %016llx %016llx -> %016llx\n", w1, w2, res);
   1115       if (w1 == 0) break;
   1116       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1117       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
   1118    }
   1119 
   1120    w1 = 0xFEDC192837475675ULL;
   1121    w2 = 0x57657438291CDEF0ULL;
   1122    while (1) {
   1123       ULong res;
   1124       do_pext64(&res, w1, w2);
   1125       printf("pext64 %016llx %016llx -> %016llx\n", w1, w2, res);
   1126       if (w1 == 0) break;
   1127       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1128       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
   1129    }
   1130 
   1131    w1 = 0xFEDC192837475675ULL;
   1132    w2 = 0x57657438291CDEF0ULL;
   1133    while (1) {
   1134       ULong res;
   1135       do_pext32(&res, w1, w2);
   1136       printf("pext32 %016llx %016llx -> %016llx\n", w1, w2, res);
   1137       if (w1 == 0) break;
   1138       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
   1139       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
   1140    }
   1141 
   1142    return 0;
   1143 }
   1144