Home | History | Annotate | Download | only in amd64
      1 
      2 /* Test for a number of SSE instructions which were seen in the wild
      3    with a bogus (irrelevant) REX.W bit in their prefixes.  Some just
      4    have REX = 0x48 where REX.W is irrelevant, hence the whole REX
      5    prefix is pointless.  Probably related to #133962. */
      6 
      7 #include <stdlib.h>
      8 #include <stdio.h>
      9 #include <assert.h>
     10 #include "tests/malloc.h"
     11 
     12 typedef  unsigned char  UChar;
     13 
     14 typedef
     15    struct { __attribute__((aligned(16))) UChar b[16]; }
     16    UWord128;
     17 
     18 typedef
     19    struct { UWord128 reg[16]; }
     20    XMMRegs;
     21 
     22 typedef
     23    struct { UWord128 dqw[5]; }
     24    Mem;
     25 
     26 void pp_UWord128 ( UWord128* w ) {
     27    int i;
     28    char buf[3];
     29    for (i = 15; i >= 0; i--) {
     30       buf[2] = 0;
     31       sprintf(buf, "%02x", (unsigned int)w->b[i]);
     32       assert(buf[2] == 0);
     33       if (buf[0] == '0') buf[0] = '.';
     34       if (buf[1] == '0') buf[1] = '.';
     35       printf("%s", buf);
     36    }
     37 }
     38 
     39 void pp_XMMRegs ( char* who, XMMRegs* regs ) {
     40    int i;
     41    printf ("%s (xmms in order [15..0]) {\n", who );
     42    for (i = 0; i < 16; i++) {
     43       printf("  %%xmm%2d ", i);
     44       pp_UWord128( &regs->reg[i] );
     45       printf("\n");
     46    }
     47    printf("}\n");
     48 }
     49 
     50 void pp_Mem ( char* who, Mem* mem ) {
     51    int i;
     52    printf ("%s (dqws in order [15 .. 0]) {\n", who );
     53    for (i = 0; i < 5; i++) {
     54       printf("  [%d]    ", i);
     55       pp_UWord128( &mem->dqw[i] );
     56       printf("\n");
     57    }
     58    printf("}\n");
     59 }
     60 
     61 void xor_UWord128( UWord128* src, UWord128* dst ) {
     62    int i;
     63    for (i = 0; i < 16; i++)
     64       dst->b[i] ^= src->b[i];
     65 }
     66 void xor_XMMRegs ( XMMRegs* src, XMMRegs* dst ) {
     67    int i;
     68    for (i = 0; i < 16; i++)
     69       xor_UWord128( &src->reg[i], &dst->reg[i] );
     70 }
     71 
     72 void xor_Mem ( Mem* src, Mem* dst ) {
     73    int i;
     74    for (i = 0; i < 5; i++)
     75       xor_UWord128( &src->dqw[i], &dst->dqw[i] );
     76 }
     77 
     78 void setup_regs_mem ( XMMRegs* regs, Mem* mem ) {
     79    int ctr, i, j;
     80    ctr = 0;
     81    for (i = 0; i < 16; i++) {
     82       for (j = 0; j < 16; j++)
     83         regs->reg[i].b[j] = 0x51 + (ctr++ % 7);
     84    }
     85    for (i = 0; i < 5; i++) {
     86       for (j = 0; j < 16; j++)
     87         mem->dqw[i].b[j] = 0x52 + (ctr++ % 13);
     88    }
     89 }
     90 
     91 void before_test ( XMMRegs* regs, Mem* mem ) {
     92    setup_regs_mem( regs, mem );
     93 }
     94 
     95 void after_test ( char* who, XMMRegs* regs, Mem* mem ) {
     96    XMMRegs rdiff;
     97    Mem     mdiff;
     98    char s[128];
     99    setup_regs_mem( &rdiff, &mdiff );
    100    xor_XMMRegs( regs, &rdiff );
    101    xor_Mem( mem, &mdiff );
    102    sprintf(s, "after \"%s\"", who );
    103    pp_Mem( s, &mdiff );
    104    pp_XMMRegs( s, &rdiff );
    105    printf("\n");
    106 }
    107 
/* Asm template fragment: load %xmm0 .. %xmm15 from the 256 bytes that
   %r14 points at, 16 bytes per register, using movupd.  Written for use
   inside a GCC extended-asm template, hence the doubled '%' on register
   names.  Every line, including the last, ends in '\n', so further
   instructions can be appended directly after this fragment. */
#define LOAD_XMMREGS_from_r14       \
   "\tmovupd   0(%%r14),  %%xmm0\n" \
   "\tmovupd  16(%%r14),  %%xmm1\n" \
   "\tmovupd  32(%%r14),  %%xmm2\n" \
   "\tmovupd  48(%%r14),  %%xmm3\n" \
   "\tmovupd  64(%%r14),  %%xmm4\n" \
   "\tmovupd  80(%%r14),  %%xmm5\n" \
   "\tmovupd  96(%%r14),  %%xmm6\n" \
   "\tmovupd 112(%%r14),  %%xmm7\n" \
   "\tmovupd 128(%%r14),  %%xmm8\n" \
   "\tmovupd 144(%%r14),  %%xmm9\n" \
   "\tmovupd 160(%%r14), %%xmm10\n" \
   "\tmovupd 176(%%r14), %%xmm11\n" \
   "\tmovupd 192(%%r14), %%xmm12\n" \
   "\tmovupd 208(%%r14), %%xmm13\n" \
   "\tmovupd 224(%%r14), %%xmm14\n" \
   "\tmovupd 240(%%r14), %%xmm15\n"
    125 
/* Asm template fragment: store %xmm0 .. %xmm15 to the 256 bytes that
   %r14 points at, 16 bytes per register, using movupd.  This is the
   mirror image of LOAD_XMMREGS_from_r14.  Unlike that macro, the final
   line has no trailing '\n', so this fragment is written to be the last
   piece of an asm template. */
#define SAVE_XMMREGS_to_r14         \
   "\tmovupd %%xmm0,    0(%%r14)\n" \
   "\tmovupd %%xmm1,   16(%%r14)\n" \
   "\tmovupd %%xmm2,   32(%%r14)\n" \
   "\tmovupd %%xmm3,   48(%%r14)\n" \
   "\tmovupd %%xmm4,   64(%%r14)\n" \
   "\tmovupd %%xmm5,   80(%%r14)\n" \
   "\tmovupd %%xmm6,   96(%%r14)\n" \
   "\tmovupd %%xmm7,  112(%%r14)\n" \
   "\tmovupd %%xmm8,  128(%%r14)\n" \
   "\tmovupd %%xmm9,  144(%%r14)\n" \
   "\tmovupd %%xmm10, 160(%%r14)\n" \
   "\tmovupd %%xmm11, 176(%%r14)\n" \
   "\tmovupd %%xmm12, 192(%%r14)\n" \
   "\tmovupd %%xmm13, 208(%%r14)\n" \
   "\tmovupd %%xmm14, 224(%%r14)\n" \
   "\tmovupd %%xmm15, 240(%%r14)"
    143 
/* Comma-separated list naming all sixteen xmm registers, for dropping
   into the clobber section of an extended-asm statement that overwrites
   every one of them (as LOAD_XMMREGS_from_r14 does). */
#define XMMREGS \
   "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", \
   "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
    147 
    148 #if 0
    149    /* Boilerplate for test */
    150    {
    151      before_test( regs, mem );
    152      __asm__ __volatile__(
    153          "movq %0, %%r14\n"
    154        "\tmovq %1, %%r15\n"
    155        LOAD_XMMREGS_from_r14
    156        "\tmovq %%r15, %%rx\n"
    157        "\t.byte 0x\n"
    158        SAVE_XMMREGS_to_r14
    159           : /*out*/ : /*in*/ "r"(regs), "r"( -x + (char*)&mem->dqw[2] )
    160                     : /*trash*/ "r14","r15","memory", XMMREGS,
    161                                 "x"
    162      );
    163      after_test( "", regs, mem );
    164    }
    165 #endif
    166 
    167 int main ( void )
    168 {
    169    XMMRegs* regs;
    170    Mem*     mem;
    171    regs = memalign16(sizeof(XMMRegs) + 16);
    172    mem  = memalign16(sizeof(Mem) + 16);
    173 
    174    /* addpd mem, reg   66 49 0f 58 48 00  rex.WB addpd  0x0(%r8),%xmm1 */
    175    {
    176      before_test( regs, mem );
    177      __asm__ __volatile__(
    178          "movq %0, %%r14\n"
    179        "\tmovq %1, %%r15\n"
    180        LOAD_XMMREGS_from_r14
    181        "\tmovq %%r15, %%r8\n"
    182        "\t.byte 0x66,0x49,0x0f,0x58,0x48,0x00\n"
    183        SAVE_XMMREGS_to_r14
    184           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    185                     : /*trash*/ "r14","r15","memory", XMMREGS,
    186                                 "r8"
    187      );
    188      after_test( "rex.WB addpd  0x0(%r8),%xmm1", regs, mem );
    189    }
    190 
    191    /* addsd mem, reg   f2 48 0f 58 27     rex.W addsd  (%rdi),%xmm4 */
    192    {
    193      before_test( regs, mem );
    194      __asm__ __volatile__(
    195          "movq %0, %%r14\n"
    196        "\tmovq %1, %%r15\n"
    197        LOAD_XMMREGS_from_r14
    198        "\tmovq %%r15, %%rdi\n"
    199        "\t.byte 0xf2,0x48,0x0f,0x58,0x27\n"
    200        SAVE_XMMREGS_to_r14
    201           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    202                     : /*trash*/ "r14","r15","memory", XMMREGS,
    203                                 "rdi"
    204      );
    205      after_test( "rex.W addsd  (%rdi),%xmm4", regs, mem );
    206    }
    207 
    208    /* movapd mem, reg  66 48 0f 28 0a     rex.W movapd (%rdx),%xmm1 */
    209    {
    210      before_test( regs, mem );
    211      __asm__ __volatile__(
    212          "movq %0, %%r14\n"
    213        "\tmovq %1, %%r15\n"
    214        LOAD_XMMREGS_from_r14
    215        "\tmovq %%r15, %%rdx\n"
    216        "\t.byte 0x66,0x48,0x0f,0x28,0x0a\n"
    217        SAVE_XMMREGS_to_r14
    218           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    219                     : /*trash*/ "r14","r15","memory", XMMREGS,
    220                                 "rdx"
    221      );
    222      after_test( "rex.W movapd (%rdx),%xmm1", regs, mem );
    223    }
    224 
    225    /* movapd reg, mem  66 48 0f 29 0a     rex.W movapd %xmm1,(%rdx) */
    226    {
    227      before_test( regs, mem );
    228      __asm__ __volatile__(
    229          "movq %0, %%r14\n"
    230        "\tmovq %1, %%r15\n"
    231        LOAD_XMMREGS_from_r14
    232        "\tmovq %%r15, %%rdx\n"
    233        "\t.byte 0x66,0x48,0x0f,0x29,0x0a\n"
    234        SAVE_XMMREGS_to_r14
    235           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    236                     : /*trash*/ "r14","r15","memory", XMMREGS,
    237                                 "rdx"
    238      );
    239      after_test( "rex.W movapd %xmm1,(%rdx)", regs, mem );
    240    }
    241 
    242    /* movaps mem, reg  48 0f 28 42 30     rex.W movaps 0x30(%rdx),%xmm0 */
    243    {
    244      before_test( regs, mem );
    245      __asm__ __volatile__(
    246          "movq %0, %%r14\n"
    247        "\tmovq %1, %%r15\n"
    248        LOAD_XMMREGS_from_r14
    249        "\tmovq %%r15, %%rdx\n"
    250        "\t.byte 0x48,0x0f,0x28,0x42,0x30\n"
    251        SAVE_XMMREGS_to_r14
    252           : /*out*/ : /*in*/ "r"(regs), "r"( -0x30 + (char*)&mem->dqw[2] )
    253                     : /*trash*/ "r14","r15","memory", XMMREGS,
    254                                 "rdx"
    255      );
    256      after_test( "movaps 0x30(%rdx),%xmm0", regs, mem );
    257    }
    258 
    259    /* movaps reg, mem  49 0f 29 48 00     rex.WB movaps %xmm1,0x0(%r8) */
    260    {
    261      before_test( regs, mem );
    262      __asm__ __volatile__(
    263          "movq %0, %%r14\n"
    264        "\tmovq %1, %%r15\n"
    265        LOAD_XMMREGS_from_r14
    266        "\tmovq %%r15, %%r8\n"
    267        "\t.byte 0x49,0x0f,0x29,0x48,0x00\n"
    268        SAVE_XMMREGS_to_r14
    269           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    270                     : /*trash*/ "r14","r15","memory", XMMREGS,
    271                                 "r8"
    272      );
    273      after_test( "rex.WB movaps %xmm1,0x0(%r8)", regs, mem );
    274    }
    275 
    276    /* movddup mem, reg f2 48 0f 12 2a     rex.W movddup (%rdx),%xmm5 */
    277    {
    278      before_test( regs, mem );
    279      __asm__ __volatile__(
    280          "movq %0, %%r14\n"
    281        "\tmovq %1, %%r15\n"
    282        LOAD_XMMREGS_from_r14
    283        "\tmovq %%r15, %%rdx\n"
    284        "\t.byte 0xf2,0x48,0x0f,0x12,0x2a\n"
    285        SAVE_XMMREGS_to_r14
    286           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    287                     : /*trash*/ "r14","r15","memory", XMMREGS,
    288                                 "rdx"
    289      );
    290      after_test( "movddup (%rdx),%xmm5", regs, mem );
    291    }
    292 
    293    /* movhpd mem, reg  66 48 0f 16 06     rex.W movhpd (%rsi),%xmm0 */
    294    {
    295      before_test( regs, mem );
    296      __asm__ __volatile__(
    297          "movq %0, %%r14\n"
    298        "\tmovq %1, %%r15\n"
    299        LOAD_XMMREGS_from_r14
    300        "\tmovq %%r15, %%rsi\n"
    301        "\t.byte 0x66,0x48,0x0f,0x16,0x06\n"
    302        SAVE_XMMREGS_to_r14
    303           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    304                     : /*trash*/ "r14","r15","memory", XMMREGS,
    305                                 "rsi"
    306      );
    307      after_test( "rex.W movhpd (%rsi),%xmm0", regs, mem );
    308    }
    309 
    310    /* movhpd reg, mem  66 48 0f 17 07     rex.W movhpd %xmm0,(%rdi) */
    311    {
    312      before_test( regs, mem );
    313      __asm__ __volatile__(
    314          "movq %0, %%r14\n"
    315        "\tmovq %1, %%r15\n"
    316        LOAD_XMMREGS_from_r14
    317        "\tmovq %%r15, %%rdi\n"
    318        "\t.byte 0x66,0x48,0x0f,0x17,0x07\n"
    319        SAVE_XMMREGS_to_r14
    320           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    321                     : /*trash*/ "r14","r15","memory", XMMREGS,
    322                                 "rdi"
    323      );
    324      after_test( "rex.W movhpd %xmm0,(%rdi)", regs, mem );
    325    }
    326 
    327    /* movhps mem, reg  48 0f 16 36        rex.W movhps (%rsi),%xmm6 */
    328    {
    329      before_test( regs, mem );
    330      __asm__ __volatile__(
    331          "movq %0, %%r14\n"
    332        "\tmovq %1, %%r15\n"
    333        LOAD_XMMREGS_from_r14
    334        "\tmovq %%r15, %%rsi\n"
    335        "\t.byte 0x48,0x0f,0x16,0x36\n"
    336        SAVE_XMMREGS_to_r14
    337           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    338                     : /*trash*/ "r14","r15","memory", XMMREGS,
    339                                 "rsi"
    340      );
    341      after_test( "rex.W movhps (%rsi),%xmm6", regs, mem );
    342    }
    343    /* movhps reg, mem  49 0f 17 03        rex.WB movhps %xmm0,(%r11) */
    344    {
    345      before_test( regs, mem );
    346      __asm__ __volatile__(
    347          "movq %0, %%r14\n"
    348        "\tmovq %1, %%r15\n"
    349        LOAD_XMMREGS_from_r14
    350        "\tmovq %%r15, %%r11\n"
    351        "\t.byte 0x49,0x0F,0x17,0x03\n" /* rex.WB movhps %xmm0,(%r11) */
    352        SAVE_XMMREGS_to_r14
    353          : /*out*/ : /*in*/ "r"(regs), "r"( 0 + (char*)&mem->dqw[2] )
    354                     : /*trash*/ "r14","r15","memory", XMMREGS,
    355                                 "r11"
    356      );
    357      after_test( "rex.WB movhps %xmm0,(%r11)", regs, mem );
    358    }
    359 
    360    /* movlpd mem, reg  66 48 0f 12 4a 00  rex.W movlpd 0x0(%rdx),%xmm1 */
    361    {
    362      before_test( regs, mem );
    363      __asm__ __volatile__(
    364          "movq %0, %%r14\n"
    365        "\tmovq %1, %%r15\n"
    366        LOAD_XMMREGS_from_r14
    367        "\tmovq %%r15, %%rdx\n"
    368        "\t.byte 0x66,0x48,0x0f,0x12,0x4a,0x00\n"
    369        SAVE_XMMREGS_to_r14
    370           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    371                     : /*trash*/ "r14","r15","memory", XMMREGS,
    372                                 "rdx"
    373      );
    374      after_test( "rex.W movlpd 0x0(%rdx),%xmm1", regs, mem );
    375    }
    376 
    377    /* movlpd reg, mem  66 48 0f 13 30     rex.W movlpd %xmm6,(%rax) */
    378    {
    379      before_test( regs, mem );
    380      __asm__ __volatile__(
    381          "movq %0, %%r14\n"
    382        "\tmovq %1, %%r15\n"
    383        LOAD_XMMREGS_from_r14
    384        "\tmovq %%r15, %%rax\n"
    385        "\t.byte 0x66,0x48,0x0f,0x13,0x30\n"
    386        SAVE_XMMREGS_to_r14
    387           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    388                     : /*trash*/ "r14","r15","memory", XMMREGS,
    389                                 "rax"
    390      );
    391      after_test( "rex.W movlpd %xmm6,(%rax)", regs, mem );
    392    }
    393 
    394    /* movlps mem, reg  48 0f 12 07        rex.W movlps (%rdi),%xmm0 */
    395    {
    396      before_test( regs, mem );
    397      __asm__ __volatile__(
    398          "movq %0, %%r14\n"
    399        "\tmovq %1, %%r15\n"
    400        LOAD_XMMREGS_from_r14
    401        "\tmovq %%r15, %%rdi\n"
    402        "\t.byte 0x48,0x0f,0x12,0x07\n"
    403        SAVE_XMMREGS_to_r14
    404           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    405                     : /*trash*/ "r14","r15","memory", XMMREGS,
    406                                 "rdi"
    407      );
    408      after_test( "rex.W movlps (%rdi),%xmm0", regs, mem );
    409    }
    410 
    411    /* movlps reg, mem  49 0f 13 02        rex.WB movlps %xmm0,(%r10) */
    412    {
    413      before_test( regs, mem );
    414      __asm__ __volatile__(
    415          "movq %0, %%r14\n"
    416        "\tmovq %1, %%r15\n"
    417        LOAD_XMMREGS_from_r14
    418        "\tmovq %%r15, %%r10\n"
    419        "\t.byte 0x49,0x0f,0x13,0x02\n"
    420        SAVE_XMMREGS_to_r14
    421           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    422                     : /*trash*/ "r14","r15","memory", XMMREGS,
    423                                 "r10"
    424      );
    425      after_test( "rex.WB movlps %xmm0,(%r10)", regs, mem );
    426    }
    427 
    428    /* movq mem, reg    f3 48 0f 7e 00     rex.W movq   (%rax),%xmm0 */
    429    {
    430      before_test( regs, mem );
    431      __asm__ __volatile__(
    432          "movq %0, %%r14\n"
    433        "\tmovq %1, %%r15\n"
    434        LOAD_XMMREGS_from_r14
    435        "\tmovq %%r15, %%rax\n"
    436        "\t.byte 0xf3,0x48,0x0f,0x7e,0x00\n"
    437        SAVE_XMMREGS_to_r14
    438           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    439                     : /*trash*/ "r14","r15","memory", XMMREGS,
    440                                 "rax"
    441      );
    442      after_test( "rex.W movq (%rax),%xmm0", regs, mem );
    443    }
    444 
    445    /* movq reg, mem    66 48 0f d6 00     rex.W movq   %xmm0,(%rax) */
    446    {
    447      before_test( regs, mem );
    448      __asm__ __volatile__(
    449          "movq %0, %%r14\n"
    450        "\tmovq %1, %%r15\n"
    451        LOAD_XMMREGS_from_r14
    452        "\tmovq %%r15, %%rax\n"
    453        "\t.byte 0x66,0x48,0x0f,0xd6,0x00\n"
    454        SAVE_XMMREGS_to_r14
    455           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    456                     : /*trash*/ "r14","r15","memory", XMMREGS,
    457                                 "rax"
    458      );
    459      after_test( "rex.W movq %xmm0,(%rax)", regs, mem );
    460    }
    461 
    462    /* movsd mem, reg   f2 48 0f 10 11     rex.W movsd  (%rcx),%xmm2 */
    463    {
    464      before_test( regs, mem );
    465      __asm__ __volatile__(
    466          "movq %0, %%r14\n"
    467        "\tmovq %1, %%r15\n"
    468        LOAD_XMMREGS_from_r14
    469        "\tmovq %%r15, %%rcx\n"
    470        "\t.byte 0xf2,0x48,0x0f,0x10,0x11\n"
    471        SAVE_XMMREGS_to_r14
    472           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    473                     : /*trash*/ "r14","r15","memory", XMMREGS,
    474                                 "rcx"
    475      );
    476      after_test( "rex.W movsd (%rcx),%xmm2", regs, mem );
    477    }
    478 
    479    /* movsd reg, mem   f2 48 0f 11 3f     rex.W movsd  %xmm7,(%rdi) */
    480    {
    481      before_test( regs, mem );
    482      __asm__ __volatile__(
    483          "movq %0, %%r14\n"
    484        "\tmovq %1, %%r15\n"
    485        LOAD_XMMREGS_from_r14
    486        "\tmovq %%r15, %%rdi\n"
    487        "\t.byte 0xf2,0x48,0x0f,0x11,0x3f\n"
    488        SAVE_XMMREGS_to_r14
    489           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    490                     : /*trash*/ "r14","r15","memory", XMMREGS,
    491                                 "rdi"
    492      );
    493      after_test( "rex.W movsd %xmm7,(%rdi)", regs, mem );
    494    }
    495 
    496    /* movss mem, reg   f3 48 0f 10 5e 04  rex.W movss  0x4(%rsi),%xmm3 */
    497    {
    498      before_test( regs, mem );
    499      __asm__ __volatile__(
    500          "movq %0, %%r14\n"
    501        "\tmovq %1, %%r15\n"
    502        LOAD_XMMREGS_from_r14
    503        "\tmovq %%r15, %%rsi\n"
    504        "\t.byte 0xf3,0x48,0x0f,0x10,0x5e,0x04\n"
    505        SAVE_XMMREGS_to_r14
    506           : /*out*/ : /*in*/ "r"(regs), "r"( -0x4 + (char*)&mem->dqw[2] )
    507                     : /*trash*/ "r14","r15","memory", XMMREGS,
    508                                 "rsi"
    509      );
    510      after_test( "rex.W movss 0x4(%rsi),%xmm3", regs, mem );
    511    }
    512 
    513    /* movupd reg, mem  66 48 0f 11 07     rex.W movupd %xmm0,(%rdi) */
    514    {
    515      before_test( regs, mem );
    516      __asm__ __volatile__(
    517          "movq %0, %%r14\n"
    518        "\tmovq %1, %%r15\n"
    519        LOAD_XMMREGS_from_r14
    520        "\tmovq %%r15, %%rdi\n"
    521        "\t.byte 0x66,0x48,0x0f,0x11,0x07\n"
    522        SAVE_XMMREGS_to_r14
    523           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    524                     : /*trash*/ "r14","r15","memory", XMMREGS,
    525                                 "rdi"
    526      );
    527      after_test( "rex.W movupd %xmm0,(%rdi)", regs, mem );
    528    }
    529 
    530    /* mulpd mem, reg   66 48 0f 59 61 00  rex.W mulpd  0x0(%rcx),%xmm4 */
    531    {
    532      before_test( regs, mem );
    533      __asm__ __volatile__(
    534          "movq %0, %%r14\n"
    535        "\tmovq %1, %%r15\n"
    536        LOAD_XMMREGS_from_r14
    537        "\tmovq %%r15, %%rcx\n"
    538        "\t.byte 0x66,0x48,0x0f,0x59,0x61,0x00\n"
    539        SAVE_XMMREGS_to_r14
    540           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    541                     : /*trash*/ "r14","r15","memory", XMMREGS,
    542                                 "rcx"
    543      );
    544      after_test( "rex.W mulpd 0x0(%rcx),%xmm4", regs, mem );
    545    }
    546 
    547    /* mulsd mem, reg   f2 48 0f 59 1f     rex.W mulsd  (%rdi),%xmm3 */
    548    {
    549      before_test( regs, mem );
    550      __asm__ __volatile__(
    551          "movq %0, %%r14\n"
    552        "\tmovq %1, %%r15\n"
    553        LOAD_XMMREGS_from_r14
    554        "\tmovq %%r15, %%rdi\n"
    555        "\t.byte 0xf2,0x48,0x0f,0x59,0x1f\n"
    556        SAVE_XMMREGS_to_r14
    557           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
    558                     : /*trash*/ "r14","r15","memory", XMMREGS,
    559                                 "rdi"
    560      );
    561      after_test( "rex.W mulsd (%rdi),%xmm3", regs, mem );
    562    }
    563 
    564    /* prefetchnt0    49 0f 18 4c f2 a0  rex.WB prefetcht0 -0x60(%r10,%rsi,8) */
    565    {
    566      before_test( regs, mem );
    567      __asm__ __volatile__(
    568          "movq %0, %%r14\n"
    569        "\tmovq %1, %%r15\n"
    570        LOAD_XMMREGS_from_r14
    571        "\tmovq %%r15, %%r10\n"
    572        "\txorq %%rsi, %%rsi\n"
    573        "\t.byte 0x49,0x0f,0x18,0x4c,0xf2,0xa0\n"
    574        SAVE_XMMREGS_to_r14
    575           : /*out*/ : /*in*/ "r"(regs), "r"( - -0x60 + (char*)&mem->dqw[2] )
    576                     : /*trash*/ "r14","r15","memory", XMMREGS,
    577                                 "r10","rsi"
    578      );
    579      after_test( "rex.WB prefetcht0 -0x60(%r10,%rsi,8)", regs, mem );
    580    }
    581 
    582    /* subsd mem, reg   f2 49 0f 5c 4d f8  rex.WB subsd  -0x8(%r13),%xmm1 */
    583    {
    584      before_test( regs, mem );
    585      __asm__ __volatile__(
    586          "movq %0, %%r14\n"
    587        "\tmovq %1, %%r15\n"
    588        LOAD_XMMREGS_from_r14
    589        "\tmovq %%r15, %%r13\n"
    590        "\t.byte 0xf2,0x49,0x0f,0x5c,0x4d,0xf8\n"
    591        SAVE_XMMREGS_to_r14
    592           : /*out*/ : /*in*/ "r"(regs), "r"( - -0x8 + (char*)&mem->dqw[2] )
    593                     : /*trash*/ "r14","r15","memory", XMMREGS,
    594                                 "r13"
    595      );
    596      after_test( "rex.WB subsd  -0x8(%r13),%xmm1", regs, mem );
    597    }
    598 
    599    free(regs);
    600    free(mem);
    601    return 0;
    602 }
    603