1 2 /* Test for a number of SSE instructions which were seen in the wild 3 with a bogus (irrelevant) REX.W bit in their prefixes. Some just 4 have REX = 0x48 where REX.W is irrelevant, hence the whole REX 5 prefix is pointless. Probably related to #133962. */ 6 7 #include <stdlib.h> 8 #include <stdio.h> 9 #include <assert.h> 10 #include "tests/malloc.h" 11 12 typedef unsigned char UChar; 13 14 typedef 15 struct { __attribute__((aligned(16))) UChar b[16]; } 16 UWord128; 17 18 typedef 19 struct { UWord128 reg[16]; } 20 XMMRegs; 21 22 typedef 23 struct { UWord128 dqw[5]; } 24 Mem; 25 26 void pp_UWord128 ( UWord128* w ) { 27 int i; 28 char buf[3]; 29 for (i = 15; i >= 0; i--) { 30 buf[2] = 0; 31 sprintf(buf, "%02x", (unsigned int)w->b[i]); 32 assert(buf[2] == 0); 33 if (buf[0] == '0') buf[0] = '.'; 34 if (buf[1] == '0') buf[1] = '.'; 35 printf("%s", buf); 36 } 37 } 38 39 void pp_XMMRegs ( char* who, XMMRegs* regs ) { 40 int i; 41 printf ("%s (xmms in order [15..0]) {\n", who ); 42 for (i = 0; i < 16; i++) { 43 printf(" %%xmm%2d ", i); 44 pp_UWord128( ®s->reg[i] ); 45 printf("\n"); 46 } 47 printf("}\n"); 48 } 49 50 void pp_Mem ( char* who, Mem* mem ) { 51 int i; 52 printf ("%s (dqws in order [15 .. 0]) {\n", who ); 53 for (i = 0; i < 5; i++) { 54 printf(" [%d] ", i); 55 pp_UWord128( &mem->dqw[i] ); 56 printf("\n"); 57 } 58 printf("}\n"); 59 } 60 61 void xor_UWord128( UWord128* src, UWord128* dst ) { 62 int i; 63 for (i = 0; i < 16; i++) 64 dst->b[i] ^= src->b[i]; 65 } 66 void xor_XMMRegs ( XMMRegs* src, XMMRegs* dst ) { 67 int i; 68 for (i = 0; i < 16; i++) 69 xor_UWord128( &src->reg[i], &dst->reg[i] ); 70 } 71 72 void xor_Mem ( Mem* src, Mem* dst ) { 73 int i; 74 for (i = 0; i < 5; i++) 75 xor_UWord128( &src->dqw[i], &dst->dqw[i] ); 76 } 77 78 void setup_regs_mem ( XMMRegs* regs, Mem* mem ) { 79 int ctr, i, j; 80 ctr = 0; 81 for (i = 0; i < 16; i++) { 82 for (j = 0; j < 16; j++) 83 regs->reg[i].b[j] = 0x51 + (ctr++ % 7); 84 } 85 for (i = 0; i < 5; i++) { 86 for (j = 0; j < 16; j++) 87 mem->dqw[i].b[j] = 0x52 + (ctr++ % 13); 88 } 89 } 90 91 void before_test ( XMMRegs* regs, Mem* mem ) { 92 setup_regs_mem( regs, mem ); 93 } 94 95 void after_test ( char* who, XMMRegs* regs, Mem* mem ) { 96 XMMRegs rdiff; 97 Mem mdiff; 98 char s[128]; 99 setup_regs_mem( &rdiff, &mdiff ); 100 xor_XMMRegs( regs, &rdiff ); 101 xor_Mem( mem, &mdiff ); 102 sprintf(s, "after \"%s\"", who ); 103 pp_Mem( s, &mdiff ); 104 pp_XMMRegs( s, &rdiff ); 105 printf("\n"); 106 } 107 108 #define LOAD_XMMREGS_from_r14 \ 109 "\tmovupd 0(%%r14), %%xmm0\n" \ 110 "\tmovupd 16(%%r14), %%xmm1\n" \ 111 "\tmovupd 32(%%r14), %%xmm2\n" \ 112 "\tmovupd 48(%%r14), %%xmm3\n" \ 113 "\tmovupd 64(%%r14), %%xmm4\n" \ 114 "\tmovupd 80(%%r14), %%xmm5\n" \ 115 "\tmovupd 96(%%r14), %%xmm6\n" \ 116 "\tmovupd 112(%%r14), %%xmm7\n" \ 117 "\tmovupd 128(%%r14), %%xmm8\n" \ 118 "\tmovupd 144(%%r14), %%xmm9\n" \ 119 "\tmovupd 160(%%r14), %%xmm10\n" \ 120 "\tmovupd 176(%%r14), %%xmm11\n" \ 121 "\tmovupd 192(%%r14), %%xmm12\n" \ 122 "\tmovupd 208(%%r14), %%xmm13\n" \ 123 "\tmovupd 224(%%r14), %%xmm14\n" \ 124 "\tmovupd 240(%%r14), %%xmm15\n" 125 126 #define SAVE_XMMREGS_to_r14 \ 127 "\tmovupd %%xmm0, 0(%%r14)\n" \ 128 "\tmovupd %%xmm1, 16(%%r14)\n" \ 129 "\tmovupd %%xmm2, 32(%%r14)\n" \ 130 "\tmovupd %%xmm3, 48(%%r14)\n" \ 131 "\tmovupd %%xmm4, 64(%%r14)\n" \ 132 "\tmovupd %%xmm5, 80(%%r14)\n" \ 133 "\tmovupd %%xmm6, 96(%%r14)\n" \ 134 "\tmovupd %%xmm7, 112(%%r14)\n" \ 135 "\tmovupd %%xmm8, 128(%%r14)\n" \ 136 "\tmovupd %%xmm9, 144(%%r14)\n" \ 137 "\tmovupd %%xmm10, 160(%%r14)\n" \ 138 "\tmovupd %%xmm11, 176(%%r14)\n" \ 139 "\tmovupd %%xmm12, 192(%%r14)\n" \ 140 "\tmovupd %%xmm13, 208(%%r14)\n" \ 141 "\tmovupd %%xmm14, 224(%%r14)\n" \ 142 "\tmovupd %%xmm15, 240(%%r14)" 143 144 #define XMMREGS \ 145 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", \ 146 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" 147 148 #if 0 149 /* Boilerplate for test */ 150 { 151 before_test( regs, mem ); 152 __asm__ __volatile__( 153 "movq %0, %%r14\n" 154 "\tmovq %1, %%r15\n" 155 LOAD_XMMREGS_from_r14 156 "\tmovq %%r15, %%rx\n" 157 "\t.byte 0x\n" 158 SAVE_XMMREGS_to_r14 159 : /*out*/ : /*in*/ "r"(regs), "r"( -x + (char*)&mem->dqw[2] ) 160 : /*trash*/ "r14","r15","memory", XMMREGS, 161 "x" 162 ); 163 after_test( "", regs, mem ); 164 } 165 #endif 166 167 int main ( void ) 168 { 169 XMMRegs* regs; 170 Mem* mem; 171 regs = memalign16(sizeof(XMMRegs) + 16); 172 mem = memalign16(sizeof(Mem) + 16); 173 174 /* addpd mem, reg 66 49 0f 58 48 00 rex.WB addpd 0x0(%r8),%xmm1 */ 175 { 176 before_test( regs, mem ); 177 __asm__ __volatile__( 178 "movq %0, %%r14\n" 179 "\tmovq %1, %%r15\n" 180 LOAD_XMMREGS_from_r14 181 "\tmovq %%r15, %%r8\n" 182 "\t.byte 0x66,0x49,0x0f,0x58,0x48,0x00\n" 183 SAVE_XMMREGS_to_r14 184 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 185 : /*trash*/ "r14","r15","memory", XMMREGS, 186 "r8" 187 ); 188 after_test( "rex.WB addpd 0x0(%r8),%xmm1", regs, mem ); 189 } 190 191 /* addsd mem, reg f2 48 0f 58 27 rex.W addsd (%rdi),%xmm4 */ 192 { 193 before_test( regs, mem ); 194 __asm__ __volatile__( 195 "movq %0, %%r14\n" 196 "\tmovq %1, %%r15\n" 197 LOAD_XMMREGS_from_r14 198 "\tmovq %%r15, %%rdi\n" 199 "\t.byte 0xf2,0x48,0x0f,0x58,0x27\n" 200 SAVE_XMMREGS_to_r14 201 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 202 : /*trash*/ "r14","r15","memory", XMMREGS, 203 "rdi" 204 ); 205 after_test( "rex.W addsd (%rdi),%xmm4", regs, mem ); 206 } 207 208 /* movapd mem, reg 66 48 0f 28 0a rex.W movapd (%rdx),%xmm1 */ 209 { 210 before_test( regs, mem ); 211 __asm__ __volatile__( 212 "movq %0, %%r14\n" 213 "\tmovq %1, %%r15\n" 214 LOAD_XMMREGS_from_r14 215 "\tmovq %%r15, %%rdx\n" 216 "\t.byte 0x66,0x48,0x0f,0x28,0x0a\n" 217 SAVE_XMMREGS_to_r14 218 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 219 : /*trash*/ "r14","r15","memory", XMMREGS, 220 "rdx" 221 ); 222 after_test( "rex.W movapd (%rdx),%xmm1", regs, mem ); 223 } 224 225 /* movapd reg, mem 66 48 0f 29 0a rex.W movapd %xmm1,(%rdx) */ 226 { 227 before_test( regs, mem ); 228 __asm__ __volatile__( 229 "movq %0, %%r14\n" 230 "\tmovq %1, %%r15\n" 231 LOAD_XMMREGS_from_r14 232 "\tmovq %%r15, %%rdx\n" 233 "\t.byte 0x66,0x48,0x0f,0x29,0x0a\n" 234 SAVE_XMMREGS_to_r14 235 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 236 : /*trash*/ "r14","r15","memory", XMMREGS, 237 "rdx" 238 ); 239 after_test( "rex.W movapd %xmm1,(%rdx)", regs, mem ); 240 } 241 242 /* movaps mem, reg 48 0f 28 42 30 rex.W movaps 0x30(%rdx),%xmm0 */ 243 { 244 before_test( regs, mem ); 245 __asm__ __volatile__( 246 "movq %0, %%r14\n" 247 "\tmovq %1, %%r15\n" 248 LOAD_XMMREGS_from_r14 249 "\tmovq %%r15, %%rdx\n" 250 "\t.byte 0x48,0x0f,0x28,0x42,0x30\n" 251 SAVE_XMMREGS_to_r14 252 : /*out*/ : /*in*/ "r"(regs), "r"( -0x30 + (char*)&mem->dqw[2] ) 253 : /*trash*/ "r14","r15","memory", XMMREGS, 254 "rdx" 255 ); 256 after_test( "movaps 0x30(%rdx),%xmm0", regs, mem ); 257 } 258 259 /* movaps reg, mem 49 0f 29 48 00 rex.WB movaps %xmm1,0x0(%r8) */ 260 { 261 before_test( regs, mem ); 262 __asm__ __volatile__( 263 "movq %0, %%r14\n" 264 "\tmovq %1, %%r15\n" 265 LOAD_XMMREGS_from_r14 266 "\tmovq %%r15, %%r8\n" 267 "\t.byte 0x49,0x0f,0x29,0x48,0x00\n" 268 SAVE_XMMREGS_to_r14 269 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 270 : /*trash*/ "r14","r15","memory", XMMREGS, 271 "r8" 272 ); 273 after_test( "rex.WB movaps %xmm1,0x0(%r8)", regs, mem ); 274 } 275 276 /* movddup mem, reg f2 48 0f 12 2a rex.W movddup (%rdx),%xmm5 */ 277 { 278 before_test( regs, mem ); 279 __asm__ __volatile__( 280 "movq %0, %%r14\n" 281 "\tmovq %1, %%r15\n" 282 LOAD_XMMREGS_from_r14 283 "\tmovq %%r15, %%rdx\n" 284 "\t.byte 0xf2,0x48,0x0f,0x12,0x2a\n" 285 SAVE_XMMREGS_to_r14 286 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 287 : /*trash*/ "r14","r15","memory", XMMREGS, 288 "rdx" 289 ); 290 after_test( "movddup (%rdx),%xmm5", regs, mem ); 291 } 292 293 /* movhpd mem, reg 66 48 0f 16 06 rex.W movhpd (%rsi),%xmm0 */ 294 { 295 before_test( regs, mem ); 296 __asm__ __volatile__( 297 "movq %0, %%r14\n" 298 "\tmovq %1, %%r15\n" 299 LOAD_XMMREGS_from_r14 300 "\tmovq %%r15, %%rsi\n" 301 "\t.byte 0x66,0x48,0x0f,0x16,0x06\n" 302 SAVE_XMMREGS_to_r14 303 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 304 : /*trash*/ "r14","r15","memory", XMMREGS, 305 "rsi" 306 ); 307 after_test( "rex.W movhpd (%rsi),%xmm0", regs, mem ); 308 } 309 310 /* movhpd reg, mem 66 48 0f 17 07 rex.W movhpd %xmm0,(%rdi) */ 311 { 312 before_test( regs, mem ); 313 __asm__ __volatile__( 314 "movq %0, %%r14\n" 315 "\tmovq %1, %%r15\n" 316 LOAD_XMMREGS_from_r14 317 "\tmovq %%r15, %%rdi\n" 318 "\t.byte 0x66,0x48,0x0f,0x17,0x07\n" 319 SAVE_XMMREGS_to_r14 320 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 321 : /*trash*/ "r14","r15","memory", XMMREGS, 322 "rdi" 323 ); 324 after_test( "rex.W movhpd %xmm0,(%rdi)", regs, mem ); 325 } 326 327 /* movhps mem, reg 48 0f 16 36 rex.W movhps (%rsi),%xmm6 */ 328 { 329 before_test( regs, mem ); 330 __asm__ __volatile__( 331 "movq %0, %%r14\n" 332 "\tmovq %1, %%r15\n" 333 LOAD_XMMREGS_from_r14 334 "\tmovq %%r15, %%rsi\n" 335 "\t.byte 0x48,0x0f,0x16,0x36\n" 336 SAVE_XMMREGS_to_r14 337 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 338 : /*trash*/ "r14","r15","memory", XMMREGS, 339 "rsi" 340 ); 341 after_test( "rex.W movhps (%rsi),%xmm6", regs, mem ); 342 } 343 /* movhps reg, mem 49 0f 17 03 rex.WB movhps %xmm0,(%r11) */ 344 { 345 before_test( regs, mem ); 346 __asm__ __volatile__( 347 "movq %0, %%r14\n" 348 "\tmovq %1, %%r15\n" 349 LOAD_XMMREGS_from_r14 350 "\tmovq %%r15, %%r11\n" 351 "\t.byte 0x49,0x0F,0x17,0x03\n" /* rex.WB movhps %xmm0,(%r11) */ 352 SAVE_XMMREGS_to_r14 353 : /*out*/ : /*in*/ "r"(regs), "r"( 0 + (char*)&mem->dqw[2] ) 354 : /*trash*/ "r14","r15","memory", XMMREGS, 355 "r11" 356 ); 357 after_test( "rex.WB movhps %xmm0,(%r11)", regs, mem ); 358 } 359 360 /* movlpd mem, reg 66 48 0f 12 4a 00 rex.W movlpd 0x0(%rdx),%xmm1 */ 361 { 362 before_test( regs, mem ); 363 __asm__ __volatile__( 364 "movq %0, %%r14\n" 365 "\tmovq %1, %%r15\n" 366 LOAD_XMMREGS_from_r14 367 "\tmovq %%r15, %%rdx\n" 368 "\t.byte 0x66,0x48,0x0f,0x12,0x4a,0x00\n" 369 SAVE_XMMREGS_to_r14 370 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 371 : /*trash*/ "r14","r15","memory", XMMREGS, 372 "rdx" 373 ); 374 after_test( "rex.W movlpd 0x0(%rdx),%xmm1", regs, mem ); 375 } 376 377 /* movlpd reg, mem 66 48 0f 13 30 rex.W movlpd %xmm6,(%rax) */ 378 { 379 before_test( regs, mem ); 380 __asm__ __volatile__( 381 "movq %0, %%r14\n" 382 "\tmovq %1, %%r15\n" 383 LOAD_XMMREGS_from_r14 384 "\tmovq %%r15, %%rax\n" 385 "\t.byte 0x66,0x48,0x0f,0x13,0x30\n" 386 SAVE_XMMREGS_to_r14 387 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 388 : /*trash*/ "r14","r15","memory", XMMREGS, 389 "rax" 390 ); 391 after_test( "rex.W movlpd %xmm6,(%rax)", regs, mem ); 392 } 393 394 /* movlps mem, reg 48 0f 12 07 rex.W movlps (%rdi),%xmm0 */ 395 { 396 before_test( regs, mem ); 397 __asm__ __volatile__( 398 "movq %0, %%r14\n" 399 "\tmovq %1, %%r15\n" 400 LOAD_XMMREGS_from_r14 401 "\tmovq %%r15, %%rdi\n" 402 "\t.byte 0x48,0x0f,0x12,0x07\n" 403 SAVE_XMMREGS_to_r14 404 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 405 : /*trash*/ "r14","r15","memory", XMMREGS, 406 "rdi" 407 ); 408 after_test( "rex.W movlps (%rdi),%xmm0", regs, mem ); 409 } 410 411 /* movlps reg, mem 49 0f 13 02 rex.WB movlps %xmm0,(%r10) */ 412 { 413 before_test( regs, mem ); 414 __asm__ __volatile__( 415 "movq %0, %%r14\n" 416 "\tmovq %1, %%r15\n" 417 LOAD_XMMREGS_from_r14 418 "\tmovq %%r15, %%r10\n" 419 "\t.byte 0x49,0x0f,0x13,0x02\n" 420 SAVE_XMMREGS_to_r14 421 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 422 : /*trash*/ "r14","r15","memory", XMMREGS, 423 "r10" 424 ); 425 after_test( "rex.WB movlps %xmm0,(%r10)", regs, mem ); 426 } 427 428 /* movq mem, reg f3 48 0f 7e 00 rex.W movq (%rax),%xmm0 */ 429 { 430 before_test( regs, mem ); 431 __asm__ __volatile__( 432 "movq %0, %%r14\n" 433 "\tmovq %1, %%r15\n" 434 LOAD_XMMREGS_from_r14 435 "\tmovq %%r15, %%rax\n" 436 "\t.byte 0xf3,0x48,0x0f,0x7e,0x00\n" 437 SAVE_XMMREGS_to_r14 438 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 439 : /*trash*/ "r14","r15","memory", XMMREGS, 440 "rax" 441 ); 442 after_test( "rex.W movq (%rax),%xmm0", regs, mem ); 443 } 444 445 /* movq reg, mem 66 48 0f d6 00 rex.W movq %xmm0,(%rax) */ 446 { 447 before_test( regs, mem ); 448 __asm__ __volatile__( 449 "movq %0, %%r14\n" 450 "\tmovq %1, %%r15\n" 451 LOAD_XMMREGS_from_r14 452 "\tmovq %%r15, %%rax\n" 453 "\t.byte 0x66,0x48,0x0f,0xd6,0x00\n" 454 SAVE_XMMREGS_to_r14 455 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 456 : /*trash*/ "r14","r15","memory", XMMREGS, 457 "rax" 458 ); 459 after_test( "rex.W movq %xmm0,(%rax)", regs, mem ); 460 } 461 462 /* movsd mem, reg f2 48 0f 10 11 rex.W movsd (%rcx),%xmm2 */ 463 { 464 before_test( regs, mem ); 465 __asm__ __volatile__( 466 "movq %0, %%r14\n" 467 "\tmovq %1, %%r15\n" 468 LOAD_XMMREGS_from_r14 469 "\tmovq %%r15, %%rcx\n" 470 "\t.byte 0xf2,0x48,0x0f,0x10,0x11\n" 471 SAVE_XMMREGS_to_r14 472 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 473 : /*trash*/ "r14","r15","memory", XMMREGS, 474 "rcx" 475 ); 476 after_test( "rex.W movsd (%rcx),%xmm2", regs, mem ); 477 } 478 479 /* movsd reg, mem f2 48 0f 11 3f rex.W movsd %xmm7,(%rdi) */ 480 { 481 before_test( regs, mem ); 482 __asm__ __volatile__( 483 "movq %0, %%r14\n" 484 "\tmovq %1, %%r15\n" 485 LOAD_XMMREGS_from_r14 486 "\tmovq %%r15, %%rdi\n" 487 "\t.byte 0xf2,0x48,0x0f,0x11,0x3f\n" 488 SAVE_XMMREGS_to_r14 489 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 490 : /*trash*/ "r14","r15","memory", XMMREGS, 491 "rdi" 492 ); 493 after_test( "rex.W movsd %xmm7,(%rdi)", regs, mem ); 494 } 495 496 /* movss mem, reg f3 48 0f 10 5e 04 rex.W movss 0x4(%rsi),%xmm3 */ 497 { 498 before_test( regs, mem ); 499 __asm__ __volatile__( 500 "movq %0, %%r14\n" 501 "\tmovq %1, %%r15\n" 502 LOAD_XMMREGS_from_r14 503 "\tmovq %%r15, %%rsi\n" 504 "\t.byte 0xf3,0x48,0x0f,0x10,0x5e,0x04\n" 505 SAVE_XMMREGS_to_r14 506 : /*out*/ : /*in*/ "r"(regs), "r"( -0x4 + (char*)&mem->dqw[2] ) 507 : /*trash*/ "r14","r15","memory", XMMREGS, 508 "rsi" 509 ); 510 after_test( "rex.W movss 0x4(%rsi),%xmm3", regs, mem ); 511 } 512 513 /* movupd reg, mem 66 48 0f 11 07 rex.W movupd %xmm0,(%rdi) */ 514 { 515 before_test( regs, mem ); 516 __asm__ __volatile__( 517 "movq %0, %%r14\n" 518 "\tmovq %1, %%r15\n" 519 LOAD_XMMREGS_from_r14 520 "\tmovq %%r15, %%rdi\n" 521 "\t.byte 0x66,0x48,0x0f,0x11,0x07\n" 522 SAVE_XMMREGS_to_r14 523 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 524 : /*trash*/ "r14","r15","memory", XMMREGS, 525 "rdi" 526 ); 527 after_test( "rex.W movupd %xmm0,(%rdi)", regs, mem ); 528 } 529 530 /* mulpd mem, reg 66 48 0f 59 61 00 rex.W mulpd 0x0(%rcx),%xmm4 */ 531 { 532 before_test( regs, mem ); 533 __asm__ __volatile__( 534 "movq %0, %%r14\n" 535 "\tmovq %1, %%r15\n" 536 LOAD_XMMREGS_from_r14 537 "\tmovq %%r15, %%rcx\n" 538 "\t.byte 0x66,0x48,0x0f,0x59,0x61,0x00\n" 539 SAVE_XMMREGS_to_r14 540 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 541 : /*trash*/ "r14","r15","memory", XMMREGS, 542 "rcx" 543 ); 544 after_test( "rex.W mulpd 0x0(%rcx),%xmm4", regs, mem ); 545 } 546 547 /* mulsd mem, reg f2 48 0f 59 1f rex.W mulsd (%rdi),%xmm3 */ 548 { 549 before_test( regs, mem ); 550 __asm__ __volatile__( 551 "movq %0, %%r14\n" 552 "\tmovq %1, %%r15\n" 553 LOAD_XMMREGS_from_r14 554 "\tmovq %%r15, %%rdi\n" 555 "\t.byte 0xf2,0x48,0x0f,0x59,0x1f\n" 556 SAVE_XMMREGS_to_r14 557 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] ) 558 : /*trash*/ "r14","r15","memory", XMMREGS, 559 "rdi" 560 ); 561 after_test( "rex.W mulsd (%rdi),%xmm3", regs, mem ); 562 } 563 564 /* prefetchnt0 49 0f 18 4c f2 a0 rex.WB prefetcht0 -0x60(%r10,%rsi,8) */ 565 { 566 before_test( regs, mem ); 567 __asm__ __volatile__( 568 "movq %0, %%r14\n" 569 "\tmovq %1, %%r15\n" 570 LOAD_XMMREGS_from_r14 571 "\tmovq %%r15, %%r10\n" 572 "\txorq %%rsi, %%rsi\n" 573 "\t.byte 0x49,0x0f,0x18,0x4c,0xf2,0xa0\n" 574 SAVE_XMMREGS_to_r14 575 : /*out*/ : /*in*/ "r"(regs), "r"( - -0x60 + (char*)&mem->dqw[2] ) 576 : /*trash*/ "r14","r15","memory", XMMREGS, 577 "r10","rsi" 578 ); 579 after_test( "rex.WB prefetcht0 -0x60(%r10,%rsi,8)", regs, mem ); 580 } 581 582 /* subsd mem, reg f2 49 0f 5c 4d f8 rex.WB subsd -0x8(%r13),%xmm1 */ 583 { 584 before_test( regs, mem ); 585 __asm__ __volatile__( 586 "movq %0, %%r14\n" 587 "\tmovq %1, %%r15\n" 588 LOAD_XMMREGS_from_r14 589 "\tmovq %%r15, %%r13\n" 590 "\t.byte 0xf2,0x49,0x0f,0x5c,0x4d,0xf8\n" 591 SAVE_XMMREGS_to_r14 592 : /*out*/ : /*in*/ "r"(regs), "r"( - -0x8 + (char*)&mem->dqw[2] ) 593 : /*trash*/ "r14","r15","memory", XMMREGS, 594 "r13" 595 ); 596 after_test( "rex.WB subsd -0x8(%r13),%xmm1", regs, mem ); 597 } 598 599 free(regs); 600 free(mem); 601 return 0; 602 } 603