; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnowa | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnowa | FileCheck %s --check-prefix=X64

; Codegen test for the x86 3DNow! intrinsics (@llvm.x86.3dnow.*) and the
; extended 3DNow! intrinsics (@llvm.x86.3dnowa.*). Every function wraps exactly
; one intrinsic call, and the expected assembly is checked once for a 32-bit
; (i686) triple and once for a 64-bit (x86_64) triple.
;
; The run lines request the "3dnowa" subtarget feature rather than plain
; "3dnow": the tests at the end of the file (pf2iw, pfnacc, pfpnacc, pi2fw,
; pswapd) use the extended intrinsics, which are expected to need that feature
; for instruction selection.
;
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; pavgusb on two x86_mmx operands; the result is returned as <8 x i8>.
define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
; X86-LABEL: test_pavgusb:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pavgusb %mm1, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: test_pavgusb:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pavgusb %mm1, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <8 x i8>
  %1 = bitcast x86_mmx %b.coerce to <8 x i8>
  %2 = bitcast <8 x i8> %0 to x86_mmx
  %3 = bitcast <8 x i8> %1 to x86_mmx
  %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <8 x i8>
  ret <8 x i8> %5
}

declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone

; pf2id on a <2 x float> argument; the result is returned as <2 x i32>.
define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pf2id:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pf2id %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pf2id:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pf2id %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone

; pfacc on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone

; pfadd on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfadd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfadd %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfadd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfadd %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone

; pfcmpeq on two <2 x float> arguments; the result is returned as <2 x i32>.
define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpeq:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpeq %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpeq:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpeq %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone

; pfcmpge on two <2 x float> arguments; the result is returned as <2 x i32>.
define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpge:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpge %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpge:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpge %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone

; pfcmpgt on two <2 x float> arguments; the result is returned as <2 x i32>.
define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpgt:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpgt %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpgt:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpgt %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone

; pfmax on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmax:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmax %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmax:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmax %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone

; pfmin on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmin:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmin %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmin:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmin %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone

; pfmul on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmul:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmul %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmul:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmul %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone

; pfrcp on a <2 x float> argument; the result is returned as <2 x float>.
define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pfrcp:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pfrcp %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcp:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pfrcp %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone

; pfrcpit1 on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrcpit1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrcpit1 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcpit1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrcpit1 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone

; pfrcpit2 on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrcpit2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrcpit2 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcpit2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrcpit2 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone

; pfrsqrt on a <2 x float> argument; the result is returned as <2 x float>.
define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pfrsqrt:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pfrsqrt %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrsqrt:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pfrsqrt %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone

; pfrsqit1 on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrsqit1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrsqit1 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrsqit1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrsqit1 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone

; pfsub on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfsub:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfsub %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfsub:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfsub %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone

; pfsubr on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfsubr:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfsubr %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfsubr:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfsubr %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone

; pi2fd on an x86_mmx operand; the result is returned as <2 x float>.
define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
; X86-LABEL: test_pi2fd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    pi2fd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pi2fd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pi2fd %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone

; pmulhrw on two x86_mmx operands; the result is returned as <4 x i16>.
define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
; X86-LABEL: test_pmulhrw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pmulhrw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: test_pmulhrw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pmulhrw %mm1, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <4 x i16>
  %1 = bitcast x86_mmx %b.coerce to <4 x i16>
  %2 = bitcast <4 x i16> %0 to x86_mmx
  %3 = bitcast <4 x i16> %1 to x86_mmx
  %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <4 x i16>
  ret <4 x i16> %5
}

declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone

; The remaining tests use the extended (@llvm.x86.3dnowa.*) intrinsics.

; pf2iw on a <2 x float> argument; the result is returned as <2 x i32>.
define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pf2iw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pf2iw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pf2iw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pf2iw %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone

; pfnacc on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfnacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfnacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfnacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfnacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone

; pfpnacc on two <2 x float> arguments; the result is returned as <2 x float>.
define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfpnacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfpnacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfpnacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfpnacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone

; pi2fw on an x86_mmx operand; the result is returned as <2 x float>.
define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
; X86-LABEL: test_pi2fw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    pi2fw %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pi2fw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pi2fw %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone

; pswapd on a <2 x float> argument; the result is returned as <2 x float>.
define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pswapdsf:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pswapdsf:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

; pswapd on a <2 x i32> argument; the result is returned as <2 x i32>.
define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
; X86-LABEL: test_pswapdsi:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pswapdsi:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    pswapd -{{[0-9]+}}(%rsp), %mm0 # mm0 = mem[1,0]
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i32> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone