Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnowa | FileCheck %s --check-prefixes=CHECK,X86
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnowa | FileCheck %s --check-prefixes=CHECK,X64
        ; Both llc invocations enable the extended 3DNow! feature (3dnowa) rather than
        ; plain 3dnow, because this file also calls the @llvm.x86.3dnowa.* intrinsics
        ; (pf2iw, pfnacc, pfpnacc, pi2fw) and expects those instructions in the output.
      4 
      5 define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pavgusb: both x86_mmx arguments are fed to
        ; the intrinsic and the result is returned as <8 x i8>.
        ; Expected output has a single `pavgusb %mm1, %mm0` in both llc configurations.
        ; i686 stores the result through a pointer loaded from the stack and returns
        ; with `retl $4`; x86-64 spills to the stack, reloads into xmm0 and applies
        ; punpcklbw before returning.
      6 ; X86-LABEL: test_pavgusb:
      7 ; X86:       # %bb.0: # %entry
      8 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
      9 ; X86-NEXT:    pavgusb %mm1, %mm0
     10 ; X86-NEXT:    movq %mm0, (%eax)
     11 ; X86-NEXT:    retl $4
     12 ;
     13 ; X64-LABEL: test_pavgusb:
     14 ; X64:       # %bb.0: # %entry
     15 ; X64-NEXT:    pavgusb %mm1, %mm0
     16 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
     17 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
     18 ; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
     19 ; X64-NEXT:    retq
     20 entry:
        ; %0/%2 and %1/%3 are x86_mmx -> <8 x i8> -> x86_mmx round trips, so %2 and %3
        ; carry the same bits as the incoming arguments.
        ; NOTE(review): presumably left over from front-end argument coercion - confirm.
     21   %0 = bitcast x86_mmx %a.coerce to <8 x i8>
     22   %1 = bitcast x86_mmx %b.coerce to <8 x i8>
     23   %2 = bitcast <8 x i8> %0 to x86_mmx
     24   %3 = bitcast <8 x i8> %1 to x86_mmx
     25   %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
     26   %5 = bitcast x86_mmx %4 to <8 x i8>
     27   ret <8 x i8> %5
     28 }
     29 
     30 declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
     31 
     32 define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
        ; Codegen test for the one-operand intrinsic @llvm.x86.3dnow.pf2id: %a is
        ; reinterpreted as x86_mmx, passed to the intrinsic, and the result is
        ; returned as <2 x i32>.
        ; i686 builds the MMX operand from the two stack words at 8(%ebp)/12(%ebp)
        ; with movd + punpckldq and returns the two result words in eax/edx;
        ; x86-64 moves the operand from xmm0 with movdq2q and returns through xmm0.
     33 ; X86-LABEL: test_pf2id:
     34 ; X86:       # %bb.0: # %entry
     35 ; X86-NEXT:    pushl %ebp
     36 ; X86-NEXT:    movl %esp, %ebp
     37 ; X86-NEXT:    andl $-8, %esp
     38 ; X86-NEXT:    subl $8, %esp
     39 ; X86-NEXT:    movd 12(%ebp), %mm0
     40 ; X86-NEXT:    movd 8(%ebp), %mm1
     41 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
     42 ; X86-NEXT:    pf2id %mm1, %mm0
     43 ; X86-NEXT:    movq %mm0, (%esp)
     44 ; X86-NEXT:    movl (%esp), %eax
     45 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
     46 ; X86-NEXT:    movl %ebp, %esp
     47 ; X86-NEXT:    popl %ebp
     48 ; X86-NEXT:    retl
     49 ;
     50 ; X64-LABEL: test_pf2id:
     51 ; X64:       # %bb.0: # %entry
     52 ; X64-NEXT:    movdq2q %xmm0, %mm0
     53 ; X64-NEXT:    pf2id %mm0, %mm0
     54 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
     55 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
     56 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
     57 ; X64-NEXT:    retq
     58 entry:
     59   %0 = bitcast <2 x float> %a to x86_mmx
     60   %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
     61   %2 = bitcast x86_mmx %1 to <2 x i32>
     62   ret <2 x i32> %2
     63 }
     64 
     65 declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
     66 
     67 define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfacc: %a and %b are reinterpreted as
        ; x86_mmx, passed to the intrinsic, and the result is returned as <2 x float>.
        ; The assertions expect a single `pfacc` in both llc configurations.
        ; i686 assembles each operand from two stack words (movd + punpckldq) and
        ; reloads the stored result with two flds; x86-64 takes the operands from
        ; xmm1/xmm0 via movdq2q and reloads the result into xmm0 with movaps.
     68 ; X86-LABEL: test_pfacc:
     69 ; X86:       # %bb.0: # %entry
     70 ; X86-NEXT:    pushl %ebp
     71 ; X86-NEXT:    movl %esp, %ebp
     72 ; X86-NEXT:    andl $-8, %esp
     73 ; X86-NEXT:    subl $8, %esp
     74 ; X86-NEXT:    movd 20(%ebp), %mm0
     75 ; X86-NEXT:    movd 16(%ebp), %mm1
     76 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
     77 ; X86-NEXT:    movd 12(%ebp), %mm0
     78 ; X86-NEXT:    movd 8(%ebp), %mm2
     79 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
     80 ; X86-NEXT:    pfacc %mm1, %mm2
     81 ; X86-NEXT:    movq %mm2, (%esp)
     82 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
     83 ; X86-NEXT:    flds (%esp)
     84 ; X86-NEXT:    movl %ebp, %esp
     85 ; X86-NEXT:    popl %ebp
     86 ; X86-NEXT:    retl
     87 ;
     88 ; X64-LABEL: test_pfacc:
     89 ; X64:       # %bb.0: # %entry
     90 ; X64-NEXT:    movdq2q %xmm1, %mm0
     91 ; X64-NEXT:    movdq2q %xmm0, %mm1
     92 ; X64-NEXT:    pfacc %mm0, %mm1
     93 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
     94 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
     95 ; X64-NEXT:    retq
     96 entry:
     97   %0 = bitcast <2 x float> %a to x86_mmx
     98   %1 = bitcast <2 x float> %b to x86_mmx
     99   %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
    100   %3 = bitcast x86_mmx %2 to <2 x float>
    101   ret <2 x float> %3
    102 }
    103 
    104 declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
    105 
    106 define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfadd: two <2 x float> arguments are
        ; bitcast to x86_mmx, passed to the intrinsic, and the x86_mmx result is
        ; bitcast back to <2 x float> for the return.
        ; Expected output has a single `pfadd` with %b's register as the first
        ; (AT&T source) operand and %a's register as the second on both targets.
    107 ; X86-LABEL: test_pfadd:
    108 ; X86:       # %bb.0: # %entry
    109 ; X86-NEXT:    pushl %ebp
    110 ; X86-NEXT:    movl %esp, %ebp
    111 ; X86-NEXT:    andl $-8, %esp
    112 ; X86-NEXT:    subl $8, %esp
    113 ; X86-NEXT:    movd 20(%ebp), %mm0
    114 ; X86-NEXT:    movd 16(%ebp), %mm1
    115 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    116 ; X86-NEXT:    movd 12(%ebp), %mm0
    117 ; X86-NEXT:    movd 8(%ebp), %mm2
    118 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    119 ; X86-NEXT:    pfadd %mm1, %mm2
    120 ; X86-NEXT:    movq %mm2, (%esp)
    121 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    122 ; X86-NEXT:    flds (%esp)
    123 ; X86-NEXT:    movl %ebp, %esp
    124 ; X86-NEXT:    popl %ebp
    125 ; X86-NEXT:    retl
    126 ;
    127 ; X64-LABEL: test_pfadd:
    128 ; X64:       # %bb.0: # %entry
    129 ; X64-NEXT:    movdq2q %xmm1, %mm0
    130 ; X64-NEXT:    movdq2q %xmm0, %mm1
    131 ; X64-NEXT:    pfadd %mm0, %mm1
    132 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    133 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    134 ; X64-NEXT:    retq
    135 entry:
    136   %0 = bitcast <2 x float> %a to x86_mmx
    137   %1 = bitcast <2 x float> %b to x86_mmx
    138   %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
    139   %3 = bitcast x86_mmx %2 to <2 x float>
    140   ret <2 x float> %3
    141 }
    142 
    143 declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
    144 
    145 define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfcmpeq: takes two <2 x float> arguments
        ; (bitcast to x86_mmx) and returns the intrinsic result as <2 x i32>.
        ; Expected output has a single `pfcmpeq`; i686 hands the two result words
        ; back in eax/edx, x86-64 reloads the spilled result into xmm0 and applies
        ; pshufd before returning.
    146 ; X86-LABEL: test_pfcmpeq:
    147 ; X86:       # %bb.0: # %entry
    148 ; X86-NEXT:    pushl %ebp
    149 ; X86-NEXT:    movl %esp, %ebp
    150 ; X86-NEXT:    andl $-8, %esp
    151 ; X86-NEXT:    subl $8, %esp
    152 ; X86-NEXT:    movd 20(%ebp), %mm0
    153 ; X86-NEXT:    movd 16(%ebp), %mm1
    154 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    155 ; X86-NEXT:    movd 12(%ebp), %mm0
    156 ; X86-NEXT:    movd 8(%ebp), %mm2
    157 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    158 ; X86-NEXT:    pfcmpeq %mm1, %mm2
    159 ; X86-NEXT:    movq %mm2, (%esp)
    160 ; X86-NEXT:    movl (%esp), %eax
    161 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
    162 ; X86-NEXT:    movl %ebp, %esp
    163 ; X86-NEXT:    popl %ebp
    164 ; X86-NEXT:    retl
    165 ;
    166 ; X64-LABEL: test_pfcmpeq:
    167 ; X64:       # %bb.0: # %entry
    168 ; X64-NEXT:    movdq2q %xmm1, %mm0
    169 ; X64-NEXT:    movdq2q %xmm0, %mm1
    170 ; X64-NEXT:    pfcmpeq %mm0, %mm1
    171 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    172 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    173 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
    174 ; X64-NEXT:    retq
    175 entry:
    176   %0 = bitcast <2 x float> %a to x86_mmx
    177   %1 = bitcast <2 x float> %b to x86_mmx
    178   %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
    179   %3 = bitcast x86_mmx %2 to <2 x i32>
    180   ret <2 x i32> %3
    181 }
    182 
    183 declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
    184 
    185 define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfcmpge. Same shape as the other pfcmp*
        ; tests in this file: <2 x float> operands go in as x86_mmx and the result
        ; comes back as <2 x i32>.
        ; The only instruction that differs from those tests is the expected
        ; `pfcmpge` itself.
    186 ; X86-LABEL: test_pfcmpge:
    187 ; X86:       # %bb.0: # %entry
    188 ; X86-NEXT:    pushl %ebp
    189 ; X86-NEXT:    movl %esp, %ebp
    190 ; X86-NEXT:    andl $-8, %esp
    191 ; X86-NEXT:    subl $8, %esp
    192 ; X86-NEXT:    movd 20(%ebp), %mm0
    193 ; X86-NEXT:    movd 16(%ebp), %mm1
    194 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    195 ; X86-NEXT:    movd 12(%ebp), %mm0
    196 ; X86-NEXT:    movd 8(%ebp), %mm2
    197 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    198 ; X86-NEXT:    pfcmpge %mm1, %mm2
    199 ; X86-NEXT:    movq %mm2, (%esp)
    200 ; X86-NEXT:    movl (%esp), %eax
    201 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
    202 ; X86-NEXT:    movl %ebp, %esp
    203 ; X86-NEXT:    popl %ebp
    204 ; X86-NEXT:    retl
    205 ;
    206 ; X64-LABEL: test_pfcmpge:
    207 ; X64:       # %bb.0: # %entry
    208 ; X64-NEXT:    movdq2q %xmm1, %mm0
    209 ; X64-NEXT:    movdq2q %xmm0, %mm1
    210 ; X64-NEXT:    pfcmpge %mm0, %mm1
    211 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    212 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    213 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
    214 ; X64-NEXT:    retq
    215 entry:
    216   %0 = bitcast <2 x float> %a to x86_mmx
    217   %1 = bitcast <2 x float> %b to x86_mmx
    218   %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
    219   %3 = bitcast x86_mmx %2 to <2 x i32>
    220   ret <2 x i32> %3
    221 }
    222 
    223 declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
    224 
    225 define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfcmpgt: <2 x float> operands are passed
        ; to the intrinsic as x86_mmx and the result is returned as <2 x i32>.
        ; Expected output has a single `pfcmpgt`; the result is stored to the stack
        ; and reloaded into eax/edx on i686, or into xmm0 (followed by pshufd) on
        ; x86-64.
    226 ; X86-LABEL: test_pfcmpgt:
    227 ; X86:       # %bb.0: # %entry
    228 ; X86-NEXT:    pushl %ebp
    229 ; X86-NEXT:    movl %esp, %ebp
    230 ; X86-NEXT:    andl $-8, %esp
    231 ; X86-NEXT:    subl $8, %esp
    232 ; X86-NEXT:    movd 20(%ebp), %mm0
    233 ; X86-NEXT:    movd 16(%ebp), %mm1
    234 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    235 ; X86-NEXT:    movd 12(%ebp), %mm0
    236 ; X86-NEXT:    movd 8(%ebp), %mm2
    237 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    238 ; X86-NEXT:    pfcmpgt %mm1, %mm2
    239 ; X86-NEXT:    movq %mm2, (%esp)
    240 ; X86-NEXT:    movl (%esp), %eax
    241 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
    242 ; X86-NEXT:    movl %ebp, %esp
    243 ; X86-NEXT:    popl %ebp
    244 ; X86-NEXT:    retl
    245 ;
    246 ; X64-LABEL: test_pfcmpgt:
    247 ; X64:       # %bb.0: # %entry
    248 ; X64-NEXT:    movdq2q %xmm1, %mm0
    249 ; X64-NEXT:    movdq2q %xmm0, %mm1
    250 ; X64-NEXT:    pfcmpgt %mm0, %mm1
    251 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    252 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    253 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
    254 ; X64-NEXT:    retq
    255 entry:
    256   %0 = bitcast <2 x float> %a to x86_mmx
    257   %1 = bitcast <2 x float> %b to x86_mmx
    258   %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
    259   %3 = bitcast x86_mmx %2 to <2 x i32>
    260   ret <2 x i32> %3
    261 }
    262 
    263 declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
    264 
    265 define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfmax: two <2 x float> arguments in,
        ; <2 x float> out, with x86_mmx bitcasts on either side of the intrinsic.
        ; Expected output has a single `pfmax`; operand setup and result reload
        ; follow the same sequence as the other two-operand float tests here.
    266 ; X86-LABEL: test_pfmax:
    267 ; X86:       # %bb.0: # %entry
    268 ; X86-NEXT:    pushl %ebp
    269 ; X86-NEXT:    movl %esp, %ebp
    270 ; X86-NEXT:    andl $-8, %esp
    271 ; X86-NEXT:    subl $8, %esp
    272 ; X86-NEXT:    movd 20(%ebp), %mm0
    273 ; X86-NEXT:    movd 16(%ebp), %mm1
    274 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    275 ; X86-NEXT:    movd 12(%ebp), %mm0
    276 ; X86-NEXT:    movd 8(%ebp), %mm2
    277 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    278 ; X86-NEXT:    pfmax %mm1, %mm2
    279 ; X86-NEXT:    movq %mm2, (%esp)
    280 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    281 ; X86-NEXT:    flds (%esp)
    282 ; X86-NEXT:    movl %ebp, %esp
    283 ; X86-NEXT:    popl %ebp
    284 ; X86-NEXT:    retl
    285 ;
    286 ; X64-LABEL: test_pfmax:
    287 ; X64:       # %bb.0: # %entry
    288 ; X64-NEXT:    movdq2q %xmm1, %mm0
    289 ; X64-NEXT:    movdq2q %xmm0, %mm1
    290 ; X64-NEXT:    pfmax %mm0, %mm1
    291 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    292 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    293 ; X64-NEXT:    retq
    294 entry:
    295   %0 = bitcast <2 x float> %a to x86_mmx
    296   %1 = bitcast <2 x float> %b to x86_mmx
    297   %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
    298   %3 = bitcast x86_mmx %2 to <2 x float>
    299   ret <2 x float> %3
    300 }
    301 
    302 declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
    303 
    304 define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfmin: %a and %b are bitcast to x86_mmx,
        ; passed to the intrinsic, and the result is bitcast back to <2 x float>.
        ; Expected output has a single `pfmin` in both llc configurations.
    305 ; X86-LABEL: test_pfmin:
    306 ; X86:       # %bb.0: # %entry
    307 ; X86-NEXT:    pushl %ebp
    308 ; X86-NEXT:    movl %esp, %ebp
    309 ; X86-NEXT:    andl $-8, %esp
    310 ; X86-NEXT:    subl $8, %esp
    311 ; X86-NEXT:    movd 20(%ebp), %mm0
    312 ; X86-NEXT:    movd 16(%ebp), %mm1
    313 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    314 ; X86-NEXT:    movd 12(%ebp), %mm0
    315 ; X86-NEXT:    movd 8(%ebp), %mm2
    316 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    317 ; X86-NEXT:    pfmin %mm1, %mm2
    318 ; X86-NEXT:    movq %mm2, (%esp)
    319 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    320 ; X86-NEXT:    flds (%esp)
    321 ; X86-NEXT:    movl %ebp, %esp
    322 ; X86-NEXT:    popl %ebp
    323 ; X86-NEXT:    retl
    324 ;
    325 ; X64-LABEL: test_pfmin:
    326 ; X64:       # %bb.0: # %entry
    327 ; X64-NEXT:    movdq2q %xmm1, %mm0
    328 ; X64-NEXT:    movdq2q %xmm0, %mm1
    329 ; X64-NEXT:    pfmin %mm0, %mm1
    330 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    331 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    332 ; X64-NEXT:    retq
    333 entry:
    334   %0 = bitcast <2 x float> %a to x86_mmx
    335   %1 = bitcast <2 x float> %b to x86_mmx
    336   %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
    337   %3 = bitcast x86_mmx %2 to <2 x float>
    338   ret <2 x float> %3
    339 }
    340 
    341 declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
    342 
    343 define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfmul: two <2 x float> arguments are
        ; reinterpreted as x86_mmx for the intrinsic call and the result is
        ; returned as <2 x float>.
        ; Expected output has a single `pfmul` in both llc configurations.
    344 ; X86-LABEL: test_pfmul:
    345 ; X86:       # %bb.0: # %entry
    346 ; X86-NEXT:    pushl %ebp
    347 ; X86-NEXT:    movl %esp, %ebp
    348 ; X86-NEXT:    andl $-8, %esp
    349 ; X86-NEXT:    subl $8, %esp
    350 ; X86-NEXT:    movd 20(%ebp), %mm0
    351 ; X86-NEXT:    movd 16(%ebp), %mm1
    352 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    353 ; X86-NEXT:    movd 12(%ebp), %mm0
    354 ; X86-NEXT:    movd 8(%ebp), %mm2
    355 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    356 ; X86-NEXT:    pfmul %mm1, %mm2
    357 ; X86-NEXT:    movq %mm2, (%esp)
    358 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    359 ; X86-NEXT:    flds (%esp)
    360 ; X86-NEXT:    movl %ebp, %esp
    361 ; X86-NEXT:    popl %ebp
    362 ; X86-NEXT:    retl
    363 ;
    364 ; X64-LABEL: test_pfmul:
    365 ; X64:       # %bb.0: # %entry
    366 ; X64-NEXT:    movdq2q %xmm1, %mm0
    367 ; X64-NEXT:    movdq2q %xmm0, %mm1
    368 ; X64-NEXT:    pfmul %mm0, %mm1
    369 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    370 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    371 ; X64-NEXT:    retq
    372 entry:
    373   %0 = bitcast <2 x float> %a to x86_mmx
    374   %1 = bitcast <2 x float> %b to x86_mmx
    375   %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
    376   %3 = bitcast x86_mmx %2 to <2 x float>
    377   ret <2 x float> %3
    378 }
    379 
    380 declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
    381 
    382 define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
        ; Codegen test for the one-operand intrinsic @llvm.x86.3dnow.pfrcp:
        ; <2 x float> in, <2 x float> out, via x86_mmx bitcasts.
        ; Expected output has a single `pfrcp`; on i686 its source is the register
        ; built by movd + punpckldq, on x86-64 it is the register filled by movdq2q.
    383 ; X86-LABEL: test_pfrcp:
    384 ; X86:       # %bb.0: # %entry
    385 ; X86-NEXT:    pushl %ebp
    386 ; X86-NEXT:    movl %esp, %ebp
    387 ; X86-NEXT:    andl $-8, %esp
    388 ; X86-NEXT:    subl $8, %esp
    389 ; X86-NEXT:    movd 12(%ebp), %mm0
    390 ; X86-NEXT:    movd 8(%ebp), %mm1
    391 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    392 ; X86-NEXT:    pfrcp %mm1, %mm0
    393 ; X86-NEXT:    movq %mm0, (%esp)
    394 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    395 ; X86-NEXT:    flds (%esp)
    396 ; X86-NEXT:    movl %ebp, %esp
    397 ; X86-NEXT:    popl %ebp
    398 ; X86-NEXT:    retl
    399 ;
    400 ; X64-LABEL: test_pfrcp:
    401 ; X64:       # %bb.0: # %entry
    402 ; X64-NEXT:    movdq2q %xmm0, %mm0
    403 ; X64-NEXT:    pfrcp %mm0, %mm0
    404 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
    405 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    406 ; X64-NEXT:    retq
    407 entry:
    408   %0 = bitcast <2 x float> %a to x86_mmx
    409   %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
    410   %2 = bitcast x86_mmx %1 to <2 x float>
    411   ret <2 x float> %2
    412 }
    413 
    414 declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
    415 
    416 define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfrcpit1: two <2 x float> arguments are
        ; passed to the intrinsic as x86_mmx and the result is returned as
        ; <2 x float>.
        ; Expected output has a single `pfrcpit1` in both llc configurations.
    417 ; X86-LABEL: test_pfrcpit1:
    418 ; X86:       # %bb.0: # %entry
    419 ; X86-NEXT:    pushl %ebp
    420 ; X86-NEXT:    movl %esp, %ebp
    421 ; X86-NEXT:    andl $-8, %esp
    422 ; X86-NEXT:    subl $8, %esp
    423 ; X86-NEXT:    movd 20(%ebp), %mm0
    424 ; X86-NEXT:    movd 16(%ebp), %mm1
    425 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    426 ; X86-NEXT:    movd 12(%ebp), %mm0
    427 ; X86-NEXT:    movd 8(%ebp), %mm2
    428 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    429 ; X86-NEXT:    pfrcpit1 %mm1, %mm2
    430 ; X86-NEXT:    movq %mm2, (%esp)
    431 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    432 ; X86-NEXT:    flds (%esp)
    433 ; X86-NEXT:    movl %ebp, %esp
    434 ; X86-NEXT:    popl %ebp
    435 ; X86-NEXT:    retl
    436 ;
    437 ; X64-LABEL: test_pfrcpit1:
    438 ; X64:       # %bb.0: # %entry
    439 ; X64-NEXT:    movdq2q %xmm1, %mm0
    440 ; X64-NEXT:    movdq2q %xmm0, %mm1
    441 ; X64-NEXT:    pfrcpit1 %mm0, %mm1
    442 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    443 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    444 ; X64-NEXT:    retq
    445 entry:
    446   %0 = bitcast <2 x float> %a to x86_mmx
    447   %1 = bitcast <2 x float> %b to x86_mmx
    448   %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
    449   %3 = bitcast x86_mmx %2 to <2 x float>
    450   ret <2 x float> %3
    451 }
    452 
    453 declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
    454 
    455 define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfrcpit2. The IR and the expected
        ; assembly match test_pfrcpit1 line for line except for the intrinsic name
        ; and the resulting `pfrcpit2` instruction.
    456 ; X86-LABEL: test_pfrcpit2:
    457 ; X86:       # %bb.0: # %entry
    458 ; X86-NEXT:    pushl %ebp
    459 ; X86-NEXT:    movl %esp, %ebp
    460 ; X86-NEXT:    andl $-8, %esp
    461 ; X86-NEXT:    subl $8, %esp
    462 ; X86-NEXT:    movd 20(%ebp), %mm0
    463 ; X86-NEXT:    movd 16(%ebp), %mm1
    464 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    465 ; X86-NEXT:    movd 12(%ebp), %mm0
    466 ; X86-NEXT:    movd 8(%ebp), %mm2
    467 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    468 ; X86-NEXT:    pfrcpit2 %mm1, %mm2
    469 ; X86-NEXT:    movq %mm2, (%esp)
    470 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    471 ; X86-NEXT:    flds (%esp)
    472 ; X86-NEXT:    movl %ebp, %esp
    473 ; X86-NEXT:    popl %ebp
    474 ; X86-NEXT:    retl
    475 ;
    476 ; X64-LABEL: test_pfrcpit2:
    477 ; X64:       # %bb.0: # %entry
    478 ; X64-NEXT:    movdq2q %xmm1, %mm0
    479 ; X64-NEXT:    movdq2q %xmm0, %mm1
    480 ; X64-NEXT:    pfrcpit2 %mm0, %mm1
    481 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    482 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    483 ; X64-NEXT:    retq
    484 entry:
    485   %0 = bitcast <2 x float> %a to x86_mmx
    486   %1 = bitcast <2 x float> %b to x86_mmx
    487   %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
    488   %3 = bitcast x86_mmx %2 to <2 x float>
    489   ret <2 x float> %3
    490 }
    491 
    492 declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
    493 
    494 define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
        ; Codegen test for the one-operand intrinsic @llvm.x86.3dnow.pfrsqrt:
        ; %a is bitcast to x86_mmx, passed to the intrinsic, and the result is
        ; returned as <2 x float>.
        ; Expected output has a single `pfrsqrt` in both llc configurations.
    495 ; X86-LABEL: test_pfrsqrt:
    496 ; X86:       # %bb.0: # %entry
    497 ; X86-NEXT:    pushl %ebp
    498 ; X86-NEXT:    movl %esp, %ebp
    499 ; X86-NEXT:    andl $-8, %esp
    500 ; X86-NEXT:    subl $8, %esp
    501 ; X86-NEXT:    movd 12(%ebp), %mm0
    502 ; X86-NEXT:    movd 8(%ebp), %mm1
    503 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    504 ; X86-NEXT:    pfrsqrt %mm1, %mm0
    505 ; X86-NEXT:    movq %mm0, (%esp)
    506 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    507 ; X86-NEXT:    flds (%esp)
    508 ; X86-NEXT:    movl %ebp, %esp
    509 ; X86-NEXT:    popl %ebp
    510 ; X86-NEXT:    retl
    511 ;
    512 ; X64-LABEL: test_pfrsqrt:
    513 ; X64:       # %bb.0: # %entry
    514 ; X64-NEXT:    movdq2q %xmm0, %mm0
    515 ; X64-NEXT:    pfrsqrt %mm0, %mm0
    516 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
    517 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    518 ; X64-NEXT:    retq
    519 entry:
    520   %0 = bitcast <2 x float> %a to x86_mmx
    521   %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
    522   %2 = bitcast x86_mmx %1 to <2 x float>
    523   ret <2 x float> %2
    524 }
    525 
    526 declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
    527 
    528 define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfrsqit1: two <2 x float> arguments are
        ; bitcast to x86_mmx for the intrinsic and the result is returned as
        ; <2 x float>.
        ; Expected output has a single `pfrsqit1` in both llc configurations.
    529 ; X86-LABEL: test_pfrsqit1:
    530 ; X86:       # %bb.0: # %entry
    531 ; X86-NEXT:    pushl %ebp
    532 ; X86-NEXT:    movl %esp, %ebp
    533 ; X86-NEXT:    andl $-8, %esp
    534 ; X86-NEXT:    subl $8, %esp
    535 ; X86-NEXT:    movd 20(%ebp), %mm0
    536 ; X86-NEXT:    movd 16(%ebp), %mm1
    537 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    538 ; X86-NEXT:    movd 12(%ebp), %mm0
    539 ; X86-NEXT:    movd 8(%ebp), %mm2
    540 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    541 ; X86-NEXT:    pfrsqit1 %mm1, %mm2
    542 ; X86-NEXT:    movq %mm2, (%esp)
    543 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    544 ; X86-NEXT:    flds (%esp)
    545 ; X86-NEXT:    movl %ebp, %esp
    546 ; X86-NEXT:    popl %ebp
    547 ; X86-NEXT:    retl
    548 ;
    549 ; X64-LABEL: test_pfrsqit1:
    550 ; X64:       # %bb.0: # %entry
    551 ; X64-NEXT:    movdq2q %xmm1, %mm0
    552 ; X64-NEXT:    movdq2q %xmm0, %mm1
    553 ; X64-NEXT:    pfrsqit1 %mm0, %mm1
    554 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    555 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    556 ; X64-NEXT:    retq
    557 entry:
    558   %0 = bitcast <2 x float> %a to x86_mmx
    559   %1 = bitcast <2 x float> %b to x86_mmx
    560   %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
    561   %3 = bitcast x86_mmx %2 to <2 x float>
    562   ret <2 x float> %3
    563 }
    564 
    565 declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
    566 
    567 define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfsub: intrinsic is called with (%a, %b)
        ; as x86_mmx and the result is returned as <2 x float>.
        ; Expected output has a single `pfsub` whose second (AT&T destination)
        ; operand is the register holding %a and whose first operand holds %b.
    568 ; X86-LABEL: test_pfsub:
    569 ; X86:       # %bb.0: # %entry
    570 ; X86-NEXT:    pushl %ebp
    571 ; X86-NEXT:    movl %esp, %ebp
    572 ; X86-NEXT:    andl $-8, %esp
    573 ; X86-NEXT:    subl $8, %esp
    574 ; X86-NEXT:    movd 20(%ebp), %mm0
    575 ; X86-NEXT:    movd 16(%ebp), %mm1
    576 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    577 ; X86-NEXT:    movd 12(%ebp), %mm0
    578 ; X86-NEXT:    movd 8(%ebp), %mm2
    579 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    580 ; X86-NEXT:    pfsub %mm1, %mm2
    581 ; X86-NEXT:    movq %mm2, (%esp)
    582 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    583 ; X86-NEXT:    flds (%esp)
    584 ; X86-NEXT:    movl %ebp, %esp
    585 ; X86-NEXT:    popl %ebp
    586 ; X86-NEXT:    retl
    587 ;
    588 ; X64-LABEL: test_pfsub:
    589 ; X64:       # %bb.0: # %entry
    590 ; X64-NEXT:    movdq2q %xmm1, %mm0
    591 ; X64-NEXT:    movdq2q %xmm0, %mm1
    592 ; X64-NEXT:    pfsub %mm0, %mm1
    593 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    594 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    595 ; X64-NEXT:    retq
    596 entry:
    597   %0 = bitcast <2 x float> %a to x86_mmx
    598   %1 = bitcast <2 x float> %b to x86_mmx
    599   %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
    600   %3 = bitcast x86_mmx %2 to <2 x float>
    601   ret <2 x float> %3
    602 }
    603 
    604 declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
    605 
    606 define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pfsubr: intrinsic is called with (%a, %b)
        ; as x86_mmx and the result is returned as <2 x float>.
        ; Expected output has a single `pfsubr` with the same register assignment
        ; as test_pfsub (no operand swap appears in the expected assembly).
    607 ; X86-LABEL: test_pfsubr:
    608 ; X86:       # %bb.0: # %entry
    609 ; X86-NEXT:    pushl %ebp
    610 ; X86-NEXT:    movl %esp, %ebp
    611 ; X86-NEXT:    andl $-8, %esp
    612 ; X86-NEXT:    subl $8, %esp
    613 ; X86-NEXT:    movd 20(%ebp), %mm0
    614 ; X86-NEXT:    movd 16(%ebp), %mm1
    615 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    616 ; X86-NEXT:    movd 12(%ebp), %mm0
    617 ; X86-NEXT:    movd 8(%ebp), %mm2
    618 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    619 ; X86-NEXT:    pfsubr %mm1, %mm2
    620 ; X86-NEXT:    movq %mm2, (%esp)
    621 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    622 ; X86-NEXT:    flds (%esp)
    623 ; X86-NEXT:    movl %ebp, %esp
    624 ; X86-NEXT:    popl %ebp
    625 ; X86-NEXT:    retl
    626 ;
    627 ; X64-LABEL: test_pfsubr:
    628 ; X64:       # %bb.0: # %entry
    629 ; X64-NEXT:    movdq2q %xmm1, %mm0
    630 ; X64-NEXT:    movdq2q %xmm0, %mm1
    631 ; X64-NEXT:    pfsubr %mm0, %mm1
    632 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    633 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    634 ; X64-NEXT:    retq
    635 entry:
    636   %0 = bitcast <2 x float> %a to x86_mmx
    637   %1 = bitcast <2 x float> %b to x86_mmx
    638   %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
    639   %3 = bitcast x86_mmx %2 to <2 x float>
    640   ret <2 x float> %3
    641 }
    642 
    643 declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
    644 
    645 define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pi2fd: the argument already has type
        ; x86_mmx and the intrinsic result is returned as <2 x float>.
        ; Expected output applies `pi2fd %mm0, %mm0` directly, with no operand
        ; load or cross-register move before it on either target.
    646 ; X86-LABEL: test_pi2fd:
    647 ; X86:       # %bb.0: # %entry
    648 ; X86-NEXT:    pushl %ebp
    649 ; X86-NEXT:    movl %esp, %ebp
    650 ; X86-NEXT:    andl $-8, %esp
    651 ; X86-NEXT:    subl $8, %esp
    652 ; X86-NEXT:    pi2fd %mm0, %mm0
    653 ; X86-NEXT:    movq %mm0, (%esp)
    654 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    655 ; X86-NEXT:    flds (%esp)
    656 ; X86-NEXT:    movl %ebp, %esp
    657 ; X86-NEXT:    popl %ebp
    658 ; X86-NEXT:    retl
    659 ;
    660 ; X64-LABEL: test_pi2fd:
    661 ; X64:       # %bb.0: # %entry
    662 ; X64-NEXT:    pi2fd %mm0, %mm0
    663 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
    664 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    665 ; X64-NEXT:    retq
    666 entry:
        ; %0/%1 round-trip the argument through <2 x i32> and back to x86_mmx, so %1
        ; carries the same bits as %a.coerce.
     667   %0 = bitcast x86_mmx %a.coerce to <2 x i32>
    668   %1 = bitcast <2 x i32> %0 to x86_mmx
    669   %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
    670   %3 = bitcast x86_mmx %2 to <2 x float>
    671   ret <2 x float> %3
    672 }
    673 
    674 declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
    675 
    676 define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnow.pmulhrw: both x86_mmx arguments are fed
        ; to the intrinsic and the result is returned as <4 x i16>.
        ; Expected output has a single `pmulhrw %mm1, %mm0`. i686 stores the result
        ; through a pointer loaded from the stack and returns with `retl $4`;
        ; x86-64 spills, reloads into xmm0 and applies punpcklwd before returning.
    677 ; X86-LABEL: test_pmulhrw:
    678 ; X86:       # %bb.0: # %entry
    679 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    680 ; X86-NEXT:    pmulhrw %mm1, %mm0
    681 ; X86-NEXT:    movq %mm0, (%eax)
    682 ; X86-NEXT:    retl $4
    683 ;
    684 ; X64-LABEL: test_pmulhrw:
    685 ; X64:       # %bb.0: # %entry
    686 ; X64-NEXT:    pmulhrw %mm1, %mm0
    687 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
    688 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    689 ; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    690 ; X64-NEXT:    retq
    691 entry:
        ; %0/%2 and %1/%3 are x86_mmx -> <4 x i16> -> x86_mmx round trips, so the
        ; intrinsic sees the same bits as the incoming arguments.
    692   %0 = bitcast x86_mmx %a.coerce to <4 x i16>
    693   %1 = bitcast x86_mmx %b.coerce to <4 x i16>
    694   %2 = bitcast <4 x i16> %0 to x86_mmx
    695   %3 = bitcast <4 x i16> %1 to x86_mmx
    696   %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
    697   %5 = bitcast x86_mmx %4 to <4 x i16>
    698   ret <4 x i16> %5
    699 }
    700 
    701 declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
    702 
    703 define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnowa.pf2iw - note the `3dnowa` namespace,
        ; unlike the `3dnow` intrinsics tested earlier in this file.
        ; %a is bitcast to x86_mmx, passed to the intrinsic, and the result is
        ; returned as <2 x i32> (eax/edx on i686, xmm0 after pshufd on x86-64).
        ; Expected output has a single `pf2iw` in both llc configurations.
    704 ; X86-LABEL: test_pf2iw:
    705 ; X86:       # %bb.0: # %entry
    706 ; X86-NEXT:    pushl %ebp
    707 ; X86-NEXT:    movl %esp, %ebp
    708 ; X86-NEXT:    andl $-8, %esp
    709 ; X86-NEXT:    subl $8, %esp
    710 ; X86-NEXT:    movd 12(%ebp), %mm0
    711 ; X86-NEXT:    movd 8(%ebp), %mm1
    712 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    713 ; X86-NEXT:    pf2iw %mm1, %mm0
    714 ; X86-NEXT:    movq %mm0, (%esp)
    715 ; X86-NEXT:    movl (%esp), %eax
    716 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
    717 ; X86-NEXT:    movl %ebp, %esp
    718 ; X86-NEXT:    popl %ebp
    719 ; X86-NEXT:    retl
    720 ;
    721 ; X64-LABEL: test_pf2iw:
    722 ; X64:       # %bb.0: # %entry
    723 ; X64-NEXT:    movdq2q %xmm0, %mm0
    724 ; X64-NEXT:    pf2iw %mm0, %mm0
    725 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
    726 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    727 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
    728 ; X64-NEXT:    retq
    729 entry:
    730   %0 = bitcast <2 x float> %a to x86_mmx
    731   %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
    732   %2 = bitcast x86_mmx %1 to <2 x i32>
    733   ret <2 x i32> %2
    734 }
    735 
    736 declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
    737 
    738 define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnowa.pfnacc (`3dnowa` namespace): two
        ; <2 x float> arguments are bitcast to x86_mmx for the intrinsic and the
        ; result is returned as <2 x float>.
        ; Expected output has a single `pfnacc` in both llc configurations.
    739 ; X86-LABEL: test_pfnacc:
    740 ; X86:       # %bb.0: # %entry
    741 ; X86-NEXT:    pushl %ebp
    742 ; X86-NEXT:    movl %esp, %ebp
    743 ; X86-NEXT:    andl $-8, %esp
    744 ; X86-NEXT:    subl $8, %esp
    745 ; X86-NEXT:    movd 20(%ebp), %mm0
    746 ; X86-NEXT:    movd 16(%ebp), %mm1
    747 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    748 ; X86-NEXT:    movd 12(%ebp), %mm0
    749 ; X86-NEXT:    movd 8(%ebp), %mm2
    750 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    751 ; X86-NEXT:    pfnacc %mm1, %mm2
    752 ; X86-NEXT:    movq %mm2, (%esp)
    753 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    754 ; X86-NEXT:    flds (%esp)
    755 ; X86-NEXT:    movl %ebp, %esp
    756 ; X86-NEXT:    popl %ebp
    757 ; X86-NEXT:    retl
    758 ;
    759 ; X64-LABEL: test_pfnacc:
    760 ; X64:       # %bb.0: # %entry
    761 ; X64-NEXT:    movdq2q %xmm1, %mm0
    762 ; X64-NEXT:    movdq2q %xmm0, %mm1
    763 ; X64-NEXT:    pfnacc %mm0, %mm1
    764 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    765 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    766 ; X64-NEXT:    retq
    767 entry:
    768   %0 = bitcast <2 x float> %a to x86_mmx
    769   %1 = bitcast <2 x float> %b to x86_mmx
    770   %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
    771   %3 = bitcast x86_mmx %2 to <2 x float>
    772   ret <2 x float> %3
    773 }
    774 
    775 declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
    776 
    777 define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
        ; Codegen test for @llvm.x86.3dnowa.pfpnacc (`3dnowa` namespace). The IR
        ; and the expected assembly match test_pfnacc line for line except for the
        ; intrinsic name and the resulting `pfpnacc` instruction.
    778 ; X86-LABEL: test_pfpnacc:
    779 ; X86:       # %bb.0: # %entry
    780 ; X86-NEXT:    pushl %ebp
    781 ; X86-NEXT:    movl %esp, %ebp
    782 ; X86-NEXT:    andl $-8, %esp
    783 ; X86-NEXT:    subl $8, %esp
    784 ; X86-NEXT:    movd 20(%ebp), %mm0
    785 ; X86-NEXT:    movd 16(%ebp), %mm1
    786 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    787 ; X86-NEXT:    movd 12(%ebp), %mm0
    788 ; X86-NEXT:    movd 8(%ebp), %mm2
    789 ; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
    790 ; X86-NEXT:    pfpnacc %mm1, %mm2
    791 ; X86-NEXT:    movq %mm2, (%esp)
    792 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    793 ; X86-NEXT:    flds (%esp)
    794 ; X86-NEXT:    movl %ebp, %esp
    795 ; X86-NEXT:    popl %ebp
    796 ; X86-NEXT:    retl
    797 ;
    798 ; X64-LABEL: test_pfpnacc:
    799 ; X64:       # %bb.0: # %entry
    800 ; X64-NEXT:    movdq2q %xmm1, %mm0
    801 ; X64-NEXT:    movdq2q %xmm0, %mm1
    802 ; X64-NEXT:    pfpnacc %mm0, %mm1
    803 ; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
    804 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    805 ; X64-NEXT:    retq
    806 entry:
    807   %0 = bitcast <2 x float> %a to x86_mmx
    808   %1 = bitcast <2 x float> %b to x86_mmx
    809   %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
    810   %3 = bitcast x86_mmx %2 to <2 x float>
    811   ret <2 x float> %3
    812 }
    813 
    814 declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
    815 
    816 define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
        ; Codegen test for the @llvm.x86.3dnowa.pi2fw intrinsic. The x86_mmx argument
        ; is passed to the intrinsic and the x86_mmx result is reinterpreted as
        ; <2 x float> and returned.
        ;
        ; The assertions below expect `pi2fw %mm0, %mm0` operating directly on the
        ; incoming mm0 in both RUN configurations. The result is then stored to the
        ; stack and reloaded: with two `flds` in the i686 output, and with movaps
        ; into xmm0 in the x86_64 output.
    817 ; X86-LABEL: test_pi2fw:
    818 ; X86:       # %bb.0: # %entry
    819 ; X86-NEXT:    pushl %ebp
    820 ; X86-NEXT:    movl %esp, %ebp
    821 ; X86-NEXT:    andl $-8, %esp
    822 ; X86-NEXT:    subl $8, %esp
    823 ; X86-NEXT:    pi2fw %mm0, %mm0
    824 ; X86-NEXT:    movq %mm0, (%esp)
    825 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    826 ; X86-NEXT:    flds (%esp)
    827 ; X86-NEXT:    movl %ebp, %esp
    828 ; X86-NEXT:    popl %ebp
    829 ; X86-NEXT:    retl
    830 ;
    831 ; X64-LABEL: test_pi2fw:
    832 ; X64:       # %bb.0: # %entry
    833 ; X64-NEXT:    pi2fw %mm0, %mm0
    834 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
    835 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    836 ; X64-NEXT:    retq
    837 entry:
        ; %a.coerce is cast to <2 x i32> and immediately back to x86_mmx; the
        ; expected assembly above contains no instruction for this round trip.
    838   %0 = bitcast x86_mmx %a.coerce to <2 x i32>
    839   %1 = bitcast <2 x i32> %0 to x86_mmx
    840   %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
        ; Reinterpret the x86_mmx result as the <2 x float> return value.
    841   %3 = bitcast x86_mmx %2 to <2 x float>
    842   ret <2 x float> %3
    843 }
    844 
    845 declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
    846 
    847 define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
        ; Codegen test for the @llvm.x86.3dnowa.pswapd intrinsic with a <2 x float>
        ; argument and return type. The argument is reinterpreted as x86_mmx, passed
        ; to the intrinsic, and the result is reinterpreted as <2 x float>.
        ;
        ; The assertions below expect a single `pswapd` whose asm annotation shows
        ; the destination as elements [1,0] of the source:
        ;  - i686 output: the operand is assembled from two stack dwords with
        ;    movd + punpckldq, the result is stored to (%esp) and reloaded with two
        ;    `flds` instructions.
        ;  - x86_64 output: the operand is moved from xmm0 with movdq2q, the result
        ;    is stored below %rsp and reloaded into xmm0 with movaps.
    848 ; X86-LABEL: test_pswapdsf:
    849 ; X86:       # %bb.0: # %entry
    850 ; X86-NEXT:    pushl %ebp
    851 ; X86-NEXT:    movl %esp, %ebp
    852 ; X86-NEXT:    andl $-8, %esp
    853 ; X86-NEXT:    subl $8, %esp
    854 ; X86-NEXT:    movd 12(%ebp), %mm0
    855 ; X86-NEXT:    movd 8(%ebp), %mm1
    856 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    857 ; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
    858 ; X86-NEXT:    movq %mm0, (%esp)
    859 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
    860 ; X86-NEXT:    flds (%esp)
    861 ; X86-NEXT:    movl %ebp, %esp
    862 ; X86-NEXT:    popl %ebp
    863 ; X86-NEXT:    retl
    864 ;
    865 ; X64-LABEL: test_pswapdsf:
    866 ; X64:       # %bb.0: # %entry
    867 ; X64-NEXT:    movdq2q %xmm0, %mm0
    868 ; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
    869 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
    870 ; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
    871 ; X64-NEXT:    retq
    872 entry:
        ; Reinterpret the float pair as x86_mmx, the type the intrinsic takes and
        ; returns.
    873   %0 = bitcast <2 x float> %a to x86_mmx
    874   %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
        ; Reinterpret the x86_mmx result as the <2 x float> return value.
    875   %2 = bitcast x86_mmx %1 to <2 x float>
    876   ret <2 x float> %2
    877 }
    878 
    879 define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
        ; Codegen test for the @llvm.x86.3dnowa.pswapd intrinsic with a <2 x i32>
        ; argument and return type. The argument is reinterpreted as x86_mmx, passed
        ; to the intrinsic, and the result is reinterpreted as <2 x i32>.
        ;
        ; The assertions below expect a single `pswapd` whose asm annotation shows
        ; the destination as elements [1,0] of the source:
        ;  - i686 output: the operand is assembled from two stack dwords with
        ;    movd + punpckldq, the result is stored to (%esp) and loaded back into
        ;    eax and edx.
        ;  - x86_64 output: xmm0 is shuffled with pshufd (mask [0,2,2,3]) and stored
        ;    below %rsp, `pswapd` reads that slot as a memory operand, and the result
        ;    is stored, reloaded with movq and shuffled again (mask [0,1,1,3]).
    880 ; X86-LABEL: test_pswapdsi:
    881 ; X86:       # %bb.0: # %entry
    882 ; X86-NEXT:    pushl %ebp
    883 ; X86-NEXT:    movl %esp, %ebp
    884 ; X86-NEXT:    andl $-8, %esp
    885 ; X86-NEXT:    subl $8, %esp
    886 ; X86-NEXT:    movd 12(%ebp), %mm0
    887 ; X86-NEXT:    movd 8(%ebp), %mm1
    888 ; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
    889 ; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
    890 ; X86-NEXT:    movq %mm0, (%esp)
    891 ; X86-NEXT:    movl (%esp), %eax
    892 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
    893 ; X86-NEXT:    movl %ebp, %esp
    894 ; X86-NEXT:    popl %ebp
    895 ; X86-NEXT:    retl
    896 ;
    897 ; X64-LABEL: test_pswapdsi:
    898 ; X64:       # %bb.0: # %entry
    899 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    900 ; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
    901 ; X64-NEXT:    pswapd -{{[0-9]+}}(%rsp), %mm0 # mm0 = mem[1,0]
    902 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
    903 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    904 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
    905 ; X64-NEXT:    retq
    906 entry:
        ; Reinterpret the integer pair as x86_mmx, the type the intrinsic takes and
        ; returns.
    907   %0 = bitcast <2 x i32> %a to x86_mmx
    908   %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
        ; Reinterpret the x86_mmx result as the <2 x i32> return value.
    909   %2 = bitcast x86_mmx %1 to <2 x i32>
    910   ret <2 x i32> %2
    911 }
    912 
    913 declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone
    914