; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
; RUN: llc < %s -march=x86 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64

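; This file checks that calls to the MMX intrinsics, built from <1 x i64>
; arguments, are lowered to the expected MMX instructions on both 32-bit (X86)
; and 64-bit (X64) targets, with and without AVX enabled. Each test bitcasts
; its <1 x i64> arguments to the element type the intrinsic expects, casts to
; x86_mmx, calls the intrinsic, and casts the result back to an i64 return.
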
declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test1
; ALL: phaddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test88
; ALL: pcmpgtd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test87
; ALL: pcmpgtw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test86
; ALL: pcmpgtb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test85
; ALL: pcmpeqd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test84
; ALL: pcmpeqw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test83
; ALL: pcmpeqb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test82
; X86: punpckldq {{.*#+}} mm0 = mm0[0],mem[0]
; X64: punpckldq {{.*#+}} mm0 = mm0[0],mm1[0]
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test81
; X86: punpcklwd {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1]
; X64: punpcklwd {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test80
; X86: punpcklbw {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
; X64: punpcklbw {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test79
; X86: punpckhdq {{.*#+}} mm0 = mm0[1],mem[1]
; X64: punpckhdq {{.*#+}} mm0 = mm0[1],mm1[1]
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test78
; X86: punpckhwd {{.*#+}} mm0 = mm0[2],mem[2],mm0[3],mem[3]
; X64: punpckhwd {{.*#+}} mm0 = mm0[2],mm1[2],mm0[3],mm1[3]
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test77
; X86: punpckhbw {{.*#+}} mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
; X64: punpckhbw {{.*#+}} mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7]
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test76
; ALL: packuswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test75
; ALL: packssdw
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test74
; ALL: packsswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

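; The tests below cover the MMX shift intrinsics: first the immediate-count
; forms (llvm.x86.mmx.psrai.*, psrli.*, pslli.*), then the forms that take the
; shift count in a second MMX operand (llvm.x86.mmx.psra.*, psrl.*, psll.*).
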
declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone

define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test73
; ALL: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone

define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test72
; ALL: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone

define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test71
; ALL: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone

define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test70
; ALL: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone

define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test69
; ALL: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone

define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test68
; ALL: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone

define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test67
; ALL: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone

define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test66
; ALL: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test65
; ALL: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test64
; ALL: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test63
; ALL: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test62
; ALL: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test61
; ALL: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test60
; ALL: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test59
; ALL: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test58
; ALL: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone

define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test56
; ALL: pxor
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone

define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test55
; ALL: por
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone

define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test54
; ALL: pandn
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone

define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test53
; ALL: pand
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test52
; ALL: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test51
; ALL: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test50
; ALL: pmulhw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test49
; ALL: pmaddwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test48
; ALL: psubusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test47
; ALL: psubusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test46
; ALL: psubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test45
; ALL: psubsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test44
; ALL: psubq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone

declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test43
; ALL: psubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test42
; ALL: psubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test41
; ALL: psubb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test40
; ALL: paddusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test39
; ALL: paddusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test38
; ALL: paddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test37
; ALL: paddsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test36
; ALL: paddq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test35
; ALL: paddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test34
; ALL: paddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test33
; ALL: paddb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test32
; ALL: psadbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test31
; ALL: pminsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test30
; ALL: pminub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test29
; ALL: pmaxsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test28
; ALL: pmaxub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test27
; ALL: pavgw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test26
; ALL: pavgb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind

define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
; ALL-LABEL: @test25
; ALL: movntq
entry:
  %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
  ret void
}

declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone

define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test24
; ALL: pmovmskb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
  ret i32 %1
}

declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind

define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
; ALL-LABEL: @test23
; ALL: maskmovq
entry:
  %0 = bitcast <1 x i64> %n to <8 x i8>
  %1 = bitcast <1 x i64> %d to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
  ret void
}

declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test22
; ALL: pmulhuw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone

define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test21
; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test21_2
; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
; ALL: movd
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <2 x i32>
  %5 = extractelement <2 x i32> %4, i32 0
  ret i32 %5
}

declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test20
; ALL: pmuludq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone

define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test19
; ALL: cvtpi2pd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
  ret <2 x double> %2
}

declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test18
; ALL: cvttpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test17
; ALL: cvtpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone

define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test16
; ALL: palignr
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone

define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test15
; ALL: pabsd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone

define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test14
; ALL: pabsw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone

define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test13
; ALL: pabsb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test12
; ALL: psignd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test11
; ALL: psignw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test10
; ALL: psignb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test9
; ALL: pshufb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test8
; ALL: pmulhrsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test7
; ALL: pmaddubsw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test6
; ALL: phsubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test5
; ALL: phsubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test4
; ALL: phsubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test3
; ALL: phaddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test2
; ALL: phaddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

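; The last two tests differ from the pattern above: test89 passes the MMX
; operand directly as an x86_mmx argument to cvtpi2ps, and test90 checks that
; llvm.x86.mmx.emms lowers to the emms instruction, which clears the MMX state.
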
define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
; ALL-LABEL: @test89
; ALL: cvtpi2ps
  %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b)
  ret <4 x float> %c
}

declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone

; ALL-LABEL: test90
define void @test90() {
; ALL-LABEL: @test90
; ALL: emms
  call void @llvm.x86.mmx.emms()
  ret void
}

declare void @llvm.x86.mmx.emms()
   1456