Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s
      2 ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
      3 ; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s
      4 ; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
      5 
      6 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
      7 
      8 define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.ssse3.phadd.w: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "phaddw" in llc's output.
      9 ; CHECK: phaddw
     10 entry:
     11   %0 = bitcast <1 x i64> %b to <4 x i16>
     12   %1 = bitcast <1 x i64> %a to <4 x i16>
     13   %2 = bitcast <4 x i16> %1 to x86_mmx
     14   %3 = bitcast <4 x i16> %0 to x86_mmx
     15   %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
     16   %5 = bitcast x86_mmx %4 to <4 x i16>
     17   %6 = bitcast <4 x i16> %5 to <1 x i64>
     18   %7 = extractelement <1 x i64> %6, i32 0
     19   ret i64 %7
     20 }
     21 
     22 declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
     23 
     24 define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pcmpgt.d: %a and %b are each bitcast via
        ; <2 x i32> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pcmpgtd" in the output.
     25 ; CHECK: pcmpgtd
     26 entry:
     27   %0 = bitcast <1 x i64> %b to <2 x i32>
     28   %1 = bitcast <1 x i64> %a to <2 x i32>
     29   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
     30   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
     31   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
     32   %3 = bitcast x86_mmx %2 to <2 x i32>
     33   %4 = bitcast <2 x i32> %3 to <1 x i64>
     34   %5 = extractelement <1 x i64> %4, i32 0
     35   ret i64 %5
     36 }
     37 
     38 declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
     39 
     40 define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pcmpgt.w: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pcmpgtw" in the output.
     41 ; CHECK: pcmpgtw
     42 entry:
     43   %0 = bitcast <1 x i64> %b to <4 x i16>
     44   %1 = bitcast <1 x i64> %a to <4 x i16>
     45   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
     46   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
     47   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
     48   %3 = bitcast x86_mmx %2 to <4 x i16>
     49   %4 = bitcast <4 x i16> %3 to <1 x i64>
     50   %5 = extractelement <1 x i64> %4, i32 0
     51   ret i64 %5
     52 }
     53 
     54 declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
     55 
     56 define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pcmpgt.b: %a and %b are each bitcast via
        ; <8 x i8> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pcmpgtb" in the output.
     57 ; CHECK: pcmpgtb
     58 entry:
     59   %0 = bitcast <1 x i64> %b to <8 x i8>
     60   %1 = bitcast <1 x i64> %a to <8 x i8>
     61   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
     62   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
     63   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
     64   %3 = bitcast x86_mmx %2 to <8 x i8>
     65   %4 = bitcast <8 x i8> %3 to <1 x i64>
     66   %5 = extractelement <1 x i64> %4, i32 0
     67   ret i64 %5
     68 }
     69 
     70 declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
     71 
     72 define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pcmpeq.d: %a and %b are each bitcast via
        ; <2 x i32> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pcmpeqd" in the output.
     73 ; CHECK: pcmpeqd
     74 entry:
     75   %0 = bitcast <1 x i64> %b to <2 x i32>
     76   %1 = bitcast <1 x i64> %a to <2 x i32>
     77   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
     78   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
     79   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
     80   %3 = bitcast x86_mmx %2 to <2 x i32>
     81   %4 = bitcast <2 x i32> %3 to <1 x i64>
     82   %5 = extractelement <1 x i64> %4, i32 0
     83   ret i64 %5
     84 }
     85 
     86 declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
     87 
     88 define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pcmpeq.w: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pcmpeqw" in the output.
     89 ; CHECK: pcmpeqw
     90 entry:
     91   %0 = bitcast <1 x i64> %b to <4 x i16>
     92   %1 = bitcast <1 x i64> %a to <4 x i16>
     93   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
     94   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
     95   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
     96   %3 = bitcast x86_mmx %2 to <4 x i16>
     97   %4 = bitcast <4 x i16> %3 to <1 x i64>
     98   %5 = extractelement <1 x i64> %4, i32 0
     99   ret i64 %5
    100 }
    101 
    102 declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
    103 
    104 define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pcmpeq.b: %a and %b are each bitcast via
        ; <8 x i8> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pcmpeqb" in the output.
    105 ; CHECK: pcmpeqb
    106 entry:
    107   %0 = bitcast <1 x i64> %b to <8 x i8>
    108   %1 = bitcast <1 x i64> %a to <8 x i8>
    109   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    110   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    111   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    112   %3 = bitcast x86_mmx %2 to <8 x i8>
    113   %4 = bitcast <8 x i8> %3 to <1 x i64>
    114   %5 = extractelement <1 x i64> %4, i32 0
    115   ret i64 %5
    116 }
    117 
    118 declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
    119 
    120 define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.punpckldq: %a and %b are each bitcast via
        ; <2 x i32> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "punpckldq" in the output.
    121 ; CHECK: punpckldq
    122 entry:
    123   %0 = bitcast <1 x i64> %b to <2 x i32>
    124   %1 = bitcast <1 x i64> %a to <2 x i32>
    125   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    126   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    127   %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    128   %3 = bitcast x86_mmx %2 to <2 x i32>
    129   %4 = bitcast <2 x i32> %3 to <1 x i64>
    130   %5 = extractelement <1 x i64> %4, i32 0
    131   ret i64 %5
    132 }
    133 
    134 declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
    135 
    136 define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.punpcklwd: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "punpcklwd" in the output.
    137 ; CHECK: punpcklwd
    138 entry:
    139   %0 = bitcast <1 x i64> %b to <4 x i16>
    140   %1 = bitcast <1 x i64> %a to <4 x i16>
    141   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    142   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    143   %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    144   %3 = bitcast x86_mmx %2 to <4 x i16>
    145   %4 = bitcast <4 x i16> %3 to <1 x i64>
    146   %5 = extractelement <1 x i64> %4, i32 0
    147   ret i64 %5
    148 }
    149 
    150 declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
    151 
    152 define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.punpcklbw: %a and %b are each bitcast via
        ; <8 x i8> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "punpcklbw" in the output.
    153 ; CHECK: punpcklbw
    154 entry:
    155   %0 = bitcast <1 x i64> %b to <8 x i8>
    156   %1 = bitcast <1 x i64> %a to <8 x i8>
    157   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    158   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    159   %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    160   %3 = bitcast x86_mmx %2 to <8 x i8>
    161   %4 = bitcast <8 x i8> %3 to <1 x i64>
    162   %5 = extractelement <1 x i64> %4, i32 0
    163   ret i64 %5
    164 }
    165 
    166 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
    167 
    168 define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.punpckhdq: %a and %b are each bitcast via
        ; <2 x i32> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "punpckhdq" in the output.
    169 ; CHECK: punpckhdq
    170 entry:
    171   %0 = bitcast <1 x i64> %b to <2 x i32>
    172   %1 = bitcast <1 x i64> %a to <2 x i32>
    173   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    174   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    175   %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    176   %3 = bitcast x86_mmx %2 to <2 x i32>
    177   %4 = bitcast <2 x i32> %3 to <1 x i64>
    178   %5 = extractelement <1 x i64> %4, i32 0
    179   ret i64 %5
    180 }
    181 
    182 declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
    183 
    184 define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.punpckhwd: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "punpckhwd" in the output.
    185 ; CHECK: punpckhwd
    186 entry:
    187   %0 = bitcast <1 x i64> %b to <4 x i16>
    188   %1 = bitcast <1 x i64> %a to <4 x i16>
    189   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    190   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    191   %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    192   %3 = bitcast x86_mmx %2 to <4 x i16>
    193   %4 = bitcast <4 x i16> %3 to <1 x i64>
    194   %5 = extractelement <1 x i64> %4, i32 0
    195   ret i64 %5
    196 }
    197 
    198 declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
    199 
    200 define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.punpckhbw: %a and %b are each bitcast via
        ; <8 x i8> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "punpckhbw" in the output.
    201 ; CHECK: punpckhbw
    202 entry:
    203   %0 = bitcast <1 x i64> %b to <8 x i8>
    204   %1 = bitcast <1 x i64> %a to <8 x i8>
    205   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    206   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    207   %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    208   %3 = bitcast x86_mmx %2 to <8 x i8>
    209   %4 = bitcast <8 x i8> %3 to <1 x i64>
    210   %5 = extractelement <1 x i64> %4, i32 0
    211   ret i64 %5
    212 }
    213 
    214 declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
    215 
    216 define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.packuswb: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx and passed to the intrinsic (%a first); the result is
        ; viewed as <8 x i8> before being returned as i64. The FileCheck line below
        ; expects "packuswb" in the output.
    217 ; CHECK: packuswb
    218 entry:
    219   %0 = bitcast <1 x i64> %b to <4 x i16>
    220   %1 = bitcast <1 x i64> %a to <4 x i16>
    221   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    222   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    223   %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    224   %3 = bitcast x86_mmx %2 to <8 x i8>
    225   %4 = bitcast <8 x i8> %3 to <1 x i64>
    226   %5 = extractelement <1 x i64> %4, i32 0
    227   ret i64 %5
    228 }
    229 
    230 declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
    231 
    232 define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.packssdw: %a and %b are each bitcast via
        ; <2 x i32> to x86_mmx and passed to the intrinsic (%a first); the result is
        ; viewed as <4 x i16> before being returned as i64. The FileCheck line below
        ; expects "packssdw" in the output.
    233 ; CHECK: packssdw
    234 entry:
    235   %0 = bitcast <1 x i64> %b to <2 x i32>
    236   %1 = bitcast <1 x i64> %a to <2 x i32>
    237   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    238   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    239   %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    240   %3 = bitcast x86_mmx %2 to <4 x i16>
    241   %4 = bitcast <4 x i16> %3 to <1 x i64>
    242   %5 = extractelement <1 x i64> %4, i32 0
    243   ret i64 %5
    244 }
    245 
    246 declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
    247 
    248 define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.packsswb: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx and passed to the intrinsic (%a first); the result is
        ; viewed as <8 x i8> before being returned as i64. The FileCheck line below
        ; expects "packsswb" in the output.
    249 ; CHECK: packsswb
    250 entry:
    251   %0 = bitcast <1 x i64> %b to <4 x i16>
    252   %1 = bitcast <1 x i64> %a to <4 x i16>
    253   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    254   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    255   %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    256   %3 = bitcast x86_mmx %2 to <8 x i8>
    257   %4 = bitcast <8 x i8> %3 to <1 x i64>
    258   %5 = extractelement <1 x i64> %4, i32 0
    259   ret i64 %5
    260 }
    261 
    262 declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
    263 
    264 define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psrai.d: %a is bitcast via <2 x i32> to
        ; x86_mmx and passed to the intrinsic together with the constant i32 3; the
        ; result is returned as i64. The FileCheck line below expects "psrad".
    265 ; CHECK: psrad
    266 entry:
    267   %0 = bitcast <1 x i64> %a to <2 x i32>
    268   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    269   %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
    270   %2 = bitcast x86_mmx %1 to <2 x i32>
    271   %3 = bitcast <2 x i32> %2 to <1 x i64>
    272   %4 = extractelement <1 x i64> %3, i32 0
    273   ret i64 %4
    274 }
    275 
    276 declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
    277 
    278 define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psrai.w: %a is bitcast via <4 x i16> to
        ; x86_mmx and passed to the intrinsic together with the constant i32 3; the
        ; result is returned as i64. The FileCheck line below expects "psraw".
    279 ; CHECK: psraw
    280 entry:
    281   %0 = bitcast <1 x i64> %a to <4 x i16>
    282   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    283   %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
    284   %2 = bitcast x86_mmx %1 to <4 x i16>
    285   %3 = bitcast <4 x i16> %2 to <1 x i64>
    286   %4 = extractelement <1 x i64> %3, i32 0
    287   ret i64 %4
    288 }
    289 
    290 declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
    291 
    292 define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psrli.q: the single i64 element of %a is
        ; extracted, bitcast directly to x86_mmx, and passed to the intrinsic with the
        ; constant i32 3; the x86_mmx result is bitcast straight back to i64.
        ; The FileCheck line below expects "psrlq" in the output.
    293 ; CHECK: psrlq
    294 entry:
    295   %0 = extractelement <1 x i64> %a, i32 0
    296   %mmx_var.i = bitcast i64 %0 to x86_mmx
    297   %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
    298   %2 = bitcast x86_mmx %1 to i64
    299   ret i64 %2
    300 }
    301 
    302 declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
    303 
    304 define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psrli.d: %a is bitcast via <2 x i32> to
        ; x86_mmx and passed to the intrinsic together with the constant i32 3; the
        ; result is returned as i64. The FileCheck line below expects "psrld".
    305 ; CHECK: psrld
    306 entry:
    307   %0 = bitcast <1 x i64> %a to <2 x i32>
    308   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    309   %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
    310   %2 = bitcast x86_mmx %1 to <2 x i32>
    311   %3 = bitcast <2 x i32> %2 to <1 x i64>
    312   %4 = extractelement <1 x i64> %3, i32 0
    313   ret i64 %4
    314 }
    315 
    316 declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
    317 
    318 define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psrli.w: %a is bitcast via <4 x i16> to
        ; x86_mmx and passed to the intrinsic together with the constant i32 3; the
        ; result is returned as i64. The FileCheck line below expects "psrlw".
    319 ; CHECK: psrlw
    320 entry:
    321   %0 = bitcast <1 x i64> %a to <4 x i16>
    322   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    323   %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
    324   %2 = bitcast x86_mmx %1 to <4 x i16>
    325   %3 = bitcast <4 x i16> %2 to <1 x i64>
    326   %4 = extractelement <1 x i64> %3, i32 0
    327   ret i64 %4
    328 }
    329 
    330 declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
    331 
    332 define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pslli.q: the single i64 element of %a is
        ; extracted, bitcast directly to x86_mmx, and passed to the intrinsic with the
        ; constant i32 3; the x86_mmx result is bitcast straight back to i64.
        ; The FileCheck line below expects "psllq" in the output.
    333 ; CHECK: psllq
    334 entry:
    335   %0 = extractelement <1 x i64> %a, i32 0
    336   %mmx_var.i = bitcast i64 %0 to x86_mmx
    337   %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
    338   %2 = bitcast x86_mmx %1 to i64
    339   ret i64 %2
    340 }
    341 
    342 declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
    343 
    344 define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pslli.d: %a is bitcast via <2 x i32> to
        ; x86_mmx and passed to the intrinsic together with the constant i32 3; the
        ; result is returned as i64. The FileCheck line below expects "pslld".
    345 ; CHECK: pslld
    346 entry:
    347   %0 = bitcast <1 x i64> %a to <2 x i32>
    348   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    349   %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
    350   %2 = bitcast x86_mmx %1 to <2 x i32>
    351   %3 = bitcast <2 x i32> %2 to <1 x i64>
    352   %4 = extractelement <1 x i64> %3, i32 0
    353   ret i64 %4
    354 }
    355 
    356 declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
    357 
    358 define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pslli.w: %a is bitcast via <4 x i16> to
        ; x86_mmx and passed to the intrinsic together with the constant i32 3; the
        ; result is returned as i64. The FileCheck line below expects "psllw".
    359 ; CHECK: psllw
    360 entry:
    361   %0 = bitcast <1 x i64> %a to <4 x i16>
    362   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    363   %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
    364   %2 = bitcast x86_mmx %1 to <4 x i16>
    365   %3 = bitcast <4 x i16> %2 to <1 x i64>
    366   %4 = extractelement <1 x i64> %3, i32 0
    367   ret i64 %4
    368 }
    369 
    370 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
    371 
    372 define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psra.d: %a is bitcast via <2 x i32> to
        ; x86_mmx, while %b's i64 element is extracted and bitcast directly to
        ; x86_mmx; both go to the intrinsic (%a first) and the result is returned as
        ; i64. The FileCheck line below expects "psrad" in the output.
    373 ; CHECK: psrad
    374 entry:
    375   %0 = bitcast <1 x i64> %a to <2 x i32>
    376   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    377   %1 = extractelement <1 x i64> %b, i32 0
    378   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    379   %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    380   %3 = bitcast x86_mmx %2 to <2 x i32>
    381   %4 = bitcast <2 x i32> %3 to <1 x i64>
    382   %5 = extractelement <1 x i64> %4, i32 0
    383   ret i64 %5
    384 }
    385 
    386 declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
    387 
    388 define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psra.w: %a is bitcast via <4 x i16> to
        ; x86_mmx, while %b's i64 element is extracted and bitcast directly to
        ; x86_mmx; both go to the intrinsic (%a first) and the result is returned as
        ; i64. The FileCheck line below expects "psraw" in the output.
    389 ; CHECK: psraw
    390 entry:
    391   %0 = bitcast <1 x i64> %a to <4 x i16>
    392   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    393   %1 = extractelement <1 x i64> %b, i32 0
    394   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    395   %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    396   %3 = bitcast x86_mmx %2 to <4 x i16>
    397   %4 = bitcast <4 x i16> %3 to <1 x i64>
    398   %5 = extractelement <1 x i64> %4, i32 0
    399   ret i64 %5
    400 }
    401 
    402 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
    403 
    404 define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psrl.q: the i64 elements of %a and %b are
        ; extracted and bitcast directly to x86_mmx, passed to the intrinsic
        ; (%a first), and the x86_mmx result is bitcast straight back to i64.
        ; The FileCheck line below expects "psrlq" in the output.
    405 ; CHECK: psrlq
    406 entry:
    407   %0 = extractelement <1 x i64> %a, i32 0
    408   %mmx_var.i = bitcast i64 %0 to x86_mmx
    409   %1 = extractelement <1 x i64> %b, i32 0
    410   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    411   %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    412   %3 = bitcast x86_mmx %2 to i64
    413   ret i64 %3
    414 }
    415 
    416 declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
    417 
    418 define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psrl.d: %a is bitcast via <2 x i32> to
        ; x86_mmx, while %b's i64 element is extracted and bitcast directly to
        ; x86_mmx; both go to the intrinsic (%a first) and the result is returned as
        ; i64. The FileCheck line below expects "psrld" in the output.
    419 ; CHECK: psrld
    420 entry:
    421   %0 = bitcast <1 x i64> %a to <2 x i32>
    422   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    423   %1 = extractelement <1 x i64> %b, i32 0
    424   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    425   %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    426   %3 = bitcast x86_mmx %2 to <2 x i32>
    427   %4 = bitcast <2 x i32> %3 to <1 x i64>
    428   %5 = extractelement <1 x i64> %4, i32 0
    429   ret i64 %5
    430 }
    431 
    432 declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
    433 
    434 define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psrl.w: %a is bitcast via <4 x i16> to
        ; x86_mmx, while %b's i64 element is extracted and bitcast directly to
        ; x86_mmx; both go to the intrinsic (%a first) and the result is returned as
        ; i64. The FileCheck line below expects "psrlw" in the output.
    435 ; CHECK: psrlw
    436 entry:
    437   %0 = bitcast <1 x i64> %a to <4 x i16>
    438   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    439   %1 = extractelement <1 x i64> %b, i32 0
    440   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    441   %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    442   %3 = bitcast x86_mmx %2 to <4 x i16>
    443   %4 = bitcast <4 x i16> %3 to <1 x i64>
    444   %5 = extractelement <1 x i64> %4, i32 0
    445   ret i64 %5
    446 }
    447 
    448 declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
    449 
    450 define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psll.q: the i64 elements of %a and %b are
        ; extracted and bitcast directly to x86_mmx, passed to the intrinsic
        ; (%a first), and the x86_mmx result is bitcast straight back to i64.
        ; The FileCheck line below expects "psllq" in the output.
    451 ; CHECK: psllq
    452 entry:
    453   %0 = extractelement <1 x i64> %a, i32 0
    454   %mmx_var.i = bitcast i64 %0 to x86_mmx
    455   %1 = extractelement <1 x i64> %b, i32 0
    456   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    457   %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    458   %3 = bitcast x86_mmx %2 to i64
    459   ret i64 %3
    460 }
    461 
    462 declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
    463 
    464 define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psll.d: %a is bitcast via <2 x i32> to
        ; x86_mmx, while %b's i64 element is extracted and bitcast directly to
        ; x86_mmx; both go to the intrinsic (%a first) and the result is returned as
        ; i64. The FileCheck line below expects "pslld" in the output.
    465 ; CHECK: pslld
    466 entry:
    467   %0 = bitcast <1 x i64> %a to <2 x i32>
    468   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    469   %1 = extractelement <1 x i64> %b, i32 0
    470   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    471   %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    472   %3 = bitcast x86_mmx %2 to <2 x i32>
    473   %4 = bitcast <2 x i32> %3 to <1 x i64>
    474   %5 = extractelement <1 x i64> %4, i32 0
    475   ret i64 %5
    476 }
    477 
    478 declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
    479 
    480 define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psll.w: %a is bitcast via <4 x i16> to
        ; x86_mmx, while %b's i64 element is extracted and bitcast directly to
        ; x86_mmx; both go to the intrinsic (%a first) and the result is returned as
        ; i64. The FileCheck line below expects "psllw" in the output.
    481 ; CHECK: psllw
    482 entry:
    483   %0 = bitcast <1 x i64> %a to <4 x i16>
    484   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    485   %1 = extractelement <1 x i64> %b, i32 0
    486   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    487   %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    488   %3 = bitcast x86_mmx %2 to <4 x i16>
    489   %4 = bitcast <4 x i16> %3 to <1 x i64>
    490   %5 = extractelement <1 x i64> %4, i32 0
    491   ret i64 %5
    492 }
    493 
    494 declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
    495 
    496 define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pxor: %a and %b are each bitcast via
        ; <2 x i32> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pxor" in the output.
    497 ; CHECK: pxor
    498 entry:
    499   %0 = bitcast <1 x i64> %b to <2 x i32>
    500   %1 = bitcast <1 x i64> %a to <2 x i32>
    501   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    502   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    503   %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    504   %3 = bitcast x86_mmx %2 to <2 x i32>
    505   %4 = bitcast <2 x i32> %3 to <1 x i64>
    506   %5 = extractelement <1 x i64> %4, i32 0
    507   ret i64 %5
    508 }
    509 
    510 declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
    511 
    512 define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.por: %a and %b are each bitcast via
        ; <2 x i32> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "por" in the output.
    513 ; CHECK: por
    514 entry:
    515   %0 = bitcast <1 x i64> %b to <2 x i32>
    516   %1 = bitcast <1 x i64> %a to <2 x i32>
    517   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    518   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    519   %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    520   %3 = bitcast x86_mmx %2 to <2 x i32>
    521   %4 = bitcast <2 x i32> %3 to <1 x i64>
    522   %5 = extractelement <1 x i64> %4, i32 0
    523   ret i64 %5
    524 }
    525 
    526 declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
    527 
    528 define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pandn: %a and %b are each bitcast via
        ; <2 x i32> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pandn" in the output.
    529 ; CHECK: pandn
    530 entry:
    531   %0 = bitcast <1 x i64> %b to <2 x i32>
    532   %1 = bitcast <1 x i64> %a to <2 x i32>
    533   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    534   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    535   %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    536   %3 = bitcast x86_mmx %2 to <2 x i32>
    537   %4 = bitcast <2 x i32> %3 to <1 x i64>
    538   %5 = extractelement <1 x i64> %4, i32 0
    539   ret i64 %5
    540 }
    541 
    542 declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
    543 
    544 define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pand: %a and %b are each bitcast via
        ; <2 x i32> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pand" in the output.
    545 ; CHECK: pand
    546 entry:
    547   %0 = bitcast <1 x i64> %b to <2 x i32>
    548   %1 = bitcast <1 x i64> %a to <2 x i32>
    549   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    550   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    551   %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    552   %3 = bitcast x86_mmx %2 to <2 x i32>
    553   %4 = bitcast <2 x i32> %3 to <1 x i64>
    554   %5 = extractelement <1 x i64> %4, i32 0
    555   ret i64 %5
    556 }
    557 
    558 declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
    559 
    560 define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pmull.w: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pmullw" in the output.
    561 ; CHECK: pmullw
    562 entry:
    563   %0 = bitcast <1 x i64> %b to <4 x i16>
    564   %1 = bitcast <1 x i64> %a to <4 x i16>
    565   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    566   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    567   %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    568   %3 = bitcast x86_mmx %2 to <4 x i16>
    569   %4 = bitcast <4 x i16> %3 to <1 x i64>
    570   %5 = extractelement <1 x i64> %4, i32 0
    571   ret i64 %5
    572 }
    573 
    574 define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pmull.w: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pmullw" in the output.
        ; NOTE(review): this body matches @test52 instruction for instruction (same
        ; intrinsic, same types) - confirm whether the duplication is intentional.
    575 ; CHECK: pmullw
    576 entry:
    577   %0 = bitcast <1 x i64> %b to <4 x i16>
    578   %1 = bitcast <1 x i64> %a to <4 x i16>
    579   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    580   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    581   %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    582   %3 = bitcast x86_mmx %2 to <4 x i16>
    583   %4 = bitcast <4 x i16> %3 to <1 x i64>
    584   %5 = extractelement <1 x i64> %4, i32 0
    585   ret i64 %5
    586 }
    587 
    588 declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
    589 
    590 define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pmulh.w: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "pmulhw" in the output.
    591 ; CHECK: pmulhw
    592 entry:
    593   %0 = bitcast <1 x i64> %b to <4 x i16>
    594   %1 = bitcast <1 x i64> %a to <4 x i16>
    595   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    596   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    597   %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    598   %3 = bitcast x86_mmx %2 to <4 x i16>
    599   %4 = bitcast <4 x i16> %3 to <1 x i64>
    600   %5 = extractelement <1 x i64> %4, i32 0
    601   ret i64 %5
    602 }
    603 
    604 declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
    605 
    606 define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.pmadd.wd: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx and passed to the intrinsic (%a first); the result is
        ; viewed as <2 x i32> before being returned as i64. The FileCheck line below
        ; expects "pmaddwd" in the output.
    607 ; CHECK: pmaddwd
    608 entry:
    609   %0 = bitcast <1 x i64> %b to <4 x i16>
    610   %1 = bitcast <1 x i64> %a to <4 x i16>
    611   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    612   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    613   %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    614   %3 = bitcast x86_mmx %2 to <2 x i32>
    615   %4 = bitcast <2 x i32> %3 to <1 x i64>
    616   %5 = extractelement <1 x i64> %4, i32 0
    617   ret i64 %5
    618 }
    619 
    620 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
    621 
    622 define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psubus.w: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "psubusw" in the output.
    623 ; CHECK: psubusw
    624 entry:
    625   %0 = bitcast <1 x i64> %b to <4 x i16>
    626   %1 = bitcast <1 x i64> %a to <4 x i16>
    627   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    628   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    629   %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    630   %3 = bitcast x86_mmx %2 to <4 x i16>
    631   %4 = bitcast <4 x i16> %3 to <1 x i64>
    632   %5 = extractelement <1 x i64> %4, i32 0
    633   ret i64 %5
    634 }
    635 
    636 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
    637 
    638 define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psubus.b: %a and %b are each bitcast via
        ; <8 x i8> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "psubusb" in the output.
    639 ; CHECK: psubusb
    640 entry:
    641   %0 = bitcast <1 x i64> %b to <8 x i8>
    642   %1 = bitcast <1 x i64> %a to <8 x i8>
    643   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    644   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    645   %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    646   %3 = bitcast x86_mmx %2 to <8 x i8>
    647   %4 = bitcast <8 x i8> %3 to <1 x i64>
    648   %5 = extractelement <1 x i64> %4, i32 0
    649   ret i64 %5
    650 }
    651 
    652 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
    653 
    654 define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psubs.w: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "psubsw" in the output.
    655 ; CHECK: psubsw
    656 entry:
    657   %0 = bitcast <1 x i64> %b to <4 x i16>
    658   %1 = bitcast <1 x i64> %a to <4 x i16>
    659   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    660   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    661   %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    662   %3 = bitcast x86_mmx %2 to <4 x i16>
    663   %4 = bitcast <4 x i16> %3 to <1 x i64>
    664   %5 = extractelement <1 x i64> %4, i32 0
    665   ret i64 %5
    666 }
    667 
    668 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
    669 
    670 define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psubs.b: %a and %b are each bitcast via
        ; <8 x i8> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "psubsb" in the output.
    671 ; CHECK: psubsb
    672 entry:
    673   %0 = bitcast <1 x i64> %b to <8 x i8>
    674   %1 = bitcast <1 x i64> %a to <8 x i8>
    675   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    676   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    677   %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    678   %3 = bitcast x86_mmx %2 to <8 x i8>
    679   %4 = bitcast <8 x i8> %3 to <1 x i64>
    680   %5 = extractelement <1 x i64> %4, i32 0
    681   ret i64 %5
    682 }
    683 
    684 define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psub.q (declared after this function): the
        ; i64 elements of %a and %b are extracted and bitcast directly to x86_mmx,
        ; passed to the intrinsic (%a first), and the x86_mmx result is bitcast
        ; straight back to i64. The call site carries no nounwind attribute.
        ; The FileCheck line below expects "psubq" in the output.
    685 ; CHECK: psubq
    686 entry:
    687   %0 = extractelement <1 x i64> %a, i32 0
    688   %mmx_var = bitcast i64 %0 to x86_mmx
    689   %1 = extractelement <1 x i64> %b, i32 0
    690   %mmx_var1 = bitcast i64 %1 to x86_mmx
    691   %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
    692   %3 = bitcast x86_mmx %2 to i64
    693   ret i64 %3
    694 }
    695 
    696 declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
    697 
    698 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
    699 
    700 define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psub.d: %a and %b are each bitcast via
        ; <2 x i32> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "psubd" in the output.
    701 ; CHECK: psubd
    702 entry:
    703   %0 = bitcast <1 x i64> %b to <2 x i32>
    704   %1 = bitcast <1 x i64> %a to <2 x i32>
    705   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    706   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    707   %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    708   %3 = bitcast x86_mmx %2 to <2 x i32>
    709   %4 = bitcast <2 x i32> %3 to <1 x i64>
    710   %5 = extractelement <1 x i64> %4, i32 0
    711   ret i64 %5
    712 }
    713 
    714 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
    715 
    716 define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Codegen test for @llvm.x86.mmx.psub.w: %a and %b are each bitcast via
        ; <4 x i16> to x86_mmx, passed to the intrinsic (%a first), and the result is
        ; returned as i64. The FileCheck line below expects "psubw" in the output.
    717 ; CHECK: psubw
    718 entry:
    719   %0 = bitcast <1 x i64> %b to <4 x i16>
    720   %1 = bitcast <1 x i64> %a to <4 x i16>
    721   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    722   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    723   %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    724   %3 = bitcast x86_mmx %2 to <4 x i16>
    725   %4 = bitcast <4 x i16> %3 to <1 x i64>
    726   %5 = extractelement <1 x i64> %4, i32 0
    727   ret i64 %5
    728 }
    729 
    730 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
    731 
    732 define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.psub.b on %a and %b (bitcast via <8 x i8> to x86_mmx); expects psubb.
        ; The label is matched first so psubb cannot match an earlier function's code.
        ; CHECK: test41:
    733 ; CHECK: psubb
    734 entry:
    735   %0 = bitcast <1 x i64> %b to <8 x i8>
    736   %1 = bitcast <1 x i64> %a to <8 x i8>
    737   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    738   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    739   %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    740   %3 = bitcast x86_mmx %2 to <8 x i8>
    741   %4 = bitcast <8 x i8> %3 to <1 x i64>
    742   %5 = extractelement <1 x i64> %4, i32 0
    743   ret i64 %5
    744 }
    745 
    746 declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
    747 
    748 define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.paddus.w on %a and %b (bitcast via <4 x i16> to x86_mmx); expects paddusw.
        ; The label is matched first so paddusw cannot match an earlier function's code.
        ; CHECK: test40:
    749 ; CHECK: paddusw
    750 entry:
    751   %0 = bitcast <1 x i64> %b to <4 x i16>
    752   %1 = bitcast <1 x i64> %a to <4 x i16>
    753   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    754   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    755   %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    756   %3 = bitcast x86_mmx %2 to <4 x i16>
    757   %4 = bitcast <4 x i16> %3 to <1 x i64>
    758   %5 = extractelement <1 x i64> %4, i32 0
    759   ret i64 %5
    760 }
    761 
    762 declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
    763 
    764 define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.paddus.b on %a and %b (bitcast via <8 x i8> to x86_mmx); expects paddusb.
        ; The label is matched first so paddusb cannot match an earlier function's code.
        ; CHECK: test39:
    765 ; CHECK: paddusb
    766 entry:
    767   %0 = bitcast <1 x i64> %b to <8 x i8>
    768   %1 = bitcast <1 x i64> %a to <8 x i8>
    769   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    770   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    771   %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    772   %3 = bitcast x86_mmx %2 to <8 x i8>
    773   %4 = bitcast <8 x i8> %3 to <1 x i64>
    774   %5 = extractelement <1 x i64> %4, i32 0
    775   ret i64 %5
    776 }
    777 
    778 declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
    779 
    780 define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.padds.w on %a and %b (bitcast via <4 x i16> to x86_mmx); expects paddsw.
        ; The label is matched first so paddsw cannot match an earlier function's code.
        ; CHECK: test38:
    781 ; CHECK: paddsw
    782 entry:
    783   %0 = bitcast <1 x i64> %b to <4 x i16>
    784   %1 = bitcast <1 x i64> %a to <4 x i16>
    785   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    786   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    787   %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    788   %3 = bitcast x86_mmx %2 to <4 x i16>
    789   %4 = bitcast <4 x i16> %3 to <1 x i64>
    790   %5 = extractelement <1 x i64> %4, i32 0
    791   ret i64 %5
    792 }
    793 
    794 declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
    795 
    796 define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.padds.b on %a and %b (bitcast via <8 x i8> to x86_mmx); expects paddsb.
        ; The label is matched first so paddsb cannot match an earlier function's code.
        ; CHECK: test37:
    797 ; CHECK: paddsb
    798 entry:
    799   %0 = bitcast <1 x i64> %b to <8 x i8>
    800   %1 = bitcast <1 x i64> %a to <8 x i8>
    801   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    802   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    803   %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    804   %3 = bitcast x86_mmx %2 to <8 x i8>
    805   %4 = bitcast <8 x i8> %3 to <1 x i64>
    806   %5 = extractelement <1 x i64> %4, i32 0
    807   ret i64 %5
    808 }
    809 
    810 declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
    811 
    812 define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.padd.q on the i64 elements of %a and %b (bitcast to x86_mmx); expects paddq.
        ; The label is matched first so paddq cannot match an earlier function's code.
        ; CHECK: test36:
    813 ; CHECK: paddq
    814 entry:
    815   %0 = extractelement <1 x i64> %a, i32 0
    816   %mmx_var = bitcast i64 %0 to x86_mmx
    817   %1 = extractelement <1 x i64> %b, i32 0
    818   %mmx_var1 = bitcast i64 %1 to x86_mmx
    819   %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
    820   %3 = bitcast x86_mmx %2 to i64
    821   ret i64 %3
    822 }
    823 
    824 declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
    825 
    826 define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.padd.d on %a and %b (bitcast via <2 x i32> to x86_mmx); expects paddd.
        ; The label is matched first so paddd cannot match an earlier function's code.
        ; CHECK: test35:
    827 ; CHECK: paddd
    828 entry:
    829   %0 = bitcast <1 x i64> %b to <2 x i32>
    830   %1 = bitcast <1 x i64> %a to <2 x i32>
    831   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    832   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    833   %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    834   %3 = bitcast x86_mmx %2 to <2 x i32>
    835   %4 = bitcast <2 x i32> %3 to <1 x i64>
    836   %5 = extractelement <1 x i64> %4, i32 0
    837   ret i64 %5
    838 }
    839 
    840 declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
    841 
    842 define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.padd.w on %a and %b (bitcast via <4 x i16> to x86_mmx); expects paddw.
        ; The label is matched first so paddw cannot match an earlier function's code.
        ; CHECK: test34:
    843 ; CHECK: paddw
    844 entry:
    845   %0 = bitcast <1 x i64> %b to <4 x i16>
    846   %1 = bitcast <1 x i64> %a to <4 x i16>
    847   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    848   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    849   %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    850   %3 = bitcast x86_mmx %2 to <4 x i16>
    851   %4 = bitcast <4 x i16> %3 to <1 x i64>
    852   %5 = extractelement <1 x i64> %4, i32 0
    853   ret i64 %5
    854 }
    855 
    856 declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
    857 
    858 define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.padd.b on %a and %b (bitcast via <8 x i8> to x86_mmx); expects paddb.
        ; The label is matched first so paddb cannot match an earlier function's code.
        ; CHECK: test33:
    859 ; CHECK: paddb
    860 entry:
    861   %0 = bitcast <1 x i64> %b to <8 x i8>
    862   %1 = bitcast <1 x i64> %a to <8 x i8>
    863   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    864   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    865   %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    866   %3 = bitcast x86_mmx %2 to <8 x i8>
    867   %4 = bitcast <8 x i8> %3 to <1 x i64>
    868   %5 = extractelement <1 x i64> %4, i32 0
    869   ret i64 %5
    870 }
    871 
    872 declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
    873 
    874 define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.psad.bw on %a and %b (bitcast via <8 x i8> to x86_mmx) and
        ; returns the result bitcast straight to i64; expects psadbw.
        ; The label is matched first so psadbw cannot match an earlier function's code.
        ; CHECK: test32:
    875 ; CHECK: psadbw
    876 entry:
    877   %0 = bitcast <1 x i64> %b to <8 x i8>
    878   %1 = bitcast <1 x i64> %a to <8 x i8>
    879   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    880   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    881   %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    882   %3 = bitcast x86_mmx %2 to i64
    883   ret i64 %3
    884 }
    885 
    886 declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
    887 
    888 define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.pmins.w on %a and %b (bitcast via <4 x i16> to x86_mmx); expects pminsw.
        ; The label is matched first so pminsw cannot match an earlier function's code.
        ; CHECK: test31:
    889 ; CHECK: pminsw
    890 entry:
    891   %0 = bitcast <1 x i64> %b to <4 x i16>
    892   %1 = bitcast <1 x i64> %a to <4 x i16>
    893   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    894   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    895   %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    896   %3 = bitcast x86_mmx %2 to <4 x i16>
    897   %4 = bitcast <4 x i16> %3 to <1 x i64>
    898   %5 = extractelement <1 x i64> %4, i32 0
    899   ret i64 %5
    900 }
    901 
    902 declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
    903 
    904 define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.pminu.b on %a and %b (bitcast via <8 x i8> to x86_mmx); expects pminub.
        ; The label is matched first so pminub cannot match an earlier function's code.
        ; CHECK: test30:
    905 ; CHECK: pminub
    906 entry:
    907   %0 = bitcast <1 x i64> %b to <8 x i8>
    908   %1 = bitcast <1 x i64> %a to <8 x i8>
    909   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    910   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    911   %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    912   %3 = bitcast x86_mmx %2 to <8 x i8>
    913   %4 = bitcast <8 x i8> %3 to <1 x i64>
    914   %5 = extractelement <1 x i64> %4, i32 0
    915   ret i64 %5
    916 }
    917 
    918 declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
    919 
    920 define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.pmaxs.w on %a and %b (bitcast via <4 x i16> to x86_mmx); expects pmaxsw.
        ; The label is matched first so pmaxsw cannot match an earlier function's code.
        ; CHECK: test29:
    921 ; CHECK: pmaxsw
    922 entry:
    923   %0 = bitcast <1 x i64> %b to <4 x i16>
    924   %1 = bitcast <1 x i64> %a to <4 x i16>
    925   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    926   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    927   %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    928   %3 = bitcast x86_mmx %2 to <4 x i16>
    929   %4 = bitcast <4 x i16> %3 to <1 x i64>
    930   %5 = extractelement <1 x i64> %4, i32 0
    931   ret i64 %5
    932 }
    933 
    934 declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
    935 
    936 define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.pmaxu.b on %a and %b (bitcast via <8 x i8> to x86_mmx); expects pmaxub.
        ; The label is matched first so pmaxub cannot match an earlier function's code.
        ; CHECK: test28:
    937 ; CHECK: pmaxub
    938 entry:
    939   %0 = bitcast <1 x i64> %b to <8 x i8>
    940   %1 = bitcast <1 x i64> %a to <8 x i8>
    941   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    942   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    943   %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    944   %3 = bitcast x86_mmx %2 to <8 x i8>
    945   %4 = bitcast <8 x i8> %3 to <1 x i64>
    946   %5 = extractelement <1 x i64> %4, i32 0
    947   ret i64 %5
    948 }
    949 
    950 declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
    951 
    952 define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.pavg.w on %a and %b (bitcast via <4 x i16> to x86_mmx); expects pavgw.
        ; The label is matched first so pavgw cannot match an earlier function's code.
        ; CHECK: test27:
    953 ; CHECK: pavgw
    954 entry:
    955   %0 = bitcast <1 x i64> %b to <4 x i16>
    956   %1 = bitcast <1 x i64> %a to <4 x i16>
    957   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    958   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    959   %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    960   %3 = bitcast x86_mmx %2 to <4 x i16>
    961   %4 = bitcast <4 x i16> %3 to <1 x i64>
    962   %5 = extractelement <1 x i64> %4, i32 0
    963   ret i64 %5
    964 }
    965 
    966 declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
    967 
    968 define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.pavg.b on %a and %b (bitcast via <8 x i8> to x86_mmx); expects pavgb.
        ; The label is matched first so pavgb cannot match an earlier function's code.
        ; CHECK: test26:
    969 ; CHECK: pavgb
    970 entry:
    971   %0 = bitcast <1 x i64> %b to <8 x i8>
    972   %1 = bitcast <1 x i64> %a to <8 x i8>
    973   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    974   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    975   %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    976   %3 = bitcast x86_mmx %2 to <8 x i8>
    977   %4 = bitcast <8 x i8> %3 to <1 x i64>
    978   %5 = extractelement <1 x i64> %4, i32 0
    979   ret i64 %5
    980 }
    981 
    982 declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
    983 
    984 define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
        ; Passes %p (cast to x86_mmx*) and the i64 element of %a (bitcast to x86_mmx) to
        ; llvm.x86.mmx.movnt.dq; expects movntq.
        ; The label is matched first so movntq cannot match an earlier function's code.
        ; CHECK: test25:
    985 ; CHECK: movntq
    986 entry:
    987   %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
    988   %0 = extractelement <1 x i64> %a, i32 0
    989   %mmx_var.i = bitcast i64 %0 to x86_mmx
    990   tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
    991   ret void
    992 }
    993 
    994 declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
    995 
    996 define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.pmovmskb on %a (bitcast via <8 x i8> to x86_mmx) and returns
        ; its i32 result; expects pmovmskb.
        ; The label is matched first so pmovmskb cannot match an earlier function's code.
        ; CHECK: test24:
    997 ; CHECK: pmovmskb
    998 entry:
    999   %0 = bitcast <1 x i64> %a to <8 x i8>
   1000   %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
   1001   %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
   1002   ret i32 %1
   1003 }
   1004 
   1005 declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
   1006 
   1007 define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
        ; Calls llvm.x86.mmx.maskmovq with %d and %n (bitcast via <8 x i8> to x86_mmx)
        ; and the pointer %p; expects maskmovq.
        ; The label is matched first so maskmovq cannot match an earlier function's code.
        ; CHECK: test23:
   1008 ; CHECK: maskmovq
   1009 entry:
   1010   %0 = bitcast <1 x i64> %n to <8 x i8>
   1011   %1 = bitcast <1 x i64> %d to <8 x i8>
   1012   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
   1013   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
   1014   tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
   1015   ret void
   1016 }
   1017 
   1018 declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
   1019 
   1020 define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.pmulhu.w on %a and %b (bitcast via <4 x i16> to x86_mmx); expects pmulhuw.
        ; The label is matched first so pmulhuw cannot match an earlier function's code.
        ; CHECK: test22:
   1021 ; CHECK: pmulhuw
   1022 entry:
   1023   %0 = bitcast <1 x i64> %b to <4 x i16>
   1024   %1 = bitcast <1 x i64> %a to <4 x i16>
   1025   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
   1026   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
   1027   %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
   1028   %3 = bitcast x86_mmx %2 to <4 x i16>
   1029   %4 = bitcast <4 x i16> %3 to <1 x i64>
   1030   %5 = extractelement <1 x i64> %4, i32 0
   1031   ret i64 %5
   1032 }
   1033 
   1034 declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
   1035 
   1036 define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls llvm.x86.sse.pshuf.w on %a (bitcast via <4 x i16> to x86_mmx) with
        ; immediate 3 and returns the result as i64; expects pshufw.
        ; The label is matched first so pshufw cannot match an earlier function's code.
        ; CHECK: test21:
   1037 ; CHECK: pshufw
   1038 entry:
   1039   %0 = bitcast <1 x i64> %a to <4 x i16>
   1040   %1 = bitcast <4 x i16> %0 to x86_mmx
   1041   %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
   1042   %3 = bitcast x86_mmx %2 to <4 x i16>
   1043   %4 = bitcast <4 x i16> %3 to <1 x i64>
   1044   %5 = extractelement <1 x i64> %4, i32 0
   1045   ret i64 %5
   1046 }
   1047 
   1048 define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Makes the same llvm.x86.sse.pshuf.w call as test21 (immediate 3), but returns
        ; only element 0 of the result viewed as <2 x i32>. The checks expect the
        ; function name, then pshufw, then movd.
   1049 ; CHECK: test21_2
   1050 ; CHECK: pshufw
   1051 ; CHECK: movd
   1052 entry:
   1053   %words = bitcast <1 x i64> %a to <4 x i16>
   1054   %src = bitcast <4 x i16> %words to x86_mmx
   1055   %shuffled = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %src, i8 3) nounwind readnone
   1056   %shuffled.words = bitcast x86_mmx %shuffled to <4 x i16>
   1057   %shuffled.dwords = bitcast <4 x i16> %shuffled.words to <2 x i32>
   1058   %elt0 = extractelement <2 x i32> %shuffled.dwords, i32 0
   1059   ret i32 %elt0
   1060 }
   1061 
   1062 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
   1063 
   1064 define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.pmulu.dq on %a and %b (bitcast via <2 x i32> to x86_mmx) and
        ; returns the result bitcast straight to i64; expects pmuludq.
        ; The label is matched first so pmuludq cannot match an earlier function's code.
        ; CHECK: test20:
   1065 ; CHECK: pmuludq
   1066 entry:
   1067   %0 = bitcast <1 x i64> %b to <2 x i32>
   1068   %1 = bitcast <1 x i64> %a to <2 x i32>
   1069   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
   1070   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
   1071   %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
   1072   %3 = bitcast x86_mmx %2 to i64
   1073   ret i64 %3
   1074 }
   1075 
   1076 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
   1077 
   1078 define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls llvm.x86.sse.cvtpi2pd on %a (bitcast via <2 x i32> to x86_mmx) and returns
        ; the <2 x double> result; expects cvtpi2pd.
        ; The label is matched first so cvtpi2pd cannot match an earlier function's code.
        ; CHECK: test19:
   1079 ; CHECK: cvtpi2pd
   1080 entry:
   1081   %0 = bitcast <1 x i64> %a to <2 x i32>
   1082   %1 = bitcast <2 x i32> %0 to x86_mmx
   1083   %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
   1084   ret <2 x double> %2
   1085 }
   1086 
   1087 declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
   1088 
   1089 define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
        ; Calls llvm.x86.sse.cvttpd2pi on %a and returns the x86_mmx result as i64
        ; (via <2 x i32> and <1 x i64>); expects cvttpd2pi.
        ; The label is matched first so cvttpd2pi cannot match an earlier function's code.
        ; CHECK: test18:
   1090 ; CHECK: cvttpd2pi
   1091 entry:
   1092   %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
   1093   %1 = bitcast x86_mmx %0 to <2 x i32>
   1094   %2 = bitcast <2 x i32> %1 to <1 x i64>
   1095   %3 = extractelement <1 x i64> %2, i32 0
   1096   ret i64 %3
   1097 }
   1098 
   1099 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
   1100 
   1101 define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
        ; Calls llvm.x86.sse.cvtpd2pi on %a and returns the x86_mmx result as i64
        ; (via <2 x i32> and <1 x i64>); expects cvtpd2pi.
        ; The label is matched first so cvtpd2pi cannot match an earlier function's code.
        ; CHECK: test17:
   1102 ; CHECK: cvtpd2pi
   1103 entry:
   1104   %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
   1105   %1 = bitcast x86_mmx %0 to <2 x i32>
   1106   %2 = bitcast <2 x i32> %1 to <1 x i64>
   1107   %3 = extractelement <1 x i64> %2, i32 0
   1108   ret i64 %3
   1109 }
   1110 
   1111 declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
   1112 
   1113 define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.mmx.palignr.b on the i64 elements of %a and %b (bitcast to
        ; x86_mmx) with immediate 16; expects palignr.
        ; The label is matched first so palignr cannot match an earlier function's code.
        ; CHECK: test16:
   1114 ; CHECK: palignr
   1115 entry:
   1116   %0 = extractelement <1 x i64> %a, i32 0
   1117   %mmx_var = bitcast i64 %0 to x86_mmx
   1118   %1 = extractelement <1 x i64> %b, i32 0
   1119   %mmx_var1 = bitcast i64 %1 to x86_mmx
   1120   %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
   1121   %3 = bitcast x86_mmx %2 to i64
   1122   ret i64 %3
   1123 }
   1124 
   1125 declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
   1126 
   1127 define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.pabs.d on %a (bitcast via <2 x i32> to x86_mmx); expects pabsd.
        ; The label is matched first so pabsd cannot match an earlier function's code.
        ; CHECK: test15:
   1128 ; CHECK: pabsd
   1129 entry:
   1130   %0 = bitcast <1 x i64> %a to <2 x i32>
   1131   %1 = bitcast <2 x i32> %0 to x86_mmx
   1132   %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
   1133   %3 = bitcast x86_mmx %2 to <2 x i32>
   1134   %4 = bitcast <2 x i32> %3 to <1 x i64>
   1135   %5 = extractelement <1 x i64> %4, i32 0
   1136   ret i64 %5
   1137 }
   1138 
   1139 declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
   1140 
   1141 define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.pabs.w on %a (bitcast via <4 x i16> to x86_mmx); expects pabsw.
        ; The label is matched first so pabsw cannot match an earlier function's code.
        ; CHECK: test14:
   1142 ; CHECK: pabsw
   1143 entry:
   1144   %0 = bitcast <1 x i64> %a to <4 x i16>
   1145   %1 = bitcast <4 x i16> %0 to x86_mmx
   1146   %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
   1147   %3 = bitcast x86_mmx %2 to <4 x i16>
   1148   %4 = bitcast <4 x i16> %3 to <1 x i64>
   1149   %5 = extractelement <1 x i64> %4, i32 0
   1150   ret i64 %5
   1151 }
   1152 
   1153 declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
   1154 
   1155 define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.pabs.b on %a (bitcast via <8 x i8> to x86_mmx); expects pabsb.
        ; The label is matched first so pabsb cannot match an earlier function's code.
        ; CHECK: test13:
   1156 ; CHECK: pabsb
   1157 entry:
   1158   %0 = bitcast <1 x i64> %a to <8 x i8>
   1159   %1 = bitcast <8 x i8> %0 to x86_mmx
   1160   %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
   1161   %3 = bitcast x86_mmx %2 to <8 x i8>
   1162   %4 = bitcast <8 x i8> %3 to <1 x i64>
   1163   %5 = extractelement <1 x i64> %4, i32 0
   1164   ret i64 %5
   1165 }
   1166 
   1167 declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
   1168 
   1169 define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.psign.d on %a and %b (bitcast via <2 x i32> to x86_mmx); expects psignd.
        ; The label is matched first so psignd cannot match an earlier function's code.
        ; CHECK: test12:
   1170 ; CHECK: psignd
   1171 entry:
   1172   %0 = bitcast <1 x i64> %b to <2 x i32>
   1173   %1 = bitcast <1 x i64> %a to <2 x i32>
   1174   %2 = bitcast <2 x i32> %1 to x86_mmx
   1175   %3 = bitcast <2 x i32> %0 to x86_mmx
   1176   %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
   1177   %5 = bitcast x86_mmx %4 to <2 x i32>
   1178   %6 = bitcast <2 x i32> %5 to <1 x i64>
   1179   %7 = extractelement <1 x i64> %6, i32 0
   1180   ret i64 %7
   1181 }
   1182 
   1183 declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
   1184 
   1185 define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.psign.w on %a and %b (bitcast via <4 x i16> to x86_mmx); expects psignw.
        ; The label is matched first so psignw cannot match an earlier function's code.
        ; CHECK: test11:
   1186 ; CHECK: psignw
   1187 entry:
   1188   %0 = bitcast <1 x i64> %b to <4 x i16>
   1189   %1 = bitcast <1 x i64> %a to <4 x i16>
   1190   %2 = bitcast <4 x i16> %1 to x86_mmx
   1191   %3 = bitcast <4 x i16> %0 to x86_mmx
   1192   %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
   1193   %5 = bitcast x86_mmx %4 to <4 x i16>
   1194   %6 = bitcast <4 x i16> %5 to <1 x i64>
   1195   %7 = extractelement <1 x i64> %6, i32 0
   1196   ret i64 %7
   1197 }
   1198 
   1199 declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
   1200 
   1201 define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.psign.b on %a and %b (bitcast via <8 x i8> to x86_mmx); expects psignb.
        ; The label is matched first so psignb cannot match an earlier function's code.
        ; CHECK: test10:
   1202 ; CHECK: psignb
   1203 entry:
   1204   %0 = bitcast <1 x i64> %b to <8 x i8>
   1205   %1 = bitcast <1 x i64> %a to <8 x i8>
   1206   %2 = bitcast <8 x i8> %1 to x86_mmx
   1207   %3 = bitcast <8 x i8> %0 to x86_mmx
   1208   %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
   1209   %5 = bitcast x86_mmx %4 to <8 x i8>
   1210   %6 = bitcast <8 x i8> %5 to <1 x i64>
   1211   %7 = extractelement <1 x i64> %6, i32 0
   1212   ret i64 %7
   1213 }
   1214 
   1215 declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
   1216 
   1217 define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.pshuf.b on %a and %b (bitcast via <8 x i8> to x86_mmx); expects pshufb.
        ; The label is matched first so pshufb cannot match an earlier function's code.
        ; CHECK: test9:
   1218 ; CHECK: pshufb
   1219 entry:
   1220   %0 = bitcast <1 x i64> %b to <8 x i8>
   1221   %1 = bitcast <1 x i64> %a to <8 x i8>
   1222   %2 = bitcast <8 x i8> %1 to x86_mmx
   1223   %3 = bitcast <8 x i8> %0 to x86_mmx
   1224   %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
   1225   %5 = bitcast x86_mmx %4 to <8 x i8>
   1226   %6 = bitcast <8 x i8> %5 to <1 x i64>
   1227   %7 = extractelement <1 x i64> %6, i32 0
   1228   ret i64 %7
   1229 }
   1230 
   1231 declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
   1232 
   1233 define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.pmul.hr.sw on %a and %b (bitcast via <4 x i16> to x86_mmx); expects pmulhrsw.
        ; The label is matched first so pmulhrsw cannot match an earlier function's code.
        ; CHECK: test8:
   1234 ; CHECK: pmulhrsw
   1235 entry:
   1236   %0 = bitcast <1 x i64> %b to <4 x i16>
   1237   %1 = bitcast <1 x i64> %a to <4 x i16>
   1238   %2 = bitcast <4 x i16> %1 to x86_mmx
   1239   %3 = bitcast <4 x i16> %0 to x86_mmx
   1240   %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
   1241   %5 = bitcast x86_mmx %4 to <4 x i16>
   1242   %6 = bitcast <4 x i16> %5 to <1 x i64>
   1243   %7 = extractelement <1 x i64> %6, i32 0
   1244   ret i64 %7
   1245 }
   1246 
   1247 declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
   1248 
   1249 define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.pmadd.ub.sw on %a and %b (bitcast via <8 x i8> to x86_mmx); expects pmaddubsw.
        ; The label is matched first so pmaddubsw cannot match an earlier function's code.
        ; CHECK: test7:
   1250 ; CHECK: pmaddubsw
   1251 entry:
   1252   %0 = bitcast <1 x i64> %b to <8 x i8>
   1253   %1 = bitcast <1 x i64> %a to <8 x i8>
   1254   %2 = bitcast <8 x i8> %1 to x86_mmx
   1255   %3 = bitcast <8 x i8> %0 to x86_mmx
   1256   %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
   1257   %5 = bitcast x86_mmx %4 to <8 x i8>
   1258   %6 = bitcast <8 x i8> %5 to <1 x i64>
   1259   %7 = extractelement <1 x i64> %6, i32 0
   1260   ret i64 %7
   1261 }
   1262 
   1263 declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
   1264 
   1265 define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.phsub.sw on %a and %b (bitcast via <4 x i16> to x86_mmx); expects phsubsw.
        ; The label is matched first so phsubsw cannot match an earlier function's code.
        ; CHECK: test6:
   1266 ; CHECK: phsubsw
   1267 entry:
   1268   %0 = bitcast <1 x i64> %b to <4 x i16>
   1269   %1 = bitcast <1 x i64> %a to <4 x i16>
   1270   %2 = bitcast <4 x i16> %1 to x86_mmx
   1271   %3 = bitcast <4 x i16> %0 to x86_mmx
   1272   %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
   1273   %5 = bitcast x86_mmx %4 to <4 x i16>
   1274   %6 = bitcast <4 x i16> %5 to <1 x i64>
   1275   %7 = extractelement <1 x i64> %6, i32 0
   1276   ret i64 %7
   1277 }
   1278 
   1279 declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
   1280 
   1281 define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.phsub.d on %a and %b (bitcast via <2 x i32> to x86_mmx); expects phsubd.
        ; The label is matched first so phsubd cannot match an earlier function's code.
        ; CHECK: test5:
   1282 ; CHECK: phsubd
   1283 entry:
   1284   %0 = bitcast <1 x i64> %b to <2 x i32>
   1285   %1 = bitcast <1 x i64> %a to <2 x i32>
   1286   %2 = bitcast <2 x i32> %1 to x86_mmx
   1287   %3 = bitcast <2 x i32> %0 to x86_mmx
   1288   %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
   1289   %5 = bitcast x86_mmx %4 to <2 x i32>
   1290   %6 = bitcast <2 x i32> %5 to <1 x i64>
   1291   %7 = extractelement <1 x i64> %6, i32 0
   1292   ret i64 %7
   1293 }
   1294 
   1295 declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
   1296 
   1297 define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.phsub.w on %a and %b (bitcast via <4 x i16> to x86_mmx); expects phsubw.
        ; The label is matched first so phsubw cannot match an earlier function's code.
        ; CHECK: test4:
   1298 ; CHECK: phsubw
   1299 entry:
   1300   %0 = bitcast <1 x i64> %b to <4 x i16>
   1301   %1 = bitcast <1 x i64> %a to <4 x i16>
   1302   %2 = bitcast <4 x i16> %1 to x86_mmx
   1303   %3 = bitcast <4 x i16> %0 to x86_mmx
   1304   %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
   1305   %5 = bitcast x86_mmx %4 to <4 x i16>
   1306   %6 = bitcast <4 x i16> %5 to <1 x i64>
   1307   %7 = extractelement <1 x i64> %6, i32 0
   1308   ret i64 %7
   1309 }
   1310 
   1311 declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
   1312 
   1313 define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.phadd.sw on %a and %b (bitcast via <4 x i16> to x86_mmx); expects phaddsw.
        ; The label is matched first so phaddsw cannot match an earlier function's code.
        ; CHECK: test3:
   1314 ; CHECK: phaddsw
   1315 entry:
   1316   %0 = bitcast <1 x i64> %b to <4 x i16>
   1317   %1 = bitcast <1 x i64> %a to <4 x i16>
   1318   %2 = bitcast <4 x i16> %1 to x86_mmx
   1319   %3 = bitcast <4 x i16> %0 to x86_mmx
   1320   %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
   1321   %5 = bitcast x86_mmx %4 to <4 x i16>
   1322   %6 = bitcast <4 x i16> %5 to <1 x i64>
   1323   %7 = extractelement <1 x i64> %6, i32 0
   1324   ret i64 %7
   1325 }
   1326 
   1327 declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
   1328 
   1329 define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls llvm.x86.ssse3.phadd.d on %a and %b (bitcast via <2 x i32> to x86_mmx); expects phaddd.
        ; The label is matched first so phaddd cannot match an earlier function's code.
        ; CHECK: test2:
   1330 ; CHECK: phaddd
   1331 entry:
   1332   %0 = bitcast <1 x i64> %b to <2 x i32>
   1333   %1 = bitcast <1 x i64> %a to <2 x i32>
   1334   %2 = bitcast <2 x i32> %1 to x86_mmx
   1335   %3 = bitcast <2 x i32> %0 to x86_mmx
   1336   %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
   1337   %5 = bitcast x86_mmx %4 to <2 x i32>
   1338   %6 = bitcast <2 x i32> %5 to <1 x i64>
   1339   %7 = extractelement <1 x i64> %6, i32 0
   1340   ret i64 %7
   1341 }
   1342 
   1343 define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
        ; Passes %a and the x86_mmx argument %b directly to llvm.x86.sse.cvtpi2ps and
        ; returns the <4 x float> result; expects cvtpi2ps.
        ; The label is matched first so cvtpi2ps cannot match an earlier function's code.
        ; CHECK: test89:
   1344 ; CHECK: cvtpi2ps
   1345   %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b)
   1346   ret <4 x float> %c
   1347 }
   1348 
   1349 declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
   1350