; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s

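; Each test marshals its arguments into x86_mmx values (typically via bitcasts
; from <1 x i64>), calls a single MMX/SSE/SSSE3 intrinsic, and checks that the
; expected instruction is selected under both RUN configurations.
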
declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckldq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhdq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packuswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packssdw
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packsswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone

define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone

define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone

define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone

define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone

define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone

define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone

define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone

define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone

define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pxor
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone

define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: por
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone

define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pandn
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone

define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pand
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone

declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psadbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind

define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
; CHECK: movntq
entry:
  %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
  ret void
}

declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone

define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pmovmskb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
  ret i32 %1
}

declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind

define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
; CHECK: maskmovq
entry:
  %0 = bitcast <1 x i64> %n to <8 x i8>
  %1 = bitcast <1 x i64> %d to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
  ret void
}

declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhuw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone

define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pshufw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmuludq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone

define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: cvtpi2pd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
  ret <2 x double> %2
}

declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvttpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvtpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone

define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: palignr
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone

define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone

define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone

define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pshufb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhrsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddubsw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}