Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
      2 
      3 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
      4 
      5 define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.ssse3.phadd.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain phaddw.
        ; CHECK-LABEL: test1:
      6 ; CHECK: phaddw
      7 entry:
      8   %0 = bitcast <1 x i64> %b to <4 x i16>
      9   %1 = bitcast <1 x i64> %a to <4 x i16>
     10   %2 = bitcast <4 x i16> %1 to x86_mmx
     11   %3 = bitcast <4 x i16> %0 to x86_mmx
     12   %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
     13   %5 = bitcast x86_mmx %4 to <4 x i16>
     14   %6 = bitcast <4 x i16> %5 to <1 x i64>
     15   %7 = extractelement <1 x i64> %6, i32 0
     16   ret i64 %7
     17 }
     18 
     19 declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
     20 
     21 define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pcmpgt.d with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pcmpgtd.
        ; CHECK-LABEL: test88:
     22 ; CHECK: pcmpgtd
     23 entry:
     24   %0 = bitcast <1 x i64> %b to <2 x i32>
     25   %1 = bitcast <1 x i64> %a to <2 x i32>
     26   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
     27   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
     28   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
     29   %3 = bitcast x86_mmx %2 to <2 x i32>
     30   %4 = bitcast <2 x i32> %3 to <1 x i64>
     31   %5 = extractelement <1 x i64> %4, i32 0
     32   ret i64 %5
     33 }
     34 
     35 declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
     36 
     37 define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pcmpgt.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pcmpgtw.
        ; CHECK-LABEL: test87:
     38 ; CHECK: pcmpgtw
     39 entry:
     40   %0 = bitcast <1 x i64> %b to <4 x i16>
     41   %1 = bitcast <1 x i64> %a to <4 x i16>
     42   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
     43   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
     44   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
     45   %3 = bitcast x86_mmx %2 to <4 x i16>
     46   %4 = bitcast <4 x i16> %3 to <1 x i64>
     47   %5 = extractelement <1 x i64> %4, i32 0
     48   ret i64 %5
     49 }
     50 
     51 declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
     52 
     53 define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pcmpgt.b with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pcmpgtb.
        ; CHECK-LABEL: test86:
     54 ; CHECK: pcmpgtb
     55 entry:
     56   %0 = bitcast <1 x i64> %b to <8 x i8>
     57   %1 = bitcast <1 x i64> %a to <8 x i8>
     58   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
     59   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
     60   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
     61   %3 = bitcast x86_mmx %2 to <8 x i8>
     62   %4 = bitcast <8 x i8> %3 to <1 x i64>
     63   %5 = extractelement <1 x i64> %4, i32 0
     64   ret i64 %5
     65 }
     66 
     67 declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
     68 
     69 define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pcmpeq.d with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pcmpeqd.
        ; CHECK-LABEL: test85:
     70 ; CHECK: pcmpeqd
     71 entry:
     72   %0 = bitcast <1 x i64> %b to <2 x i32>
     73   %1 = bitcast <1 x i64> %a to <2 x i32>
     74   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
     75   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
     76   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
     77   %3 = bitcast x86_mmx %2 to <2 x i32>
     78   %4 = bitcast <2 x i32> %3 to <1 x i64>
     79   %5 = extractelement <1 x i64> %4, i32 0
     80   ret i64 %5
     81 }
     82 
     83 declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
     84 
     85 define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pcmpeq.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pcmpeqw.
        ; CHECK-LABEL: test84:
     86 ; CHECK: pcmpeqw
     87 entry:
     88   %0 = bitcast <1 x i64> %b to <4 x i16>
     89   %1 = bitcast <1 x i64> %a to <4 x i16>
     90   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
     91   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
     92   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
     93   %3 = bitcast x86_mmx %2 to <4 x i16>
     94   %4 = bitcast <4 x i16> %3 to <1 x i64>
     95   %5 = extractelement <1 x i64> %4, i32 0
     96   ret i64 %5
     97 }
     98 
     99 declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
    100 
    101 define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pcmpeq.b with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pcmpeqb.
        ; CHECK-LABEL: test83:
    102 ; CHECK: pcmpeqb
    103 entry:
    104   %0 = bitcast <1 x i64> %b to <8 x i8>
    105   %1 = bitcast <1 x i64> %a to <8 x i8>
    106   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    107   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    108   %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    109   %3 = bitcast x86_mmx %2 to <8 x i8>
    110   %4 = bitcast <8 x i8> %3 to <1 x i64>
    111   %5 = extractelement <1 x i64> %4, i32 0
    112   ret i64 %5
    113 }
    114 
    115 declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
    116 
    117 define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.punpckldq with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain punpckldq.
        ; CHECK-LABEL: test82:
    118 ; CHECK: punpckldq
    119 entry:
    120   %0 = bitcast <1 x i64> %b to <2 x i32>
    121   %1 = bitcast <1 x i64> %a to <2 x i32>
    122   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    123   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    124   %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    125   %3 = bitcast x86_mmx %2 to <2 x i32>
    126   %4 = bitcast <2 x i32> %3 to <1 x i64>
    127   %5 = extractelement <1 x i64> %4, i32 0
    128   ret i64 %5
    129 }
    130 
    131 declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
    132 
    133 define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.punpcklwd with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain punpcklwd.
        ; CHECK-LABEL: test81:
    134 ; CHECK: punpcklwd
    135 entry:
    136   %0 = bitcast <1 x i64> %b to <4 x i16>
    137   %1 = bitcast <1 x i64> %a to <4 x i16>
    138   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    139   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    140   %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    141   %3 = bitcast x86_mmx %2 to <4 x i16>
    142   %4 = bitcast <4 x i16> %3 to <1 x i64>
    143   %5 = extractelement <1 x i64> %4, i32 0
    144   ret i64 %5
    145 }
    146 
    147 declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
    148 
    149 define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.punpcklbw with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain punpcklbw.
        ; CHECK-LABEL: test80:
    150 ; CHECK: punpcklbw
    151 entry:
    152   %0 = bitcast <1 x i64> %b to <8 x i8>
    153   %1 = bitcast <1 x i64> %a to <8 x i8>
    154   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    155   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    156   %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    157   %3 = bitcast x86_mmx %2 to <8 x i8>
    158   %4 = bitcast <8 x i8> %3 to <1 x i64>
    159   %5 = extractelement <1 x i64> %4, i32 0
    160   ret i64 %5
    161 }
    162 
    163 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
    164 
    165 define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.punpckhdq with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain punpckhdq.
        ; CHECK-LABEL: test79:
    166 ; CHECK: punpckhdq
    167 entry:
    168   %0 = bitcast <1 x i64> %b to <2 x i32>
    169   %1 = bitcast <1 x i64> %a to <2 x i32>
    170   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    171   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    172   %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    173   %3 = bitcast x86_mmx %2 to <2 x i32>
    174   %4 = bitcast <2 x i32> %3 to <1 x i64>
    175   %5 = extractelement <1 x i64> %4, i32 0
    176   ret i64 %5
    177 }
    178 
    179 declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
    180 
    181 define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.punpckhwd with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain punpckhwd.
        ; CHECK-LABEL: test78:
    182 ; CHECK: punpckhwd
    183 entry:
    184   %0 = bitcast <1 x i64> %b to <4 x i16>
    185   %1 = bitcast <1 x i64> %a to <4 x i16>
    186   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    187   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    188   %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    189   %3 = bitcast x86_mmx %2 to <4 x i16>
    190   %4 = bitcast <4 x i16> %3 to <1 x i64>
    191   %5 = extractelement <1 x i64> %4, i32 0
    192   ret i64 %5
    193 }
    194 
    195 declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
    196 
    197 define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.punpckhbw with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain punpckhbw.
        ; CHECK-LABEL: test77:
    198 ; CHECK: punpckhbw
    199 entry:
    200   %0 = bitcast <1 x i64> %b to <8 x i8>
    201   %1 = bitcast <1 x i64> %a to <8 x i8>
    202   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    203   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    204   %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    205   %3 = bitcast x86_mmx %2 to <8 x i8>
    206   %4 = bitcast <8 x i8> %3 to <1 x i64>
    207   %5 = extractelement <1 x i64> %4, i32 0
    208   ret i64 %5
    209 }
    210 
    211 declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
    212 
    213 define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.packuswb with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain packuswb.
        ; CHECK-LABEL: test76:
    214 ; CHECK: packuswb
    215 entry:
    216   %0 = bitcast <1 x i64> %b to <4 x i16>
    217   %1 = bitcast <1 x i64> %a to <4 x i16>
    218   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    219   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    220   %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    221   %3 = bitcast x86_mmx %2 to <8 x i8>
    222   %4 = bitcast <8 x i8> %3 to <1 x i64>
    223   %5 = extractelement <1 x i64> %4, i32 0
    224   ret i64 %5
    225 }
    226 
    227 declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
    228 
    229 define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.packssdw with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain packssdw.
        ; CHECK-LABEL: test75:
    230 ; CHECK: packssdw
    231 entry:
    232   %0 = bitcast <1 x i64> %b to <2 x i32>
    233   %1 = bitcast <1 x i64> %a to <2 x i32>
    234   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    235   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    236   %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    237   %3 = bitcast x86_mmx %2 to <4 x i16>
    238   %4 = bitcast <4 x i16> %3 to <1 x i64>
    239   %5 = extractelement <1 x i64> %4, i32 0
    240   ret i64 %5
    241 }
    242 
    243 declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
    244 
    245 define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.packsswb with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain packsswb.
        ; CHECK-LABEL: test74:
    246 ; CHECK: packsswb
    247 entry:
    248   %0 = bitcast <1 x i64> %b to <4 x i16>
    249   %1 = bitcast <1 x i64> %a to <4 x i16>
    250   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    251   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    252   %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    253   %3 = bitcast x86_mmx %2 to <8 x i8>
    254   %4 = bitcast <8 x i8> %3 to <1 x i64>
    255   %5 = extractelement <1 x i64> %4, i32 0
    256   ret i64 %5
    257 }
    258 
    259 declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
    260 
    261 define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psrai.d on %a with the constant i32 3 as second
        ; operand; the llc output after this function's label must contain psrad.
        ; test65 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test73:
    262 ; CHECK: psrad
    263 entry:
    264   %0 = bitcast <1 x i64> %a to <2 x i32>
    265   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    266   %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
    267   %2 = bitcast x86_mmx %1 to <2 x i32>
    268   %3 = bitcast <2 x i32> %2 to <1 x i64>
    269   %4 = extractelement <1 x i64> %3, i32 0
    270   ret i64 %4
    271 }
    272 
    273 declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
    274 
    275 define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psrai.w on %a with the constant i32 3 as second
        ; operand; the llc output after this function's label must contain psraw.
        ; test64 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test72:
    276 ; CHECK: psraw
    277 entry:
    278   %0 = bitcast <1 x i64> %a to <4 x i16>
    279   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    280   %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
    281   %2 = bitcast x86_mmx %1 to <4 x i16>
    282   %3 = bitcast <4 x i16> %2 to <1 x i64>
    283   %4 = extractelement <1 x i64> %3, i32 0
    284   ret i64 %4
    285 }
    286 
    287 declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
    288 
    289 define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psrli.q on %a with the constant i32 3 as second
        ; operand; the llc output after this function's label must contain psrlq.
        ; test63 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test71:
    290 ; CHECK: psrlq
    291 entry:
    292   %0 = extractelement <1 x i64> %a, i32 0
    293   %mmx_var.i = bitcast i64 %0 to x86_mmx
    294   %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
    295   %2 = bitcast x86_mmx %1 to i64
    296   ret i64 %2
    297 }
    298 
    299 declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
    300 
    301 define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psrli.d on %a with the constant i32 3 as second
        ; operand; the llc output after this function's label must contain psrld.
        ; test62 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test70:
    302 ; CHECK: psrld
    303 entry:
    304   %0 = bitcast <1 x i64> %a to <2 x i32>
    305   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    306   %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
    307   %2 = bitcast x86_mmx %1 to <2 x i32>
    308   %3 = bitcast <2 x i32> %2 to <1 x i64>
    309   %4 = extractelement <1 x i64> %3, i32 0
    310   ret i64 %4
    311 }
    312 
    313 declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
    314 
    315 define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psrli.w on %a with the constant i32 3 as second
        ; operand; the llc output after this function's label must contain psrlw.
        ; test61 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test69:
    316 ; CHECK: psrlw
    317 entry:
    318   %0 = bitcast <1 x i64> %a to <4 x i16>
    319   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    320   %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
    321   %2 = bitcast x86_mmx %1 to <4 x i16>
    322   %3 = bitcast <4 x i16> %2 to <1 x i64>
    323   %4 = extractelement <1 x i64> %3, i32 0
    324   ret i64 %4
    325 }
    326 
    327 declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
    328 
    329 define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pslli.q on %a with the constant i32 3 as second
        ; operand; the llc output after this function's label must contain psllq.
        ; test60 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test68:
    330 ; CHECK: psllq
    331 entry:
    332   %0 = extractelement <1 x i64> %a, i32 0
    333   %mmx_var.i = bitcast i64 %0 to x86_mmx
    334   %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
    335   %2 = bitcast x86_mmx %1 to i64
    336   ret i64 %2
    337 }
    338 
    339 declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
    340 
    341 define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pslli.d on %a with the constant i32 3 as second
        ; operand; the llc output after this function's label must contain pslld.
        ; test59 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test67:
    342 ; CHECK: pslld
    343 entry:
    344   %0 = bitcast <1 x i64> %a to <2 x i32>
    345   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    346   %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
    347   %2 = bitcast x86_mmx %1 to <2 x i32>
    348   %3 = bitcast <2 x i32> %2 to <1 x i64>
    349   %4 = extractelement <1 x i64> %3, i32 0
    350   ret i64 %4
    351 }
    352 
    353 declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
    354 
    355 define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pslli.w on %a with the constant i32 3 as second
        ; operand; the llc output after this function's label must contain psllw.
        ; test58 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test66:
    356 ; CHECK: psllw
    357 entry:
    358   %0 = bitcast <1 x i64> %a to <4 x i16>
    359   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    360   %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
    361   %2 = bitcast x86_mmx %1 to <4 x i16>
    362   %3 = bitcast <4 x i16> %2 to <1 x i64>
    363   %4 = extractelement <1 x i64> %3, i32 0
    364   ret i64 %4
    365 }
    366 
    367 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
    368 
    369 define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psra.d with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psrad.
        ; test73 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test65:
    370 ; CHECK: psrad
    371 entry:
    372   %0 = bitcast <1 x i64> %a to <2 x i32>
    373   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    374   %1 = extractelement <1 x i64> %b, i32 0
    375   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    376   %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    377   %3 = bitcast x86_mmx %2 to <2 x i32>
    378   %4 = bitcast <2 x i32> %3 to <1 x i64>
    379   %5 = extractelement <1 x i64> %4, i32 0
    380   ret i64 %5
    381 }
    382 
    383 declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
    384 
    385 define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psra.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psraw.
        ; test72 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test64:
    386 ; CHECK: psraw
    387 entry:
    388   %0 = bitcast <1 x i64> %a to <4 x i16>
    389   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    390   %1 = extractelement <1 x i64> %b, i32 0
    391   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    392   %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    393   %3 = bitcast x86_mmx %2 to <4 x i16>
    394   %4 = bitcast <4 x i16> %3 to <1 x i64>
    395   %5 = extractelement <1 x i64> %4, i32 0
    396   ret i64 %5
    397 }
    398 
    399 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
    400 
    401 define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psrl.q with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psrlq.
        ; test71 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test63:
    402 ; CHECK: psrlq
    403 entry:
    404   %0 = extractelement <1 x i64> %a, i32 0
    405   %mmx_var.i = bitcast i64 %0 to x86_mmx
    406   %1 = extractelement <1 x i64> %b, i32 0
    407   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    408   %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    409   %3 = bitcast x86_mmx %2 to i64
    410   ret i64 %3
    411 }
    412 
    413 declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
    414 
    415 define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psrl.d with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psrld.
        ; test70 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test62:
    416 ; CHECK: psrld
    417 entry:
    418   %0 = bitcast <1 x i64> %a to <2 x i32>
    419   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    420   %1 = extractelement <1 x i64> %b, i32 0
    421   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    422   %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    423   %3 = bitcast x86_mmx %2 to <2 x i32>
    424   %4 = bitcast <2 x i32> %3 to <1 x i64>
    425   %5 = extractelement <1 x i64> %4, i32 0
    426   ret i64 %5
    427 }
    428 
    429 declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
    430 
    431 define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psrl.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psrlw.
        ; test69 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test61:
    432 ; CHECK: psrlw
    433 entry:
    434   %0 = bitcast <1 x i64> %a to <4 x i16>
    435   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    436   %1 = extractelement <1 x i64> %b, i32 0
    437   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    438   %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    439   %3 = bitcast x86_mmx %2 to <4 x i16>
    440   %4 = bitcast <4 x i16> %3 to <1 x i64>
    441   %5 = extractelement <1 x i64> %4, i32 0
    442   ret i64 %5
    443 }
    444 
    445 declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
    446 
    447 define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psll.q with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psllq.
        ; test68 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test60:
    448 ; CHECK: psllq
    449 entry:
    450   %0 = extractelement <1 x i64> %a, i32 0
    451   %mmx_var.i = bitcast i64 %0 to x86_mmx
    452   %1 = extractelement <1 x i64> %b, i32 0
    453   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    454   %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    455   %3 = bitcast x86_mmx %2 to i64
    456   ret i64 %3
    457 }
    458 
    459 declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
    460 
    461 define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psll.d with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pslld.
        ; test67 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test59:
    462 ; CHECK: pslld
    463 entry:
    464   %0 = bitcast <1 x i64> %a to <2 x i32>
    465   %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
    466   %1 = extractelement <1 x i64> %b, i32 0
    467   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    468   %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    469   %3 = bitcast x86_mmx %2 to <2 x i32>
    470   %4 = bitcast <2 x i32> %3 to <1 x i64>
    471   %5 = extractelement <1 x i64> %4, i32 0
    472   ret i64 %5
    473 }
    474 
    475 declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
    476 
    477 define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psll.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psllw.
        ; test66 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test58:
    478 ; CHECK: psllw
    479 entry:
    480   %0 = bitcast <1 x i64> %a to <4 x i16>
    481   %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
    482   %1 = extractelement <1 x i64> %b, i32 0
    483   %mmx_var1.i = bitcast i64 %1 to x86_mmx
    484   %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    485   %3 = bitcast x86_mmx %2 to <4 x i16>
    486   %4 = bitcast <4 x i16> %3 to <1 x i64>
    487   %5 = extractelement <1 x i64> %4, i32 0
    488   ret i64 %5
    489 }
    490 
    491 declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
    492 
    493 define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pxor with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pxor.
        ; CHECK-LABEL: test56:
    494 ; CHECK: pxor
    495 entry:
    496   %0 = bitcast <1 x i64> %b to <2 x i32>
    497   %1 = bitcast <1 x i64> %a to <2 x i32>
    498   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    499   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    500   %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    501   %3 = bitcast x86_mmx %2 to <2 x i32>
    502   %4 = bitcast <2 x i32> %3 to <1 x i64>
    503   %5 = extractelement <1 x i64> %4, i32 0
    504   ret i64 %5
    505 }
    506 
    507 declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
    508 
    509 define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.por with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain por.
        ; "por" is a short pattern, so the label restricts where it may match.
        ; CHECK-LABEL: test55:
    510 ; CHECK: por
    511 entry:
    512   %0 = bitcast <1 x i64> %b to <2 x i32>
    513   %1 = bitcast <1 x i64> %a to <2 x i32>
    514   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    515   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    516   %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    517   %3 = bitcast x86_mmx %2 to <2 x i32>
    518   %4 = bitcast <2 x i32> %3 to <1 x i64>
    519   %5 = extractelement <1 x i64> %4, i32 0
    520   ret i64 %5
    521 }
    522 
    523 declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
    524 
    525 define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pandn with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pandn.
        ; CHECK-LABEL: test54:
    526 ; CHECK: pandn
    527 entry:
    528   %0 = bitcast <1 x i64> %b to <2 x i32>
    529   %1 = bitcast <1 x i64> %a to <2 x i32>
    530   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    531   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    532   %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    533   %3 = bitcast x86_mmx %2 to <2 x i32>
    534   %4 = bitcast <2 x i32> %3 to <1 x i64>
    535   %5 = extractelement <1 x i64> %4, i32 0
    536   ret i64 %5
    537 }
    538 
    539 declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
    540 
    541 define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pand with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pand.
        ; "pand" is also a prefix of "pandn" (test54), so the label keeps this
        ; match from being satisfied by an earlier function's output.
        ; CHECK-LABEL: test53:
    542 ; CHECK: pand
    543 entry:
    544   %0 = bitcast <1 x i64> %b to <2 x i32>
    545   %1 = bitcast <1 x i64> %a to <2 x i32>
    546   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    547   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    548   %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    549   %3 = bitcast x86_mmx %2 to <2 x i32>
    550   %4 = bitcast <2 x i32> %3 to <1 x i64>
    551   %5 = extractelement <1 x i64> %4, i32 0
    552   ret i64 %5
    553 }
    554 
    555 declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
    556 
    557 define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pmull.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pmullw.
        ; test51 expects the same mnemonic, so the label keeps the two apart.
        ; CHECK-LABEL: test52:
    558 ; CHECK: pmullw
    559 entry:
    560   %0 = bitcast <1 x i64> %b to <4 x i16>
    561   %1 = bitcast <1 x i64> %a to <4 x i16>
    562   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    563   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    564   %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    565   %3 = bitcast x86_mmx %2 to <4 x i16>
    566   %4 = bitcast <4 x i16> %3 to <1 x i64>
    567   %5 = extractelement <1 x i64> %4, i32 0
    568   ret i64 %5
    569 }
    570 
    571 define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pmull.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pmullw.
        ; NOTE(review): this repeats the intrinsic exercised by test52 - confirm
        ; the duplication is intended. The label keeps the two matches apart.
        ; CHECK-LABEL: test51:
    572 ; CHECK: pmullw
    573 entry:
    574   %0 = bitcast <1 x i64> %b to <4 x i16>
    575   %1 = bitcast <1 x i64> %a to <4 x i16>
    576   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    577   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    578   %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    579   %3 = bitcast x86_mmx %2 to <4 x i16>
    580   %4 = bitcast <4 x i16> %3 to <1 x i64>
    581   %5 = extractelement <1 x i64> %4, i32 0
    582   ret i64 %5
    583 }
    584 
    585 declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
    586 
    587 define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pmulh.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pmulhw.
        ; CHECK-LABEL: test50:
    588 ; CHECK: pmulhw
    589 entry:
    590   %0 = bitcast <1 x i64> %b to <4 x i16>
    591   %1 = bitcast <1 x i64> %a to <4 x i16>
    592   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    593   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    594   %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    595   %3 = bitcast x86_mmx %2 to <4 x i16>
    596   %4 = bitcast <4 x i16> %3 to <1 x i64>
    597   %5 = extractelement <1 x i64> %4, i32 0
    598   ret i64 %5
    599 }
    600 
    601 declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
    602 
    603 define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.pmadd.wd with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain pmaddwd.
        ; CHECK-LABEL: test49:
    604 ; CHECK: pmaddwd
    605 entry:
    606   %0 = bitcast <1 x i64> %b to <4 x i16>
    607   %1 = bitcast <1 x i64> %a to <4 x i16>
    608   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    609   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    610   %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    611   %3 = bitcast x86_mmx %2 to <2 x i32>
    612   %4 = bitcast <2 x i32> %3 to <1 x i64>
    613   %5 = extractelement <1 x i64> %4, i32 0
    614   ret i64 %5
    615 }
    616 
    617 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
    618 
    619 define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psubus.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psubusw.
        ; CHECK-LABEL: test48:
    620 ; CHECK: psubusw
    621 entry:
    622   %0 = bitcast <1 x i64> %b to <4 x i16>
    623   %1 = bitcast <1 x i64> %a to <4 x i16>
    624   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    625   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    626   %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    627   %3 = bitcast x86_mmx %2 to <4 x i16>
    628   %4 = bitcast <4 x i16> %3 to <1 x i64>
    629   %5 = extractelement <1 x i64> %4, i32 0
    630   ret i64 %5
    631 }
    632 
    633 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
    634 
    635 define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psubus.b with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psubusb.
        ; CHECK-LABEL: test47:
    636 ; CHECK: psubusb
    637 entry:
    638   %0 = bitcast <1 x i64> %b to <8 x i8>
    639   %1 = bitcast <1 x i64> %a to <8 x i8>
    640   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    641   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    642   %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    643   %3 = bitcast x86_mmx %2 to <8 x i8>
    644   %4 = bitcast <8 x i8> %3 to <1 x i64>
    645   %5 = extractelement <1 x i64> %4, i32 0
    646   ret i64 %5
    647 }
    648 
    649 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
    650 
    651 define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psubs.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psubsw.
        ; CHECK-LABEL: test46:
    652 ; CHECK: psubsw
    653 entry:
    654   %0 = bitcast <1 x i64> %b to <4 x i16>
    655   %1 = bitcast <1 x i64> %a to <4 x i16>
    656   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    657   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    658   %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    659   %3 = bitcast x86_mmx %2 to <4 x i16>
    660   %4 = bitcast <4 x i16> %3 to <1 x i64>
    661   %5 = extractelement <1 x i64> %4, i32 0
    662   ret i64 %5
    663 }
    664 
    665 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
    666 
    667 define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psubs.b with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psubsb.
        ; CHECK-LABEL: test45:
    668 ; CHECK: psubsb
    669 entry:
    670   %0 = bitcast <1 x i64> %b to <8 x i8>
    671   %1 = bitcast <1 x i64> %a to <8 x i8>
    672   %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
    673   %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
    674   %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    675   %3 = bitcast x86_mmx %2 to <8 x i8>
    676   %4 = bitcast <8 x i8> %3 to <1 x i64>
    677   %5 = extractelement <1 x i64> %4, i32 0
    678   ret i64 %5
    679 }
    680 
    681 define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psub.q (declared after this function) with %a and
        ; %b as its first and second operand; the llc output after this
        ; function's label must contain psubq.
        ; CHECK-LABEL: test44:
    682 ; CHECK: psubq
    683 entry:
    684   %0 = extractelement <1 x i64> %a, i32 0
    685   %mmx_var = bitcast i64 %0 to x86_mmx
    686   %1 = extractelement <1 x i64> %b, i32 0
    687   %mmx_var1 = bitcast i64 %1 to x86_mmx
    688   %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
    689   %3 = bitcast x86_mmx %2 to i64
    690   ret i64 %3
    691 }
    692 
    693 declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
    694 
    695 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
    696 
    697 define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psub.d with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psubd.
        ; CHECK-LABEL: test43:
    698 ; CHECK: psubd
    699 entry:
    700   %0 = bitcast <1 x i64> %b to <2 x i32>
    701   %1 = bitcast <1 x i64> %a to <2 x i32>
    702   %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
    703   %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
    704   %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    705   %3 = bitcast x86_mmx %2 to <2 x i32>
    706   %4 = bitcast <2 x i32> %3 to <1 x i64>
    707   %5 = extractelement <1 x i64> %4, i32 0
    708   ret i64 %5
    709 }
    710 
    711 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
    712 
    713 define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
        ; Calls @llvm.x86.mmx.psub.w with %a and %b as its first and second
        ; operand; the llc output after this function's label must contain psubw.
        ; CHECK-LABEL: test42:
    714 ; CHECK: psubw
    715 entry:
    716   %0 = bitcast <1 x i64> %b to <4 x i16>
    717   %1 = bitcast <1 x i64> %a to <4 x i16>
    718   %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
    719   %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
    720   %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
    721   %3 = bitcast x86_mmx %2 to <4 x i16>
    722   %4 = bitcast <4 x i16> %3 to <1 x i64>
    723   %5 = extractelement <1 x i64> %4, i32 0
    724   ret i64 %5
    725 }
    726 
    727 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
    728 
    729 define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    730 ; CHECK: psubb
    731 entry:  ; operands go <1 x i64> -> <8 x i8> -> x86_mmx ahead of the @llvm.x86.mmx.psub.b call
    732   %b.v8i8 = bitcast <1 x i64> %b to <8 x i8>
    733   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
    734   %lhs.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
    735   %rhs.mmx = bitcast <8 x i8> %b.v8i8 to x86_mmx
    736   %out.mmx = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    737   %out.v8i8 = bitcast x86_mmx %out.mmx to <8 x i8>
    738   %out.v1i64 = bitcast <8 x i8> %out.v8i8 to <1 x i64>
    739   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    740   ret i64 %out
    741 }
    742 
    743 declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
    744 
    745 define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    746 ; CHECK: paddusw
    747 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.mmx.paddus.w call
    748   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
    749   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
    750   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
    751   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
    752   %out.mmx = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    753   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
    754   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
    755   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    756   ret i64 %out
    757 }
    758 
    759 declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
    760 
    761 define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    762 ; CHECK: paddusb
    763 entry:  ; operands go <1 x i64> -> <8 x i8> -> x86_mmx ahead of the @llvm.x86.mmx.paddus.b call
    764   %b.v8i8 = bitcast <1 x i64> %b to <8 x i8>
    765   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
    766   %lhs.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
    767   %rhs.mmx = bitcast <8 x i8> %b.v8i8 to x86_mmx
    768   %out.mmx = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    769   %out.v8i8 = bitcast x86_mmx %out.mmx to <8 x i8>
    770   %out.v1i64 = bitcast <8 x i8> %out.v8i8 to <1 x i64>
    771   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    772   ret i64 %out
    773 }
    774 
    775 declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
    776 
    777 define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    778 ; CHECK: paddsw
    779 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.mmx.padds.w call
    780   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
    781   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
    782   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
    783   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
    784   %out.mmx = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    785   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
    786   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
    787   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    788   ret i64 %out
    789 }
    790 
    791 declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
    792 
    793 define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    794 ; CHECK: paddsb
    795 entry:  ; operands go <1 x i64> -> <8 x i8> -> x86_mmx ahead of the @llvm.x86.mmx.padds.b call
    796   %b.v8i8 = bitcast <1 x i64> %b to <8 x i8>
    797   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
    798   %lhs.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
    799   %rhs.mmx = bitcast <8 x i8> %b.v8i8 to x86_mmx
    800   %out.mmx = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    801   %out.v8i8 = bitcast x86_mmx %out.mmx to <8 x i8>
    802   %out.v1i64 = bitcast <8 x i8> %out.v8i8 to <1 x i64>
    803   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    804   ret i64 %out
    805 }
    806 
    807 declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
    808 
    809 define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    810 ; CHECK: paddq
    811 entry:  ; each operand is pulled out as a scalar i64 and bitcast straight to x86_mmx
    812   %a.i64 = extractelement <1 x i64> %a, i32 0
    813   %lhs.mmx = bitcast i64 %a.i64 to x86_mmx
    814   %b.i64 = extractelement <1 x i64> %b, i32 0
    815   %rhs.mmx = bitcast i64 %b.i64 to x86_mmx
    816   %out.mmx = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx)
    817   %out = bitcast x86_mmx %out.mmx to i64  ; result goes back to i64 without a vector detour
    818   ret i64 %out
    819 }
    820 
    821 declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
    822 
    823 define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    824 ; CHECK: paddd
    825 entry:  ; operands go <1 x i64> -> <2 x i32> -> x86_mmx ahead of the @llvm.x86.mmx.padd.d call
    826   %b.v2i32 = bitcast <1 x i64> %b to <2 x i32>
    827   %a.v2i32 = bitcast <1 x i64> %a to <2 x i32>
    828   %lhs.mmx = bitcast <2 x i32> %a.v2i32 to x86_mmx
    829   %rhs.mmx = bitcast <2 x i32> %b.v2i32 to x86_mmx
    830   %out.mmx = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    831   %out.v2i32 = bitcast x86_mmx %out.mmx to <2 x i32>
    832   %out.v1i64 = bitcast <2 x i32> %out.v2i32 to <1 x i64>
    833   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    834   ret i64 %out
    835 }
    836 
    837 declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
    838 
    839 define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    840 ; CHECK: paddw
    841 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.mmx.padd.w call
    842   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
    843   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
    844   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
    845   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
    846   %out.mmx = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    847   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
    848   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
    849   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    850   ret i64 %out
    851 }
    852 
    853 declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
    854 
    855 define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    856 ; CHECK: paddb
    857 entry:  ; operands go <1 x i64> -> <8 x i8> -> x86_mmx ahead of the @llvm.x86.mmx.padd.b call
    858   %b.v8i8 = bitcast <1 x i64> %b to <8 x i8>
    859   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
    860   %lhs.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
    861   %rhs.mmx = bitcast <8 x i8> %b.v8i8 to x86_mmx
    862   %out.mmx = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    863   %out.v8i8 = bitcast x86_mmx %out.mmx to <8 x i8>
    864   %out.v1i64 = bitcast <8 x i8> %out.v8i8 to <1 x i64>
    865   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    866   ret i64 %out
    867 }
    868 
    869 declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
    870 
    871 define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    872 ; CHECK: psadbw
    873 entry:  ; operands go <1 x i64> -> <8 x i8> -> x86_mmx ahead of the @llvm.x86.mmx.psad.bw call
    874   %b.v8i8 = bitcast <1 x i64> %b to <8 x i8>
    875   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
    876   %lhs.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
    877   %rhs.mmx = bitcast <8 x i8> %b.v8i8 to x86_mmx
    878   %out.mmx = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    879   %out = bitcast x86_mmx %out.mmx to i64  ; result is converted directly to i64 here
    880   ret i64 %out
    881 }
    882 
    883 declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
    884 
    885 define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    886 ; CHECK: pminsw
    887 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.mmx.pmins.w call
    888   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
    889   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
    890   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
    891   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
    892   %out.mmx = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    893   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
    894   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
    895   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    896   ret i64 %out
    897 }
    898 
    899 declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
    900 
    901 define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    902 ; CHECK: pminub
    903 entry:  ; operands go <1 x i64> -> <8 x i8> -> x86_mmx ahead of the @llvm.x86.mmx.pminu.b call
    904   %b.v8i8 = bitcast <1 x i64> %b to <8 x i8>
    905   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
    906   %lhs.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
    907   %rhs.mmx = bitcast <8 x i8> %b.v8i8 to x86_mmx
    908   %out.mmx = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    909   %out.v8i8 = bitcast x86_mmx %out.mmx to <8 x i8>
    910   %out.v1i64 = bitcast <8 x i8> %out.v8i8 to <1 x i64>
    911   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    912   ret i64 %out
    913 }
    914 
    915 declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
    916 
    917 define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    918 ; CHECK: pmaxsw
    919 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.mmx.pmaxs.w call
    920   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
    921   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
    922   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
    923   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
    924   %out.mmx = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    925   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
    926   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
    927   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    928   ret i64 %out
    929 }
    930 
    931 declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
    932 
    933 define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    934 ; CHECK: pmaxub
    935 entry:  ; operands go <1 x i64> -> <8 x i8> -> x86_mmx ahead of the @llvm.x86.mmx.pmaxu.b call
    936   %b.v8i8 = bitcast <1 x i64> %b to <8 x i8>
    937   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
    938   %lhs.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
    939   %rhs.mmx = bitcast <8 x i8> %b.v8i8 to x86_mmx
    940   %out.mmx = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    941   %out.v8i8 = bitcast x86_mmx %out.mmx to <8 x i8>
    942   %out.v1i64 = bitcast <8 x i8> %out.v8i8 to <1 x i64>
    943   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    944   ret i64 %out
    945 }
    946 
    947 declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
    948 
    949 define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    950 ; CHECK: pavgw
    951 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.mmx.pavg.w call
    952   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
    953   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
    954   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
    955   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
    956   %out.mmx = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    957   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
    958   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
    959   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    960   ret i64 %out
    961 }
    962 
    963 declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
    964 
    965 define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
    966 ; CHECK: pavgb
    967 entry:  ; operands go <1 x i64> -> <8 x i8> -> x86_mmx ahead of the @llvm.x86.mmx.pavg.b call
    968   %b.v8i8 = bitcast <1 x i64> %b to <8 x i8>
    969   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
    970   %lhs.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
    971   %rhs.mmx = bitcast <8 x i8> %b.v8i8 to x86_mmx
    972   %out.mmx = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
    973   %out.v8i8 = bitcast x86_mmx %out.mmx to <8 x i8>
    974   %out.v1i64 = bitcast <8 x i8> %out.v8i8 to <1 x i64>
    975   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
    976   ret i64 %out
    977 }
    978 
    979 declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
    980 
    981 define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
    982 ; CHECK: movntq
    983 entry:  ; pointer and value are both recast to their x86_mmx forms for @llvm.x86.mmx.movnt.dq
    984   %dst.mmx.ptr = bitcast <1 x i64>* %p to x86_mmx*
    985   %a.i64 = extractelement <1 x i64> %a, i32 0
    986   %val.mmx = bitcast i64 %a.i64 to x86_mmx
    987   tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %dst.mmx.ptr, x86_mmx %val.mmx) nounwind
    988   ret void
    989 }
    990 
    991 declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
    992 
    993 define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
    994 ; CHECK: pmovmskb
    995 entry:  ; operand goes <1 x i64> -> <8 x i8> -> x86_mmx; the intrinsic's i32 result is returned as-is
    996   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
    997   %src.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
    998   %mask = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %src.mmx) nounwind
    999   ret i32 %mask
   1000 }
   1001 
   1002 declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
   1003 
   1004 define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
   1005 ; CHECK: maskmovq
   1006 entry:  ; %d and %n go <1 x i64> -> <8 x i8> -> x86_mmx; %p is forwarded untouched
   1007   %n.v8i8 = bitcast <1 x i64> %n to <8 x i8>
   1008   %d.v8i8 = bitcast <1 x i64> %d to <8 x i8>
   1009   %d.mmx = bitcast <8 x i8> %d.v8i8 to x86_mmx
   1010   %n.mmx = bitcast <8 x i8> %n.v8i8 to x86_mmx
   1011   tail call void @llvm.x86.mmx.maskmovq(x86_mmx %d.mmx, x86_mmx %n.mmx, i8* %p) nounwind
   1012   ret void
   1013 }
   1014 
   1015 declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
   1016 
   1017 define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1018 ; CHECK: pmulhuw
   1019 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.mmx.pmulhu.w call
   1020   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
   1021   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
   1022   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
   1023   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
   1024   %out.mmx = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
   1025   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
   1026   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
   1027   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1028   ret i64 %out
   1029 }
   1030 
   1031 declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
   1032 
   1033 define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
   1034 ; CHECK: pshufw
   1035 entry:  ; single operand goes <1 x i64> -> <4 x i16> -> x86_mmx; the i8 argument is the constant 3
   1036   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
   1037   %src.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
   1038   %out.mmx = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %src.mmx, i8 3) nounwind readnone
   1039   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
   1040   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
   1041   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1042   ret i64 %out
   1043 }
   1044 
   1045 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
   1046 
   1047 define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1048 ; CHECK: pmuludq
   1049 entry:  ; operands go <1 x i64> -> <2 x i32> -> x86_mmx ahead of the @llvm.x86.mmx.pmulu.dq call
   1050   %b.v2i32 = bitcast <1 x i64> %b to <2 x i32>
   1051   %a.v2i32 = bitcast <1 x i64> %a to <2 x i32>
   1052   %lhs.mmx = bitcast <2 x i32> %a.v2i32 to x86_mmx
   1053   %rhs.mmx = bitcast <2 x i32> %b.v2i32 to x86_mmx
   1054   %out.mmx = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
   1055   %out = bitcast x86_mmx %out.mmx to i64  ; result is converted directly to i64 here
   1056   ret i64 %out
   1057 }
   1058 
   1059 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
   1060 
   1061 define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
   1062 ; CHECK: cvtpi2pd
   1063 entry:  ; operand goes <1 x i64> -> <2 x i32> -> x86_mmx; the <2 x double> result is returned unchanged
   1064   %a.v2i32 = bitcast <1 x i64> %a to <2 x i32>
   1065   %src.mmx = bitcast <2 x i32> %a.v2i32 to x86_mmx
   1066   %out.v2f64 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %src.mmx) nounwind readnone
   1067   ret <2 x double> %out.v2f64
   1068 }
   1069 
   1070 declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
   1071 
   1072 define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
   1073 ; CHECK: cvttpd2pi
   1074 entry:  ; %a feeds the intrinsic directly; only the x86_mmx result needs the bitcast chain
   1075   %out.mmx = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
   1076   %out.v2i32 = bitcast x86_mmx %out.mmx to <2 x i32>
   1077   %out.v1i64 = bitcast <2 x i32> %out.v2i32 to <1 x i64>
   1078   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1079   ret i64 %out
   1080 }
   1081 
   1082 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
   1083 
   1084 define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
   1085 ; CHECK: cvtpd2pi
   1086 entry:  ; %a feeds the intrinsic directly; only the x86_mmx result needs the bitcast chain
   1087   %out.mmx = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
   1088   %out.v2i32 = bitcast x86_mmx %out.mmx to <2 x i32>
   1089   %out.v1i64 = bitcast <2 x i32> %out.v2i32 to <1 x i64>
   1090   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1091   ret i64 %out
   1092 }
   1093 
   1094 declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
   1095 
   1096 define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1097 ; CHECK: palignr
   1098 entry:  ; each operand is pulled out as a scalar i64 and bitcast straight to x86_mmx
   1099   %a.i64 = extractelement <1 x i64> %a, i32 0
   1100   %lhs.mmx = bitcast i64 %a.i64 to x86_mmx
   1101   %b.i64 = extractelement <1 x i64> %b, i32 0
   1102   %rhs.mmx = bitcast i64 %b.i64 to x86_mmx
   1103   %out.mmx = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx, i8 16)
   1104   %out = bitcast x86_mmx %out.mmx to i64  ; result goes back to i64 without a vector detour
   1105   ret i64 %out
   1106 }
   1107 
   1108 declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
   1109 
   1110 define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
   1111 ; CHECK: pabsd
   1112 entry:  ; single operand goes <1 x i64> -> <2 x i32> -> x86_mmx ahead of @llvm.x86.ssse3.pabs.d
   1113   %a.v2i32 = bitcast <1 x i64> %a to <2 x i32>
   1114   %src.mmx = bitcast <2 x i32> %a.v2i32 to x86_mmx
   1115   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %src.mmx) nounwind readnone
   1116   %out.v2i32 = bitcast x86_mmx %out.mmx to <2 x i32>
   1117   %out.v1i64 = bitcast <2 x i32> %out.v2i32 to <1 x i64>
   1118   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1119   ret i64 %out
   1120 }
   1121 
   1122 declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
   1123 
   1124 define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
   1125 ; CHECK: pabsw
   1126 entry:  ; single operand goes <1 x i64> -> <4 x i16> -> x86_mmx ahead of @llvm.x86.ssse3.pabs.w
   1127   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
   1128   %src.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
   1129   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %src.mmx) nounwind readnone
   1130   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
   1131   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
   1132   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1133   ret i64 %out
   1134 }
   1135 
   1136 declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
   1137 
   1138 define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
   1139 ; CHECK: pabsb
   1140 entry:  ; single operand goes <1 x i64> -> <8 x i8> -> x86_mmx ahead of @llvm.x86.ssse3.pabs.b
   1141   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
   1142   %src.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
   1143   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %src.mmx) nounwind readnone
   1144   %out.v8i8 = bitcast x86_mmx %out.mmx to <8 x i8>
   1145   %out.v1i64 = bitcast <8 x i8> %out.v8i8 to <1 x i64>
   1146   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1147   ret i64 %out
   1148 }
   1149 
   1150 declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
   1151 
   1152 define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1153 ; CHECK: psignd
   1154 entry:  ; operands go <1 x i64> -> <2 x i32> -> x86_mmx ahead of the @llvm.x86.ssse3.psign.d call
   1155   %b.v2i32 = bitcast <1 x i64> %b to <2 x i32>
   1156   %a.v2i32 = bitcast <1 x i64> %a to <2 x i32>
   1157   %lhs.mmx = bitcast <2 x i32> %a.v2i32 to x86_mmx
   1158   %rhs.mmx = bitcast <2 x i32> %b.v2i32 to x86_mmx
   1159   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind readnone
   1160   %out.v2i32 = bitcast x86_mmx %out.mmx to <2 x i32>
   1161   %out.v1i64 = bitcast <2 x i32> %out.v2i32 to <1 x i64>
   1162   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1163   ret i64 %out
   1164 }
   1165 
   1166 declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
   1167 
   1168 define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1169 ; CHECK: psignw
   1170 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.ssse3.psign.w call
   1171   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
   1172   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
   1173   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
   1174   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
   1175   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind readnone
   1176   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
   1177   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
   1178   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1179   ret i64 %out
   1180 }
   1181 
   1182 declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
   1183 
   1184 define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1185 ; CHECK: psignb
   1186 entry:  ; operands go <1 x i64> -> <8 x i8> -> x86_mmx ahead of the @llvm.x86.ssse3.psign.b call
   1187   %b.v8i8 = bitcast <1 x i64> %b to <8 x i8>
   1188   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
   1189   %lhs.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
   1190   %rhs.mmx = bitcast <8 x i8> %b.v8i8 to x86_mmx
   1191   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind readnone
   1192   %out.v8i8 = bitcast x86_mmx %out.mmx to <8 x i8>
   1193   %out.v1i64 = bitcast <8 x i8> %out.v8i8 to <1 x i64>
   1194   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1195   ret i64 %out
   1196 }
   1197 
   1198 declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
   1199 
   1200 define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1201 ; CHECK: pshufb
   1202 entry:  ; operands go <1 x i64> -> <8 x i8> -> x86_mmx ahead of the @llvm.x86.ssse3.pshuf.b call
   1203   %b.v8i8 = bitcast <1 x i64> %b to <8 x i8>
   1204   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
   1205   %lhs.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
   1206   %rhs.mmx = bitcast <8 x i8> %b.v8i8 to x86_mmx
   1207   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind readnone
   1208   %out.v8i8 = bitcast x86_mmx %out.mmx to <8 x i8>
   1209   %out.v1i64 = bitcast <8 x i8> %out.v8i8 to <1 x i64>
   1210   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1211   ret i64 %out
   1212 }
   1213 
   1214 declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
   1215 
   1216 define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1217 ; CHECK: pmulhrsw
   1218 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.ssse3.pmul.hr.sw call
   1219   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
   1220   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
   1221   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
   1222   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
   1223   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind readnone
   1224   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
   1225   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
   1226   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1227   ret i64 %out
   1228 }
   1229 
   1230 declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
   1231 
   1232 define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1233 ; CHECK: pmaddubsw
   1234 entry:  ; operands go <1 x i64> -> <8 x i8> -> x86_mmx ahead of the @llvm.x86.ssse3.pmadd.ub.sw call
   1235   %b.v8i8 = bitcast <1 x i64> %b to <8 x i8>
   1236   %a.v8i8 = bitcast <1 x i64> %a to <8 x i8>
   1237   %lhs.mmx = bitcast <8 x i8> %a.v8i8 to x86_mmx
   1238   %rhs.mmx = bitcast <8 x i8> %b.v8i8 to x86_mmx
   1239   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind readnone
   1240   %out.v8i8 = bitcast x86_mmx %out.mmx to <8 x i8>  ; the result is also viewed as <8 x i8> on the way out
   1241   %out.v1i64 = bitcast <8 x i8> %out.v8i8 to <1 x i64>
   1242   %out = extractelement <1 x i64> %out.v1i64, i32 0
   1243   ret i64 %out
   1244 }
   1245 
   1246 declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
   1247 
   1248 define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1249 ; CHECK: phsubsw
   1250 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.ssse3.phsub.sw call
   1251   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
   1252   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
   1253   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
   1254   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
   1255   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind readnone
   1256   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
   1257   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
   1258   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1259   ret i64 %out
   1260 }
   1261 
   1262 declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
   1263 
   1264 define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1265 ; CHECK: phsubd
   1266 entry:  ; operands go <1 x i64> -> <2 x i32> -> x86_mmx ahead of the @llvm.x86.ssse3.phsub.d call
   1267   %b.v2i32 = bitcast <1 x i64> %b to <2 x i32>
   1268   %a.v2i32 = bitcast <1 x i64> %a to <2 x i32>
   1269   %lhs.mmx = bitcast <2 x i32> %a.v2i32 to x86_mmx
   1270   %rhs.mmx = bitcast <2 x i32> %b.v2i32 to x86_mmx
   1271   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind readnone
   1272   %out.v2i32 = bitcast x86_mmx %out.mmx to <2 x i32>
   1273   %out.v1i64 = bitcast <2 x i32> %out.v2i32 to <1 x i64>
   1274   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1275   ret i64 %out
   1276 }
   1277 
   1278 declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
   1279 
   1280 define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1281 ; CHECK: phsubw
   1282 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.ssse3.phsub.w call
   1283   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
   1284   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
   1285   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
   1286   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
   1287   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind readnone
   1288   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
   1289   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
   1290   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1291   ret i64 %out
   1292 }
   1293 
   1294 declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
   1295 
   1296 define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1297 ; CHECK: phaddsw
   1298 entry:  ; operands go <1 x i64> -> <4 x i16> -> x86_mmx ahead of the @llvm.x86.ssse3.phadd.sw call
   1299   %b.v4i16 = bitcast <1 x i64> %b to <4 x i16>
   1300   %a.v4i16 = bitcast <1 x i64> %a to <4 x i16>
   1301   %lhs.mmx = bitcast <4 x i16> %a.v4i16 to x86_mmx
   1302   %rhs.mmx = bitcast <4 x i16> %b.v4i16 to x86_mmx
   1303   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind readnone
   1304   %out.v4i16 = bitcast x86_mmx %out.mmx to <4 x i16>
   1305   %out.v1i64 = bitcast <4 x i16> %out.v4i16 to <1 x i64>
   1306   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1307   ret i64 %out
   1308 }
   1309 
   1310 declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
   1311 
   1312 define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
   1313 ; CHECK: phaddd
   1314 entry:  ; operands go <1 x i64> -> <2 x i32> -> x86_mmx ahead of the @llvm.x86.ssse3.phadd.d call
   1315   %b.v2i32 = bitcast <1 x i64> %b to <2 x i32>
   1316   %a.v2i32 = bitcast <1 x i64> %a to <2 x i32>
   1317   %lhs.mmx = bitcast <2 x i32> %a.v2i32 to x86_mmx
   1318   %rhs.mmx = bitcast <2 x i32> %b.v2i32 to x86_mmx
   1319   %out.mmx = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind readnone
   1320   %out.v2i32 = bitcast x86_mmx %out.mmx to <2 x i32>
   1321   %out.v1i64 = bitcast <2 x i32> %out.v2i32 to <1 x i64>
   1322   %out = extractelement <1 x i64> %out.v1i64, i32 0  ; return the only i64 lane
   1323   ret i64 %out
   1324 }
   1325