Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s
      2 
      3 define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect addsd for llvm.x86.sse2.add.sd; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_add_sd:
      4   ; CHECK: addsd
      5   %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
      6   ret <2 x double> %res
      7 }
      8 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
      9 
     10 
     11 define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect cmpordpd for llvm.x86.sse2.cmp.pd with predicate 7; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_cmp_pd:
     12   ; CHECK: cmpordpd
     13   %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
     14   ret <2 x double> %res
     15 }
     16 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
     17 
     18 
     19 define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect cmpordsd for llvm.x86.sse2.cmp.sd with predicate 7; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_cmp_sd:
     20   ; CHECK: cmpordsd
     21   %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
     22   ret <2 x double> %res
     23 }
     24 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
     25 
     26 
     27 define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect comisd, sete, movzbl (in that order) for llvm.x86.sse2.comieq.sd.
          ; The label keeps all three matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_comieq_sd:
     28   ; CHECK: comisd
     29   ; CHECK: sete
     30   ; CHECK: movzbl
     31   %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
     32   ret i32 %res
     33 }
     34 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
     35 
     36 
     37 define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect comisd, setae, movzbl (in that order) for llvm.x86.sse2.comige.sd.
          ; The label keeps all three matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_comige_sd:
     38   ; CHECK: comisd
     39   ; CHECK: setae
     40   ; CHECK: movzbl
     41   %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
     42   ret i32 %res
     43 }
     44 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
     45 
     46 
     47 define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect comisd, seta, movzbl (in that order) for llvm.x86.sse2.comigt.sd.
          ; The label keeps all three matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_comigt_sd:
     48   ; CHECK: comisd
     49   ; CHECK: seta
     50   ; CHECK: movzbl
     51   %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
     52   ret i32 %res
     53 }
     54 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
     55 
     56 
     57 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect comisd, setbe, movzbl (in that order) for llvm.x86.sse2.comile.sd.
          ; The label keeps all three matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_comile_sd:
     58   ; CHECK: comisd
     59   ; CHECK: setbe
     60   ; CHECK: movzbl
     61   %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
     62   ret i32 %res
     63 }
     64 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
     65 
     66 
     67 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect comisd followed by the sbbl/andl sequence for llvm.x86.sse2.comilt.sd.
          ; The label keeps the matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_comilt_sd:
     68   ; CHECK: comisd
     69   ; CHECK: sbbl    %eax, %eax
     70   ; CHECK: andl    $1, %eax
     71   %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
     72   ret i32 %res
     73 }
     74 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
     75 
     76 
     77 define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect comisd, setne, movzbl (in that order) for llvm.x86.sse2.comineq.sd.
          ; The label keeps all three matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_comineq_sd:
     78   ; CHECK: comisd
     79   ; CHECK: setne
     80   ; CHECK: movzbl
     81   %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
     82   ret i32 %res
     83 }
     84 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
     85 
     86 
     87 define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
          ; Expect cvtdq2pd for llvm.x86.sse2.cvtdq2pd. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
     88   ; CHECK: cvtdq2pd
     89   %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
     90   ret <2 x double> %res
     91 }
     92 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
     93 
     94 
     95 define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
          ; Expect cvtdq2ps for llvm.x86.sse2.cvtdq2ps. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
     96   ; CHECK: cvtdq2ps
     97   %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
     98   ret <4 x float> %res
     99 }
    100 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
    101 
    102 
    103 define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
          ; Expect cvtpd2dq for llvm.x86.sse2.cvtpd2dq. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
    104   ; CHECK: cvtpd2dq
    105   %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
    106   ret <4 x i32> %res
    107 }
    108 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
    109 
    110 
    111 define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
          ; Expect cvtpd2ps for llvm.x86.sse2.cvtpd2ps. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
    112   ; CHECK: cvtpd2ps
    113   %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
    114   ret <4 x float> %res
    115 }
    116 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
    117 
    118 
    119 define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
          ; Expect cvtps2dq for llvm.x86.sse2.cvtps2dq. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_cvtps2dq:
    120   ; CHECK: cvtps2dq
    121   %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
    122   ret <4 x i32> %res
    123 }
    124 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
    125 
    126 
    127 define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
          ; Expect cvtps2pd for llvm.x86.sse2.cvtps2pd. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_cvtps2pd:
    128   ; CHECK: cvtps2pd
    129   %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
    130   ret <2 x double> %res
    131 }
    132 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
    133 
    134 
    135 define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
          ; Expect cvtsd2si for llvm.x86.sse2.cvtsd2si. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_cvtsd2si:
    136   ; CHECK: cvtsd2si
    137   %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
    138   ret i32 %res
    139 }
    140 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
    141 
    142 
    143 define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
          ; Expect the two-operand form of cvtsd2ss for llvm.x86.sse2.cvtsd2ss (the RUN line
          ; disables AVX). An unanchored CHECK for the mnemonic is already satisfied by this
          ; function's symbol name, so the label anchors the match on the instruction. A
          ; CHECK-NOT for the three-operand form placed after that match could never see the
          ; matched instruction, so a third register operand is rejected between the match
          ; and the return instead.
          ; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
    144   ; CHECK: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
    145   ; CHECK-NOT: , %xmm
          ; CHECK: ret
    146   %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
    147   ret <4 x float> %res
    148 }
    149 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
    150 
    151 
    152 define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
          ; Expect movl then cvtsi2sd for llvm.x86.sse2.cvtsi2sd called with the constant 7.
          ; The label keeps both matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
    153   ; CHECK: movl
    154   ; CHECK: cvtsi2sd
    155   %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
    156   ret <2 x double> %res
    157 }
    158 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
    159 
    160 
    161 define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
          ; Expect cvtss2sd for llvm.x86.sse2.cvtss2sd. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_cvtss2sd:
    162   ; CHECK: cvtss2sd
    163   %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
    164   ret <2 x double> %res
    165 }
    166 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
    167 
    168 
    169 define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
          ; Expect cvttpd2dq for llvm.x86.sse2.cvttpd2dq. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
    170   ; CHECK: cvttpd2dq
    171   %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
    172   ret <4 x i32> %res
    173 }
    174 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
    175 
    176 
    177 define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
          ; Expect cvttps2dq for llvm.x86.sse2.cvttps2dq. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_cvttps2dq:
    178   ; CHECK: cvttps2dq
    179   %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
    180   ret <4 x i32> %res
    181 }
    182 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
    183 
    184 
    185 define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
          ; Expect cvttsd2si for llvm.x86.sse2.cvttsd2si. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_cvttsd2si:
    186   ; CHECK: cvttsd2si
    187   %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
    188   ret i32 %res
    189 }
    190 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
    191 
    192 
    193 define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect divsd for llvm.x86.sse2.div.sd; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_div_sd:
    194   ; CHECK: divsd
    195   %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
    196   ret <2 x double> %res
    197 }
    198 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
    199 
    200 
    201 
    202 define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect maxpd for llvm.x86.sse2.max.pd; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_max_pd:
    203   ; CHECK: maxpd
    204   %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
    205   ret <2 x double> %res
    206 }
    207 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
    208 
    209 
    210 define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect maxsd for llvm.x86.sse2.max.sd; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_max_sd:
    211   ; CHECK: maxsd
    212   %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
    213   ret <2 x double> %res
    214 }
    215 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
    216 
    217 
    218 define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect minpd for llvm.x86.sse2.min.pd; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_min_pd:
    219   ; CHECK: minpd
    220   %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
    221   ret <2 x double> %res
    222 }
    223 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
    224 
    225 
    226 define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect minsd for llvm.x86.sse2.min.sd; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_min_sd:
    227   ; CHECK: minsd
    228   %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
    229   ret <2 x double> %res
    230 }
    231 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
    232 
    233 
    234 define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
          ; Expect movmskpd for llvm.x86.sse2.movmsk.pd; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_movmsk_pd:
    235   ; CHECK: movmskpd
    236   %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
    237   ret i32 %res
    238 }
    239 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
    240 
    241 
    242 
    243 
    244 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect mulsd for llvm.x86.sse2.mul.sd. The function-name check is a CHECK-LABEL on
          ; "name:" so it anchors on the label definition and bounds the mulsd match.
    245   ; CHECK-LABEL: test_x86_sse2_mul_sd:
    246   ; CHECK: mulsd
    247   %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
    248   ret <2 x double> %res
    249 }
    250 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
    251 
    252 
    253 define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
          ; Expect packssdw for llvm.x86.sse2.packssdw.128. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_packssdw_128:
    254   ; CHECK: packssdw
    255   %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
    256   ret <8 x i16> %res
    257 }
    258 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
    259 
    260 
    261 define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect packsswb for llvm.x86.sse2.packsswb.128. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_packsswb_128:
    262   ; CHECK: packsswb
    263   %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
    264   ret <16 x i8> %res
    265 }
    266 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
    267 
    268 
    269 define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect packuswb for llvm.x86.sse2.packuswb.128. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_packuswb_128:
    270   ; CHECK: packuswb
    271   %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
    272   ret <16 x i8> %res
    273 }
    274 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
    275 
    276 
    277 define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
          ; Expect paddsb for llvm.x86.sse2.padds.b; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_padds_b:
    278   ; CHECK: paddsb
    279   %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
    280   ret <16 x i8> %res
    281 }
    282 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
    283 
    284 
    285 define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect paddsw for llvm.x86.sse2.padds.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_padds_w:
    286   ; CHECK: paddsw
    287   %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
    288   ret <8 x i16> %res
    289 }
    290 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
    291 
    292 
    293 define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
          ; Expect paddusb for llvm.x86.sse2.paddus.b; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_paddus_b:
    294   ; CHECK: paddusb
    295   %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
    296   ret <16 x i8> %res
    297 }
    298 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
    299 
    300 
    301 define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect paddusw for llvm.x86.sse2.paddus.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_paddus_w:
    302   ; CHECK: paddusw
    303   %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
    304   ret <8 x i16> %res
    305 }
    306 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
    307 
    308 
    309 define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
          ; Expect pavgb for llvm.x86.sse2.pavg.b; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pavg_b:
    310   ; CHECK: pavgb
    311   %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
    312   ret <16 x i8> %res
    313 }
    314 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
    315 
    316 
    317 define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect pavgw for llvm.x86.sse2.pavg.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pavg_w:
    318   ; CHECK: pavgw
    319   %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
    320   ret <8 x i16> %res
    321 }
    322 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
    323 
    324 
    325 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect pmaddwd for llvm.x86.sse2.pmadd.wd; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pmadd_wd:
    326   ; CHECK: pmaddwd
    327   %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
    328   ret <4 x i32> %res
    329 }
    330 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
    331 
    332 
    333 define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect pmaxsw for llvm.x86.sse2.pmaxs.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pmaxs_w:
    334   ; CHECK: pmaxsw
    335   %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
    336   ret <8 x i16> %res
    337 }
    338 declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
    339 
    340 
    341 define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
          ; Expect pmaxub for llvm.x86.sse2.pmaxu.b; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pmaxu_b:
    342   ; CHECK: pmaxub
    343   %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
    344   ret <16 x i8> %res
    345 }
    346 declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
    347 
    348 
    349 define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect pminsw for llvm.x86.sse2.pmins.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pmins_w:
    350   ; CHECK: pminsw
    351   %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
    352   ret <8 x i16> %res
    353 }
    354 declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
    355 
    356 
    357 define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
          ; Expect pminub for llvm.x86.sse2.pminu.b; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pminu_b:
    358   ; CHECK: pminub
    359   %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
    360   ret <16 x i8> %res
    361 }
    362 declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
    363 
    364 
    365 define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
          ; Expect pmovmskb for llvm.x86.sse2.pmovmskb.128. The label matters here: an unanchored
          ; CHECK for the mnemonic is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
    366   ; CHECK: pmovmskb
    367   %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
    368   ret i32 %res
    369 }
    370 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
    371 
    372 
    373 define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect pmulhw for llvm.x86.sse2.pmulh.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pmulh_w:
    374   ; CHECK: pmulhw
    375   %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
    376   ret <8 x i16> %res
    377 }
    378 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
    379 
    380 
    381 define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect pmulhuw for llvm.x86.sse2.pmulhu.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pmulhu_w:
    382   ; CHECK: pmulhuw
    383   %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
    384   ret <8 x i16> %res
    385 }
    386 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
    387 
    388 
    389 define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
          ; Expect pmuludq for llvm.x86.sse2.pmulu.dq; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pmulu_dq:
    390   ; CHECK: pmuludq
    391   %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
    392   ret <2 x i64> %res
    393 }
    394 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
    395 
    396 
    397 define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
          ; Expect psadbw for llvm.x86.sse2.psad.bw; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psad_bw:
    398   ; CHECK: psadbw
    399   %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
    400   ret <2 x i64> %res
    401 }
    402 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
    403 
    404 
    405 define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
          ; Expect pslld for llvm.x86.sse2.psll.d. "pslld" is also a prefix of "pslldq", so the
          ; label keeps the match from landing in a later function.
          ; CHECK-LABEL: test_x86_sse2_psll_d:
    406   ; CHECK: pslld
    407   %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
    408   ret <4 x i32> %res
    409 }
    410 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
    411 
    412 
    413 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
          ; Expect pslldq for llvm.x86.sse2.psll.dq called with the constant 7; the label keeps
          ; the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psll_dq:
    414   ; CHECK: pslldq
    415   %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7)
    416   ret <2 x i64> %res
    417 }
    418 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
    419 
    420 
    421 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
          ; Expect pslldq for llvm.x86.sse2.psll.dq.bs called with the constant 7; the label
          ; keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psll_dq_bs:
    422   ; CHECK: pslldq
    423   %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7)
    424   ret <2 x i64> %res
    425 }
    426 declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
    427 
    428 
    429 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
          ; Expect psllq for llvm.x86.sse2.psll.q; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psll_q:
    430   ; CHECK: psllq
    431   %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
    432   ret <2 x i64> %res
    433 }
    434 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
    435 
    436 
    437 define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect psllw for llvm.x86.sse2.psll.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psll_w:
    438   ; CHECK: psllw
    439   %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
    440   ret <8 x i16> %res
    441 }
    442 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
    443 
    444 
    445 define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
          ; Expect pslld for llvm.x86.sse2.pslli.d called with the constant 7; the label keeps
          ; the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pslli_d:
    446   ; CHECK: pslld
    447   %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
    448   ret <4 x i32> %res
    449 }
    450 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
    451 
    452 
    453 define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
          ; Expect psllq for llvm.x86.sse2.pslli.q called with the constant 7; the label keeps
          ; the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pslli_q:
    454   ; CHECK: psllq
    455   %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
    456   ret <2 x i64> %res
    457 }
    458 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
    459 
    460 
    461 define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
          ; Expect psllw for llvm.x86.sse2.pslli.w called with the constant 7; the label keeps
          ; the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_pslli_w:
    462   ; CHECK: psllw
    463   %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
    464   ret <8 x i16> %res
    465 }
    466 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
    467 
    468 
    469 define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
          ; Expect psrad for llvm.x86.sse2.psra.d; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psra_d:
    470   ; CHECK: psrad
    471   %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
    472   ret <4 x i32> %res
    473 }
    474 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
    475 
    476 
    477 define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect psraw for llvm.x86.sse2.psra.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psra_w:
    478   ; CHECK: psraw
    479   %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
    480   ret <8 x i16> %res
    481 }
    482 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
    483 
    484 
    485 define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
          ; Expect psrad for llvm.x86.sse2.psrai.d called with the constant 7; the label keeps
          ; the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psrai_d:
    486   ; CHECK: psrad
    487   %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
    488   ret <4 x i32> %res
    489 }
    490 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
    491 
    492 
    493 define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
          ; Expect psraw for llvm.x86.sse2.psrai.w called with the constant 7; the label keeps
          ; the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psrai_w:
    494   ; CHECK: psraw
    495   %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
    496   ret <8 x i16> %res
    497 }
    498 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
    499 
    500 
    501 define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
          ; Expect psrld for llvm.x86.sse2.psrl.d. "psrld" is also a prefix of "psrldq", so the
          ; label keeps the match from landing in a later function.
          ; CHECK-LABEL: test_x86_sse2_psrl_d:
    502   ; CHECK: psrld
    503   %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
    504   ret <4 x i32> %res
    505 }
    506 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
    507 
    508 
    509 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
          ; Expect psrldq for llvm.x86.sse2.psrl.dq called with the constant 7; the label keeps
          ; the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psrl_dq:
    510   ; CHECK: psrldq
    511   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7)
    512   ret <2 x i64> %res
    513 }
    514 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
    515 
    516 
    517 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
          ; Expect psrldq for llvm.x86.sse2.psrl.dq.bs called with the constant 7; the label
          ; keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psrl_dq_bs:
    518   ; CHECK: psrldq
    519   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7)
    520   ret <2 x i64> %res
    521 }
    522 declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
    523 
    524 
    525 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
          ; Expect psrlq for llvm.x86.sse2.psrl.q; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psrl_q:
    526   ; CHECK: psrlq
    527   %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
    528   ret <2 x i64> %res
    529 }
    530 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
    531 
    532 
    533 define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect psrlw for llvm.x86.sse2.psrl.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psrl_w:
    534   ; CHECK: psrlw
    535   %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
    536   ret <8 x i16> %res
    537 }
    538 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
    539 
    540 
    541 define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
          ; Expect psrld for llvm.x86.sse2.psrli.d called with the constant 7; the label keeps
          ; the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psrli_d:
    542   ; CHECK: psrld
    543   %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
    544   ret <4 x i32> %res
    545 }
    546 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
    547 
    548 
    549 define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
          ; Expect psrlq for llvm.x86.sse2.psrli.q called with the constant 7; the label keeps
          ; the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psrli_q:
    550   ; CHECK: psrlq
    551   %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
    552   ret <2 x i64> %res
    553 }
    554 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
    555 
    556 
    557 define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
          ; Expect psrlw for llvm.x86.sse2.psrli.w called with the constant 7; the label keeps
          ; the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psrli_w:
    558   ; CHECK: psrlw
    559   %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
    560   ret <8 x i16> %res
    561 }
    562 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
    563 
    564 
    565 define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
          ; Expect psubsb for llvm.x86.sse2.psubs.b; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psubs_b:
    566   ; CHECK: psubsb
    567   %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
    568   ret <16 x i8> %res
    569 }
    570 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
    571 
    572 
    573 define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect psubsw for llvm.x86.sse2.psubs.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psubs_w:
    574   ; CHECK: psubsw
    575   %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
    576   ret <8 x i16> %res
    577 }
    578 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
    579 
    580 
    581 define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
          ; Expect psubusb for llvm.x86.sse2.psubus.b; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psubus_b:
    582   ; CHECK: psubusb
    583   %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
    584   ret <16 x i8> %res
    585 }
    586 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
    587 
    588 
    589 define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expect psubusw for llvm.x86.sse2.psubus.w; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_psubus_w:
    590   ; CHECK: psubusw
    591   %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
    592   ret <8 x i16> %res
    593 }
    594 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
    595 
    596 
    597 define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
          ; Expect sqrtpd for llvm.x86.sse2.sqrt.pd; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_sqrt_pd:
    598   ; CHECK: sqrtpd
    599   %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
    600   ret <2 x double> %res
    601 }
    602 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
    603 
    604 
    605 define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
          ; Expect sqrtsd for llvm.x86.sse2.sqrt.sd; the label keeps the match inside this function.
          ; CHECK-LABEL: test_x86_sse2_sqrt_sd:
    606   ; CHECK: sqrtsd
    607   %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
    608   ret <2 x double> %res
    609 }
    610 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
    611 
    612 
    613 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
          ; Expect movl then movq for llvm.x86.sse2.storel.dq. The function-name check is a
          ; CHECK-LABEL on "name:" so it anchors on the label definition and bounds the matches.
    614   ; CHECK-LABEL: test_x86_sse2_storel_dq:
    615   ; CHECK: movl
    616   ; CHECK: movq
    617   call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
    618   ret void
    619 }
    620 declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
    621 
    622 
    623 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
          ; Expect movl then movdqu for llvm.x86.sse2.storeu.dq. The function-name check is a
          ; CHECK-LABEL on "name:" so it anchors on the label definition and bounds the matches.
    624   ; CHECK-LABEL: test_x86_sse2_storeu_dq:
    625   ; CHECK: movl
    626   ; CHECK: movdqu
    627   ; The integer add forces the integer execution domain for the store.
    628   %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    629   call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
    630   ret void
    631 }
    632 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
    633 
    634 
    635 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
          ; Expect movl then movupd for llvm.x86.sse2.storeu.pd. The function-name check is a
          ; CHECK-LABEL on "name:" so it anchors on the label definition and bounds the matches.
    636   ; CHECK-LABEL: test_x86_sse2_storeu_pd:
    637   ; CHECK: movl
    638   ; CHECK: movupd
    639   ; The fadd forces the double-precision execution domain for the store.
    640   %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
    641   call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
    642   ret void
    643 }
    644 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
    645 
    646 
    647 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect subsd for llvm.x86.sse2.sub.sd. The function-name check is a CHECK-LABEL on
          ; "name:" so it anchors on the label definition and bounds the subsd match.
    648   ; CHECK-LABEL: test_x86_sse2_sub_sd:
    649   ; CHECK: subsd
    650   %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
    651   ret <2 x double> %res
    652 }
    653 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
    654 
    655 
    656 define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect ucomisd, sete, movzbl (in that order) for llvm.x86.sse2.ucomieq.sd.
          ; The label keeps all three matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
    657   ; CHECK: ucomisd
    658   ; CHECK: sete
    659   ; CHECK: movzbl
    660   %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
    661   ret i32 %res
    662 }
    663 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
    664 
    665 
    666 define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect ucomisd, setae, movzbl (in that order) for llvm.x86.sse2.ucomige.sd.
          ; The label keeps all three matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_ucomige_sd:
    667   ; CHECK: ucomisd
    668   ; CHECK: setae
    669   ; CHECK: movzbl
    670   %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
    671   ret i32 %res
    672 }
    673 declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
    674 
    675 
    676 define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect ucomisd, seta, movzbl (in that order) for llvm.x86.sse2.ucomigt.sd.
          ; The label keeps all three matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
    677   ; CHECK: ucomisd
    678   ; CHECK: seta
    679   ; CHECK: movzbl
    680   %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
    681   ret i32 %res
    682 }
    683 declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
    684 
    685 
    686 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect ucomisd, setbe, movzbl (in that order) for llvm.x86.sse2.ucomile.sd.
          ; The label keeps all three matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_ucomile_sd:
    687   ; CHECK: ucomisd
    688   ; CHECK: setbe
    689   ; CHECK: movzbl
    690   %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
    691   ret i32 %res
    692 }
    693 declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
    694 
    695 
    696 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect ucomisd then sbbl for llvm.x86.sse2.ucomilt.sd; the label keeps both matches
          ; inside this function.
          ; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
    697   ; CHECK: ucomisd
    698   ; CHECK: sbbl
    699   %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
    700   ret i32 %res
    701 }
    702 declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
    703 
    704 
    705 define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
          ; Expect ucomisd, setne, movzbl (in that order) for llvm.x86.sse2.ucomineq.sd.
          ; The label keeps all three matches inside this function.
          ; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
    706   ; CHECK: ucomisd
    707   ; CHECK: setne
    708   ; CHECK: movzbl
    709   %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
    710   ret i32 %res
    711 }
    712 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
    713 
    714 define void @test_x86_sse2_pause() {
          ; Expect a pause instruction for llvm.x86.sse2.pause. The label matters here: an
          ; unanchored CHECK for "pause" is already satisfied by this function's own symbol name.
          ; CHECK-LABEL: test_x86_sse2_pause:
    715   ; CHECK: pause
    716   tail call void @llvm.x86.sse2.pause()
    717   ret void
    718 }
    719 declare void @llvm.x86.sse2.pause() nounwind
    720 
    721 define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
        ; llvm.x86.sse2.pshuf.d must become pshufd with the i8 27 selector passed through
        ; as the immediate.
    722 ; CHECK-LABEL: test_x86_sse2_pshuf_d:
    723 ; CHECK: pshufd $27
    724 entry:
    725   %shuffled = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
    726   ret <4 x i32> %shuffled
    727 }
    728 declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
    729 
    730 define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
        ; llvm.x86.sse2.pshufl.w must become pshuflw with the i8 27 selector passed through
        ; as the immediate.
    731 ; CHECK-LABEL: test_x86_sse2_pshufl_w:
    732 ; CHECK: pshuflw $27
    733 entry:
    734   %shuffled = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
    735   ret <8 x i16> %shuffled
    736 }
    737 declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
    738 
    739 define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
        ; llvm.x86.sse2.pshufh.w must become pshufhw with the i8 27 selector passed through
        ; as the immediate.
    740 ; CHECK-LABEL: test_x86_sse2_pshufh_w:
    741 ; CHECK: pshufhw $27
    742 entry:
    743   %shuffled = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
    744   ret <8 x i16> %shuffled
    745 }
    746 declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
    747