; Codegen tests for the llvm.x86.sse2.* intrinsics on 32-bit Darwin.
; Two llc configurations are exercised: the first disables AVX, enables SSE2
; and additionally activates the SSE check prefix; the second targets the knl
; CPU and uses only the default check prefix.
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s
      3 
      4 define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
      5   ; CHECK: addsd
      6   %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
      7   ret <2 x double> %res
      8 }
      9 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
     10 
     11 
     12 define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
     13   ; CHECK: cmpordpd
     14   %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
     15   ret <2 x double> %res
     16 }
     17 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
     18 
     19 
     20 define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
     21   ; CHECK: cmpordsd
     22   %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
     23   ret <2 x double> %res
     24 }
     25 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
     26 
     27 
     28 define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
     29   ; CHECK: comisd
     30   ; CHECK: sete
     31   ; CHECK: movzbl
     32   %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
     33   ret i32 %res
     34 }
     35 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
     36 
     37 
     38 define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
     39   ; CHECK: comisd
     40   ; CHECK: setae
     41   ; CHECK: movzbl
     42   %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
     43   ret i32 %res
     44 }
     45 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
     46 
     47 
     48 define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
     49   ; CHECK: comisd
     50   ; CHECK: seta
     51   ; CHECK: movzbl
     52   %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
     53   ret i32 %res
     54 }
     55 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
     56 
     57 
     58 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
     59   ; CHECK: comisd
     60   ; CHECK: setbe
     61   ; CHECK: movzbl
     62   %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
     63   ret i32 %res
     64 }
     65 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
     66 
     67 
     68 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
     69   ; CHECK: comisd
     70   ; CHECK: sbbl    %eax, %eax
     71   ; CHECK: andl    $1, %eax
     72   %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
     73   ret i32 %res
     74 }
     75 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
     76 
     77 
     78 define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
     79   ; CHECK: comisd
     80   ; CHECK: setne
     81   ; CHECK: movzbl
     82   %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
     83   ret i32 %res
     84 }
     85 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
     86 
     87 
     88 define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
     89   ; CHECK: cvtdq2pd
     90   %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
     91   ret <2 x double> %res
     92 }
     93 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
     94 
     95 
     96 define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
     97   ; CHECK: cvtdq2ps
     98   %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
     99   ret <4 x float> %res
    100 }
    101 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
    102 
    103 
    104 define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
    105   ; CHECK: cvtpd2dq
    106   %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
    107   ret <4 x i32> %res
    108 }
    109 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
    110 
    111 
    112 define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
    113   ; CHECK: cvtpd2ps
    114   %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
    115   ret <4 x float> %res
    116 }
    117 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
    118 
    119 
    120 define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
    121   ; CHECK: cvtps2dq
    122   %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
    123   ret <4 x i32> %res
    124 }
    125 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
    126 
    127 
    128 define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
    129   ; CHECK: cvtps2pd
    130   %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
    131   ret <2 x double> %res
    132 }
    133 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
    134 
    135 
    136 define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
    137   ; CHECK: cvtsd2si
    138   %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
    139   ret i32 %res
    140 }
    141 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
    142 
    143 
    144 define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
    145   ; CHECK: cvtsd2ss 
    146   ; SSE-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}} 
    147   %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
    148   ret <4 x float> %res
    149 }
    150 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
    151 
    152 
    153 define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
    154   ; CHECK: movl
    155   ; CHECK: cvtsi2sd
    156   %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
    157   ret <2 x double> %res
    158 }
    159 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
    160 
    161 
    162 define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
    163   ; CHECK: cvtss2sd
    164   %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
    165   ret <2 x double> %res
    166 }
    167 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
    168 
    169 
    170 define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
    171   ; CHECK: cvttpd2dq
    172   %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
    173   ret <4 x i32> %res
    174 }
    175 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
    176 
    177 
    178 define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
    179   ; CHECK: cvttps2dq
    180   %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
    181   ret <4 x i32> %res
    182 }
    183 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
    184 
    185 
    186 define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
    187   ; CHECK: cvttsd2si
    188   %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
    189   ret i32 %res
    190 }
    191 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
    192 
    193 
    194 define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
    195   ; CHECK: divsd
    196   %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
    197   ret <2 x double> %res
    198 }
    199 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
    200 
    201 
    202 
    203 define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
    204   ; CHECK: maxpd
    205   %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
    206   ret <2 x double> %res
    207 }
    208 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
    209 
    210 
    211 define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
    212   ; CHECK: maxsd
    213   %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
    214   ret <2 x double> %res
    215 }
    216 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
    217 
    218 
    219 define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
    220   ; CHECK: minpd
    221   %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
    222   ret <2 x double> %res
    223 }
    224 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
    225 
    226 
    227 define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
    228   ; CHECK: minsd
    229   %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
    230   ret <2 x double> %res
    231 }
    232 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
    233 
    234 
    235 define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
    236   ; CHECK: movmskpd
    237   %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
    238   ret i32 %res
    239 }
    240 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
    241 
    242 
    243 
    244 
    245 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
    246   ; CHECK: test_x86_sse2_mul_sd
    247   ; CHECK: mulsd
    248   %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
    249   ret <2 x double> %res
    250 }
    251 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
    252 
    253 
    254 define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
    255   ; CHECK: packssdw
    256   %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
    257   ret <8 x i16> %res
    258 }
    259 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
    260 
    261 
    262 define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
    263   ; CHECK: packsswb
    264   %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
    265   ret <16 x i8> %res
    266 }
    267 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
    268 
    269 
    270 define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
    271   ; CHECK: packuswb
    272   %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
    273   ret <16 x i8> %res
    274 }
    275 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
    276 
    277 
    278 define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
    279   ; CHECK: paddsb
    280   %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
    281   ret <16 x i8> %res
    282 }
    283 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
    284 
    285 
    286 define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
    287   ; CHECK: paddsw
    288   %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    289   ret <8 x i16> %res
    290 }
    291 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
    292 
    293 
    294 define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
    295   ; CHECK: paddusb
    296   %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
    297   ret <16 x i8> %res
    298 }
    299 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
    300 
    301 
    302 define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
    303   ; CHECK: paddusw
    304   %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    305   ret <8 x i16> %res
    306 }
    307 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
    308 
    309 
    310 define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
    311   ; CHECK: pavgb
    312   %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
    313   ret <16 x i8> %res
    314 }
    315 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
    316 
    317 
    318 define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
    319   ; CHECK: pavgw
    320   %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    321   ret <8 x i16> %res
    322 }
    323 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
    324 
    325 
    326 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
    327   ; CHECK: pmaddwd
    328   %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
    329   ret <4 x i32> %res
    330 }
    331 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
    332 
    333 
    334 define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
    335   ; CHECK: pmaxsw
    336   %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    337   ret <8 x i16> %res
    338 }
    339 declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
    340 
    341 
    342 define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
    343   ; CHECK: pmaxub
    344   %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
    345   ret <16 x i8> %res
    346 }
    347 declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
    348 
    349 
    350 define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
    351   ; CHECK: pminsw
    352   %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    353   ret <8 x i16> %res
    354 }
    355 declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
    356 
    357 
    358 define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
    359   ; CHECK: pminub
    360   %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
    361   ret <16 x i8> %res
    362 }
    363 declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
    364 
    365 
    366 define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
    367   ; CHECK: pmovmskb
    368   %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
    369   ret i32 %res
    370 }
    371 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
    372 
    373 
    374 define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
    375   ; CHECK: pmulhw
    376   %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    377   ret <8 x i16> %res
    378 }
    379 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
    380 
    381 
    382 define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
    383   ; CHECK: pmulhuw
    384   %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    385   ret <8 x i16> %res
    386 }
    387 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
    388 
    389 
    390 define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
    391   ; CHECK: pmuludq
    392   %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
    393   ret <2 x i64> %res
    394 }
    395 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
    396 
    397 
    398 define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
    399   ; CHECK: psadbw
    400   %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
    401   ret <2 x i64> %res
    402 }
    403 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
    404 
    405 
    406 define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
    407   ; CHECK: pslld
    408   %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
    409   ret <4 x i32> %res
    410 }
    411 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
    412 
    413 
    414 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
    415   ; CHECK: psllq
    416   %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
    417   ret <2 x i64> %res
    418 }
    419 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
    420 
    421 
    422 define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
    423   ; CHECK: psllw
    424   %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    425   ret <8 x i16> %res
    426 }
    427 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
    428 
    429 
    430 define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
    431   ; CHECK: pslld
    432   %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
    433   ret <4 x i32> %res
    434 }
    435 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
    436 
    437 
    438 define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
    439   ; CHECK: psllq
    440   %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
    441   ret <2 x i64> %res
    442 }
    443 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
    444 
    445 
    446 define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
    447   ; CHECK: psllw
    448   %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
    449   ret <8 x i16> %res
    450 }
    451 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
    452 
    453 
    454 define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
    455   ; CHECK: psrad
    456   %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
    457   ret <4 x i32> %res
    458 }
    459 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
    460 
    461 
    462 define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
    463   ; CHECK: psraw
    464   %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    465   ret <8 x i16> %res
    466 }
    467 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
    468 
    469 
    470 define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
    471   ; CHECK: psrad
    472   %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
    473   ret <4 x i32> %res
    474 }
    475 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
    476 
    477 
    478 define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
    479   ; CHECK: psraw
    480   %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
    481   ret <8 x i16> %res
    482 }
    483 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
    484 
    485 
    486 define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
    487   ; CHECK: psrld
    488   %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
    489   ret <4 x i32> %res
    490 }
    491 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
    492 
    493 
    494 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
    495   ; CHECK: psrlq
    496   %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
    497   ret <2 x i64> %res
    498 }
    499 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
    500 
    501 
    502 define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
    503   ; CHECK: psrlw
    504   %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    505   ret <8 x i16> %res
    506 }
    507 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
    508 
    509 
    510 define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
    511   ; CHECK: psrld
    512   %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
    513   ret <4 x i32> %res
    514 }
    515 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
    516 
    517 
    518 define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
    519   ; CHECK: psrlq
    520   %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
    521   ret <2 x i64> %res
    522 }
    523 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
    524 
    525 
    526 define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
    527   ; CHECK: psrlw
    528   %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
    529   ret <8 x i16> %res
    530 }
    531 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
    532 
    533 
    534 define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
    535   ; CHECK: psubsb
    536   %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
    537   ret <16 x i8> %res
    538 }
    539 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
    540 
    541 
    542 define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
    543   ; CHECK: psubsw
    544   %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    545   ret <8 x i16> %res
    546 }
    547 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
    548 
    549 
    550 define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
    551   ; CHECK: psubusb
    552   %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
    553   ret <16 x i8> %res
    554 }
    555 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
    556 
    557 
    558 define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
    559   ; CHECK: psubusw
    560   %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    561   ret <8 x i16> %res
    562 }
    563 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
    564 
    565 
    566 define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
    567   ; CHECK: sqrtpd
    568   %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
    569   ret <2 x double> %res
    570 }
    571 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
    572 
    573 
    574 define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
    575   ; CHECK: sqrtsd
    576   %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
    577   ret <2 x double> %res
    578 }
    579 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
    580 
    581 
    582 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
    583   ; CHECK: test_x86_sse2_storel_dq
    584   ; CHECK: movl
    585   ; CHECK: movlps
    586   call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
    587   ret void
    588 }
    589 declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
    590 
    591 
    592 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
    593   ; CHECK: test_x86_sse2_storeu_dq
    594   ; CHECK: movl
    595   ; CHECK: movdqu
    596   ; add operation forces the execution domain.
    597   %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    598   call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
    599   ret void
    600 }
    601 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
    602 
    603 
    604 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
    605   ; CHECK: test_x86_sse2_storeu_pd
    606   ; CHECK: movl
    607   ; CHECK: movupd
    608   ; fadd operation forces the execution domain.
    609   %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
    610   call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
    611   ret void
    612 }
    613 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
    614 
    615 
    616 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
    617   ; CHECK: test_x86_sse2_sub_sd
    618   ; CHECK: subsd
    619   %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
    620   ret <2 x double> %res
    621 }
    622 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
    623 
    624 
    625 define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
    626   ; CHECK: ucomisd
    627   ; CHECK: sete
    628   ; CHECK: movzbl
    629   %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    630   ret i32 %res
    631 }
    632 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
    633 
    634 
    635 define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
    636   ; CHECK: ucomisd
    637   ; CHECK: setae
    638   ; CHECK: movzbl
    639   %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    640   ret i32 %res
    641 }
    642 declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
    643 
    644 
    645 define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
    646   ; CHECK: ucomisd
    647   ; CHECK: seta
    648   ; CHECK: movzbl
    649   %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    650   ret i32 %res
    651 }
    652 declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
    653 
    654 
    655 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
    656   ; CHECK: ucomisd
    657   ; CHECK: setbe
    658   ; CHECK: movzbl
    659   %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    660   ret i32 %res
    661 }
    662 declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
    663 
    664 
    665 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
    666   ; CHECK: ucomisd
    667   ; CHECK: sbbl
    668   %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    669   ret i32 %res
    670 }
    671 declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
    672 
    673 
    674 define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
    675   ; CHECK: ucomisd
    676   ; CHECK: setne
    677   ; CHECK: movzbl
    678   %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    679   ret i32 %res
    680 }
    681 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
    682 
    683 define void @test_x86_sse2_pause() {
    684   ; CHECK: pause
    685   tail call void @llvm.x86.sse2.pause()
    686   ret void 
    687 }
    688 declare void @llvm.x86.sse2.pause() nounwind
    689 
    690 define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
    691 ; CHECK-LABEL: test_x86_sse2_pshuf_d:
    692 ; CHECK: pshufd $27
    693 entry:
    694    %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
    695    ret <4 x i32> %res
    696 }
    697 declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
    698 
    699 define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
    700 ; CHECK-LABEL: test_x86_sse2_pshufl_w:
    701 ; CHECK: pshuflw $27
    702 entry:
    703    %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
    704    ret <8 x i16> %res
    705 }
    706 declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
    707 
    708 define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
    709 ; CHECK-LABEL: test_x86_sse2_pshufh_w:
    710 ; CHECK: pshufhw $27
    711 entry:
    712    %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
    713    ret <8 x i16> %res
    714 }
    715 declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
    716