Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s
      2 ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s
      3 
      4 define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
      5   ; CHECK: blendpd
      6   %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
      7   ret <2 x double> %res
      8 }
      9 declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
     10 
     11 
     12 define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
     13   ; CHECK: blendps
     14   %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
     15   ret <4 x float> %res
     16 }
     17 declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
     18 
     19 
     20 define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
     21   ; CHECK: blendvpd
     22   %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
     23   ret <2 x double> %res
     24 }
     25 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
     26 
     27 
     28 define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
     29   ; CHECK: blendvps
     30   %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
     31   ret <4 x float> %res
     32 }
     33 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
     34 
     35 
     36 define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
     37   ; CHECK: dppd
     38   %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
     39   ret <2 x double> %res
     40 }
     41 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
     42 
     43 
     44 define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
     45   ; CHECK: dpps
     46   %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
     47   ret <4 x float> %res
     48 }
     49 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
     50 
     51 
     52 define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
     53   ; CHECK: insertps
     54   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
     55   ret <4 x float> %res
     56 }
     57 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
     58 
     59 
     60 
     61 define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
     62   ; CHECK: mpsadbw
     63   %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
     64   ret <8 x i16> %res
     65 }
     66 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
     67 
     68 
     69 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
     70   ; CHECK: packusdw
     71   %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
     72   ret <8 x i16> %res
     73 }
     74 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
     75 
     76 
     77 define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
     78   ; CHECK: pblendvb
     79   %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
     80   ret <16 x i8> %res
     81 }
     82 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
     83 
     84 
     85 define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
     86   ; CHECK: pblendw
     87   %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
     88   ret <8 x i16> %res
     89 }
     90 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
     91 
     92 
     93 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
     94   ; CHECK: phminposuw
     95   %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
     96   ret <8 x i16> %res
     97 }
     98 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
     99 
    100 
    101 define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
    102   ; CHECK: pmaxsb
    103   %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
    104   ret <16 x i8> %res
    105 }
    106 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
    107 
    108 
    109 define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
    110   ; CHECK: pmaxsd
    111   %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
    112   ret <4 x i32> %res
    113 }
    114 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
    115 
    116 
    117 define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
    118   ; CHECK: pmaxud
    119   %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
    120   ret <4 x i32> %res
    121 }
    122 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
    123 
    124 
    125 define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
    126   ; CHECK: pmaxuw
    127   %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    128   ret <8 x i16> %res
    129 }
    130 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
    131 
    132 
    133 define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
    134   ; CHECK: pminsb
    135   %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
    136   ret <16 x i8> %res
    137 }
    138 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
    139 
    140 
    141 define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
    142   ; CHECK: pminsd
    143   %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
    144   ret <4 x i32> %res
    145 }
    146 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
    147 
    148 
    149 define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
    150   ; CHECK: pminud
    151   %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
    152   ret <4 x i32> %res
    153 }
    154 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
    155 
    156 
    157 define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
    158   ; CHECK: pminuw
    159   %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    160   ret <8 x i16> %res
    161 }
    162 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
    163 
    164 
    165 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
    166   ; CHECK: pmovzxbd
    167   %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
    168   ret <4 x i32> %res
    169 }
    170 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
    171 
    172 
    173 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
    174   ; CHECK: pmovzxbq
    175   %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
    176   ret <2 x i64> %res
    177 }
    178 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
    179 
    180 
    181 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
    182   ; CHECK: pmovzxbw
    183   %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
    184   ret <8 x i16> %res
    185 }
    186 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
    187 
    188 
    189 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
    190   ; CHECK: pmovzxdq
    191   %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
    192   ret <2 x i64> %res
    193 }
    194 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
    195 
    196 
    197 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
    198   ; CHECK: pmovzxwd
    199   %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
    200   ret <4 x i32> %res
    201 }
    202 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
    203 
    204 
    205 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
    206   ; CHECK: pmovzxwq
    207   %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
    208   ret <2 x i64> %res
    209 }
    210 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
    211 
    212 
    213 define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
    214   ; CHECK: pmuldq
    215   %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
    216   ret <2 x i64> %res
    217 }
    218 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
    219 
    220 
    221 define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
    222   ; CHECK: ptest 
    223   ; CHECK: sbbl
    224   %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
    225   ret i32 %res
    226 }
    227 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
    228 
    229 
    230 define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
    231   ; CHECK: ptest 
    232   ; CHECK: seta
    233   ; CHECK: movzbl
    234   %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
    235   ret i32 %res
    236 }
    237 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
    238 
    239 
    240 define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
    241   ; CHECK: ptest 
    242   ; CHECK: sete
    243   ; CHECK: movzbl
    244   %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
    245   ret i32 %res
    246 }
    247 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
    248 
    249 
    250 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
    251   ; CHECK: roundpd
    252   %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
    253   ret <2 x double> %res
    254 }
    255 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
    256 
    257 
    258 define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
    259   ; CHECK: roundps
    260   %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
    261   ret <4 x float> %res
    262 }
    263 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
    264 
    265 
    266 define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
    267   ; CHECK: roundsd
    268   %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
    269   ret <2 x double> %res
    270 }
    271 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
    272 
    273 
    274 define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
    275   ; CHECK: roundss
    276   %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
    277   ret <4 x float> %res
    278 }
    279 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
    280