Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s
      2 
      3 define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) { ; Lowering test: one call to llvm.x86.sse41.blendpd, result returned.
      4   ; CHECK: blendpd $7
      5   %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; i32 7 is expected to print as "$7"; requiring it stops the pattern from matching this function's own symbol name
      6   ret <2 x double> %res
      7 }
      8 declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
      9 
     10 
     11 define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) { ; Lowering test: one call to llvm.x86.sse41.blendps, result returned.
     12   ; CHECK: blendps $7
     13   %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; i32 7 is expected to print as "$7"; requiring it stops the pattern from matching this function's own symbol name
     14   ret <4 x float> %res
     15 }
     16 declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
     17 
     18 
     19 define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { ; Lowering test: one call to llvm.x86.sse41.blendvpd, result returned.
     20   ; CHECK: blendvpd %xmm
     21   %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
     22   ret <2 x double> %res
     23 }
     24 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
     25 
     26 
     27 define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; Lowering test: one call to llvm.x86.sse41.blendvps, result returned.
     28   ; CHECK: blendvps %xmm
     29   %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
     30   ret <4 x float> %res
     31 }
     32 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
     33 
     34 
     35 define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) { ; Lowering test: one call to llvm.x86.sse41.dppd, result returned.
     36   ; CHECK: dppd $7
     37   %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; i32 7 is expected to print as "$7"; requiring it stops the pattern from matching this function's own symbol name
     38   ret <2 x double> %res
     39 }
     40 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
     41 
     42 
     43 define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) { ; Lowering test: one call to llvm.x86.sse41.dpps, result returned.
     44   ; CHECK: dpps $7
     45   %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; i32 7 is expected to print as "$7"; requiring it stops the pattern from matching this function's own symbol name
     46   ret <4 x float> %res
     47 }
     48 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
     49 
     50 
     51 define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) { ; Lowering test: one call to llvm.x86.sse41.insertps, result returned.
     52   ; CHECK: insertps $7
     53   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; i32 7 is expected to print as "$7"; requiring it stops the pattern from matching this function's own symbol name
     54   ret <4 x float> %res
     55 }
     56 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
     57 
     58 
     59 
     60 define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { ; Lowering test: one call to llvm.x86.sse41.mpsadbw, result returned.
     61   ; CHECK: mpsadbw $7
     62   %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; i32 7 is expected to print as "$7"; requiring it stops the pattern from matching this function's own symbol name
     63   ret <8 x i16> %res
     64 }
     65 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
     66 
     67 
     68 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { ; Lowering test: one call to llvm.x86.sse41.packusdw, result returned.
     69   ; CHECK: packusdw %xmm
     70   %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
     71   ret <8 x i16> %res
     72 }
     73 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
     74 
     75 
     76 define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { ; Lowering test: one call to llvm.x86.sse41.pblendvb, result returned.
     77   ; CHECK: pblendvb %xmm
     78   %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
     79   ret <16 x i8> %res
     80 }
     81 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
     82 
     83 
     84 define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) { ; Lowering test: one call to llvm.x86.sse41.pblendw, result returned.
     85   ; CHECK: pblendw $7
     86   %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; i32 7 is expected to print as "$7"; requiring it stops the pattern from matching this function's own symbol name
     87   ret <8 x i16> %res
     88 }
     89 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
     90 
     91 
     92 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) { ; Lowering test: one call to llvm.x86.sse41.phminposuw, result returned.
     93   ; CHECK: phminposuw %xmm
     94   %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
     95   ret <8 x i16> %res
     96 }
     97 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
     98 
     99 
    100 define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { ; Lowering test: one call to llvm.x86.sse41.pmaxsb, result returned.
    101   ; CHECK: pmaxsb %xmm
    102   %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    103   ret <16 x i8> %res
    104 }
    105 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
    106 
    107 
    108 define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) { ; Lowering test: one call to llvm.x86.sse41.pmaxsd, result returned.
    109   ; CHECK: pmaxsd %xmm
    110   %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    111   ret <4 x i32> %res
    112 }
    113 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
    114 
    115 
    116 define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { ; Lowering test: one call to llvm.x86.sse41.pmaxud, result returned.
    117   ; CHECK: pmaxud %xmm
    118   %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    119   ret <4 x i32> %res
    120 }
    121 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
    122 
    123 
    124 define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { ; Lowering test: one call to llvm.x86.sse41.pmaxuw, result returned.
    125   ; CHECK: pmaxuw %xmm
    126   %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    127   ret <8 x i16> %res
    128 }
    129 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
    130 
    131 
    132 define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) { ; Lowering test: one call to llvm.x86.sse41.pminsb, result returned.
    133   ; CHECK: pminsb %xmm
    134   %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    135   ret <16 x i8> %res
    136 }
    137 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
    138 
    139 
    140 define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) { ; Lowering test: one call to llvm.x86.sse41.pminsd, result returned.
    141   ; CHECK: pminsd %xmm
    142   %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    143   ret <4 x i32> %res
    144 }
    145 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
    146 
    147 
    148 define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { ; Lowering test: one call to llvm.x86.sse41.pminud, result returned.
    149   ; CHECK: pminud %xmm
    150   %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    151   ret <4 x i32> %res
    152 }
    153 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
    154 
    155 
    156 define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { ; Lowering test: one call to llvm.x86.sse41.pminuw, result returned.
    157   ; CHECK: pminuw %xmm
    158   %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    159   ret <8 x i16> %res
    160 }
    161 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
    162 
    163 
    164 define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovsxbd, result returned.
    165   ; CHECK: pmovsxbd %xmm
    166   %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    167   ret <4 x i32> %res
    168 }
    169 declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
    170 
    171 
    172 define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovsxbq, result returned.
    173   ; CHECK: pmovsxbq %xmm
    174   %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    175   ret <2 x i64> %res
    176 }
    177 declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
    178 
    179 
    180 define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovsxbw, result returned.
    181   ; CHECK: pmovsxbw %xmm
    182   %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    183   ret <8 x i16> %res
    184 }
    185 declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
    186 
    187 
    188 define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovsxdq, result returned.
    189   ; CHECK: pmovsxdq %xmm
    190   %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    191   ret <2 x i64> %res
    192 }
    193 declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
    194 
    195 
    196 define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovsxwd, result returned.
    197   ; CHECK: pmovsxwd %xmm
    198   %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    199   ret <4 x i32> %res
    200 }
    201 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
    202 
    203 
    204 define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovsxwq, result returned.
    205   ; CHECK: pmovsxwq %xmm
    206   %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    207   ret <2 x i64> %res
    208 }
    209 declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
    210 
    211 
    212 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovzxbd, result returned.
    213   ; CHECK: pmovzxbd %xmm
    214   %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    215   ret <4 x i32> %res
    216 }
    217 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
    218 
    219 
    220 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovzxbq, result returned.
    221   ; CHECK: pmovzxbq %xmm
    222   %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    223   ret <2 x i64> %res
    224 }
    225 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
    226 
    227 
    228 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovzxbw, result returned.
    229   ; CHECK: pmovzxbw %xmm
    230   %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    231   ret <8 x i16> %res
    232 }
    233 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
    234 
    235 
    236 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovzxdq, result returned.
    237   ; CHECK: pmovzxdq %xmm
    238   %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    239   ret <2 x i64> %res
    240 }
    241 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
    242 
    243 
    244 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovzxwd, result returned.
    245   ; CHECK: pmovzxwd %xmm
    246   %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    247   ret <4 x i32> %res
    248 }
    249 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
    250 
    251 
    252 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { ; Lowering test: one call to llvm.x86.sse41.pmovzxwq, result returned.
    253   ; CHECK: pmovzxwq %xmm
    254   %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    255   ret <2 x i64> %res
    256 }
    257 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
    258 
    259 
    260 define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { ; Lowering test: one call to llvm.x86.sse41.pmuldq, result returned.
    261   ; CHECK: pmuldq %xmm
    262   %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; an %xmm operand is expected right after the mnemonic; requiring it stops the pattern from matching this function's own symbol name
    263   ret <2 x i64> %res
    264 }
    265 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
    266 
    267 
    268 define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) { ; Lowering test: one call to llvm.x86.sse41.ptestc, i32 result returned.
    269   ; CHECK: ptest %xmm
    270   ; CHECK: sbbl
    271   %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; the first pattern requires an %xmm operand after "ptest" (a trailing blank alone is ignored by FileCheck), so the symbol name cannot satisfy it
    272   ret i32 %res
    273 }
    274 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
    275 
    276 
    277 define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) { ; Lowering test: one call to llvm.x86.sse41.ptestnzc, i32 result returned.
    278   ; CHECK: ptest %xmm
    279   ; CHECK: seta
    280   ; CHECK: movzbl
    281   %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; the first pattern requires an %xmm operand after "ptest" (a trailing blank alone is ignored by FileCheck), so the symbol name cannot satisfy it
    282   ret i32 %res
    283 }
    284 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
    285 
    286 
    287 define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) { ; Lowering test: one call to llvm.x86.sse41.ptestz, i32 result returned.
    288   ; CHECK: ptest %xmm
    289   ; CHECK: sete
    290   ; CHECK: movzbl
    291   %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; the first pattern requires an %xmm operand after "ptest" (a trailing blank alone is ignored by FileCheck), so the symbol name cannot satisfy it
    292   ret i32 %res
    293 }
    294 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
    295 
    296 
    297 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { ; Lowering test: the round.pd intrinsic call is expected to yield a roundpd instruction.
    298   ; CHECK: roundpd
    299   %rounded = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; second operand is the constant i32 7
    300   ret <2 x double> %rounded
    301 }
    302 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
    303 
    304 
    305 define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) { ; Lowering test: the round.ps intrinsic call is expected to yield a roundps instruction.
    306   ; CHECK: roundps
    307   %rounded = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; second operand is the constant i32 7
    308   ret <4 x float> %rounded
    309 }
    310 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
    311 
    312 
    313 define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) { ; Lowering test: the round.sd intrinsic call is expected to yield a roundsd instruction.
    314   ; CHECK: roundsd
    315   %rounded = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; third operand is the constant i32 7
    316   ret <2 x double> %rounded
    317 }
    318 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
    319 
    320 
    321 define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) { ; Lowering test: the round.ss intrinsic call is expected to yield a roundss instruction.
    322   ; CHECK: roundss
    323   %rounded = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; third operand is the constant i32 7
    324   ret <4 x float> %rounded
    325 }
    326 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
    327