Home | History | Annotate | Download | only in X86
      1 ; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze < %s | FileCheck %s --check-prefix=AVX2
      2 ; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck %s --check-prefix=KNL
      3 ; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=skx -cost-model -analyze < %s | FileCheck %s --check-prefix=SKX
      4 
      5 
      6 ; AVX2-LABEL: test1
      7 ; AVX2: Found an estimated cost of 4 {{.*}}.masked
      8 define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {  ; masked load of <2 x double>; only the AVX2 prefix has check lines for this function
      9   %mask = icmp eq <2 x i64> %trigger, zeroinitializer  ; per-lane mask: true where the %trigger lane equals 0
     10   %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)  ; operands: pointer, alignment (4), mask, pass-through value
     11   ret <2 x double> %res
     12 }
     13 
     14 ; AVX2-LABEL: test2
     15 ; AVX2: Found an estimated cost of 4 {{.*}}.masked
     16 define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {  ; masked load of <4 x i32>; only the AVX2 prefix has check lines for this function
     17   %mask = icmp eq <4 x i32> %trigger, zeroinitializer  ; per-lane mask: true where the %trigger lane equals 0
     18   %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)  ; operands: pointer, alignment (4), mask, pass-through value
     19   ret <4 x i32> %res
     20 }
     21 
     22 ; AVX2-LABEL: test3
     23 ; AVX2: Found an estimated cost of 4 {{.*}}.masked
     24 define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {  ; masked store of <4 x i32>; only the AVX2 prefix has check lines for this function
     25   %mask = icmp eq <4 x i32> %trigger, zeroinitializer  ; per-lane mask: true where the %trigger lane equals 0
     26   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)  ; operands: value to store, pointer, alignment (4), mask
     27   ret void
     28 }
     29 
     30 ; AVX2-LABEL: test4
     31 ; AVX2: Found an estimated cost of 4 {{.*}}.masked
     32 define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {  ; masked load of <8 x float>; only the AVX2 prefix has check lines for this function
     33   %mask = icmp eq <8 x i32> %trigger, zeroinitializer  ; per-lane mask: true where the %trigger lane equals 0
     34   %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)  ; operands: pointer, alignment (4), mask, pass-through value
     35   ret <8 x float> %res
     36 }
     37 
     38 ; AVX2-LABEL: test5
     39 ; AVX2: Found an estimated cost of 5 {{.*}}.masked
     40 define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {  ; masked store of a two-element float vector; only the AVX2 prefix has check lines for this function
     41   %mask = icmp eq <2 x i32> %trigger, zeroinitializer  ; per-lane mask: true where the %trigger lane equals 0
     42   call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)  ; operands: value to store, pointer, alignment (4), mask
     43   ret void
     44 }
     45 
     46 ; AVX2-LABEL: test6
     47 ; AVX2: Found an estimated cost of 6 {{.*}}.masked
     48 define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {  ; integer counterpart of test5: masked store of <2 x i32>; only the AVX2 prefix has check lines
     49   %mask = icmp eq <2 x i32> %trigger, zeroinitializer  ; per-lane mask: true where the %trigger lane equals 0
     50   call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)  ; operands: value to store, pointer, alignment (4), mask
     51   ret void
     52 }
     53 
     54 ; AVX2-LABEL: test7
     55 ; AVX2: Found an estimated cost of 5 {{.*}}.masked
     56 define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {  ; masked load of a two-element float vector; only the AVX2 prefix has check lines for this function
     57   %mask = icmp eq <2 x i32> %trigger, zeroinitializer  ; per-lane mask: true where the %trigger lane equals 0
     58   %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)  ; operands: pointer, alignment (4), mask, pass-through value
     59   ret <2 x float> %res
     60 }
     61 
     62 ; AVX2-LABEL: test8
     63 ; AVX2: Found an estimated cost of 6 {{.*}}.masked
     64 define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {  ; integer counterpart of test7: masked load of <2 x i32>; only the AVX2 prefix has check lines
     65   %mask = icmp eq <2 x i32> %trigger, zeroinitializer  ; per-lane mask: true where the %trigger lane equals 0
     66   %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)  ; operands: pointer, alignment (4), mask, pass-through value
     67   ret <2 x i32> %res
     68 }
     69 
     70 define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0)  {  ; gather of two doubles through a vector of pointers with a variable mask; checked under all three prefixes
     71 
     72 ; AVX2-LABEL: test_gather_2f64
     73 ; AVX2: Found an estimated cost of 7 {{.*}}.gather
     74 
     75 ; KNL-LABEL: test_gather_2f64
     76 ; KNL: Found an estimated cost of 7 {{.*}}.gather
     77 
     78 ; SKX-LABEL: test_gather_2f64
     79 ; SKX: Found an estimated cost of 7 {{.*}}.gather
     80 
     81 %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)  ; operands: pointer vector, alignment (4), mask, pass-through value
     82   ret <2 x double> %res
     83 }
     84 declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
     85 
     86 define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0)  {  ; gather of four i32 values through a vector of pointers with a variable mask; checked under all three prefixes
     87 
     88 ; AVX2-LABEL: test_gather_4i32
     89 ; AVX2: Found an estimated cost of 16 {{.*}}.gather
     90 
     91 ; KNL-LABEL: test_gather_4i32
     92 ; KNL: Found an estimated cost of 16 {{.*}}.gather
     93 
     94 ; SKX-LABEL: test_gather_4i32
     95 ; SKX: Found an estimated cost of 6 {{.*}}.gather
     96 
     97 %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)  ; operands: pointer vector, alignment (4), mask, pass-through value
     98   ret <4 x i32> %res
     99 }
    100 
    101 define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0)  {  ; same gather as test_gather_4i32 but with an all-true constant mask instead of a mask argument
    102 
    103 ; AVX2-LABEL: test_gather_4i32_const_mask
    104 ; AVX2: Found an estimated cost of 8 {{.*}}.gather
    105 
    106 ; KNL-LABEL: test_gather_4i32_const_mask
    107 ; KNL: Found an estimated cost of 8 {{.*}}.gather
    108 
    109 ; SKX-LABEL: test_gather_4i32_const_mask
    110 ; SKX: Found an estimated cost of 6 {{.*}}.gather
    111 
    112 %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)  ; operands: pointer vector, alignment (4), constant all-true mask, pass-through value
    113   ret <4 x i32> %res
    114 }
    115 declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)
    116 
    117 define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {  ; 16-lane float gather from a scalar base plus a vector of indices, with an all-true constant mask
    118 
    119 ; AVX2-LABEL: test_gather_16f32_const_mask
    120 ; AVX2: Found an estimated cost of 30 {{.*}}.gather
    121 
    122 ; KNL-LABEL: test_gather_16f32_const_mask
    123 ; KNL: Found an estimated cost of 18 {{.*}}.gather
    124 
    125 ; SKX-LABEL: test_gather_16f32_const_mask
    126 ; SKX: Found an estimated cost of 18 {{.*}}.gather
    127 
    128   %sext_ind = sext <16 x i32> %ind to <16 x i64>  ; sign-extend the 32-bit indices to 64 bits
    129   %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind  ; scalar base with a vector index produces a <16 x float*> address vector
    130 
    131   %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)  ; operands: address vector, alignment (4), constant all-true mask, undef pass-through
    132   ret <16 x float>%res
    133 }
    134 
    135 define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) {  ; 16-lane float gather from a scalar base plus a vector of indices, with the mask passed in as an argument
    136 
    137 ; AVX2-LABEL: test_gather_16f32_var_mask
    138 ; AVX2: Found an estimated cost of 62 {{.*}}.gather
    139 
    140 ; KNL-LABEL: test_gather_16f32_var_mask
    141 ; KNL: Found an estimated cost of 18 {{.*}}.gather
    142 
    143 ; SKX-LABEL: test_gather_16f32_var_mask
    144 ; SKX: Found an estimated cost of 18 {{.*}}.gather
    145 
    146   %sext_ind = sext <16 x i32> %ind to <16 x i64>  ; sign-extend the 32-bit indices to 64 bits
    147   %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind  ; scalar base with a vector index produces a <16 x float*> address vector
    148 
    149   %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)  ; operands: address vector, alignment (4), variable mask, undef pass-through
    150   ret <16 x float>%res
    151 }
    152 
    153 define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {  ; variable-mask gather where the base is already a vector of pointers ("ra" presumably means random access; not stated in this file)
    154 
    155 ; AVX2-LABEL: test_gather_16f32_ra_var_mask
    156 ; AVX2: Found an estimated cost of 62 {{.*}}.gather
    157 
    158 ; KNL-LABEL: test_gather_16f32_ra_var_mask
    159 ; KNL: Found an estimated cost of 20 {{.*}}.gather
    160 
    161 ; SKX-LABEL: test_gather_16f32_ra_var_mask
    162 ; SKX: Found an estimated cost of 20 {{.*}}.gather
    163 
    164   %sext_ind = sext <16 x i32> %ind to <16 x i64>  ; sign-extend the 32-bit indices to 64 bits
    165   %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind  ; both base and index are vectors here, unlike the scalar-base variants in this file
    166 
    167   %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)  ; operands: address vector, alignment (4), variable mask, undef pass-through
    168   ret <16 x float>%res
    169 }
    170 
    171 define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) {  ; like test_gather_16f32_const_mask, but the scalar base is explicitly splatted to a vector before the GEP; the check lines expect the same costs
    172 
    173 ; AVX2-LABEL: test_gather_16f32_const_mask2
    174 ; AVX2: Found an estimated cost of 30 {{.*}}.gather
    175 
    176 ; KNL-LABEL: test_gather_16f32_const_mask2
    177 ; KNL: Found an estimated cost of 18 {{.*}}.gather
    178 
    179 ; SKX-LABEL: test_gather_16f32_const_mask2
    180 ; SKX: Found an estimated cost of 18 {{.*}}.gather
    181 
    182   %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0  ; place %base in lane 0
    183   %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer  ; all-zero shuffle mask copies lane 0 into every lane
    184 
    185   %sext_ind = sext <16 x i32> %ind to <16 x i64>  ; sign-extend the 32-bit indices to 64 bits
    186   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind  ; vector GEP on the splatted base
    187 
    188   %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)  ; operands: address vector, alignment (4), constant all-true mask, undef pass-through
    189   ret <16 x float>%res
    190 }
    191 
    192 define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {  ; 16-lane i32 scatter to a splatted base plus vector indices; the mask arrives as a scalar i16
    193 ; AVX2-LABEL: test_scatter_16i32
    194 ; AVX2: Found an estimated cost of 64 {{.*}}.scatter
    195 
    196 ; KNL-LABEL: test_scatter_16i32
    197 ; KNL: Found an estimated cost of 18 {{.*}}.scatter
    198 
    199 ; SKX-LABEL: test_scatter_16i32
    200 ; SKX: Found an estimated cost of 18 {{.*}}.scatter
    201 
    202   %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0  ; place %base in lane 0
    203   %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer  ; all-zero shuffle mask copies lane 0 into every lane
    204 
    205   %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind  ; the i32 indices are used directly, with no explicit sext in this function
    206   %imask = bitcast i16 %mask to <16 x i1>  ; reinterpret the 16 mask bits as a <16 x i1> vector
    207   call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)  ; operands: values to store, address vector, alignment (4), mask
    208   ret void
    209 }
    210 
    211 define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {  ; 8-lane i32 scatter through a vector of pointers with a variable mask; checked under all three prefixes
    212 ; AVX2-LABEL: test_scatter_8i32
    213 ; AVX2: Found an estimated cost of 32 {{.*}}.scatter
    214 
    215 ; KNL-LABEL: test_scatter_8i32
    216 ; KNL: Found an estimated cost of 10 {{.*}}.scatter
    217 
    218 ; SKX-LABEL: test_scatter_8i32
    219 ; SKX: Found an estimated cost of 10 {{.*}}.scatter
    220 
    221   call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)  ; operands: values to store, pointer vector, alignment (4), mask
    222   ret void
    223 }
    224 
    225 declare void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)
    226 
    227 define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {  ; 4-lane i32 scatter through a vector of pointers with a variable mask; checked under all three prefixes
    228 ; AVX2-LABEL: test_scatter_4i32
    229 ; AVX2: Found an estimated cost of 16 {{.*}}.scatter
    230 
    231 ; KNL-LABEL: test_scatter_4i32
    232 ; KNL: Found an estimated cost of 16 {{.*}}.scatter
    233 
    234 ; SKX-LABEL: test_scatter_4i32
    235 ; SKX: Found an estimated cost of 6 {{.*}}.scatter
    236 
    237   call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)  ; operands: values to store, pointer vector, alignment (4), mask
    238   ret void
    239 }
    240 
    241 define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) {  ; 4-lane float gather from a scalar base plus a vector of indices, with the mask passed in as an argument
    242 
    243 ; AVX2-LABEL: test_gather_4f32
    244 ; AVX2: Found an estimated cost of 15 {{.*}}.gather
    245 
    246 ; KNL-LABEL: test_gather_4f32
    247 ; KNL: Found an estimated cost of 15 {{.*}}.gather
    248 
    249 ; SKX-LABEL: test_gather_4f32
    250 ; SKX: Found an estimated cost of 6 {{.*}}.gather
    251 
    252   %sext_ind = sext <4 x i32> %ind to <4 x i64>  ; sign-extend the 32-bit indices to 64 bits
    253   %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind  ; scalar base with a vector index produces a <4 x float*> address vector
    254 
    255   %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)  ; operands: address vector, alignment (4), variable mask, undef pass-through
    256   ret <4 x float>%res
    257 }
    258 
    259 define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {  ; same gather as test_gather_4f32 but with an all-true constant mask instead of a mask argument
    260 
    261 ; AVX2-LABEL: test_gather_4f32_const_mask
    262 ; AVX2: Found an estimated cost of 7 {{.*}}.gather
    263 
    264 ; KNL-LABEL: test_gather_4f32_const_mask
    265 ; KNL: Found an estimated cost of 7 {{.*}}.gather
    266 
    267 ; SKX-LABEL: test_gather_4f32_const_mask
    268 ; SKX: Found an estimated cost of 6 {{.*}}.gather
    269 
    270   %sext_ind = sext <4 x i32> %ind to <4 x i64>  ; sign-extend the 32-bit indices to 64 bits
    271   %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind  ; scalar base with a vector index produces a <4 x float*> address vector
    272 
    273   %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)  ; operands: address vector, alignment (4), constant all-true mask, undef pass-through
    274   ret <4 x float>%res
    275 }
    276 
    277 declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
    278 declare void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
    279 declare void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
    280 declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
    281 
    282 declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
    283 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
    284 declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
    285 declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
    286 declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
    287 declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
    288 declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
    289 declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
    290 declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
    291 declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
    292 declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
    293 declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
    294 declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
    295 declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
    296 declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
    297 declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
    298 declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
    299 declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
    300 declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
    301