Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
      2 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze | FileCheck %s --check-prefix=AVX2
      3 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -cost-model -analyze | FileCheck %s --check-prefix=SKL
      4 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze | FileCheck %s --check-prefix=KNL
      5 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -cost-model -analyze | FileCheck %s --check-prefix=SKX
      6 
      7 
      8 define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
      9 ; AVX2-LABEL: 'test1'
     10 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
     11 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
     12 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
     13 ;
     14 ; SKL-LABEL: 'test1'
     15 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
     16 ; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
     17 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
     18 ;
     19 ; KNL-LABEL: 'test1'
     20 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
     21 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
     22 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
     23 ;
     24 ; SKX-LABEL: 'test1'
     25 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
     26 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
     27 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
     28 ;
     29   %mask = icmp eq <2 x i64> %trigger, zeroinitializer
     30   %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
     31   ret <2 x double> %res
     32 }
     33 
     34 define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
     35 ; AVX2-LABEL: 'test2'
     36 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
     37 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
     38 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
     39 ;
     40 ; SKL-LABEL: 'test2'
     41 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
     42 ; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
     43 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
     44 ;
     45 ; KNL-LABEL: 'test2'
     46 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
     47 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
     48 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
     49 ;
     50 ; SKX-LABEL: 'test2'
     51 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
     52 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
     53 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
     54 ;
     55   %mask = icmp eq <4 x i32> %trigger, zeroinitializer
     56   %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
     57   ret <4 x i32> %res
     58 }
     59 
     60 define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
     61 ; AVX2-LABEL: 'test3'
     62 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
     63 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
     64 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
     65 ;
     66 ; SKL-LABEL: 'test3'
     67 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
     68 ; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
     69 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
     70 ;
     71 ; KNL-LABEL: 'test3'
     72 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
     73 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
     74 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
     75 ;
     76 ; SKX-LABEL: 'test3'
     77 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
     78 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
     79 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
     80 ;
     81   %mask = icmp eq <4 x i32> %trigger, zeroinitializer
     82   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
     83   ret void
     84 }
     85 
     86 define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
     87 ; AVX2-LABEL: 'test4'
     88 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
     89 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
     90 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
     91 ;
     92 ; SKL-LABEL: 'test4'
     93 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
     94 ; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
     95 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
     96 ;
     97 ; KNL-LABEL: 'test4'
     98 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
     99 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
    100 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
    101 ;
    102 ; SKX-LABEL: 'test4'
    103 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
    104 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
    105 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
    106 ;
    107   %mask = icmp eq <8 x i32> %trigger, zeroinitializer
    108   %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
    109   ret <8 x float> %res
    110 }
    111 
    112 define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
    113 ; AVX2-LABEL: 'test5'
    114 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    115 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
    116 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    117 ;
    118 ; SKL-LABEL: 'test5'
    119 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    120 ; SKL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
    121 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    122 ;
    123 ; KNL-LABEL: 'test5'
    124 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    125 ; KNL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
    126 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    127 ;
    128 ; SKX-LABEL: 'test5'
    129 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    130 ; SKX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
    131 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    132 ;
    133   %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    134   call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
    135   ret void
    136 }
    137 
    138 define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
    139 ; AVX2-LABEL: 'test6'
    140 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    141 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
    142 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    143 ;
    144 ; SKL-LABEL: 'test6'
    145 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    146 ; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
    147 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    148 ;
    149 ; KNL-LABEL: 'test6'
    150 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    151 ; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
    152 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    153 ;
    154 ; SKX-LABEL: 'test6'
    155 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    156 ; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
    157 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    158 ;
    159   %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    160   call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
    161   ret void
    162 }
    163 
    164 define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
    165 ; AVX2-LABEL: 'test7'
    166 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    167 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
    168 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
    169 ;
    170 ; SKL-LABEL: 'test7'
    171 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    172 ; SKL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
    173 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
    174 ;
    175 ; KNL-LABEL: 'test7'
    176 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    177 ; KNL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
    178 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
    179 ;
    180 ; SKX-LABEL: 'test7'
    181 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    182 ; SKX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
    183 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
    184 ;
    185   %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    186   %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
    187   ret <2 x float> %res
    188 }
    189 
    190 define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
    191 ; AVX2-LABEL: 'test8'
    192 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    193 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
    194 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
    195 ;
    196 ; SKL-LABEL: 'test8'
    197 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    198 ; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
    199 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
    200 ;
    201 ; KNL-LABEL: 'test8'
    202 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    203 ; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
    204 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
    205 ;
    206 ; SKX-LABEL: 'test8'
    207 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    208 ; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
    209 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
    210 ;
    211   %mask = icmp eq <2 x i32> %trigger, zeroinitializer
    212   %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
    213   ret <2 x i32> %res
    214 }
    215 
    216 define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0)  {
    217 ; AVX2-LABEL: 'test_gather_2f64'
    218 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
    219 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
    220 ;
    221 ; SKL-LABEL: 'test_gather_2f64'
    222 ; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
    223 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
    224 ;
    225 ; KNL-LABEL: 'test_gather_2f64'
    226 ; KNL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
    227 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
    228 ;
    229 ; SKX-LABEL: 'test_gather_2f64'
    230 ; SKX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
    231 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
    232 ;
    233 
    234 
    235 
    236 
    237 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
    238   ret <2 x double> %res
    239 }
    240 declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
    241 
    242 define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0)  {
    243 ; AVX2-LABEL: 'test_gather_4i32'
    244 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
    245 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
    246 ;
    247 ; SKL-LABEL: 'test_gather_4i32'
    248 ; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
    249 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
    250 ;
    251 ; KNL-LABEL: 'test_gather_4i32'
    252 ; KNL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
    253 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
    254 ;
    255 ; SKX-LABEL: 'test_gather_4i32'
    256 ; SKX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
    257 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
    258 ;
    259 
    260 
    261 
    262 
    263 %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
    264   ret <4 x i32> %res
    265 }
    266 
    267 define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0)  {
    268 ; AVX2-LABEL: 'test_gather_4i32_const_mask'
    269 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
    270 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
    271 ;
    272 ; SKL-LABEL: 'test_gather_4i32_const_mask'
    273 ; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
    274 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
    275 ;
    276 ; KNL-LABEL: 'test_gather_4i32_const_mask'
    277 ; KNL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
    278 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
    279 ;
    280 ; SKX-LABEL: 'test_gather_4i32_const_mask'
    281 ; SKX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
    282 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
    283 ;
    284 
    285 
    286 
    287 
    288 %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
    289   ret <4 x i32> %res
    290 }
    291 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)
    292 
    293 define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {
    294 ; AVX2-LABEL: 'test_gather_16f32_const_mask'
    295 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    296 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
    297 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
    298 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    299 ;
    300 ; SKL-LABEL: 'test_gather_16f32_const_mask'
    301 ; SKL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    302 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
    303 ; SKL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
    304 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    305 ;
    306 ; KNL-LABEL: 'test_gather_16f32_const_mask'
    307 ; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    308 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
    309 ; KNL-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
    310 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    311 ;
    312 ; SKX-LABEL: 'test_gather_16f32_const_mask'
    313 ; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    314 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
    315 ; SKX-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
    316 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    317 ;
    318   %sext_ind = sext <16 x i32> %ind to <16 x i64>
    319   %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
    320 
    321   %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
    322   ret <16 x float>%res
    323 }
    324 
    325 define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) {
    326 ; AVX2-LABEL: 'test_gather_16f32_var_mask'
    327 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    328 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
    329 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
    330 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    331 ;
    332 ; SKL-LABEL: 'test_gather_16f32_var_mask'
    333 ; SKL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    334 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
    335 ; SKL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
    336 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    337 ;
    338 ; KNL-LABEL: 'test_gather_16f32_var_mask'
    339 ; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    340 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
    341 ; KNL-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
    342 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    343 ;
    344 ; SKX-LABEL: 'test_gather_16f32_var_mask'
    345 ; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    346 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
    347 ; SKX-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
    348 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    349 ;
    350   %sext_ind = sext <16 x i32> %ind to <16 x i64>
    351   %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
    352 
    353   %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
    354   ret <16 x float>%res
    355 }
    356 
    357 define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {
    358 ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'
    359 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    360 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
    361 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
    362 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    363 ;
    364 ; SKL-LABEL: 'test_gather_16f32_ra_var_mask'
    365 ; SKL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    366 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
    367 ; SKL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
    368 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    369 ;
    370 ; KNL-LABEL: 'test_gather_16f32_ra_var_mask'
    371 ; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    372 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
    373 ; KNL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
    374 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    375 ;
    376 ; SKX-LABEL: 'test_gather_16f32_ra_var_mask'
    377 ; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    378 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
    379 ; SKX-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
    380 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    381 ;
    382   %sext_ind = sext <16 x i32> %ind to <16 x i64>
    383   %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
    384 
    385   %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
    386   ret <16 x float>%res
    387 }
    388 
    389 define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) {
    390 ; AVX2-LABEL: 'test_gather_16f32_const_mask2'
    391 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
    392 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
    393 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    394 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
    395 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
    396 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    397 ;
    398 ; SKL-LABEL: 'test_gather_16f32_const_mask2'
    399 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
    400 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
    401 ; SKL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    402 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
    403 ; SKL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
    404 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    405 ;
    406 ; KNL-LABEL: 'test_gather_16f32_const_mask2'
    407 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
    408 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
    409 ; KNL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    410 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
    411 ; KNL-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
    412 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    413 ;
    414 ; SKX-LABEL: 'test_gather_16f32_const_mask2'
    415 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
    416 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
    417 ; SKX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
    418 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
    419 ; SKX-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
    420 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
    421 ;
    422   %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
    423   %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
    424 
    425   %sext_ind = sext <16 x i32> %ind to <16 x i64>
    426   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
    427 
    428   %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
    429   ret <16 x float>%res
    430 }
    431 
    432 define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
    433 ; AVX2-LABEL: 'test_scatter_16i32'
    434 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
    435 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
    436 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
    437 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
    438 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
    439 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    440 ;
    441 ; SKL-LABEL: 'test_scatter_16i32'
    442 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
    443 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
    444 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
    445 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
    446 ; SKL-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
    447 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    448 ;
    449 ; KNL-LABEL: 'test_scatter_16i32'
    450 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
    451 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
    452 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
    453 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>
    454 ; KNL-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
    455 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    456 ;
    457 ; SKX-LABEL: 'test_scatter_16i32'
    458 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
    459 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
    460 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
    461 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>
    462 ; SKX-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
    463 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    464 ;
    465   %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
    466   %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
    467 
    468   %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
    469   %imask = bitcast i16 %mask to <16 x i1>
    470   call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
    471   ret void
    472 }
    473 
    474 define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
    475 ; AVX2-LABEL: 'test_scatter_8i32'
    476 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
    477 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    478 ;
    479 ; SKL-LABEL: 'test_scatter_8i32'
    480 ; SKL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
    481 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    482 ;
    483 ; KNL-LABEL: 'test_scatter_8i32'
    484 ; KNL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
    485 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    486 ;
    487 ; SKX-LABEL: 'test_scatter_8i32'
    488 ; SKX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
    489 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    490 ;
    491 
    492   call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
    493   ret void
    494 }
    495 
    496 declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)
    497 
    498 define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
    499 ; AVX2-LABEL: 'test_scatter_4i32'
    500 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
    501 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    502 ;
    503 ; SKL-LABEL: 'test_scatter_4i32'
    504 ; SKL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
    505 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    506 ;
    507 ; KNL-LABEL: 'test_scatter_4i32'
    508 ; KNL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
    509 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    510 ;
    511 ; SKX-LABEL: 'test_scatter_4i32'
    512 ; SKX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
    513 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
    514 ;
    515   call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
    516   ret void
    517 }
    518 
    519 define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) {
    520 ; AVX2-LABEL: 'test_gather_4f32'
    521 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
    522 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
    523 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
    524 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
    525 ;
    526 ; SKL-LABEL: 'test_gather_4f32'
    527 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
    528 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
    529 ; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
    530 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
    531 ;
    532 ; KNL-LABEL: 'test_gather_4f32'
    533 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
    534 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
    535 ; KNL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
    536 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
    537 ;
    538 ; SKX-LABEL: 'test_gather_4f32'
    539 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
    540 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
    541 ; SKX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
    542 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
    543 ;
    544   %sext_ind = sext <4 x i32> %ind to <4 x i64>
    545   %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
    546 
    547   %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
    548   ret <4 x float>%res
    549 }
    550 
    551 define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {
    552 ; AVX2-LABEL: 'test_gather_4f32_const_mask'
    553 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
    554 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
    555 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
    556 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
    557 ;
    558 ; SKL-LABEL: 'test_gather_4f32_const_mask'
    559 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
    560 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
    561 ; SKL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
    562 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
    563 ;
    564 ; KNL-LABEL: 'test_gather_4f32_const_mask'
    565 ; KNL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
    566 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
    567 ; KNL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
    568 ; KNL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
    569 ;
    570 ; SKX-LABEL: 'test_gather_4f32_const_mask'
    571 ; SKX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
    572 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
    573 ; SKX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
    574 ; SKX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
    575 ;
    576   %sext_ind = sext <4 x i32> %ind to <4 x i64>
    577   %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
    578 
    579   %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
    580   ret <4 x float>%res
    581 }
    582 
    583 declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
    584 declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
    585 declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
    586 declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
    587 
    588 declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
    589 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
    590 declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
    591 declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
    592 declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
    593 declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
    594 declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
    595 declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
    596 declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
    597 declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
    598 declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
    599 declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
    600 declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
    601 declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
    602 declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
    603 declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
    604 declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
    605 declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
    606 declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
    607