1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py 2 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze | FileCheck %s --check-prefix=AVX2 3 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -cost-model -analyze | FileCheck %s --check-prefix=SKL 4 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze | FileCheck %s --check-prefix=KNL 5 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -cost-model -analyze | FileCheck %s --check-prefix=SKX 6 7 8 define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) { 9 ; AVX2-LABEL: 'test1' 10 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer 11 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) 12 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res 13 ; 14 ; SKL-LABEL: 'test1' 15 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer 16 ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) 17 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res 18 ; 19 ; KNL-LABEL: 'test1' 20 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer 21 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) 22 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res 23 ; 24 ; SKX-LABEL: 
'test1' 25 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer 26 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) 27 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res 28 ; 29 %mask = icmp eq <2 x i64> %trigger, zeroinitializer 30 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst) 31 ret <2 x double> %res 32 } 33 34 define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) { 35 ; AVX2-LABEL: 'test2' 36 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer 37 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) 38 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res 39 ; 40 ; SKL-LABEL: 'test2' 41 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer 42 ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) 43 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res 44 ; 45 ; KNL-LABEL: 'test2' 46 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer 47 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) 48 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x 
i32> %res 49 ; 50 ; SKX-LABEL: 'test2' 51 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer 52 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) 53 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res 54 ; 55 %mask = icmp eq <4 x i32> %trigger, zeroinitializer 56 %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst) 57 ret <4 x i32> %res 58 } 59 60 define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) { 61 ; AVX2-LABEL: 'test3' 62 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer 63 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) 64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 65 ; 66 ; SKL-LABEL: 'test3' 67 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer 68 ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) 69 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 70 ; 71 ; KNL-LABEL: 'test3' 72 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer 73 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) 74 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 75 ; 76 ; SKX-LABEL: 'test3' 77 ; SKX-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer 78 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) 79 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 80 ; 81 %mask = icmp eq <4 x i32> %trigger, zeroinitializer 82 call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask) 83 ret void 84 } 85 86 define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) { 87 ; AVX2-LABEL: 'test4' 88 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer 89 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) 90 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res 91 ; 92 ; SKL-LABEL: 'test4' 93 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer 94 ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) 95 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res 96 ; 97 ; KNL-LABEL: 'test4' 98 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer 99 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) 100 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res 101 ; 102 ; SKX-LABEL: 'test4' 103 ; SKX-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer 104 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) 105 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res 106 ; 107 %mask = icmp eq <8 x i32> %trigger, zeroinitializer 108 %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst) 109 ret <8 x float> %res 110 } 111 112 define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) { 113 ; AVX2-LABEL: 'test5' 114 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 115 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) 116 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 117 ; 118 ; SKL-LABEL: 'test5' 119 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 120 ; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) 121 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 122 ; 123 ; KNL-LABEL: 'test5' 124 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 125 ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) 126 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 127 ; 128 ; SKX-LABEL: 'test5' 129 ; SKX-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 130 ; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) 131 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 132 ; 133 %mask = icmp eq <2 x i32> %trigger, zeroinitializer 134 call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask) 135 ret void 136 } 137 138 define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { 139 ; AVX2-LABEL: 'test6' 140 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 141 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) 142 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 143 ; 144 ; SKL-LABEL: 'test6' 145 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 146 ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) 147 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 148 ; 149 ; KNL-LABEL: 'test6' 150 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 151 ; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) 152 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 153 ; 154 ; SKX-LABEL: 'test6' 155 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 
x i32> %trigger, zeroinitializer 156 ; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) 157 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 158 ; 159 %mask = icmp eq <2 x i32> %trigger, zeroinitializer 160 call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask) 161 ret void 162 } 163 164 define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) { 165 ; AVX2-LABEL: 'test7' 166 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 167 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) 168 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res 169 ; 170 ; SKL-LABEL: 'test7' 171 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 172 ; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) 173 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res 174 ; 175 ; KNL-LABEL: 'test7' 176 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 177 ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) 178 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res 179 ; 180 ; SKX-LABEL: 'test7' 181 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%mask = icmp eq <2 x i32> %trigger, zeroinitializer 182 ; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) 183 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res 184 ; 185 %mask = icmp eq <2 x i32> %trigger, zeroinitializer 186 %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst) 187 ret <2 x float> %res 188 } 189 190 define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { 191 ; AVX2-LABEL: 'test8' 192 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 193 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) 194 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res 195 ; 196 ; SKL-LABEL: 'test8' 197 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 198 ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) 199 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res 200 ; 201 ; KNL-LABEL: 'test8' 202 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 203 ; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) 204 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res 205 ; 206 ; SKX-LABEL: 'test8' 207 ; SKX-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer 208 ; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) 209 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res 210 ; 211 %mask = icmp eq <2 x i32> %trigger, zeroinitializer 212 %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst) 213 ret <2 x i32> %res 214 } 215 216 define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) { 217 ; AVX2-LABEL: 'test_gather_2f64' 218 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) 219 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res 220 ; 221 ; SKL-LABEL: 'test_gather_2f64' 222 ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) 223 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res 224 ; 225 ; KNL-LABEL: 'test_gather_2f64' 226 ; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) 227 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res 228 ; 229 ; SKX-LABEL: 'test_gather_2f64' 230 ; SKX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) 231 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret <2 x double> %res 232 ; 233 234 235 236 237 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) 238 ret <2 x double> %res 239 } 240 declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) 241 242 define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { 243 ; AVX2-LABEL: 'test_gather_4i32' 244 ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) 245 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res 246 ; 247 ; SKL-LABEL: 'test_gather_4i32' 248 ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) 249 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res 250 ; 251 ; KNL-LABEL: 'test_gather_4i32' 252 ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) 253 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res 254 ; 255 ; SKX-LABEL: 'test_gather_4i32' 256 ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) 257 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res 258 ; 259 260 261 262 263 %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) 264 ret <4 x i32> %res 265 } 266 267 define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) { 268 ; AVX2-LABEL: 
'test_gather_4i32_const_mask' 269 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) 270 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res 271 ; 272 ; SKL-LABEL: 'test_gather_4i32_const_mask' 273 ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) 274 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res 275 ; 276 ; KNL-LABEL: 'test_gather_4i32_const_mask' 277 ; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) 278 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res 279 ; 280 ; SKX-LABEL: 'test_gather_4i32_const_mask' 281 ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) 282 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res 283 ; 284 285 286 287 288 %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) 289 ret <4 x i32> %res 290 } 291 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0) 292 293 define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) { 294 ; AVX2-LABEL: 'test_gather_16f32_const_mask' 295 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind 
to <16 x i64> 296 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind 297 ; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) 298 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res 299 ; 300 ; SKL-LABEL: 'test_gather_16f32_const_mask' 301 ; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> 302 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind 303 ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) 304 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res 305 ; 306 ; KNL-LABEL: 'test_gather_16f32_const_mask' 307 ; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> 308 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind 309 ; KNL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) 
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKX-LABEL: 'test_gather_16f32_const_mask'
; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; SKX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float>%res
}

; Masked gather of <16 x float>. Addresses are formed from a scalar base
; pointer indexed by the sign-extended <16 x i32> indices; the <16 x i1> mask
; is a function argument rather than a constant.
define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) {
; AVX2-LABEL: 'test_gather_16f32_var_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKL-LABEL: 'test_gather_16f32_var_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; KNL-LABEL: 'test_gather_16f32_var_mask'
; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; KNL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKX-LABEL: 'test_gather_16f32_var_mask'
; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; SKX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
  ret <16 x float>%res
}

; Masked gather of <16 x float> where the GEP base is itself a vector of
; pointers (%ptrs) offset by the sign-extended indices, instead of a scalar
; base. The mask is a function argument.
define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {
; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKL-LABEL: 'test_gather_16f32_ra_var_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; KNL-LABEL: 'test_gather_16f32_ra_var_mask'
; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKX-LABEL: 'test_gather_16f32_ra_var_mask'
; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
  ret <16 x float>%res
}

; Masked gather of <16 x float> with an all-true constant mask. The scalar
; base is first splatted into a pointer vector (insertelement followed by
; shufflevector with a zero mask) and the GEP is performed on that vector.
define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) {
; AVX2-LABEL: 'test_gather_16f32_const_mask2'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKL-LABEL: 'test_gather_16f32_const_mask2'
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; KNL-LABEL: 'test_gather_16f32_const_mask2'
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
; KNL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKX-LABEL: 'test_gather_16f32_const_mask2'
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
; SKX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float>%res
}

; Masked scatter of <16 x i32>. The base pointer is splatted into a pointer
; vector and indexed directly by the <16 x i32> indices (no sign extension);
; the mask arrives as an i16 and is bitcast to <16 x i1>.
define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
; AVX2-LABEL: 'test_scatter_16i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKL-LABEL: 'test_scatter_16i32'
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
; SKL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; KNL-LABEL: 'test_scatter_16i32'
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>
; KNL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKX-LABEL: 'test_scatter_16i32'
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>
; SKX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
  %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer

  %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
  %imask = bitcast i16 %mask to <16 x i1>
  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
  ret void
}

; Masked scatter of <8 x i32> straight to a vector-of-pointers argument; the
; function body contains no address computation and the mask is an argument.
define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
; AVX2-LABEL: 'test_scatter_8i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKL-LABEL: 'test_scatter_8i32'
; SKL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; KNL-LABEL: 'test_scatter_8i32'
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKX-LABEL: 'test_scatter_8i32'
; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;

  call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
  ret void
}

; Declaration of the scatter intrinsic called by test_scatter_8i32.
declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)

; Masked scatter of <4 x i32> straight to a vector-of-pointers argument, with
; the mask passed in as an argument.
define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
; AVX2-LABEL: 'test_scatter_4i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKL-LABEL: 'test_scatter_4i32'
; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; KNL-LABEL: 'test_scatter_4i32'
; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKX-LABEL: 'test_scatter_4i32'
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
  ret void
}

; Masked gather of <4 x float>: scalar base pointer indexed by the
; sign-extended <4 x i32> indices, with the mask passed in as an argument.
define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) {
; AVX2-LABEL: 'test_gather_4f32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKL-LABEL: 'test_gather_4f32'
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; KNL-LABEL: 'test_gather_4f32'
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKX-LABEL: 'test_gather_4f32'
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
  %sext_ind = sext <4 x i32> %ind to <4 x i64>
  %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind

  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
  ret <4 x float>%res
}

; Masked gather of <4 x float> using the same scalar-base-plus-sext-index
; addressing, but with an all-true constant mask instead of a mask argument.
define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {
; AVX2-LABEL: 'test_gather_4f32_const_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKL-LABEL: 'test_gather_4f32_const_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; KNL-LABEL: 'test_gather_4f32_const_mask'
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKX-LABEL: 'test_gather_4f32_const_mask'
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
  %sext_ind = sext <4 x i32> %ind to <4 x i64>
  %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind

  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float>%res
}

; Declarations of the remaining gather/scatter intrinsics called by the tests
; above (v4f32 / v16f32 gathers, v4i32 / v16i32 scatters).
declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)

; Declarations of the masked load/store intrinsics (for example the v2f64 and
; v4i32 loads called by test1 and test2).
declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)