; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening | FileCheck %s

declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_d_ps(i8* %b, <4 x i32> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps(i8* %b, <2 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %b, <2 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_d_pd(i8* %b, <4 x i32> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %b, <4 x i32> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_q_pd(i8* %b, <2 x i64> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %b, <2 x i64> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8)

define <8 x float> @test_llvm_x86_avx2_gather_d_ps_256(i8* %b, <8 x i32> %iv, <8 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %b, <8 x i32> %iv, <8 x float> %mask, i8 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps_256(i8* %b, <4 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_d_pd_256(i8* %b, <4 x i32> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_q_pd_256(i8* %b, <4 x i64> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqpd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_d_d(i8* %b, <4 x i32> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d(i8* %b, <2 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> zeroinitializer, i8* %b, <2 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_d_q(i8* %b, <4 x i32> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_q_q(i8* %b, <2 x i64> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, i8* %b, <2 x i64> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_llvm_x86_avx2_gather_d_d_256(i8* %b, <8 x i32> %iv, <8 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherdd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> zeroinitializer, i8* %b, <8 x i32> %iv, <8 x i32> %mask, i8 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d_256(i8* %b, <4 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_d_q_256(i8* %b, <4 x i32> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_q_q_256(i8* %b, <4 x i64> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqq %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, i8*, <16 x i32>, i16, i32)

define <16 x float> @test_llvm_x86_avx512_gather_dps_512(i8* %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> zeroinitializer, i8* %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, i8*, <8 x i32>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_dpd_512(i8* %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpd_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdpd (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, i8*, <8 x i64>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather_qps_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, i8*, <8 x i64>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_qpd_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpd_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, i8*, <16 x i32>, i16, i32)

define <16 x i32> @test_llvm_x86_avx512_gather_dpi_512(i8* %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpi_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> zeroinitializer, i8* %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, i8*, <8 x i32>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_dpq_512(i8* %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpq_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdq (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}


declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, i8*, <8 x i64>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather_qpi_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpi_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, i8*, <8 x i64>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_qpq_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpq_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}

declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8*, i32, i32);

define void @test_llvm_x86_avx512_gatherpf_qps_512(<8 x i64> %iv, i8* %b) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gatherpf_qps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm1
; CHECK-NEXT:    vporq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %iv, i8* %b, i32 4, i32 3)
  ret void
}

declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3siv4_sf(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div4_sf(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3siv2_df(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3div2_df(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather3siv8_sf(i8* %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdps (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div8_sf(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqps (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3siv4_df(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3div4_df(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3siv4_si(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div4_si(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3siv2_di(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3div2_di(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather3siv8_si(i8* %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div8_si(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3siv4_di(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3div4_di(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:
vpxor %xmm1, %xmm1, %xmm1 940 ; CHECK-NEXT: orq %rax, %rdi 941 ; CHECK-NEXT: vpbroadcastq %rax, %ymm2 942 ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0 943 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm0), %ymm1 {%k1} 944 ; CHECK-NEXT: shlq $47, %rax 945 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 946 ; CHECK-NEXT: orq %rax, %rsp 947 ; CHECK-NEXT: retq 948 entry: 949 %v = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1) 950 ret <4 x i64> %v 951 } 952 953 attributes #0 = { nounwind "target-features"="+avx2" } 954 attributes #1 = { nounwind "target-features"="+avx512f" } 955 attributes #2 = { nounwind "target-features"="+avx512vl" } 956