; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=skylake -mtriple=i386-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X86 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-avx512f | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=-avx2 | FileCheck --check-prefix=NOGATHER %s

declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i32> %passthro)

define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB0_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB0_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB0_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    movl (%rax), %eax
; NOGATHER-NEXT:    vpinsrq $1, %rax, %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB0_4: # %else2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  ret <2 x i32> %res
}

define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB1_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB1_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB1_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    movl (%rax), %eax
; NOGATHER-NEXT:    vpinsrq $1, %rax, %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB1_4: # %else2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  %res2 = shufflevector <2 x i32> %res, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res2
}

declare <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthro)

define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB2_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB2_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB2_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:  .LBB2_4: # %else2
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  ret <2 x float> %res
}

define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB3_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB3_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB3_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:  .LBB3_4: # %else2
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  %res2 = shufflevector <2 x float> %res, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %res2
}


declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthro)

define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i32> %passthro) {
; X86-LABEL: masked_gather_v4i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vpgatherdd %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovdqa %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vpgatherqd %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovdqa %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm3
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB4_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:  .LBB4_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:  .LBB4_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:  .LBB4_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 0, <4 x i1> %masks, <4 x i32> %passthro)
  ret <4 x i32> %res
}

declare <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 %align, <4 x i1> %masks, <4 x float> %passthro)

define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <4 x float> %passthro) {
; X86-LABEL: masked_gather_v4float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vgatherdps %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vgatherqps %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm3
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB5_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; NOGATHER-NEXT:  .LBB5_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
; NOGATHER-NEXT:  .LBB5_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
; NOGATHER-NEXT:  .LBB5_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm0
; NOGATHER-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 0, <4 x i1> %masks, <4 x float> %passthro)
  ret <4 x float> %res
}

declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 %align, <8 x i1> %masks, <8 x i32> %passthro)

define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i32> %passthro) {
; X86-LABEL: masked_gather_v8i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %ymm2
; X86-NEXT:    vpgatherdd %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vpsrad $31, %ymm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vmovdqa 32(%rdi), %ymm3
; X64-NEXT:    vextracti128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vpgatherqd %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vpgatherqd %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm4
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB6_2: # %else
; NOGATHER-NEXT:    vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm2, %xmm5
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_4: # %else2
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm5
; NOGATHER-NEXT:    vmovq %xmm5, %rax
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm2, %xmm5
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_6: # %else5
; NOGATHER-NEXT:    vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm4
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm2, %xmm4
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_8: # %else8
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrd $0, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_10: # %else11
; NOGATHER-NEXT:    vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_12: # %else14
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_14
; NOGATHER-NEXT:  # %bb.13: # %cond.load16
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_14: # %else17
; NOGATHER-NEXT:    vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_16
; NOGATHER-NEXT:  # %bb.15: # %cond.load19
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB6_16: # %else20
; NOGATHER-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; NOGATHER-NEXT:    vpslld $31, %xmm3, %xmm3
; NOGATHER-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <8 x i32*>, <8 x i32*>* %ptr
  %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ld, i32 0, <8 x i1> %masks, <8 x i32> %passthro)
  ret <8 x i32> %res
}

declare <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ptrs, i32 %align, <8 x i1> %masks, <8 x float> %passthro)

define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <8 x float> %passthro) {
; X86-LABEL: masked_gather_v8float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovaps (%eax), %ymm2
; X86-NEXT:    vgatherdps %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovaps %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vpsrad $31, %ymm0, %ymm0
; X64-NEXT:    vmovaps (%rdi), %ymm2
; X64-NEXT:    vmovaps 32(%rdi), %ymm3
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vgatherqps %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vgatherqps %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm4
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT:  .LBB7_2: # %else
; NOGATHER-NEXT:    vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm5 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_4: # %else2
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm5
; NOGATHER-NEXT:    vmovq %xmm5, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm5 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_6: # %else5
; NOGATHER-NEXT:    vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm4, %xmm4
; NOGATHER-NEXT:    vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_8: # %else8
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm5
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_10: # %else11
; NOGATHER-NEXT:    vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],mem[0],xmm4[2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_12: # %else14
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_14
; NOGATHER-NEXT:  # %bb.13: # %cond.load16
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],mem[0],xmm4[3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_14: # %else17
; NOGATHER-NEXT:    vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_16
; NOGATHER-NEXT:  # %bb.15: # %cond.load19
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB7_16: # %else20
; NOGATHER-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; NOGATHER-NEXT:    vpslld $31, %xmm3, %xmm3
; NOGATHER-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <8 x float*>, <8 x float*>* %ptr
  %res = call <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ld, i32 0, <8 x i1> %masks, <8 x float> %passthro)
  ret <8 x float> %res
}

declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i64> %passthro)

define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i64> %passthro) {
; X86-LABEL: masked_gather_v4i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %xmm2
; X86-NEXT:    vpgatherdq %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vpgatherqq %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT:  .LBB8_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm2, %xmm4
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vpinsrq $0, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB8_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB8_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpsrad $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm3
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <4 x i64*>, <4 x i64*>* %ptr
  %res = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ld, i32 0, <4 x i1> %masks, <4 x i64> %passthro)
  ret <4 x i64> %res
}

declare <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ptrs, i32 %align, <4 x i1> %masks, <4 x double> %passthro)

define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks, <4 x double> %passthro) {
; X86-LABEL: masked_gather_v4double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovapd (%eax), %xmm2
; X86-NEXT:    vgatherdpd %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovapd %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovapd (%rdi), %ymm2
; X64-NEXT:    vgatherqpd %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovapd %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $ymm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT:  .LBB9_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vmovhpd {{.*#+}} xmm4 = xmm2[0],mem[0]
; NOGATHER-NEXT:    vblendpd {{.*#+}} ymm2 = ymm4[0,1],ymm2[2,3]
; NOGATHER-NEXT:  .LBB9_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT:    vmovq %xmm4, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT:    vmovlpd {{.*#+}} xmm4 = mem[0],xmm4[1]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB9_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT:    vmovhpd {{.*#+}} xmm3 = xmm3[0],mem[0]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT:  .LBB9_8: # %else8
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpsrad $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm3
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; NOGATHER-NEXT:    vpmovsxdq %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <4 x double*>, <4 x double*>* %ptr
  %res = call <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ld, i32 0, <4 x i1> %masks, <4 x double> %passthro)
  ret <4 x double> %res
}

declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i64> %passthro)

define <2 x i64> @masked_gather_v2i64(<2 x i64*>* %ptr, <2 x i1> %masks, <2 x i64> %passthro) {
; X86-LABEL: masked_gather_v2i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxdq (%eax), %xmm2
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB10_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT:  .LBB10_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB10_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB10_4: # %else2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x i64*>, <2 x i64*>* %ptr
  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ld, i32 0, <2 x i1> %masks, <2 x i64> %passthro)
  ret <2 x i64> %res
}

declare <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ptrs, i32 %align, <2 x i1> %masks, <2 x double> %passthro)

define <2 x double> @masked_gather_v2double(<2 x double*>* %ptr, <2 x i1> %masks, <2 x double> %passthro) {
; X86-LABEL: masked_gather_v2double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxdq (%eax), %xmm2
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovapd %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovapd (%rdi), %xmm2
; X64-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovapd %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    # implicit-def: $xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB11_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT:  .LBB11_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB11_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0]
; NOGATHER-NEXT:  .LBB11_4: # %else2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x double*>, <2 x double*>* %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ld, i32 0, <2 x i1> %masks, <2 x double> %passthro)
  ret <2 x double> %res
}