; Home | History | Annotate | Download | only in X86  (code-browser export header; commented so the file remains parseable)
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening | FileCheck %s
      3 
      4 declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8)
      5 
; SLH test: %rax carries the misspeculation predicate (all-ones on a bad
; speculative path). It must be OR'ed into the base pointer %rdi and, via
; broadcast, into the dword index vector before vgatherdps, then folded back
; into %rsp before ret.
      6 define <4 x float> @test_llvm_x86_avx2_gather_d_ps(i8* %b, <4 x i32> %iv, <4 x float> %mask) #0 {
      7 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps:
      8 ; CHECK:       # %bb.0: # %entry
      9 ; CHECK-NEXT:    movq %rsp, %rax
     10 ; CHECK-NEXT:    movq $-1, %rcx
     11 ; CHECK-NEXT:    sarq $63, %rax
     12 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
     13 ; CHECK-NEXT:    orq %rax, %rdi
     14 ; CHECK-NEXT:    vmovq %rax, %xmm3
     15 ; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
     16 ; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
     17 ; CHECK-NEXT:    vgatherdps %xmm1, (%rdi,%xmm0), %xmm2
     18 ; CHECK-NEXT:    shlq $47, %rax
     19 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
     20 ; CHECK-NEXT:    orq %rax, %rsp
     21 ; CHECK-NEXT:    retq
     22 entry:
     23   %v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x float> %mask, i8 1)
     24   ret <4 x float> %v
     25 }
     26 
     27 declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8)
     28 
; SLH test: the predicate state in %rax poisons base %rdi and the broadcast
; qword index vector ahead of vgatherqps, and is merged into %rsp before ret.
     29 define <4 x float> @test_llvm_x86_avx2_gather_q_ps(i8* %b, <2 x i64> %iv, <4 x float> %mask) #0 {
     30 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps:
     31 ; CHECK:       # %bb.0: # %entry
     32 ; CHECK-NEXT:    movq %rsp, %rax
     33 ; CHECK-NEXT:    movq $-1, %rcx
     34 ; CHECK-NEXT:    sarq $63, %rax
     35 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
     36 ; CHECK-NEXT:    orq %rax, %rdi
     37 ; CHECK-NEXT:    vmovq %rax, %xmm3
     38 ; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
     39 ; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
     40 ; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%xmm0), %xmm2
     41 ; CHECK-NEXT:    shlq $47, %rax
     42 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
     43 ; CHECK-NEXT:    orq %rax, %rsp
     44 ; CHECK-NEXT:    retq
     45 entry:
     46   %v = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %b, <2 x i64> %iv, <4 x float> %mask, i8 1)
     47   ret <4 x float> %v
     48 }
     49 
     50 declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8)
     51 
; SLH test: %rax (misspeculation predicate) poisons base %rdi and the
; broadcast dword index vector before vgatherdpd; folded into %rsp at exit.
     52 define <2 x double> @test_llvm_x86_avx2_gather_d_pd(i8* %b, <4 x i32> %iv, <2 x double> %mask) #0 {
     53 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd:
     54 ; CHECK:       # %bb.0: # %entry
     55 ; CHECK-NEXT:    movq %rsp, %rax
     56 ; CHECK-NEXT:    movq $-1, %rcx
     57 ; CHECK-NEXT:    sarq $63, %rax
     58 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
     59 ; CHECK-NEXT:    orq %rax, %rdi
     60 ; CHECK-NEXT:    vmovq %rax, %xmm3
     61 ; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
     62 ; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
     63 ; CHECK-NEXT:    vgatherdpd %xmm1, (%rdi,%xmm0), %xmm2
     64 ; CHECK-NEXT:    shlq $47, %rax
     65 ; CHECK-NEXT:    vmovapd %xmm2, %xmm0
     66 ; CHECK-NEXT:    orq %rax, %rsp
     67 ; CHECK-NEXT:    retq
     68 entry:
     69   %v = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %b, <4 x i32> %iv, <2 x double> %mask, i8 1)
     70   ret <2 x double> %v
     71 }
     72 
     73 declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8)
     74 
; SLH test: %rax (misspeculation predicate) poisons base %rdi and the
; broadcast qword index vector before vgatherqpd; folded into %rsp at exit.
     75 define <2 x double> @test_llvm_x86_avx2_gather_q_pd(i8* %b, <2 x i64> %iv, <2 x double> %mask) #0 {
     76 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd:
     77 ; CHECK:       # %bb.0: # %entry
     78 ; CHECK-NEXT:    movq %rsp, %rax
     79 ; CHECK-NEXT:    movq $-1, %rcx
     80 ; CHECK-NEXT:    sarq $63, %rax
     81 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
     82 ; CHECK-NEXT:    orq %rax, %rdi
     83 ; CHECK-NEXT:    vmovq %rax, %xmm3
     84 ; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
     85 ; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
     86 ; CHECK-NEXT:    vgatherqpd %xmm1, (%rdi,%xmm0), %xmm2
     87 ; CHECK-NEXT:    shlq $47, %rax
     88 ; CHECK-NEXT:    vmovapd %xmm2, %xmm0
     89 ; CHECK-NEXT:    orq %rax, %rsp
     90 ; CHECK-NEXT:    retq
     91 entry:
     92   %v = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %b, <2 x i64> %iv, <2 x double> %mask, i8 1)
     93   ret <2 x double> %v
     94 }
     95 
     96 declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8)
     97 
; SLH test (256-bit): the poison in %rax is broadcast to a full ymm and
; OR'ed into the 8-wide index vector; base %rdi and %rsp are hardened too.
     98 define <8 x float> @test_llvm_x86_avx2_gather_d_ps_256(i8* %b, <8 x i32> %iv, <8 x float> %mask) #0 {
     99 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps_256:
    100 ; CHECK:       # %bb.0: # %entry
    101 ; CHECK-NEXT:    movq %rsp, %rax
    102 ; CHECK-NEXT:    movq $-1, %rcx
    103 ; CHECK-NEXT:    sarq $63, %rax
    104 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    105 ; CHECK-NEXT:    orq %rax, %rdi
    106 ; CHECK-NEXT:    vmovq %rax, %xmm3
    107 ; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
    108 ; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
    109 ; CHECK-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0), %ymm2
    110 ; CHECK-NEXT:    shlq $47, %rax
    111 ; CHECK-NEXT:    vmovaps %ymm2, %ymm0
    112 ; CHECK-NEXT:    orq %rax, %rsp
    113 ; CHECK-NEXT:    retq
    114 entry:
    115   %v = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %b, <8 x i32> %iv, <8 x float> %mask, i8 1)
    116   ret <8 x float> %v
    117 }
    118 
    119 declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8)
    120 
; SLH test (256-bit qword indices, 128-bit result): ymm index is poisoned by
; the broadcast %rax; vzeroupper is expected before returning a ymm-free xmm.
    121 define <4 x float> @test_llvm_x86_avx2_gather_q_ps_256(i8* %b, <4 x i64> %iv, <4 x float> %mask) #0 {
    122 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps_256:
    123 ; CHECK:       # %bb.0: # %entry
    124 ; CHECK-NEXT:    movq %rsp, %rax
    125 ; CHECK-NEXT:    movq $-1, %rcx
    126 ; CHECK-NEXT:    sarq $63, %rax
    127 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    128 ; CHECK-NEXT:    orq %rax, %rdi
    129 ; CHECK-NEXT:    vmovq %rax, %xmm3
    130 ; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
    131 ; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
    132 ; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%ymm0), %xmm2
    133 ; CHECK-NEXT:    shlq $47, %rax
    134 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
    135 ; CHECK-NEXT:    orq %rax, %rsp
    136 ; CHECK-NEXT:    vzeroupper
    137 ; CHECK-NEXT:    retq
    138 entry:
    139   %v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x float> %mask, i8 1)
    140   ret <4 x float> %v
    141 }
    142 
    143 declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8)
    144 
; SLH test: dword indices fit in xmm, so the poison broadcast stays 128-bit
; while the gather result is ymm; base %rdi and %rsp are hardened as usual.
    145 define <4 x double> @test_llvm_x86_avx2_gather_d_pd_256(i8* %b, <4 x i32> %iv, <4 x double> %mask) #0 {
    146 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd_256:
    147 ; CHECK:       # %bb.0: # %entry
    148 ; CHECK-NEXT:    movq %rsp, %rax
    149 ; CHECK-NEXT:    movq $-1, %rcx
    150 ; CHECK-NEXT:    sarq $63, %rax
    151 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    152 ; CHECK-NEXT:    orq %rax, %rdi
    153 ; CHECK-NEXT:    vmovq %rax, %xmm3
    154 ; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
    155 ; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
    156 ; CHECK-NEXT:    vgatherdpd %ymm1, (%rdi,%xmm0), %ymm2
    157 ; CHECK-NEXT:    shlq $47, %rax
    158 ; CHECK-NEXT:    vmovapd %ymm2, %ymm0
    159 ; CHECK-NEXT:    orq %rax, %rsp
    160 ; CHECK-NEXT:    retq
    161 entry:
    162   %v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x double> %mask, i8 1)
    163   ret <4 x double> %v
    164 }
    165 
    166 declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8)
    167 
; SLH test (256-bit): %rax poisons base %rdi and the ymm qword index vector
; before vgatherqpd; state is folded into %rsp before ret.
    168 define <4 x double> @test_llvm_x86_avx2_gather_q_pd_256(i8* %b, <4 x i64> %iv, <4 x double> %mask) #0 {
    169 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd_256:
    170 ; CHECK:       # %bb.0: # %entry
    171 ; CHECK-NEXT:    movq %rsp, %rax
    172 ; CHECK-NEXT:    movq $-1, %rcx
    173 ; CHECK-NEXT:    sarq $63, %rax
    174 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    175 ; CHECK-NEXT:    orq %rax, %rdi
    176 ; CHECK-NEXT:    vmovq %rax, %xmm3
    177 ; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
    178 ; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
    179 ; CHECK-NEXT:    vgatherqpd %ymm1, (%rdi,%ymm0), %ymm2
    180 ; CHECK-NEXT:    shlq $47, %rax
    181 ; CHECK-NEXT:    vmovapd %ymm2, %ymm0
    182 ; CHECK-NEXT:    orq %rax, %rsp
    183 ; CHECK-NEXT:    retq
    184 entry:
    185   %v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x double> %mask, i8 1)
    186   ret <4 x double> %v
    187 }
    188 
    189 declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8)
    190 
; SLH test (integer gather): %rax poisons base %rdi and the broadcast dword
; index vector before vpgatherdd; folded into %rsp before ret.
    191 define <4 x i32> @test_llvm_x86_avx2_gather_d_d(i8* %b, <4 x i32> %iv, <4 x i32> %mask) #0 {
    192 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d:
    193 ; CHECK:       # %bb.0: # %entry
    194 ; CHECK-NEXT:    movq %rsp, %rax
    195 ; CHECK-NEXT:    movq $-1, %rcx
    196 ; CHECK-NEXT:    sarq $63, %rax
    197 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    198 ; CHECK-NEXT:    orq %rax, %rdi
    199 ; CHECK-NEXT:    vmovq %rax, %xmm3
    200 ; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
    201 ; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
    202 ; CHECK-NEXT:    vpgatherdd %xmm1, (%rdi,%xmm0), %xmm2
    203 ; CHECK-NEXT:    shlq $47, %rax
    204 ; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
    205 ; CHECK-NEXT:    orq %rax, %rsp
    206 ; CHECK-NEXT:    retq
    207 entry:
    208   %v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x i32> %mask, i8 1)
    209   ret <4 x i32> %v
    210 }
    211 
    212 declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8)
    213 
; SLH test (integer gather): %rax poisons base %rdi and the broadcast qword
; index vector before vpgatherqd; folded into %rsp before ret.
    214 define <4 x i32> @test_llvm_x86_avx2_gather_q_d(i8* %b, <2 x i64> %iv, <4 x i32> %mask) #0 {
    215 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d:
    216 ; CHECK:       # %bb.0: # %entry
    217 ; CHECK-NEXT:    movq %rsp, %rax
    218 ; CHECK-NEXT:    movq $-1, %rcx
    219 ; CHECK-NEXT:    sarq $63, %rax
    220 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    221 ; CHECK-NEXT:    orq %rax, %rdi
    222 ; CHECK-NEXT:    vmovq %rax, %xmm3
    223 ; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
    224 ; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
    225 ; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%xmm0), %xmm2
    226 ; CHECK-NEXT:    shlq $47, %rax
    227 ; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
    228 ; CHECK-NEXT:    orq %rax, %rsp
    229 ; CHECK-NEXT:    retq
    230 entry:
    231   %v = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> zeroinitializer, i8* %b, <2 x i64> %iv, <4 x i32> %mask, i8 1)
    232   ret <4 x i32> %v
    233 }
    234 
    235 declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8)
    236 
; SLH test (integer gather): %rax poisons base %rdi and the broadcast dword
; index vector before vpgatherdq; folded into %rsp before ret.
    237 define <2 x i64> @test_llvm_x86_avx2_gather_d_q(i8* %b, <4 x i32> %iv, <2 x i64> %mask) #0 {
    238 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q:
    239 ; CHECK:       # %bb.0: # %entry
    240 ; CHECK-NEXT:    movq %rsp, %rax
    241 ; CHECK-NEXT:    movq $-1, %rcx
    242 ; CHECK-NEXT:    sarq $63, %rax
    243 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    244 ; CHECK-NEXT:    orq %rax, %rdi
    245 ; CHECK-NEXT:    vmovq %rax, %xmm3
    246 ; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
    247 ; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
    248 ; CHECK-NEXT:    vpgatherdq %xmm1, (%rdi,%xmm0), %xmm2
    249 ; CHECK-NEXT:    shlq $47, %rax
    250 ; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
    251 ; CHECK-NEXT:    orq %rax, %rsp
    252 ; CHECK-NEXT:    retq
    253 entry:
    254   %v = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, <2 x i64> %mask, i8 1)
    255   ret <2 x i64> %v
    256 }
    257 
    258 declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8)
    259 
; SLH test (integer gather): %rax poisons base %rdi and the broadcast qword
; index vector before vpgatherqq; folded into %rsp before ret.
    260 define <2 x i64> @test_llvm_x86_avx2_gather_q_q(i8* %b, <2 x i64> %iv, <2 x i64> %mask) #0 {
    261 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q:
    262 ; CHECK:       # %bb.0: # %entry
    263 ; CHECK-NEXT:    movq %rsp, %rax
    264 ; CHECK-NEXT:    movq $-1, %rcx
    265 ; CHECK-NEXT:    sarq $63, %rax
    266 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    267 ; CHECK-NEXT:    orq %rax, %rdi
    268 ; CHECK-NEXT:    vmovq %rax, %xmm3
    269 ; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
    270 ; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
    271 ; CHECK-NEXT:    vpgatherqq %xmm1, (%rdi,%xmm0), %xmm2
    272 ; CHECK-NEXT:    shlq $47, %rax
    273 ; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
    274 ; CHECK-NEXT:    orq %rax, %rsp
    275 ; CHECK-NEXT:    retq
    276 entry:
    277   %v = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, i8* %b, <2 x i64> %iv, <2 x i64> %mask, i8 1)
    278   ret <2 x i64> %v
    279 }
    280 
    281 declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8)
    282 
; SLH test (256-bit integer gather): poison broadcast widens to ymm for the
; 8-wide index; base %rdi and %rsp are hardened around vpgatherdd.
    283 define <8 x i32> @test_llvm_x86_avx2_gather_d_d_256(i8* %b, <8 x i32> %iv, <8 x i32> %mask) #0 {
    284 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d_256:
    285 ; CHECK:       # %bb.0: # %entry
    286 ; CHECK-NEXT:    movq %rsp, %rax
    287 ; CHECK-NEXT:    movq $-1, %rcx
    288 ; CHECK-NEXT:    sarq $63, %rax
    289 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    290 ; CHECK-NEXT:    orq %rax, %rdi
    291 ; CHECK-NEXT:    vmovq %rax, %xmm3
    292 ; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
    293 ; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
    294 ; CHECK-NEXT:    vpgatherdd %ymm1, (%rdi,%ymm0), %ymm2
    295 ; CHECK-NEXT:    shlq $47, %rax
    296 ; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
    297 ; CHECK-NEXT:    orq %rax, %rsp
    298 ; CHECK-NEXT:    retq
    299 entry:
    300   %v = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> zeroinitializer, i8* %b, <8 x i32> %iv, <8 x i32> %mask, i8 1)
    301   ret <8 x i32> %v
    302 }
    303 
    304 declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8)
    305 
; SLH test (ymm qword indices, xmm result): index vector poisoned via ymm
; broadcast of %rax; vzeroupper expected before returning an xmm value.
    306 define <4 x i32> @test_llvm_x86_avx2_gather_q_d_256(i8* %b, <4 x i64> %iv, <4 x i32> %mask) #0 {
    307 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d_256:
    308 ; CHECK:       # %bb.0: # %entry
    309 ; CHECK-NEXT:    movq %rsp, %rax
    310 ; CHECK-NEXT:    movq $-1, %rcx
    311 ; CHECK-NEXT:    sarq $63, %rax
    312 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    313 ; CHECK-NEXT:    orq %rax, %rdi
    314 ; CHECK-NEXT:    vmovq %rax, %xmm3
    315 ; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
    316 ; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
    317 ; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%ymm0), %xmm2
    318 ; CHECK-NEXT:    shlq $47, %rax
    319 ; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
    320 ; CHECK-NEXT:    orq %rax, %rsp
    321 ; CHECK-NEXT:    vzeroupper
    322 ; CHECK-NEXT:    retq
    323 entry:
    324   %v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x i32> %mask, i8 1)
    325   ret <4 x i32> %v
    326 }
    327 
    328 declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8)
    329 
; SLH test: dword indices stay in xmm (poison broadcast is 128-bit) while
; vpgatherdq produces ymm; base %rdi and %rsp hardened as usual.
    330 define <4 x i64> @test_llvm_x86_avx2_gather_d_q_256(i8* %b, <4 x i32> %iv, <4 x i64> %mask) #0 {
    331 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q_256:
    332 ; CHECK:       # %bb.0: # %entry
    333 ; CHECK-NEXT:    movq %rsp, %rax
    334 ; CHECK-NEXT:    movq $-1, %rcx
    335 ; CHECK-NEXT:    sarq $63, %rax
    336 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    337 ; CHECK-NEXT:    orq %rax, %rdi
    338 ; CHECK-NEXT:    vmovq %rax, %xmm3
    339 ; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
    340 ; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
    341 ; CHECK-NEXT:    vpgatherdq %ymm1, (%rdi,%xmm0), %ymm2
    342 ; CHECK-NEXT:    shlq $47, %rax
    343 ; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
    344 ; CHECK-NEXT:    orq %rax, %rsp
    345 ; CHECK-NEXT:    retq
    346 entry:
    347   %v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x i64> %mask, i8 1)
    348   ret <4 x i64> %v
    349 }
    350 
    351 declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8)
    352 
; SLH test (256-bit integer gather): %rax poisons base %rdi and the ymm
; qword index vector before vpgatherqq; folded into %rsp before ret.
    353 define <4 x i64> @test_llvm_x86_avx2_gather_q_q_256(i8* %b, <4 x i64> %iv, <4 x i64> %mask) #0 {
    354 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q_256:
    355 ; CHECK:       # %bb.0: # %entry
    356 ; CHECK-NEXT:    movq %rsp, %rax
    357 ; CHECK-NEXT:    movq $-1, %rcx
    358 ; CHECK-NEXT:    sarq $63, %rax
    359 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    360 ; CHECK-NEXT:    orq %rax, %rdi
    361 ; CHECK-NEXT:    vmovq %rax, %xmm3
    362 ; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
    363 ; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
    364 ; CHECK-NEXT:    vpgatherqq %ymm1, (%rdi,%ymm0), %ymm2
    365 ; CHECK-NEXT:    shlq $47, %rax
    366 ; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
    367 ; CHECK-NEXT:    orq %rax, %rsp
    368 ; CHECK-NEXT:    retq
    369 entry:
    370   %v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x i64> %mask, i8 1)
    371   ret <4 x i64> %v
    372 }
    373 
    374 declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, i8*, <16 x i32>, i16, i32)
    375 
; SLH test (AVX-512): kxnorw materializes the all-ones k1 element mask;
; %rax is OR'ed into base %rdi and vpbroadcastq'ed (GPR form) into the zmm
; index before the masked vgatherdps; state folded into %rsp at exit.
    376 define <16 x float> @test_llvm_x86_avx512_gather_dps_512(i8* %b, <16 x i32> %iv) #1 {
    377 ; CHECK-LABEL: test_llvm_x86_avx512_gather_dps_512:
    378 ; CHECK:       # %bb.0: # %entry
    379 ; CHECK-NEXT:    movq %rsp, %rax
    380 ; CHECK-NEXT:    movq $-1, %rcx
    381 ; CHECK-NEXT:    sarq $63, %rax
    382 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    383 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    384 ; CHECK-NEXT:    orq %rax, %rdi
    385 ; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
    386 ; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
    387 ; CHECK-NEXT:    vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
    388 ; CHECK-NEXT:    shlq $47, %rax
    389 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    390 ; CHECK-NEXT:    orq %rax, %rsp
    391 ; CHECK-NEXT:    retq
    392 entry:
    393   %v = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> zeroinitializer, i8* %b, <16 x i32> %iv, i16 -1, i32 1)
    394   ret <16 x float> %v
    395 }
    396 
    397 declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, i8*, <8 x i32>, i8, i32)
    398 
; SLH test (AVX-512, dword indices in ymm): poison reaches the index via an
; xmm->ymm broadcast; kxnorw supplies the all-ones k1 mask for vgatherdpd.
    399 define <8 x double> @test_llvm_x86_avx512_gather_dpd_512(i8* %b, <8 x i32> %iv) #1 {
    400 ; CHECK-LABEL: test_llvm_x86_avx512_gather_dpd_512:
    401 ; CHECK:       # %bb.0: # %entry
    402 ; CHECK-NEXT:    movq %rsp, %rax
    403 ; CHECK-NEXT:    movq $-1, %rcx
    404 ; CHECK-NEXT:    sarq $63, %rax
    405 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    406 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    407 ; CHECK-NEXT:    orq %rax, %rdi
    408 ; CHECK-NEXT:    vmovq %rax, %xmm2
    409 ; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
    410 ; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
    411 ; CHECK-NEXT:    vgatherdpd (%rdi,%ymm0), %zmm1 {%k1}
    412 ; CHECK-NEXT:    shlq $47, %rax
    413 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
    414 ; CHECK-NEXT:    orq %rax, %rsp
    415 ; CHECK-NEXT:    retq
    416 entry:
    417   %v = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
    418   ret <8 x double> %v
    419 }
    420 
    421 declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, i8*, <8 x i64>, i8, i32)
    422 
; SLH test (AVX-512): %rax poisons base %rdi and the zmm qword index via the
; GPR vpbroadcastq; kxnorw k1 is the all-ones mask for vgatherqps (ymm result).
    423 define <8 x float> @test_llvm_x86_avx512_gather_qps_512(i8* %b, <8 x i64> %iv) #1 {
    424 ; CHECK-LABEL: test_llvm_x86_avx512_gather_qps_512:
    425 ; CHECK:       # %bb.0: # %entry
    426 ; CHECK-NEXT:    movq %rsp, %rax
    427 ; CHECK-NEXT:    movq $-1, %rcx
    428 ; CHECK-NEXT:    sarq $63, %rax
    429 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    430 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    431 ; CHECK-NEXT:    orq %rax, %rdi
    432 ; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
    433 ; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
    434 ; CHECK-NEXT:    vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
    435 ; CHECK-NEXT:    shlq $47, %rax
    436 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    437 ; CHECK-NEXT:    orq %rax, %rsp
    438 ; CHECK-NEXT:    retq
    439 entry:
    440   %v = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
    441   ret <8 x float> %v
    442 }
    443 
    444 declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, i8*, <8 x i64>, i8, i32)
    445 
; SLH test (AVX-512): %rax poisons base %rdi and the zmm qword index before
; the k1-masked vgatherqpd; state folded into %rsp before ret.
    446 define <8 x double> @test_llvm_x86_avx512_gather_qpd_512(i8* %b, <8 x i64> %iv) #1 {
    447 ; CHECK-LABEL: test_llvm_x86_avx512_gather_qpd_512:
    448 ; CHECK:       # %bb.0: # %entry
    449 ; CHECK-NEXT:    movq %rsp, %rax
    450 ; CHECK-NEXT:    movq $-1, %rcx
    451 ; CHECK-NEXT:    sarq $63, %rax
    452 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    453 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    454 ; CHECK-NEXT:    orq %rax, %rdi
    455 ; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
    456 ; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
    457 ; CHECK-NEXT:    vgatherqpd (%rdi,%zmm0), %zmm1 {%k1}
    458 ; CHECK-NEXT:    shlq $47, %rax
    459 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
    460 ; CHECK-NEXT:    orq %rax, %rsp
    461 ; CHECK-NEXT:    retq
    462 entry:
    463   %v = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
    464   ret <8 x double> %v
    465 }
    466 
    467 declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, i8*, <16 x i32>, i16, i32)
    468 
; SLH test (AVX-512 integer gather): %rax poisons base %rdi and the zmm
; dword index before the k1-masked vpgatherdd; folded into %rsp at exit.
    469 define <16 x i32> @test_llvm_x86_avx512_gather_dpi_512(i8* %b, <16 x i32> %iv) #1 {
    470 ; CHECK-LABEL: test_llvm_x86_avx512_gather_dpi_512:
    471 ; CHECK:       # %bb.0: # %entry
    472 ; CHECK-NEXT:    movq %rsp, %rax
    473 ; CHECK-NEXT:    movq $-1, %rcx
    474 ; CHECK-NEXT:    sarq $63, %rax
    475 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    476 ; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    477 ; CHECK-NEXT:    orq %rax, %rdi
    478 ; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
    479 ; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
    480 ; CHECK-NEXT:    vpgatherdd (%rdi,%zmm0), %zmm1 {%k1}
    481 ; CHECK-NEXT:    shlq $47, %rax
    482 ; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
    483 ; CHECK-NEXT:    orq %rax, %rsp
    484 ; CHECK-NEXT:    retq
    485 entry:
    486   %v = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> zeroinitializer, i8* %b, <16 x i32> %iv, i16 -1, i32 1)
    487   ret <16 x i32> %v
    488 }
    489 
    490 declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, i8*, <8 x i32>, i8, i32)
    491 
; SLH test (AVX-512, dword indices in ymm): poison reaches the index via an
; xmm->ymm broadcast; kxnorw supplies the all-ones k1 mask for vpgatherdq.
    492 define <8 x i64> @test_llvm_x86_avx512_gather_dpq_512(i8* %b, <8 x i32> %iv) #1 {
    493 ; CHECK-LABEL: test_llvm_x86_avx512_gather_dpq_512:
    494 ; CHECK:       # %bb.0: # %entry
    495 ; CHECK-NEXT:    movq %rsp, %rax
    496 ; CHECK-NEXT:    movq $-1, %rcx
    497 ; CHECK-NEXT:    sarq $63, %rax
    498 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    499 ; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    500 ; CHECK-NEXT:    orq %rax, %rdi
    501 ; CHECK-NEXT:    vmovq %rax, %xmm2
    502 ; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
    503 ; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
    504 ; CHECK-NEXT:    vpgatherdq (%rdi,%ymm0), %zmm1 {%k1}
    505 ; CHECK-NEXT:    shlq $47, %rax
    506 ; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
    507 ; CHECK-NEXT:    orq %rax, %rsp
    508 ; CHECK-NEXT:    retq
    509 entry:
    510   %v = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
    511   ret <8 x i64> %v
    512 }
    513 
    514 
    515 declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, i8*, <8 x i64>, i8, i32)
    516 
; SLH test (AVX-512 integer gather): %rax poisons base %rdi and the zmm
; qword index before the k1-masked vpgatherqd (ymm result).
    517 define <8 x i32> @test_llvm_x86_avx512_gather_qpi_512(i8* %b, <8 x i64> %iv) #1 {
    518 ; CHECK-LABEL: test_llvm_x86_avx512_gather_qpi_512:
    519 ; CHECK:       # %bb.0: # %entry
    520 ; CHECK-NEXT:    movq %rsp, %rax
    521 ; CHECK-NEXT:    movq $-1, %rcx
    522 ; CHECK-NEXT:    sarq $63, %rax
    523 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    524 ; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    525 ; CHECK-NEXT:    orq %rax, %rdi
    526 ; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
    527 ; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
    528 ; CHECK-NEXT:    vpgatherqd (%rdi,%zmm0), %ymm1 {%k1}
    529 ; CHECK-NEXT:    shlq $47, %rax
    530 ; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
    531 ; CHECK-NEXT:    orq %rax, %rsp
    532 ; CHECK-NEXT:    retq
    533 entry:
    534   %v = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
    535   ret <8 x i32> %v
    536 }
    537 
    538 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, i8*, <8 x i64>, i8, i32)
    539 
; SLH test (AVX-512 integer gather): %rax poisons base %rdi and the zmm
; qword index before the k1-masked vpgatherqq; folded into %rsp at exit.
    540 define <8 x i64> @test_llvm_x86_avx512_gather_qpq_512(i8* %b, <8 x i64> %iv) #1 {
    541 ; CHECK-LABEL: test_llvm_x86_avx512_gather_qpq_512:
    542 ; CHECK:       # %bb.0: # %entry
    543 ; CHECK-NEXT:    movq %rsp, %rax
    544 ; CHECK-NEXT:    movq $-1, %rcx
    545 ; CHECK-NEXT:    sarq $63, %rax
    546 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    547 ; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    548 ; CHECK-NEXT:    orq %rax, %rdi
    549 ; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
    550 ; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
    551 ; CHECK-NEXT:    vpgatherqq (%rdi,%zmm0), %zmm1 {%k1}
    552 ; CHECK-NEXT:    shlq $47, %rax
    553 ; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
    554 ; CHECK-NEXT:    orq %rax, %rsp
    555 ; CHECK-NEXT:    retq
    556 entry:
    557   %v = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
    558   ret <8 x i64> %v
    559 }
    560 
    561 declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8*, i32, i32);
    562 
; SLH test: even a gather *prefetch* is hardened — %rax poisons base %rdi
; and the zmm index before vgatherpf0qps, although there is no destination
; register to zero.
    563 define void @test_llvm_x86_avx512_gatherpf_qps_512(<8 x i64> %iv, i8* %b) #1 {
    564 ; CHECK-LABEL: test_llvm_x86_avx512_gatherpf_qps_512:
    565 ; CHECK:       # %bb.0: # %entry
    566 ; CHECK-NEXT:    movq %rsp, %rax
    567 ; CHECK-NEXT:    movq $-1, %rcx
    568 ; CHECK-NEXT:    sarq $63, %rax
    569 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    570 ; CHECK-NEXT:    orq %rax, %rdi
    571 ; CHECK-NEXT:    vpbroadcastq %rax, %zmm1
    572 ; CHECK-NEXT:    vporq %zmm0, %zmm1, %zmm0
    573 ; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
    574 ; CHECK-NEXT:    shlq $47, %rax
    575 ; CHECK-NEXT:    orq %rax, %rsp
    576 ; CHECK-NEXT:    vzeroupper
    577 ; CHECK-NEXT:    retq
    578 entry:
    579   call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %iv, i8* %b, i32 4, i32 3)
    580   ret void
    581 }
    582 
    583 declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)
    584 
; SLH test (AVX-512VL, 128-bit): GPR-form vpbroadcastq poisons the xmm index;
; base %rdi is OR'ed directly; kxnorw k1 is the all-ones mask for vgatherdps.
    585 define <4 x float> @test_llvm_x86_avx512_gather3siv4_sf(i8* %b, <4 x i32> %iv) #2 {
    586 ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_sf:
    587 ; CHECK:       # %bb.0: # %entry
    588 ; CHECK-NEXT:    movq %rsp, %rax
    589 ; CHECK-NEXT:    movq $-1, %rcx
    590 ; CHECK-NEXT:    sarq $63, %rax
    591 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    592 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    593 ; CHECK-NEXT:    orq %rax, %rdi
    594 ; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
    595 ; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
    596 ; CHECK-NEXT:    vgatherdps (%rdi,%xmm0), %xmm1 {%k1}
    597 ; CHECK-NEXT:    shlq $47, %rax
    598 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    599 ; CHECK-NEXT:    orq %rax, %rsp
    600 ; CHECK-NEXT:    retq
    601 entry:
    602   %v = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
    603   ret <4 x float> %v
    604 }
    605 
    606 declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)
    607 
; SLH test (AVX-512VL, 128-bit qword indices): GPR vpbroadcastq poisons the
; xmm index; base %rdi hardened; k1-masked vgatherqps.
    608 define <4 x float> @test_llvm_x86_avx512_gather3div4_sf(i8* %b, <2 x i64> %iv) #2 {
    609 ; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_sf:
    610 ; CHECK:       # %bb.0: # %entry
    611 ; CHECK-NEXT:    movq %rsp, %rax
    612 ; CHECK-NEXT:    movq $-1, %rcx
    613 ; CHECK-NEXT:    sarq $63, %rax
    614 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    615 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    616 ; CHECK-NEXT:    orq %rax, %rdi
    617 ; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
    618 ; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
    619 ; CHECK-NEXT:    vgatherqps (%rdi,%xmm0), %xmm1 {%k1}
    620 ; CHECK-NEXT:    shlq $47, %rax
    621 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    622 ; CHECK-NEXT:    orq %rax, %rsp
    623 ; CHECK-NEXT:    retq
    624 entry:
    625   %v = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
    626   ret <4 x float> %v
    627 }
    628 
    629 declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)
    630 
; SLH test (AVX-512VL, 128-bit): GPR vpbroadcastq poisons the xmm dword
; index; base %rdi hardened; k1-masked vgatherdpd.
    631 define <2 x double> @test_llvm_x86_avx512_gather3siv2_df(i8* %b, <4 x i32> %iv) #2 {
    632 ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_df:
    633 ; CHECK:       # %bb.0: # %entry
    634 ; CHECK-NEXT:    movq %rsp, %rax
    635 ; CHECK-NEXT:    movq $-1, %rcx
    636 ; CHECK-NEXT:    sarq $63, %rax
    637 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    638 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    639 ; CHECK-NEXT:    orq %rax, %rdi
    640 ; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
    641 ; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
    642 ; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %xmm1 {%k1}
    643 ; CHECK-NEXT:    shlq $47, %rax
    644 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
    645 ; CHECK-NEXT:    orq %rax, %rsp
    646 ; CHECK-NEXT:    retq
    647 entry:
    648   %v = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
    649   ret <2 x double> %v
    650 }
    651 
    652 declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)
    653 
; SLH test (AVX-512VL, 128-bit): GPR vpbroadcastq poisons the xmm qword
; index; base %rdi hardened; k1-masked vgatherqpd.
    654 define <2 x double> @test_llvm_x86_avx512_gather3div2_df(i8* %b, <2 x i64> %iv) #2 {
    655 ; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_df:
    656 ; CHECK:       # %bb.0: # %entry
    657 ; CHECK-NEXT:    movq %rsp, %rax
    658 ; CHECK-NEXT:    movq $-1, %rcx
    659 ; CHECK-NEXT:    sarq $63, %rax
    660 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    661 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    662 ; CHECK-NEXT:    orq %rax, %rdi
    663 ; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
    664 ; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
    665 ; CHECK-NEXT:    vgatherqpd (%rdi,%xmm0), %xmm1 {%k1}
    666 ; CHECK-NEXT:    shlq $47, %rax
    667 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
    668 ; CHECK-NEXT:    orq %rax, %rsp
    669 ; CHECK-NEXT:    retq
    670 entry:
    671   %v = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
    672   ret <2 x double> %v
    673 }
    674 
    675 declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)
    676 
; SLH test (AVX-512VL, 256-bit): GPR vpbroadcastq poisons the ymm dword
; index; base %rdi hardened; k1-masked vgatherdps.
    677 define <8 x float> @test_llvm_x86_avx512_gather3siv8_sf(i8* %b, <8 x i32> %iv) #2 {
    678 ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_sf:
    679 ; CHECK:       # %bb.0: # %entry
    680 ; CHECK-NEXT:    movq %rsp, %rax
    681 ; CHECK-NEXT:    movq $-1, %rcx
    682 ; CHECK-NEXT:    sarq $63, %rax
    683 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
    684 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    685 ; CHECK-NEXT:    orq %rax, %rdi
    686 ; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
    687 ; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
    688 ; CHECK-NEXT:    vgatherdps (%rdi,%ymm0), %ymm1 {%k1}
    689 ; CHECK-NEXT:    shlq $47, %rax
    690 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    691 ; CHECK-NEXT:    orq %rax, %rsp
    692 ; CHECK-NEXT:    retq
    693 entry:
    694   %v = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
    695   ret <8 x float> %v
    696 }
    697 
; AVX-512VL masked gather: <4 x float> loaded through 64-bit indices.
declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)

; SLH hardening of a qword-index f32 gather (ymm index, xmm result): the
; predicate state is OR-ed into %rdi and into the ymm index register; a
; vzeroupper is emitted before ret since only an xmm value is returned.
define <4 x float> @test_llvm_x86_avx512_gather3div8_sf(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqps (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}
    721 
; AVX-512VL masked gather: <4 x double> loaded through 32-bit indices.
declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)

; SLH hardening of a dword-index f64 gather (xmm index, ymm result): the
; predicate state is OR-ed into %rdi and into the xmm index register before
; vgatherdpd, then merged back into %rsp before ret.
define <4 x double> @test_llvm_x86_avx512_gather3siv4_df(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}
    744 
; AVX-512VL masked gather: <4 x double> loaded through 64-bit indices.
declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)

; SLH hardening of a qword-index f64 gather (ymm index, ymm result): the
; predicate state is OR-ed into %rdi and into the ymm index register before
; vgatherqpd, then merged back into %rsp before ret.
define <4 x double> @test_llvm_x86_avx512_gather3div4_df(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}
    767 
; AVX-512VL masked gather: <4 x i32> loaded through 32-bit indices.
declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)

; SLH hardening of a dword-index i32 gather (xmm index, xmm result): the
; predicate state is OR-ed into %rdi and into the xmm index register before
; vpgatherdd, then merged back into %rsp before ret.
define <4 x i32> @test_llvm_x86_avx512_gather3siv4_si(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}
    790 
; AVX-512VL masked gather: <4 x i32> loaded through 64-bit indices.
declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)

; SLH hardening of a qword-index i32 gather (xmm index, xmm result): the
; predicate state is OR-ed into %rdi and into the xmm index register before
; vpgatherqd, then merged back into %rsp before ret.
define <4 x i32> @test_llvm_x86_avx512_gather3div4_si(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}
    813 
; AVX-512VL masked gather: <2 x i64> loaded through 32-bit indices.
declare <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)

; SLH hardening of a dword-index i64 gather (xmm index, xmm result): the
; predicate state is OR-ed into %rdi and into the xmm index register before
; vpgatherdq, then merged back into %rsp before ret.
define <2 x i64> @test_llvm_x86_avx512_gather3siv2_di(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}
    836 
; AVX-512VL masked gather: <2 x i64> loaded through 64-bit indices.
declare <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)

; SLH hardening of a qword-index i64 gather (xmm index, xmm result): the
; predicate state is OR-ed into %rdi and into the xmm index register before
; vpgatherqq, then merged back into %rsp before ret.
define <2 x i64> @test_llvm_x86_avx512_gather3div2_di(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}
    859 
; AVX-512VL masked gather: <8 x i32> loaded through 32-bit indices.
declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)

; SLH hardening of a dword-index i32 gather (ymm index, ymm result): the
; predicate state is OR-ed into %rdi and into the ymm index register before
; vpgatherdd, then merged back into %rsp before ret.
define <8 x i32> @test_llvm_x86_avx512_gather3siv8_si(i8* %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}
    882 
; AVX-512VL masked gather: <4 x i32> loaded through 64-bit indices.
declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)

; SLH hardening of a qword-index i32 gather (ymm index, xmm result): the
; predicate state is OR-ed into %rdi and into the ymm index register; a
; vzeroupper is emitted before ret since only an xmm value is returned.
define <4 x i32> @test_llvm_x86_avx512_gather3div8_si(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}
    906 
; AVX-512VL masked gather: <4 x i64> loaded through 32-bit indices.
declare <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)

; SLH hardening of a dword-index i64 gather (xmm index, ymm result): the
; predicate state is OR-ed into %rdi and into the xmm index register before
; vpgatherdq, then merged back into %rsp before ret.
define <4 x i64> @test_llvm_x86_avx512_gather3siv4_di(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}
    929 
; AVX-512VL masked gather: <4 x i64> loaded through 64-bit indices.
declare <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)

; SLH hardening of a qword-index i64 gather (ymm index, ymm result): the
; predicate state is OR-ed into %rdi and into the ymm index register before
; vpgatherqq, then merged back into %rsp before ret.
define <4 x i64> @test_llvm_x86_avx512_gather3div4_di(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}
    952 
; Attribute groups select the ISA under test: #0 = AVX2 gathers,
; #1 = AVX-512F (512-bit) gathers, #2 = AVX-512VL (128/256-bit) gathers.
attributes #0 = { nounwind "target-features"="+avx2" }
attributes #1 = { nounwind "target-features"="+avx512f" }
attributes #2 = { nounwind "target-features"="+avx512vl" }
    956