; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s

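; The intrinsics below share one operand layout: a pass-through source
; vector, an i8* base pointer, an index vector, an integer lane mask, and an
; i32 address scale. On a gather, masked-off lanes keep the pass-through
; value; on a scatter, they are skipped. As a point of reference (not part
; of this test), @llvm.x86.avx512.gather.dps.512 roughly corresponds to the
; C intrinsic _mm512_mask_i32gather_ps(src, mask, index, base, scale).
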
declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dps.512(i8*, i16, <16 x i32>, <16 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpd.512(i8*, i8, <8 x i32>, <8 x double>, i32)

declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qps.512(i8*, i8, <8 x i64>, <8 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpd.512(i8*, i8, <8 x i64>, <8 x double>, i32)

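; Each gather_mask_* test round-trips data: gather from %base under the
; given mask, perturb the indices, then scatter the gathered values to
; %stbuf under the same mask.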
define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dps.512(i8* %stbuf, i16 %mask, <16 x i32> %ind2, <16 x float> %x, i32 4)
  ret void
}

define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpd.512(i8* %stbuf, i8 %mask, <8 x i32> %ind2, <8 x double> %x, i32 4)
  ret void
}

define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x float> %x, i32 4)
  ret void
}

define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpd.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x double> %x, i32 4)
  ret void
}
;;
;; Integer Gather/Scatter
;;
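; These mirror the floating-point tests above using the vpgather*/vpscatter*
; forms; the d/q letters in the intrinsic names encode dword vs. qword for
; the index and data element types.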
declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dpi.512(i8*, i16, <16 x i32>, <16 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpq.512(i8*, i8, <8 x i32>, <8 x i64>, i32)

declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpi.512(i8*, i8, <8 x i64>, <8 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpq.512(i8*, i8, <8 x i64>, <8 x i64>, i32)

define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpi.512(i8* %stbuf, i16 %mask, <16 x i32> %ind2, <16 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpi.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpq.512(i8* %stbuf, i8 %mask, <8 x i64> %ind2, <8 x i64> %x, i32 4)
  ret void
}

define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpq.512(i8* %stbuf, i8 %mask, <8 x i32> %ind2, <8 x i64> %x, i32 4)
  ret void
}

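; The *_execdomain tests check that FP gathers and scatters stay in the
; floating-point execution domain: the surrounding register and memory moves
; must come out as vmovaps/vmovapd rather than their integer counterparts.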
define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_dpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_qpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_dps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_qps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  ret <8 x float> %res
}

define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovapd (%rdi), %zmm1
; CHECK-NEXT:    vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.dpd.512(i8* %stbuf, i8 %mask, <8 x i32> %ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovapd (%rdi), %zmm1
; CHECK-NEXT:    vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.qpd.512(i8* %stbuf, i8 %mask, <8 x i64> %ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vmovaps (%rdi), %zmm1
; CHECK-NEXT:    vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <16 x float>, <16 x float>* %src, align 64
  call void @llvm.x86.avx512.scatter.dps.512(i8* %stbuf, i16 %mask, <16 x i32> %ind, <16 x float> %x, i32 4)
  ret void
}

define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps (%rdi), %ymm1
; CHECK-NEXT:    vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %src, align 32
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 %mask, <8 x i64> %ind, <8 x float> %x, i32 4)
  ret void
}

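; With a constant all-ones mask (i8 -1) no kmov from a GPR is needed; the
; mask register is materialized directly with kxnorw.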
define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_qps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 -1, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 -1, <8 x i64> %ind2, <8 x float> %x, i32 4)
  ret void
}

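; Gather/scatter prefetch variants. The trailing i32 argument selects the
; locality hint: 0 maps to the *pf0* (T0) instructions and 1 to the *pf1*
; (T1) instructions checked below.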
declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8*, i32, i32)
declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8*, i32, i32)
define void @prefetch(<8 x i64> %ind, i8* %base) {
; CHECK-LABEL: prefetch:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT:    vgatherpf1qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT:    vscatterpf0qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT:    vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
  ret void
}

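; 128-bit and 256-bit (AVX512VL) gather intrinsics. The "div" variants take
; qword (i64) index vectors and the "siv" variants take dword (i32) index
; vectors, as the declarations show. Each test calls its intrinsic twice
; (typically varying the mask and/or scale) and adds the results.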
declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)

define <2 x double> @test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)

define <4 x double> @test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)

define <8 x i32> @test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

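; The dword-indexed ("siv") counterparts of the qword-indexed tests above.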
declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)

define <2 x double> @test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)

define <4 x double> @test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)

define <8 x i32> @test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

define <8 x float> @test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)

define <8 x i32> @test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

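; VL scatter intrinsics. Each test stores twice, once under the incoming %x1
; mask and once under a constant all-ones mask, with scales 2 and 4.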
declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)

define void @test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)

define void @test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)

define void @test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)

define void @test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)

define void @test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)

define void @test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)

define void @test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)

define void @test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)

define void @test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)

define void @test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)

define void @test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)

define void @test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)

define void @test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)

define void @test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)

define void @test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)

define void @test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  ret void
}