Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
      2 
      3 declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
      4 declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
      5 declare <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double>, i8*, <8 x i32>, i8, i32)
      6 declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32)
      7 
      8 declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float>, i8*, <8 x i64>, i8, i32)
      9 declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
     10 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
     11 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
     12 
     13 ;CHECK-LABEL: gather_mask_dps
     14 ;CHECK: kmovw
     15 ;CHECK: vgatherdps
     16 ;CHECK: vpadd
     17 ;CHECK: vscatterdps
     18 ;CHECK: ret
        ; Calls the gather.dps.512 intrinsic (<16 x i32> indices, <16 x float>
        ; data, i16 %mask) on %base, adds the constant vector 0,1,2,3 (repeated)
        ; to the indices, and passes the gathered value to scatter.dps.512 on
        ; %stbuf with the same %mask.  The expectations above require kmovw,
        ; vgatherdps, vpadd, vscatterdps and ret to appear in that order.
        ; NOTE(review): the trailing i32 4 operand is presumably the address
        ; scale -- confirm against the intrinsic definition.
     19 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf)  {
     20   %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
     21   %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
     22   call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
     23   ret void
     24 }
     25 
     26 ;CHECK-LABEL: gather_mask_dpd
     27 ;CHECK: kmovw
     28 ;CHECK: vgatherdpd
     29 ;CHECK: vpadd
     30 ;CHECK: vscatterdpd
     31 ;CHECK: ret
        ; Calls the gather.dpd.512 intrinsic (<8 x i32> indices, <8 x double>
        ; data, i8 %mask) on %base, offsets the indices by the constant vector
        ; 0,1,2,3 (repeated), then scatters the gathered value to %stbuf through
        ; scatter.dpd.512 with the same %mask.  Expected instruction order:
        ; kmovw, vgatherdpd, vpadd, vscatterdpd, ret.
     32 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf)  {
     33   %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
     34   %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
     35   call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
     36   ret void
     37 }
     38 
     39 ;CHECK-LABEL: gather_mask_qps
     40 ;CHECK: kmovw
     41 ;CHECK: vgatherqps
     42 ;CHECK: vpadd
     43 ;CHECK: vscatterqps
     44 ;CHECK: ret
        ; Calls the gather.qps.512 intrinsic (<8 x i64> indices, <8 x float>
        ; data, i8 %mask) on %base, offsets the indices by the constant vector
        ; 0,1,2,3 (repeated), then scatters the gathered value to %stbuf through
        ; scatter.qps.512 with the same %mask.  Expected instruction order:
        ; kmovw, vgatherqps, vpadd, vscatterqps, ret.
     45 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf)  {
     46   %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
     47   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
     48   call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
     49   ret void
     50 }
     51 
     52 ;CHECK-LABEL: gather_mask_qpd
     53 ;CHECK: kmovw
     54 ;CHECK: vgatherqpd
     55 ;CHECK: vpadd
     56 ;CHECK: vscatterqpd
     57 ;CHECK: ret
        ; Calls the gather.qpd.512 intrinsic (<8 x i64> indices, <8 x double>
        ; data, i8 %mask) on %base, offsets the indices by the constant vector
        ; 0,1,2,3 (repeated), then scatters the gathered value to %stbuf through
        ; scatter.qpd.512 with the same %mask.  Expected instruction order:
        ; kmovw, vgatherqpd, vpadd, vscatterqpd, ret.
     58 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf)  {
     59   %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
     60   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
     61   call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
     62   ret void
     63 }
     64 ;;
     65 ;; Integer Gather/Scatter
     66 ;;
     67 declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, <16 x i32>, i16, i32)
     68 declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, i16, <16 x i32>, <16 x i32>, i32)
     69 declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64>, i8*, <8 x i32>, i8, i32)
     70 declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, i8, <8 x i32>, <8 x i64>, i32)
     71 
     72 declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32>, i8*, <8 x i64>, i8, i32)
     73 declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
     74 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
     75 declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
     76 
     77 ;CHECK-LABEL: gather_mask_dd
     78 ;CHECK: kmovw
     79 ;CHECK: vpgatherdd
     80 ;CHECK: vpadd
     81 ;CHECK: vpscatterdd
     82 ;CHECK: ret
        ; Integer variant: calls the gather.dpi.512 intrinsic (<16 x i32>
        ; indices, <16 x i32> data, i16 %mask) on %base, offsets the indices by
        ; the constant vector 0,1,2,3 (repeated), then scatters the gathered
        ; value to %stbuf through scatter.dpi.512 with the same %mask.
        ; Expected instruction order: kmovw, vpgatherdd, vpadd, vpscatterdd, ret.
     83 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf)  {
     84   %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
     85   %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
     86   call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
     87   ret void
     88 }
     89 
     90 ;CHECK-LABEL: gather_mask_qd
     91 ;CHECK: kmovw
     92 ;CHECK: vpgatherqd
     93 ;CHECK: vpadd
     94 ;CHECK: vpscatterqd
     95 ;CHECK: ret
        ; Integer variant: calls the gather.qpi.512 intrinsic (<8 x i64>
        ; indices, <8 x i32> data, i8 %mask) on %base, offsets the indices by
        ; the constant vector 0,1,2,3 (repeated), then scatters the gathered
        ; value to %stbuf through scatter.qpi.512 with the same %mask.
        ; Expected instruction order: kmovw, vpgatherqd, vpadd, vpscatterqd, ret.
     96 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf)  {
     97   %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
     98   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
     99   call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
    100   ret void
    101 }
    102 
    103 ;CHECK-LABEL: gather_mask_qq
    104 ;CHECK: kmovw
    105 ;CHECK: vpgatherqq
    106 ;CHECK: vpadd
    107 ;CHECK: vpscatterqq
    108 ;CHECK: ret
        ; Integer variant: calls the gather.qpq.512 intrinsic (<8 x i64>
        ; indices, <8 x i64> data, i8 %mask) on %base, offsets the indices by
        ; the constant vector 0,1,2,3 (repeated), then scatters the gathered
        ; value to %stbuf through scatter.qpq.512 with the same %mask.
        ; Expected instruction order: kmovw, vpgatherqq, vpadd, vpscatterqq, ret.
    109 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf)  {
    110   %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
    111   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
    112   call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
    113   ret void
    114 }
    115 
    116 ;CHECK-LABEL: gather_mask_dq
    117 ;CHECK: kmovw
    118 ;CHECK: vpgatherdq
    119 ;CHECK: vpadd
    120 ;CHECK: vpscatterdq
    121 ;CHECK: ret
        ; Integer variant: calls the gather.dpq.512 intrinsic (<8 x i32>
        ; indices, <8 x i64> data, i8 %mask) on %base, offsets the indices by
        ; the constant vector 0,1,2,3 (repeated), then scatters the gathered
        ; value to %stbuf through scatter.dpq.512 with the same %mask.
        ; Expected instruction order: kmovw, vpgatherdq, vpadd, vpscatterdq, ret.
    122 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf)  {
    123   %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
    124   %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    125   call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
    126   ret void
    127 }
    128 
    129 
    130 ;CHECK-LABEL: gather_mask_dpd_execdomain
    131 ;CHECK: vgatherdpd
    132 ;CHECK: vmovapd
    133 ;CHECK: ret
        ; Gathers an <8 x double> through gather.dpd.512 (<8 x i32> indices)
        ; and stores the result straight to the typed pointer %stbuf with a
        ; plain IR store.  The expectations above require vgatherdpd followed
        ; by vmovapd and then ret.
        ; NOTE(review): per the function name this presumably verifies that the
        ; store is selected in the packed-double execution domain -- confirm.
    134 define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf)  {
    135   %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
    136   store <8 x double> %x, <8 x double>* %stbuf
    137   ret void
    138 }
    139 
    140 ;CHECK-LABEL: gather_mask_qpd_execdomain
    141 ;CHECK: vgatherqpd
    142 ;CHECK: vmovapd
    143 ;CHECK: ret
        ; Gathers an <8 x double> through gather.qpd.512 (<8 x i64> indices)
        ; and stores the result straight to the typed pointer %stbuf with a
        ; plain IR store.  The expectations above require vgatherqpd followed
        ; by vmovapd and then ret.
        ; NOTE(review): per the function name this presumably verifies that the
        ; store is selected in the packed-double execution domain -- confirm.
    144 define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf)  {
    145   %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
    146   store <8 x double> %x, <8 x double>* %stbuf
    147   ret void
    148 }
    149 
    150 ;CHECK-LABEL: gather_mask_dps_execdomain
    151 ;CHECK: vgatherdps
    152 ;CHECK: vmovaps 
    153 ;CHECK: ret
        ; Gathers a <16 x float> through gather.dps.512 (<16 x i32> indices)
        ; and returns it directly instead of storing it.  The expectations
        ; above require vgatherdps followed by vmovaps and then ret.
        ; The ';' after %res on the ret line begins an (empty) IR comment; it is
        ; not a statement terminator.
        ; NOTE(review): per the function name this presumably verifies that the
        ; move of the result stays in the packed-single execution domain --
        ; confirm.
    154 define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base)  {
    155   %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
    156   ret <16 x float> %res;
    157 }
    158 
    159 ;CHECK-LABEL: gather_mask_qps_execdomain
    160 ;CHECK: vgatherqps
    161 ;CHECK: vmovaps
    162 ;CHECK: ret
        ; Gathers an <8 x float> through gather.qps.512 (<8 x i64> indices)
        ; and returns it directly instead of storing it.  The expectations
        ; above require vgatherqps followed by vmovaps and then ret.
        ; The ';' after %res on the ret line begins an (empty) IR comment; it is
        ; not a statement terminator.
        ; NOTE(review): per the function name this presumably verifies that the
        ; move of the result stays in the packed-single execution domain --
        ; confirm.
    163 define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base)  {
    164   %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
    165   ret <8 x float> %res;
    166 }
    167 
    168 ;CHECK-LABEL: scatter_mask_dpd_execdomain
    169 ;CHECK: vmovapd
    170 ;CHECK: vscatterdpd
    171 ;CHECK: ret
        ; Loads an <8 x double> from %src (declared 64-byte aligned) and passes
        ; it to scatter.dpd.512 on %stbuf with <8 x i32> indices and %mask.
        ; The expectations above require vmovapd followed by vscatterdpd and
        ; then ret.  The %base parameter is not used by this function.
        ; NOTE(review): per the function name this presumably verifies that the
        ; load is selected in the packed-double execution domain -- confirm.
    172 define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf)  {
    173   %x = load <8 x double>, <8 x double>* %src, align 64 
    174   call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
    175   ret void
    176 }
    177 
    178 ;CHECK-LABEL: scatter_mask_qpd_execdomain
    179 ;CHECK: vmovapd
    180 ;CHECK: vscatterqpd
    181 ;CHECK: ret
        ; Loads an <8 x double> from %src (declared 64-byte aligned) and passes
        ; it to scatter.qpd.512 on %stbuf with <8 x i64> indices and %mask.
        ; The expectations above require vmovapd followed by vscatterqpd and
        ; then ret.  The %base parameter is not used by this function.
        ; NOTE(review): per the function name this presumably verifies that the
        ; load is selected in the packed-double execution domain -- confirm.
    182 define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf)  {
    183   %x = load <8 x double>, <8 x double>* %src, align 64
    184   call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
    185   ret void
    186 }
    187 
    188 ;CHECK-LABEL: scatter_mask_dps_execdomain
    189 ;CHECK: vmovaps
    190 ;CHECK: vscatterdps
    191 ;CHECK: ret
        ; Loads a <16 x float> from %src (declared 64-byte aligned) and passes
        ; it to scatter.dps.512 on %stbuf with <16 x i32> indices and %mask.
        ; The expectations above require vmovaps followed by vscatterdps and
        ; then ret.  The %base parameter is not used by this function.
        ; NOTE(review): per the function name this presumably verifies that the
        ; load is selected in the packed-single execution domain -- confirm.
    192 define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf)  {
    193   %x = load <16 x float>, <16 x float>* %src, align 64
    194   call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
    195   ret void
    196 }
    197 
    198 ;CHECK-LABEL: scatter_mask_qps_execdomain
    199 ;CHECK: vmovaps
    200 ;CHECK: vscatterqps
    201 ;CHECK: ret
        ; Loads an <8 x float> from %src (declared 32-byte aligned) and passes
        ; it to scatter.qps.512 on %stbuf with <8 x i64> indices and %mask.
        ; The expectations above require vmovaps followed by vscatterqps and
        ; then ret.  The %base parameter is not used by this function.
        ; NOTE(review): per the function name this presumably verifies that the
        ; load is selected in the packed-single execution domain -- confirm.
    202 define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf)  {
    203   %x = load <8 x float>, <8 x float>* %src, align 32 
    204   call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
    205   ret void
    206 }
    207 
    208 ;CHECK-LABEL: gather_qps
    209 ;CHECK: kxnorw
    210 ;CHECK: vgatherqps
    211 ;CHECK: vpadd
    212 ;CHECK: vscatterqps
    213 ;CHECK: ret
        ; Same gather / index-offset / scatter sequence as a masked qps test,
        ; but the function has no mask parameter: both intrinsic calls receive
        ; the constant i8 -1 as their mask operand.  The expectations above
        ; require kxnorw (rather than a kmovw from a register), then
        ; vgatherqps, vpadd, vscatterqps and ret, in that order.
    214 define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf)  {
    215   %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
    216   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
    217   call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
    218   ret void
    219 }
    220 
    221 ;CHECK-LABEL: prefetch
    222 ;CHECK: gatherpf0
    223 ;CHECK: gatherpf1
    224 ;CHECK: scatterpf0
    225 ;CHECK: scatterpf1
    226 ;CHECK: ret
        ; Prefetch intrinsics used only by the test below; both take
        ; (i8, <8 x i64>, i8*, i32, i32) and return void.
    227 declare  void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
    228 declare  void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
        ; Issues two gatherpf.qps.512 calls followed by two scatterpf.qps.512
        ; calls on %base with the same <8 x i64> indices and i8 -1 as the first
        ; operand.  Within each pair only the last operand differs (0, then 1);
        ; the expectations above require gatherpf0, gatherpf1, scatterpf0,
        ; scatterpf1 and ret, in that order.
        ; NOTE(review): the last operand presumably selects the prefetch hint
        ; and the fourth operand (4 for gathers, 2 for scatters) is presumably
        ; the address scale -- confirm against the intrinsic definitions.
    229 define void @prefetch(<8 x i64> %ind, i8* %base) {
    230   call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
    231   call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
    232   call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
    233   call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
    234   ret void
    235 }
    236