; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; Codegen test for the 512-bit AVX-512 gather, scatter and gather/scatter
; prefetch intrinsics. Every function below is preceded by the FileCheck
; directives listing the instruction mnemonics expected, in order, in the
; assembly that llc emits for it.

;;
;; Floating-point gather/scatter intrinsics
;;
declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dps.512(i8*, i16, <16 x i32>, <16 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpd.512(i8*, i8, <8 x i32>, <8 x double>, i32)

declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qps.512(i8*, i8, <8 x i64>, <8 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpd.512(i8*, i8, <8 x i64>, <8 x double>, i32)

;;
;; Integer gather/scatter intrinsics
;;
declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dpi.512(i8*, i16, <16 x i32>, <16 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpq.512(i8*, i8, <8 x i32>, <8 x i64>, i32)

declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpi.512(i8*, i8, <8 x i64>, <8 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpq.512(i8*, i8, <8 x i64>, <8 x i64>, i32)

;;
;; Gather/scatter prefetch intrinsics
;;
declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8*, i32, i32)
declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8*, i32, i32)

;;
;; Masked floating-point gather followed by scatter.
;; Each function gathers under %mask, adds a constant vector to the indices,
;; and scatters the gathered value to %stbuf using the adjusted indices and
;; the same mask.
;;

; CHECK-LABEL: gather_mask_dps
; CHECK: kmovw
; CHECK: vgatherdps
; CHECK: vpadd
; CHECK: vscatterdps
; CHECK: ret
define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
  %gathered = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  %ind.next = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dps.512(i8* %stbuf, i16 %mask, <16 x i32> %ind.next, <16 x float> %gathered, i32 4)
  ret void
}

; CHECK-LABEL: gather_mask_dpd
; CHECK: kmovw
; CHECK: vgatherdpd
; CHECK: vpadd
; CHECK: vscatterdpd
; CHECK: ret
define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
  %gathered = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  %ind.next = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpd.512(i8* %stbuf, i8 %mask, <8 x i32> %ind.next, <8 x double> %gathered, i32 4)
  ret void
}

; CHECK-LABEL: gather_mask_qps
; CHECK: kmovw
; CHECK: vgatherqps
; CHECK: vpadd
; CHECK: vscatterqps
; CHECK: ret
define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
  %gathered = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind.next = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 %mask, <8 x i64> %ind.next, <8 x float> %gathered, i32 4)
  ret void
}

; CHECK-LABEL: gather_mask_qpd
; CHECK: kmovw
; CHECK: vgatherqpd
; CHECK: vpadd
; CHECK: vscatterqpd
; CHECK: ret
define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
  %gathered = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind.next = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpd.512(i8* %stbuf, i8 %mask, <8 x i64> %ind.next, <8 x double> %gathered, i32 4)
  ret void
}

;;
;; Integer Gather/Scatter
;; Same gather / index-add / scatter shape as above, on integer vectors.
;;

; CHECK-LABEL: gather_mask_dd
; CHECK: kmovw
; CHECK: vpgatherdd
; CHECK: vpadd
; CHECK: vpscatterdd
; CHECK: ret
define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
  %gathered = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  %ind.next = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpi.512(i8* %stbuf, i16 %mask, <16 x i32> %ind.next, <16 x i32> %gathered, i32 4)
  ret void
}

; CHECK-LABEL: gather_mask_qd
; CHECK: kmovw
; CHECK: vpgatherqd
; CHECK: vpadd
; CHECK: vpscatterqd
; CHECK: ret
define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
  %gathered = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind.next = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpi.512(i8* %stbuf, i8 %mask, <8 x i64> %ind.next, <8 x i32> %gathered, i32 4)
  ret void
}

; CHECK-LABEL: gather_mask_qq
; CHECK: kmovw
; CHECK: vpgatherqq
; CHECK: vpadd
; CHECK: vpscatterqq
; CHECK: ret
define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
  %gathered = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind.next = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpq.512(i8* %stbuf, i8 %mask, <8 x i64> %ind.next, <8 x i64> %gathered, i32 4)
  ret void
}

; CHECK-LABEL: gather_mask_dq
; CHECK: kmovw
; CHECK: vpgatherdq
; CHECK: vpadd
; CHECK: vpscatterdq
; CHECK: ret
define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
  %gathered = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  %ind.next = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpq.512(i8* %stbuf, i8 %mask, <8 x i32> %ind.next, <8 x double> %gathered, i32 4)
  ret void
}

;;
;; Execution-domain tests, gather side.
;; The gathered value is either stored or returned; a vmovapd is expected
;; after a double-precision gather and a vmovaps after a single-precision one.
;;

; CHECK-LABEL: gather_mask_dpd_execdomain
; CHECK: vgatherdpd
; CHECK: vmovapd
; CHECK: ret
define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
  %gathered = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  store <8 x double> %gathered, <8 x double>* %stbuf
  ret void
}

; CHECK-LABEL: gather_mask_qpd_execdomain
; CHECK: vgatherqpd
; CHECK: vmovapd
; CHECK: ret
define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
  %gathered = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  store <8 x double> %gathered, <8 x double>* %stbuf
  ret void
}

; CHECK-LABEL: gather_mask_dps_execdomain
; CHECK: vgatherdps
; CHECK: vmovaps
; CHECK: ret
define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
  %gathered = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, i16 %mask, i32 4)
  ret <16 x float> %gathered
}

; CHECK-LABEL: gather_mask_qps_execdomain
; CHECK: vgatherqps
; CHECK: vmovaps
; CHECK: ret
define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
  %gathered = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 %mask, i32 4)
  ret <8 x float> %gathered
}

;;
;; Execution-domain tests, scatter side.
;; The value to scatter is loaded from %src with an explicit alignment; a
;; vmovapd / vmovaps is expected ahead of the scatter. %base is unused here.
;;

; CHECK-LABEL: scatter_mask_dpd_execdomain
; CHECK: vmovapd
; CHECK: vscatterdpd
; CHECK: ret
define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
  %loaded = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.dpd.512(i8* %stbuf, i8 %mask, <8 x i32> %ind, <8 x double> %loaded, i32 4)
  ret void
}

; CHECK-LABEL: scatter_mask_qpd_execdomain
; CHECK: vmovapd
; CHECK: vscatterqpd
; CHECK: ret
define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
  %loaded = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.qpd.512(i8* %stbuf, i8 %mask, <8 x i64> %ind, <8 x double> %loaded, i32 4)
  ret void
}

; CHECK-LABEL: scatter_mask_dps_execdomain
; CHECK: vmovaps
; CHECK: vscatterdps
; CHECK: ret
define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
  %loaded = load <16 x float>, <16 x float>* %src, align 64
  call void @llvm.x86.avx512.scatter.dps.512(i8* %stbuf, i16 %mask, <16 x i32> %ind, <16 x float> %loaded, i32 4)
  ret void
}

; CHECK-LABEL: scatter_mask_qps_execdomain
; CHECK: vmovaps
; CHECK: vscatterqps
; CHECK: ret
define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
  %loaded = load <8 x float>, <8 x float>* %src, align 32
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 %mask, <8 x i64> %ind, <8 x float> %loaded, i32 4)
  ret void
}

;;
;; Gather/scatter with a constant all-ones mask (i8 -1) instead of a mask
;; argument; a kxnorw is expected where the masked variants expect kmovw.
;;

; CHECK-LABEL: gather_qps
; CHECK: kxnorw
; CHECK: vgatherqps
; CHECK: vpadd
; CHECK: vscatterqps
; CHECK: ret
define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
  %gathered = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, i8 -1, i32 4)
  %ind.next = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512(i8* %stbuf, i8 -1, <8 x i64> %ind.next, <8 x float> %gathered, i32 4)
  ret void
}

;;
;; Gather/scatter prefetch.
;; Each intrinsic is called twice, with a final operand of 0 and then 1; the
;; expected mnemonics are the pf0 form followed by the pf1 form.
;; NOTE(review): the final operand presumably selects the prefetch hint
;; level -- confirm against the intrinsic definition.
;;

; CHECK-LABEL: prefetch
; CHECK: gatherpf0
; CHECK: gatherpf1
; CHECK: scatterpf0
; CHECK: scatterpf1
; CHECK: ret
define void @prefetch(<8 x i64> %ind, i8* %base) {
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
  ret void
}