; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=cannonlake | FileCheck %s

; These test cases demonstrate cases where vpermt2/vpermi2 could benefit from being commuted.
; Each test calls a single mask/maskz vpermi2var or vpermt2var intrinsic; the
; assertions pin which instruction form (vpermi2* or vpermt2*) appears in the
; llc output and whether a load or scalar splat shows up as a memory operand.

declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; mask.vpermi2var.d.512 with a constant -1 mask and %x2 loaded from %x2p:
; expected as vpermt2d with (%rdi) as the memory operand.
define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2d (%rdi), %zmm1, %zmm0
; CHECK-NEXT:    retq
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

; mask.vpermi2var.pd.512 with a constant -1 mask, register operands only:
; expected as vpermt2pd.
define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

; mask.vpermi2var.ps.512 with a constant -1 mask, register operands only:
; expected as vpermt2ps.
define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; mask.vpermi2var.q.512 with a constant -1 mask, register operands only:
; expected as vpermt2q.
define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2q %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; maskz.vpermt2var.d.512 with variable mask %x3 and %x2 loaded from %x2p:
; expected as vpermi2d {%k1} {z} with (%rdi) as the memory operand.
define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2d (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)

; maskz.vpermt2var.pd.512 where %x2 is a splat of one double loaded from
; %x2ptr: expected as vpermi2pd {%k1} {z} with a (%rdi){1to8} memory operand.
define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2pd (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2s = load double, double* %x2ptr
  %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
  %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
  %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)

; maskz.vpermt2var.ps.512 with variable mask %x3, register operands only:
; expected as vpermi2ps {%k1} {z}.
define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2ps %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}


declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; maskz.vpermt2var.q.512 with variable mask %x3, register operands only:
; expected as vpermi2q {%k1} {z}.
define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2q %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; mask.vpermt2var.d.512 with a constant -1 mask, register operands only:
; expected as vpermi2d.
define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2d %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; mask.vpermt2var.d.128 with a constant -1 mask, register operands only:
; expected as vpermi2d on xmm registers.
define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; maskz.vpermt2var.d.128 with variable mask %x3, register operands only:
; expected as vpermi2d {%k1} {z} on xmm registers.
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

; maskz.vpermt2var.d.128 where %x2 is a splat of one i32 loaded from %x2ptr:
; expected as vpermi2d {%k1} {z} with a (%rdi){1to4} memory operand.
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128_broadcast(<4 x i32> %x0, <4 x i32> %x1, i32* %x2ptr, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2d (%rdi){1to4}, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2s = load i32, i32* %x2ptr
  %x2ins = insertelement <4 x i32> undef, i32 %x2s, i32 0
  %x2 = shufflevector <4 x i32> %x2ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; mask.vpermt2var.d.256 with a constant -1 mask, register operands only:
; expected as vpermi2d on ymm registers.
define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; maskz.vpermt2var.d.256 with variable mask %x3, register operands only:
; expected as vpermi2d {%k1} {z} on ymm registers.
define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)

; mask.vpermi2var.pd.128 with a constant -1 mask, register operands only:
; expected as vpermt2pd on xmm registers.
define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)

; mask.vpermi2var.pd.256 with a constant -1 mask, register operands only:
; expected as vpermt2pd on ymm registers.
define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)

; mask.vpermi2var.ps.128 with a constant -1 mask, register operands only:
; expected as vpermt2ps on xmm registers.
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)

; mask.vpermi2var.ps.256 with a constant -1 mask, register operands only:
; expected as vpermt2ps on ymm registers.
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

; mask.vpermi2var.ps.256 with a constant -1 mask and %x2 loaded from %x2p:
; expected as vpermt2ps with (%rdi) as the memory operand.
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256_load(<8 x float> %x0, <8 x i32> %x1, <8 x float>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps (%rdi), %ymm1, %ymm0
; CHECK-NEXT:    retq
  %x2 = load <8 x float>, <8 x float>* %x2p
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

; mask.vpermi2var.ps.256 where %x2 is a splat of one float loaded from
; %x2ptr: expected as vpermt2ps with a (%rdi){1to8} memory operand.
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256_broadcast(<8 x float> %x0, <8 x i32> %x1, float* %x2ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps (%rdi){1to8}, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %x2s = load float, float* %x2ptr
  %x2ins = insertelement <8 x float> undef, float %x2s, i32 0
  %x2 = shufflevector <8 x float> %x2ins, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; mask.vpermi2var.qi.128 with a constant -1 mask, register operands only:
; expected as vpermt2b on xmm registers.
define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; mask.vpermi2var.qi.256 with a constant -1 mask, register operands only:
; expected as vpermt2b on ymm registers.
define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; mask.vpermt2var.qi.128 with a constant -1 mask, register operands only:
; expected as vpermi2b on xmm registers.
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

; mask.vpermt2var.qi.128 with a constant -1 mask and %x2 loaded from %x2p:
; expected as vpermi2b with (%rdi) as the memory operand.
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128_load(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2b (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %x2 = load <16 x i8>, <16 x i8>* %x2p
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; mask.vpermt2var.qi.256 with a constant -1 mask, register operands only:
; expected as vpermi2b on ymm registers.
define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; maskz.vpermt2var.qi.128 with variable mask %x3, register operands only:
; expected as vpermi2b {%k1} {z} on xmm registers.
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

; maskz.vpermt2var.qi.128 with variable mask %x3 and %x2 loaded from %x2p:
; expected as vpermi2b {%k1} {z} with (%rdi) as the memory operand.
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128_load(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>* %x2p, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2b (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2 = load <16 x i8>, <16 x i8>* %x2p
  %res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; maskz.vpermt2var.qi.256 with variable mask %x3, register operands only:
; expected as vpermi2b {%k1} {z} on ymm registers.
define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}

; maskz.vpermt2var.qi.256 with variable mask %x3 and %x2 loaded from %x2p:
; expected as vpermi2b {%k1} {z} with (%rdi) as the memory operand.
define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256_load(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>* %x2p, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2b (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2 = load <32 x i8>, <32 x i8>* %x2p
  %res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}