; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,NOBW,NOVBMI,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,NOVBMI,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512BW,VBMI

; Fully variable (per-lane, runtime-index) shuffle of a 512-bit vector of i64.
; All AVX512 variants lower this to a single vpermpd.
define <8 x i64> @var_shuffle_v8i64(<8 x i64> %v, <8 x i64> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
  %index0 = extractelement <8 x i64> %indices, i32 0
  %index1 = extractelement <8 x i64> %indices, i32 1
  %index2 = extractelement <8 x i64> %indices, i32 2
  %index3 = extractelement <8 x i64> %indices, i32 3
  %index4 = extractelement <8 x i64> %indices, i32 4
  %index5 = extractelement <8 x i64> %indices, i32 5
  %index6 = extractelement <8 x i64> %indices, i32 6
  %index7 = extractelement <8 x i64> %indices, i32 7
  %v0 = extractelement <8 x i64> %v, i64 %index0
  %v1 = extractelement <8 x i64> %v, i64 %index1
  %v2 = extractelement <8 x i64> %v, i64 %index2
  %v3 = extractelement <8 x i64> %v, i64 %index3
  %v4 = extractelement <8 x i64> %v, i64 %index4
  %v5 = extractelement <8 x i64> %v, i64 %index5
  %v6 = extractelement <8 x i64> %v, i64 %index6
  %v7 = extractelement <8 x i64> %v, i64 %index7
  %ret0 = insertelement <8 x i64> undef, i64 %v0, i32 0
  %ret1 = insertelement <8 x i64> %ret0, i64 %v1, i32 1
  %ret2 = insertelement <8 x i64> %ret1, i64 %v2, i32 2
  %ret3 = insertelement <8 x i64> %ret2, i64 %v3, i32 3
  %ret4 = insertelement <8 x i64> %ret3, i64 %v4, i32 4
  %ret5 = insertelement <8 x i64> %ret4, i64 %v5, i32 5
  %ret6 = insertelement <8 x i64> %ret5, i64 %v6, i32 6
  %ret7 = insertelement <8 x i64> %ret6, i64 %v7, i32 7
  ret <8 x i64> %ret7
}

; Variable shuffle of <16 x i32>: all AVX512 variants use a single vpermps.
define <16 x i32> @var_shuffle_v16i32(<16 x i32> %v, <16 x i32> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
  %index0 = extractelement <16 x i32> %indices, i32 0
  %index1 = extractelement <16 x i32> %indices, i32 1
  %index2 = extractelement <16 x i32> %indices, i32 2
  %index3 = extractelement <16 x i32> %indices, i32 3
  %index4 = extractelement <16 x i32> %indices, i32 4
  %index5 = extractelement <16 x i32> %indices, i32 5
  %index6 = extractelement <16 x i32> %indices, i32 6
  %index7 = extractelement <16 x i32> %indices, i32 7
  %index8 = extractelement <16 x i32> %indices, i32 8
  %index9 = extractelement <16 x i32> %indices, i32 9
  %index10 = extractelement <16 x i32> %indices, i32 10
  %index11 = extractelement <16 x i32> %indices, i32 11
  %index12 = extractelement <16 x i32> %indices, i32 12
  %index13 = extractelement <16 x i32> %indices, i32 13
  %index14 = extractelement <16 x i32> %indices, i32 14
  %index15 = extractelement <16 x i32> %indices, i32 15
  %v0 = extractelement <16 x i32> %v, i32 %index0
  %v1 = extractelement <16 x i32> %v, i32 %index1
  %v2 = extractelement <16 x i32> %v, i32 %index2
  %v3 = extractelement <16 x i32> %v, i32 %index3
  %v4 = extractelement <16 x i32> %v, i32 %index4
  %v5 = extractelement <16 x i32> %v, i32 %index5
  %v6 = extractelement <16 x i32> %v, i32 %index6
  %v7 = extractelement <16 x i32> %v, i32 %index7
  %v8 = extractelement <16 x i32> %v, i32 %index8
  %v9 = extractelement <16 x i32> %v, i32 %index9
  %v10 = extractelement <16 x i32> %v, i32 %index10
  %v11 = extractelement <16 x i32> %v, i32 %index11
  %v12 = extractelement <16 x i32> %v, i32 %index12
  %v13 = extractelement <16 x i32> %v, i32 %index13
  %v14 = extractelement <16 x i32> %v, i32 %index14
  %v15 = extractelement <16 x i32> %v, i32 %index15
  %ret0 = insertelement <16 x i32> undef, i32 %v0, i32 0
  %ret1 = insertelement <16 x i32> %ret0, i32 %v1, i32 1
  %ret2 = insertelement <16 x i32> %ret1, i32 %v2, i32 2
  %ret3 = insertelement <16 x i32> %ret2, i32 %v3, i32 3
  %ret4 = insertelement <16 x i32> %ret3, i32 %v4, i32 4
  %ret5 = insertelement <16 x i32> %ret4, i32 %v5, i32 5
  %ret6 = insertelement <16 x i32> %ret5, i32 %v6, i32 6
  %ret7 = insertelement <16 x i32> %ret6, i32 %v7, i32 7
  %ret8 = insertelement <16 x i32> %ret7, i32 %v8, i32 8
  %ret9 = insertelement <16 x i32> %ret8, i32 %v9, i32 9
  %ret10 = insertelement <16 x i32> %ret9, i32 %v10, i32 10
  %ret11 = insertelement <16 x i32> %ret10, i32 %v11, i32 11
  %ret12 = insertelement <16 x i32> %ret11, i32 %v12, i32 12
  %ret13 = insertelement <16 x i32> %ret12, i32 %v13, i32 13
  %ret14 = insertelement <16 x i32> %ret13, i32 %v14, i32 14
  %ret15 = insertelement <16 x i32> %ret14, i32 %v15, i32 15
  ret <16 x i32> %ret15
}

; Variable shuffle of <32 x i16>: without AVX512BW (NOBW) this is scalarized
; through a stack-allocated spill area; with AVX512BW it is a single vpermw.
define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwind {
; NOBW-LABEL: var_shuffle_v32i16:
; NOBW: # %bb.0:
; NOBW-NEXT: pushq %rbp
; NOBW-NEXT: movq %rsp, %rbp
; NOBW-NEXT: andq $-64, %rsp
; NOBW-NEXT: subq $2112, %rsp # imm = 0x840
; NOBW-NEXT: vextracti128 $1, %ymm2, %xmm4
; NOBW-NEXT: vmovd %xmm4, %eax
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, (%rsp)
; NOBW-NEXT: movzwl 1472(%rsp,%rax,2), %eax
; NOBW-NEXT: vmovd %eax, %xmm0
; NOBW-NEXT: vpextrw $1, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $1, 1408(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $2, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $2, 1344(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $3, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $3, 1280(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $4, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $4, 1216(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $5, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $5, 1152(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $6, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $6, 1088(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vpextrw $7, %xmm4, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $7, 1024(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT: vmovd %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: movzwl 1984(%rsp,%rax,2), %eax
; NOBW-NEXT: vmovd %eax, %xmm1
; NOBW-NEXT: vpextrw $1, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $1, 1920(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT: vpextrw $2, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $2, 1856(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT: vpextrw $3, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $3, 1792(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT: vpextrw $4, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $4, 1728(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT: vpextrw $5, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $5, 1664(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT: vpextrw $6, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $6, 1600(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT: vpextrw $7, %xmm2, %eax
; NOBW-NEXT: vextracti128 $1, %ymm3, %xmm2
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $7, 1536(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT: vmovd %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: movzwl 448(%rsp,%rax,2), %eax
; NOBW-NEXT: vmovd %eax, %xmm4
; NOBW-NEXT: vpextrw $1, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $1, 384(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $2, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $2, 320(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $3, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $3, 256(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $4, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $4, 192(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $5, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $5, 128(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $6, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $6, 64(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $7, %xmm2, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm2
; NOBW-NEXT: vmovd %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: movzwl 960(%rsp,%rax,2), %eax
; NOBW-NEXT: vmovd %eax, %xmm4
; NOBW-NEXT: vpextrw $1, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $1, 896(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $2, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $2, 832(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $3, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $3, 768(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $4, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $4, 704(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $5, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $5, 640(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $6, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $6, 576(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT: vpextrw $7, %xmm3, %eax
; NOBW-NEXT: andl $31, %eax
; NOBW-NEXT: vpinsrw $7, 512(%rsp,%rax,2), %xmm4, %xmm3
; NOBW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; NOBW-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm1
; NOBW-NEXT: movq %rbp, %rsp
; NOBW-NEXT: popq %rbp
; NOBW-NEXT: retq
;
; AVX512BW-LABEL: var_shuffle_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
  %index0 = extractelement <32 x i16> %indices, i32 0
  %index1 = extractelement <32 x i16> %indices, i32 1
  %index2 = extractelement <32 x i16> %indices, i32 2
  %index3 = extractelement <32 x i16> %indices, i32 3
  %index4 = extractelement <32 x i16> %indices, i32 4
  %index5 = extractelement <32 x i16> %indices, i32 5
  %index6 = extractelement <32 x i16> %indices, i32 6
  %index7 = extractelement <32 x i16> %indices, i32 7
  %index8 = extractelement <32 x i16> %indices, i32 8
  %index9 = extractelement <32 x i16> %indices, i32 9
  %index10 = extractelement <32 x i16> %indices, i32 10
  %index11 = extractelement <32 x i16> %indices, i32 11
  %index12 = extractelement <32 x i16> %indices, i32 12
  %index13 = extractelement <32 x i16> %indices, i32 13
  %index14 = extractelement <32 x i16> %indices, i32 14
  %index15 = extractelement <32 x i16> %indices, i32 15
  %index16 = extractelement <32 x i16> %indices, i32 16
  %index17 = extractelement <32 x i16> %indices, i32 17
  %index18 = extractelement <32 x i16> %indices, i32 18
  %index19 = extractelement <32 x i16> %indices, i32 19
  %index20 = extractelement <32 x i16> %indices, i32 20
  %index21 = extractelement <32 x i16> %indices, i32 21
  %index22 = extractelement <32 x i16> %indices, i32 22
  %index23 = extractelement <32 x i16> %indices, i32 23
  %index24 = extractelement <32 x i16> %indices, i32 24
  %index25 = extractelement <32 x i16> %indices, i32 25
  %index26 = extractelement <32 x i16> %indices, i32 26
  %index27 = extractelement <32 x i16> %indices, i32 27
  %index28 = extractelement <32 x i16> %indices, i32 28
  %index29 = extractelement <32 x i16> %indices, i32 29
  %index30 = extractelement <32 x i16> %indices, i32 30
  %index31 = extractelement <32 x i16> %indices, i32 31
  %v0 = extractelement <32 x i16> %v, i16 %index0
  %v1 = extractelement <32 x i16> %v, i16 %index1
  %v2 = extractelement <32 x i16> %v, i16 %index2
  %v3 = extractelement <32 x i16> %v, i16 %index3
  %v4 = extractelement <32 x i16> %v, i16 %index4
  %v5 = extractelement <32 x i16> %v, i16 %index5
  %v6 = extractelement <32 x i16> %v, i16 %index6
  %v7 = extractelement <32 x i16> %v, i16 %index7
  %v8 = extractelement <32 x i16> %v, i16 %index8
  %v9 = extractelement <32 x i16> %v, i16 %index9
  %v10 = extractelement <32 x i16> %v, i16 %index10
  %v11 = extractelement <32 x i16> %v, i16 %index11
  %v12 = extractelement <32 x i16> %v, i16 %index12
  %v13 = extractelement <32 x i16> %v, i16 %index13
  %v14 = extractelement <32 x i16> %v, i16 %index14
  %v15 = extractelement <32 x i16> %v, i16 %index15
  %v16 = extractelement <32 x i16> %v, i16 %index16
  %v17 = extractelement <32 x i16> %v, i16 %index17
  %v18 = extractelement <32 x i16> %v, i16 %index18
  %v19 = extractelement <32 x i16> %v, i16 %index19
  %v20 = extractelement <32 x i16> %v, i16 %index20
  %v21 = extractelement <32 x i16> %v, i16 %index21
  %v22 = extractelement <32 x i16> %v, i16 %index22
  %v23 = extractelement <32 x i16> %v, i16 %index23
  %v24 = extractelement <32 x i16> %v, i16 %index24
  %v25 = extractelement <32 x i16> %v, i16 %index25
  %v26 = extractelement <32 x i16> %v, i16 %index26
  %v27 = extractelement <32 x i16> %v, i16 %index27
  %v28 = extractelement <32 x i16> %v, i16 %index28
  %v29 = extractelement <32 x i16> %v, i16 %index29
  %v30 = extractelement <32 x i16> %v, i16 %index30
  %v31 = extractelement <32 x i16> %v, i16 %index31
  %ret0 = insertelement <32 x i16> undef, i16 %v0, i32 0
  %ret1 = insertelement <32 x i16> %ret0, i16 %v1, i32 1
  %ret2 = insertelement <32 x i16> %ret1, i16 %v2, i32 2
  %ret3 = insertelement <32 x i16> %ret2, i16 %v3, i32 3
  %ret4 = insertelement <32 x i16> %ret3, i16 %v4, i32 4
  %ret5 = insertelement <32 x i16> %ret4, i16 %v5, i32 5
  %ret6 = insertelement <32 x i16> %ret5, i16 %v6, i32 6
  %ret7 = insertelement <32 x i16> %ret6, i16 %v7, i32 7
  %ret8 = insertelement <32 x i16> %ret7, i16 %v8, i32 8
  %ret9 = insertelement <32 x i16> %ret8, i16 %v9, i32 9
  %ret10 = insertelement <32 x i16> %ret9, i16 %v10, i32 10
  %ret11 = insertelement <32 x i16> %ret10, i16 %v11, i32 11
  %ret12 = insertelement <32 x i16> %ret11, i16 %v12, i32 12
  %ret13 = insertelement <32 x i16> %ret12, i16 %v13, i32 13
  %ret14 = insertelement <32 x i16> %ret13, i16 %v14, i32 14
  %ret15 = insertelement <32 x i16> %ret14, i16 %v15, i32 15
  %ret16 = insertelement <32 x i16> %ret15, i16 %v16, i32 16
  %ret17 = insertelement <32 x i16> %ret16, i16 %v17, i32 17
  %ret18 = insertelement <32 x i16> %ret17, i16 %v18, i32 18
  %ret19 = insertelement <32 x i16> %ret18, i16 %v19, i32 19
  %ret20 = insertelement <32 x i16> %ret19, i16 %v20, i32 20
  %ret21 = insertelement <32 x i16> %ret20, i16 %v21, i32 21
  %ret22 = insertelement <32 x i16> %ret21, i16 %v22, i32 22
  %ret23 = insertelement <32 x i16> %ret22, i16 %v23, i32 23
  %ret24 = insertelement <32 x i16> %ret23, i16 %v24, i32 24
  %ret25 = insertelement <32 x i16> %ret24, i16 %v25, i32 25
  %ret26 = insertelement <32 x i16> %ret25, i16 %v26, i32 26
  %ret27 = insertelement <32 x i16> %ret26, i16 %v27, i32 27
  %ret28 = insertelement <32 x i16> %ret27, i16 %v28, i32 28
  %ret29 = insertelement <32 x i16> %ret28, i16 %v29, i32 29
  %ret30 = insertelement <32 x i16> %ret29, i16 %v30, i32 30
  %ret31 = insertelement <32 x i16> %ret30, i16 %v31, i32 31
  ret <32 x i16> %ret31
}

; Variable shuffle of <64 x i8>: without AVX512VBMI this is scalarized through
; a stack spill area; with AVX512VBMI it is a single vpermb.
; NOTE(review): this function is truncated in the stored file (it ends
; mid-definition); the visible portion is reproduced verbatim below.
define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; NOBW-LABEL: var_shuffle_v64i8:
; NOBW: # %bb.0:
; NOBW-NEXT: pushq %rbp
; NOBW-NEXT: movq %rsp, %rbp
; NOBW-NEXT: andq $-64, %rsp
; NOBW-NEXT: subq $4160, %rsp # imm = 0x1040
; NOBW-NEXT: vextracti128 $1, %ymm2, %xmm4
; NOBW-NEXT: vpextrb $0, %xmm4, %eax
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT: vmovaps %ymm0, (%rsp)
; NOBW-NEXT: movzbl 3008(%rsp,%rax), %eax
; NOBW-NEXT: vmovd %eax, %xmm0
; NOBW-NEXT: vpextrb $1, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $1, 2944(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $2, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $2, 2880(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $3, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $3, 2816(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $4, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $4, 2752(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $5, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $5, 2688(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $6, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $6, 2624(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $7, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $7, 2560(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $8, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $8, 2496(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $9, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $9, 2432(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $10, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $10, 2368(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $11, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $11, 2304(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $12, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $12, 2240(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $13, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $13, 2176(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $14, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $14, 2112(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $15, %xmm4, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $15, 2048(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT: vpextrb $0, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: movzbl 4032(%rsp,%rax), %eax
; NOBW-NEXT: vmovd %eax, %xmm1
; NOBW-NEXT: vpextrb $1, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $1, 3968(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $2, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $2, 3904(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $3, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $3, 3840(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $4, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $4, 3776(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $5, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $5, 3712(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $6, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $6, 3648(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $7, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $7, 3584(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $8, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $8, 3520(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $9, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $9, 3456(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $10, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $10, 3392(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $11, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $11, 3328(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $12, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $12, 3264(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $13, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $13, 3200(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $14, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $14, 3136(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $15, %xmm2, %eax
; NOBW-NEXT: vextracti128 $1, %ymm3, %xmm2
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $15, 3072(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT: vpextrb $0, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: movzbl 960(%rsp,%rax), %eax
; NOBW-NEXT: vmovd %eax, %xmm4
; NOBW-NEXT: vpextrb $1, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $1, 896(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $2, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $2, 832(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $3, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $3, 768(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $4, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $4, 704(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $5, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $5, 640(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $6, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $6, 576(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $7, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $7, 512(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $8, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $8, 448(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $9, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $9, 384(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $10, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $10, 320(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $11, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $11, 256(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $12, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $12, 192(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $13, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $13, 128(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $14, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $14, 64(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $15, %xmm2, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm4, %xmm2
; NOBW-NEXT: vpextrb $0, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: movzbl 1984(%rsp,%rax), %eax
; NOBW-NEXT: vmovd %eax, %xmm4
; NOBW-NEXT: vpextrb $1, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $1, 1920(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $2, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $2, 1856(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $3, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $3, 1792(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $4, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $4, 1728(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $5, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $5, 1664(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $6, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $6, 1600(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $7, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $7, 1536(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $8, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $8, 1472(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $9, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $9, 1408(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $10, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $10, 1344(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $11, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $11, 1280(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $12, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $12, 1216(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $13, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $13, 1152(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $14, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $14, 1088(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT: vpextrb $15, %xmm3, %eax
; NOBW-NEXT: andl $63, %eax
; NOBW-NEXT: vpinsrb $15, 1024(%rsp,%rax), %xmm4, %xmm3
; NOBW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; NOBW-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm1
; NOBW-NEXT: movq %rbp, %rsp
; NOBW-NEXT: popq %rbp
; NOBW-NEXT: retq
;
; VBMI-LABEL: var_shuffle_v64i8:
; VBMI: # %bb.0:
; VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0
; VBMI-NEXT: retq
  %index0 = extractelement <64 x i8> %indices, i32 0
  %index1 = extractelement <64 x i8> %indices, i32 1
  %index2 = extractelement <64 x i8> %indices, i32 2
  %index3 = extractelement <64 x i8> %indices, i32 3
  %index4 = extractelement <64 x i8> %indices, i32 4
  %index5 = extractelement <64 x i8> %indices, i32 5
  %index6 = extractelement <64 x i8> %indices, i32 6
  %index7 = extractelement <64 x i8> %indices, i32 7
  %index8 = extractelement <64 x i8> %indices, i32 8
  %index9 = extractelement <64 x i8> %indices, i32 9
  %index10 = extractelement <64 x i8> %indices, i32 10
  %index11 = extractelement <64 x i8> %indices, i32 11
  %index12 = extractelement <64 x i8> %indices, i32 12
  %index13 = extractelement <64 x i8> %indices, i32 13
  %index14 = extractelement <64 x i8> %indices, i32 14
  %index15 = extractelement <64 x i8> %indices, i32 15
  %index16 = extractelement <64 x i8> %indices, i32 16
  %index17 = extractelement <64 x i8> %indices, i32 17
  %index18 = extractelement <64 x i8> %indices, i32 18
  %index19 = extractelement <64 x i8> %indices, i32 19
  %index20 = extractelement <64 x i8> %indices, i32 20
  %index21 = extractelement <64 x i8> %indices, i32 21
  %index22 = extractelement <64 x i8> %indices, i32 22
  %index23 = extractelement <64 x i8> %indices, i32 23
  %index24 = extractelement <64 x i8> %indices, i32 24
  %index25 = extractelement <64 x i8> %indices, i32 25
  %index26 = extractelement <64 x i8> %indices, i32 26
  %index27 = extractelement <64 x i8> %indices, i32 27
  %index28 = extractelement <64 x i8> %indices, i32 28
  %index29 = extractelement <64 x i8> %indices, i32 29
  %index30 = extractelement <64 x i8> %indices, i32 30
  %index31 = extractelement <64 x i8> %indices, i32 31
  %index32 = extractelement <64 x i8> %indices, i32 32
  %index33 = extractelement <64 x i8> %indices, i32 33
  %index34 = extractelement <64 x i8> %indices, i32 34
  %index35 = extractelement <64 x i8> %indices, i32 35
  %index36 = extractelement <64 x i8> %indices, i32 36
  %index37 = extractelement <64 x i8> %indices, i32 37
  %index38 = extractelement <64 x i8> %indices, i32 38
  %index39 = extractelement <64 x i8> %indices, i32 39
  %index40 = extractelement <64 x i8> %indices, i32 40
  %index41 = extractelement <64 x i8> %indices, i32 41
  %index42 = extractelement <64 x i8> %indices, i32 42
  %index43 = extractelement <64 x i8> %indices, i32 43
  %index44 = extractelement <64 x i8> %indices, i32 44
  %index45 = extractelement <64 x i8> %indices, i32 45
  %index46 = extractelement <64 x i8> %indices, i32 46
  %index47 = extractelement <64 x i8> %indices, i32 47
  %index48 = extractelement <64 x i8> %indices, i32 48
  %index49 = extractelement <64 x i8> %indices, i32 49
  %index50 = extractelement <64 x i8> %indices, i32 50
  %index51 = extractelement <64 x i8> %indices, i32 51
  %index52 = extractelement <64 x i8> %indices, i32 52
  %index53 = extractelement <64 x i8> %indices, i32 53
  %index54 = extractelement <64 x i8> %indices, i32 54
  %index55 = extractelement <64 x i8> %indices, i32 55
  %index56 = extractelement <64 x i8> %indices, i32 56
  %index57 = extractelement <64 x i8> %indices, i32 57
  %index58 = extractelement <64 x i8> %indices, i32 58
  %index59 = extractelement <64 x i8> %indices, i32 59
  %index60 = extractelement <64 x i8> %indices, i32 60
  %index61 = extractelement <64 x i8> %indices, i32 61
  %index62 = extractelement <64 x i8> %indices, i32 62
  %index63 = extractelement <64 x i8> %indices, i32 63
  %v0 = extractelement <64 x i8> %v, i8 %index0
  %v1 = extractelement <64 x i8> %v, i8 %index1
  %v2 = extractelement <64 x i8> %v, i8 %index2
  %v3
= extractelement <64 x i8> %v, i8 %index3 787 %v4 = extractelement <64 x i8> %v, i8 %index4 788 %v5 = extractelement <64 x i8> %v, i8 %index5 789 %v6 = extractelement <64 x i8> %v, i8 %index6 790 %v7 = extractelement <64 x i8> %v, i8 %index7 791 %v8 = extractelement <64 x i8> %v, i8 %index8 792 %v9 = extractelement <64 x i8> %v, i8 %index9 793 %v10 = extractelement <64 x i8> %v, i8 %index10 794 %v11 = extractelement <64 x i8> %v, i8 %index11 795 %v12 = extractelement <64 x i8> %v, i8 %index12 796 %v13 = extractelement <64 x i8> %v, i8 %index13 797 %v14 = extractelement <64 x i8> %v, i8 %index14 798 %v15 = extractelement <64 x i8> %v, i8 %index15 799 %v16 = extractelement <64 x i8> %v, i8 %index16 800 %v17 = extractelement <64 x i8> %v, i8 %index17 801 %v18 = extractelement <64 x i8> %v, i8 %index18 802 %v19 = extractelement <64 x i8> %v, i8 %index19 803 %v20 = extractelement <64 x i8> %v, i8 %index20 804 %v21 = extractelement <64 x i8> %v, i8 %index21 805 %v22 = extractelement <64 x i8> %v, i8 %index22 806 %v23 = extractelement <64 x i8> %v, i8 %index23 807 %v24 = extractelement <64 x i8> %v, i8 %index24 808 %v25 = extractelement <64 x i8> %v, i8 %index25 809 %v26 = extractelement <64 x i8> %v, i8 %index26 810 %v27 = extractelement <64 x i8> %v, i8 %index27 811 %v28 = extractelement <64 x i8> %v, i8 %index28 812 %v29 = extractelement <64 x i8> %v, i8 %index29 813 %v30 = extractelement <64 x i8> %v, i8 %index30 814 %v31 = extractelement <64 x i8> %v, i8 %index31 815 %v32 = extractelement <64 x i8> %v, i8 %index32 816 %v33 = extractelement <64 x i8> %v, i8 %index33 817 %v34 = extractelement <64 x i8> %v, i8 %index34 818 %v35 = extractelement <64 x i8> %v, i8 %index35 819 %v36 = extractelement <64 x i8> %v, i8 %index36 820 %v37 = extractelement <64 x i8> %v, i8 %index37 821 %v38 = extractelement <64 x i8> %v, i8 %index38 822 %v39 = extractelement <64 x i8> %v, i8 %index39 823 %v40 = extractelement <64 x i8> %v, i8 %index40 824 %v41 = extractelement <64 x i8> %v, i8 
%index41 825 %v42 = extractelement <64 x i8> %v, i8 %index42 826 %v43 = extractelement <64 x i8> %v, i8 %index43 827 %v44 = extractelement <64 x i8> %v, i8 %index44 828 %v45 = extractelement <64 x i8> %v, i8 %index45 829 %v46 = extractelement <64 x i8> %v, i8 %index46 830 %v47 = extractelement <64 x i8> %v, i8 %index47 831 %v48 = extractelement <64 x i8> %v, i8 %index48 832 %v49 = extractelement <64 x i8> %v, i8 %index49 833 %v50 = extractelement <64 x i8> %v, i8 %index50 834 %v51 = extractelement <64 x i8> %v, i8 %index51 835 %v52 = extractelement <64 x i8> %v, i8 %index52 836 %v53 = extractelement <64 x i8> %v, i8 %index53 837 %v54 = extractelement <64 x i8> %v, i8 %index54 838 %v55 = extractelement <64 x i8> %v, i8 %index55 839 %v56 = extractelement <64 x i8> %v, i8 %index56 840 %v57 = extractelement <64 x i8> %v, i8 %index57 841 %v58 = extractelement <64 x i8> %v, i8 %index58 842 %v59 = extractelement <64 x i8> %v, i8 %index59 843 %v60 = extractelement <64 x i8> %v, i8 %index60 844 %v61 = extractelement <64 x i8> %v, i8 %index61 845 %v62 = extractelement <64 x i8> %v, i8 %index62 846 %v63 = extractelement <64 x i8> %v, i8 %index63 847 %ret0 = insertelement <64 x i8> undef, i8 %v0, i32 0 848 %ret1 = insertelement <64 x i8> %ret0, i8 %v1, i32 1 849 %ret2 = insertelement <64 x i8> %ret1, i8 %v2, i32 2 850 %ret3 = insertelement <64 x i8> %ret2, i8 %v3, i32 3 851 %ret4 = insertelement <64 x i8> %ret3, i8 %v4, i32 4 852 %ret5 = insertelement <64 x i8> %ret4, i8 %v5, i32 5 853 %ret6 = insertelement <64 x i8> %ret5, i8 %v6, i32 6 854 %ret7 = insertelement <64 x i8> %ret6, i8 %v7, i32 7 855 %ret8 = insertelement <64 x i8> %ret7, i8 %v8, i32 8 856 %ret9 = insertelement <64 x i8> %ret8, i8 %v9, i32 9 857 %ret10 = insertelement <64 x i8> %ret9, i8 %v10, i32 10 858 %ret11 = insertelement <64 x i8> %ret10, i8 %v11, i32 11 859 %ret12 = insertelement <64 x i8> %ret11, i8 %v12, i32 12 860 %ret13 = insertelement <64 x i8> %ret12, i8 %v13, i32 13 861 %ret14 = insertelement <64 x 
i8> %ret13, i8 %v14, i32 14 862 %ret15 = insertelement <64 x i8> %ret14, i8 %v15, i32 15 863 %ret16 = insertelement <64 x i8> %ret15, i8 %v16, i32 16 864 %ret17 = insertelement <64 x i8> %ret16, i8 %v17, i32 17 865 %ret18 = insertelement <64 x i8> %ret17, i8 %v18, i32 18 866 %ret19 = insertelement <64 x i8> %ret18, i8 %v19, i32 19 867 %ret20 = insertelement <64 x i8> %ret19, i8 %v20, i32 20 868 %ret21 = insertelement <64 x i8> %ret20, i8 %v21, i32 21 869 %ret22 = insertelement <64 x i8> %ret21, i8 %v22, i32 22 870 %ret23 = insertelement <64 x i8> %ret22, i8 %v23, i32 23 871 %ret24 = insertelement <64 x i8> %ret23, i8 %v24, i32 24 872 %ret25 = insertelement <64 x i8> %ret24, i8 %v25, i32 25 873 %ret26 = insertelement <64 x i8> %ret25, i8 %v26, i32 26 874 %ret27 = insertelement <64 x i8> %ret26, i8 %v27, i32 27 875 %ret28 = insertelement <64 x i8> %ret27, i8 %v28, i32 28 876 %ret29 = insertelement <64 x i8> %ret28, i8 %v29, i32 29 877 %ret30 = insertelement <64 x i8> %ret29, i8 %v30, i32 30 878 %ret31 = insertelement <64 x i8> %ret30, i8 %v31, i32 31 879 %ret32 = insertelement <64 x i8> %ret31, i8 %v32, i32 32 880 %ret33 = insertelement <64 x i8> %ret32, i8 %v33, i32 33 881 %ret34 = insertelement <64 x i8> %ret33, i8 %v34, i32 34 882 %ret35 = insertelement <64 x i8> %ret34, i8 %v35, i32 35 883 %ret36 = insertelement <64 x i8> %ret35, i8 %v36, i32 36 884 %ret37 = insertelement <64 x i8> %ret36, i8 %v37, i32 37 885 %ret38 = insertelement <64 x i8> %ret37, i8 %v38, i32 38 886 %ret39 = insertelement <64 x i8> %ret38, i8 %v39, i32 39 887 %ret40 = insertelement <64 x i8> %ret39, i8 %v40, i32 40 888 %ret41 = insertelement <64 x i8> %ret40, i8 %v41, i32 41 889 %ret42 = insertelement <64 x i8> %ret41, i8 %v42, i32 42 890 %ret43 = insertelement <64 x i8> %ret42, i8 %v43, i32 43 891 %ret44 = insertelement <64 x i8> %ret43, i8 %v44, i32 44 892 %ret45 = insertelement <64 x i8> %ret44, i8 %v45, i32 45 893 %ret46 = insertelement <64 x i8> %ret45, i8 %v46, i32 46 894 %ret47 = 
insertelement <64 x i8> %ret46, i8 %v47, i32 47
  %ret48 = insertelement <64 x i8> %ret47, i8 %v48, i32 48
  %ret49 = insertelement <64 x i8> %ret48, i8 %v49, i32 49
  %ret50 = insertelement <64 x i8> %ret49, i8 %v50, i32 50
  %ret51 = insertelement <64 x i8> %ret50, i8 %v51, i32 51
  %ret52 = insertelement <64 x i8> %ret51, i8 %v52, i32 52
  %ret53 = insertelement <64 x i8> %ret52, i8 %v53, i32 53
  %ret54 = insertelement <64 x i8> %ret53, i8 %v54, i32 54
  %ret55 = insertelement <64 x i8> %ret54, i8 %v55, i32 55
  %ret56 = insertelement <64 x i8> %ret55, i8 %v56, i32 56
  %ret57 = insertelement <64 x i8> %ret56, i8 %v57, i32 57
  %ret58 = insertelement <64 x i8> %ret57, i8 %v58, i32 58
  %ret59 = insertelement <64 x i8> %ret58, i8 %v59, i32 59
  %ret60 = insertelement <64 x i8> %ret59, i8 %v60, i32 60
  %ret61 = insertelement <64 x i8> %ret60, i8 %v61, i32 61
  %ret62 = insertelement <64 x i8> %ret61, i8 %v62, i32 62
  %ret63 = insertelement <64 x i8> %ret62, i8 %v63, i32 63
  ret <64 x i8> %ret63
}

; Variable shuffle of <8 x double> driven by a runtime <8 x i64> index vector.
; Written as scalar extract/insert chains so the backend must recognize the
; whole pattern as a single variable permute; the CHECK lines (autogenerated
; by update_llc_test_checks.py) verify all AVX512 configs lower it to one
; vpermpd zmm instruction.
define <8 x double> @var_shuffle_v8f64(<8 x double> %v, <8 x i64> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  ; Pull out each of the eight runtime indices.
  %index0 = extractelement <8 x i64> %indices, i32 0
  %index1 = extractelement <8 x i64> %indices, i32 1
  %index2 = extractelement <8 x i64> %indices, i32 2
  %index3 = extractelement <8 x i64> %indices, i32 3
  %index4 = extractelement <8 x i64> %indices, i32 4
  %index5 = extractelement <8 x i64> %indices, i32 5
  %index6 = extractelement <8 x i64> %indices, i32 6
  %index7 = extractelement <8 x i64> %indices, i32 7
  ; Gather the selected source elements (variable extract indices).
  %v0 = extractelement <8 x double> %v, i64 %index0
  %v1 = extractelement <8 x double> %v, i64 %index1
  %v2 = extractelement <8 x double> %v, i64 %index2
  %v3 = extractelement <8 x double> %v, i64 %index3
  %v4 = extractelement <8 x double> %v, i64 %index4
  %v5 = extractelement <8 x double> %v, i64 %index5
  %v6 = extractelement <8 x double> %v, i64 %index6
  %v7 = extractelement <8 x double> %v, i64 %index7
  ; Rebuild the result vector lane by lane, starting from undef.
  %ret0 = insertelement <8 x double> undef, double %v0, i32 0
  %ret1 = insertelement <8 x double> %ret0, double %v1, i32 1
  %ret2 = insertelement <8 x double> %ret1, double %v2, i32 2
  %ret3 = insertelement <8 x double> %ret2, double %v3, i32 3
  %ret4 = insertelement <8 x double> %ret3, double %v4, i32 4
  %ret5 = insertelement <8 x double> %ret4, double %v5, i32 5
  %ret6 = insertelement <8 x double> %ret5, double %v6, i32 6
  %ret7 = insertelement <8 x double> %ret6, double %v7, i32 7
  ret <8 x double> %ret7
}

; Variable shuffle of <16 x float> driven by a runtime <16 x i32> index
; vector, same scalarized extract/insert pattern as above; all AVX512
; configs are expected to fold it into a single vpermps zmm instruction.
define <16 x float> @var_shuffle_v16f32(<16 x float> %v, <16 x i32> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  ; Pull out each of the sixteen runtime indices.
  %index0 = extractelement <16 x i32> %indices, i32 0
  %index1 = extractelement <16 x i32> %indices, i32 1
  %index2 = extractelement <16 x i32> %indices, i32 2
  %index3 = extractelement <16 x i32> %indices, i32 3
  %index4 = extractelement <16 x i32> %indices, i32 4
  %index5 = extractelement <16 x i32> %indices, i32 5
  %index6 = extractelement <16 x i32> %indices, i32 6
  %index7 = extractelement <16 x i32> %indices, i32 7
  %index8 = extractelement <16 x i32> %indices, i32 8
  %index9 = extractelement <16 x i32> %indices, i32 9
  %index10 = extractelement <16 x i32> %indices, i32 10
  %index11 = extractelement <16 x i32> %indices, i32 11
  %index12 = extractelement <16 x i32> %indices, i32 12
  %index13 = extractelement <16 x i32> %indices, i32 13
  %index14 = extractelement <16 x i32> %indices, i32 14
  %index15 = extractelement <16 x i32> %indices, i32 15
  ; Gather the selected source elements (variable extract indices).
  %v0 = extractelement <16 x float> %v, i32 %index0
  %v1 = extractelement <16 x float> %v, i32 %index1
  %v2 = extractelement <16 x float> %v, i32 %index2
  %v3 = extractelement <16 x float> %v, i32 %index3
  %v4 = extractelement <16 x float> %v, i32 %index4
  %v5 = extractelement <16 x float> %v, i32 %index5
  %v6 = extractelement <16 x float> %v, i32 %index6
  %v7 = extractelement <16 x float> %v, i32 %index7
  %v8 = extractelement <16 x float> %v, i32 %index8
  %v9 = extractelement <16 x float> %v, i32 %index9
  %v10 = extractelement <16 x float> %v, i32 %index10
  %v11 = extractelement <16 x float> %v, i32 %index11
  %v12 = extractelement <16 x float> %v, i32 %index12
  %v13 = extractelement <16 x float> %v, i32 %index13
  %v14 = extractelement <16 x float> %v, i32 %index14
  %v15 = extractelement <16 x float> %v, i32 %index15
  ; Rebuild the result vector lane by lane, starting from undef.
  %ret0 = insertelement <16 x float> undef, float %v0, i32 0
  %ret1 = insertelement <16 x float> %ret0, float %v1, i32 1
  %ret2 = insertelement <16 x float> %ret1, float %v2, i32 2
  %ret3 = insertelement <16 x float> %ret2, float %v3, i32 3
  %ret4 = insertelement <16 x float> %ret3, float %v4, i32 4
  %ret5 = insertelement <16 x float> %ret4, float %v5, i32 5
  %ret6 = insertelement <16 x float> %ret5, float %v6, i32 6
  %ret7 = insertelement <16 x float> %ret6, float %v7, i32 7
  %ret8 = insertelement <16 x float> %ret7, float %v8, i32 8
  %ret9 = insertelement <16 x float> %ret8, float %v9, i32 9
  %ret10 = insertelement <16 x float> %ret9, float %v10, i32 10
  %ret11 = insertelement <16 x float> %ret10, float %v11, i32 11
  %ret12 = insertelement <16 x float> %ret11, float %v12, i32 12
  %ret13 = insertelement <16 x float> %ret12, float %v13, i32 13
  %ret14 = insertelement <16 x float> %ret13, float %v14, i32 14
  %ret15 = insertelement <16 x float> %ret14, float %v15, i32 15
  ret <16 x float> %ret15
}