; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s

; Codegen tests for extracting 128-bit and 256-bit subvectors from wider
; vectors when compiling with -mcpu=skx. Every extract is written as a
; shufflevector whose mask selects one contiguous, aligned run of elements.
; Three shapes are covered:
;   * the extracted subvector is returned in a register,
;   * the extracted subvector is stored to memory,
;   * the extracted subvector is combined with a mask via select.
; The expected-assembly lines are generated; rerun the update script named on
; the first line instead of editing them by hand.

;
; 128-bit subvector of a 512-bit vector, returned in %xmm0.
; A non-zero lane is expected to use vextractf32x4; lane 0 is expected to need
; no instruction at all (only a sub-register "kill" annotation).
;

define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind  {
; SKX-LABEL: extract_subvector128_v32i16:
; SKX:       ## %bb.0:
; SKX-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <8 x i16> %r1
}

define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounwind  {
; SKX-LABEL: extract_subvector128_v32i16_first_element:
; SKX:       ## %bb.0:
; SKX-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %r1
}

define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind  {
; SKX-LABEL: extract_subvector128_v64i8:
; SKX:       ## %bb.0:
; SKX-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
  ret <16 x i8> %r1
}

define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwind  {
; SKX-LABEL: extract_subvector128_v64i8_first_element:
; SKX:       ## %bb.0:
; SKX-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %r1
}

;
; Upper 256-bit half of a 512-bit vector, returned in %ymm0.
; Expected to use vextractf64x4; no vzeroupper is expected here because the
; result itself lives in a ymm register.
;

define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind  {
; SKX-LABEL: extract_subvector256_v32i16:
; SKX:       ## %bb.0:
; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; SKX-NEXT:    retq
  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i16> %r1
}

define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind  {
; SKX-LABEL: extract_subvector256_v64i8:
; SKX:       ## %bb.0:
; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; SKX-NEXT:    retq
  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  ret <32 x i8> %r1
}

;
; Upper 128-bit half of a 256-bit vector, stored with align 1.
; For every element type the extract and the store are expected to fold into
; one vextractf128 with a memory destination.
;

; NOTE(review): the name says v8f64 but the operand is <4 x double>; the name
; is kept as-is because it is the symbol the assertions are keyed on.
define void @extract_subvector256_v8f64_store(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8f64_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 1
  ret void
}

define void @extract_subvector256_v8f32_store(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8f32_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 1
  ret void
}

define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i64_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 1
  ret void
}

define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i32_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 1
  ret void
}

define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i16_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %1, align 1
  ret void
}

define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v32i8_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 1
  ret void
}

;
; Low 128 bits of a 256-bit vector, stored.
; No extract instruction is expected, only a store of %xmm0: vmovups for
; align 1 and vmovaps for align 16.
;

define void @extract_subvector256_v4f64_store_lo(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 1
  ret void
}

define void @extract_subvector256_v4f64_store_lo_align_16(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 16
  ret void
}

define void @extract_subvector256_v4f32_store_lo(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 1
  ret void
}

define void @extract_subvector256_v4f32_store_lo_align_16(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 16
  ret void
}

define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v2i64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 1
  ret void
}

define void @extract_subvector256_v2i64_store_lo_align_16(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v2i64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 16
  ret void
}

define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 1
  ret void
}

define void @extract_subvector256_v4i32_store_lo_align_16(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 16
  ret void
}

define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i16_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %1, align 1
  ret void
}

define void @extract_subvector256_v8i16_store_lo_align_16(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i16_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %1, align 16
  ret void
}

define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i8_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 1
  ret void
}

define void @extract_subvector256_v16i8_store_lo_align_16(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i8_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 16
  ret void
}

;
; Low 128 bits of a 512-bit vector, stored.
; Same expectation as the 256-bit-source group: a plain store of %xmm0,
; vmovups for align 1 and vmovaps for align 16.
;

define void @extract_subvector512_v2f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2f64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 1
  ret void
}

define void @extract_subvector512_v2f64_store_lo_align_16(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2f64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 16
  ret void
}

define void @extract_subvector512_v4f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 1
  ret void
}

define void @extract_subvector512_v4f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 16
  ret void
}

define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2i64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 1
  ret void
}

define void @extract_subvector512_v2i64_store_lo_align_16(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2i64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 16
  ret void
}

define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 1
  ret void
}

define void @extract_subvector512_v4i32_store_lo_align_16(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 16
  ret void
}

; NOTE(review): unlike the other element types in this group, v8i16 has no
; _align_16 sibling in the portion of the file reviewed here.
define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i16_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %1, align 1
  ret void
}

define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i8_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 1
  ret void
}

define void @extract_subvector512_v16i8_store_lo_align_16(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i8_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 16
  ret void
}

;
; Low 256 bits of a 512-bit vector, stored.
; Expected to be a plain store of %ymm0. Each element type is tested at
; align 1, 16 and 32; only align 32 is expected to produce vmovaps, the other
; two are expected to produce vmovups.
;

define void @extract_subvector512_v4f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast double* %addr to <4 x double>*
  store <4 x double> %0, <4 x double>* %1, align 1
  ret void
}

define void @extract_subvector512_v4f64_store_lo_align_16(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast double* %addr to <4 x double>*
  store <4 x double> %0, <4 x double>* %1, align 16
  ret void
}

define void @extract_subvector512_v4f64_store_lo_align_32(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast double* %addr to <4 x double>*
  store <4 x double> %0, <4 x double>* %1, align 32
  ret void
}

define void @extract_subvector512_v8f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8f32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast float* %addr to <8 x float>*
  store <8 x float> %0, <8 x float>* %1, align 1
  ret void
}

define void @extract_subvector512_v8f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast float* %addr to <8 x float>*
  store <8 x float> %0, <8 x float>* %1, align 16
  ret void
}

define void @extract_subvector512_v8f32_store_lo_align_32(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast float* %addr to <8 x float>*
  store <8 x float> %0, <8 x float>* %1, align 32
  ret void
}

define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i64* %addr to <4 x i64>*
  store <4 x i64> %0, <4 x i64>* %1, align 1
  ret void
}

define void @extract_subvector512_v4i64_store_lo_align_16(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i64* %addr to <4 x i64>*
  store <4 x i64> %0, <4 x i64>* %1, align 16
  ret void
}

define void @extract_subvector512_v4i64_store_lo_align_32(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i64* %addr to <4 x i64>*
  store <4 x i64> %0, <4 x i64>* %1, align 32
  ret void
}

define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i32* %addr to <8 x i32>*
  store <8 x i32> %0, <8 x i32>* %1, align 1
  ret void
}

define void @extract_subvector512_v8i32_store_lo_align_16(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i32* %addr to <8 x i32>*
  store <8 x i32> %0, <8 x i32>* %1, align 16
  ret void
}

define void @extract_subvector512_v8i32_store_lo_align_32(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i32* %addr to <8 x i32>*
  store <8 x i32> %0, <8 x i32>* %1, align 32
  ret void
}

define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i16_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i16* %addr to <16 x i16>*
  store <16 x i16> %0, <16 x i16>* %1, align 1
  ret void
}

define void @extract_subvector512_v16i16_store_lo_align_16(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i16* %addr to <16 x i16>*
  store <16 x i16> %0, <16 x i16>* %1, align 16
  ret void
}

define void @extract_subvector512_v16i16_store_lo_align_32(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i16* %addr to <16 x i16>*
  store <16 x i16> %0, <16 x i16>* %1, align 32
  ret void
}

define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v32i8_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %1 = bitcast i8* %addr to <32 x i8>*
  store <32 x i8> %0, <32 x i8>* %1, align 1
  ret void
}

define void @extract_subvector512_v32i8_store_lo_align_16(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %1 = bitcast i8* %addr to <32 x i8>*
  store <32 x i8> %0, <32 x i8>* %1, align 16
  ret void
}

define void @extract_subvector512_v32i8_store_lo_align_32(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %1 = bitcast i8* %addr to <32 x i8>*
  store <32 x i8> %0, <32 x i8>* %1, align 32
  ret void
}

;
; Masked extracts.
; Each test extracts the upper subvector, builds a lane mask from the low
; elements of the i8 %__U bitcast to <8 x i1>, and selects between the
; extracted value and either the passthru %__W ("mask" variants) or zero
; ("maskz" variants). The select is expected to fold into the write-masked
; form of the extract instruction ({%k1} or {%k1} {z}).
;

define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_mask_extractf64x4_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x4 $1, %zmm1, %ymm0 {%k1}
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> %__W
  ret <4 x double> %1
}

define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_maskz_extractf64x4_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> zeroinitializer
  ret <4 x double> %1
}

define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_mask_extractf32x4_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x4 $1, %zmm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <16 x float>
  %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
  ret <4 x float> %2
}

define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_maskz_extractf32x4_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <16 x float>
  %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
  ret <4 x float> %2
}

define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) {
; SKX-LABEL: test_mm256_mask_extractf64x2_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x2 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
  ret <2 x double> %1
}

define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) {
; SKX-LABEL: test_mm256_maskz_extractf64x2_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer
  ret <2 x double> %1
}

define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_mask_extracti64x2_epi64:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextracti64x2 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> %__W
  ret <2 x i64> %1
}

define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
  ret <2 x i64> %1
}

define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) {
; SKX-LABEL: test_mm256_mask_extractf32x4_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x4 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
  ret <4 x float> %1
}

define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) {
; SKX-LABEL: test_mm256_maskz_extractf32x4_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
  ret <4 x float> %1
}

define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_mask_extracti32x4_epi32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextracti32x4 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast <2 x i64> %__W to <4 x i32>
%2 = bitcast i8 %__U to <8 x i1> 837 %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 838 %3 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> %1 839 %4 = bitcast <4 x i32> %3 to <2 x i64> 840 ret <2 x i64> %4 841 } 842 843 define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) { 844 ; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32: 845 ; SKX: ## %bb.0: ## %entry 846 ; SKX-NEXT: kmovd %edi, %k1 847 ; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z} 848 ; SKX-NEXT: vzeroupper 849 ; SKX-NEXT: retq 850 entry: 851 %0 = bitcast <4 x i64> %__A to <8 x i32> 852 %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 853 %1 = bitcast i8 %__U to <8 x i1> 854 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 855 %2 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer 856 %3 = bitcast <4 x i32> %2 to <2 x i64> 857 ret <2 x i64> %3 858 } 859 860 define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) { 861 ; SKX-LABEL: test_mm512_mask_extractf32x8_ps: 862 ; SKX: ## %bb.0: ## %entry 863 ; SKX-NEXT: kmovd %edi, %k1 864 ; SKX-NEXT: vextractf32x8 $1, %zmm1, %ymm0 {%k1} 865 ; SKX-NEXT: retq 866 entry: 867 %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 868 %0 = bitcast i8 %__U to <8 x i1> 869 %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> %__W 870 ret <8 x float> %1 871 } 872 873 define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) { 874 ; SKX-LABEL: test_mm512_maskz_extractf32x8_ps: 875 ; SKX: ## %bb.0: ## %entry 876 ; SKX-NEXT: kmovd %edi, %k1 877 ; SKX-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} 878 ; SKX-NEXT: retq 879 entry: 880 %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 
11, i32 12, i32 13, i32 14, i32 15> 881 %0 = bitcast i8 %__U to <8 x i1> 882 %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> zeroinitializer 883 ret <8 x float> %1 884 } 885 886 define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) { 887 ; SKX-LABEL: test_mm512_mask_extractf64x2_pd: 888 ; SKX: ## %bb.0: ## %entry 889 ; SKX-NEXT: kmovd %edi, %k1 890 ; SKX-NEXT: vextractf64x2 $3, %zmm1, %xmm0 {%k1} 891 ; SKX-NEXT: vzeroupper 892 ; SKX-NEXT: retq 893 entry: 894 %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7> 895 %0 = bitcast i8 %__U to <8 x i1> 896 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 897 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W 898 ret <2 x double> %1 899 } 900 901 define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) { 902 ; SKX-LABEL: test_mm512_maskz_extractf64x2_pd: 903 ; SKX: ## %bb.0: ## %entry 904 ; SKX-NEXT: kmovd %edi, %k1 905 ; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z} 906 ; SKX-NEXT: vzeroupper 907 ; SKX-NEXT: retq 908 entry: 909 %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7> 910 %0 = bitcast i8 %__U to <8 x i1> 911 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 912 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer 913 ret <2 x double> %1 914 } 915