; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
; RUN: llc < %s -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL

; Codegen checks for shufflevector on 256-bit vectors with four 64-bit lanes.
;
; Naming: shuffle_<type>_<mask>. The four trailing characters spell the
; shuffle mask: 0-3 select a lane of %a, 4-7 select a lane of %b, and 'u'
; marks an undef lane.
;
; Expectations that are identical for all three RUN configurations are
; written once under the shared "ALL" prefix; otherwise every configuration
; gets its own section (AVX1, AVX2, AVX512VL).

target triple = "x86_64-unknown-unknown"

; --- <4 x double> shuffles ---------------------------------------------------

define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0001:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0001:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0001:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0020:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0020:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0020:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0300:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0300:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0300:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_1000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_1000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2200:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_2200:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_2200:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3330:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3330:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3330:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3210:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0023:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0022:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

; Same mask as shuffle_v4f64_0022, but the first operand is loaded from %ptr.
define <4 x double> @shuffle_v4f64mem_0022(<4 x double>* %ptr, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64mem_0022:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; ALL-NEXT:    retq
  %a = load <4 x double>, <4 x double>* %ptr
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1133:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1023:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1022:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0423:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0423:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0423:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vbroadcastsd %xmm1, %ymm1
; AVX512VL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0462:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0426:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1537:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4062:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5173:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5163:
; ALL:       # BB#0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0527:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4163:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0145:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0145:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0145:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_4501:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_4501:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_4501:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0167:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1054:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_1054:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_1054:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3254:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3254:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3254:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3276:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3276:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3276:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1076:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0415:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0415:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0415:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512VL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_u062:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_11uu:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_22uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_22uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_22uu:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3333:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3333:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3333:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x double> %shuffle
}

; --- <4 x i64> shuffles ------------------------------------------------------

define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0001:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0001:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i64> %shuffle
}
580 581 define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) { 582 ; AVX1-LABEL: shuffle_v4i64_0020: 583 ; AVX1: # BB#0: 584 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 585 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 586 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 587 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 588 ; AVX1-NEXT: retq 589 ; 590 ; AVX2-LABEL: shuffle_v4i64_0020: 591 ; AVX2: # BB#0: 592 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0] 593 ; AVX2-NEXT: retq 594 ; 595 ; AVX512VL-LABEL: shuffle_v4i64_0020: 596 ; AVX512VL: # BB#0: 597 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0] 598 ; AVX512VL-NEXT: retq 599 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0> 600 ret <4 x i64> %shuffle 601 } 602 603 define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) { 604 ; AVX1-LABEL: shuffle_v4i64_0112: 605 ; AVX1: # BB#0: 606 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 607 ; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0] 608 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 609 ; AVX1-NEXT: retq 610 ; 611 ; AVX2-LABEL: shuffle_v4i64_0112: 612 ; AVX2: # BB#0: 613 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2] 614 ; AVX2-NEXT: retq 615 ; 616 ; AVX512VL-LABEL: shuffle_v4i64_0112: 617 ; AVX512VL: # BB#0: 618 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2] 619 ; AVX512VL-NEXT: retq 620 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2> 621 ret <4 x i64> %shuffle 622 } 623 624 define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) { 625 ; AVX1-LABEL: shuffle_v4i64_0300: 626 ; AVX1: # BB#0: 627 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 628 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2] 629 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 630 ; AVX1-NEXT: retq 631 ; 632 ; AVX2-LABEL: shuffle_v4i64_0300: 633 ; AVX2: # BB#0: 634 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0] 635 ; AVX2-NEXT: retq 
636 ; 637 ; AVX512VL-LABEL: shuffle_v4i64_0300: 638 ; AVX512VL: # BB#0: 639 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0] 640 ; AVX512VL-NEXT: retq 641 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0> 642 ret <4 x i64> %shuffle 643 } 644 645 define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) { 646 ; AVX1-LABEL: shuffle_v4i64_1000: 647 ; AVX1: # BB#0: 648 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 649 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 650 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 651 ; AVX1-NEXT: retq 652 ; 653 ; AVX2-LABEL: shuffle_v4i64_1000: 654 ; AVX2: # BB#0: 655 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0] 656 ; AVX2-NEXT: retq 657 ; 658 ; AVX512VL-LABEL: shuffle_v4i64_1000: 659 ; AVX512VL: # BB#0: 660 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0] 661 ; AVX512VL-NEXT: retq 662 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0> 663 ret <4 x i64> %shuffle 664 } 665 666 define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) { 667 ; AVX1-LABEL: shuffle_v4i64_2200: 668 ; AVX1: # BB#0: 669 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 670 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 671 ; AVX1-NEXT: retq 672 ; 673 ; AVX2-LABEL: shuffle_v4i64_2200: 674 ; AVX2: # BB#0: 675 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0] 676 ; AVX2-NEXT: retq 677 ; 678 ; AVX512VL-LABEL: shuffle_v4i64_2200: 679 ; AVX512VL: # BB#0: 680 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0] 681 ; AVX512VL-NEXT: retq 682 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0> 683 ret <4 x i64> %shuffle 684 } 685 686 define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) { 687 ; AVX1-LABEL: shuffle_v4i64_3330: 688 ; AVX1: # BB#0: 689 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 690 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3] 691 ; AVX1-NEXT: vpermilpd 
{{.*#+}} ymm0 = ymm0[1,1,3,2] 692 ; AVX1-NEXT: retq 693 ; 694 ; AVX2-LABEL: shuffle_v4i64_3330: 695 ; AVX2: # BB#0: 696 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0] 697 ; AVX2-NEXT: retq 698 ; 699 ; AVX512VL-LABEL: shuffle_v4i64_3330: 700 ; AVX512VL: # BB#0: 701 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0] 702 ; AVX512VL-NEXT: retq 703 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0> 704 ret <4 x i64> %shuffle 705 } 706 707 define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) { 708 ; AVX1-LABEL: shuffle_v4i64_3210: 709 ; AVX1: # BB#0: 710 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 711 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 712 ; AVX1-NEXT: retq 713 ; 714 ; AVX2-LABEL: shuffle_v4i64_3210: 715 ; AVX2: # BB#0: 716 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0] 717 ; AVX2-NEXT: retq 718 ; 719 ; AVX512VL-LABEL: shuffle_v4i64_3210: 720 ; AVX512VL: # BB#0: 721 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0] 722 ; AVX512VL-NEXT: retq 723 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 724 ret <4 x i64> %shuffle 725 } 726 727 define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) { 728 ; AVX1-LABEL: shuffle_v4i64_0124: 729 ; AVX1: # BB#0: 730 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] 731 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 732 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3] 733 ; AVX1-NEXT: retq 734 ; 735 ; AVX2-LABEL: shuffle_v4i64_0124: 736 ; AVX2: # BB#0: 737 ; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1 738 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] 739 ; AVX2-NEXT: retq 740 ; 741 ; AVX512VL-LABEL: shuffle_v4i64_0124: 742 ; AVX512VL: # BB#0: 743 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1 744 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] 745 ; AVX512VL-NEXT: retq 746 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 
2, i32 4> 747 ret <4 x i64> %shuffle 748 } 749 750 define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) { 751 ; AVX1-LABEL: shuffle_v4i64_0142: 752 ; AVX1: # BB#0: 753 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 754 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2] 755 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3] 756 ; AVX1-NEXT: retq 757 ; 758 ; AVX2-LABEL: shuffle_v4i64_0142: 759 ; AVX2: # BB#0: 760 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1 761 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2] 762 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7] 763 ; AVX2-NEXT: retq 764 ; 765 ; AVX512VL-LABEL: shuffle_v4i64_0142: 766 ; AVX512VL: # BB#0: 767 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm1, %ymm1 768 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2] 769 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7] 770 ; AVX512VL-NEXT: retq 771 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2> 772 ret <4 x i64> %shuffle 773 } 774 775 define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) { 776 ; AVX1-LABEL: shuffle_v4i64_0412: 777 ; AVX1: # BB#0: 778 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 779 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0] 780 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 781 ; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2] 782 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3] 783 ; AVX1-NEXT: retq 784 ; 785 ; AVX2-LABEL: shuffle_v4i64_0412: 786 ; AVX2: # BB#0: 787 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2] 788 ; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1 789 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7] 790 ; AVX2-NEXT: retq 791 ; 792 ; AVX512VL-LABEL: shuffle_v4i64_0412: 793 ; AVX512VL: # BB#0: 794 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2] 795 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1 796 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = 
ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

; Mask <4,0,1,2>: %b[0], %a[0], %a[1], %a[2].
define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4012:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4012:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_4012:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

; Mask <0,1,4,5>: low two elements of %a followed by low two elements of %b.
define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0145:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0145:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0145:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %shuffle
}

; Mask <0,4,5,1>: %a[0], %b[0], %b[1], %a[1].
define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0451:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0451:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0451:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

; Mask <4,5,0,1>: low two elements of %b followed by low two elements of %a.
define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4501:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4501:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_4501:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

; Mask <4,0,1,5>: %b[0], %a[0], %a[1], %b[1].
define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4015:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4015:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_4015:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm1, %ymm1
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

; Mask <2,undef,3,5>: %a[2], don't-care, %a[3], %b[1].
define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2u35:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_2u35:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_2u35:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
  ret <4 x i64> %shuffle
}

; Mask <1,2,5,1>: %a[1], %a[2], %b[1], %a[1].
define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1251:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1251:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1251:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

; Mask <1,0,5,4>: %a[1], %a[0], %b[1], %b[0].
define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1054:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1054:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1054:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

; Mask <3,2,5,4>: %a[3], %a[2], %b[1], %b[0].
define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3254:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3254:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3254:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

; Mask <3,2,7,6>: %a[3], %a[2], %b[3], %b[2].
define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3276:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3276:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3276:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

; Mask <1,0,7,6>: %a[1], %a[0], %b[3], %b[2].
define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1076:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1076:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1076:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

; Mask <0,4,1,5>: %a[0], %b[0], %a[1], %b[1].
define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0415:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0415:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0415:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

; First shuffle operand is zeroinitializer; mask <0,4,0,6> yields zero, %a[0], zero, %a[2].
define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_z4z6:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_z4z6:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_z4z6:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
  ret <4 x i64> %shuffle
}

; First shuffle operand is zeroinitializer; mask <5,0,undef,0> yields %a[1], zero, don't-care, zero.
define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_5zuz:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_5zuz:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_5zuz:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 undef, i32 0>
  ret <4 x i64> %shuffle
}

; Mask <4,0,undef,2>: %b[0], %a[0], don't-care, %a[2].
define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_40u2:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_40u2:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_40u2:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
  ret <4 x i64> %shuffle
}

; Mask <1,1,undef,undef>: %a[1] in the two low lanes, upper lanes don't-care.
define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_11uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_11uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_11uu:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  ret <4 x i64> %shuffle
}

; Mask <2,2,undef,undef>: %a[2] in the two low lanes, upper lanes don't-care.
define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_22uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_22uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_22uu:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
  ret <4 x i64> %shuffle
}

; Mask <3,3,3,3>: %a[3] replicated into every lane.
define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3333:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3333:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3333:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i64> %shuffle
}

; Chain of four dependent shuffles of %b; besides the label, the only
; directive is a plain match on retq, so no instruction sequence is pinned.
define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: stress_test1:
; ALL: retq
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
  %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
  %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
  %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>

  ret <4 x i64> %f
}

; Scalar i64 argument inserted into lane 0; lanes 1-3 come from zeroinitializer.
define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; ALL-LABEL: insert_reg_and_zero_v4i64:
; ALL:       # BB#0:
; ALL-NEXT:    vmovq %rdi, %xmm0
; ALL-NEXT:    retq
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

; Loaded i64 inserted into lane 0; lanes 1-3 come from zeroinitializer.
define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_mem_and_zero_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_mem_and_zero_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_mem_and_zero_v4i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovq (%rdi), %xmm0
; AVX512VL-NEXT:    retq
  %a = load i64, i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

; Scalar double argument inserted into lane 0; lanes 1-3 come from zeroinitializer.
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; AVX1-LABEL: insert_reg_and_zero_v4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_reg_and_zero_v4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_reg_and_zero_v4f64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

; Loaded double inserted into lane 0; lanes 1-3 come from zeroinitializer.
define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
; AVX1-LABEL: insert_mem_and_zero_v4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_mem_and_zero_v4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_mem_and_zero_v4f64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovsd (%rdi), %xmm0
; AVX512VL-NEXT:    retq
  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

; Loaded double replicated into all four lanes.
define <4 x double> @splat_mem_v4f64(double* %ptr) {
; ALL-LABEL: splat_mem_v4f64:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

; Loaded i64 replicated into all four lanes.
define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: splat_mem_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_mem_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat_mem_v4i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT:    retq
  %a = load i64, i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

; Loaded double placed in a <2 x double>, then widened to four lanes with an all-zero mask.
define <4 x double> @splat_mem_v4f64_2(double* %p) {
; ALL-LABEL: splat_mem_v4f64_2:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %1 = load double, double* %p
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %3
}

; Element 0 of a <2 x double> argument replicated into four lanes.
define <4 x double> @splat_v4f64(<2 x double> %r) {
; AVX1-LABEL: splat_v4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_v4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat_v4f64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %1
}

; Element 0 of a loaded <2 x i64> replicated into four lanes.
define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_mem_v4i64_from_v2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat_mem_v4i64_from_v2i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT:    retq
  %v = load <2 x i64>, <2 x i64>* %ptr
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

; Element 0 of a loaded <2 x double> replicated into four lanes.
define <4 x double> @splat_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
; ALL-LABEL: splat_mem_v4f64_from_v2f64:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

; Mask <0,1,0,1>: the loaded <2 x i64> pair repeated in both halves of the result.
define <4 x i64> @splat128_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX1-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps (%rdi), %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps (%rdi), %xmm0
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovdqa64 (%rdi), %xmm0
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %v = load <2 x i64>, <2 x i64>* %ptr
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

; Mask <0,1,0,1>: the loaded <2 x double> pair repeated in both halves of the result.
define <4 x double> @splat128_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
; AVX1-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps (%rdi), %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps (%rdi), %xmm0
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovapd (%rdi), %xmm0
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %shuffle
}

; Three shuffles at 64-, 32- and 16-bit element width, separated by bitcasts.
; The expected output for every run line is a single unpack-low instruction.
define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: bitcast_v4f64_0426:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v4f64_0426:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: bitcast_v4f64_0426:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX512VL-NEXT:    retq
  %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
  %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
  %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
  %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
  ret <4 x double> %bitcast64
}

; Extracts %a0[0..1] and %a1[2..3] as <2 x i64> values, then concatenates them.
define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-LABEL: concat_v4i64_0167:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: concat_v4i64_0167:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: concat_v4i64_0167:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VL-NEXT:    retq
  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
  %a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 6, i32 7>
  %shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuffle64
}

; Extracts %a0[0..1] and %a1[0..1], bitcasts both to <4 x i32>, concatenates
; them as <8 x i32>, and bitcasts the result back to <4 x i64>.
define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-LABEL: concat_v4i64_0145_bc:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: concat_v4i64_0145_bc:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: concat_v4i64_0145_bc:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
  %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
  %bc0lo = bitcast <2 x i64> %a0lo to <4 x i32>
  %bc1lo = bitcast <2 x i64> %a1lo to <4 x i32>
  %shuffle32 = shufflevector <4 x i32> %bc0lo, <4 x i32> %bc1lo, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %shuffle64 = bitcast <8 x i32> %shuffle32 to <4 x i64>
  ret <4 x i64> %shuffle64
}

; i64 loaded with align 1, inserted into lane 0 of a <2 x i64>, then widened
; to four lanes with an all-zero mask.
define <4 x i64> @insert_dup_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_dup_mem_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_mem_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_dup_mem_v4i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT:    retq
  %tmp = load i64, i64* %ptr, align 1
  %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %tmp2
}