; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

; Codegen checks for 256-bit vector shuffles with four 64-bit elements
; (<4 x double> and <4 x i64>) on x86-64. The file is compiled twice, once
; with +avx and once with +avx2. Checks written with the ALL prefix apply to
; both compilations; checks written with the AVX1 or AVX2 prefix apply only to
; the corresponding compilation.
;
; Naming convention used by the shuffle_* functions: the trailing four
; characters spell the shuffle mask. Digits 0-3 pick an element of the first
; operand, digits 4-7 pick an element of the second operand, 'u' marks an undef
; mask element and 'z' marks an element taken from a zeroinitializer operand.

target triple = "x86_64-unknown-unknown"

; ---------------------------------------------------------------------------
; <4 x double> shuffles whose masks only reference the first operand.
; ---------------------------------------------------------------------------

define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0001:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0001:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0020:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0020:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0300:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0300:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_1000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2200:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_2200:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3330:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3330:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0023:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0022:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1133:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1023:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1022:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

; ---------------------------------------------------------------------------
; <4 x double> shuffles whose masks reference both operands.
; ---------------------------------------------------------------------------

define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0423:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0423:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0462:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0426:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1537:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4062:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5173:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5163:
; ALL:       # BB#0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0527:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4163:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0145:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4501:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0167:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1054:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_3254:
; ALL:       # BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_3276:
; ALL:       # BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1076:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0415:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0415:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_u062:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

; ---------------------------------------------------------------------------
; <4 x i64> shuffles whose masks only reference the first operand.
; ---------------------------------------------------------------------------

define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0001:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0020:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0020:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0112:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0112:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0300:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0300:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2200:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_2200:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3330:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3330:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %shuffle
}

; ---------------------------------------------------------------------------
; <4 x i64> shuffles whose masks reference both operands.
; ---------------------------------------------------------------------------

define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0124:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0124:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0142:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0142:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0412:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0412:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4012:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4012:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_0145:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0451:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0451:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_4501:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4015:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4015:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2u35:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_2u35:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1251:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1251:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1054:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1054:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3254:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3254:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3276:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3276:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1076:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1076:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0415:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0415:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

; ---------------------------------------------------------------------------
; <4 x i64> shuffles with a zeroinitializer operand and/or undef mask
; elements. In the two functions below the first shufflevector operand is the
; zero vector, so mask index 0 selects a zero element.
; ---------------------------------------------------------------------------

define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_z4z6:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_z4z6:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_5zuz:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_5zuz:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 undef, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_40u2:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_40u2:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
  ret <4 x i64> %shuffle
}

; A chain of dependent shuffles. Only the function label and the presence of a
; retq are checked; no particular instruction sequence is required.
define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: stress_test1:
; ALL:         retq
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
  %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
  %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
  %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>

  ret <4 x i64> %f
}

; ---------------------------------------------------------------------------
; A scalar (from a register argument or a load) inserted into element 0, then
; shuffled with a zero vector so that elements 1-3 are zero.
; ---------------------------------------------------------------------------

define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; AVX1-LABEL: insert_reg_and_zero_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_reg_and_zero_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    retq
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_mem_and_zero_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_mem_and_zero_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    retq
  %a = load i64, i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: insert_reg_and_zero_v4f64:
; ALL:       # BB#0:
; ALL-NEXT:    # kill: XMM0<def> XMM0<kill> YMM0<def>
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; ALL-NEXT:    retq
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v4f64:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

; ---------------------------------------------------------------------------
; Splats: an all-zero shuffle mask applied to a loaded scalar or to a
; <2 x double> argument.
; ---------------------------------------------------------------------------

define <4 x double> @splat_mem_v4f64(double* %ptr) {
; ALL-LABEL: splat_mem_v4f64:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: splat_mem_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_mem_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
  %a = load i64, i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat_mem_v4f64_2(double* %p) {
; ALL-LABEL: splat_mem_v4f64_2:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %1 = load double, double* %p
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %3
}

define <4 x double> @splat_v4f64(<2 x double> %r) {
; AVX1-LABEL: splat_v4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_v4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %1
}

; A <4,0,6,2> shuffle followed by two narrower shuffles performed through
; bitcasts: the <8 x float> shuffle reverses the four 32-bit elements of each
; 128-bit half, and the <16 x i16> shuffle swaps adjacent pairs of 16-bit
; elements. The expected output is a single unpack of %a and %b in 0,4,2,6
; order.
define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: bitcast_v4f64_0426:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v4f64_0426:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT:    retq
  %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
  %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
  %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
  %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
  ret <4 x double> %bitcast64
}