; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2

;; Lowering of 256-bit shufflevector masks that move whole 128-bit halves.
;; In the masks below, indices 0..N-1 select from the first operand and
;; N..2N-1 select from the second operand (N = number of vector elements).
;; "low"/"high" refer to the lower/upper 128-bit half of a 256-bit vector.

;; Swap the two halves of %a.
define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: A:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

;; Low half of %a followed by high half of %b; expected to become a blend.
define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: B:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %shuffle
}

;; Low half of %a duplicated into both halves; expected to become an insert.
define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: C:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

;; High half of %a duplicated into both halves.
define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: D:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

;; Same as @D but on a byte vector: high half of %a duplicated.
define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: E:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %shuffle
}

;; High half of %b followed by low half of %a.
define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: E2:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

;; Integer variant of @E: the add puts the shuffle input in the integer
;; domain, so with AVX2 the integer form (vperm2i128) is expected.
define <32 x i8> @Ei(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: Ei:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: Ei:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %shuffle
}

;; Integer variant of @E2: high half of %b, then low half of (%a + 1).
define <4 x i64> @E2i(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: E2i:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: E2i:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm2
; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

;; High half of (%a + 1), with two undef elements, then high half of %b.
define <8 x i32> @E3i(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: E3i:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: E3i:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %shuffle
}

;; Low half of %b followed by low half of (%a + 1); expected to become a
;; 128-bit insert rather than a lane permute.
define <16 x i16> @E4i(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: E4i:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: E4i:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i16> %shuffle
}

;; Same shuffle as @E4i, but both operands are loaded from memory.
;; The function reads through its pointer arguments, so it is marked readonly;
;; readnone on a function that loads from memory is undefined behaviour.
define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readonly ssp {
; AVX1-LABEL: E5i:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
; AVX1-NEXT:    vmovaps (%rsi), %ymm1
; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: E5i:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vmovdqa (%rsi), %ymm1
; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
entry:
  %c = load <16 x i16>, <16 x i16>* %a
  %d = load <16 x i16>, <16 x i16>* %b
  ; add forces execution domain
  %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i16> %shuffle
}

;;;; Cases with undef indices mixed in the mask

;; Defined elements are consistent with: high half of %a, low half of %b.
define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: F:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11>
  ret <8 x float> %shuffle
}

;; Defined elements are consistent with: high half of %a in both halves.
define <8 x float> @F2(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: F2:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x float> %shuffle
}

;; Defined elements are consistent with: high half of %a, low half of %b.
define <8 x float> @F3(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: F3:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 10, i32 11>
  ret <8 x float> %shuffle
}

;; Defined elements are consistent with: high half of %a, high half of %b.
define <8 x float> @F4(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: F4:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
  ret <8 x float> %shuffle
}

;; Undefs only in the low half; high half of %a in both halves.
define <8 x float> @F5(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: F5:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

;; Undefs only in the low half; high half of %a, then low half of %b.
define <8 x float> @F6(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: F6:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %shuffle
}

;; Undefs only in the high half; high half of %a in both halves.
define <8 x float> @F7(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: F7:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x float> %shuffle
}

;; Undefs only in the high half; high half of %a, then high half of %b.
define <8 x float> @F8(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: F8:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
  ret <8 x float> %shuffle
}

;;;; Cases we must not select vperm2f128 alone

;; Result element 5 takes element 4 of %b, i.e. the element moves within its
;; 128-bit half. A whole-half permute cannot express that on its own, so an
;; additional in-lane permute (vpermilps) is expected after it.
define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: G:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
  ret <8 x float> %shuffle
}

;; Test zero mask generation.
;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
;; Prefer xor+vblendpd over vperm2f128 because that has better performance.

;; Shuffles of %a with a constant vector whose low half is zero.
;; The hexadecimal suffix of each function name is the VPERM2F128 immediate
;; that would implement the shuffle: bits [1:0] / [5:4] pick the source half
;; for the low / high result half (0,1 = first operand low,high; 2,3 = second
;; operand low,high) and bit 3 / bit 7 zero the low / high result half.
;; When the non-zero half of the result stays in its original position, a
;; vxorpd + vblendpd pair is expected instead of vperm2f128.

;; Zero in the low half, low half of %a moved to the high half.
define <4 x double> @vperm2z_0x08(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x08:
; ALL:       ## BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %s
}

;; Zero in the low half, high half of %a stays in place.
define <4 x double> @vperm2z_0x18(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x18:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x double> %s
}

;; Same result as vperm2z_0x08, with the shuffle operands swapped.
define <4 x double> @vperm2z_0x28(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x28:
; ALL:       ## BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %s
}

;; Same result as vperm2z_0x18, with the shuffle operands swapped.
define <4 x double> @vperm2z_0x38(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x38:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

;; Low half of %a stays in place, zero in the high half.
define <4 x double> @vperm2z_0x80(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x80:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %s
}

;; High half of %a moved to the low half, zero in the high half.
define <4 x double> @vperm2z_0x81(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x81:
; ALL:       ## BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ret <4 x double> %s
}

;; Same result as vperm2z_0x80, with the shuffle operands swapped.
define <4 x double> @vperm2z_0x82(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x82:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %s
}

;; Same result as vperm2z_0x81, with the shuffle operands swapped.
define <4 x double> @vperm2z_0x83(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x83:
; ALL:       ## BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x double> %s
}

;; With AVX2 select the integer version of the instruction. Use an add to force the domain selection.

;; Integer shuffle: high half of %a moved to the low half, zero in the high
;; half, then added to %b. The add keeps the value in the integer domain, so
;; vperm2i128 is expected with AVX2. With AVX1 only, vperm2f128 is expected
;; and the 256-bit add is split into two 128-bit adds.
define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: vperm2z_int_0x83:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vperm2z_int_0x83:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; AVX2-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  %c = add <4 x i64> %b, %s
  ret <4 x i64> %c
}
