; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -disable-peephole | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=ALL --check-prefix=AVX2

; Lowering tests for 256-bit shuffles that move whole 128-bit halves, compiled
; once with +avx and once with +avx2.
;
; Naming scheme: the suffix of each function name spells the shuffle mask in
; hex (shuffle_v8f32_0123cdef uses mask 0,1,2,3,12,13,14,15); 'u' is an undef
; mask element and 'z' is an element taken from the zero vector. For the i8/i16
; tests the digits index 64-bit chunks instead of individual elements.
; In the ld* tests, "ldN" names the operand that is loaded from memory and
; "hiN"/"loN" name the half of operand N that is selected.
;
; Functions that load through their pointer arguments are marked readonly
; (not readnone), since a readnone function may not dereference its pointer
; arguments.

define <8 x float> @shuffle_v8f32_45670123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v8f32_45670123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_45670123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_45670123_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readonly ssp {
; AVX1-LABEL: shuffle_v8f32_45670123_mem:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_45670123_mem:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = mem[2,3,0,1]
; AVX2-NEXT:    retq
entry:
  %a = load <8 x float>, <8 x float>* %pa
  %b = load <8 x float>, <8 x float>* %pb
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_0123cdef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_0123cdef:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_01230123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v8f32_01230123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_01230123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_01230123_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readonly ssp {
; AVX1-LABEL: shuffle_v8f32_01230123_mem:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_01230123_mem:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX2-NEXT:    retq
entry:
  %a = load <8 x float>, <8 x float>* %pa
  %b = load <8 x float>, <8 x float>* %pb
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_45674567(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v8f32_45674567:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_45674567:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_45674567_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readonly ssp {
; AVX1-LABEL: shuffle_v8f32_45674567_mem:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_45674567_mem:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = mem[2,3,2,3]
; AVX2-NEXT:    retq
entry:
  %a = load <8 x float>, <8 x float>* %pa
  %b = load <8 x float>, <8 x float>* %pb
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

define <32 x i8> @shuffle_v32i8_2323(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v32i8_2323:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_2323:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-NEXT:    retq
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %shuffle
}

define <32 x i8> @shuffle_v32i8_2323_domain(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v32i8_2323_domain:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_2323_domain:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %shuffle
}

define <4 x i64> @shuffle_v4i64_6701(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v4i64_6701:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_6701_domain(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v4i64_6701_domain:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_6701_domain:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <8 x i32> @shuffle_v8i32_u5u7cdef(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v8i32_u5u7cdef:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_u5u7cdef:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %shuffle
}

define <16 x i16> @shuffle_v16i16_4501(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v16i16_4501:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_4501:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_4501_mem(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readonly ssp {
; AVX1-LABEL: shuffle_v16i16_4501_mem:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[0,1],ymm0[0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_4501_mem:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = mem[0,1],ymm0[0,1]
; AVX2-NEXT:    retq
entry:
  %c = load <16 x i16>, <16 x i16>* %a
  %d = load <16 x i16>, <16 x i16>* %b
  %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i16> %shuffle
}

;;;; Cases with undef indices mixed in the mask

define <8 x float> @shuffle_v8f32_uu67u9ub(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu67u9ub:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_uu67uu67(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v8f32_uu67uu67:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_uu67uu67:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_uu67uuab(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu67uuab:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 10, i32 11>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_uu67uuef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu67uuef:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_uu674567(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v8f32_uu674567:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_uu674567:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_uu6789ab(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu6789ab:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_4567uu67(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v8f32_4567uu67:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_4567uu67:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_4567uuef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_4567uuef:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
  ret <8 x float> %shuffle
}

;;;; Cases where vperm2f128 alone must not be selected (an in-lane permute is still needed)

define <8 x float> @shuffle_v8f32_uu67ucuf(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu67ucuf:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
  ret <8 x float> %shuffle
}

;; Test zero mask generation.
;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
;; Prefer xor+blend (vxorps+vblendps in the checks below) over vperm2f128 when
;; a blend with zero suffices, because that has better performance.
;; TODO: When building for optsize we should use vperm2f128.

define <4 x double> @shuffle_v4f64_zz01(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz01:
; ALL:       # %bb.0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_zz01_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz01_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %s
}

define <4 x double> @shuffle_v4f64_zz23(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz23:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_zz23_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz23_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x double> %s
}

define <4 x double> @shuffle_v4f64_zz45(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz45:
; ALL:       # %bb.0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_zz45_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz45_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %s
}

define <4 x double> @shuffle_v4f64_zz67(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz67:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_zz67_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz67_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

define <4 x double> @shuffle_v4f64_01zz(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_01zz:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps %xmm0, %xmm0
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_01zz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_01zz_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps %xmm0, %xmm0
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %s
}

define <4 x double> @shuffle_v4f64_23zz(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_23zz:
; ALL:       # %bb.0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_23zz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_23zz_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ret <4 x double> %s
}

define <4 x double> @shuffle_v4f64_45zz(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_45zz:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps %xmm0, %xmm0
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_45zz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_45zz_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps %xmm0, %xmm0
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %s
}

define <4 x double> @shuffle_v4f64_67zz(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_67zz:
; ALL:       # %bb.0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_67zz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_67zz_optsize:
; ALL:       # %bb.0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x double> %s
}

;; With AVX2 select the integer version of the instruction. Use an add to force the domain selection.

define <4 x i64> @shuffle_v4i64_67zz(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_67zz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_67zz:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; AVX2-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  %c = add <4 x i64> %b, %s
  ret <4 x i64> %c
}

;;; Memory folding cases

define <4 x double> @ld0_hi0_lo1_4f64(<4 x double> * %pa, <4 x double> %b) nounwind uwtable readonly ssp {
; AVX1-LABEL: ld0_hi0_lo1_4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
; AVX1-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ld0_hi0_lo1_4f64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %a = load <4 x double>, <4 x double> * %pa
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %res = fadd <4 x double> %shuffle, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %res
}

define <4 x double> @ld1_hi0_hi1_4f64(<4 x double> %a, <4 x double> * %pb) nounwind uwtable readonly ssp {
; AVX1-LABEL: ld1_hi0_hi1_4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; AVX1-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ld1_hi0_hi1_4f64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %b = load <4 x double>, <4 x double> * %pb
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %res = fadd <4 x double> %shuffle, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %res
}

define <8 x float> @ld0_hi0_lo1_8f32(<8 x float> * %pa, <8 x float> %b) nounwind uwtable readonly ssp {
; AVX1-LABEL: ld0_hi0_lo1_8f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
; AVX1-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ld0_hi0_lo1_8f32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %a = load <8 x float>, <8 x float> * %pa
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %res = fadd <8 x float> %shuffle, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %res
}

define <8 x float> @ld1_hi0_hi1_8f32(<8 x float> %a, <8 x float> * %pb) nounwind uwtable readonly ssp {
; AVX1-LABEL: ld1_hi0_hi1_8f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; AVX1-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ld1_hi0_hi1_8f32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %b = load <8 x float>, <8 x float> * %pb
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %res = fadd <8 x float> %shuffle, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %res
}

define <4 x i64> @ld0_hi0_lo1_4i64(<4 x i64> * %pa, <4 x i64> %b) nounwind uwtable readonly ssp {
; AVX1-LABEL: ld0_hi0_lo1_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ld0_hi0_lo1_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
; AVX2-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %a = load <4 x i64>, <4 x i64> * %pa
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %res = add <4 x i64> %shuffle, <i64 1, i64 2, i64 3, i64 4>
  ret <4 x i64> %res
}

define <4 x i64> @ld1_hi0_hi1_4i64(<4 x i64> %a, <4 x i64> * %pb) nounwind uwtable readonly ssp {
; AVX1-LABEL: ld1_hi0_hi1_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ld1_hi0_hi1_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; AVX2-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %b = load <4 x i64>, <4 x i64> * %pb
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %res = add <4 x i64> %shuffle, <i64 1, i64 2, i64 3, i64 4>
  ret <4 x i64> %res
}

define <8 x i32> @ld0_hi0_lo1_8i32(<8 x i32> * %pa, <8 x i32> %b) nounwind uwtable readonly ssp {
; AVX1-LABEL: ld0_hi0_lo1_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,2,3,4]
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ld0_hi0_lo1_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
; AVX2-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %a = load <8 x i32>, <8 x i32> * %pa
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %res = add <8 x i32> %shuffle, <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
  ret <8 x i32> %res
}

define <8 x i32> @ld1_hi0_hi1_8i32(<8 x i32> %a, <8 x i32> * %pb) nounwind uwtable readonly ssp {
; AVX1-LABEL: ld1_hi0_hi1_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,2,3,4]
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ld1_hi0_hi1_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; AVX2-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %b = load <8 x i32>, <8 x i32> * %pb
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  %res = add <8 x i32> %shuffle, <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
  ret <8 x i32> %res
}