; RUN: llc < %s -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
;
; Verify that the DAG combiner correctly folds bitwise operations across
; shuffles, nested shuffles with undef, pairs of nested shuffles, and other
; basic and always-safe patterns. Also test that the DAG combiner will combine
; target-specific shuffle instructions where reasonable.

target triple = "x86_64-unknown-unknown"

declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)

define <4 x i32> @combine_pshufd1(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 27)
  ret <4 x i32> %c
}

define <4 x i32> @combine_pshufd2(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd2:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd3(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd3:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd4(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd4:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd4:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -31)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -31)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd5:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd5:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -76)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -76)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd6(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd6:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd6:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)
  ret <4 x i32> %c
}

define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  ret <8 x i16> %c
}

define <8 x i16> @combine_pshuflw2(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw2:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 -28)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

define <8 x i16> @combine_pshuflw3(<8 x i16> %a) {
; SSE-LABEL: combine_pshuflw3:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshuflw3:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

define <8 x i16> @combine_pshufhw1(<8 x i16> %a) {
; SSE-LABEL: combine_pshufhw1:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufhw1:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

define <4 x i32> @combine_bitwise_ops_test1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test1:
; SSE: # BB#0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test1:
; AVX: # BB#0:
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test2:
; SSE: # BB#0:
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test2:
; AVX: # BB#0:
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test3:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test3:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test4:
; SSE: # BB#0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test4:
; AVX: # BB#0:
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test5:
; SSE: # BB#0:
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test5:
; AVX: # BB#0:
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test6:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test6:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}


; Verify that DAGCombiner moves the shuffle after the xor/and/or even if shuffles
; are not performing a swizzle operations.

define <4 x i32> @combine_bitwise_ops_test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test1b:
; SSE2: # BB#0:
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test1b:
; SSSE3: # BB#0:
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test1b:
; SSE41: # BB#0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test1b:
; AVX1: # BB#0:
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test1b:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test2b:
; SSE2: # BB#0:
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test2b:
; SSSE3: # BB#0:
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test2b:
; SSE41: # BB#0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test2b:
; AVX1: # BB#0:
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test2b:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test3b:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm0
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test3b:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm0
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test3b:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test3b:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test3b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test4b:
; SSE2: # BB#0:
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test4b:
; SSSE3: # BB#0:
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test4b:
; SSE41: # BB#0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test4b:
; AVX1: # BB#0:
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test4b:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test5b:
; SSE2: # BB#0:
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test5b:
; SSSE3: # BB#0:
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test5b:
; SSE41: # BB#0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test5b:
; AVX1: # BB#0:
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test5b:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test6b:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm0
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test6b:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm0
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test6b:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test6b:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test6b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test1c:
; SSE2: # BB#0:
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test1c:
; SSSE3: # BB#0:
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test1c:
; SSE41: # BB#0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test1c:
; AVX1: # BB#0:
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test1c:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test2c:
; SSE2: # BB#0:
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test2c:
; SSSE3: # BB#0:
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test2c:
; SSE41: # BB#0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test2c:
; AVX1: # BB#0:
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test2c:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test3c:
; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test3c:
; SSSE3: # BB#0:
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test3c:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test3c:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test4c:
; SSE2: # BB#0:
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test4c:
; SSSE3: # BB#0:
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test4c:
; SSE41: # BB#0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test4c:
; AVX1: # BB#0:
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test4c:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test5c:
; SSE2: # BB#0:
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test5c:
; SSSE3: # BB#0:
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test5c:
; SSE41: # BB#0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test5c:
; AVX1: # BB#0:
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test5c:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test6c:
; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test6c:
; SSSE3: # BB#0:
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test6c:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test6c:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test6c:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_nested_undef_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test1:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test1:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test2:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test2:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test3(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test3:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test3:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test4(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test4:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test4:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test4:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 7, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test5(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test5:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test5:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 4, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test6(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test6:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test6:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test7(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test7:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test7:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test8(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test8:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test8:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test9(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test9:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test9:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 2, i32 5>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test10(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test10:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test10:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test11(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test11:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test11:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 5, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test12(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test12:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test12:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test12:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 0, i32 2, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
  ret <4 x i32> %2
}

; The following pair of shuffles is folded into vector %A.
define <4 x i32> @combine_nested_undef_test13(<4 x i32> %A, <4 x i32> %B) {
; ALL-LABEL: combine_nested_undef_test13:
; ALL: # BB#0:
; ALL-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 4, i32 2, i32 6>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 0, i32 2, i32 4>
  ret <4 x i32> %2
}

; The following pair of shuffles is folded into vector %B.
define <4 x i32> @combine_nested_undef_test14(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test14:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test14:
; AVX: # BB#0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 1, i32 4>
  ret <4 x i32> %2
}


; Verify that we don't optimize the following cases. We expect more than one shuffle.
;
; FIXME: Many of these already don't make sense, and the rest should stop
; making sense with the new vector shuffle lowering. Revisit at least testing for
; it.

define <4 x i32> @combine_nested_undef_test15(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: combine_nested_undef_test15:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_nested_undef_test15:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_nested_undef_test15:
; SSE41: # BB#0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test15:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test15:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test16(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: combine_nested_undef_test16:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_nested_undef_test16:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_nested_undef_test16:
; SSE41: # BB#0:
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test16:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test16:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test17(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: combine_nested_undef_test17:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1],xmm1[0,2]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_nested_undef_test17:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1],xmm1[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_nested_undef_test17:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test17:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test17:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test18(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test18:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test18:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test19(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: combine_nested_undef_test19:
; SSE2: # BB#0:
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_nested_undef_test19:
; SSSE3: # BB#0:
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_nested_undef_test19:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test19:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test19:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test20(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: combine_nested_undef_test20:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_nested_undef_test20:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_nested_undef_test20:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test20:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test20:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test21(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: combine_nested_undef_test21:
; SSE2: # BB#0:
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,0,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_nested_undef_test21:
; SSSE3: # BB#0:
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,0,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_nested_undef_test21:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test21:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test21:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}


; Test that we correctly combine shuffles according to rule
; shuffle(shuffle(x, y), undef) -> shuffle(y, undef)

define <4 x i32> @combine_nested_undef_test22(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test22:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test22:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test23(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test23:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test23:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test24(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test24:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test24:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test25(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test25:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test25:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test25:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 5, i32 2, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 3, i32 1>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test26(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test26:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test26:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test27(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test27:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test27:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test27:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 2, i32 1, i32 5, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test28(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test28:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test28:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 2>
  ret <4 x i32> %2
}

define <4 x float> @combine_test1(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test1:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test1:
; AVX: # BB#0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %2
}

define <4 x float> @combine_test2(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test2:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test2:
; SSSE3: # BB#0:
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test2:
; SSE41: # BB#0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test2:
; AVX: # BB#0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x float> %2
}

define <4 x float> @combine_test3(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test3:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test3:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
  ret <4 x float> %2
}

define <4 x float> @combine_test4(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test4:
; SSE: # BB#0:
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test4:
; AVX: # BB#0:
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x float> %2
}

define <4 x float> @combine_test5(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test5:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test5:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test5:
; SSE41: # BB#0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test5:
; AVX: # BB#0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x float> %2
}

define <4 x i32> @combine_test6(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test6:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test6:
; AVX: # BB#0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %2
}

define <4 x i32> @combine_test7(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test7:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test7:
; SSSE3: # BB#0:
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test7:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_test7:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_test7:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test8:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test8:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
  ret <4 x i32> %2
}

define <4 x i32> @combine_test9(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test9:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test9:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x i32> %2
}

define <4 x i32> @combine_test10(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test10:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test10:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test10:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_test10:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_test10:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %2
}

define <4 x float> @combine_test11(<4 x float> %a, <4 x float> %b) {
; ALL-LABEL: combine_test11:
; ALL: # BB#0:
; ALL-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x float> %2
}

define <4 x float> @combine_test12(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test12:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test12:
; SSSE3: # BB#0:
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test12:
; SSE41: # BB#0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test12:
; AVX: # BB#0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %2
}

define <4 x float> @combine_test13(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test13:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test13:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x float> %2
}

define <4 x float> @combine_test14(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test14:
; SSE: # BB#0:
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test14:
; AVX: # BB#0:
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x float> %2
}

define <4 x float> @combine_test15(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test15:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test15:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test15:
; SSE41: # BB#0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test15:
; AVX: # BB#0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x float> %2
}

define <4 x i32> @combine_test16(<4 x i32> %a, <4 x i32> %b) {
; ALL-LABEL: combine_test16:
; ALL: # BB#0:
; ALL-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_test17(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test17:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test17:
; SSSE3: # BB#0:
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test17:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_test17:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_test17:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test18:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test18:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_test19(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test19:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test19:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x i32> %2
}

define <4 x i32> @combine_test20(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test20:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test20:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test20:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_test20:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_test20:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %2
}
1748 1749 define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) { 1750 ; SSE-LABEL: combine_test21: 1751 ; SSE: # BB#0: 1752 ; SSE-NEXT: movdqa %xmm0, %xmm2 1753 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] 1754 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 1755 ; SSE-NEXT: movdqa %xmm2, (%rdi) 1756 ; SSE-NEXT: retq 1757 ; 1758 ; AVX1-LABEL: combine_test21: 1759 ; AVX1: # BB#0: 1760 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1761 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0] 1762 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 1763 ; AVX1-NEXT: vmovdqa %xmm2, (%rdi) 1764 ; AVX1-NEXT: vzeroupper 1765 ; AVX1-NEXT: retq 1766 ; 1767 ; AVX2-LABEL: combine_test21: 1768 ; AVX2: # BB#0: 1769 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1770 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0] 1771 ; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 1772 ; AVX2-NEXT: vmovdqa %xmm2, (%rdi) 1773 ; AVX2-NEXT: vzeroupper 1774 ; AVX2-NEXT: retq 1775 %1 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1776 %2 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 1777 store <4 x i32> %1, <4 x i32>* %ptr, align 16 1778 ret <4 x i32> %2 1779 } 1780 1781 define <8 x float> @combine_test22(<2 x float>* %a, <2 x float>* %b) { 1782 ; SSE-LABEL: combine_test22: 1783 ; SSE: # BB#0: 1784 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1785 ; SSE-NEXT: movhpd (%rsi), %xmm0 1786 ; SSE-NEXT: retq 1787 ; 1788 ; AVX-LABEL: combine_test22: 1789 ; AVX: # BB#0: 1790 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 1791 ; AVX-NEXT: vmovhpd (%rsi), %xmm0, %xmm0 1792 ; AVX-NEXT: retq 1793 ; Current AVX2 lowering of this is still awful, not adding a test case. 
1794 %1 = load <2 x float>, <2 x float>* %a, align 8 1795 %2 = load <2 x float>, <2 x float>* %b, align 8 1796 %3 = shufflevector <2 x float> %1, <2 x float> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 1797 ret <8 x float> %3 1798 } 1799 1800 ; Check some negative cases. 1801 ; FIXME: Do any of these really make sense? Are they redundant with the above tests? 1802 1803 define <4 x float> @combine_test1b(<4 x float> %a, <4 x float> %b) { 1804 ; SSE-LABEL: combine_test1b: 1805 ; SSE: # BB#0: 1806 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,0] 1807 ; SSE-NEXT: movaps %xmm1, %xmm0 1808 ; SSE-NEXT: retq 1809 ; 1810 ; AVX-LABEL: combine_test1b: 1811 ; AVX: # BB#0: 1812 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[0,1,2,0] 1813 ; AVX-NEXT: retq 1814 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1815 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 0> 1816 ret <4 x float> %2 1817 } 1818 1819 define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) { 1820 ; SSE2-LABEL: combine_test2b: 1821 ; SSE2: # BB#0: 1822 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] 1823 ; SSE2-NEXT: movaps %xmm1, %xmm0 1824 ; SSE2-NEXT: retq 1825 ; 1826 ; SSSE3-LABEL: combine_test2b: 1827 ; SSSE3: # BB#0: 1828 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0] 1829 ; SSSE3-NEXT: retq 1830 ; 1831 ; SSE41-LABEL: combine_test2b: 1832 ; SSE41: # BB#0: 1833 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0] 1834 ; SSE41-NEXT: retq 1835 ; 1836 ; AVX-LABEL: combine_test2b: 1837 ; AVX: # BB#0: 1838 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0] 1839 ; AVX-NEXT: retq 1840 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1841 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5> 1842 ret <4 x float> %2 1843 } 1844 1845 define <4 x float> @combine_test3b(<4 x float> %a, <4 x float> %b) { 1846 ; SSE2-LABEL: 
combine_test3b: 1847 ; SSE2: # BB#0: 1848 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] 1849 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] 1850 ; SSE2-NEXT: retq 1851 ; 1852 ; SSSE3-LABEL: combine_test3b: 1853 ; SSSE3: # BB#0: 1854 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] 1855 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] 1856 ; SSSE3-NEXT: retq 1857 ; 1858 ; SSE41-LABEL: combine_test3b: 1859 ; SSE41: # BB#0: 1860 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 1861 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,3] 1862 ; SSE41-NEXT: retq 1863 ; 1864 ; AVX-LABEL: combine_test3b: 1865 ; AVX: # BB#0: 1866 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 1867 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,2,3] 1868 ; AVX-NEXT: retq 1869 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 3> 1870 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 7> 1871 ret <4 x float> %2 1872 } 1873 1874 define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) { 1875 ; SSE-LABEL: combine_test4b: 1876 ; SSE: # BB#0: 1877 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] 1878 ; SSE-NEXT: movaps %xmm1, %xmm0 1879 ; SSE-NEXT: retq 1880 ; 1881 ; AVX-LABEL: combine_test4b: 1882 ; AVX: # BB#0: 1883 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[1,1,2,3] 1884 ; AVX-NEXT: retq 1885 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1886 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 5, i32 5, i32 2, i32 7> 1887 ret <4 x float> %2 1888 } 1889 1890 1891 ; Verify that we correctly fold shuffles even when we use illegal vector types. 
1892 1893 define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) { 1894 ; SSE2-LABEL: combine_test1c: 1895 ; SSE2: # BB#0: 1896 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1897 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1898 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1899 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1900 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1901 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1902 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 1903 ; SSE2-NEXT: retq 1904 ; 1905 ; SSSE3-LABEL: combine_test1c: 1906 ; SSSE3: # BB#0: 1907 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1908 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1909 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1910 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1911 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1912 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1913 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 1914 ; SSSE3-NEXT: retq 1915 ; 1916 ; SSE41-LABEL: combine_test1c: 1917 ; SSE41: # BB#0: 1918 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1919 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1920 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] 1921 ; SSE41-NEXT: retq 1922 ; 1923 ; AVX1-LABEL: combine_test1c: 1924 ; AVX1: # BB#0: 1925 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1926 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1927 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1928 ; AVX1-NEXT: retq 1929 ; 1930 ; AVX2-LABEL: combine_test1c: 1931 ; AVX2: # BB#0: 1932 ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1933 ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1934 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1935 ; AVX2-NEXT: retq 1936 %A = load <4 x i8>, <4 x i8>* %a 1937 %B = load <4 x i8>, <4 x i8>* %b 1938 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1939 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 6, i32 3> 1940 ret <4 x i8> %2 1941 } 1942 1943 define <4 x i8> @combine_test2c(<4 x i8>* %a, <4 x i8>* %b) { 1944 ; SSE2-LABEL: combine_test2c: 1945 ; SSE2: # BB#0: 1946 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1947 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1948 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1949 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1950 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1951 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1952 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1953 ; SSE2-NEXT: retq 1954 ; 1955 ; SSSE3-LABEL: combine_test2c: 1956 ; SSSE3: # BB#0: 1957 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1958 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1959 ; SSSE3-NEXT: 
punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1960 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1961 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1962 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1963 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1964 ; SSSE3-NEXT: retq 1965 ; 1966 ; SSE41-LABEL: combine_test2c: 1967 ; SSE41: # BB#0: 1968 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1969 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1970 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1971 ; SSE41-NEXT: retq 1972 ; 1973 ; AVX-LABEL: combine_test2c: 1974 ; AVX: # BB#0: 1975 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1976 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1977 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1978 ; AVX-NEXT: retq 1979 %A = load <4 x i8>, <4 x i8>* %a 1980 %B = load <4 x i8>, <4 x i8>* %b 1981 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 1, i32 5> 1982 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 1> 1983 ret <4 x i8> %2 1984 } 1985 1986 define <4 x i8> @combine_test3c(<4 x i8>* %a, <4 x i8>* %b) { 1987 ; SSE2-LABEL: combine_test3c: 1988 ; SSE2: # BB#0: 1989 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1990 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1991 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1992 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1993 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1994 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1995 ; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 1996 ; SSE2-NEXT: retq 1997 ; 1998 ; SSSE3-LABEL: combine_test3c: 1999 ; SSSE3: # BB#0: 2000 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2001 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2002 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2003 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2004 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2005 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2006 ; SSSE3-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 2007 ; SSSE3-NEXT: retq 2008 ; 2009 ; SSE41-LABEL: combine_test3c: 2010 ; SSE41: # BB#0: 2011 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 2012 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 2013 ; SSE41-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 2014 ; SSE41-NEXT: retq 2015 ; 2016 ; AVX-LABEL: combine_test3c: 2017 ; AVX: # BB#0: 2018 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 2019 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 2020 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] 2021 ; AVX-NEXT: retq 2022 %A = load <4 x i8>, <4 x i8>* %a 2023 %B = load <4 x i8>, <4 x i8>* %b 2024 %1 = shufflevector <4 x i8> %A, 
<4 x i8> %B, <4 x i32> <i32 2, i32 3, i32 5, i32 5> 2025 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 2026 ret <4 x i8> %2 2027 } 2028 2029 define <4 x i8> @combine_test4c(<4 x i8>* %a, <4 x i8>* %b) { 2030 ; SSE2-LABEL: combine_test4c: 2031 ; SSE2: # BB#0: 2032 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2033 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2034 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2035 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2036 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2037 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2038 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0] 2039 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 2040 ; SSE2-NEXT: retq 2041 ; 2042 ; SSSE3-LABEL: combine_test4c: 2043 ; SSSE3: # BB#0: 2044 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2045 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2046 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2047 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2048 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2049 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2050 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0] 2051 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 2052 ; SSSE3-NEXT: retq 2053 ; 2054 ; SSE41-LABEL: combine_test4c: 2055 ; SSE41: # BB#0: 2056 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 2057 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 
= mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 2058 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 2059 ; SSE41-NEXT: retq 2060 ; 2061 ; AVX1-LABEL: combine_test4c: 2062 ; AVX1: # BB#0: 2063 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 2064 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 2065 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7] 2066 ; AVX1-NEXT: retq 2067 ; 2068 ; AVX2-LABEL: combine_test4c: 2069 ; AVX2: # BB#0: 2070 ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 2071 ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 2072 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 2073 ; AVX2-NEXT: retq 2074 %A = load <4 x i8>, <4 x i8>* %a 2075 %B = load <4 x i8>, <4 x i8>* %b 2076 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 2077 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 7> 2078 ret <4 x i8> %2 2079 } 2080 2081 2082 ; The following test cases are generated from this C++ code 2083 ; 2084 ;__m128 blend_01(__m128 a, __m128 b) 2085 ;{ 2086 ; __m128 s = a; 2087 ; s = _mm_blend_ps( s, b, 1<<0 ); 2088 ; s = _mm_blend_ps( s, b, 1<<1 ); 2089 ; return s; 2090 ;} 2091 ; 2092 ;__m128 blend_02(__m128 a, __m128 b) 2093 ;{ 2094 ; __m128 s = a; 2095 ; s = _mm_blend_ps( s, b, 1<<0 ); 2096 ; s = _mm_blend_ps( s, b, 1<<2 ); 2097 ; return s; 2098 ;} 2099 ; 2100 ;__m128 blend_123(__m128 a, __m128 b) 2101 ;{ 2102 ; __m128 s = a; 2103 ; s = _mm_blend_ps( s, b, 1<<1 ); 2104 ; s = _mm_blend_ps( s, b, 1<<2 ); 2105 ; s = _mm_blend_ps( s, b, 1<<3 ); 2106 ; return s; 2107 ;} 2108 2109 ; Ideally, 
we should collapse the following shuffles into a single one. 2110 2111 define <4 x float> @combine_blend_01(<4 x float> %a, <4 x float> %b) { 2112 ; SSE2-LABEL: combine_blend_01: 2113 ; SSE2: # BB#0: 2114 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2115 ; SSE2-NEXT: retq 2116 ; 2117 ; SSSE3-LABEL: combine_blend_01: 2118 ; SSSE3: # BB#0: 2119 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2120 ; SSSE3-NEXT: retq 2121 ; 2122 ; SSE41-LABEL: combine_blend_01: 2123 ; SSE41: # BB#0: 2124 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2125 ; SSE41-NEXT: retq 2126 ; 2127 ; AVX-LABEL: combine_blend_01: 2128 ; AVX: # BB#0: 2129 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2130 ; AVX-NEXT: retq 2131 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 undef, i32 2, i32 3> 2132 %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3> 2133 ret <4 x float> %shuffle6 2134 } 2135 2136 define <4 x float> @combine_blend_02(<4 x float> %a, <4 x float> %b) { 2137 ; SSE2-LABEL: combine_blend_02: 2138 ; SSE2: # BB#0: 2139 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3] 2140 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3] 2141 ; SSE2-NEXT: movaps %xmm1, %xmm0 2142 ; SSE2-NEXT: retq 2143 ; 2144 ; SSSE3-LABEL: combine_blend_02: 2145 ; SSSE3: # BB#0: 2146 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3] 2147 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3] 2148 ; SSSE3-NEXT: movaps %xmm1, %xmm0 2149 ; SSSE3-NEXT: retq 2150 ; 2151 ; SSE41-LABEL: combine_blend_02: 2152 ; SSE41: # BB#0: 2153 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] 2154 ; SSE41-NEXT: retq 2155 ; 2156 ; AVX-LABEL: combine_blend_02: 2157 ; AVX: # BB#0: 2158 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] 2159 ; AVX-NEXT: retq 2160 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 undef, i32 3> 2161 %shuffle6 = shufflevector <4 
x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3> 2162 ret <4 x float> %shuffle6 2163 } 2164 2165 define <4 x float> @combine_blend_123(<4 x float> %a, <4 x float> %b) { 2166 ; SSE2-LABEL: combine_blend_123: 2167 ; SSE2: # BB#0: 2168 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 2169 ; SSE2-NEXT: movaps %xmm1, %xmm0 2170 ; SSE2-NEXT: retq 2171 ; 2172 ; SSSE3-LABEL: combine_blend_123: 2173 ; SSSE3: # BB#0: 2174 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 2175 ; SSSE3-NEXT: movaps %xmm1, %xmm0 2176 ; SSSE3-NEXT: retq 2177 ; 2178 ; SSE41-LABEL: combine_blend_123: 2179 ; SSE41: # BB#0: 2180 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 2181 ; SSE41-NEXT: retq 2182 ; 2183 ; AVX-LABEL: combine_blend_123: 2184 ; AVX: # BB#0: 2185 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 2186 ; AVX-NEXT: retq 2187 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> 2188 %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 undef> 2189 %shuffle12 = shufflevector <4 x float> %shuffle6, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7> 2190 ret <4 x float> %shuffle12 2191 } 2192 2193 define <4 x i32> @combine_test_movhl_1(<4 x i32> %a, <4 x i32> %b) { 2194 ; SSE-LABEL: combine_test_movhl_1: 2195 ; SSE: # BB#0: 2196 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1] 2197 ; SSE-NEXT: movdqa %xmm1, %xmm0 2198 ; SSE-NEXT: retq 2199 ; 2200 ; AVX-LABEL: combine_test_movhl_1: 2201 ; AVX: # BB#0: 2202 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] 2203 ; AVX-NEXT: retq 2204 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 7, i32 5, i32 3> 2205 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 1, i32 0, i32 3> 2206 ret <4 x i32> %2 2207 } 2208 2209 define <4 x i32> @combine_test_movhl_2(<4 x i32> %a, <4 x i32> %b) { 2210 ; SSE-LABEL: combine_test_movhl_2: 2211 ; SSE: # BB#0: 
2212 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1] 2213 ; SSE-NEXT: movdqa %xmm1, %xmm0 2214 ; SSE-NEXT: retq 2215 ; 2216 ; AVX-LABEL: combine_test_movhl_2: 2217 ; AVX: # BB#0: 2218 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] 2219 ; AVX-NEXT: retq 2220 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 0, i32 3, i32 6> 2221 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 3, i32 7, i32 0, i32 2> 2222 ret <4 x i32> %2 2223 } 2224 2225 define <4 x i32> @combine_test_movhl_3(<4 x i32> %a, <4 x i32> %b) { 2226 ; SSE-LABEL: combine_test_movhl_3: 2227 ; SSE: # BB#0: 2228 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1] 2229 ; SSE-NEXT: movdqa %xmm1, %xmm0 2230 ; SSE-NEXT: retq 2231 ; 2232 ; AVX-LABEL: combine_test_movhl_3: 2233 ; AVX: # BB#0: 2234 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] 2235 ; AVX-NEXT: retq 2236 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 6, i32 3, i32 2> 2237 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 0, i32 3, i32 2> 2238 ret <4 x i32> %2 2239 } 2240 2241 2242 ; Verify that we fold shuffles according to rule: 2243 ; (shuffle(shuffle A, Undef, M0), B, M1) -> (shuffle A, B, M2) 2244 2245 define <4 x float> @combine_undef_input_test1(<4 x float> %a, <4 x float> %b) { 2246 ; SSE2-LABEL: combine_undef_input_test1: 2247 ; SSE2: # BB#0: 2248 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2249 ; SSE2-NEXT: retq 2250 ; 2251 ; SSSE3-LABEL: combine_undef_input_test1: 2252 ; SSSE3: # BB#0: 2253 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2254 ; SSSE3-NEXT: retq 2255 ; 2256 ; SSE41-LABEL: combine_undef_input_test1: 2257 ; SSE41: # BB#0: 2258 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2259 ; SSE41-NEXT: retq 2260 ; 2261 ; AVX-LABEL: combine_undef_input_test1: 2262 ; AVX: # BB#0: 2263 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2264 ; AVX-NEXT: retq 2265 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x 
i32> <i32 4, i32 2, i32 3, i32 1> 2266 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 1, i32 2> 2267 ret <4 x float> %2 2268 } 2269 2270 define <4 x float> @combine_undef_input_test2(<4 x float> %a, <4 x float> %b) { 2271 ; SSE-LABEL: combine_undef_input_test2: 2272 ; SSE: # BB#0: 2273 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2274 ; SSE-NEXT: retq 2275 ; 2276 ; AVX-LABEL: combine_undef_input_test2: 2277 ; AVX: # BB#0: 2278 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2279 ; AVX-NEXT: retq 2280 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7> 2281 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 5> 2282 ret <4 x float> %2 2283 } 2284 2285 define <4 x float> @combine_undef_input_test3(<4 x float> %a, <4 x float> %b) { 2286 ; SSE-LABEL: combine_undef_input_test3: 2287 ; SSE: # BB#0: 2288 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2289 ; SSE-NEXT: retq 2290 ; 2291 ; AVX-LABEL: combine_undef_input_test3: 2292 ; AVX: # BB#0: 2293 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2294 ; AVX-NEXT: retq 2295 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7> 2296 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1> 2297 ret <4 x float> %2 2298 } 2299 2300 define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) { 2301 ; SSE-LABEL: combine_undef_input_test4: 2302 ; SSE: # BB#0: 2303 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 2304 ; SSE-NEXT: movapd %xmm1, %xmm0 2305 ; SSE-NEXT: retq 2306 ; 2307 ; AVX-LABEL: combine_undef_input_test4: 2308 ; AVX: # BB#0: 2309 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] 2310 ; AVX-NEXT: retq 2311 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5> 2312 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 
1> 2313 ret <4 x float> %2 2314 } 2315 2316 define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) { 2317 ; SSE2-LABEL: combine_undef_input_test5: 2318 ; SSE2: # BB#0: 2319 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 2320 ; SSE2-NEXT: movapd %xmm1, %xmm0 2321 ; SSE2-NEXT: retq 2322 ; 2323 ; SSSE3-LABEL: combine_undef_input_test5: 2324 ; SSSE3: # BB#0: 2325 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 2326 ; SSSE3-NEXT: movapd %xmm1, %xmm0 2327 ; SSSE3-NEXT: retq 2328 ; 2329 ; SSE41-LABEL: combine_undef_input_test5: 2330 ; SSE41: # BB#0: 2331 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 2332 ; SSE41-NEXT: retq 2333 ; 2334 ; AVX-LABEL: combine_undef_input_test5: 2335 ; AVX: # BB#0: 2336 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 2337 ; AVX-NEXT: retq 2338 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3> 2339 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 6, i32 7> 2340 ret <4 x float> %2 2341 } 2342 2343 2344 ; Verify that we fold shuffles according to rule: 2345 ; (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2) 2346 2347 define <4 x float> @combine_undef_input_test6(<4 x float> %a) { 2348 ; ALL-LABEL: combine_undef_input_test6: 2349 ; ALL: # BB#0: 2350 ; ALL-NEXT: retq 2351 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1> 2352 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 2> 2353 ret <4 x float> %2 2354 } 2355 2356 define <4 x float> @combine_undef_input_test7(<4 x float> %a) { 2357 ; SSE2-LABEL: combine_undef_input_test7: 2358 ; SSE2: # BB#0: 2359 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 2360 ; SSE2-NEXT: retq 2361 ; 2362 ; SSSE3-LABEL: combine_undef_input_test7: 2363 ; SSSE3: # BB#0: 2364 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2365 ; SSSE3-NEXT: retq 2366 ; 2367 ; SSE41-LABEL: combine_undef_input_test7: 2368 ; SSE41: # 
BB#0: 2369 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2370 ; SSE41-NEXT: retq 2371 ; 2372 ; AVX-LABEL: combine_undef_input_test7: 2373 ; AVX: # BB#0: 2374 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 2375 ; AVX-NEXT: retq 2376 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7> 2377 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 1, i32 2, i32 4, i32 5> 2378 ret <4 x float> %2 2379 } 2380 2381 define <4 x float> @combine_undef_input_test8(<4 x float> %a) { 2382 ; SSE2-LABEL: combine_undef_input_test8: 2383 ; SSE2: # BB#0: 2384 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 2385 ; SSE2-NEXT: retq 2386 ; 2387 ; SSSE3-LABEL: combine_undef_input_test8: 2388 ; SSSE3: # BB#0: 2389 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2390 ; SSSE3-NEXT: retq 2391 ; 2392 ; SSE41-LABEL: combine_undef_input_test8: 2393 ; SSE41: # BB#0: 2394 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2395 ; SSE41-NEXT: retq 2396 ; 2397 ; AVX-LABEL: combine_undef_input_test8: 2398 ; AVX: # BB#0: 2399 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 2400 ; AVX-NEXT: retq 2401 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7> 2402 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 1> 2403 ret <4 x float> %2 2404 } 2405 2406 define <4 x float> @combine_undef_input_test9(<4 x float> %a) { 2407 ; SSE-LABEL: combine_undef_input_test9: 2408 ; SSE: # BB#0: 2409 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 2410 ; SSE-NEXT: retq 2411 ; 2412 ; AVX-LABEL: combine_undef_input_test9: 2413 ; AVX: # BB#0: 2414 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1] 2415 ; AVX-NEXT: retq 2416 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5> 2417 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 2418 ret <4 x float> %2 2419 } 2420 2421 define <4 x float> @combine_undef_input_test10(<4 x float> %a) 
{ 2422 ; ALL-LABEL: combine_undef_input_test10: 2423 ; ALL: # BB#0: 2424 ; ALL-NEXT: retq 2425 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3> 2426 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 6, i32 7> 2427 ret <4 x float> %2 2428 } 2429 2430 define <4 x float> @combine_undef_input_test11(<4 x float> %a, <4 x float> %b) { 2431 ; SSE2-LABEL: combine_undef_input_test11: 2432 ; SSE2: # BB#0: 2433 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2434 ; SSE2-NEXT: retq 2435 ; 2436 ; SSSE3-LABEL: combine_undef_input_test11: 2437 ; SSSE3: # BB#0: 2438 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2439 ; SSSE3-NEXT: retq 2440 ; 2441 ; SSE41-LABEL: combine_undef_input_test11: 2442 ; SSE41: # BB#0: 2443 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2444 ; SSE41-NEXT: retq 2445 ; 2446 ; AVX-LABEL: combine_undef_input_test11: 2447 ; AVX: # BB#0: 2448 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2449 ; AVX-NEXT: retq 2450 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1> 2451 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 6> 2452 ret <4 x float> %2 2453 } 2454 2455 define <4 x float> @combine_undef_input_test12(<4 x float> %a, <4 x float> %b) { 2456 ; SSE-LABEL: combine_undef_input_test12: 2457 ; SSE: # BB#0: 2458 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2459 ; SSE-NEXT: retq 2460 ; 2461 ; AVX-LABEL: combine_undef_input_test12: 2462 ; AVX: # BB#0: 2463 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2464 ; AVX-NEXT: retq 2465 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7> 2466 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1> 2467 ret <4 x float> %2 2468 } 2469 2470 define <4 x float> @combine_undef_input_test13(<4 x float> %a, <4 x float> %b) { 2471 ; SSE-LABEL: combine_undef_input_test13: 
2472 ; SSE: # BB#0: 2473 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2474 ; SSE-NEXT: retq 2475 ; 2476 ; AVX-LABEL: combine_undef_input_test13: 2477 ; AVX: # BB#0: 2478 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2479 ; AVX-NEXT: retq 2480 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7> 2481 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 5, i32 0, i32 5> 2482 ret <4 x float> %2 2483 } 2484 2485 define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) { 2486 ; SSE-LABEL: combine_undef_input_test14: 2487 ; SSE: # BB#0: 2488 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 2489 ; SSE-NEXT: movapd %xmm1, %xmm0 2490 ; SSE-NEXT: retq 2491 ; 2492 ; AVX-LABEL: combine_undef_input_test14: 2493 ; AVX: # BB#0: 2494 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] 2495 ; AVX-NEXT: retq 2496 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5> 2497 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 2498 ret <4 x float> %2 2499 } 2500 2501 define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) { 2502 ; SSE2-LABEL: combine_undef_input_test15: 2503 ; SSE2: # BB#0: 2504 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 2505 ; SSE2-NEXT: movapd %xmm1, %xmm0 2506 ; SSE2-NEXT: retq 2507 ; 2508 ; SSSE3-LABEL: combine_undef_input_test15: 2509 ; SSSE3: # BB#0: 2510 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 2511 ; SSSE3-NEXT: movapd %xmm1, %xmm0 2512 ; SSSE3-NEXT: retq 2513 ; 2514 ; SSE41-LABEL: combine_undef_input_test15: 2515 ; SSE41: # BB#0: 2516 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 2517 ; SSE41-NEXT: retq 2518 ; 2519 ; AVX-LABEL: combine_undef_input_test15: 2520 ; AVX: # BB#0: 2521 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 2522 ; AVX-NEXT: retq 2523 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, 
i32 1, i32 3> 2524 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3> 2525 ret <4 x float> %2 2526 } 2527 2528 2529 ; Verify that shuffles are canonicalized according to rules: 2530 ; shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) 2531 ; 2532 ; This allows us to trigger the following combine rule: 2533 ; (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2) 2534 ; 2535 ; As a result, all the shuffle pairs in each function below should be 2536 ; combined into a single legal shuffle operation. 2537 2538 define <4 x float> @combine_undef_input_test16(<4 x float> %a) { 2539 ; ALL-LABEL: combine_undef_input_test16: 2540 ; ALL: # BB#0: 2541 ; ALL-NEXT: retq 2542 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1> 2543 %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 3> 2544 ret <4 x float> %2 2545 } 2546 2547 define <4 x float> @combine_undef_input_test17(<4 x float> %a) { 2548 ; SSE2-LABEL: combine_undef_input_test17: 2549 ; SSE2: # BB#0: 2550 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 2551 ; SSE2-NEXT: retq 2552 ; 2553 ; SSSE3-LABEL: combine_undef_input_test17: 2554 ; SSSE3: # BB#0: 2555 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2556 ; SSSE3-NEXT: retq 2557 ; 2558 ; SSE41-LABEL: combine_undef_input_test17: 2559 ; SSE41: # BB#0: 2560 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2561 ; SSE41-NEXT: retq 2562 ; 2563 ; AVX-LABEL: combine_undef_input_test17: 2564 ; AVX: # BB#0: 2565 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 2566 ; AVX-NEXT: retq 2567 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7> 2568 %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1> 2569 ret <4 x float> %2 2570 } 2571 2572 define <4 x float> @combine_undef_input_test18(<4 x float> %a) { 2573 ; SSE2-LABEL: combine_undef_input_test18: 2574 ; SSE2: # BB#0: 2575 ;
SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 2576 ; SSE2-NEXT: retq 2577 ; 2578 ; SSSE3-LABEL: combine_undef_input_test18: 2579 ; SSSE3: # BB#0: 2580 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2581 ; SSSE3-NEXT: retq 2582 ; 2583 ; SSE41-LABEL: combine_undef_input_test18: 2584 ; SSE41: # BB#0: 2585 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2586 ; SSE41-NEXT: retq 2587 ; 2588 ; AVX-LABEL: combine_undef_input_test18: 2589 ; AVX: # BB#0: 2590 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 2591 ; AVX-NEXT: retq 2592 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7> 2593 %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5> 2594 ret <4 x float> %2 2595 } 2596 2597 define <4 x float> @combine_undef_input_test19(<4 x float> %a) { 2598 ; SSE-LABEL: combine_undef_input_test19: 2599 ; SSE: # BB#0: 2600 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 2601 ; SSE-NEXT: retq 2602 ; 2603 ; AVX-LABEL: combine_undef_input_test19: 2604 ; AVX: # BB#0: 2605 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1] 2606 ; AVX-NEXT: retq 2607 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5> 2608 %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 2609 ret <4 x float> %2 2610 } 2611 2612 define <4 x float> @combine_undef_input_test20(<4 x float> %a) { 2613 ; ALL-LABEL: combine_undef_input_test20: 2614 ; ALL: # BB#0: 2615 ; ALL-NEXT: retq 2616 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3> 2617 %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3> 2618 ret <4 x float> %2 2619 } 2620 2621 ; These tests are designed to test the ability to combine away unnecessary 2622 ; operations feeding into a shuffle. 
The AVX cases are the important ones as 2623 ; they leverage operations which cannot be done naturally on the entire vector 2624 ; and thus are decomposed into multiple smaller operations. 2625 2626 define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) { 2627 ; SSE-LABEL: combine_unneeded_subvector1: 2628 ; SSE: # BB#0: 2629 ; SSE-NEXT: paddd {{.*}}(%rip), %xmm1 2630 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,2,1,0] 2631 ; SSE-NEXT: movdqa %xmm0, %xmm1 2632 ; SSE-NEXT: retq 2633 ; 2634 ; AVX1-LABEL: combine_unneeded_subvector1: 2635 ; AVX1: # BB#0: 2636 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2637 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 2638 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 2639 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2640 ; AVX1-NEXT: retq 2641 ; 2642 ; AVX2-LABEL: combine_unneeded_subvector1: 2643 ; AVX2: # BB#0: 2644 ; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 2645 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 2646 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 2647 ; AVX2-NEXT: retq 2648 %b = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 2649 %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 2650 ret <8 x i32> %c 2651 } 2652 2653 define <8 x i32> @combine_unneeded_subvector2(<8 x i32> %a, <8 x i32> %b) { 2654 ; SSE-LABEL: combine_unneeded_subvector2: 2655 ; SSE: # BB#0: 2656 ; SSE-NEXT: paddd {{.*}}(%rip), %xmm1 2657 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,2,1,0] 2658 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,2,1,0] 2659 ; SSE-NEXT: retq 2660 ; 2661 ; AVX1-LABEL: combine_unneeded_subvector2: 2662 ; AVX1: # BB#0: 2663 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2664 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 2665 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2666 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 2667 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 2668 ; AVX1-NEXT: retq 
2669 ; 2670 ; AVX2-LABEL: combine_unneeded_subvector2: 2671 ; AVX2: # BB#0: 2672 ; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 2673 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 2674 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 2675 ; AVX2-NEXT: retq 2676 %c = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 2677 %d = shufflevector <8 x i32> %b, <8 x i32> %c, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 2678 ret <8 x i32> %d 2679 } 2680 2681 define <4 x float> @combine_insertps1(<4 x float> %a, <4 x float> %b) { 2682 ; SSE2-LABEL: combine_insertps1: 2683 ; SSE2: # BB#0: 2684 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,0] 2685 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3] 2686 ; SSE2-NEXT: movaps %xmm1, %xmm0 2687 ; SSE2-NEXT: retq 2688 ; 2689 ; SSSE3-LABEL: combine_insertps1: 2690 ; SSSE3: # BB#0: 2691 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,0] 2692 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3] 2693 ; SSSE3-NEXT: movaps %xmm1, %xmm0 2694 ; SSSE3-NEXT: retq 2695 ; 2696 ; SSE41-LABEL: combine_insertps1: 2697 ; SSE41: # BB#0: 2698 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3] 2699 ; SSE41-NEXT: retq 2700 ; 2701 ; AVX-LABEL: combine_insertps1: 2702 ; AVX: # BB#0: 2703 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3] 2704 ; AVX-NEXT: retq 2705 2706 %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 6, i32 2, i32 4> 2707 %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> <i32 5, i32 1, i32 6, i32 3> 2708 ret <4 x float> %d 2709 } 2710 2711 define <4 x float> @combine_insertps2(<4 x float> %a, <4 x float> %b) { 2712 ; SSE2-LABEL: combine_insertps2: 2713 ; SSE2: # BB#0: 2714 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,0] 2715 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 2716 ; SSE2-NEXT: movaps %xmm1, %xmm0 2717 ; SSE2-NEXT: retq 2718 ; 2719 ; SSSE3-LABEL: combine_insertps2: 
2720 ; SSSE3: # BB#0: 2721 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,0] 2722 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 2723 ; SSSE3-NEXT: movaps %xmm1, %xmm0 2724 ; SSSE3-NEXT: retq 2725 ; 2726 ; SSE41-LABEL: combine_insertps2: 2727 ; SSE41: # BB#0: 2728 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3] 2729 ; SSE41-NEXT: retq 2730 ; 2731 ; AVX-LABEL: combine_insertps2: 2732 ; AVX: # BB#0: 2733 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3] 2734 ; AVX-NEXT: retq 2735 2736 %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 1, i32 6, i32 7> 2737 %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> <i32 4, i32 6, i32 2, i32 3> 2738 ret <4 x float> %d 2739 } 2740 2741 define <4 x float> @combine_insertps3(<4 x float> %a, <4 x float> %b) { 2742 ; SSE2-LABEL: combine_insertps3: 2743 ; SSE2: # BB#0: 2744 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] 2745 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] 2746 ; SSE2-NEXT: retq 2747 ; 2748 ; SSSE3-LABEL: combine_insertps3: 2749 ; SSSE3: # BB#0: 2750 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] 2751 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] 2752 ; SSSE3-NEXT: retq 2753 ; 2754 ; SSE41-LABEL: combine_insertps3: 2755 ; SSE41: # BB#0: 2756 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 2757 ; SSE41-NEXT: retq 2758 ; 2759 ; AVX-LABEL: combine_insertps3: 2760 ; AVX: # BB#0: 2761 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 2762 ; AVX-NEXT: retq 2763 2764 %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 4, i32 2, i32 5> 2765 %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32><i32 4, i32 1, i32 5, i32 3> 2766 ret <4 x float> %d 2767 } 2768 2769 define <4 x float> @combine_insertps4(<4 x float> %a, <4 x float> %b) { 2770 ; SSE2-LABEL: combine_insertps4: 2771 ; SSE2: # BB#0: 2772 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 
2773 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 2774 ; SSE2-NEXT: retq 2775 ; 2776 ; SSSE3-LABEL: combine_insertps4: 2777 ; SSSE3: # BB#0: 2778 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 2779 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 2780 ; SSSE3-NEXT: retq 2781 ; 2782 ; SSE41-LABEL: combine_insertps4: 2783 ; SSE41: # BB#0: 2784 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 2785 ; SSE41-NEXT: retq 2786 ; 2787 ; AVX-LABEL: combine_insertps4: 2788 ; AVX: # BB#0: 2789 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 2790 ; AVX-NEXT: retq 2791 2792 %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 4, i32 2, i32 5> 2793 %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32><i32 4, i32 1, i32 6, i32 5> 2794 ret <4 x float> %d 2795 } 2796 2797 define <4 x float> @PR22377(<4 x float> %a, <4 x float> %b) { 2798 ; SSE-LABEL: PR22377: 2799 ; SSE: # BB#0: # %entry 2800 ; SSE-NEXT: movaps %xmm0, %xmm1 2801 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3,1,3] 2802 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2] 2803 ; SSE-NEXT: addps %xmm0, %xmm1 2804 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2805 ; SSE-NEXT: retq 2806 ; 2807 ; AVX-LABEL: PR22377: 2808 ; AVX: # BB#0: # %entry 2809 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,1,3] 2810 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2] 2811 ; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm1 2812 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2813 ; AVX-NEXT: retq 2814 entry: 2815 %s1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 1, i32 3> 2816 %s2 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> 2817 %r2 = fadd <4 x float> %s1, %s2 2818 %s3 = shufflevector <4 x float> %s2, <4 x float> %r2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 2819 ret <4 x float> %s3 2820 } 2821 2822 define <4 x float> @PR22390(<4 x float> %a, <4 x float> %b) 
{ 2823 ; SSE2-LABEL: PR22390: 2824 ; SSE2: # BB#0: # %entry 2825 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2] 2826 ; SSE2-NEXT: movaps %xmm0, %xmm2 2827 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] 2828 ; SSE2-NEXT: addps %xmm0, %xmm2 2829 ; SSE2-NEXT: movaps %xmm2, %xmm0 2830 ; SSE2-NEXT: retq 2831 ; 2832 ; SSSE3-LABEL: PR22390: 2833 ; SSSE3: # BB#0: # %entry 2834 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2] 2835 ; SSSE3-NEXT: movaps %xmm0, %xmm2 2836 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] 2837 ; SSSE3-NEXT: addps %xmm0, %xmm2 2838 ; SSSE3-NEXT: movaps %xmm2, %xmm0 2839 ; SSSE3-NEXT: retq 2840 ; 2841 ; SSE41-LABEL: PR22390: 2842 ; SSE41: # BB#0: # %entry 2843 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2] 2844 ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3] 2845 ; SSE41-NEXT: addps %xmm1, %xmm0 2846 ; SSE41-NEXT: retq 2847 ; 2848 ; AVX-LABEL: PR22390: 2849 ; AVX: # BB#0: # %entry 2850 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,1,2] 2851 ; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3] 2852 ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 2853 ; AVX-NEXT: retq 2854 entry: 2855 %s1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2> 2856 %s2 = shufflevector <4 x float> %s1, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 2857 %r2 = fadd <4 x float> %s1, %s2 2858 ret <4 x float> %r2 2859 } 2860 2861 define <8 x float> @PR22412(<8 x float> %a, <8 x float> %b) { 2862 ; SSE2-LABEL: PR22412: 2863 ; SSE2: # BB#0: # %entry 2864 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] 2865 ; SSE2-NEXT: movapd %xmm2, %xmm0 2866 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] 2867 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] 2868 ; SSE2-NEXT: movaps %xmm3, %xmm1 2869 ; SSE2-NEXT: retq 2870 ; 2871 ; SSSE3-LABEL: PR22412: 2872 ; SSSE3: # BB#0: # %entry 2873 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] 2874 ; SSSE3-NEXT: movapd %xmm2, %xmm0 2875 ; SSSE3-NEXT: 
shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] 2876 ; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] 2877 ; SSSE3-NEXT: movaps %xmm3, %xmm1 2878 ; SSSE3-NEXT: retq 2879 ; 2880 ; SSE41-LABEL: PR22412: 2881 ; SSE41: # BB#0: # %entry 2882 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] 2883 ; SSE41-NEXT: movapd %xmm0, %xmm1 2884 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm3[3,2] 2885 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[3,2] 2886 ; SSE41-NEXT: movaps %xmm1, %xmm0 2887 ; SSE41-NEXT: movaps %xmm3, %xmm1 2888 ; SSE41-NEXT: retq 2889 ; 2890 ; AVX1-LABEL: PR22412: 2891 ; AVX1: # BB#0: # %entry 2892 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 2893 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 2894 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0],ymm1[3,2],ymm0[5,4],ymm1[7,6] 2895 ; AVX1-NEXT: retq 2896 ; 2897 ; AVX2-LABEL: PR22412: 2898 ; AVX2: # BB#0: # %entry 2899 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 2900 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,7,6,5,4,3,2] 2901 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 2902 ; AVX2-NEXT: retq 2903 entry: 2904 %s1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2905 %s2 = shufflevector <8 x float> %s1, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2> 2906 ret <8 x float> %s2 2907 } 2908