; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
;
; Verify that the DAG combiner correctly folds bitwise operations across
; shuffles, nested shuffles with undef, pairs of nested shuffles, and other
; basic and always-safe patterns. Also test that the DAG combiner will combine
; target-specific shuffle instructions where reasonable.

target triple = "x86_64-unknown-unknown"

declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)

; Chains of pshufd/pshuflw/pshufhw intrinsics: pairs of inverse shuffles must
; fold away entirely (just retq), and compatible chains must fold to one shuffle.

define <4 x i32> @combine_pshufd1(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd1:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 27)
  ret <4 x i32> %c
}

define <4 x i32> @combine_pshufd2(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd2:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd3(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd3:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd4(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd4:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pshufd4:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT:    retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -31)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -31)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd5:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pshufd5:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT:    retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -76)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -76)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd6(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd6:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pshufd6:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)
  ret <4 x i32> %c
}

define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw1:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  ret <8 x i16> %c
}

define <8 x i16> @combine_pshuflw2(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw2:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 -28)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

define <8 x i16> @combine_pshuflw3(<8 x i16> %a) {
; SSE-LABEL: combine_pshuflw3:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pshuflw3:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT:    retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

define <8 x i16> @combine_pshufhw1(<8 x i16> %a) {
; SSE-LABEL: combine_pshufhw1:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pshufhw1:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT:    retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

; Bitwise op of two identically-shuffled inputs: the shuffle must be hoisted
; after the and/or/xor so only one shuffle remains.

define <4 x i32> @combine_bitwise_ops_test1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test1:
; SSE:       # BB#0:
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_bitwise_ops_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test2:
; SSE:       # BB#0:
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_bitwise_ops_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test3:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_bitwise_ops_test3:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test4:
; SSE:       # BB#0:
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_bitwise_ops_test4:
; AVX:       # BB#0:
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test5:
; SSE:       # BB#0:
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_bitwise_ops_test5:
; AVX:       # BB#0:
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test6:
; SSE:       # BB#0:
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_bitwise_ops_test6:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}


; Verify that DAGCombiner moves the shuffle after the xor/and/or even if shuffles
; are not performing a swizzle operations.

define <4 x i32> @combine_bitwise_ops_test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test1b:
; SSE2:       # BB#0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test1b:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test1b:
; SSE41:       # BB#0:
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_bitwise_ops_test1b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_bitwise_ops_test1b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test2b:
; SSE2:       # BB#0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test2b:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test2b:
; SSE41:       # BB#0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_bitwise_ops_test2b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_bitwise_ops_test2b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test3b:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorps %xmm1, %xmm0
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test3b:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorps %xmm1, %xmm0
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test3b:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_bitwise_ops_test3b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_bitwise_ops_test3b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test4b:
; SSE2:       # BB#0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test4b:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test4b:
; SSE41:       # BB#0:
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_bitwise_ops_test4b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_bitwise_ops_test4b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test5b:
; SSE2:       # BB#0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test5b:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test5b:
; SSE41:       # BB#0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_bitwise_ops_test5b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_bitwise_ops_test5b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test6b:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorps %xmm1, %xmm0
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test6b:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorps %xmm1, %xmm0
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test6b:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_bitwise_ops_test6b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_bitwise_ops_test6b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX2-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test1c:
; SSE2:       # BB#0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test1c:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test1c:
; SSE41:       # BB#0:
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_bitwise_ops_test1c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_bitwise_ops_test1c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test2c:
; SSE2:       # BB#0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test2c:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test2c:
; SSE41:       # BB#0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_bitwise_ops_test2c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_bitwise_ops_test2c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test3c:
; SSE2:       # BB#0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test3c:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pxor %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test3c:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_bitwise_ops_test3c:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test4c:
; SSE2:       # BB#0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test4c:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test4c:
; SSE41:       # BB#0:
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_bitwise_ops_test4c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_bitwise_ops_test4c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test5c:
; SSE2:       # BB#0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test5c:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test5c:
; SSE41:       # BB#0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_bitwise_ops_test5c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_bitwise_ops_test5c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test6c:
; SSE2:       # BB#0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_bitwise_ops_test6c:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pxor %xmm1, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_bitwise_ops_test6c:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_bitwise_ops_test6c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_bitwise_ops_test6c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

; Nested shufflevectors whose second mask mixes real and undef (out-of-range
; into an undef operand) indices: each pair below folds to a single pshufd
; (or a broadcast on AVX2) of %A, and %B is dead.

define <4 x i32> @combine_nested_undef_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test1:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_nested_undef_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test2:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_nested_undef_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test3(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test3:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_nested_undef_test3:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test4(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test4:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_nested_undef_test4:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_nested_undef_test4:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 7, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test5(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test5:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_nested_undef_test5:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 4, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test6(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test6:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_nested_undef_test6:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test7(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test7:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_nested_undef_test7:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test8(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test8:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_nested_undef_test8:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test9(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test9:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_nested_undef_test9:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 2, i32 5>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test10(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test10:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_nested_undef_test10:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test11(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test11:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_nested_undef_test11:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 5, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test12(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test12:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_nested_undef_test12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_nested_undef_test12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 0, i32 2, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
  ret <4 x i32> %2
}

; The following pair of shuffles is folded into vector %A.
define <4 x i32> @combine_nested_undef_test13(<4 x i32> %A, <4 x i32> %B) {
; ALL-LABEL: combine_nested_undef_test13:
; ALL:       # BB#0:
; ALL-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 4, i32 2, i32 6>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 0, i32 2, i32 4>
  ret <4 x i32> %2
}

; The following pair of shuffles is folded into vector %B.
define <4 x i32> @combine_nested_undef_test14(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test14:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_nested_undef_test14:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 1, i32 4>
  ret <4 x i32> %2
}


; Verify that we don't optimize the following cases. We expect more than one shuffle.
;
; FIXME: Many of these already don't make sense, and the rest should stop
; making sense with the new vector shuffle lowering. Revisit at least testing for
; it.
990 991 define <4 x i32> @combine_nested_undef_test15(<4 x i32> %A, <4 x i32> %B) { 992 ; SSE2-LABEL: combine_nested_undef_test15: 993 ; SSE2: # BB#0: 994 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] 995 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,1] 996 ; SSE2-NEXT: movaps %xmm1, %xmm0 997 ; SSE2-NEXT: retq 998 ; 999 ; SSSE3-LABEL: combine_nested_undef_test15: 1000 ; SSSE3: # BB#0: 1001 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] 1002 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,1] 1003 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1004 ; SSSE3-NEXT: retq 1005 ; 1006 ; SSE41-LABEL: combine_nested_undef_test15: 1007 ; SSE41: # BB#0: 1008 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 1009 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1] 1010 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 1011 ; SSE41-NEXT: retq 1012 ; 1013 ; AVX1-LABEL: combine_nested_undef_test15: 1014 ; AVX1: # BB#0: 1015 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 1016 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1] 1017 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 1018 ; AVX1-NEXT: retq 1019 ; 1020 ; AVX2-LABEL: combine_nested_undef_test15: 1021 ; AVX2: # BB#0: 1022 ; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 1023 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1] 1024 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] 1025 ; AVX2-NEXT: retq 1026 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1> 1027 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3> 1028 ret <4 x i32> %2 1029 } 1030 1031 define <4 x i32> @combine_nested_undef_test16(<4 x i32> %A, <4 x i32> %B) { 1032 ; SSE2-LABEL: combine_nested_undef_test16: 1033 ; SSE2: # BB#0: 1034 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 1035 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3] 1036 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1037 
; SSE2-NEXT: retq 1038 ; 1039 ; SSSE3-LABEL: combine_nested_undef_test16: 1040 ; SSSE3: # BB#0: 1041 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 1042 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3] 1043 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1044 ; SSSE3-NEXT: retq 1045 ; 1046 ; SSE41-LABEL: combine_nested_undef_test16: 1047 ; SSE41: # BB#0: 1048 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1049 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1050 ; SSE41-NEXT: retq 1051 ; 1052 ; AVX1-LABEL: combine_nested_undef_test16: 1053 ; AVX1: # BB#0: 1054 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1055 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1056 ; AVX1-NEXT: retq 1057 ; 1058 ; AVX2-LABEL: combine_nested_undef_test16: 1059 ; AVX2: # BB#0: 1060 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1061 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 1062 ; AVX2-NEXT: retq 1063 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1064 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3> 1065 ret <4 x i32> %2 1066 } 1067 1068 define <4 x i32> @combine_nested_undef_test17(<4 x i32> %A, <4 x i32> %B) { 1069 ; SSE2-LABEL: combine_nested_undef_test17: 1070 ; SSE2: # BB#0: 1071 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0] 1072 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1],xmm1[0,2] 1073 ; SSE2-NEXT: retq 1074 ; 1075 ; SSSE3-LABEL: combine_nested_undef_test17: 1076 ; SSSE3: # BB#0: 1077 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0] 1078 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1],xmm1[0,2] 1079 ; SSSE3-NEXT: retq 1080 ; 1081 ; SSE41-LABEL: combine_nested_undef_test17: 1082 ; SSE41: # BB#0: 1083 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] 1084 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1] 1085 ; SSE41-NEXT: retq 1086 ; 1087 
; AVX1-LABEL: combine_nested_undef_test17: 1088 ; AVX1: # BB#0: 1089 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] 1090 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1] 1091 ; AVX1-NEXT: retq 1092 ; 1093 ; AVX2-LABEL: combine_nested_undef_test17: 1094 ; AVX2: # BB#0: 1095 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 1096 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1] 1097 ; AVX2-NEXT: retq 1098 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1> 1099 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3> 1100 ret <4 x i32> %2 1101 } 1102 1103 define <4 x i32> @combine_nested_undef_test18(<4 x i32> %A, <4 x i32> %B) { 1104 ; SSE-LABEL: combine_nested_undef_test18: 1105 ; SSE: # BB#0: 1106 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,3] 1107 ; SSE-NEXT: retq 1108 ; 1109 ; AVX-LABEL: combine_nested_undef_test18: 1110 ; AVX: # BB#0: 1111 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,0,3] 1112 ; AVX-NEXT: retq 1113 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7> 1114 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3> 1115 ret <4 x i32> %2 1116 } 1117 1118 define <4 x i32> @combine_nested_undef_test19(<4 x i32> %A, <4 x i32> %B) { 1119 ; SSE2-LABEL: combine_nested_undef_test19: 1120 ; SSE2: # BB#0: 1121 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1122 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,0,0,0] 1123 ; SSE2-NEXT: retq 1124 ; 1125 ; SSSE3-LABEL: combine_nested_undef_test19: 1126 ; SSSE3: # BB#0: 1127 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1128 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,0,0,0] 1129 ; SSSE3-NEXT: retq 1130 ; 1131 ; SSE41-LABEL: combine_nested_undef_test19: 1132 ; SSE41: # BB#0: 1133 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 1134 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0] 1135 
; SSE41-NEXT: retq 1136 ; 1137 ; AVX1-LABEL: combine_nested_undef_test19: 1138 ; AVX1: # BB#0: 1139 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 1140 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0] 1141 ; AVX1-NEXT: retq 1142 ; 1143 ; AVX2-LABEL: combine_nested_undef_test19: 1144 ; AVX2: # BB#0: 1145 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] 1146 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0] 1147 ; AVX2-NEXT: retq 1148 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 5, i32 6> 1149 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0> 1150 ret <4 x i32> %2 1151 } 1152 1153 define <4 x i32> @combine_nested_undef_test20(<4 x i32> %A, <4 x i32> %B) { 1154 ; SSE2-LABEL: combine_nested_undef_test20: 1155 ; SSE2: # BB#0: 1156 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3] 1157 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1] 1158 ; SSE2-NEXT: movaps %xmm1, %xmm0 1159 ; SSE2-NEXT: retq 1160 ; 1161 ; SSSE3-LABEL: combine_nested_undef_test20: 1162 ; SSSE3: # BB#0: 1163 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3] 1164 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1] 1165 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1166 ; SSSE3-NEXT: retq 1167 ; 1168 ; SSE41-LABEL: combine_nested_undef_test20: 1169 ; SSE41: # BB#0: 1170 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 1171 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,3,0] 1172 ; SSE41-NEXT: retq 1173 ; 1174 ; AVX1-LABEL: combine_nested_undef_test20: 1175 ; AVX1: # BB#0: 1176 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 1177 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0] 1178 ; AVX1-NEXT: retq 1179 ; 1180 ; AVX2-LABEL: combine_nested_undef_test20: 1181 ; AVX2: # BB#0: 1182 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 1183 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0] 1184 ; AVX2-NEXT: retq 1185 %1 = shufflevector <4 x i32> %A, <4 x 
i32> %B, <4 x i32> <i32 3, i32 2, i32 4, i32 4> 1186 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3> 1187 ret <4 x i32> %2 1188 } 1189 1190 define <4 x i32> @combine_nested_undef_test21(<4 x i32> %A, <4 x i32> %B) { 1191 ; SSE2-LABEL: combine_nested_undef_test21: 1192 ; SSE2: # BB#0: 1193 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1194 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,0,3] 1195 ; SSE2-NEXT: retq 1196 ; 1197 ; SSSE3-LABEL: combine_nested_undef_test21: 1198 ; SSSE3: # BB#0: 1199 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1200 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,0,3] 1201 ; SSSE3-NEXT: retq 1202 ; 1203 ; SSE41-LABEL: combine_nested_undef_test21: 1204 ; SSE41: # BB#0: 1205 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7] 1206 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1207 ; SSE41-NEXT: retq 1208 ; 1209 ; AVX1-LABEL: combine_nested_undef_test21: 1210 ; AVX1: # BB#0: 1211 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7] 1212 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1213 ; AVX1-NEXT: retq 1214 ; 1215 ; AVX2-LABEL: combine_nested_undef_test21: 1216 ; AVX2: # BB#0: 1217 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 1218 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 1219 ; AVX2-NEXT: retq 1220 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1> 1221 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3> 1222 ret <4 x i32> %2 1223 } 1224 1225 1226 ; Test that we correctly combine shuffles according to rule 1227 ; shuffle(shuffle(x, y), undef) -> shuffle(y, undef) 1228 1229 define <4 x i32> @combine_nested_undef_test22(<4 x i32> %A, <4 x i32> %B) { 1230 ; SSE-LABEL: combine_nested_undef_test22: 1231 ; SSE: # BB#0: 1232 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,3] 1233 ; SSE-NEXT: retq 1234 ; 1235 ; AVX-LABEL: 
combine_nested_undef_test22: 1236 ; AVX: # BB#0: 1237 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,1,3] 1238 ; AVX-NEXT: retq 1239 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7> 1240 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 3> 1241 ret <4 x i32> %2 1242 } 1243 1244 define <4 x i32> @combine_nested_undef_test23(<4 x i32> %A, <4 x i32> %B) { 1245 ; SSE-LABEL: combine_nested_undef_test23: 1246 ; SSE: # BB#0: 1247 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3] 1248 ; SSE-NEXT: retq 1249 ; 1250 ; AVX-LABEL: combine_nested_undef_test23: 1251 ; AVX: # BB#0: 1252 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,3] 1253 ; AVX-NEXT: retq 1254 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7> 1255 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3> 1256 ret <4 x i32> %2 1257 } 1258 1259 define <4 x i32> @combine_nested_undef_test24(<4 x i32> %A, <4 x i32> %B) { 1260 ; SSE-LABEL: combine_nested_undef_test24: 1261 ; SSE: # BB#0: 1262 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,3] 1263 ; SSE-NEXT: retq 1264 ; 1265 ; AVX-LABEL: combine_nested_undef_test24: 1266 ; AVX: # BB#0: 1267 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,3,2,3] 1268 ; AVX-NEXT: retq 1269 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 7> 1270 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 4> 1271 ret <4 x i32> %2 1272 } 1273 1274 define <4 x i32> @combine_nested_undef_test25(<4 x i32> %A, <4 x i32> %B) { 1275 ; SSE-LABEL: combine_nested_undef_test25: 1276 ; SSE: # BB#0: 1277 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1278 ; SSE-NEXT: retq 1279 ; 1280 ; AVX1-LABEL: combine_nested_undef_test25: 1281 ; AVX1: # BB#0: 1282 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1283 ; AVX1-NEXT: retq 1284 ; 1285 ; AVX2-LABEL: combine_nested_undef_test25: 1286 ; AVX2: # BB#0: 1287 ; 
AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 1288 ; AVX2-NEXT: retq 1289 %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 5, i32 2, i32 4> 1290 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 3, i32 1> 1291 ret <4 x i32> %2 1292 } 1293 1294 define <4 x i32> @combine_nested_undef_test26(<4 x i32> %A, <4 x i32> %B) { 1295 ; SSE-LABEL: combine_nested_undef_test26: 1296 ; SSE: # BB#0: 1297 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1298 ; SSE-NEXT: retq 1299 ; 1300 ; AVX-LABEL: combine_nested_undef_test26: 1301 ; AVX: # BB#0: 1302 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1303 ; AVX-NEXT: retq 1304 %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7> 1305 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3> 1306 ret <4 x i32> %2 1307 } 1308 1309 define <4 x i32> @combine_nested_undef_test27(<4 x i32> %A, <4 x i32> %B) { 1310 ; SSE-LABEL: combine_nested_undef_test27: 1311 ; SSE: # BB#0: 1312 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1313 ; SSE-NEXT: retq 1314 ; 1315 ; AVX1-LABEL: combine_nested_undef_test27: 1316 ; AVX1: # BB#0: 1317 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1318 ; AVX1-NEXT: retq 1319 ; 1320 ; AVX2-LABEL: combine_nested_undef_test27: 1321 ; AVX2: # BB#0: 1322 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 1323 ; AVX2-NEXT: retq 1324 %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 2, i32 1, i32 5, i32 4> 1325 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2> 1326 ret <4 x i32> %2 1327 } 1328 1329 define <4 x i32> @combine_nested_undef_test28(<4 x i32> %A, <4 x i32> %B) { 1330 ; SSE-LABEL: combine_nested_undef_test28: 1331 ; SSE: # BB#0: 1332 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,0] 1333 ; SSE-NEXT: retq 1334 ; 1335 ; AVX-LABEL: combine_nested_undef_test28: 1336 ; AVX: # BB#0: 1337 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0] 1338 ; AVX-NEXT: retq 1339 %1 = 
shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 4, i32 5> 1340 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 2> 1341 ret <4 x i32> %2 1342 } 1343 1344 define <4 x float> @combine_test1(<4 x float> %a, <4 x float> %b) { 1345 ; SSE-LABEL: combine_test1: 1346 ; SSE: # BB#0: 1347 ; SSE-NEXT: movaps %xmm1, %xmm0 1348 ; SSE-NEXT: retq 1349 ; 1350 ; AVX-LABEL: combine_test1: 1351 ; AVX: # BB#0: 1352 ; AVX-NEXT: vmovaps %xmm1, %xmm0 1353 ; AVX-NEXT: retq 1354 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1355 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1356 ret <4 x float> %2 1357 } 1358 1359 define <4 x float> @combine_test2(<4 x float> %a, <4 x float> %b) { 1360 ; SSE2-LABEL: combine_test2: 1361 ; SSE2: # BB#0: 1362 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1363 ; SSE2-NEXT: movaps %xmm1, %xmm0 1364 ; SSE2-NEXT: retq 1365 ; 1366 ; SSSE3-LABEL: combine_test2: 1367 ; SSSE3: # BB#0: 1368 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1369 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1370 ; SSSE3-NEXT: retq 1371 ; 1372 ; SSE41-LABEL: combine_test2: 1373 ; SSE41: # BB#0: 1374 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1375 ; SSE41-NEXT: retq 1376 ; 1377 ; AVX-LABEL: combine_test2: 1378 ; AVX: # BB#0: 1379 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1380 ; AVX-NEXT: retq 1381 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1382 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3> 1383 ret <4 x float> %2 1384 } 1385 1386 define <4 x float> @combine_test3(<4 x float> %a, <4 x float> %b) { 1387 ; SSE-LABEL: combine_test3: 1388 ; SSE: # BB#0: 1389 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1390 ; SSE-NEXT: retq 1391 ; 1392 ; AVX-LABEL: combine_test3: 1393 ; AVX: # BB#0: 1394 ; AVX-NEXT: vunpcklpd {{.*#+}} 
xmm0 = xmm0[0],xmm1[0] 1395 ; AVX-NEXT: retq 1396 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7> 1397 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1> 1398 ret <4 x float> %2 1399 } 1400 1401 define <4 x float> @combine_test4(<4 x float> %a, <4 x float> %b) { 1402 ; SSE-LABEL: combine_test4: 1403 ; SSE: # BB#0: 1404 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 1405 ; SSE-NEXT: movapd %xmm1, %xmm0 1406 ; SSE-NEXT: retq 1407 ; 1408 ; AVX-LABEL: combine_test4: 1409 ; AVX: # BB#0: 1410 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] 1411 ; AVX-NEXT: retq 1412 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5> 1413 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 1414 ret <4 x float> %2 1415 } 1416 1417 define <4 x float> @combine_test5(<4 x float> %a, <4 x float> %b) { 1418 ; SSE2-LABEL: combine_test5: 1419 ; SSE2: # BB#0: 1420 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0] 1421 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1422 ; SSE2-NEXT: retq 1423 ; 1424 ; SSSE3-LABEL: combine_test5: 1425 ; SSSE3: # BB#0: 1426 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0] 1427 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1428 ; SSSE3-NEXT: retq 1429 ; 1430 ; SSE41-LABEL: combine_test5: 1431 ; SSE41: # BB#0: 1432 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 1433 ; SSE41-NEXT: retq 1434 ; 1435 ; AVX-LABEL: combine_test5: 1436 ; AVX: # BB#0: 1437 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 1438 ; AVX-NEXT: retq 1439 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1440 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7> 1441 ret <4 x float> %2 1442 } 1443 1444 define <4 x i32> @combine_test6(<4 x i32> %a, <4 x i32> %b) { 1445 ; SSE-LABEL: combine_test6: 
1446 ; SSE: # BB#0: 1447 ; SSE-NEXT: movaps %xmm1, %xmm0 1448 ; SSE-NEXT: retq 1449 ; 1450 ; AVX-LABEL: combine_test6: 1451 ; AVX: # BB#0: 1452 ; AVX-NEXT: vmovaps %xmm1, %xmm0 1453 ; AVX-NEXT: retq 1454 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1455 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1456 ret <4 x i32> %2 1457 } 1458 1459 define <4 x i32> @combine_test7(<4 x i32> %a, <4 x i32> %b) { 1460 ; SSE2-LABEL: combine_test7: 1461 ; SSE2: # BB#0: 1462 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1463 ; SSE2-NEXT: movaps %xmm1, %xmm0 1464 ; SSE2-NEXT: retq 1465 ; 1466 ; SSSE3-LABEL: combine_test7: 1467 ; SSSE3: # BB#0: 1468 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1469 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1470 ; SSSE3-NEXT: retq 1471 ; 1472 ; SSE41-LABEL: combine_test7: 1473 ; SSE41: # BB#0: 1474 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1475 ; SSE41-NEXT: retq 1476 ; 1477 ; AVX1-LABEL: combine_test7: 1478 ; AVX1: # BB#0: 1479 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1480 ; AVX1-NEXT: retq 1481 ; 1482 ; AVX2-LABEL: combine_test7: 1483 ; AVX2: # BB#0: 1484 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1485 ; AVX2-NEXT: retq 1486 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1487 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3> 1488 ret <4 x i32> %2 1489 } 1490 1491 define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) { 1492 ; SSE-LABEL: combine_test8: 1493 ; SSE: # BB#0: 1494 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1495 ; SSE-NEXT: retq 1496 ; 1497 ; AVX-LABEL: combine_test8: 1498 ; AVX: # BB#0: 1499 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1500 ; AVX-NEXT: retq 1501 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7> 1502 %2 = shufflevector <4 x i32> %1, 
<4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1> 1503 ret <4 x i32> %2 1504 } 1505 1506 define <4 x i32> @combine_test9(<4 x i32> %a, <4 x i32> %b) { 1507 ; SSE-LABEL: combine_test9: 1508 ; SSE: # BB#0: 1509 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1] 1510 ; SSE-NEXT: movdqa %xmm1, %xmm0 1511 ; SSE-NEXT: retq 1512 ; 1513 ; AVX-LABEL: combine_test9: 1514 ; AVX: # BB#0: 1515 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] 1516 ; AVX-NEXT: retq 1517 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5> 1518 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 1519 ret <4 x i32> %2 1520 } 1521 1522 define <4 x i32> @combine_test10(<4 x i32> %a, <4 x i32> %b) { 1523 ; SSE2-LABEL: combine_test10: 1524 ; SSE2: # BB#0: 1525 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0] 1526 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1527 ; SSE2-NEXT: retq 1528 ; 1529 ; SSSE3-LABEL: combine_test10: 1530 ; SSSE3: # BB#0: 1531 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0] 1532 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1533 ; SSSE3-NEXT: retq 1534 ; 1535 ; SSE41-LABEL: combine_test10: 1536 ; SSE41: # BB#0: 1537 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7] 1538 ; SSE41-NEXT: retq 1539 ; 1540 ; AVX1-LABEL: combine_test10: 1541 ; AVX1: # BB#0: 1542 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7] 1543 ; AVX1-NEXT: retq 1544 ; 1545 ; AVX2-LABEL: combine_test10: 1546 ; AVX2: # BB#0: 1547 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 1548 ; AVX2-NEXT: retq 1549 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1550 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7> 1551 ret <4 x i32> %2 1552 } 1553 1554 define <4 x float> @combine_test11(<4 x float> %a, <4 x float> %b) { 1555 ; ALL-LABEL: combine_test11: 1556 ; ALL: # BB#0: 1557 
; ALL-NEXT: retq 1558 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1559 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1560 ret <4 x float> %2 1561 } 1562 1563 define <4 x float> @combine_test12(<4 x float> %a, <4 x float> %b) { 1564 ; SSE2-LABEL: combine_test12: 1565 ; SSE2: # BB#0: 1566 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1567 ; SSE2-NEXT: movaps %xmm1, %xmm0 1568 ; SSE2-NEXT: retq 1569 ; 1570 ; SSSE3-LABEL: combine_test12: 1571 ; SSSE3: # BB#0: 1572 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1573 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1574 ; SSSE3-NEXT: retq 1575 ; 1576 ; SSE41-LABEL: combine_test12: 1577 ; SSE41: # BB#0: 1578 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1579 ; SSE41-NEXT: retq 1580 ; 1581 ; AVX-LABEL: combine_test12: 1582 ; AVX: # BB#0: 1583 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1584 ; AVX-NEXT: retq 1585 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 1586 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 1587 ret <4 x float> %2 1588 } 1589 1590 define <4 x float> @combine_test13(<4 x float> %a, <4 x float> %b) { 1591 ; SSE-LABEL: combine_test13: 1592 ; SSE: # BB#0: 1593 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1594 ; SSE-NEXT: retq 1595 ; 1596 ; AVX-LABEL: combine_test13: 1597 ; AVX: # BB#0: 1598 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1599 ; AVX-NEXT: retq 1600 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1601 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1602 ret <4 x float> %2 1603 } 1604 1605 define <4 x float> @combine_test14(<4 x float> %a, <4 x float> %b) { 1606 ; SSE-LABEL: combine_test14: 1607 ; SSE: # BB#0: 1608 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] 1609 ; SSE-NEXT: retq 1610 ; 
1611 ; AVX-LABEL: combine_test14: 1612 ; AVX: # BB#0: 1613 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] 1614 ; AVX-NEXT: retq 1615 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5> 1616 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 1617 ret <4 x float> %2 1618 } 1619 1620 define <4 x float> @combine_test15(<4 x float> %a, <4 x float> %b) { 1621 ; SSE2-LABEL: combine_test15: 1622 ; SSE2: # BB#0: 1623 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0] 1624 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1625 ; SSE2-NEXT: retq 1626 ; 1627 ; SSSE3-LABEL: combine_test15: 1628 ; SSSE3: # BB#0: 1629 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0] 1630 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1631 ; SSSE3-NEXT: retq 1632 ; 1633 ; SSE41-LABEL: combine_test15: 1634 ; SSE41: # BB#0: 1635 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 1636 ; SSE41-NEXT: retq 1637 ; 1638 ; AVX-LABEL: combine_test15: 1639 ; AVX: # BB#0: 1640 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 1641 ; AVX-NEXT: retq 1642 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7> 1643 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3> 1644 ret <4 x float> %2 1645 } 1646 1647 define <4 x i32> @combine_test16(<4 x i32> %a, <4 x i32> %b) { 1648 ; ALL-LABEL: combine_test16: 1649 ; ALL: # BB#0: 1650 ; ALL-NEXT: retq 1651 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1652 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1653 ret <4 x i32> %2 1654 } 1655 1656 define <4 x i32> @combine_test17(<4 x i32> %a, <4 x i32> %b) { 1657 ; SSE2-LABEL: combine_test17: 1658 ; SSE2: # BB#0: 1659 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1660 ; SSE2-NEXT: movaps %xmm1, %xmm0 1661 ; SSE2-NEXT: retq 1662 ; 
1663 ; SSSE3-LABEL: combine_test17: 1664 ; SSSE3: # BB#0: 1665 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1666 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1667 ; SSSE3-NEXT: retq 1668 ; 1669 ; SSE41-LABEL: combine_test17: 1670 ; SSE41: # BB#0: 1671 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1672 ; SSE41-NEXT: retq 1673 ; 1674 ; AVX1-LABEL: combine_test17: 1675 ; AVX1: # BB#0: 1676 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1677 ; AVX1-NEXT: retq 1678 ; 1679 ; AVX2-LABEL: combine_test17: 1680 ; AVX2: # BB#0: 1681 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1682 ; AVX2-NEXT: retq 1683 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 1684 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 1685 ret <4 x i32> %2 1686 } 1687 1688 define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) { 1689 ; SSE-LABEL: combine_test18: 1690 ; SSE: # BB#0: 1691 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1692 ; SSE-NEXT: retq 1693 ; 1694 ; AVX-LABEL: combine_test18: 1695 ; AVX: # BB#0: 1696 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1697 ; AVX-NEXT: retq 1698 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1699 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1700 ret <4 x i32> %2 1701 } 1702 1703 define <4 x i32> @combine_test19(<4 x i32> %a, <4 x i32> %b) { 1704 ; SSE-LABEL: combine_test19: 1705 ; SSE: # BB#0: 1706 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 1707 ; SSE-NEXT: retq 1708 ; 1709 ; AVX-LABEL: combine_test19: 1710 ; AVX: # BB#0: 1711 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 1712 ; AVX-NEXT: retq 1713 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5> 1714 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 1715 ret <4 x i32> %2 1716 } 1717 1718 
; Integer version of combine_test15: composes to <b0,a1,b2,b3>.
define <4 x i32> @combine_test20(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test20:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_test20:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_test20:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_test20:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_test20:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX2-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %2
}

; %1 gathers the low qwords of the two 128-bit halves of %a, %2 the high
; qwords; one result is stored and the other returned.
define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) {
; SSE-LABEL: combine_test21:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    movdqa %xmm2, (%rdi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_test21:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_test21:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX2-NEXT:    vmovdqa %xmm2, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %1 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %2 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  store <4 x i32> %1, <4 x i32>* %ptr, align 16
  ret <4 x i32> %2
}

; Concatenation of two 64-bit loads into the low half of the result:
; expect movq + movhpd.
define <8 x float> @combine_test22(<2 x float>* %a, <2 x float>* %b) {
; SSE-LABEL: combine_test22:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movhpd (%rsi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_test22:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
; AVX-NEXT:    retq
; Current AVX2 lowering of this is still awful, not adding a test case.
  %1 = load <2 x float>, <2 x float>* %a, align 8
  %2 = load <2 x float>, <2 x float>* %b, align 8
  %3 = shufflevector <2 x float> %1, <2 x float> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %3
}

; Check some negative cases.
; FIXME: Do any of these really make sense? Are they redundant with the above tests?

; Both shuffles ultimately select only elements of %b, so this folds to a
; single shuffle of %b.
define <4 x float> @combine_test1b(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test1b:
; SSE:       # BB#0:
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1,2,0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_test1b:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[0,1,2,0]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 0>
  ret <4 x float> %2
}

; Folds to a broadcast of the low double of %b (movddup where available).
define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test2b:
; SSE2:       # BB#0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_test2b:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_test2b:
; SSE41:       # BB#0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_test2b:
; AVX:       # BB#0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
  ret <4 x float> %2
}

; Result mixes elements of both inputs (<a0,b3,b2,b3>), so two instructions
; are currently required.
define <4 x float> @combine_test3b(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test3b:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_test3b:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_test3b:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_test3b:
; AVX:       # BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 7>
  ret <4 x float> %2
}

; Selects only elements of %b (<b1,b1,b2,b3>): folds to one shuffle of %b.
define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test4b:
; SSE:       # BB#0:
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_test4b:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[1,1,2,3]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 5, i32 5, i32 2, i32 7>
  ret <4 x float> %2
}


; Verify that we correctly fold shuffles even when we use illegal vector types.

; Illegal <4 x i8> operands get widened (punpck chains on SSE2/SSSE3,
; pmovzxbd on SSE4.1+); the shuffle pair still folds to a low-element blend.
define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: combine_test1c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_test1c:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_test1c:
; SSE41:       # BB#0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_test1c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_test1c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT:    retq
  %A = load <4 x i8>, <4 x i8>* %a
  %B = load <4 x i8>, <4 x i8>* %b
  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i8> %2
}

; After widening, the shuffle pair folds to a single punpcklqdq.
define <4 x i8> @combine_test2c(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: combine_test2c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_test2c:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_test2c:
; SSE41:       # BB#0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_test2c:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %A = load <4 x i8>, <4 x i8>* %a
  %B = load <4 x i8>, <4 x i8>* %b
  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 1, i32 5>
  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
  ret <4 x i8> %2
}

; After widening, the shuffle pair folds to a single punpckhqdq.
define <4 x i8> @combine_test3c(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: combine_test3c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_test3c:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_test3c:
; SSE41:       # BB#0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_test3c:
; AVX:       # BB#0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT:    retq
  %A = load <4 x i8>, <4 x i8>* %a
  %B = load <4 x i8>, <4 x i8>* %b
  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x i8> %2
}

; After widening, the shuffle pair folds to a single blend on SSE4.1+.
define <4 x i8> @combine_test4c(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: combine_test4c:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_test4c:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_test4c:
; SSE41:       # BB#0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_test4c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_test4c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX2-NEXT:    retq
  %A = load <4 x i8>, <4 x i8>* %a
  %B = load <4 x i8>, <4 x i8>* %b
  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i8> %2
}


; The following test cases are generated from this C++ code
;
;__m128 blend_01(__m128 a, __m128 b)
;{
;  __m128 s = a;
;  s = _mm_blend_ps( s, b, 1<<0 );
;  s = _mm_blend_ps( s, b, 1<<1 );
;  return s;
;}
;
;__m128 blend_02(__m128 a, __m128 b)
;{
;  __m128 s = a;
;  s = _mm_blend_ps( s, b, 1<<0 );
;  s = _mm_blend_ps( s, b, 1<<2 );
;  return s;
;}
;
;__m128 blend_123(__m128 a, __m128 b)
;{
;  __m128 s = a;
;  s = _mm_blend_ps( s, b, 1<<1 );
;  s = _mm_blend_ps( s, b, 1<<2 );
;  s = _mm_blend_ps( s, b, 1<<3 );
;  return s;
;}

; Ideally,
; we should collapse the following shuffles into a single one.

define <4 x float> @combine_blend_01(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_blend_01:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_blend_01:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_blend_01:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_blend_01:
; AVX:       # BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
  %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x float> %shuffle6
}

define <4 x float> @combine_blend_02(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_blend_02:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_blend_02:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_blend_02:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_blend_02:
; AVX:       # BB#0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 undef, i32 3>
  %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x float> %shuffle6
}

define <4 x float> @combine_blend_123(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_blend_123:
; SSE2:       # BB#0:
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_blend_123:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_blend_123:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_blend_123:
; AVX:       # BB#0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
  %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  %shuffle12 = shufflevector <4 x float> %shuffle6, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x float> %shuffle12
}

; The mask pair composes to <b2,b3,a2,a3>: a single punpckhqdq of %b and %a.
define <4 x i32> @combine_test_movhl_1(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test_movhl_1:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_test_movhl_1:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 7, i32 5, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

; Same final result as combine_test_movhl_1, reached via different masks.
define <4 x i32> @combine_test_movhl_2(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test_movhl_2:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_test_movhl_2:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 0, i32 3, i32 6>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 3, i32 7, i32 0, i32 2>
  ret <4 x i32> %2
}

; Same final result as combine_test_movhl_1, reached via different masks.
define <4 x i32> @combine_test_movhl_3(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test_movhl_3:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_test_movhl_3:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 6, i32 3, i32 2>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 0, i32 3, i32 2>
  ret <4 x i32> %2
}


; Verify that we fold shuffles according to rule:
;  (shuffle(shuffle A, Undef, M0), B, M1) -> (shuffle A, B, M2)

define <4 x float> @combine_undef_input_test1(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test1:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_undef_input_test1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_undef_input_test1:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_undef_input_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test2(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test2:
; SSE:       # BB#0:
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_undef_input_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test3(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test3:
; SSE:       # BB#0:
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_undef_input_test3:
; AVX:       # BB#0:
; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test4:
; SSE:       # BB#0:
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_undef_input_test4:
; AVX:       # BB#0:
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test5:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_undef_input_test5:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_undef_input_test5:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_undef_input_test5:
; AVX:       # BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
  ret <4 x float> %2
}


; Verify that we fold shuffles according to rule:
;  (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2)

define <4 x float> @combine_undef_input_test6(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test6:
; ALL:       # BB#0:
; ALL-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test7(<4 x float> %a) {
; SSE2-LABEL: combine_undef_input_test7:
; SSE2:       # BB#0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_undef_input_test7:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_undef_input_test7:
; SSE41:       # BB#0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_undef_input_test7:
; AVX:       # BB#0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test8(<4 x float> %a) {
; SSE2-LABEL: combine_undef_input_test8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_undef_input_test8:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_undef_input_test8:
; SSE41:       # BB#0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_undef_input_test8:
; AVX:       # BB#0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test9(<4 x float> %a) {
; SSE-LABEL: combine_undef_input_test9:
; SSE:       # BB#0:
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_undef_input_test9:
; AVX:       # BB#0:
; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test10(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test10:
; ALL:       # BB#0:
; ALL-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
  ret <4 x float> %2
}

; The following tests mirror test1-test5 with the pre-shuffled vector used as
; the second shufflevector operand instead of the first.
define <4 x float> @combine_undef_input_test11(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test11:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: combine_undef_input_test11:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_undef_input_test11:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_undef_input_test11:
; AVX:       # BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test12(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test12:
; SSE:       # BB#0:
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_undef_input_test12:
; AVX:       # BB#0:
; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test13(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test13:
2473 ; SSE: # BB#0: 2474 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2475 ; SSE-NEXT: retq 2476 ; 2477 ; AVX-LABEL: combine_undef_input_test13: 2478 ; AVX: # BB#0: 2479 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2480 ; AVX-NEXT: retq 2481 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7> 2482 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 5, i32 0, i32 5> 2483 ret <4 x float> %2 2484 } 2485 2486 define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) { 2487 ; SSE-LABEL: combine_undef_input_test14: 2488 ; SSE: # BB#0: 2489 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 2490 ; SSE-NEXT: movapd %xmm1, %xmm0 2491 ; SSE-NEXT: retq 2492 ; 2493 ; AVX-LABEL: combine_undef_input_test14: 2494 ; AVX: # BB#0: 2495 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] 2496 ; AVX-NEXT: retq 2497 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5> 2498 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 2499 ret <4 x float> %2 2500 } 2501 2502 define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) { 2503 ; SSE2-LABEL: combine_undef_input_test15: 2504 ; SSE2: # BB#0: 2505 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 2506 ; SSE2-NEXT: movapd %xmm1, %xmm0 2507 ; SSE2-NEXT: retq 2508 ; 2509 ; SSSE3-LABEL: combine_undef_input_test15: 2510 ; SSSE3: # BB#0: 2511 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 2512 ; SSSE3-NEXT: movapd %xmm1, %xmm0 2513 ; SSSE3-NEXT: retq 2514 ; 2515 ; SSE41-LABEL: combine_undef_input_test15: 2516 ; SSE41: # BB#0: 2517 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 2518 ; SSE41-NEXT: retq 2519 ; 2520 ; AVX-LABEL: combine_undef_input_test15: 2521 ; AVX: # BB#0: 2522 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 2523 ; AVX-NEXT: retq 2524 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, 
i32 1, i32 3> 2525 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3> 2526 ret <4 x float> %2 2527 } 2528 2529 2530 ; Verify that shuffles are canonicalized according to rules: 2531 ; shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) 2532 ; 2533 ; This allows to trigger the following combine rule: 2534 ; (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2) 2535 ; 2536 ; As a result, all the shuffle pairs in each function below should be 2537 ; combined into a single legal shuffle operation. 2538 2539 define <4 x float> @combine_undef_input_test16(<4 x float> %a) { 2540 ; ALL-LABEL: combine_undef_input_test16: 2541 ; ALL: # BB#0: 2542 ; ALL-NEXT: retq 2543 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1> 2544 %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 3> 2545 ret <4 x float> %2 2546 } 2547 2548 define <4 x float> @combine_undef_input_test17(<4 x float> %a) { 2549 ; SSE2-LABEL: combine_undef_input_test17: 2550 ; SSE2: # BB#0: 2551 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 2552 ; SSE2-NEXT: retq 2553 ; 2554 ; SSSE3-LABEL: combine_undef_input_test17: 2555 ; SSSE3: # BB#0: 2556 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2557 ; SSSE3-NEXT: retq 2558 ; 2559 ; SSE41-LABEL: combine_undef_input_test17: 2560 ; SSE41: # BB#0: 2561 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2562 ; SSE41-NEXT: retq 2563 ; 2564 ; AVX-LABEL: combine_undef_input_test17: 2565 ; AVX: # BB#0: 2566 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 2567 ; AVX-NEXT: retq 2568 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7> 2569 %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1> 2570 ret <4 x float> %2 2571 } 2572 2573 define <4 x float> @combine_undef_input_test18(<4 x float> %a) { 2574 ; SSE2-LABEL: combine_undef_input_test18: 2575 ; SSE2: # BB#0: 2576 ; 
SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 2577 ; SSE2-NEXT: retq 2578 ; 2579 ; SSSE3-LABEL: combine_undef_input_test18: 2580 ; SSSE3: # BB#0: 2581 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2582 ; SSSE3-NEXT: retq 2583 ; 2584 ; SSE41-LABEL: combine_undef_input_test18: 2585 ; SSE41: # BB#0: 2586 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 2587 ; SSE41-NEXT: retq 2588 ; 2589 ; AVX-LABEL: combine_undef_input_test18: 2590 ; AVX: # BB#0: 2591 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 2592 ; AVX-NEXT: retq 2593 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7> 2594 %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5> 2595 ret <4 x float> %2 2596 } 2597 2598 define <4 x float> @combine_undef_input_test19(<4 x float> %a) { 2599 ; SSE-LABEL: combine_undef_input_test19: 2600 ; SSE: # BB#0: 2601 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 2602 ; SSE-NEXT: retq 2603 ; 2604 ; AVX-LABEL: combine_undef_input_test19: 2605 ; AVX: # BB#0: 2606 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1] 2607 ; AVX-NEXT: retq 2608 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5> 2609 %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 2610 ret <4 x float> %2 2611 } 2612 2613 define <4 x float> @combine_undef_input_test20(<4 x float> %a) { 2614 ; ALL-LABEL: combine_undef_input_test20: 2615 ; ALL: # BB#0: 2616 ; ALL-NEXT: retq 2617 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3> 2618 %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3> 2619 ret <4 x float> %2 2620 } 2621 2622 ; These tests are designed to test the ability to combine away unnecessary 2623 ; operations feeding into a shuffle. 
The AVX cases are the important ones as 2624 ; they leverage operations which cannot be done naturally on the entire vector 2625 ; and thus are decomposed into multiple smaller operations. 2626 2627 define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) { 2628 ; SSE-LABEL: combine_unneeded_subvector1: 2629 ; SSE: # BB#0: 2630 ; SSE-NEXT: paddd {{.*}}(%rip), %xmm1 2631 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,2,1,0] 2632 ; SSE-NEXT: movdqa %xmm0, %xmm1 2633 ; SSE-NEXT: retq 2634 ; 2635 ; AVX1-LABEL: combine_unneeded_subvector1: 2636 ; AVX1: # BB#0: 2637 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2638 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 2639 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 2640 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2641 ; AVX1-NEXT: retq 2642 ; 2643 ; AVX2-LABEL: combine_unneeded_subvector1: 2644 ; AVX2: # BB#0: 2645 ; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 2646 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 2647 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 2648 ; AVX2-NEXT: retq 2649 %b = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 2650 %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 2651 ret <8 x i32> %c 2652 } 2653 2654 define <8 x i32> @combine_unneeded_subvector2(<8 x i32> %a, <8 x i32> %b) { 2655 ; SSE-LABEL: combine_unneeded_subvector2: 2656 ; SSE: # BB#0: 2657 ; SSE-NEXT: paddd {{.*}}(%rip), %xmm1 2658 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,2,1,0] 2659 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,2,1,0] 2660 ; SSE-NEXT: retq 2661 ; 2662 ; AVX1-LABEL: combine_unneeded_subvector2: 2663 ; AVX1: # BB#0: 2664 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2665 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 2666 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2667 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 2668 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 2669 ; AVX1-NEXT: retq 
2670 ; 2671 ; AVX2-LABEL: combine_unneeded_subvector2: 2672 ; AVX2: # BB#0: 2673 ; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 2674 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 2675 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 2676 ; AVX2-NEXT: retq 2677 %c = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 2678 %d = shufflevector <8 x i32> %b, <8 x i32> %c, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 2679 ret <8 x i32> %d 2680 } 2681 2682 define <4 x float> @combine_insertps1(<4 x float> %a, <4 x float> %b) { 2683 ; SSE2-LABEL: combine_insertps1: 2684 ; SSE2: # BB#0: 2685 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,0] 2686 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3] 2687 ; SSE2-NEXT: movaps %xmm1, %xmm0 2688 ; SSE2-NEXT: retq 2689 ; 2690 ; SSSE3-LABEL: combine_insertps1: 2691 ; SSSE3: # BB#0: 2692 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,0] 2693 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3] 2694 ; SSSE3-NEXT: movaps %xmm1, %xmm0 2695 ; SSSE3-NEXT: retq 2696 ; 2697 ; SSE41-LABEL: combine_insertps1: 2698 ; SSE41: # BB#0: 2699 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3] 2700 ; SSE41-NEXT: retq 2701 ; 2702 ; AVX-LABEL: combine_insertps1: 2703 ; AVX: # BB#0: 2704 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3] 2705 ; AVX-NEXT: retq 2706 2707 %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 6, i32 2, i32 4> 2708 %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> <i32 5, i32 1, i32 6, i32 3> 2709 ret <4 x float> %d 2710 } 2711 2712 define <4 x float> @combine_insertps2(<4 x float> %a, <4 x float> %b) { 2713 ; SSE2-LABEL: combine_insertps2: 2714 ; SSE2: # BB#0: 2715 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,0] 2716 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 2717 ; SSE2-NEXT: movaps %xmm1, %xmm0 2718 ; SSE2-NEXT: retq 2719 ; 2720 ; SSSE3-LABEL: combine_insertps2: 
2721 ; SSSE3: # BB#0: 2722 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,0] 2723 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 2724 ; SSSE3-NEXT: movaps %xmm1, %xmm0 2725 ; SSSE3-NEXT: retq 2726 ; 2727 ; SSE41-LABEL: combine_insertps2: 2728 ; SSE41: # BB#0: 2729 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3] 2730 ; SSE41-NEXT: retq 2731 ; 2732 ; AVX-LABEL: combine_insertps2: 2733 ; AVX: # BB#0: 2734 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3] 2735 ; AVX-NEXT: retq 2736 2737 %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 1, i32 6, i32 7> 2738 %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> <i32 4, i32 6, i32 2, i32 3> 2739 ret <4 x float> %d 2740 } 2741 2742 define <4 x float> @combine_insertps3(<4 x float> %a, <4 x float> %b) { 2743 ; SSE2-LABEL: combine_insertps3: 2744 ; SSE2: # BB#0: 2745 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] 2746 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] 2747 ; SSE2-NEXT: retq 2748 ; 2749 ; SSSE3-LABEL: combine_insertps3: 2750 ; SSSE3: # BB#0: 2751 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] 2752 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] 2753 ; SSSE3-NEXT: retq 2754 ; 2755 ; SSE41-LABEL: combine_insertps3: 2756 ; SSE41: # BB#0: 2757 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 2758 ; SSE41-NEXT: retq 2759 ; 2760 ; AVX-LABEL: combine_insertps3: 2761 ; AVX: # BB#0: 2762 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 2763 ; AVX-NEXT: retq 2764 2765 %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 4, i32 2, i32 5> 2766 %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32><i32 4, i32 1, i32 5, i32 3> 2767 ret <4 x float> %d 2768 } 2769 2770 define <4 x float> @combine_insertps4(<4 x float> %a, <4 x float> %b) { 2771 ; SSE2-LABEL: combine_insertps4: 2772 ; SSE2: # BB#0: 2773 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 
2774 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 2775 ; SSE2-NEXT: retq 2776 ; 2777 ; SSSE3-LABEL: combine_insertps4: 2778 ; SSSE3: # BB#0: 2779 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 2780 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 2781 ; SSSE3-NEXT: retq 2782 ; 2783 ; SSE41-LABEL: combine_insertps4: 2784 ; SSE41: # BB#0: 2785 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 2786 ; SSE41-NEXT: retq 2787 ; 2788 ; AVX-LABEL: combine_insertps4: 2789 ; AVX: # BB#0: 2790 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 2791 ; AVX-NEXT: retq 2792 2793 %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 4, i32 2, i32 5> 2794 %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32><i32 4, i32 1, i32 6, i32 5> 2795 ret <4 x float> %d 2796 } 2797 2798 define <4 x float> @PR22377(<4 x float> %a, <4 x float> %b) { 2799 ; SSE-LABEL: PR22377: 2800 ; SSE: # BB#0: # %entry 2801 ; SSE-NEXT: movaps %xmm0, %xmm1 2802 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3,1,3] 2803 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2] 2804 ; SSE-NEXT: addps %xmm0, %xmm1 2805 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2806 ; SSE-NEXT: retq 2807 ; 2808 ; AVX-LABEL: PR22377: 2809 ; AVX: # BB#0: # %entry 2810 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,1,3] 2811 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2] 2812 ; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm1 2813 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2814 ; AVX-NEXT: retq 2815 entry: 2816 %s1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 1, i32 3> 2817 %s2 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> 2818 %r2 = fadd <4 x float> %s1, %s2 2819 %s3 = shufflevector <4 x float> %s2, <4 x float> %r2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 2820 ret <4 x float> %s3 2821 } 2822 2823 define <4 x float> @PR22390(<4 x float> %a, <4 x float> %b) 
{ 2824 ; SSE2-LABEL: PR22390: 2825 ; SSE2: # BB#0: # %entry 2826 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2] 2827 ; SSE2-NEXT: movaps %xmm0, %xmm2 2828 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] 2829 ; SSE2-NEXT: addps %xmm0, %xmm2 2830 ; SSE2-NEXT: movaps %xmm2, %xmm0 2831 ; SSE2-NEXT: retq 2832 ; 2833 ; SSSE3-LABEL: PR22390: 2834 ; SSSE3: # BB#0: # %entry 2835 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2] 2836 ; SSSE3-NEXT: movaps %xmm0, %xmm2 2837 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] 2838 ; SSSE3-NEXT: addps %xmm0, %xmm2 2839 ; SSSE3-NEXT: movaps %xmm2, %xmm0 2840 ; SSSE3-NEXT: retq 2841 ; 2842 ; SSE41-LABEL: PR22390: 2843 ; SSE41: # BB#0: # %entry 2844 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2] 2845 ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3] 2846 ; SSE41-NEXT: addps %xmm1, %xmm0 2847 ; SSE41-NEXT: retq 2848 ; 2849 ; AVX-LABEL: PR22390: 2850 ; AVX: # BB#0: # %entry 2851 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,1,2] 2852 ; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3] 2853 ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 2854 ; AVX-NEXT: retq 2855 entry: 2856 %s1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2> 2857 %s2 = shufflevector <4 x float> %s1, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 2858 %r2 = fadd <4 x float> %s1, %s2 2859 ret <4 x float> %r2 2860 } 2861 2862 define <8 x float> @PR22412(<8 x float> %a, <8 x float> %b) { 2863 ; SSE2-LABEL: PR22412: 2864 ; SSE2: # BB#0: # %entry 2865 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] 2866 ; SSE2-NEXT: movapd %xmm2, %xmm0 2867 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] 2868 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] 2869 ; SSE2-NEXT: movaps %xmm3, %xmm1 2870 ; SSE2-NEXT: retq 2871 ; 2872 ; SSSE3-LABEL: PR22412: 2873 ; SSSE3: # BB#0: # %entry 2874 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] 2875 ; SSSE3-NEXT: movapd %xmm2, %xmm0 2876 ; SSSE3-NEXT: 
shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] 2877 ; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] 2878 ; SSSE3-NEXT: movaps %xmm3, %xmm1 2879 ; SSSE3-NEXT: retq 2880 ; 2881 ; SSE41-LABEL: PR22412: 2882 ; SSE41: # BB#0: # %entry 2883 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] 2884 ; SSE41-NEXT: movapd %xmm0, %xmm1 2885 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm3[3,2] 2886 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[3,2] 2887 ; SSE41-NEXT: movaps %xmm1, %xmm0 2888 ; SSE41-NEXT: movaps %xmm3, %xmm1 2889 ; SSE41-NEXT: retq 2890 ; 2891 ; AVX1-LABEL: PR22412: 2892 ; AVX1: # BB#0: # %entry 2893 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 2894 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 2895 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0],ymm1[3,2],ymm0[5,4],ymm1[7,6] 2896 ; AVX1-NEXT: retq 2897 ; 2898 ; AVX2-LABEL: PR22412: 2899 ; AVX2: # BB#0: # %entry 2900 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 2901 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,7,6,5,4,3,2] 2902 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 2903 ; AVX2-NEXT: retq 2904 entry: 2905 %s1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2906 %s2 = shufflevector <8 x float> %s1, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2> 2907 ret <8 x float> %s2 2908 } 2909