; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; Verify that we don't emit packed vector shift instructions if the
; condition used by the vector select is a vector of constants.

define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test3:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test4:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test5:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: test5:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}
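
; test6 selects between two copies of the same value, so it folds to a no-op
; regardless of the constant mask (note that %b is unused).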
define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test6:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: test6:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a, <8 x i16> %a
  ret <8 x i16> %1
}

define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test7:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test8(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test8:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test9:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test9:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test10:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: test10:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}
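
; test11's mask doesn't map onto a single move/shuffle pattern, so SSE2 has to
; materialize the mask and blend with and/andnot/or, while SSE4.1 and AVX can
; use one pblendw. The undef mask elements are free to resolve either way.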
define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test11:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,0,0,65535,65535,0]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andnps %xmm1, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test11:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test11:
; AVX:       # %bb.0:
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test12:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test12:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test13:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test13:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

; Fold (vselect (build_vector AllOnes), N1, N2) -> N1
define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test14:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: test14:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 undef, i1 true, i1 undef>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test15:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: test15:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

; Fold (vselect (build_vector AllZeros), N1, N2) -> N2
define <4 x float> @test16(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 false, i1 undef, i1 false, i1 undef>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test17:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test17:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}
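
; Tests 18-25 cover selects that replace only the lowest element (or the low
; 64-bit half) of one source: SSE2 lowers these to movss/movsd, while SSE4.1
; and AVX prefer a single immediate blendps.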
define <4 x float> @test18(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test18:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test18:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test18:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: test19:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test19:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test19:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %1
}

define <2 x double> @test20(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: test20:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test20:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test20:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %b
  ret <2 x double> %1
}

define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: test21:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test21:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test21:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = select <2 x i1> <i1 false, i1 true>, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %1
}

define <4 x float> @test22(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test22:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test22:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test22:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: test23:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test23:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test23:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %1
}

define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: test24:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test24:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test24:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
  ret <2 x double> %1
}

define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: test25:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test25:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test25:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %1
}
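
; The selects below choose between shuffles that populate disjoint halves of
; the result, so each shuffle+select pair should fold into a single
; concatenating shuffle (movlhps) and the subtraction then operates on fully
; formed vectors.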
define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
; SSE-LABEL: select_of_shuffles_0:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    subps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: select_of_shuffles_0:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX-NEXT:    vsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %2 = shufflevector <2 x float> %a1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
  %3 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %2, <4 x float> %1
  %4 = shufflevector <2 x float> %b0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %5 = shufflevector <2 x float> %b1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
  %6 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %5, <4 x float> %4
  %7 = fsub <4 x float> %3, %6
  ret <4 x float> %7
}

; PR20677
define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) {
; SSE-LABEL: select_illegal:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm5
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm6
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm7
; SSE-NEXT:    movaps %xmm7, 112(%rdi)
; SSE-NEXT:    movaps %xmm6, 96(%rdi)
; SSE-NEXT:    movaps %xmm5, 80(%rdi)
; SSE-NEXT:    movaps %xmm4, 64(%rdi)
; SSE-NEXT:    movaps %xmm3, 48(%rdi)
; SSE-NEXT:    movaps %xmm2, 32(%rdi)
; SSE-NEXT:    movaps %xmm1, 16(%rdi)
; SSE-NEXT:    movaps %xmm0, (%rdi)
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: select_illegal:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %ymm6, %ymm2
; AVX-NEXT:    vmovaps %ymm7, %ymm3
; AVX-NEXT:    retq
  %sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
  ret <16 x double> %sel
}

; Make sure we can optimize the condition MSB when it is used by 2 selects.
; The v2i1 here will be passed as v2i64 and we will emit a sign_extend_inreg to fill the upper bits.
; We should be able to remove the sra from the sign_extend_inreg to leave only shl.
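; Concretely, the sign_extend_inreg would lower to something like
;   psllq $63, %xmm0
;   <64-bit arithmetic shift right by 63>
; but blendvpd only reads the sign bit of each lane, so the shift right is
; redundant and only the psllq $63 should survive (see the SSE41/AVX checks).
; SSE2 has no variable blend and selects via pand/pandn, which needs every
; mask bit, so it must still materialize the full sign extension.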
define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
; SSE2-LABEL: shrunkblend_2uses:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psllq $63, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    movdqa %xmm0, %xmm5
; SSE2-NEXT:    pandn %xmm2, %xmm5
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm5
; SSE2-NEXT:    pand %xmm0, %xmm3
; SSE2-NEXT:    pandn %xmm4, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    paddq %xmm5, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: shrunkblend_2uses:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psllq $63, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm4
; SSE41-NEXT:    paddq %xmm2, %xmm4
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shrunkblend_2uses:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vblendvpd %xmm0, %xmm3, %xmm4, %xmm0
; AVX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
  %y = select <2 x i1> %cond, <2 x i64> %c, <2 x i64> %d
  %z = add <2 x i64> %x, %y
  ret <2 x i64> %z
}

; Similar to the above, but the condition has a use that isn't the condition
; of a vselect, so we can't optimize.
define <2 x i64> @shrunkblend_nonvselectuse(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
; SSE2-LABEL: shrunkblend_nonvselectuse:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psllq $63, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE2-NEXT:    movdqa %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    paddq %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: shrunkblend_nonvselectuse:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psllq $63, %xmm0
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    paddq %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shrunkblend_nonvselectuse:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
  %y = sext <2 x i1> %cond to <2 x i64>
  %z = add <2 x i64> %x, %y
  ret <2 x i64> %z
}
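
; Note: in shrunkblend_nonvselectuse above, the sign-extended condition is
; consumed directly by the add, so the full extension must be materialized:
; SSE keeps the psrad $31 + pshufd, and AVX recreates the sign extension with
; a pcmpgtq against zero.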