; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s

; OR of a scalar with itself: only the copy into the return register remains.
define i32 @or_self(i32 %x) {
; CHECK-LABEL: or_self:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %same = or i32 %x, %x
  ret i32 %same
}

; Vector form of the above: the argument is returned untouched.
define <4 x i32> @or_self_vec(<4 x i32> %x) {
; CHECK-LABEL: or_self_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %same = or <4 x i32> %x, %x
  ret <4 x i32> %same
}

; Each test below ORs two values whose non-zero lanes do not overlap and must
; be lowered to a single blend instruction.

; Result lanes: <a0, b1>.
define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %from.b = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %merged = or <2 x i64> %from.a, %from.b
  ret <2 x i64> %merged
}

; Result lanes: <b0, b1, a2, a3>.
define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Result lanes: <b0, a1>.
define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %from.b = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %merged = or <2 x i64> %from.a, %from.b
  ret <2 x i64> %merged
}

; Result lanes: <a0, b1, b2, b3>.
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Result lanes: <b0, a1, a2, a3>.
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Result lanes: <a0, a1, b2, b3>.
define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Same lane selection as test6, expressed with complementary AND masks.
define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT:    retq
  %keep.a = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
  %keep.b = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
  %merged = or <4 x i32> %keep.a, %keep.b
  ret <4 x i32> %merged
}

; Same lane selection as test1, expressed with complementary AND masks.
define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT:    retq
  %keep.a = and <2 x i64> %a, <i64 -1, i64 0>
  %keep.b = and <2 x i64> %b, <i64 0, i64 -1>
  %merged = or <2 x i64> %keep.a, %keep.b
  ret <2 x i64> %merged
}

; Same lane selection as test2, expressed with complementary AND masks.
define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %keep.a = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
  %keep.b = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
  %merged = or <4 x i32> %keep.a, %keep.b
  ret <4 x i32> %merged
}

; Same lane selection as test3, expressed with complementary AND masks.
define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %keep.a = and <2 x i64> %a, <i64 0, i64 -1>
  %keep.b = and <2 x i64> %b, <i64 -1, i64 0>
  %merged = or <2 x i64> %keep.a, %keep.b
  ret <2 x i64> %merged
}

; Same lane selection as test4, expressed with complementary AND masks.
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT:    retq
  %keep.a = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
  %keep.b = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
  %merged = or <4 x i32> %keep.a, %keep.b
  ret <4 x i32> %merged
}

; Same lane selection as test5, expressed with complementary AND masks.
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT:    retq
  %keep.a = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
  %keep.b = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
  %merged = or <4 x i32> %keep.a, %keep.b
  ret <4 x i32> %merged
}

; Verify that the following test cases are folded into single shuffles.

; Result lanes: <a1, a1, b2, b3>.
define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 4, i32 4>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Result lanes: <a0, b0>.
define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test14:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
  %from.a = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %from.b = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %merged = or <2 x i64> %from.a, %from.b
  ret <2 x i64> %merged
}

; Result lanes: <b2, b1, a2, a1>.
define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test15:
; CHECK:       # %bb.0:
; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,1],xmm0[2,1]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 1>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Result lanes: <b0, a0>.
define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %from.a = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %from.b = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %merged = or <2 x i64> %from.a, %from.b
  ret <2 x i64> %merged
}

; Verify that the dag-combiner does not fold an OR of two shuffles into a
; single shuffle instruction when the shuffle indexes are not compatible.

; Lane 1 receives a0 from the first shuffle and b1 from the second.
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test17:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psllq $32, %xmm0
; CHECK-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 2>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; First shuffle yields <0, a0, 0, 0>, second yields <b0, 0, 0, 0>.
define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test18:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; CHECK-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 4>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Lane 3 receives a3 from the first shuffle and b2 from the second.
define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test19:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,3]
; CHECK-NEXT:    pxor %xmm3, %xmm3
; CHECK-NEXT:    pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    por %xmm2, %xmm0
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 3>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 2>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Both shuffles keep lane 0 of their input and zero lane 1.
define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test20:
; CHECK:       # %bb.0:
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    retq
  %from.a = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %from.b = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %merged = or <2 x i64> %from.a, %from.b
  ret <2 x i64> %merged
}

; Both shuffles zero lane 0 and move lane 0 of their input into lane 1.
define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test21:
; CHECK:       # %bb.0:
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %from.a = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %from.b = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %merged = or <2 x i64> %from.a, %from.b
  ret <2 x i64> %merged
}

; Verify that the dag-combiner keeps the correct domain for float/double
; vectors that are bitcast in order to use the mask-or blend combine.

; <2 x double> inputs masked as <2 x i64>: lane 0 from %a1, lane 1 from %a0.
define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test22:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %a0.int = bitcast <2 x double> %a0 to <2 x i64>
  %a1.int = bitcast <2 x double> %a1 to <2 x i64>
  %keep.a0 = and <2 x i64> %a0.int, <i64 0, i64 -1>
  %keep.a1 = and <2 x i64> %a1.int, <i64 -1, i64 0>
  %merged = or <2 x i64> %keep.a0, %keep.a1
  %merged.fp = bitcast <2 x i64> %merged to <2 x double>
  ret <2 x double> %merged.fp
}

; <4 x float> inputs masked as <4 x i32>: lanes 1,2 from %a0, lanes 0,3 from %a1.
define <4 x float> @test23(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test23:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; CHECK-NEXT:    retq
  %a0.int = bitcast <4 x float> %a0 to <4 x i32>
  %a1.int = bitcast <4 x float> %a1 to <4 x i32>
  %keep.a0 = and <4 x i32> %a0.int, <i32 0, i32 -1, i32 -1, i32 0>
  %keep.a1 = and <4 x i32> %a1.int, <i32 -1, i32 0, i32 0, i32 -1>
  %merged = or <4 x i32> %keep.a0, %keep.a1
  %merged.fp = bitcast <4 x i32> %merged to <4 x float>
  ret <4 x float> %merged.fp
}

; <4 x float> inputs masked as <2 x i64>: low half from %a1, high half from %a0.
define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test24:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %a0.int = bitcast <4 x float> %a0 to <2 x i64>
  %a1.int = bitcast <4 x float> %a1 to <2 x i64>
  %keep.a0 = and <2 x i64> %a0.int, <i64 0, i64 -1>
  %keep.a1 = and <2 x i64> %a1.int, <i64 -1, i64 0>
  %merged = or <2 x i64> %keep.a0, %keep.a1
  %merged.fp = bitcast <2 x i64> %merged to <4 x float>
  ret <4 x float> %merged.fp
}

; Like test23, but the second operand is a constant splat of 1.0.
define <4 x float> @test25(<4 x float> %a0) {
; CHECK-LABEL: test25:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2],mem[3]
; CHECK-NEXT:    retq
  %a0.int = bitcast <4 x float> %a0 to <4 x i32>
  %ones.int = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32>
  %keep.a0 = and <4 x i32> %a0.int, <i32 0, i32 -1, i32 -1, i32 0>
  %keep.ones = and <4 x i32> %ones.int, <i32 -1, i32 0, i32 0, i32 -1>
  %merged = or <4 x i32> %keep.a0, %keep.ones
  %merged.fp = bitcast <4 x i32> %merged to <4 x float>
  ret <4 x float> %merged.fp
}

; Verify that the DAGCombiner doesn't crash when it attempts to check whether a
; shuffle with an illegal type has a legal mask. Method 'isShuffleMaskLegal'
; only knows how to handle legal vector value types.
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_crash:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %from.b = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %merged = or <4 x i8> %from.a, %from.b
  ret <4 x i8> %merged
}

; Verify that we can fold regardless of which shuffle operand is the
; zeroinitializer.

; Zero vector is the first operand of the %a shuffle only.
define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Zero vector is the first operand of both shuffles.
define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2c:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
  %from.b = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 0>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Zero vector is the first operand of the %b shuffle only.
define <4 x i32> @test2d(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %from.b = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 0>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Make sure the fold still applies when a mask element is undef where an index
; pointing to the zero vector would otherwise be.

; Lane 0 of the %a shuffle mask is undef.
define <4 x i32> @test2e(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2e:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 undef, i32 4, i32 2, i32 3>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; Lane 0 of the %b shuffle mask is undef.
define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2f:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %from.a = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %from.b = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 undef, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %from.a, %from.b
  ret <4 x i32> %merged
}

; (or (and X, c1), c2) -> (and (or X, c2), c1|c2) iff (c1 & c2) != 0

; Splat constants: c1 = 7, c2 = 3.
define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
; CHECK-LABEL: or_and_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
  %masked = and <2 x i64> %a0, <i64 7, i64 7>
  %merged = or <2 x i64> %masked, <i64 3, i64 3>
  ret <2 x i64> %merged
}

; Non-splat constants; c1 & c2 is non-zero in every lane.
define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: or_and_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
  %masked = and <4 x i32> %a0, <i32 1, i32 3, i32 5, i32 7>
  %merged = or <4 x i32> %masked, <i32 3, i32 2, i32 15, i32 2>
  ret <4 x i32> %merged
}

; If all masked bits are going to be set, that's a constant fold.

define <4 x i32> @or_and_v4i32_fold(<4 x i32> %a0) {
; CHECK-LABEL: or_and_v4i32_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
; CHECK-NEXT:    retq
  %masked = and <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
  %merged = or <4 x i32> %masked, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %merged
}

; fold (or x, c) -> c iff (x & ~c) == 0

; zext from i32 leaves the upper 32 bits clear; the constant sets the lower 32.
define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
; CHECK-LABEL: or_zext_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295]
; CHECK-NEXT:    retq
  %wide = zext <2 x i32> %a0 to <2 x i64>
  %merged = or <2 x i64> %wide, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %merged
}

; zext from i16 leaves the upper 16 bits clear; the constant sets the lower 16.
define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
; CHECK-LABEL: or_zext_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
; CHECK-NEXT:    retq
  %wide = zext <4 x i16> %a0 to <4 x i32>
  %merged = or <4 x i32> %wide, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %merged
}