; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s


; Verify that each of the following test cases is folded into a single
; instruction which performs a blend operation.
;
; In test1-test6 each 'or' operand is a shuffle of an input with a zero vector:
; mask indices below the element count select a lane of the input, the
; remaining indices select a zero lane. In test7-test12 the same lane selection
; is spelled as an 'and' with a constant whose elements are all-ones or zero.
; In every case a given lane is non-zero in at most one of the two operands.

; test1: shuffle form, result lanes are <a[0], b[1]>.
define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test1:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; test2: shuffle form, result lanes are <b[0], b[1], a[2], a[3]>.
define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; test3: shuffle form, result lanes are <b[0], a[1]>.
define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test3:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; test4: shuffle form, result lanes are <a[0], b[1], b[2], b[3]>.
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test4:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; test5: shuffle form, result lanes are <b[0], a[1], a[2], a[3]>.
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test5:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; test6: shuffle form, result lanes are <a[0], a[1], b[2], b[3]>.
define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test6:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; test7: and-mask form, result lanes are <a[0], a[1], b[2], b[3]>.
define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test7:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; test8: and-mask form, result lanes are <a[0], b[1]>.
define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test8:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <2 x i64> %a, <i64 -1, i64 0>
  %and2 = and <2 x i64> %b, <i64 0, i64 -1>
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}


; test9: and-mask form, result lanes are <b[0], b[1], a[2], a[3]>.
define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test9:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
  %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; test10: and-mask form, result lanes are <b[0], a[1]>.
define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test10:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <2 x i64> %a, <i64 0, i64 -1>
  %and2 = and <2 x i64> %b, <i64 -1, i64 0>
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}


; test11: and-mask form, result lanes are <a[0], b[1], b[2], b[3]>.
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test11:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; test12: and-mask form, result lanes are <b[0], a[1], a[2], a[3]>.
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test12:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
  %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; Verify that the following test cases are folded into single shuffles.
; The expected output of these tests contains no 'por'; some of them still need
; an extra permute or register move around the final blend/unpack.
;
; test13: result lanes are <a[1], a[1], b[2], b[3]>; the 'a' half has to be
; permuted before it can be blended with 'b'.
define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test13:
; CHECK:       # BB#0:
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; test14: result lanes are <a[0], b[0]>, i.e. an unpack of the two low halves.
define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test14:
; CHECK:       # BB#0:
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; test15: result lanes are <b[2], b[1], a[2], a[1]>; both inputs are permuted
; before the blend.
define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test15:
; CHECK:       # BB#0:
; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,2,1]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,1,2,3]
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; test16: result lanes are <b[0], a[0]>; same as test14 with the operands
; swapped, so the unpack result has to be moved back into xmm0.
define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test16:
; CHECK:       # BB#0:
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; Verify that the dag-combiner does not fold an OR of two shuffles into a single
; shuffle instruction when the shuffle indexes are not compatible.

; test17: operands are <0, a[0], 0, a[2]> and <b[0], b[1], 0, 0>; lane 1 is
; non-zero in both, so the expected output keeps the 'por'.
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test17:
; CHECK:       # BB#0:
; CHECK-NEXT:    psllq $32, %xmm0
; CHECK-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; test18: operands are <0, a[0], 0, 0> and <b[0], 0, 0, 0>; lanes 2 and 3 are
; zero in both operands, and the expected output keeps the 'por'.
define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test18:
; CHECK:       # BB#0:
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; CHECK-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; test19: operands are <0, a[0], 0, a[3]> and <b[0], 0, b[2], b[2]>; lane 3 is
; non-zero in both, so the expected output keeps the 'por'.
define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test19:
; CHECK:       # BB#0:
; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,3]
; CHECK-NEXT:    pxor %xmm3, %xmm3
; CHECK-NEXT:    pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    por %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; test20: both operands use the same mask, giving <a[0] | b[0], 0>; the
; expected output is a 'por' followed by zeroing of the upper lane.
define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test20:
; CHECK:       # BB#0:
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; test21: both operands use the same mask, giving <0, a[0] | b[0]>; the
; expected output is a 'por' followed by a byte shift into the upper lane.
define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test21:
; CHECK:       # BB#0:
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; Verify that the dag-combiner keeps the correct domain for float/double vectors
; that are bitcast to integer vectors so the mask-or blend combine can apply.
;
; test22: <2 x double> inputs masked as <2 x i64>; result lanes are
; <a1[0], a0[1]> and the expected blend is the double-precision form.
define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test22:
; CHECK:       # BB#0:
; CHECK-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT:    retq
  %bc1 = bitcast <2 x double> %a0 to <2 x i64>
  %bc2 = bitcast <2 x double> %a1 to <2 x i64>
  %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
  %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
  %or = or <2 x i64> %and1, %and2
  %bc3 = bitcast <2 x i64> %or to <2 x double>
  ret <2 x double> %bc3
}


; test23: <4 x float> inputs masked as <4 x i32>; result lanes are
; <a1[0], a0[1], a0[2], a1[3]> and the expected blend is the single-precision
; form.
define <4 x float> @test23(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test23:
; CHECK:       # BB#0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; CHECK-NEXT:    retq
  %bc1 = bitcast <4 x float> %a0 to <4 x i32>
  %bc2 = bitcast <4 x float> %a1 to <4 x i32>
  %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
  %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
  %or = or <4 x i32> %and1, %and2
  %bc3 = bitcast <4 x i32> %or to <4 x float>
  ret <4 x float> %bc3
}


; test24: <4 x float> inputs masked as <2 x i64>; the selection is done on
; 64-bit lanes (<a1[0], a0[1]>), and the expected blend is still a
; floating-point one.
define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test24:
; CHECK:       # BB#0:
; CHECK-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT:    retq
  %bc1 = bitcast <4 x float> %a0 to <2 x i64>
  %bc2 = bitcast <4 x float> %a1 to <2 x i64>
  %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
  %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
  %or = or <2 x i64> %and1, %and2
  %bc3 = bitcast <2 x i64> %or to <4 x float>
  ret <4 x float> %bc3
}


; test25: same lane selection as test23, but the second input is a constant
; splat of 1.0; lanes 0 and 3 of the expected blend come from a memory operand
; (presumably the constant-pool copy of the splat).
define <4 x float> @test25(<4 x float> %a0) {
; CHECK-LABEL: test25:
; CHECK:       # BB#0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2],mem[3]
; CHECK-NEXT:    retq
  %bc1 = bitcast <4 x float> %a0 to <4 x i32>
  %bc2 = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32>
  %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
  %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
  %or = or <4 x i32> %and1, %and2
  %bc3 = bitcast <4 x i32> %or to <4 x float>
  ret <4 x float> %bc3
}


; Verify that the DAGCombiner doesn't crash when it checks whether a shuffle
; with an illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows
; how to handle legal vector value types.
;
; test_crash: same lane selection as test2 (<b[0], b[1], a[2], a[3]>), but on
; <4 x i8> vectors.
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_crash:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i8> %shuf1, %shuf2
  ret <4 x i8> %or
}