Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
      3 
      4 
      5 ; Verify that each of the following test cases is folded into a single
      6 ; instruction which performs a blend operation.
      7 
      8 define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
        ; %shuf1 = <a[0], 0> and %shuf2 = <0, b[1]> (mask index 2 selects a lane of
        ; the zeroinitializer operand), so the OR yields <a[0], b[1]>.
        ; The expected output is a single pblendw with no separate 'por'.
      9 ; CHECK-LABEL: test1:
     10 ; CHECK:       # BB#0:
     11 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
     12 ; CHECK-NEXT:    retq
     13   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
     14   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
     15   %or = or <2 x i64> %shuf1, %shuf2
     16   ret <2 x i64> %or
     17 }
     18 
     19 
     20 define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
        ; %shuf1 = <0, 0, a[2], a[3]> and %shuf2 = <b[0], b[1], 0, 0> (mask index 4
        ; selects a zero lane), so the OR yields <b[0], b[1], a[2], a[3]>.
        ; The expected output is a single pblendw with no separate 'por'.
     21 ; CHECK-LABEL: test2:
     22 ; CHECK:       # BB#0:
     23 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
     24 ; CHECK-NEXT:    retq
     25   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
     26   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
     27   %or = or <4 x i32> %shuf1, %shuf2
     28   ret <4 x i32> %or
     29 }
     30 
     31 
     32 define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
        ; %shuf1 = <0, a[1]> and %shuf2 = <b[0], 0> (mask index 2 selects a zero
        ; lane), so the OR yields <b[0], a[1]>.
        ; The expected output is a single pblendw with no separate 'por'.
     33 ; CHECK-LABEL: test3:
     34 ; CHECK:       # BB#0:
     35 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
     36 ; CHECK-NEXT:    retq
     37   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
     38   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
     39   %or = or <2 x i64> %shuf1, %shuf2
     40   ret <2 x i64> %or
     41 }
     42 
     43 
     44 define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
        ; %shuf1 = <a[0], 0, 0, 0> and %shuf2 = <0, b[1], b[2], b[3]> (mask index 4
        ; selects a zero lane), so the OR yields <a[0], b[1], b[2], b[3]>.
        ; The expected output is a single pblendw with no separate 'por'.
     45 ; CHECK-LABEL: test4:
     46 ; CHECK:       # BB#0:
     47 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
     48 ; CHECK-NEXT:    retq
     49   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
     50   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
     51   %or = or <4 x i32> %shuf1, %shuf2
     52   ret <4 x i32> %or
     53 }
     54 
     55 
     56 define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
        ; %shuf1 = <0, a[1], a[2], a[3]> and %shuf2 = <b[0], 0, 0, 0> (mask index 4
        ; selects a zero lane), so the OR yields <b[0], a[1], a[2], a[3]>.
        ; The expected output is a single pblendw with no separate 'por'.
     57 ; CHECK-LABEL: test5:
     58 ; CHECK:       # BB#0:
     59 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
     60 ; CHECK-NEXT:    retq
     61   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
     62   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
     63   %or = or <4 x i32> %shuf1, %shuf2
     64   ret <4 x i32> %or
     65 }
     66 
     67 
     68 define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
        ; %shuf1 = <a[0], a[1], 0, 0> and %shuf2 = <0, 0, b[2], b[3]> (mask index 4
        ; selects a zero lane), so the OR yields <a[0], a[1], b[2], b[3]>.
        ; The expected output is a single pblendw with no separate 'por'.
     69 ; CHECK-LABEL: test6:
     70 ; CHECK:       # BB#0:
     71 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
     72 ; CHECK-NEXT:    retq
     73   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
     74   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
     75   %or = or <4 x i32> %shuf1, %shuf2
     76   ret <4 x i32> %or
     77 }
     78 
     79 
     80 define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
        ; Same lane selection expressed with AND masks instead of shuffles:
        ; %and1 keeps lanes 0-1 of %a and %and2 keeps lanes 2-3 of %b, so the OR
        ; yields <a[0], a[1], b[2], b[3]>. A single pblendw is expected.
     81 ; CHECK-LABEL: test7:
     82 ; CHECK:       # BB#0:
     83 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
     84 ; CHECK-NEXT:    retq
     85   %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
     86   %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
     87   %or = or <4 x i32> %and1, %and2
     88   ret <4 x i32> %or
     89 }
     90 
     91 
     92 define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
        ; AND-mask form: %and1 keeps lane 0 of %a and %and2 keeps lane 1 of %b, so
        ; the OR yields <a[0], b[1]>. A single pblendw is expected.
     93 ; CHECK-LABEL: test8:
     94 ; CHECK:       # BB#0:
     95 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
     96 ; CHECK-NEXT:    retq
     97   %and1 = and <2 x i64> %a, <i64 -1, i64 0>
     98   %and2 = and <2 x i64> %b, <i64 0, i64 -1>
     99   %or = or <2 x i64> %and1, %and2
    100   ret <2 x i64> %or
    101 }
    102 
    103 
    104 define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
        ; AND-mask form: %and1 keeps lanes 2-3 of %a and %and2 keeps lanes 0-1 of
        ; %b, so the OR yields <b[0], b[1], a[2], a[3]>. A single pblendw is expected.
    105 ; CHECK-LABEL: test9:
    106 ; CHECK:       # BB#0:
    107 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
    108 ; CHECK-NEXT:    retq
    109   %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
    110   %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
    111   %or = or <4 x i32> %and1, %and2
    112   ret <4 x i32> %or
    113 }
    114 
    115 
    116 define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
        ; AND-mask form: %and1 keeps lane 1 of %a and %and2 keeps lane 0 of %b, so
        ; the OR yields <b[0], a[1]>. A single pblendw is expected.
    117 ; CHECK-LABEL: test10:
    118 ; CHECK:       # BB#0:
    119 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
    120 ; CHECK-NEXT:    retq
    121   %and1 = and <2 x i64> %a, <i64 0, i64 -1>
    122   %and2 = and <2 x i64> %b, <i64 -1, i64 0>
    123   %or = or <2 x i64> %and1, %and2
    124   ret <2 x i64> %or
    125 }
    126 
    127 
    128 define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
        ; AND-mask form: %and1 keeps lane 0 of %a and %and2 keeps lanes 1-3 of %b,
        ; so the OR yields <a[0], b[1], b[2], b[3]>. A single pblendw is expected.
    129 ; CHECK-LABEL: test11:
    130 ; CHECK:       # BB#0:
    131 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
    132 ; CHECK-NEXT:    retq
    133   %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
    134   %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
    135   %or = or <4 x i32> %and1, %and2
    136   ret <4 x i32> %or
    137 }
    138 
    139 
    140 define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
        ; AND-mask form: %and1 keeps lanes 1-3 of %a and %and2 keeps lane 0 of %b,
        ; so the OR yields <b[0], a[1], a[2], a[3]>. A single pblendw is expected.
    141 ; CHECK-LABEL: test12:
    142 ; CHECK:       # BB#0:
    143 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
    144 ; CHECK-NEXT:    retq
    145   %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
    146   %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
    147   %or = or <4 x i32> %and1, %and2
    148   ret <4 x i32> %or
    149 }
    150 
    151 
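    152 ; Verify that the OR in each of the following test cases is folded away into a shuffle of the two inputs (no 'por' is emitted, although lowering may need more than one instruction).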
    153 
    154 define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
        ; %shuf1 = <a[1], a[1], 0, 0> and %shuf2 = <0, 0, b[2], b[3]>, so the OR
        ; yields <a[1], a[1], b[2], b[3]>. Unlike a plain blend, lane 0 takes a[1],
        ; so the expected output permutes %a with pshufd before the pblendw; no
        ; 'por' is expected.
    155 ; CHECK-LABEL: test13:
    156 ; CHECK:       # BB#0:
    157 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    158 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    159 ; CHECK-NEXT:    retq
    160   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>
    161   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
    162   %or = or <4 x i32> %shuf1, %shuf2
    163   ret <4 x i32> %or
    164 }
    165 
    166 
    167 define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
        ; %shuf1 = <a[0], 0> and %shuf2 = <0, b[0]>, so the OR yields <a[0], b[0]>.
        ; The expected output is a single punpcklqdq combining the low halves.
    168 ; CHECK-LABEL: test14:
    169 ; CHECK:       # BB#0:
    170 ; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    171 ; CHECK-NEXT:    retq
    172   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
    173   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
    174   %or = or <2 x i64> %shuf1, %shuf2
    175   ret <2 x i64> %or
    176 }
    177 
    178 
    179 define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
        ; %shuf1 = <0, 0, a[2], a[1]> and %shuf2 = <b[2], b[1], 0, 0>, so the OR
        ; yields <b[2], b[1], a[2], a[1]>. The expected output permutes each input
        ; with pshufd and then merges them with pblendw; no 'por' is expected.
    180 ; CHECK-LABEL: test15:
    181 ; CHECK:       # BB#0:
    182 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,2,1]
    183 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,1,2,3]
    184 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
    185 ; CHECK-NEXT:    retq
    186   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>
    187   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>
    188   %or = or <4 x i32> %shuf1, %shuf2
    189   ret <4 x i32> %or
    190 }
    191 
    192 
    193 define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
        ; %shuf1 = <0, a[0]> and %shuf2 = <b[0], 0>, so the OR yields <b[0], a[0]>.
        ; The expected output builds the result in xmm1 with punpcklqdq and then
        ; copies it to xmm0 with movdqa; no 'por' is expected.
    194 ; CHECK-LABEL: test16:
    195 ; CHECK:       # BB#0:
    196 ; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    197 ; CHECK-NEXT:    movdqa %xmm1, %xmm0
    198 ; CHECK-NEXT:    retq
    199   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
    200   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
    201   %or = or <2 x i64> %shuf1, %shuf2
    202   ret <2 x i64> %or
    203 }
    204 
    205 
    206 ; Verify that the dag-combiner does not fold an OR of two shuffles into a single
    207 ; shuffle instruction when the shuffle indices are not compatible.
    208 
    209 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
        ; %shuf1 = <0, a[0], 0, a[2]> and %shuf2 = <b[0], b[1], 0, 0>. Lane 1 takes
        ; an element from both inputs (a[0] | b[1]), so the result is not a plain
        ; selection of lanes. The expected output keeps an explicit 'por'.
    210 ; CHECK-LABEL: test17:
    211 ; CHECK:       # BB#0:
    212 ; CHECK-NEXT:    psllq $32, %xmm0
    213 ; CHECK-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
    214 ; CHECK-NEXT:    por %xmm1, %xmm0
    215 ; CHECK-NEXT:    retq
    216   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
    217   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
    218   %or = or <4 x i32> %shuf1, %shuf2
    219   ret <4 x i32> %or
    220 }
    221 
    222 
    223 define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
        ; %shuf1 = <0, a[0], 0, 0> and %shuf2 = <b[0], 0, 0, 0>, so the OR is
        ; <b[0], a[0], 0, 0>. The expected output lowers both shuffles separately
        ; (blends against a zeroed register) and keeps an explicit 'por'.
    224 ; CHECK-LABEL: test18:
    225 ; CHECK:       # BB#0:
    226 ; CHECK-NEXT:    pxor %xmm2, %xmm2
    227 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
    228 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
    229 ; CHECK-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
    230 ; CHECK-NEXT:    por %xmm1, %xmm0
    231 ; CHECK-NEXT:    retq
    232   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
    233   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
    234   %or = or <4 x i32> %shuf1, %shuf2
    235   ret <4 x i32> %or
    236 }
    237 
    238 
    239 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
        ; %shuf1 = <0, a[0], 0, a[3]> and %shuf2 = <b[0], 0, b[2], b[2]>. Lane 3
        ; takes an element from both inputs (a[3] | b[2]), so the result is not a
        ; plain selection of lanes. The expected output keeps an explicit 'por'.
    240 ; CHECK-LABEL: test19:
    241 ; CHECK:       # BB#0:
    242 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,3]
    243 ; CHECK-NEXT:    pxor %xmm3, %xmm3
    244 ; CHECK-NEXT:    pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
    245 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
    246 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5,6,7]
    247 ; CHECK-NEXT:    por %xmm2, %xmm0
    248 ; CHECK-NEXT:    retq
    249   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
    250   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
    251   %or = or <4 x i32> %shuf1, %shuf2
    252   ret <4 x i32> %or
    253 }
    254 
    255 
    256 define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
        ; Both shuffles use the same mask: %shuf1 = <a[0], 0> and %shuf2 = <b[0], 0>,
        ; so the OR is <a[0] | b[0], 0>. The expected output ORs the inputs first
        ; and then zeroes the upper lane with a single movq.
    257 ; CHECK-LABEL: test20:
    258 ; CHECK:       # BB#0:
    259 ; CHECK-NEXT:    por %xmm1, %xmm0
    260 ; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    261 ; CHECK-NEXT:    retq
    262   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
    263   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
    264   %or = or <2 x i64> %shuf1, %shuf2
    265   ret <2 x i64> %or
    266 }
    267 
    268 
    269 define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
        ; Both shuffles use the same mask: %shuf1 = <0, a[0]> and %shuf2 = <0, b[0]>,
        ; so the OR is <0, a[0] | b[0]>. The expected output ORs the inputs first
        ; and then moves the low 8 bytes into the upper lane with a single pslldq.
    270 ; CHECK-LABEL: test21:
    271 ; CHECK:       # BB#0:
    272 ; CHECK-NEXT:    por %xmm1, %xmm0
    273 ; CHECK-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
    274 ; CHECK-NEXT:    retq
    275   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
    276   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
    277   %or = or <2 x i64> %shuf1, %shuf2
    278   ret <2 x i64> %or
    279 }
    280 
    281 
    282 ; Verify that the dag-combiner keeps the correct domain for float/double vectors
    283 ; that are bitcast to integer vectors in order to use the mask-or blend combine.
    284 
    285 define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) {
        ; The <2 x double> inputs are bitcast to <2 x i64>, masked so that lane 1
        ; of %a0 and lane 0 of %a1 survive, ORed, and bitcast back. The result is
        ; <a1[0], a0[1]>; the expected output is a blendpd rather than an integer
        ; blend.
    286 ; CHECK-LABEL: test22:
    287 ; CHECK:       # BB#0:
    288 ; CHECK-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    289 ; CHECK-NEXT:    retq
    290   %bc1 = bitcast <2 x double> %a0 to <2 x i64>
    291   %bc2 = bitcast <2 x double> %a1 to <2 x i64>
    292   %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
    293   %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
    294   %or = or <2 x i64> %and1, %and2
    295   %bc3 = bitcast <2 x i64> %or to <2 x double>
    296   ret <2 x double> %bc3
    297 }
    298 
    299 
    300 define <4 x float> @test23(<4 x float> %a0, <4 x float> %a1) {
        ; The <4 x float> inputs are bitcast to <4 x i32>, masked so that lanes 1-2
        ; of %a0 and lanes 0 and 3 of %a1 survive, ORed, and bitcast back. The
        ; result is <a1[0], a0[1], a0[2], a1[3]>; the expected output is a blendps
        ; rather than an integer blend.
    301 ; CHECK-LABEL: test23:
    302 ; CHECK:       # BB#0:
    303 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
    304 ; CHECK-NEXT:    retq
    305   %bc1 = bitcast <4 x float> %a0 to <4 x i32>
    306   %bc2 = bitcast <4 x float> %a1 to <4 x i32>
    307   %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
    308   %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
    309   %or = or <4 x i32> %and1, %and2
    310   %bc3 = bitcast <4 x i32> %or to <4 x float>
    311   ret <4 x float> %bc3
    312 }
    313 
    314 
    315 define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) {
        ; The <4 x float> inputs are bitcast to <2 x i64>, so the masks select
        ; whole 64-bit halves: the upper half of %a0 and the lower half of %a1.
        ; The expected output is a blendpd operating on those 64-bit lanes.
    316 ; CHECK-LABEL: test24:
    317 ; CHECK:       # BB#0:
    318 ; CHECK-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    319 ; CHECK-NEXT:    retq
    320   %bc1 = bitcast <4 x float> %a0 to <2 x i64>
    321   %bc2 = bitcast <4 x float> %a1 to <2 x i64>
    322   %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
    323   %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
    324   %or = or <2 x i64> %and1, %and2
    325   %bc3 = bitcast <2 x i64> %or to <4 x float>
    326   ret <4 x float> %bc3
    327 }
    328 
    329 
    330 define <4 x float> @test25(<4 x float> %a0) {
        ; Same masks as a two-input float blend, but the second input is a constant
        ; splat of 1.0: lanes 1-2 come from %a0 and lanes 0 and 3 from the constant.
        ; The expected output is a blendps whose second source is a memory operand.
    331 ; CHECK-LABEL: test25:
    332 ; CHECK:       # BB#0:
    333 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2],mem[3]
    334 ; CHECK-NEXT:    retq
    335   %bc1 = bitcast <4 x float> %a0 to <4 x i32>
    336   %bc2 = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32>
    337   %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
    338   %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
    339   %or = or <4 x i32> %and1, %and2
    340   %bc3 = bitcast <4 x i32> %or to <4 x float>
    341   ret <4 x float> %bc3
    342 }
    343 
    344 
    345 ; Verify that the DAGCombiner doesn't crash when it attempts to check whether a
    346 ; shuffle with an illegal type has a legal mask. Method 'isShuffleMaskLegal' only
    347 ; knows how to handle legal vector value types.
    348 define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
        ; OR of two shuffles on <4 x i8> vectors: %shuf1 = <0, 0, a[2], a[3]> and
        ; %shuf2 = <b[0], b[1], 0, 0>, so the OR yields <b[0], b[1], a[2], a[3]>.
        ; The expected output is a single pblendw.
    349 ; CHECK-LABEL: test_crash:
    350 ; CHECK:       # BB#0:
    351 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
    352 ; CHECK-NEXT:    retq
    353   %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
    354   %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
    355   %or = or <4 x i8> %shuf1, %shuf2
    356   ret <4 x i8> %or
    357 }
    358 
    359