Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
      3 
      4 define i32 @or_self(i32 %x) { ; scalar x | x; the assertions expect only a register copy, no `or` instruction
      5 ; CHECK-LABEL: or_self:
      6 ; CHECK:       # %bb.0:
      7 ; CHECK-NEXT:    movl %edi, %eax
      8 ; CHECK-NEXT:    retq
      9   %or = or i32 %x, %x ; both operands are the same value
     10   ret i32 %or
     11 }
     12 
     13 define <4 x i32> @or_self_vec(<4 x i32> %x) { ; vector x | x; the assertions expect nothing but the return
     14 ; CHECK-LABEL: or_self_vec:
     15 ; CHECK:       # %bb.0:
     16 ; CHECK-NEXT:    retq
     17   %or = or <4 x i32> %x, %x ; both operands are the same vector
     18   ret <4 x i32> %or
     19 }
     20 
     21 ; Verify that each of the following test cases is folded into a single
     22 ; instruction which performs a blend operation.
     23 
     24 define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) { ; OR of two shuffles-with-zero whose non-zero lanes do not overlap; expected as one blend
     25 ; CHECK-LABEL: test1:
     26 ; CHECK:       # %bb.0:
     27 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
     28 ; CHECK-NEXT:    retq
     29   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; lanes: [a0, 0]
     30   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1> ; lanes: [0, b1]
     31   %or = or <2 x i64> %shuf1, %shuf2 ; lanes: [a0, b1]
     32   ret <2 x i64> %or
     33 }
     34 
     35 
     36 define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) { ; high half from %a, low half from %b; expected as one blend
     37 ; CHECK-LABEL: test2:
     38 ; CHECK:       # %bb.0:
     39 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
     40 ; CHECK-NEXT:    retq
     41   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; lanes: [0, 0, a2, a3]
     42   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; lanes: [b0, b1, 0, 0]
     43   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [b0, b1, a2, a3]
     44   ret <4 x i32> %or
     45 }
     46 
     47 
     48 define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) { ; upper element from %a, lower element from %b; expected as one blend
     49 ; CHECK-LABEL: test3:
     50 ; CHECK:       # %bb.0:
     51 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
     52 ; CHECK-NEXT:    retq
     53   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1> ; lanes: [0, a1]
     54   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; lanes: [b0, 0]
     55   %or = or <2 x i64> %shuf1, %shuf2 ; lanes: [b0, a1]
     56   ret <2 x i64> %or
     57 }
     58 
     59 
     60 define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) { ; lane 0 from %a, lanes 1-3 from %b; expected as one blend
     61 ; CHECK-LABEL: test4:
     62 ; CHECK:       # %bb.0:
     63 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
     64 ; CHECK-NEXT:    retq
     65   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4> ; lanes: [a0, 0, 0, 0]
     66   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3> ; lanes: [0, b1, b2, b3]
     67   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [a0, b1, b2, b3]
     68   ret <4 x i32> %or
     69 }
     70 
     71 
     72 define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) { ; lanes 1-3 from %a, lane 0 from %b; expected as one blend
     73 ; CHECK-LABEL: test5:
     74 ; CHECK:       # %bb.0:
     75 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
     76 ; CHECK-NEXT:    retq
     77   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3> ; lanes: [0, a1, a2, a3]
     78   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4> ; lanes: [b0, 0, 0, 0]
     79   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [b0, a1, a2, a3]
     80   ret <4 x i32> %or
     81 }
     82 
     83 
     84 define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) { ; low half from %a, high half from %b; expected as one blend
     85 ; CHECK-LABEL: test6:
     86 ; CHECK:       # %bb.0:
     87 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
     88 ; CHECK-NEXT:    retq
     89   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; lanes: [a0, a1, 0, 0]
     90   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; lanes: [0, 0, b2, b3]
     91   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [a0, a1, b2, b3]
     92   ret <4 x i32> %or
     93 }
     94 
     95 
     96 define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) { ; same lane selection as test6, written with complementary AND masks instead of shuffles
     97 ; CHECK-LABEL: test7:
     98 ; CHECK:       # %bb.0:
     99 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    100 ; CHECK-NEXT:    retq
    101   %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0> ; keeps lanes 0-1 of %a, zeroes lanes 2-3
    102   %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1> ; keeps lanes 2-3 of %b, zeroes lanes 0-1
    103   %or = or <4 x i32> %and1, %and2 ; lanes: [a0, a1, b2, b3]
    104   ret <4 x i32> %or
    105 }
    106 
    107 
    108 define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) { ; complementary AND masks on 64-bit lanes; expected as one blend
    109 ; CHECK-LABEL: test8:
    110 ; CHECK:       # %bb.0:
    111 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    112 ; CHECK-NEXT:    retq
    113   %and1 = and <2 x i64> %a, <i64 -1, i64 0> ; keeps lane 0 of %a
    114   %and2 = and <2 x i64> %b, <i64 0, i64 -1> ; keeps lane 1 of %b
    115   %or = or <2 x i64> %and1, %and2 ; lanes: [a0, b1]
    116   ret <2 x i64> %or
    117 }
    118 
    119 
    120 define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) { ; test7 with the two AND masks swapped; expected as one blend
    121 ; CHECK-LABEL: test9:
    122 ; CHECK:       # %bb.0:
    123 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    124 ; CHECK-NEXT:    retq
    125   %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1> ; keeps lanes 2-3 of %a
    126   %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0> ; keeps lanes 0-1 of %b
    127   %or = or <4 x i32> %and1, %and2 ; lanes: [b0, b1, a2, a3]
    128   ret <4 x i32> %or
    129 }
    130 
    131 
    132 define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) { ; test8 with the two AND masks swapped; expected as one blend
    133 ; CHECK-LABEL: test10:
    134 ; CHECK:       # %bb.0:
    135 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    136 ; CHECK-NEXT:    retq
    137   %and1 = and <2 x i64> %a, <i64 0, i64 -1> ; keeps lane 1 of %a
    138   %and2 = and <2 x i64> %b, <i64 -1, i64 0> ; keeps lane 0 of %b
    139   %or = or <2 x i64> %and1, %and2 ; lanes: [b0, a1]
    140   ret <2 x i64> %or
    141 }
    142 
    143 
    144 define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) { ; same lane selection as test4, written with complementary AND masks
    145 ; CHECK-LABEL: test11:
    146 ; CHECK:       # %bb.0:
    147 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
    148 ; CHECK-NEXT:    retq
    149   %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0> ; keeps lane 0 of %a
    150   %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1> ; keeps lanes 1-3 of %b
    151   %or = or <4 x i32> %and1, %and2 ; lanes: [a0, b1, b2, b3]
    152   ret <4 x i32> %or
    153 }
    154 
    155 
    156 define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) { ; same lane selection as test5, written with complementary AND masks
    157 ; CHECK-LABEL: test12:
    158 ; CHECK:       # %bb.0:
    159 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    160 ; CHECK-NEXT:    retq
    161   %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1> ; keeps lanes 1-3 of %a
    162   %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0> ; keeps lane 0 of %b
    163   %or = or <4 x i32> %and1, %and2 ; lanes: [b0, a1, a2, a3]
    164   ret <4 x i32> %or
    165 }
    166 
    167 
    168 ; Verify that the following test cases are folded into single shuffles.
    169 
    170 define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) { ; non-overlapping halves, but %a's lane 1 is repeated, so a shufps is expected rather than a blend
    171 ; CHECK-LABEL: test13:
    172 ; CHECK:       # %bb.0:
    173 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
    174 ; CHECK-NEXT:    retq
    175   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4> ; lanes: [a1, a1, 0, 0]
    176   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; lanes: [0, 0, b2, b3]
    177   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [a1, a1, b2, b3]
    178   ret <4 x i32> %or
    179 }
    180 
    181 
    182 define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) { ; low elements of %a and %b concatenated; expected as one movlhps
    183 ; CHECK-LABEL: test14:
    184 ; CHECK:       # %bb.0:
    185 ; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    186 ; CHECK-NEXT:    retq
    187   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; lanes: [a0, 0]
    188   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0> ; lanes: [0, b0]
    189   %or = or <2 x i64> %shuf1, %shuf2 ; lanes: [a0, b0]
    190   ret <2 x i64> %or
    191 }
    192 
    193 
    194 define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) { ; one shufps is expected, computed in xmm1 and then copied to xmm0
    195 ; CHECK-LABEL: test15:
    196 ; CHECK:       # %bb.0:
    197 ; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,1],xmm0[2,1]
    198 ; CHECK-NEXT:    movaps %xmm1, %xmm0
    199 ; CHECK-NEXT:    retq
    200   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1> ; lanes: [0, 0, a2, a1]
    201   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4> ; lanes: [b2, b1, 0, 0]
    202   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [b2, b1, a2, a1]
    203   ret <4 x i32> %or
    204 }
    205 
    206 
    207 define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) { ; test14 with operand roles swapped; one movlhps in xmm1 plus a copy to xmm0 is expected
    208 ; CHECK-LABEL: test16:
    209 ; CHECK:       # %bb.0:
    210 ; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    211 ; CHECK-NEXT:    movaps %xmm1, %xmm0
    212 ; CHECK-NEXT:    retq
    213   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0> ; lanes: [0, a0]
    214   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; lanes: [b0, 0]
    215   %or = or <2 x i64> %shuf1, %shuf2 ; lanes: [b0, a0]
    216   ret <2 x i64> %or
    217 }
    218 
    219 
    220 ; Verify that the dag-combiner does not fold an OR of two shuffles into a single
    221 ; shuffle instruction when the shuffle indexes are not compatible.
    222 
    223 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) { ; lane 1 takes data from both inputs, so the expected output keeps a real por
    224 ; CHECK-LABEL: test17:
    225 ; CHECK:       # %bb.0:
    226 ; CHECK-NEXT:    psllq $32, %xmm0
    227 ; CHECK-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
    228 ; CHECK-NEXT:    por %xmm1, %xmm0
    229 ; CHECK-NEXT:    retq
    230   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2> ; lanes: [0, a0, 0, a2]
    231   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; lanes: [b0, b1, 0, 0]
    232   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [b0, a0|b1, 0, a2]
    233   ret <4 x i32> %or
    234 }
    235 
    236 
    237 define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) { ; expected output zero-blends each input separately and combines them with por, not a single shuffle
    238 ; CHECK-LABEL: test18:
    239 ; CHECK:       # %bb.0:
    240 ; CHECK-NEXT:    pxor %xmm2, %xmm2
    241 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
    242 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
    243 ; CHECK-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
    244 ; CHECK-NEXT:    por %xmm1, %xmm0
    245 ; CHECK-NEXT:    retq
    246   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4> ; lanes: [0, a0, 0, 0]
    247   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4> ; lanes: [b0, 0, 0, 0]
    248   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [b0, a0, 0, 0]
    249   ret <4 x i32> %or
    250 }
    251 
    252 
    253 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) { ; lane 3 takes data from both inputs, so the expected output keeps a real por
    254 ; CHECK-LABEL: test19:
    255 ; CHECK:       # %bb.0:
    256 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,3]
    257 ; CHECK-NEXT:    pxor %xmm3, %xmm3
    258 ; CHECK-NEXT:    pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
    259 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
    260 ; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5,6,7]
    261 ; CHECK-NEXT:    por %xmm2, %xmm0
    262 ; CHECK-NEXT:    retq
    263   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3> ; lanes: [0, a0, 0, a3]
    264   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2> ; lanes: [b0, 0, b2, b2]
    265   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [b0, a0, b2, a3|b2]
    266   ret <4 x i32> %or
    267 }
    268 
    269 
    270 define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) { ; both shuffles use the same mask; expected as por followed by one zeroing move
    271 ; CHECK-LABEL: test20:
    272 ; CHECK:       # %bb.0:
    273 ; CHECK-NEXT:    por %xmm1, %xmm0
    274 ; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    275 ; CHECK-NEXT:    retq
    276   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; lanes: [a0, 0]
    277   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> ; lanes: [b0, 0]
    278   %or = or <2 x i64> %shuf1, %shuf2 ; lanes: [a0|b0, 0]
    279   ret <2 x i64> %or
    280 }
    281 
    282 
    283 define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) { ; both shuffles use the same mask; expected as por followed by one byte shift
    284 ; CHECK-LABEL: test21:
    285 ; CHECK:       # %bb.0:
    286 ; CHECK-NEXT:    por %xmm1, %xmm0
    287 ; CHECK-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
    288 ; CHECK-NEXT:    retq
    289   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0> ; lanes: [0, a0]
    290   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0> ; lanes: [0, b0]
    291   %or = or <2 x i64> %shuf1, %shuf2 ; lanes: [0, a0|b0]
    292   ret <2 x i64> %or
    293 }
    294 
    295 
    296 ; Verify that the dag-combiner keeps the correct domain for float/double vectors
    297 ; that are bitcast to integer vectors to use the mask-or blend combine.
    298 
    299 define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) { ; mask-and-or on doubles viewed as i64 lanes; a blendps is expected
    300 ; CHECK-LABEL: test22:
    301 ; CHECK:       # %bb.0:
    302 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    303 ; CHECK-NEXT:    retq
    304   %bc1 = bitcast <2 x double> %a0 to <2 x i64> ; reinterpret as integers so the values can be masked
    305   %bc2 = bitcast <2 x double> %a1 to <2 x i64>
    306   %and1 = and <2 x i64> %bc1, <i64 0, i64 -1> ; keeps lane 1 of %a0
    307   %and2 = and <2 x i64> %bc2, <i64 -1, i64 0> ; keeps lane 0 of %a1
    308   %or = or <2 x i64> %and1, %and2 ; lanes: [a1[0], a0[1]]
    309   %bc3 = bitcast <2 x i64> %or to <2 x double> ; back to the floating-point return type
    310   ret <2 x double> %bc3
    311 }
    312 
    313 
    314 define <4 x float> @test23(<4 x float> %a0, <4 x float> %a1) { ; mask-and-or on floats viewed as i32 lanes; a blendps is expected
    315 ; CHECK-LABEL: test23:
    316 ; CHECK:       # %bb.0:
    317 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
    318 ; CHECK-NEXT:    retq
    319   %bc1 = bitcast <4 x float> %a0 to <4 x i32> ; reinterpret as integers so the values can be masked
    320   %bc2 = bitcast <4 x float> %a1 to <4 x i32>
    321   %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0> ; keeps lanes 1-2 of %a0
    322   %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1> ; keeps lanes 0 and 3 of %a1
    323   %or = or <4 x i32> %and1, %and2 ; lanes: [a1[0], a0[1], a0[2], a1[3]]
    324   %bc3 = bitcast <4 x i32> %or to <4 x float> ; back to the floating-point return type
    325   ret <4 x float> %bc3
    326 }
    327 
    328 
    329 define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) { ; floats masked as two i64 lanes (element count changes across the bitcast); a blendps is expected
    330 ; CHECK-LABEL: test24:
    331 ; CHECK:       # %bb.0:
    332 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    333 ; CHECK-NEXT:    retq
    334   %bc1 = bitcast <4 x float> %a0 to <2 x i64> ; each i64 lane covers two float lanes
    335   %bc2 = bitcast <4 x float> %a1 to <2 x i64>
    336   %and1 = and <2 x i64> %bc1, <i64 0, i64 -1> ; keeps the upper 64 bits of %a0
    337   %and2 = and <2 x i64> %bc2, <i64 -1, i64 0> ; keeps the lower 64 bits of %a1
    338   %or = or <2 x i64> %and1, %and2 ; low half from %a1, high half from %a0
    339   %bc3 = bitcast <2 x i64> %or to <4 x float> ; back to the floating-point return type
    340   ret <4 x float> %bc3
    341 }
    342 
    343 
    344 define <4 x float> @test25(<4 x float> %a0) { ; test23 with the second input replaced by a constant; a blendps with a memory operand is expected
    345 ; CHECK-LABEL: test25:
    346 ; CHECK:       # %bb.0:
    347 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2],mem[3]
    348 ; CHECK-NEXT:    retq
    349   %bc1 = bitcast <4 x float> %a0 to <4 x i32> ; reinterpret as integers so the value can be masked
    350   %bc2 = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32> ; constant vector with 1.0 in every lane
    351   %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0> ; keeps lanes 1-2 of %a0
    352   %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1> ; keeps lanes 0 and 3 of the constant
    353   %or = or <4 x i32> %and1, %and2 ; lanes: [1.0 bits, a0[1], a0[2], 1.0 bits]
    354   %bc3 = bitcast <4 x i32> %or to <4 x float> ; back to the floating-point return type
    355   ret <4 x float> %bc3
    356 }
    357 
    358 
    359 ; Verify that the DAGCombiner doesn't crash in the attempt to check if a shuffle
    360 ; with illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows how to
    361 ; handle legal vector value types.
    362 define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) { ; same shuffle masks as test2 but on <4 x i8>; a single blend is still expected
    363 ; CHECK-LABEL: test_crash:
    364 ; CHECK:       # %bb.0:
    365 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    366 ; CHECK-NEXT:    retq
    367   %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; lanes: [0, 0, a2, a3]
    368   %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; lanes: [b0, b1, 0, 0]
    369   %or = or <4 x i8> %shuf1, %shuf2 ; lanes: [b0, b1, a2, a3]
    370   ret <4 x i8> %or
    371 }
    372 
    373 ; Verify that we can fold regardless of which operand is the zeroinitializer
    374 
    375 define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) { ; test2 with the zero vector as the FIRST operand of the first shuffle
    376 ; CHECK-LABEL: test2b:
    377 ; CHECK:       # %bb.0:
    378 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    379 ; CHECK-NEXT:    retq
    380   %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7> ; indices 0-3 select zero, 4-7 select %a: [0, 0, a2, a3]
    381   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; lanes: [b0, b1, 0, 0]
    382   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [b0, b1, a2, a3]
    383   ret <4 x i32> %or
    384 }
    385 
    386 define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) { ; test2 with the zero vector as the first operand of BOTH shuffles
    387 ; CHECK-LABEL: test2c:
    388 ; CHECK:       # %bb.0:
    389 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    390 ; CHECK-NEXT:    retq
    391   %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7> ; indices 0-3 select zero, 4-7 select %a: [0, 0, a2, a3]
    392   %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0> ; indices 0-3 select zero, 4-7 select %b: [b0, b1, 0, 0]
    393   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [b0, b1, a2, a3]
    394   ret <4 x i32> %or
    395 }
    396 
    397 
    398 define <4 x i32> @test2d(<4 x i32> %a, <4 x i32> %b) { ; test2 with the zero vector as the first operand of the SECOND shuffle
    399 ; CHECK-LABEL: test2d:
    400 ; CHECK:       # %bb.0:
    401 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    402 ; CHECK-NEXT:    retq
    403   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; lanes: [0, 0, a2, a3]
    404   %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0> ; indices 0-3 select zero, 4-7 select %b: [b0, b1, 0, 0]
    405   %or = or <4 x i32> %shuf1, %shuf2 ; lanes: [b0, b1, a2, a3]
    406   ret <4 x i32> %or
    407 }
    408 
    409 ; Make sure we can still fold when a mask index that should point to the zero vector is undef.
    410 
    411 define <4 x i32> @test2e(<4 x i32> %a, <4 x i32> %b) { ; test2 with an undef mask index in %shuf1 and a second operand whose only defined element is 0
    412 ; CHECK-LABEL: test2e:
    413 ; CHECK:       # %bb.0:
    414 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    415 ; CHECK-NEXT:    retq
    416   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 4, i32 2, i32 3> ; lanes: [undef, 0, a2, a3]; index 4 is the one defined zero element
    417   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 0, i32 1, i32 4, i32 4> ; lanes: [b0, b1, 0, 0]
    418   %or = or <4 x i32> %shuf1, %shuf2 ; lane 0 is undef|b0; the expected blend takes b0 there
    419   ret <4 x i32> %or
    420 }
    421 
    422 define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) { ; like test2e, but the undef mask index is in %shuf2
    423 ; CHECK-LABEL: test2f:
    424 ; CHECK:       # %bb.0:
    425 ; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    426 ; CHECK-NEXT:    retq
    427   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 4, i32 4, i32 2, i32 3> ; lanes: [0, 0, a2, a3]; index 4 is the one defined zero element
    428   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 1, i32 4, i32 4> ; lanes: [undef, b1, 0, 0]
    429   %or = or <4 x i32> %shuf1, %shuf2 ; lane 0 is 0|undef; the expected blend takes xmm1 (%b) there
    430   ret <4 x i32> %or
    431 }
    432 
    433 ; (or (and X, c1), c2) -> (and (or X, c2), c1|c2) iff (c1 & c2) != 0
    434 
    435 define <2 x i64> @or_and_v2i64(<2 x i64> %a0) { ; (x & 7) | 3 with splat constants; the assertions expect the OR to be emitted before the AND
    436 ; CHECK-LABEL: or_and_v2i64:
    437 ; CHECK:       # %bb.0:
    438 ; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
    439 ; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
    440 ; CHECK-NEXT:    retq
    441   %1 = and <2 x i64> %a0, <i64 7, i64 7> ; c1 = 7
    442   %2 = or <2 x i64> %1, <i64 3, i64 3> ; c2 = 3, so c1 & c2 = 3 is non-zero
    443   ret <2 x i64> %2
    444 }
    445 
    446 define <4 x i32> @or_and_v4i32(<4 x i32> %a0) { ; (x & c1) | c2 with non-splat constants; the assertions expect the OR to be emitted before the AND
    447 ; CHECK-LABEL: or_and_v4i32:
    448 ; CHECK:       # %bb.0:
    449 ; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
    450 ; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
    451 ; CHECK-NEXT:    retq
    452   %1 = and <4 x i32> %a0, <i32 1, i32 3, i32 5, i32 7> ; per-lane c1
    453   %2 = or <4 x i32> %1, <i32 3, i32 2, i32 15, i32 2> ; per-lane c2; c1 & c2 is non-zero in every lane (1, 2, 5, 2)
    454   ret <4 x i32> %2
    455 }
    456 
    457 ; If all masked bits are going to be set, that's a constant fold.
    458 
    459 define <4 x i32> @or_and_v4i32_fold(<4 x i32> %a0) { ; (x & 1) | 3 is always 3, so only a constant load is expected
    460 ; CHECK-LABEL: or_and_v4i32_fold:
    461 ; CHECK:       # %bb.0:
    462 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
    463 ; CHECK-NEXT:    retq
    464   %1 = and <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1> ; only bit 0 of each lane can survive
    465   %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3> ; bit 0 is set by the OR anyway, so %a0 no longer matters
    466   ret <4 x i32> %2
    467 }
    468 
    469 ; fold (or x, c) -> c iff (x & ~c) == 0
    470 
    471 define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) { ; zext(i32) | 0xFFFFFFFF is always 0xFFFFFFFF, so only a constant load is expected
    472 ; CHECK-LABEL: or_zext_v2i32:
    473 ; CHECK:       # %bb.0:
    474 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295]
    475 ; CHECK-NEXT:    retq
    476   %1 = zext <2 x i32> %a0 to <2 x i64> ; upper 32 bits of each lane are zero
    477   %2 = or <2 x i64> %1, <i64 4294967295, i64 4294967295> ; sets all low 32 bits, the only bits %1 could have set
    478   ret <2 x i64> %2
    479 }
    480 
    481 define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) { ; zext(i16) | 0xFFFF is always 0xFFFF, so only a constant load is expected
    482 ; CHECK-LABEL: or_zext_v4i16:
    483 ; CHECK:       # %bb.0:
    484 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
    485 ; CHECK-NEXT:    retq
    486   %1 = zext <4 x i16> %a0 to <4 x i32> ; upper 16 bits of each lane are zero
    487   %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535> ; sets all low 16 bits, the only bits %1 could have set
    488   ret <4 x i32> %2
    489 }
    490 
    491