; (code-browser export header removed; was: Home | History | Annotate | Download | only in X86)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP

; ============================================================================ ;
; Various cases with %x and/or %y being a constant
; ============================================================================ ;

; Masked merge, "out" form with y == -1: r = (%mask & %x) | (~%mask & -1).
; %y is loaded but otherwise unused in this variant.
define <4 x i32> @out_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rdi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rdi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
     48 
; Masked merge, xor-based "in" form with y == -1: r = ((%x ^ -1) & %mask) ^ -1.
; %y is loaded but otherwise unused in this variant.
define <4 x i32> @in_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    andnps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    xorps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pandn (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpandn (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}
     82 
; This is not a canonical form. Testing for completeness only.
; "out" form with inverted mask and y == -1: r = (~%mask & %x) | (%mask & -1).
; %y is loaded but otherwise unused in this variant.
define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rdi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
    118 
; This is not a canonical form. Testing for completeness only.
; xor-based "in" form with inverted mask and y == -1: r = ((%x ^ -1) & ~%mask) ^ -1.
; %y is loaded but otherwise unused in this variant.
define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm2
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm2
; CHECK-SSE1-NEXT:    andnps %xmm2, %xmm0
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm2
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pandn %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor (%rdx), %xmm1, %xmm2
; CHECK-XOP-NEXT:    vpandn %xmm2, %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}
    160 
; Masked merge, "out" form with y == 42: r = (%mask & %x) | (~%mask & 42).
; %y is loaded but otherwise unused in this variant.
define <4 x i32> @out_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
    197 
; Masked merge, xor-based "in" form with y == 42: r = ((%x ^ 42) & %mask) ^ 42.
; %y is loaded but otherwise unused in this variant.
define <4 x i32> @in_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}
    233 
; This is not a canonical form. Testing for completeness only.
; "out" form with inverted mask and y == 42: r = (~%mask & %x) | (%mask & 42).
; %y is loaded but otherwise unused in this variant.
define <4 x i32> @out_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
    271 
; This is not a canonical form. Testing for completeness only.
; xor-based "in" form with inverted mask and y == 42: r = ((%x ^ 42) & ~%mask) ^ 42.
; %y is loaded but otherwise unused in this variant.
define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}
    309 
; Masked merge, "out" form with x == -1: r = (%mask & -1) | (~%mask & %y).
; %x is loaded but otherwise unused in this variant.
define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
    344 
; Masked merge, xor-based "in" form with x == -1: r = ((-1 ^ %y) & %mask) ^ %y.
; %x is loaded but otherwise unused in this variant.
define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}
    378 
; This is not a canonical form. Testing for completeness only.
; "out" form with inverted mask and x == -1: r = (~%mask & -1) | (%mask & %y).
; %x is loaded but otherwise unused in this variant.
define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
    418 
; This is not a canonical form. Testing for completeness only.
; xor-based "in" form with inverted mask and x == -1: r = ((-1 ^ %y) & ~%mask) ^ %y.
; %x is loaded but otherwise unused in this variant.
define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}
    458 
; Masked merge, "out" form with x == 42: r = (%mask & 42) | (~%mask & %y).
; %x is loaded but otherwise unused in this variant.
define <4 x i32> @out_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [5.885454e-44,5.885454e-44,5.885454e-44,5.885454e-44]
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
    495 
; Masked merge, xor-based "in" form with x == 42: r = ((42 ^ %y) & %mask) ^ %y.
; %x is loaded but otherwise unused in this variant.
define <4 x i32> @in_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}
    531 
; This is not a canonical form. Testing for completeness only.
; "out" form with inverted mask and x == 42: r = (~%mask & 42) | (%mask & %y).
; %x is loaded but otherwise unused in this variant.
define <4 x i32> @out_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
    569 
; This is not a canonical form. Testing for completeness only.
; xor-based "in" form with inverted mask and x == 42: r = ((42 ^ %y) & ~%mask) ^ %y.
; %x is loaded but otherwise unused in this variant.
define <4 x i32> @in_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}
    607