Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BASELINE
      3 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE1
      4 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
      5 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP
      6 
      7 ; https://bugs.llvm.org/show_bug.cgi?id=37104
      8 
      9 ; All the advanced stuff (negative tests, commutativity) is handled in the
     10 ; scalar version of the test only.
     11 
     12 ; ============================================================================ ;
     13 ; 8-bit vector width
     14 ; ============================================================================ ;
     15 
     16 define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
     17 ; CHECK-LABEL: out_v1i8:
     18 ; CHECK:       # %bb.0:
     19 ; CHECK-NEXT:    andl %edx, %edi
     20 ; CHECK-NEXT:    notb %dl
     21 ; CHECK-NEXT:    andb %sil, %dl
     22 ; CHECK-NEXT:    orb %dil, %dl
     23 ; CHECK-NEXT:    movl %edx, %eax
     24 ; CHECK-NEXT:    retq
        ; Single-lane i8 bitwise select: r = (x & mask) | (y & ~mask).
        ; Only the common prefix is used for the expectations above, so all four
        ; llc configurations at the top of the file must emit this same sequence.
        ; ~mask is written as xor with all-ones.
     25   %mx = and <1 x i8> %x, %mask
     26   %notmask = xor <1 x i8> %mask, <i8 -1>
     27   %my = and <1 x i8> %y, %notmask
     28   %r = or <1 x i8> %mx, %my
     29   ret <1 x i8> %r
     30 }
     31 
     32 ; ============================================================================ ;
     33 ; 16-bit vector width
     34 ; ============================================================================ ;
     35 
     36 define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
     37 ; CHECK-BASELINE-LABEL: out_v2i8:
     38 ; CHECK-BASELINE:       # %bb.0:
     39 ; CHECK-BASELINE-NEXT:    andl %r8d, %edi
     40 ; CHECK-BASELINE-NEXT:    andl %r9d, %esi
     41 ; CHECK-BASELINE-NEXT:    notb %r8b
     42 ; CHECK-BASELINE-NEXT:    notb %r9b
     43 ; CHECK-BASELINE-NEXT:    andb %cl, %r9b
     44 ; CHECK-BASELINE-NEXT:    andb %dl, %r8b
     45 ; CHECK-BASELINE-NEXT:    orb %dil, %r8b
     46 ; CHECK-BASELINE-NEXT:    orb %sil, %r9b
     47 ; CHECK-BASELINE-NEXT:    movl %r8d, %eax
     48 ; CHECK-BASELINE-NEXT:    movl %r9d, %edx
     49 ; CHECK-BASELINE-NEXT:    retq
     50 ;
     51 ; CHECK-SSE1-LABEL: out_v2i8:
     52 ; CHECK-SSE1:       # %bb.0:
     53 ; CHECK-SSE1-NEXT:    andl %r8d, %edi
     54 ; CHECK-SSE1-NEXT:    andl %r9d, %esi
     55 ; CHECK-SSE1-NEXT:    notb %r8b
     56 ; CHECK-SSE1-NEXT:    notb %r9b
     57 ; CHECK-SSE1-NEXT:    andb %cl, %r9b
     58 ; CHECK-SSE1-NEXT:    andb %dl, %r8b
     59 ; CHECK-SSE1-NEXT:    orb %dil, %r8b
     60 ; CHECK-SSE1-NEXT:    orb %sil, %r9b
     61 ; CHECK-SSE1-NEXT:    movl %r8d, %eax
     62 ; CHECK-SSE1-NEXT:    movl %r9d, %edx
     63 ; CHECK-SSE1-NEXT:    retq
     64 ;
     65 ; CHECK-SSE2-LABEL: out_v2i8:
     66 ; CHECK-SSE2:       # %bb.0:
     67 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
     68 ; CHECK-SSE2-NEXT:    xorps {{.*}}(%rip), %xmm2
     69 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
     70 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
     71 ; CHECK-SSE2-NEXT:    retq
     72 ;
     73 ; CHECK-XOP-LABEL: out_v2i8:
     74 ; CHECK-XOP:       # %bb.0:
     75 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm0, %xmm0
     76 ; CHECK-XOP-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
     77 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm1, %xmm1
     78 ; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
     79 ; CHECK-XOP-NEXT:    retq
        ; Two-lane i8 bitwise select: r = (x & mask) | (y & ~mask).
        ; Expected output above: the no-SSE and SSE1-only configurations process
        ; each lane separately in byte registers and move the two results into
        ; %eax and %edx before returning. The SSE2 and XOP configurations use
        ; whole-xmm and/xor/or, with the xor operand read RIP-relative.
     80   %mx = and <2 x i8> %x, %mask
     81   %notmask = xor <2 x i8> %mask, <i8 -1, i8 -1>
     82   %my = and <2 x i8> %y, %notmask
     83   %r = or <2 x i8> %mx, %my
     84   ret <2 x i8> %r
     85 }
     86 
     87 define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
     88 ; CHECK-LABEL: out_v1i16:
     89 ; CHECK:       # %bb.0:
     90 ; CHECK-NEXT:    andl %edx, %edi
     91 ; CHECK-NEXT:    notl %edx
     92 ; CHECK-NEXT:    andl %esi, %edx
     93 ; CHECK-NEXT:    orl %edi, %edx
     94 ; CHECK-NEXT:    movl %edx, %eax
     95 ; CHECK-NEXT:    retq
        ; Single-lane i16 bitwise select: r = (x & mask) | (y & ~mask).
        ; Only the common prefix is used above, so every llc configuration must
        ; emit the same sequence; note it is expected to use 32-bit register
        ; operations even though the element type is i16.
     96   %mx = and <1 x i16> %x, %mask
     97   %notmask = xor <1 x i16> %mask, <i16 -1>
     98   %my = and <1 x i16> %y, %notmask
     99   %r = or <1 x i16> %mx, %my
    100   ret <1 x i16> %r
    101 }
    102 
    103 ; ============================================================================ ;
    104 ; 32-bit vector width
    105 ; ============================================================================ ;
    106 
    107 define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
    108 ; CHECK-BASELINE-LABEL: out_v4i8:
    109 ; CHECK-BASELINE:       # %bb.0:
    110 ; CHECK-BASELINE-NEXT:    pushq %rbx
    111 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
    112 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
    113 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
    114 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
    115 ; CHECK-BASELINE-NEXT:    andb %bl, %r8b
    116 ; CHECK-BASELINE-NEXT:    andb %al, %cl
    117 ; CHECK-BASELINE-NEXT:    andb %r11b, %dl
    118 ; CHECK-BASELINE-NEXT:    andb %r10b, %sil
    119 ; CHECK-BASELINE-NEXT:    notb %r11b
    120 ; CHECK-BASELINE-NEXT:    notb %al
    121 ; CHECK-BASELINE-NEXT:    notb %bl
    122 ; CHECK-BASELINE-NEXT:    notb %r10b
    123 ; CHECK-BASELINE-NEXT:    andb %r9b, %r10b
    124 ; CHECK-BASELINE-NEXT:    orb %sil, %r10b
    125 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
    126 ; CHECK-BASELINE-NEXT:    orb %r8b, %bl
    127 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
    128 ; CHECK-BASELINE-NEXT:    orb %cl, %al
    129 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
    130 ; CHECK-BASELINE-NEXT:    orb %dl, %r11b
    131 ; CHECK-BASELINE-NEXT:    movb %bl, 3(%rdi)
    132 ; CHECK-BASELINE-NEXT:    movb %al, 2(%rdi)
    133 ; CHECK-BASELINE-NEXT:    movb %r11b, 1(%rdi)
    134 ; CHECK-BASELINE-NEXT:    movb %r10b, (%rdi)
    135 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
    136 ; CHECK-BASELINE-NEXT:    popq %rbx
    137 ; CHECK-BASELINE-NEXT:    retq
    138 ;
    139 ; CHECK-SSE1-LABEL: out_v4i8:
    140 ; CHECK-SSE1:       # %bb.0:
    141 ; CHECK-SSE1-NEXT:    pushq %rbx
    142 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
    143 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
    144 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
    145 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
    146 ; CHECK-SSE1-NEXT:    andb %bl, %r8b
    147 ; CHECK-SSE1-NEXT:    andb %al, %cl
    148 ; CHECK-SSE1-NEXT:    andb %r11b, %dl
    149 ; CHECK-SSE1-NEXT:    andb %r10b, %sil
    150 ; CHECK-SSE1-NEXT:    notb %r11b
    151 ; CHECK-SSE1-NEXT:    notb %al
    152 ; CHECK-SSE1-NEXT:    notb %bl
    153 ; CHECK-SSE1-NEXT:    notb %r10b
    154 ; CHECK-SSE1-NEXT:    andb %r9b, %r10b
    155 ; CHECK-SSE1-NEXT:    orb %sil, %r10b
    156 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
    157 ; CHECK-SSE1-NEXT:    orb %r8b, %bl
    158 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
    159 ; CHECK-SSE1-NEXT:    orb %cl, %al
    160 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
    161 ; CHECK-SSE1-NEXT:    orb %dl, %r11b
    162 ; CHECK-SSE1-NEXT:    movb %bl, 3(%rdi)
    163 ; CHECK-SSE1-NEXT:    movb %al, 2(%rdi)
    164 ; CHECK-SSE1-NEXT:    movb %r11b, 1(%rdi)
    165 ; CHECK-SSE1-NEXT:    movb %r10b, (%rdi)
    166 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
    167 ; CHECK-SSE1-NEXT:    popq %rbx
    168 ; CHECK-SSE1-NEXT:    retq
    169 ;
    170 ; CHECK-SSE2-LABEL: out_v4i8:
    171 ; CHECK-SSE2:       # %bb.0:
    172 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
    173 ; CHECK-SSE2-NEXT:    xorps {{.*}}(%rip), %xmm2
    174 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
    175 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
    176 ; CHECK-SSE2-NEXT:    retq
    177 ;
    178 ; CHECK-XOP-LABEL: out_v4i8:
    179 ; CHECK-XOP:       # %bb.0:
    180 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm0, %xmm0
    181 ; CHECK-XOP-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
    182 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm1, %xmm1
    183 ; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
    184 ; CHECK-XOP-NEXT:    retq
        ; Four-lane i8 bitwise select: r = (x & mask) | (y & ~mask).
        ; Expected output above: the no-SSE and SSE1-only configurations handle
        ; the lanes one byte at a time, reading some operands from the stack,
        ; saving/restoring %rbx, storing the four result bytes through %rdi and
        ; copying %rdi to %rax. The SSE2 and XOP configurations use whole-xmm
        ; and/xor/or, with the xor operand read RIP-relative.
    185   %mx = and <4 x i8> %x, %mask
    186   %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1>
    187   %my = and <4 x i8> %y, %notmask
    188   %r = or <4 x i8> %mx, %my
    189   ret <4 x i8> %r
    190 }
    191 
    192 define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
    193 ; CHECK-BASELINE-LABEL: out_v4i8_undef:
    194 ; CHECK-BASELINE:       # %bb.0:
    195 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
    196 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
    197 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
    198 ; CHECK-BASELINE-NEXT:    andb %al, %r8b
    199 ; CHECK-BASELINE-NEXT:    andb %r11b, %dl
    200 ; CHECK-BASELINE-NEXT:    andb %r10b, %sil
    201 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
    202 ; CHECK-BASELINE-NEXT:    notb %r11b
    203 ; CHECK-BASELINE-NEXT:    notb %al
    204 ; CHECK-BASELINE-NEXT:    notb %r10b
    205 ; CHECK-BASELINE-NEXT:    andb %r9b, %r10b
    206 ; CHECK-BASELINE-NEXT:    orb %sil, %r10b
    207 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
    208 ; CHECK-BASELINE-NEXT:    orb %r8b, %al
    209 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
    210 ; CHECK-BASELINE-NEXT:    orb %dl, %r11b
    211 ; CHECK-BASELINE-NEXT:    movb %cl, 2(%rdi)
    212 ; CHECK-BASELINE-NEXT:    movb %al, 3(%rdi)
    213 ; CHECK-BASELINE-NEXT:    movb %r11b, 1(%rdi)
    214 ; CHECK-BASELINE-NEXT:    movb %r10b, (%rdi)
    215 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
    216 ; CHECK-BASELINE-NEXT:    retq
    217 ;
    218 ; CHECK-SSE1-LABEL: out_v4i8_undef:
    219 ; CHECK-SSE1:       # %bb.0:
    220 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
    221 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
    222 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
    223 ; CHECK-SSE1-NEXT:    andb %al, %r8b
    224 ; CHECK-SSE1-NEXT:    andb %r11b, %dl
    225 ; CHECK-SSE1-NEXT:    andb %r10b, %sil
    226 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
    227 ; CHECK-SSE1-NEXT:    notb %r11b
    228 ; CHECK-SSE1-NEXT:    notb %al
    229 ; CHECK-SSE1-NEXT:    notb %r10b
    230 ; CHECK-SSE1-NEXT:    andb %r9b, %r10b
    231 ; CHECK-SSE1-NEXT:    orb %sil, %r10b
    232 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
    233 ; CHECK-SSE1-NEXT:    orb %r8b, %al
    234 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
    235 ; CHECK-SSE1-NEXT:    orb %dl, %r11b
    236 ; CHECK-SSE1-NEXT:    movb %cl, 2(%rdi)
    237 ; CHECK-SSE1-NEXT:    movb %al, 3(%rdi)
    238 ; CHECK-SSE1-NEXT:    movb %r11b, 1(%rdi)
    239 ; CHECK-SSE1-NEXT:    movb %r10b, (%rdi)
    240 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
    241 ; CHECK-SSE1-NEXT:    retq
    242 ;
    243 ; CHECK-SSE2-LABEL: out_v4i8_undef:
    244 ; CHECK-SSE2:       # %bb.0:
    245 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
    246 ; CHECK-SSE2-NEXT:    xorps {{.*}}(%rip), %xmm2
    247 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
    248 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
    249 ; CHECK-SSE2-NEXT:    retq
    250 ;
    251 ; CHECK-XOP-LABEL: out_v4i8_undef:
    252 ; CHECK-XOP:       # %bb.0:
    253 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm0, %xmm0
    254 ; CHECK-XOP-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
    255 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm1, %xmm1
    256 ; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
    257 ; CHECK-XOP-NEXT:    retq
        ; Same four-lane i8 select as out_v4i8, except that lane 2 of the
        ; all-ones xor constant is undef.
        ; Expected output above: in the no-SSE and SSE1-only configurations the
        ; byte stored at offset 2 is produced by a single and (no not/or for that
        ; lane), and %rbx is not needed. The SSE2 and XOP expectations are the
        ; same instruction sequence as for out_v4i8.
    258   %mx = and <4 x i8> %x, %mask
    259   %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1>
    260   %my = and <4 x i8> %y, %notmask
    261   %r = or <4 x i8> %mx, %my
    262   ret <4 x i8> %r
    263 }
    264 
    265 define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
    266 ; CHECK-BASELINE-LABEL: out_v2i16:
    267 ; CHECK-BASELINE:       # %bb.0:
    268 ; CHECK-BASELINE-NEXT:    andl %r9d, %esi
    269 ; CHECK-BASELINE-NEXT:    andl %r8d, %edi
    270 ; CHECK-BASELINE-NEXT:    notl %r8d
    271 ; CHECK-BASELINE-NEXT:    notl %r9d
    272 ; CHECK-BASELINE-NEXT:    andl %ecx, %r9d
    273 ; CHECK-BASELINE-NEXT:    orl %esi, %r9d
    274 ; CHECK-BASELINE-NEXT:    andl %edx, %r8d
    275 ; CHECK-BASELINE-NEXT:    orl %edi, %r8d
    276 ; CHECK-BASELINE-NEXT:    movl %r8d, %eax
    277 ; CHECK-BASELINE-NEXT:    movl %r9d, %edx
    278 ; CHECK-BASELINE-NEXT:    retq
    279 ;
    280 ; CHECK-SSE1-LABEL: out_v2i16:
    281 ; CHECK-SSE1:       # %bb.0:
    282 ; CHECK-SSE1-NEXT:    andl %r9d, %esi
    283 ; CHECK-SSE1-NEXT:    andl %r8d, %edi
    284 ; CHECK-SSE1-NEXT:    notl %r8d
    285 ; CHECK-SSE1-NEXT:    notl %r9d
    286 ; CHECK-SSE1-NEXT:    andl %ecx, %r9d
    287 ; CHECK-SSE1-NEXT:    orl %esi, %r9d
    288 ; CHECK-SSE1-NEXT:    andl %edx, %r8d
    289 ; CHECK-SSE1-NEXT:    orl %edi, %r8d
    290 ; CHECK-SSE1-NEXT:    movl %r8d, %eax
    291 ; CHECK-SSE1-NEXT:    movl %r9d, %edx
    292 ; CHECK-SSE1-NEXT:    retq
    293 ;
    294 ; CHECK-SSE2-LABEL: out_v2i16:
    295 ; CHECK-SSE2:       # %bb.0:
    296 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
    297 ; CHECK-SSE2-NEXT:    xorps {{.*}}(%rip), %xmm2
    298 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
    299 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
    300 ; CHECK-SSE2-NEXT:    retq
    301 ;
    302 ; CHECK-XOP-LABEL: out_v2i16:
    303 ; CHECK-XOP:       # %bb.0:
    304 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm0, %xmm0
    305 ; CHECK-XOP-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
    306 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm1, %xmm1
    307 ; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
    308 ; CHECK-XOP-NEXT:    retq
        ; Two-lane i16 bitwise select: r = (x & mask) | (y & ~mask).
        ; Expected output above: the no-SSE and SSE1-only configurations process
        ; each lane with 32-bit register operations and move the two results
        ; into %eax and %edx before returning. The SSE2 and XOP configurations
        ; use whole-xmm and/xor/or, with the xor operand read RIP-relative.
    309   %mx = and <2 x i16> %x, %mask
    310   %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1>
    311   %my = and <2 x i16> %y, %notmask
    312   %r = or <2 x i16> %mx, %my
    313   ret <2 x i16> %r
    314 }
    315 
    316 define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
    317 ; CHECK-LABEL: out_v1i32:
    318 ; CHECK:       # %bb.0:
    319 ; CHECK-NEXT:    andl %edx, %edi
    320 ; CHECK-NEXT:    notl %edx
    321 ; CHECK-NEXT:    andl %esi, %edx
    322 ; CHECK-NEXT:    orl %edi, %edx
    323 ; CHECK-NEXT:    movl %edx, %eax
    324 ; CHECK-NEXT:    retq
        ; Single-lane i32 bitwise select: r = (x & mask) | (y & ~mask).
        ; Only the common prefix is used above, so every llc configuration must
        ; emit this same 32-bit and/not/and/or sequence.
    325   %mx = and <1 x i32> %x, %mask
    326   %notmask = xor <1 x i32> %mask, <i32 -1>
    327   %my = and <1 x i32> %y, %notmask
    328   %r = or <1 x i32> %mx, %my
    329   ret <1 x i32> %r
    330 }
    331 
    332 ; ============================================================================ ;
    333 ; 64-bit vector width
    334 ; ============================================================================ ;
    335 
    336 define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
    337 ; CHECK-BASELINE-LABEL: out_v8i8:
    338 ; CHECK-BASELINE:       # %bb.0:
    339 ; CHECK-BASELINE-NEXT:    pushq %rbp
    340 ; CHECK-BASELINE-NEXT:    pushq %r15
    341 ; CHECK-BASELINE-NEXT:    pushq %r14
    342 ; CHECK-BASELINE-NEXT:    pushq %r13
    343 ; CHECK-BASELINE-NEXT:    pushq %r12
    344 ; CHECK-BASELINE-NEXT:    pushq %rbx
    345 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
    346 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
    347 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
    348 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
    349 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
    350 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
    351 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
    352 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
    353 ; CHECK-BASELINE-NEXT:    andb %al, %r9b
    354 ; CHECK-BASELINE-NEXT:    andb %bl, %r8b
    355 ; CHECK-BASELINE-NEXT:    andb %r14b, %cl
    356 ; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    357 ; CHECK-BASELINE-NEXT:    andb %r11b, %dl
    358 ; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    359 ; CHECK-BASELINE-NEXT:    andb %r10b, %sil
    360 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
    361 ; CHECK-BASELINE-NEXT:    andb %r12b, %r13b
    362 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
    363 ; CHECK-BASELINE-NEXT:    andb %r15b, %cl
    364 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dl
    365 ; CHECK-BASELINE-NEXT:    andb %bpl, %dl
    366 ; CHECK-BASELINE-NEXT:    notb %r10b
    367 ; CHECK-BASELINE-NEXT:    notb %r11b
    368 ; CHECK-BASELINE-NEXT:    notb %r14b
    369 ; CHECK-BASELINE-NEXT:    notb %bl
    370 ; CHECK-BASELINE-NEXT:    notb %al
    371 ; CHECK-BASELINE-NEXT:    notb %bpl
    372 ; CHECK-BASELINE-NEXT:    notb %r15b
    373 ; CHECK-BASELINE-NEXT:    notb %r12b
    374 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
    375 ; CHECK-BASELINE-NEXT:    orb %r13b, %r12b
    376 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
    377 ; CHECK-BASELINE-NEXT:    orb %cl, %r15b
    378 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
    379 ; CHECK-BASELINE-NEXT:    orb %dl, %bpl
    380 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
    381 ; CHECK-BASELINE-NEXT:    orb %r9b, %al
    382 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
    383 ; CHECK-BASELINE-NEXT:    orb %r8b, %bl
    384 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
    385 ; CHECK-BASELINE-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %r14b # 1-byte Folded Reload
    386 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
    387 ; CHECK-BASELINE-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %r11b # 1-byte Folded Reload
    388 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
    389 ; CHECK-BASELINE-NEXT:    orb %sil, %r10b
    390 ; CHECK-BASELINE-NEXT:    movb %r12b, 7(%rdi)
    391 ; CHECK-BASELINE-NEXT:    movb %r15b, 6(%rdi)
    392 ; CHECK-BASELINE-NEXT:    movb %bpl, 5(%rdi)
    393 ; CHECK-BASELINE-NEXT:    movb %al, 4(%rdi)
    394 ; CHECK-BASELINE-NEXT:    movb %bl, 3(%rdi)
    395 ; CHECK-BASELINE-NEXT:    movb %r14b, 2(%rdi)
    396 ; CHECK-BASELINE-NEXT:    movb %r11b, 1(%rdi)
    397 ; CHECK-BASELINE-NEXT:    movb %r10b, (%rdi)
    398 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
    399 ; CHECK-BASELINE-NEXT:    popq %rbx
    400 ; CHECK-BASELINE-NEXT:    popq %r12
    401 ; CHECK-BASELINE-NEXT:    popq %r13
    402 ; CHECK-BASELINE-NEXT:    popq %r14
    403 ; CHECK-BASELINE-NEXT:    popq %r15
    404 ; CHECK-BASELINE-NEXT:    popq %rbp
    405 ; CHECK-BASELINE-NEXT:    retq
    406 ;
    407 ; CHECK-SSE1-LABEL: out_v8i8:
    408 ; CHECK-SSE1:       # %bb.0:
    409 ; CHECK-SSE1-NEXT:    pushq %rbp
    410 ; CHECK-SSE1-NEXT:    pushq %r15
    411 ; CHECK-SSE1-NEXT:    pushq %r14
    412 ; CHECK-SSE1-NEXT:    pushq %r13
    413 ; CHECK-SSE1-NEXT:    pushq %r12
    414 ; CHECK-SSE1-NEXT:    pushq %rbx
    415 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
    416 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
    417 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
    418 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
    419 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
    420 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
    421 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
    422 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
    423 ; CHECK-SSE1-NEXT:    andb %al, %r9b
    424 ; CHECK-SSE1-NEXT:    andb %bl, %r8b
    425 ; CHECK-SSE1-NEXT:    andb %r14b, %cl
    426 ; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    427 ; CHECK-SSE1-NEXT:    andb %r11b, %dl
    428 ; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    429 ; CHECK-SSE1-NEXT:    andb %r10b, %sil
    430 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
    431 ; CHECK-SSE1-NEXT:    andb %r12b, %r13b
    432 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
    433 ; CHECK-SSE1-NEXT:    andb %r15b, %cl
    434 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
    435 ; CHECK-SSE1-NEXT:    andb %bpl, %dl
    436 ; CHECK-SSE1-NEXT:    notb %r10b
    437 ; CHECK-SSE1-NEXT:    notb %r11b
    438 ; CHECK-SSE1-NEXT:    notb %r14b
    439 ; CHECK-SSE1-NEXT:    notb %bl
    440 ; CHECK-SSE1-NEXT:    notb %al
    441 ; CHECK-SSE1-NEXT:    notb %bpl
    442 ; CHECK-SSE1-NEXT:    notb %r15b
    443 ; CHECK-SSE1-NEXT:    notb %r12b
    444 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
    445 ; CHECK-SSE1-NEXT:    orb %r13b, %r12b
    446 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
    447 ; CHECK-SSE1-NEXT:    orb %cl, %r15b
    448 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
    449 ; CHECK-SSE1-NEXT:    orb %dl, %bpl
    450 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
    451 ; CHECK-SSE1-NEXT:    orb %r9b, %al
    452 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
    453 ; CHECK-SSE1-NEXT:    orb %r8b, %bl
    454 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
    455 ; CHECK-SSE1-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %r14b # 1-byte Folded Reload
    456 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
    457 ; CHECK-SSE1-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %r11b # 1-byte Folded Reload
    458 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
    459 ; CHECK-SSE1-NEXT:    orb %sil, %r10b
    460 ; CHECK-SSE1-NEXT:    movb %r12b, 7(%rdi)
    461 ; CHECK-SSE1-NEXT:    movb %r15b, 6(%rdi)
    462 ; CHECK-SSE1-NEXT:    movb %bpl, 5(%rdi)
    463 ; CHECK-SSE1-NEXT:    movb %al, 4(%rdi)
    464 ; CHECK-SSE1-NEXT:    movb %bl, 3(%rdi)
    465 ; CHECK-SSE1-NEXT:    movb %r14b, 2(%rdi)
    466 ; CHECK-SSE1-NEXT:    movb %r11b, 1(%rdi)
    467 ; CHECK-SSE1-NEXT:    movb %r10b, (%rdi)
    468 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
    469 ; CHECK-SSE1-NEXT:    popq %rbx
    470 ; CHECK-SSE1-NEXT:    popq %r12
    471 ; CHECK-SSE1-NEXT:    popq %r13
    472 ; CHECK-SSE1-NEXT:    popq %r14
    473 ; CHECK-SSE1-NEXT:    popq %r15
    474 ; CHECK-SSE1-NEXT:    popq %rbp
    475 ; CHECK-SSE1-NEXT:    retq
    476 ;
    477 ; CHECK-SSE2-LABEL: out_v8i8:
    478 ; CHECK-SSE2:       # %bb.0:
    479 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
    480 ; CHECK-SSE2-NEXT:    xorps {{.*}}(%rip), %xmm2
    481 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
    482 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
    483 ; CHECK-SSE2-NEXT:    retq
    484 ;
    485 ; CHECK-XOP-LABEL: out_v8i8:
    486 ; CHECK-XOP:       # %bb.0:
    487 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm0, %xmm0
    488 ; CHECK-XOP-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
    489 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm1, %xmm1
    490 ; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
    491 ; CHECK-XOP-NEXT:    retq
        ; Eight-lane i8 bitwise select: r = (x & mask) | (y & ~mask).
        ; Expected output above: the no-SSE and SSE1-only configurations handle
        ; the lanes one byte at a time. They push and pop six registers (%rbp,
        ; %r15, %r14, %r13, %r12, %rbx), spill two partial results to the stack
        ; and reload them folded into an or, store the eight result bytes through
        ; %rdi and copy %rdi to %rax. The SSE2 and XOP configurations use
        ; whole-xmm and/xor/or, with the xor operand read RIP-relative.
    492   %mx = and <8 x i8> %x, %mask
    493   %notmask = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
    494   %my = and <8 x i8> %y, %notmask
    495   %r = or <8 x i8> %mx, %my
    496   ret <8 x i8> %r
    497 }
    498 
    499 define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
    500 ; CHECK-BASELINE-LABEL: out_v4i16:
    501 ; CHECK-BASELINE:       # %bb.0:
    502 ; CHECK-BASELINE-NEXT:    pushq %rbx
    503 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
    504 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
    505 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    506 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
    507 ; CHECK-BASELINE-NEXT:    andl %ebx, %esi
    508 ; CHECK-BASELINE-NEXT:    andl %eax, %r8d
    509 ; CHECK-BASELINE-NEXT:    andl %r11d, %ecx
    510 ; CHECK-BASELINE-NEXT:    andl %r10d, %edx
    511 ; CHECK-BASELINE-NEXT:    notl %r10d
    512 ; CHECK-BASELINE-NEXT:    notl %r11d
    513 ; CHECK-BASELINE-NEXT:    notl %eax
    514 ; CHECK-BASELINE-NEXT:    notl %ebx
    515 ; CHECK-BASELINE-NEXT:    andl %r9d, %ebx
    516 ; CHECK-BASELINE-NEXT:    orl %esi, %ebx
    517 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %ax
    518 ; CHECK-BASELINE-NEXT:    orl %r8d, %eax
    519 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
    520 ; CHECK-BASELINE-NEXT:    orl %ecx, %r11d
    521 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
    522 ; CHECK-BASELINE-NEXT:    orl %edx, %r10d
    523 ; CHECK-BASELINE-NEXT:    movw %bx, (%rdi)
    524 ; CHECK-BASELINE-NEXT:    movw %ax, 6(%rdi)
    525 ; CHECK-BASELINE-NEXT:    movw %r11w, 4(%rdi)
    526 ; CHECK-BASELINE-NEXT:    movw %r10w, 2(%rdi)
    527 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
    528 ; CHECK-BASELINE-NEXT:    popq %rbx
    529 ; CHECK-BASELINE-NEXT:    retq
    530 ;
    531 ; CHECK-SSE1-LABEL: out_v4i16:
    532 ; CHECK-SSE1:       # %bb.0:
    533 ; CHECK-SSE1-NEXT:    pushq %rbx
    534 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
    535 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
    536 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    537 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
    538 ; CHECK-SSE1-NEXT:    andl %ebx, %esi
    539 ; CHECK-SSE1-NEXT:    andl %eax, %r8d
    540 ; CHECK-SSE1-NEXT:    andl %r11d, %ecx
    541 ; CHECK-SSE1-NEXT:    andl %r10d, %edx
    542 ; CHECK-SSE1-NEXT:    notl %r10d
    543 ; CHECK-SSE1-NEXT:    notl %r11d
    544 ; CHECK-SSE1-NEXT:    notl %eax
    545 ; CHECK-SSE1-NEXT:    notl %ebx
    546 ; CHECK-SSE1-NEXT:    andl %r9d, %ebx
    547 ; CHECK-SSE1-NEXT:    orl %esi, %ebx
    548 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %ax
    549 ; CHECK-SSE1-NEXT:    orl %r8d, %eax
    550 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
    551 ; CHECK-SSE1-NEXT:    orl %ecx, %r11d
    552 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
    553 ; CHECK-SSE1-NEXT:    orl %edx, %r10d
    554 ; CHECK-SSE1-NEXT:    movw %bx, (%rdi)
    555 ; CHECK-SSE1-NEXT:    movw %ax, 6(%rdi)
    556 ; CHECK-SSE1-NEXT:    movw %r11w, 4(%rdi)
    557 ; CHECK-SSE1-NEXT:    movw %r10w, 2(%rdi)
    558 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
    559 ; CHECK-SSE1-NEXT:    popq %rbx
    560 ; CHECK-SSE1-NEXT:    retq
    561 ;
    562 ; CHECK-SSE2-LABEL: out_v4i16:
    563 ; CHECK-SSE2:       # %bb.0:
    564 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
    565 ; CHECK-SSE2-NEXT:    xorps {{.*}}(%rip), %xmm2
    566 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
    567 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
    568 ; CHECK-SSE2-NEXT:    retq
    569 ;
    570 ; CHECK-XOP-LABEL: out_v4i16:
    571 ; CHECK-XOP:       # %bb.0:
    572 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm0, %xmm0
    573 ; CHECK-XOP-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
    574 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm1, %xmm1
    575 ; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
    576 ; CHECK-XOP-NEXT:    retq
        ; Four-lane i16 bitwise select: r = (x & mask) | (y & ~mask).
        ; Expected output above: the no-SSE and SSE1-only configurations handle
        ; the lanes one at a time, reading some operands from the stack,
        ; saving/restoring %rbx, storing the four 16-bit results through %rdi and
        ; copying %rdi to %rax. The SSE2 and XOP configurations use whole-xmm
        ; and/xor/or, with the xor operand read RIP-relative.
    577   %mx = and <4 x i16> %x, %mask
    578   %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1>
    579   %my = and <4 x i16> %y, %notmask
    580   %r = or <4 x i16> %mx, %my
    581   ret <4 x i16> %r
    582 }
    583 
    584 define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
    585 ; CHECK-BASELINE-LABEL: out_v4i16_undef:
    586 ; CHECK-BASELINE:       # %bb.0:
    587 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
    588 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
    589 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    590 ; CHECK-BASELINE-NEXT:    andl %eax, %esi
    591 ; CHECK-BASELINE-NEXT:    andl %r11d, %r8d
    592 ; CHECK-BASELINE-NEXT:    andl %r10d, %edx
    593 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %cx
    594 ; CHECK-BASELINE-NEXT:    notl %r10d
    595 ; CHECK-BASELINE-NEXT:    notl %r11d
    596 ; CHECK-BASELINE-NEXT:    notl %eax
    597 ; CHECK-BASELINE-NEXT:    andl %r9d, %eax
    598 ; CHECK-BASELINE-NEXT:    orl %esi, %eax
    599 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
    600 ; CHECK-BASELINE-NEXT:    orl %r8d, %r11d
    601 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
    602 ; CHECK-BASELINE-NEXT:    orl %edx, %r10d
    603 ; CHECK-BASELINE-NEXT:    movw %cx, 4(%rdi)
    604 ; CHECK-BASELINE-NEXT:    movw %ax, (%rdi)
    605 ; CHECK-BASELINE-NEXT:    movw %r11w, 6(%rdi)
    606 ; CHECK-BASELINE-NEXT:    movw %r10w, 2(%rdi)
    607 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
    608 ; CHECK-BASELINE-NEXT:    retq
    609 ;
    610 ; CHECK-SSE1-LABEL: out_v4i16_undef:
    611 ; CHECK-SSE1:       # %bb.0:
    612 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
    613 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
    614 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    615 ; CHECK-SSE1-NEXT:    andl %eax, %esi
    616 ; CHECK-SSE1-NEXT:    andl %r11d, %r8d
    617 ; CHECK-SSE1-NEXT:    andl %r10d, %edx
    618 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %cx
    619 ; CHECK-SSE1-NEXT:    notl %r10d
    620 ; CHECK-SSE1-NEXT:    notl %r11d
    621 ; CHECK-SSE1-NEXT:    notl %eax
    622 ; CHECK-SSE1-NEXT:    andl %r9d, %eax
    623 ; CHECK-SSE1-NEXT:    orl %esi, %eax
    624 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
    625 ; CHECK-SSE1-NEXT:    orl %r8d, %r11d
    626 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
    627 ; CHECK-SSE1-NEXT:    orl %edx, %r10d
    628 ; CHECK-SSE1-NEXT:    movw %cx, 4(%rdi)
    629 ; CHECK-SSE1-NEXT:    movw %ax, (%rdi)
    630 ; CHECK-SSE1-NEXT:    movw %r11w, 6(%rdi)
    631 ; CHECK-SSE1-NEXT:    movw %r10w, 2(%rdi)
    632 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
    633 ; CHECK-SSE1-NEXT:    retq
    634 ;
    635 ; CHECK-SSE2-LABEL: out_v4i16_undef:
    636 ; CHECK-SSE2:       # %bb.0:
    637 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
    638 ; CHECK-SSE2-NEXT:    xorps {{.*}}(%rip), %xmm2
    639 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
    640 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
    641 ; CHECK-SSE2-NEXT:    retq
    642 ;
    643 ; CHECK-XOP-LABEL: out_v4i16_undef:
    644 ; CHECK-XOP:       # %bb.0:
    645 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm0, %xmm0
    646 ; CHECK-XOP-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
    647 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm1, %xmm1
    648 ; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
    649 ; CHECK-XOP-NEXT:    retq
        ; Same four-lane i16 select as out_v4i16, except that lane 2 of the
        ; all-ones xor constant is undef.
        ; Expected output above: in the no-SSE and SSE1-only configurations the
        ; 16-bit value stored at offset 4 is produced by a single and (no not/or
        ; for that lane), and %rbx is not needed. The SSE2 and XOP expectations
        ; are the same instruction sequence as for out_v4i16.
    650   %mx = and <4 x i16> %x, %mask
    651   %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 undef, i16 -1>
    652   %my = and <4 x i16> %y, %notmask
    653   %r = or <4 x i16> %mx, %my
    654   ret <4 x i16> %r
    655 }
    656 
    657 define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
    658 ; CHECK-BASELINE-LABEL: out_v2i32:
    659 ; CHECK-BASELINE:       # %bb.0:
    660 ; CHECK-BASELINE-NEXT:    andl %r9d, %esi
    661 ; CHECK-BASELINE-NEXT:    andl %r8d, %edi
    662 ; CHECK-BASELINE-NEXT:    notl %r8d
    663 ; CHECK-BASELINE-NEXT:    notl %r9d
    664 ; CHECK-BASELINE-NEXT:    andl %ecx, %r9d
    665 ; CHECK-BASELINE-NEXT:    orl %esi, %r9d
    666 ; CHECK-BASELINE-NEXT:    andl %edx, %r8d
    667 ; CHECK-BASELINE-NEXT:    orl %edi, %r8d
    668 ; CHECK-BASELINE-NEXT:    movl %r8d, %eax
    669 ; CHECK-BASELINE-NEXT:    movl %r9d, %edx
    670 ; CHECK-BASELINE-NEXT:    retq
    671 ;
    672 ; CHECK-SSE1-LABEL: out_v2i32:
    673 ; CHECK-SSE1:       # %bb.0:
    674 ; CHECK-SSE1-NEXT:    andl %r9d, %esi
    675 ; CHECK-SSE1-NEXT:    andl %r8d, %edi
    676 ; CHECK-SSE1-NEXT:    notl %r8d
    677 ; CHECK-SSE1-NEXT:    notl %r9d
    678 ; CHECK-SSE1-NEXT:    andl %ecx, %r9d
    679 ; CHECK-SSE1-NEXT:    orl %esi, %r9d
    680 ; CHECK-SSE1-NEXT:    andl %edx, %r8d
    681 ; CHECK-SSE1-NEXT:    orl %edi, %r8d
    682 ; CHECK-SSE1-NEXT:    movl %r8d, %eax
    683 ; CHECK-SSE1-NEXT:    movl %r9d, %edx
    684 ; CHECK-SSE1-NEXT:    retq
    685 ;
    686 ; CHECK-SSE2-LABEL: out_v2i32:
    687 ; CHECK-SSE2:       # %bb.0:
    688 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
    689 ; CHECK-SSE2-NEXT:    xorps {{.*}}(%rip), %xmm2
    690 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
    691 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
    692 ; CHECK-SSE2-NEXT:    retq
    693 ;
    694 ; CHECK-XOP-LABEL: out_v2i32:
    695 ; CHECK-XOP:       # %bb.0:
    696 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm0, %xmm0
    697 ; CHECK-XOP-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
    698 ; CHECK-XOP-NEXT:    vandps %xmm2, %xmm1, %xmm1
    699 ; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
    700 ; CHECK-XOP-NEXT:    retq
        ; Two-lane i32 bitwise select: r = (x & mask) | (y & ~mask).
        ; Expected output above: the no-SSE and SSE1-only configurations process
        ; each lane with 32-bit register operations and move the two results
        ; into %eax and %edx before returning. The SSE2 and XOP configurations
        ; use whole-xmm and/xor/or, with the xor operand read RIP-relative.
    701   %mx = and <2 x i32> %x, %mask
    702   %notmask = xor <2 x i32> %mask, <i32 -1, i32 -1>
    703   %my = and <2 x i32> %y, %notmask
    704   %r = or <2 x i32> %mx, %my
    705   ret <2 x i32> %r
    706 }
    707 
    708 define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
        ; Masked merge of single-element <1 x i64> vectors in the "out" form:
        ;   r = (x & mask) | (y & ~mask)
        ; One shared set of assertions covers every run configuration: the
        ; expected code is and/not/and/or on 64-bit GPRs with the result in %rax.
    709 ; CHECK-LABEL: out_v1i64:
    710 ; CHECK:       # %bb.0:
    711 ; CHECK-NEXT:    andq %rdx, %rdi
    712 ; CHECK-NEXT:    notq %rdx
    713 ; CHECK-NEXT:    andq %rsi, %rdx
    714 ; CHECK-NEXT:    orq %rdi, %rdx
    715 ; CHECK-NEXT:    movq %rdx, %rax
    716 ; CHECK-NEXT:    retq
    717   %mx = and <1 x i64> %x, %mask ; bits of x where mask is set
    718   %notmask = xor <1 x i64> %mask, <i64 -1> ; ~mask, spelled as xor with all-ones
    719   %my = and <1 x i64> %y, %notmask ; bits of y where mask is clear
    720   %r = or <1 x i64> %mx, %my ; merge the two disjoint selections
    721   ret <1 x i64> %r
    722 }
    723 
    724 ; ============================================================================ ;
    725 ; 128-bit vector width
    726 ; ============================================================================ ;
    727 
    728 define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
        ; Masked merge of two <16 x i8> vectors in the "out" form:
        ;   r = (x & mask) | (y & ~mask)
        ; Expected output: without SSE2 each byte is merged separately with
        ; andb/notb/andb/orb and stored through %rdi, which is then returned in
        ; %rax; with SSE2 the whole vector is andps/andnps/orps; with XOP it is a
        ; single vpcmov.
    729 ; CHECK-BASELINE-LABEL: out_v16i8:
    730 ; CHECK-BASELINE:       # %bb.0:
    731 ; CHECK-BASELINE-NEXT:    pushq %rbp
    732 ; CHECK-BASELINE-NEXT:    pushq %r15
    733 ; CHECK-BASELINE-NEXT:    pushq %r14
    734 ; CHECK-BASELINE-NEXT:    pushq %r13
    735 ; CHECK-BASELINE-NEXT:    pushq %r12
    736 ; CHECK-BASELINE-NEXT:    pushq %rbx
    737 ; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    738 ; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    739 ; CHECK-BASELINE-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    740 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
    741 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
    742 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
    743 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
    744 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
    745 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
    746 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
    747 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
    748 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dl
    749 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
    750 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
    751 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    752 ; CHECK-BASELINE-NEXT:    andb %al, %sil
    753 ; CHECK-BASELINE-NEXT:    notb %al
    754 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
    755 ; CHECK-BASELINE-NEXT:    orb %sil, %al
    756 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    757 ; CHECK-BASELINE-NEXT:    andb %cl, %sil
    758 ; CHECK-BASELINE-NEXT:    notb %cl
    759 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
    760 ; CHECK-BASELINE-NEXT:    orb %sil, %cl
    761 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    762 ; CHECK-BASELINE-NEXT:    andb %dl, %sil
    763 ; CHECK-BASELINE-NEXT:    notb %dl
    764 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
    765 ; CHECK-BASELINE-NEXT:    orb %sil, %dl
    766 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    767 ; CHECK-BASELINE-NEXT:    andb %bl, %sil
    768 ; CHECK-BASELINE-NEXT:    notb %bl
    769 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
    770 ; CHECK-BASELINE-NEXT:    orb %sil, %bl
    771 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    772 ; CHECK-BASELINE-NEXT:    andb %r13b, %sil
    773 ; CHECK-BASELINE-NEXT:    notb %r13b
    774 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
    775 ; CHECK-BASELINE-NEXT:    orb %sil, %r13b
    776 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    777 ; CHECK-BASELINE-NEXT:    andb %r12b, %sil
    778 ; CHECK-BASELINE-NEXT:    notb %r12b
    779 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
    780 ; CHECK-BASELINE-NEXT:    orb %sil, %r12b
    781 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    782 ; CHECK-BASELINE-NEXT:    andb %r15b, %sil
    783 ; CHECK-BASELINE-NEXT:    notb %r15b
    784 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
    785 ; CHECK-BASELINE-NEXT:    orb %sil, %r15b
    786 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    787 ; CHECK-BASELINE-NEXT:    andb %r14b, %sil
    788 ; CHECK-BASELINE-NEXT:    notb %r14b
    789 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
    790 ; CHECK-BASELINE-NEXT:    orb %sil, %r14b
    791 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    792 ; CHECK-BASELINE-NEXT:    andb %bpl, %sil
    793 ; CHECK-BASELINE-NEXT:    notb %bpl
    794 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
    795 ; CHECK-BASELINE-NEXT:    orb %sil, %bpl
    796 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    797 ; CHECK-BASELINE-NEXT:    andb %r11b, %sil
    798 ; CHECK-BASELINE-NEXT:    notb %r11b
    799 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
    800 ; CHECK-BASELINE-NEXT:    orb %sil, %r11b
    801 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    802 ; CHECK-BASELINE-NEXT:    andb %r10b, %sil
    803 ; CHECK-BASELINE-NEXT:    notb %r10b
    804 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
    805 ; CHECK-BASELINE-NEXT:    orb %sil, %r10b
    806 ; CHECK-BASELINE-NEXT:    movb %al, 15(%rdi)
    807 ; CHECK-BASELINE-NEXT:    movb %cl, 14(%rdi)
    808 ; CHECK-BASELINE-NEXT:    movb %dl, 13(%rdi)
    809 ; CHECK-BASELINE-NEXT:    movb %bl, 12(%rdi)
    810 ; CHECK-BASELINE-NEXT:    movb %r13b, 11(%rdi)
    811 ; CHECK-BASELINE-NEXT:    movb %r12b, 10(%rdi)
    812 ; CHECK-BASELINE-NEXT:    movb %r15b, 9(%rdi)
    813 ; CHECK-BASELINE-NEXT:    movb %r14b, 8(%rdi)
    814 ; CHECK-BASELINE-NEXT:    movb %bpl, 7(%rdi)
    815 ; CHECK-BASELINE-NEXT:    movb %r11b, 6(%rdi)
    816 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
    817 ; CHECK-BASELINE-NEXT:    andb %al, %r9b
    818 ; CHECK-BASELINE-NEXT:    notb %al
    819 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
    820 ; CHECK-BASELINE-NEXT:    orb %r9b, %al
    821 ; CHECK-BASELINE-NEXT:    movb %r10b, 5(%rdi)
    822 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
    823 ; CHECK-BASELINE-NEXT:    andb %cl, %r8b
    824 ; CHECK-BASELINE-NEXT:    notb %cl
    825 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
    826 ; CHECK-BASELINE-NEXT:    orb %r8b, %cl
    827 ; CHECK-BASELINE-NEXT:    movb %al, 4(%rdi)
    828 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
    829 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
    830 ; CHECK-BASELINE-NEXT:    andb %al, %dl
    831 ; CHECK-BASELINE-NEXT:    notb %al
    832 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
    833 ; CHECK-BASELINE-NEXT:    orb %dl, %al
    834 ; CHECK-BASELINE-NEXT:    movb %cl, 3(%rdi)
    835 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
    836 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
    837 ; CHECK-BASELINE-NEXT:    andb %cl, %dl
    838 ; CHECK-BASELINE-NEXT:    notb %cl
    839 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
    840 ; CHECK-BASELINE-NEXT:    orb %dl, %cl
    841 ; CHECK-BASELINE-NEXT:    movb %al, 2(%rdi)
    842 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
    843 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
    844 ; CHECK-BASELINE-NEXT:    andb %al, %dl
    845 ; CHECK-BASELINE-NEXT:    notb %al
    846 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
    847 ; CHECK-BASELINE-NEXT:    orb %dl, %al
    848 ; CHECK-BASELINE-NEXT:    movb %cl, 1(%rdi)
    849 ; CHECK-BASELINE-NEXT:    movb %al, (%rdi)
    850 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
    851 ; CHECK-BASELINE-NEXT:    popq %rbx
    852 ; CHECK-BASELINE-NEXT:    popq %r12
    853 ; CHECK-BASELINE-NEXT:    popq %r13
    854 ; CHECK-BASELINE-NEXT:    popq %r14
    855 ; CHECK-BASELINE-NEXT:    popq %r15
    856 ; CHECK-BASELINE-NEXT:    popq %rbp
    857 ; CHECK-BASELINE-NEXT:    retq
    858 ;
    859 ; CHECK-SSE1-LABEL: out_v16i8:
    860 ; CHECK-SSE1:       # %bb.0:
    861 ; CHECK-SSE1-NEXT:    pushq %rbp
    862 ; CHECK-SSE1-NEXT:    pushq %r15
    863 ; CHECK-SSE1-NEXT:    pushq %r14
    864 ; CHECK-SSE1-NEXT:    pushq %r13
    865 ; CHECK-SSE1-NEXT:    pushq %r12
    866 ; CHECK-SSE1-NEXT:    pushq %rbx
    867 ; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    868 ; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    869 ; CHECK-SSE1-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    870 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
    871 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
    872 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
    873 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
    874 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
    875 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
    876 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
    877 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
    878 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
    879 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
    880 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
    881 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    882 ; CHECK-SSE1-NEXT:    andb %al, %sil
    883 ; CHECK-SSE1-NEXT:    notb %al
    884 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
    885 ; CHECK-SSE1-NEXT:    orb %sil, %al
    886 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    887 ; CHECK-SSE1-NEXT:    andb %cl, %sil
    888 ; CHECK-SSE1-NEXT:    notb %cl
    889 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
    890 ; CHECK-SSE1-NEXT:    orb %sil, %cl
    891 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    892 ; CHECK-SSE1-NEXT:    andb %dl, %sil
    893 ; CHECK-SSE1-NEXT:    notb %dl
    894 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
    895 ; CHECK-SSE1-NEXT:    orb %sil, %dl
    896 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    897 ; CHECK-SSE1-NEXT:    andb %bl, %sil
    898 ; CHECK-SSE1-NEXT:    notb %bl
    899 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
    900 ; CHECK-SSE1-NEXT:    orb %sil, %bl
    901 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    902 ; CHECK-SSE1-NEXT:    andb %r13b, %sil
    903 ; CHECK-SSE1-NEXT:    notb %r13b
    904 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
    905 ; CHECK-SSE1-NEXT:    orb %sil, %r13b
    906 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    907 ; CHECK-SSE1-NEXT:    andb %r12b, %sil
    908 ; CHECK-SSE1-NEXT:    notb %r12b
    909 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
    910 ; CHECK-SSE1-NEXT:    orb %sil, %r12b
    911 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    912 ; CHECK-SSE1-NEXT:    andb %r15b, %sil
    913 ; CHECK-SSE1-NEXT:    notb %r15b
    914 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
    915 ; CHECK-SSE1-NEXT:    orb %sil, %r15b
    916 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    917 ; CHECK-SSE1-NEXT:    andb %r14b, %sil
    918 ; CHECK-SSE1-NEXT:    notb %r14b
    919 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
    920 ; CHECK-SSE1-NEXT:    orb %sil, %r14b
    921 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    922 ; CHECK-SSE1-NEXT:    andb %bpl, %sil
    923 ; CHECK-SSE1-NEXT:    notb %bpl
    924 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
    925 ; CHECK-SSE1-NEXT:    orb %sil, %bpl
    926 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    927 ; CHECK-SSE1-NEXT:    andb %r11b, %sil
    928 ; CHECK-SSE1-NEXT:    notb %r11b
    929 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
    930 ; CHECK-SSE1-NEXT:    orb %sil, %r11b
    931 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
    932 ; CHECK-SSE1-NEXT:    andb %r10b, %sil
    933 ; CHECK-SSE1-NEXT:    notb %r10b
    934 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
    935 ; CHECK-SSE1-NEXT:    orb %sil, %r10b
    936 ; CHECK-SSE1-NEXT:    movb %al, 15(%rdi)
    937 ; CHECK-SSE1-NEXT:    movb %cl, 14(%rdi)
    938 ; CHECK-SSE1-NEXT:    movb %dl, 13(%rdi)
    939 ; CHECK-SSE1-NEXT:    movb %bl, 12(%rdi)
    940 ; CHECK-SSE1-NEXT:    movb %r13b, 11(%rdi)
    941 ; CHECK-SSE1-NEXT:    movb %r12b, 10(%rdi)
    942 ; CHECK-SSE1-NEXT:    movb %r15b, 9(%rdi)
    943 ; CHECK-SSE1-NEXT:    movb %r14b, 8(%rdi)
    944 ; CHECK-SSE1-NEXT:    movb %bpl, 7(%rdi)
    945 ; CHECK-SSE1-NEXT:    movb %r11b, 6(%rdi)
    946 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
    947 ; CHECK-SSE1-NEXT:    andb %al, %r9b
    948 ; CHECK-SSE1-NEXT:    notb %al
    949 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
    950 ; CHECK-SSE1-NEXT:    orb %r9b, %al
    951 ; CHECK-SSE1-NEXT:    movb %r10b, 5(%rdi)
    952 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
    953 ; CHECK-SSE1-NEXT:    andb %cl, %r8b
    954 ; CHECK-SSE1-NEXT:    notb %cl
    955 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
    956 ; CHECK-SSE1-NEXT:    orb %r8b, %cl
    957 ; CHECK-SSE1-NEXT:    movb %al, 4(%rdi)
    958 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
    959 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
    960 ; CHECK-SSE1-NEXT:    andb %al, %dl
    961 ; CHECK-SSE1-NEXT:    notb %al
    962 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
    963 ; CHECK-SSE1-NEXT:    orb %dl, %al
    964 ; CHECK-SSE1-NEXT:    movb %cl, 3(%rdi)
    965 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
    966 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
    967 ; CHECK-SSE1-NEXT:    andb %cl, %dl
    968 ; CHECK-SSE1-NEXT:    notb %cl
    969 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
    970 ; CHECK-SSE1-NEXT:    orb %dl, %cl
    971 ; CHECK-SSE1-NEXT:    movb %al, 2(%rdi)
    972 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
    973 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
    974 ; CHECK-SSE1-NEXT:    andb %al, %dl
    975 ; CHECK-SSE1-NEXT:    notb %al
    976 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
    977 ; CHECK-SSE1-NEXT:    orb %dl, %al
    978 ; CHECK-SSE1-NEXT:    movb %cl, 1(%rdi)
    979 ; CHECK-SSE1-NEXT:    movb %al, (%rdi)
    980 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
    981 ; CHECK-SSE1-NEXT:    popq %rbx
    982 ; CHECK-SSE1-NEXT:    popq %r12
    983 ; CHECK-SSE1-NEXT:    popq %r13
    984 ; CHECK-SSE1-NEXT:    popq %r14
    985 ; CHECK-SSE1-NEXT:    popq %r15
    986 ; CHECK-SSE1-NEXT:    popq %rbp
    987 ; CHECK-SSE1-NEXT:    retq
    988 ;
    989 ; CHECK-SSE2-LABEL: out_v16i8:
    990 ; CHECK-SSE2:       # %bb.0:
    991 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
    992 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
    993 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
    994 ; CHECK-SSE2-NEXT:    retq
    995 ;
    996 ; CHECK-XOP-LABEL: out_v16i8:
    997 ; CHECK-XOP:       # %bb.0:
    998 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
    999 ; CHECK-XOP-NEXT:    retq
   1000   %mx = and <16 x i8> %x, %mask ; bits of x where mask is set
   1001   %notmask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; ~mask, spelled as xor with all-ones
   1002   %my = and <16 x i8> %y, %notmask ; bits of y where mask is clear
   1003   %r = or <16 x i8> %mx, %my ; merge the two disjoint selections
   1004   ret <16 x i8> %r
   1005 }
   1006 
   1007 define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
        ; Masked merge of two <8 x i16> vectors in the "out" form:
        ;   r = (x & mask) | (y & ~mask)
        ; Expected output: without SSE2 each 16-bit lane is merged separately
        ; in GPRs and stored through %rdi, which is then returned in %rax; with
        ; SSE2 the whole vector is andps/andnps/orps; with XOP it is a single
        ; vpcmov.
   1008 ; CHECK-BASELINE-LABEL: out_v8i16:
   1009 ; CHECK-BASELINE:       # %bb.0:
   1010 ; CHECK-BASELINE-NEXT:    pushq %rbp
   1011 ; CHECK-BASELINE-NEXT:    pushq %r14
   1012 ; CHECK-BASELINE-NEXT:    pushq %rbx
   1013 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
   1014 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
   1015 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r14d
   1016 ; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
   1017 ; CHECK-BASELINE-NEXT:    andw %r14w, %bx
   1018 ; CHECK-BASELINE-NEXT:    notl %r14d
   1019 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r14w
   1020 ; CHECK-BASELINE-NEXT:    orl %ebx, %r14d
   1021 ; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
   1022 ; CHECK-BASELINE-NEXT:    andw %r11w, %bx
   1023 ; CHECK-BASELINE-NEXT:    notl %r11d
   1024 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
   1025 ; CHECK-BASELINE-NEXT:    orl %ebx, %r11d
   1026 ; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
   1027 ; CHECK-BASELINE-NEXT:    andw %r10w, %bx
   1028 ; CHECK-BASELINE-NEXT:    notl %r10d
   1029 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
   1030 ; CHECK-BASELINE-NEXT:    orl %ebx, %r10d
   1031 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
   1032 ; CHECK-BASELINE-NEXT:    andl %ebx, %r9d
   1033 ; CHECK-BASELINE-NEXT:    notl %ebx
   1034 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %bx
   1035 ; CHECK-BASELINE-NEXT:    orl %r9d, %ebx
   1036 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
   1037 ; CHECK-BASELINE-NEXT:    andl %eax, %r8d
   1038 ; CHECK-BASELINE-NEXT:    notl %eax
   1039 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %ax
   1040 ; CHECK-BASELINE-NEXT:    orl %r8d, %eax
   1041 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebp
   1042 ; CHECK-BASELINE-NEXT:    andl %ebp, %ecx
   1043 ; CHECK-BASELINE-NEXT:    notl %ebp
   1044 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %bp
   1045 ; CHECK-BASELINE-NEXT:    orl %ecx, %ebp
   1046 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
   1047 ; CHECK-BASELINE-NEXT:    andl %ecx, %edx
   1048 ; CHECK-BASELINE-NEXT:    notl %ecx
   1049 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %cx
   1050 ; CHECK-BASELINE-NEXT:    orl %edx, %ecx
   1051 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %edx
   1052 ; CHECK-BASELINE-NEXT:    andl %edx, %esi
   1053 ; CHECK-BASELINE-NEXT:    notl %edx
   1054 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %dx
   1055 ; CHECK-BASELINE-NEXT:    orl %esi, %edx
   1056 ; CHECK-BASELINE-NEXT:    movw %r14w, 14(%rdi)
   1057 ; CHECK-BASELINE-NEXT:    movw %r11w, 12(%rdi)
   1058 ; CHECK-BASELINE-NEXT:    movw %r10w, 10(%rdi)
   1059 ; CHECK-BASELINE-NEXT:    movw %bx, 8(%rdi)
   1060 ; CHECK-BASELINE-NEXT:    movw %ax, 6(%rdi)
   1061 ; CHECK-BASELINE-NEXT:    movw %bp, 4(%rdi)
   1062 ; CHECK-BASELINE-NEXT:    movw %cx, 2(%rdi)
   1063 ; CHECK-BASELINE-NEXT:    movw %dx, (%rdi)
   1064 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   1065 ; CHECK-BASELINE-NEXT:    popq %rbx
   1066 ; CHECK-BASELINE-NEXT:    popq %r14
   1067 ; CHECK-BASELINE-NEXT:    popq %rbp
   1068 ; CHECK-BASELINE-NEXT:    retq
   1069 ;
   1070 ; CHECK-SSE1-LABEL: out_v8i16:
   1071 ; CHECK-SSE1:       # %bb.0:
   1072 ; CHECK-SSE1-NEXT:    pushq %rbp
   1073 ; CHECK-SSE1-NEXT:    pushq %r14
   1074 ; CHECK-SSE1-NEXT:    pushq %rbx
   1075 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
   1076 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
   1077 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r14d
   1078 ; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
   1079 ; CHECK-SSE1-NEXT:    andw %r14w, %bx
   1080 ; CHECK-SSE1-NEXT:    notl %r14d
   1081 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r14w
   1082 ; CHECK-SSE1-NEXT:    orl %ebx, %r14d
   1083 ; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
   1084 ; CHECK-SSE1-NEXT:    andw %r11w, %bx
   1085 ; CHECK-SSE1-NEXT:    notl %r11d
   1086 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
   1087 ; CHECK-SSE1-NEXT:    orl %ebx, %r11d
   1088 ; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
   1089 ; CHECK-SSE1-NEXT:    andw %r10w, %bx
   1090 ; CHECK-SSE1-NEXT:    notl %r10d
   1091 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
   1092 ; CHECK-SSE1-NEXT:    orl %ebx, %r10d
   1093 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
   1094 ; CHECK-SSE1-NEXT:    andl %ebx, %r9d
   1095 ; CHECK-SSE1-NEXT:    notl %ebx
   1096 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %bx
   1097 ; CHECK-SSE1-NEXT:    orl %r9d, %ebx
   1098 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
   1099 ; CHECK-SSE1-NEXT:    andl %eax, %r8d
   1100 ; CHECK-SSE1-NEXT:    notl %eax
   1101 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %ax
   1102 ; CHECK-SSE1-NEXT:    orl %r8d, %eax
   1103 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebp
   1104 ; CHECK-SSE1-NEXT:    andl %ebp, %ecx
   1105 ; CHECK-SSE1-NEXT:    notl %ebp
   1106 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %bp
   1107 ; CHECK-SSE1-NEXT:    orl %ecx, %ebp
   1108 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
   1109 ; CHECK-SSE1-NEXT:    andl %ecx, %edx
   1110 ; CHECK-SSE1-NEXT:    notl %ecx
   1111 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %cx
   1112 ; CHECK-SSE1-NEXT:    orl %edx, %ecx
   1113 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %edx
   1114 ; CHECK-SSE1-NEXT:    andl %edx, %esi
   1115 ; CHECK-SSE1-NEXT:    notl %edx
   1116 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %dx
   1117 ; CHECK-SSE1-NEXT:    orl %esi, %edx
   1118 ; CHECK-SSE1-NEXT:    movw %r14w, 14(%rdi)
   1119 ; CHECK-SSE1-NEXT:    movw %r11w, 12(%rdi)
   1120 ; CHECK-SSE1-NEXT:    movw %r10w, 10(%rdi)
   1121 ; CHECK-SSE1-NEXT:    movw %bx, 8(%rdi)
   1122 ; CHECK-SSE1-NEXT:    movw %ax, 6(%rdi)
   1123 ; CHECK-SSE1-NEXT:    movw %bp, 4(%rdi)
   1124 ; CHECK-SSE1-NEXT:    movw %cx, 2(%rdi)
   1125 ; CHECK-SSE1-NEXT:    movw %dx, (%rdi)
   1126 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   1127 ; CHECK-SSE1-NEXT:    popq %rbx
   1128 ; CHECK-SSE1-NEXT:    popq %r14
   1129 ; CHECK-SSE1-NEXT:    popq %rbp
   1130 ; CHECK-SSE1-NEXT:    retq
   1131 ;
   1132 ; CHECK-SSE2-LABEL: out_v8i16:
   1133 ; CHECK-SSE2:       # %bb.0:
   1134 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
   1135 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
   1136 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   1137 ; CHECK-SSE2-NEXT:    retq
   1138 ;
   1139 ; CHECK-XOP-LABEL: out_v8i16:
   1140 ; CHECK-XOP:       # %bb.0:
   1141 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
   1142 ; CHECK-XOP-NEXT:    retq
   1143   %mx = and <8 x i16> %x, %mask ; bits of x where mask is set
   1144   %notmask = xor <8 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; ~mask, spelled as xor with all-ones
   1145   %my = and <8 x i16> %y, %notmask ; bits of y where mask is clear
   1146   %r = or <8 x i16> %mx, %my ; merge the two disjoint selections
   1147   ret <8 x i16> %r
   1148 }
   1149 
   1150 define <4 x i32> @out_v4i32(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind {
        ; Masked merge of two <4 x i32> vectors in the "out" form:
        ;   r = (x & mask) | (y & ~mask)
        ; Unlike the by-value variants, x, y and mask are loaded through the
        ; pointer arguments. Expected output: per-lane scalar and/not/or stored
        ; through %rdi when no SSE is available; andps/andnps/orps with SSE1
        ; (result stored to (%rdi)) and with SSE2 (result left in %xmm0); a single
        ; vpcmov taking y directly from memory with XOP.
   1151 ; CHECK-BASELINE-LABEL: out_v4i32:
   1152 ; CHECK-BASELINE:       # %bb.0:
   1153 ; CHECK-BASELINE-NEXT:    pushq %rbx
   1154 ; CHECK-BASELINE-NEXT:    movl (%rcx), %r8d
   1155 ; CHECK-BASELINE-NEXT:    movl 4(%rcx), %r9d
   1156 ; CHECK-BASELINE-NEXT:    movl 8(%rcx), %eax
   1157 ; CHECK-BASELINE-NEXT:    movl 12(%rcx), %ecx
   1158 ; CHECK-BASELINE-NEXT:    movl 12(%rsi), %r10d
   1159 ; CHECK-BASELINE-NEXT:    andl %ecx, %r10d
   1160 ; CHECK-BASELINE-NEXT:    movl 8(%rsi), %r11d
   1161 ; CHECK-BASELINE-NEXT:    andl %eax, %r11d
   1162 ; CHECK-BASELINE-NEXT:    movl 4(%rsi), %ebx
   1163 ; CHECK-BASELINE-NEXT:    andl %r9d, %ebx
   1164 ; CHECK-BASELINE-NEXT:    movl (%rsi), %esi
   1165 ; CHECK-BASELINE-NEXT:    andl %r8d, %esi
   1166 ; CHECK-BASELINE-NEXT:    notl %r8d
   1167 ; CHECK-BASELINE-NEXT:    notl %r9d
   1168 ; CHECK-BASELINE-NEXT:    notl %eax
   1169 ; CHECK-BASELINE-NEXT:    notl %ecx
   1170 ; CHECK-BASELINE-NEXT:    andl 12(%rdx), %ecx
   1171 ; CHECK-BASELINE-NEXT:    orl %r10d, %ecx
   1172 ; CHECK-BASELINE-NEXT:    andl 8(%rdx), %eax
   1173 ; CHECK-BASELINE-NEXT:    orl %r11d, %eax
   1174 ; CHECK-BASELINE-NEXT:    andl 4(%rdx), %r9d
   1175 ; CHECK-BASELINE-NEXT:    orl %ebx, %r9d
   1176 ; CHECK-BASELINE-NEXT:    andl (%rdx), %r8d
   1177 ; CHECK-BASELINE-NEXT:    orl %esi, %r8d
   1178 ; CHECK-BASELINE-NEXT:    movl %ecx, 12(%rdi)
   1179 ; CHECK-BASELINE-NEXT:    movl %eax, 8(%rdi)
   1180 ; CHECK-BASELINE-NEXT:    movl %r9d, 4(%rdi)
   1181 ; CHECK-BASELINE-NEXT:    movl %r8d, (%rdi)
   1182 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   1183 ; CHECK-BASELINE-NEXT:    popq %rbx
   1184 ; CHECK-BASELINE-NEXT:    retq
   1185 ;
   1186 ; CHECK-SSE1-LABEL: out_v4i32:
   1187 ; CHECK-SSE1:       # %bb.0:
   1188 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
   1189 ; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
   1190 ; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
   1191 ; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
   1192 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
   1193 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
   1194 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   1195 ; CHECK-SSE1-NEXT:    retq
   1196 ;
   1197 ; CHECK-SSE2-LABEL: out_v4i32:
   1198 ; CHECK-SSE2:       # %bb.0:
   1199 ; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
   1200 ; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
   1201 ; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
   1202 ; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
   1203 ; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
   1204 ; CHECK-SSE2-NEXT:    retq
   1205 ;
   1206 ; CHECK-XOP-LABEL: out_v4i32:
   1207 ; CHECK-XOP:       # %bb.0:
   1208 ; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
   1209 ; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
   1210 ; CHECK-XOP-NEXT:    vpcmov %xmm1, (%rsi), %xmm0, %xmm0
   1211 ; CHECK-XOP-NEXT:    retq
   1212   %x = load <4 x i32>, <4 x i32> *%px, align 16 ; all three operands are 16-byte-aligned loads
   1213   %y = load <4 x i32>, <4 x i32> *%py, align 16
   1214   %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
   1215   %mx = and <4 x i32> %x, %mask ; bits of x where mask is set
   1216   %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> ; ~mask, spelled as xor with all-ones
   1217   %my = and <4 x i32> %y, %notmask ; bits of y where mask is clear
   1218   %r = or <4 x i32> %mx, %my ; merge the two disjoint selections
   1219   ret <4 x i32> %r
   1220 }
   1221 
   1222 define <4 x i32> @out_v4i32_undef(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind {
        ; Same masked merge as out_v4i32, r = (x & mask) | (y & ~mask), except
        ; that lane 2 of the all-ones constant used to invert the mask is undef.
        ; The expected SSE1, SSE2 and XOP output is identical to out_v4i32 (the
        ; undef lane does not block the andnps / vpcmov forms). The expected
        ; output with no SSE differs: lane 2 is only x & mask, with no not/and/or
        ; of the corresponding y element.
   1223 ; CHECK-BASELINE-LABEL: out_v4i32_undef:
   1224 ; CHECK-BASELINE:       # %bb.0:
   1225 ; CHECK-BASELINE-NEXT:    movl 8(%rsi), %r8d
   1226 ; CHECK-BASELINE-NEXT:    movl (%rcx), %r9d
   1227 ; CHECK-BASELINE-NEXT:    movl 4(%rcx), %r10d
   1228 ; CHECK-BASELINE-NEXT:    movl 12(%rcx), %eax
   1229 ; CHECK-BASELINE-NEXT:    andl 8(%rcx), %r8d
   1230 ; CHECK-BASELINE-NEXT:    movl 12(%rsi), %ecx
   1231 ; CHECK-BASELINE-NEXT:    andl %eax, %ecx
   1232 ; CHECK-BASELINE-NEXT:    movl 4(%rsi), %r11d
   1233 ; CHECK-BASELINE-NEXT:    andl %r10d, %r11d
   1234 ; CHECK-BASELINE-NEXT:    movl (%rsi), %esi
   1235 ; CHECK-BASELINE-NEXT:    andl %r9d, %esi
   1236 ; CHECK-BASELINE-NEXT:    notl %r9d
   1237 ; CHECK-BASELINE-NEXT:    notl %r10d
   1238 ; CHECK-BASELINE-NEXT:    notl %eax
   1239 ; CHECK-BASELINE-NEXT:    andl 12(%rdx), %eax
   1240 ; CHECK-BASELINE-NEXT:    orl %ecx, %eax
   1241 ; CHECK-BASELINE-NEXT:    andl 4(%rdx), %r10d
   1242 ; CHECK-BASELINE-NEXT:    orl %r11d, %r10d
   1243 ; CHECK-BASELINE-NEXT:    andl (%rdx), %r9d
   1244 ; CHECK-BASELINE-NEXT:    orl %esi, %r9d
   1245 ; CHECK-BASELINE-NEXT:    movl %r8d, 8(%rdi)
   1246 ; CHECK-BASELINE-NEXT:    movl %eax, 12(%rdi)
   1247 ; CHECK-BASELINE-NEXT:    movl %r10d, 4(%rdi)
   1248 ; CHECK-BASELINE-NEXT:    movl %r9d, (%rdi)
   1249 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   1250 ; CHECK-BASELINE-NEXT:    retq
   1251 ;
   1252 ; CHECK-SSE1-LABEL: out_v4i32_undef:
   1253 ; CHECK-SSE1:       # %bb.0:
   1254 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
   1255 ; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
   1256 ; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
   1257 ; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
   1258 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
   1259 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
   1260 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   1261 ; CHECK-SSE1-NEXT:    retq
   1262 ;
   1263 ; CHECK-SSE2-LABEL: out_v4i32_undef:
   1264 ; CHECK-SSE2:       # %bb.0:
   1265 ; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
   1266 ; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
   1267 ; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
   1268 ; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
   1269 ; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
   1270 ; CHECK-SSE2-NEXT:    retq
   1271 ;
   1272 ; CHECK-XOP-LABEL: out_v4i32_undef:
   1273 ; CHECK-XOP:       # %bb.0:
   1274 ; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
   1275 ; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
   1276 ; CHECK-XOP-NEXT:    vpcmov %xmm1, (%rsi), %xmm0, %xmm0
   1277 ; CHECK-XOP-NEXT:    retq
   1278   %x = load <4 x i32>, <4 x i32> *%px, align 16 ; all three operands are 16-byte-aligned loads
   1279   %y = load <4 x i32>, <4 x i32> *%py, align 16
   1280   %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
   1281   %mx = and <4 x i32> %x, %mask ; bits of x where mask is set
   1282   %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1> ; ~mask, but lane 2 of the constant is undef
   1283   %my = and <4 x i32> %y, %notmask ; bits of y where mask is clear (lane 2 depends on the undef)
   1284   %r = or <4 x i32> %mx, %my ; merge the two selections
   1285   ret <4 x i32> %r
   1286 }
   1287 
   1288 define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
        ; Masked merge of two <2 x i64> vectors in the "out" form:
        ;   r = (x & mask) | (y & ~mask)
        ; Expected output: without SSE2 each 64-bit lane is merged in GPRs with
        ; and/not/and/or and the two results are returned in %rax and %rdx; with
        ; SSE2 the whole vector is andps/andnps/orps; with XOP it is a single
        ; vpcmov.
   1289 ; CHECK-BASELINE-LABEL: out_v2i64:
   1290 ; CHECK-BASELINE:       # %bb.0:
   1291 ; CHECK-BASELINE-NEXT:    andq %r9, %rsi
   1292 ; CHECK-BASELINE-NEXT:    andq %r8, %rdi
   1293 ; CHECK-BASELINE-NEXT:    notq %r8
   1294 ; CHECK-BASELINE-NEXT:    notq %r9
   1295 ; CHECK-BASELINE-NEXT:    andq %rcx, %r9
   1296 ; CHECK-BASELINE-NEXT:    orq %rsi, %r9
   1297 ; CHECK-BASELINE-NEXT:    andq %rdx, %r8
   1298 ; CHECK-BASELINE-NEXT:    orq %rdi, %r8
   1299 ; CHECK-BASELINE-NEXT:    movq %r8, %rax
   1300 ; CHECK-BASELINE-NEXT:    movq %r9, %rdx
   1301 ; CHECK-BASELINE-NEXT:    retq
   1302 ;
   1303 ; CHECK-SSE1-LABEL: out_v2i64:
   1304 ; CHECK-SSE1:       # %bb.0:
   1305 ; CHECK-SSE1-NEXT:    andq %r9, %rsi
   1306 ; CHECK-SSE1-NEXT:    andq %r8, %rdi
   1307 ; CHECK-SSE1-NEXT:    notq %r8
   1308 ; CHECK-SSE1-NEXT:    notq %r9
   1309 ; CHECK-SSE1-NEXT:    andq %rcx, %r9
   1310 ; CHECK-SSE1-NEXT:    orq %rsi, %r9
   1311 ; CHECK-SSE1-NEXT:    andq %rdx, %r8
   1312 ; CHECK-SSE1-NEXT:    orq %rdi, %r8
   1313 ; CHECK-SSE1-NEXT:    movq %r8, %rax
   1314 ; CHECK-SSE1-NEXT:    movq %r9, %rdx
   1315 ; CHECK-SSE1-NEXT:    retq
   1316 ;
   1317 ; CHECK-SSE2-LABEL: out_v2i64:
   1318 ; CHECK-SSE2:       # %bb.0:
   1319 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
   1320 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
   1321 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   1322 ; CHECK-SSE2-NEXT:    retq
   1323 ;
   1324 ; CHECK-XOP-LABEL: out_v2i64:
   1325 ; CHECK-XOP:       # %bb.0:
   1326 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
   1327 ; CHECK-XOP-NEXT:    retq
   1328   %mx = and <2 x i64> %x, %mask ; bits of x where mask is set
   1329   %notmask = xor <2 x i64> %mask, <i64 -1, i64 -1> ; ~mask, spelled as xor with all-ones
   1330   %my = and <2 x i64> %y, %notmask ; bits of y where mask is clear
   1331   %r = or <2 x i64> %mx, %my ; merge the two disjoint selections
   1332   ret <2 x i64> %r
   1333 }
   1334 
   1335 ; ============================================================================ ;
   1336 ; 256-bit vector width
   1337 ; ============================================================================ ;
   1338 
   1339 define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) nounwind {
        ; Bitwise select on a 256-bit vector of bytes. The three operands are
        ; loaded (align 32) from %px, %py and %pmask, and the result is
        ; (x & mask) | (y & ~mask), so each result bit comes from x where the
        ; mask bit is set and from y where it is clear.
        ;
        ; Expected lowering per run line, as recorded by the assertions below
        ;   no SSE, and SSE1 only - 32 scalar and/not/and/or byte sequences, with
        ;     the result stored through the pointer in %rdi and returned in %rax
        ;   SSE2 - two 128-bit halves, each lowered to andps + andnps + orps
        ;   XOP  - a single 256-bit vpcmov
        ;
        ; The assertions are autogenerated by utils/update_llc_test_checks.py,
        ; so regenerate them rather than editing them by hand.
   1340 ; CHECK-BASELINE-LABEL: out_v32i8:
   1341 ; CHECK-BASELINE:       # %bb.0:
   1342 ; CHECK-BASELINE-NEXT:    pushq %rbp
   1343 ; CHECK-BASELINE-NEXT:    pushq %r15
   1344 ; CHECK-BASELINE-NEXT:    pushq %r14
   1345 ; CHECK-BASELINE-NEXT:    pushq %r13
   1346 ; CHECK-BASELINE-NEXT:    pushq %r12
   1347 ; CHECK-BASELINE-NEXT:    pushq %rbx
   1348 ; CHECK-BASELINE-NEXT:    movq %rcx, %r15
   1349 ; CHECK-BASELINE-NEXT:    movq %rsi, %r14
   1350 ; CHECK-BASELINE-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
   1351 ; CHECK-BASELINE-NEXT:    movb 15(%rcx), %al
   1352 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1353 ; CHECK-BASELINE-NEXT:    movb 16(%rcx), %al
   1354 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1355 ; CHECK-BASELINE-NEXT:    movb 17(%rcx), %al
   1356 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1357 ; CHECK-BASELINE-NEXT:    movb 18(%rcx), %al
   1358 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1359 ; CHECK-BASELINE-NEXT:    movb 19(%rcx), %al
   1360 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1361 ; CHECK-BASELINE-NEXT:    movb 20(%rcx), %al
   1362 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1363 ; CHECK-BASELINE-NEXT:    movb 21(%rcx), %r12b
   1364 ; CHECK-BASELINE-NEXT:    movb 22(%rcx), %r10b
   1365 ; CHECK-BASELINE-NEXT:    movb 23(%rcx), %r11b
   1366 ; CHECK-BASELINE-NEXT:    movb 24(%rcx), %bpl
   1367 ; CHECK-BASELINE-NEXT:    movb 25(%rcx), %r13b
   1368 ; CHECK-BASELINE-NEXT:    movb 26(%rcx), %r9b
   1369 ; CHECK-BASELINE-NEXT:    movb 27(%rcx), %r8b
   1370 ; CHECK-BASELINE-NEXT:    movb 28(%rcx), %dil
   1371 ; CHECK-BASELINE-NEXT:    movb 29(%rcx), %sil
   1372 ; CHECK-BASELINE-NEXT:    movb 30(%rcx), %bl
   1373 ; CHECK-BASELINE-NEXT:    movb 31(%rcx), %al
   1374 ; CHECK-BASELINE-NEXT:    movb 31(%r14), %cl
   1375 ; CHECK-BASELINE-NEXT:    andb %al, %cl
   1376 ; CHECK-BASELINE-NEXT:    notb %al
   1377 ; CHECK-BASELINE-NEXT:    andb 31(%rdx), %al
   1378 ; CHECK-BASELINE-NEXT:    orb %cl, %al
   1379 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1380 ; CHECK-BASELINE-NEXT:    movb 30(%r14), %al
   1381 ; CHECK-BASELINE-NEXT:    andb %bl, %al
   1382 ; CHECK-BASELINE-NEXT:    notb %bl
   1383 ; CHECK-BASELINE-NEXT:    andb 30(%rdx), %bl
   1384 ; CHECK-BASELINE-NEXT:    orb %al, %bl
   1385 ; CHECK-BASELINE-NEXT:    movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1386 ; CHECK-BASELINE-NEXT:    movb 29(%r14), %al
   1387 ; CHECK-BASELINE-NEXT:    andb %sil, %al
   1388 ; CHECK-BASELINE-NEXT:    notb %sil
   1389 ; CHECK-BASELINE-NEXT:    andb 29(%rdx), %sil
   1390 ; CHECK-BASELINE-NEXT:    orb %al, %sil
   1391 ; CHECK-BASELINE-NEXT:    movb %sil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1392 ; CHECK-BASELINE-NEXT:    movb 28(%r14), %al
   1393 ; CHECK-BASELINE-NEXT:    andb %dil, %al
   1394 ; CHECK-BASELINE-NEXT:    notb %dil
   1395 ; CHECK-BASELINE-NEXT:    andb 28(%rdx), %dil
   1396 ; CHECK-BASELINE-NEXT:    orb %al, %dil
   1397 ; CHECK-BASELINE-NEXT:    movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1398 ; CHECK-BASELINE-NEXT:    movb 27(%r14), %al
   1399 ; CHECK-BASELINE-NEXT:    andb %r8b, %al
   1400 ; CHECK-BASELINE-NEXT:    notb %r8b
   1401 ; CHECK-BASELINE-NEXT:    andb 27(%rdx), %r8b
   1402 ; CHECK-BASELINE-NEXT:    orb %al, %r8b
   1403 ; CHECK-BASELINE-NEXT:    movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1404 ; CHECK-BASELINE-NEXT:    movb 26(%r14), %al
   1405 ; CHECK-BASELINE-NEXT:    andb %r9b, %al
   1406 ; CHECK-BASELINE-NEXT:    notb %r9b
   1407 ; CHECK-BASELINE-NEXT:    andb 26(%rdx), %r9b
   1408 ; CHECK-BASELINE-NEXT:    orb %al, %r9b
   1409 ; CHECK-BASELINE-NEXT:    movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1410 ; CHECK-BASELINE-NEXT:    movb 25(%r14), %al
   1411 ; CHECK-BASELINE-NEXT:    andb %r13b, %al
   1412 ; CHECK-BASELINE-NEXT:    notb %r13b
   1413 ; CHECK-BASELINE-NEXT:    andb 25(%rdx), %r13b
   1414 ; CHECK-BASELINE-NEXT:    orb %al, %r13b
   1415 ; CHECK-BASELINE-NEXT:    movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1416 ; CHECK-BASELINE-NEXT:    movb 24(%r14), %al
   1417 ; CHECK-BASELINE-NEXT:    andb %bpl, %al
   1418 ; CHECK-BASELINE-NEXT:    notb %bpl
   1419 ; CHECK-BASELINE-NEXT:    andb 24(%rdx), %bpl
   1420 ; CHECK-BASELINE-NEXT:    orb %al, %bpl
   1421 ; CHECK-BASELINE-NEXT:    movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1422 ; CHECK-BASELINE-NEXT:    movb 23(%r14), %al
   1423 ; CHECK-BASELINE-NEXT:    andb %r11b, %al
   1424 ; CHECK-BASELINE-NEXT:    notb %r11b
   1425 ; CHECK-BASELINE-NEXT:    andb 23(%rdx), %r11b
   1426 ; CHECK-BASELINE-NEXT:    orb %al, %r11b
   1427 ; CHECK-BASELINE-NEXT:    movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1428 ; CHECK-BASELINE-NEXT:    movb 22(%r14), %al
   1429 ; CHECK-BASELINE-NEXT:    andb %r10b, %al
   1430 ; CHECK-BASELINE-NEXT:    notb %r10b
   1431 ; CHECK-BASELINE-NEXT:    andb 22(%rdx), %r10b
   1432 ; CHECK-BASELINE-NEXT:    orb %al, %r10b
   1433 ; CHECK-BASELINE-NEXT:    movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1434 ; CHECK-BASELINE-NEXT:    movb 21(%r14), %al
   1435 ; CHECK-BASELINE-NEXT:    andb %r12b, %al
   1436 ; CHECK-BASELINE-NEXT:    notb %r12b
   1437 ; CHECK-BASELINE-NEXT:    andb 21(%rdx), %r12b
   1438 ; CHECK-BASELINE-NEXT:    orb %al, %r12b
   1439 ; CHECK-BASELINE-NEXT:    movb %r12b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1440 ; CHECK-BASELINE-NEXT:    movb 20(%r14), %al
   1441 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1442 ; CHECK-BASELINE-NEXT:    andb %cl, %al
   1443 ; CHECK-BASELINE-NEXT:    notb %cl
   1444 ; CHECK-BASELINE-NEXT:    andb 20(%rdx), %cl
   1445 ; CHECK-BASELINE-NEXT:    orb %al, %cl
   1446 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1447 ; CHECK-BASELINE-NEXT:    movb 19(%r14), %al
   1448 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1449 ; CHECK-BASELINE-NEXT:    andb %cl, %al
   1450 ; CHECK-BASELINE-NEXT:    notb %cl
   1451 ; CHECK-BASELINE-NEXT:    andb 19(%rdx), %cl
   1452 ; CHECK-BASELINE-NEXT:    orb %al, %cl
   1453 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1454 ; CHECK-BASELINE-NEXT:    movb 18(%r14), %al
   1455 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1456 ; CHECK-BASELINE-NEXT:    andb %cl, %al
   1457 ; CHECK-BASELINE-NEXT:    notb %cl
   1458 ; CHECK-BASELINE-NEXT:    andb 18(%rdx), %cl
   1459 ; CHECK-BASELINE-NEXT:    orb %al, %cl
   1460 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1461 ; CHECK-BASELINE-NEXT:    movb 17(%r14), %al
   1462 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1463 ; CHECK-BASELINE-NEXT:    andb %cl, %al
   1464 ; CHECK-BASELINE-NEXT:    notb %cl
   1465 ; CHECK-BASELINE-NEXT:    andb 17(%rdx), %cl
   1466 ; CHECK-BASELINE-NEXT:    orb %al, %cl
   1467 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1468 ; CHECK-BASELINE-NEXT:    movb 16(%r14), %al
   1469 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1470 ; CHECK-BASELINE-NEXT:    andb %cl, %al
   1471 ; CHECK-BASELINE-NEXT:    notb %cl
   1472 ; CHECK-BASELINE-NEXT:    movq %rdx, %rbx
   1473 ; CHECK-BASELINE-NEXT:    andb 16(%rdx), %cl
   1474 ; CHECK-BASELINE-NEXT:    orb %al, %cl
   1475 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1476 ; CHECK-BASELINE-NEXT:    movb 15(%r14), %al
   1477 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1478 ; CHECK-BASELINE-NEXT:    andb %cl, %al
   1479 ; CHECK-BASELINE-NEXT:    notb %cl
   1480 ; CHECK-BASELINE-NEXT:    andb 15(%rdx), %cl
   1481 ; CHECK-BASELINE-NEXT:    orb %al, %cl
   1482 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1483 ; CHECK-BASELINE-NEXT:    movb 14(%r15), %cl
   1484 ; CHECK-BASELINE-NEXT:    movb 14(%r14), %al
   1485 ; CHECK-BASELINE-NEXT:    andb %cl, %al
   1486 ; CHECK-BASELINE-NEXT:    notb %cl
   1487 ; CHECK-BASELINE-NEXT:    andb 14(%rdx), %cl
   1488 ; CHECK-BASELINE-NEXT:    orb %al, %cl
   1489 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1490 ; CHECK-BASELINE-NEXT:    movb 13(%r15), %cl
   1491 ; CHECK-BASELINE-NEXT:    movb 13(%r14), %al
   1492 ; CHECK-BASELINE-NEXT:    andb %cl, %al
   1493 ; CHECK-BASELINE-NEXT:    notb %cl
   1494 ; CHECK-BASELINE-NEXT:    andb 13(%rdx), %cl
   1495 ; CHECK-BASELINE-NEXT:    orb %al, %cl
   1496 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1497 ; CHECK-BASELINE-NEXT:    movb 12(%r15), %cl
   1498 ; CHECK-BASELINE-NEXT:    movb 12(%r14), %al
   1499 ; CHECK-BASELINE-NEXT:    andb %cl, %al
   1500 ; CHECK-BASELINE-NEXT:    notb %cl
   1501 ; CHECK-BASELINE-NEXT:    andb 12(%rdx), %cl
   1502 ; CHECK-BASELINE-NEXT:    orb %al, %cl
   1503 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1504 ; CHECK-BASELINE-NEXT:    movb 11(%r15), %r13b
   1505 ; CHECK-BASELINE-NEXT:    movb 11(%r14), %al
   1506 ; CHECK-BASELINE-NEXT:    andb %r13b, %al
   1507 ; CHECK-BASELINE-NEXT:    notb %r13b
   1508 ; CHECK-BASELINE-NEXT:    andb 11(%rdx), %r13b
   1509 ; CHECK-BASELINE-NEXT:    orb %al, %r13b
   1510 ; CHECK-BASELINE-NEXT:    movb 10(%r15), %r12b
   1511 ; CHECK-BASELINE-NEXT:    movb 10(%r14), %al
   1512 ; CHECK-BASELINE-NEXT:    andb %r12b, %al
   1513 ; CHECK-BASELINE-NEXT:    notb %r12b
   1514 ; CHECK-BASELINE-NEXT:    andb 10(%rdx), %r12b
   1515 ; CHECK-BASELINE-NEXT:    orb %al, %r12b
   1516 ; CHECK-BASELINE-NEXT:    movb 9(%r15), %bpl
   1517 ; CHECK-BASELINE-NEXT:    movb 9(%r14), %al
   1518 ; CHECK-BASELINE-NEXT:    andb %bpl, %al
   1519 ; CHECK-BASELINE-NEXT:    notb %bpl
   1520 ; CHECK-BASELINE-NEXT:    andb 9(%rdx), %bpl
   1521 ; CHECK-BASELINE-NEXT:    orb %al, %bpl
   1522 ; CHECK-BASELINE-NEXT:    movb 8(%r15), %r11b
   1523 ; CHECK-BASELINE-NEXT:    movb 8(%r14), %al
   1524 ; CHECK-BASELINE-NEXT:    andb %r11b, %al
   1525 ; CHECK-BASELINE-NEXT:    notb %r11b
   1526 ; CHECK-BASELINE-NEXT:    andb 8(%rdx), %r11b
   1527 ; CHECK-BASELINE-NEXT:    orb %al, %r11b
   1528 ; CHECK-BASELINE-NEXT:    movb 7(%r15), %r10b
   1529 ; CHECK-BASELINE-NEXT:    movb 7(%r14), %al
   1530 ; CHECK-BASELINE-NEXT:    andb %r10b, %al
   1531 ; CHECK-BASELINE-NEXT:    notb %r10b
   1532 ; CHECK-BASELINE-NEXT:    andb 7(%rdx), %r10b
   1533 ; CHECK-BASELINE-NEXT:    orb %al, %r10b
   1534 ; CHECK-BASELINE-NEXT:    movb 6(%r15), %r9b
   1535 ; CHECK-BASELINE-NEXT:    movb 6(%r14), %al
   1536 ; CHECK-BASELINE-NEXT:    andb %r9b, %al
   1537 ; CHECK-BASELINE-NEXT:    notb %r9b
   1538 ; CHECK-BASELINE-NEXT:    andb 6(%rdx), %r9b
   1539 ; CHECK-BASELINE-NEXT:    orb %al, %r9b
   1540 ; CHECK-BASELINE-NEXT:    movb 5(%r15), %r8b
   1541 ; CHECK-BASELINE-NEXT:    movb 5(%r14), %al
   1542 ; CHECK-BASELINE-NEXT:    andb %r8b, %al
   1543 ; CHECK-BASELINE-NEXT:    notb %r8b
   1544 ; CHECK-BASELINE-NEXT:    andb 5(%rdx), %r8b
   1545 ; CHECK-BASELINE-NEXT:    orb %al, %r8b
   1546 ; CHECK-BASELINE-NEXT:    movb 4(%r15), %dil
   1547 ; CHECK-BASELINE-NEXT:    movb 4(%r14), %al
   1548 ; CHECK-BASELINE-NEXT:    andb %dil, %al
   1549 ; CHECK-BASELINE-NEXT:    notb %dil
   1550 ; CHECK-BASELINE-NEXT:    andb 4(%rdx), %dil
   1551 ; CHECK-BASELINE-NEXT:    orb %al, %dil
   1552 ; CHECK-BASELINE-NEXT:    movb 3(%r15), %sil
   1553 ; CHECK-BASELINE-NEXT:    movb 3(%r14), %al
   1554 ; CHECK-BASELINE-NEXT:    andb %sil, %al
   1555 ; CHECK-BASELINE-NEXT:    notb %sil
   1556 ; CHECK-BASELINE-NEXT:    andb 3(%rdx), %sil
   1557 ; CHECK-BASELINE-NEXT:    orb %al, %sil
   1558 ; CHECK-BASELINE-NEXT:    movb 2(%r15), %dl
   1559 ; CHECK-BASELINE-NEXT:    movb 2(%r14), %al
   1560 ; CHECK-BASELINE-NEXT:    andb %dl, %al
   1561 ; CHECK-BASELINE-NEXT:    notb %dl
   1562 ; CHECK-BASELINE-NEXT:    andb 2(%rbx), %dl
   1563 ; CHECK-BASELINE-NEXT:    orb %al, %dl
   1564 ; CHECK-BASELINE-NEXT:    movb 1(%r15), %al
   1565 ; CHECK-BASELINE-NEXT:    movb 1(%r14), %cl
   1566 ; CHECK-BASELINE-NEXT:    andb %al, %cl
   1567 ; CHECK-BASELINE-NEXT:    notb %al
   1568 ; CHECK-BASELINE-NEXT:    andb 1(%rbx), %al
   1569 ; CHECK-BASELINE-NEXT:    orb %cl, %al
   1570 ; CHECK-BASELINE-NEXT:    movb (%r15), %r15b
   1571 ; CHECK-BASELINE-NEXT:    movb (%r14), %r14b
   1572 ; CHECK-BASELINE-NEXT:    andb %r15b, %r14b
   1573 ; CHECK-BASELINE-NEXT:    notb %r15b
   1574 ; CHECK-BASELINE-NEXT:    andb (%rbx), %r15b
   1575 ; CHECK-BASELINE-NEXT:    orb %r14b, %r15b
   1576 ; CHECK-BASELINE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
   1577 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1578 ; CHECK-BASELINE-NEXT:    movb %bl, 31(%rcx)
   1579 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1580 ; CHECK-BASELINE-NEXT:    movb %bl, 30(%rcx)
   1581 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1582 ; CHECK-BASELINE-NEXT:    movb %bl, 29(%rcx)
   1583 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1584 ; CHECK-BASELINE-NEXT:    movb %bl, 28(%rcx)
   1585 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1586 ; CHECK-BASELINE-NEXT:    movb %bl, 27(%rcx)
   1587 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1588 ; CHECK-BASELINE-NEXT:    movb %bl, 26(%rcx)
   1589 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1590 ; CHECK-BASELINE-NEXT:    movb %bl, 25(%rcx)
   1591 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1592 ; CHECK-BASELINE-NEXT:    movb %bl, 24(%rcx)
   1593 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1594 ; CHECK-BASELINE-NEXT:    movb %bl, 23(%rcx)
   1595 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1596 ; CHECK-BASELINE-NEXT:    movb %bl, 22(%rcx)
   1597 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1598 ; CHECK-BASELINE-NEXT:    movb %bl, 21(%rcx)
   1599 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1600 ; CHECK-BASELINE-NEXT:    movb %bl, 20(%rcx)
   1601 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1602 ; CHECK-BASELINE-NEXT:    movb %bl, 19(%rcx)
   1603 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1604 ; CHECK-BASELINE-NEXT:    movb %bl, 18(%rcx)
   1605 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1606 ; CHECK-BASELINE-NEXT:    movb %bl, 17(%rcx)
   1607 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1608 ; CHECK-BASELINE-NEXT:    movb %bl, 16(%rcx)
   1609 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1610 ; CHECK-BASELINE-NEXT:    movb %bl, 15(%rcx)
   1611 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1612 ; CHECK-BASELINE-NEXT:    movb %bl, 14(%rcx)
   1613 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1614 ; CHECK-BASELINE-NEXT:    movb %bl, 13(%rcx)
   1615 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1616 ; CHECK-BASELINE-NEXT:    movb %bl, 12(%rcx)
   1617 ; CHECK-BASELINE-NEXT:    movb %r13b, 11(%rcx)
   1618 ; CHECK-BASELINE-NEXT:    movb %r12b, 10(%rcx)
   1619 ; CHECK-BASELINE-NEXT:    movb %bpl, 9(%rcx)
   1620 ; CHECK-BASELINE-NEXT:    movb %r11b, 8(%rcx)
   1621 ; CHECK-BASELINE-NEXT:    movb %r10b, 7(%rcx)
   1622 ; CHECK-BASELINE-NEXT:    movb %r9b, 6(%rcx)
   1623 ; CHECK-BASELINE-NEXT:    movb %r8b, 5(%rcx)
   1624 ; CHECK-BASELINE-NEXT:    movb %dil, 4(%rcx)
   1625 ; CHECK-BASELINE-NEXT:    movb %sil, 3(%rcx)
   1626 ; CHECK-BASELINE-NEXT:    movb %dl, 2(%rcx)
   1627 ; CHECK-BASELINE-NEXT:    movb %al, 1(%rcx)
   1628 ; CHECK-BASELINE-NEXT:    movb %r15b, (%rcx)
   1629 ; CHECK-BASELINE-NEXT:    movq %rcx, %rax
   1630 ; CHECK-BASELINE-NEXT:    popq %rbx
   1631 ; CHECK-BASELINE-NEXT:    popq %r12
   1632 ; CHECK-BASELINE-NEXT:    popq %r13
   1633 ; CHECK-BASELINE-NEXT:    popq %r14
   1634 ; CHECK-BASELINE-NEXT:    popq %r15
   1635 ; CHECK-BASELINE-NEXT:    popq %rbp
   1636 ; CHECK-BASELINE-NEXT:    retq
   1637 ;
   1638 ; CHECK-SSE1-LABEL: out_v32i8:
   1639 ; CHECK-SSE1:       # %bb.0:
   1640 ; CHECK-SSE1-NEXT:    pushq %rbp
   1641 ; CHECK-SSE1-NEXT:    pushq %r15
   1642 ; CHECK-SSE1-NEXT:    pushq %r14
   1643 ; CHECK-SSE1-NEXT:    pushq %r13
   1644 ; CHECK-SSE1-NEXT:    pushq %r12
   1645 ; CHECK-SSE1-NEXT:    pushq %rbx
   1646 ; CHECK-SSE1-NEXT:    movq %rcx, %r15
   1647 ; CHECK-SSE1-NEXT:    movq %rsi, %r14
   1648 ; CHECK-SSE1-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
   1649 ; CHECK-SSE1-NEXT:    movb 15(%rcx), %al
   1650 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1651 ; CHECK-SSE1-NEXT:    movb 16(%rcx), %al
   1652 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1653 ; CHECK-SSE1-NEXT:    movb 17(%rcx), %al
   1654 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1655 ; CHECK-SSE1-NEXT:    movb 18(%rcx), %al
   1656 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1657 ; CHECK-SSE1-NEXT:    movb 19(%rcx), %al
   1658 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1659 ; CHECK-SSE1-NEXT:    movb 20(%rcx), %al
   1660 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1661 ; CHECK-SSE1-NEXT:    movb 21(%rcx), %r12b
   1662 ; CHECK-SSE1-NEXT:    movb 22(%rcx), %r10b
   1663 ; CHECK-SSE1-NEXT:    movb 23(%rcx), %r11b
   1664 ; CHECK-SSE1-NEXT:    movb 24(%rcx), %bpl
   1665 ; CHECK-SSE1-NEXT:    movb 25(%rcx), %r13b
   1666 ; CHECK-SSE1-NEXT:    movb 26(%rcx), %r9b
   1667 ; CHECK-SSE1-NEXT:    movb 27(%rcx), %r8b
   1668 ; CHECK-SSE1-NEXT:    movb 28(%rcx), %dil
   1669 ; CHECK-SSE1-NEXT:    movb 29(%rcx), %sil
   1670 ; CHECK-SSE1-NEXT:    movb 30(%rcx), %bl
   1671 ; CHECK-SSE1-NEXT:    movb 31(%rcx), %al
   1672 ; CHECK-SSE1-NEXT:    movb 31(%r14), %cl
   1673 ; CHECK-SSE1-NEXT:    andb %al, %cl
   1674 ; CHECK-SSE1-NEXT:    notb %al
   1675 ; CHECK-SSE1-NEXT:    andb 31(%rdx), %al
   1676 ; CHECK-SSE1-NEXT:    orb %cl, %al
   1677 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1678 ; CHECK-SSE1-NEXT:    movb 30(%r14), %al
   1679 ; CHECK-SSE1-NEXT:    andb %bl, %al
   1680 ; CHECK-SSE1-NEXT:    notb %bl
   1681 ; CHECK-SSE1-NEXT:    andb 30(%rdx), %bl
   1682 ; CHECK-SSE1-NEXT:    orb %al, %bl
   1683 ; CHECK-SSE1-NEXT:    movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1684 ; CHECK-SSE1-NEXT:    movb 29(%r14), %al
   1685 ; CHECK-SSE1-NEXT:    andb %sil, %al
   1686 ; CHECK-SSE1-NEXT:    notb %sil
   1687 ; CHECK-SSE1-NEXT:    andb 29(%rdx), %sil
   1688 ; CHECK-SSE1-NEXT:    orb %al, %sil
   1689 ; CHECK-SSE1-NEXT:    movb %sil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1690 ; CHECK-SSE1-NEXT:    movb 28(%r14), %al
   1691 ; CHECK-SSE1-NEXT:    andb %dil, %al
   1692 ; CHECK-SSE1-NEXT:    notb %dil
   1693 ; CHECK-SSE1-NEXT:    andb 28(%rdx), %dil
   1694 ; CHECK-SSE1-NEXT:    orb %al, %dil
   1695 ; CHECK-SSE1-NEXT:    movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1696 ; CHECK-SSE1-NEXT:    movb 27(%r14), %al
   1697 ; CHECK-SSE1-NEXT:    andb %r8b, %al
   1698 ; CHECK-SSE1-NEXT:    notb %r8b
   1699 ; CHECK-SSE1-NEXT:    andb 27(%rdx), %r8b
   1700 ; CHECK-SSE1-NEXT:    orb %al, %r8b
   1701 ; CHECK-SSE1-NEXT:    movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1702 ; CHECK-SSE1-NEXT:    movb 26(%r14), %al
   1703 ; CHECK-SSE1-NEXT:    andb %r9b, %al
   1704 ; CHECK-SSE1-NEXT:    notb %r9b
   1705 ; CHECK-SSE1-NEXT:    andb 26(%rdx), %r9b
   1706 ; CHECK-SSE1-NEXT:    orb %al, %r9b
   1707 ; CHECK-SSE1-NEXT:    movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1708 ; CHECK-SSE1-NEXT:    movb 25(%r14), %al
   1709 ; CHECK-SSE1-NEXT:    andb %r13b, %al
   1710 ; CHECK-SSE1-NEXT:    notb %r13b
   1711 ; CHECK-SSE1-NEXT:    andb 25(%rdx), %r13b
   1712 ; CHECK-SSE1-NEXT:    orb %al, %r13b
   1713 ; CHECK-SSE1-NEXT:    movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1714 ; CHECK-SSE1-NEXT:    movb 24(%r14), %al
   1715 ; CHECK-SSE1-NEXT:    andb %bpl, %al
   1716 ; CHECK-SSE1-NEXT:    notb %bpl
   1717 ; CHECK-SSE1-NEXT:    andb 24(%rdx), %bpl
   1718 ; CHECK-SSE1-NEXT:    orb %al, %bpl
   1719 ; CHECK-SSE1-NEXT:    movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1720 ; CHECK-SSE1-NEXT:    movb 23(%r14), %al
   1721 ; CHECK-SSE1-NEXT:    andb %r11b, %al
   1722 ; CHECK-SSE1-NEXT:    notb %r11b
   1723 ; CHECK-SSE1-NEXT:    andb 23(%rdx), %r11b
   1724 ; CHECK-SSE1-NEXT:    orb %al, %r11b
   1725 ; CHECK-SSE1-NEXT:    movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1726 ; CHECK-SSE1-NEXT:    movb 22(%r14), %al
   1727 ; CHECK-SSE1-NEXT:    andb %r10b, %al
   1728 ; CHECK-SSE1-NEXT:    notb %r10b
   1729 ; CHECK-SSE1-NEXT:    andb 22(%rdx), %r10b
   1730 ; CHECK-SSE1-NEXT:    orb %al, %r10b
   1731 ; CHECK-SSE1-NEXT:    movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1732 ; CHECK-SSE1-NEXT:    movb 21(%r14), %al
   1733 ; CHECK-SSE1-NEXT:    andb %r12b, %al
   1734 ; CHECK-SSE1-NEXT:    notb %r12b
   1735 ; CHECK-SSE1-NEXT:    andb 21(%rdx), %r12b
   1736 ; CHECK-SSE1-NEXT:    orb %al, %r12b
   1737 ; CHECK-SSE1-NEXT:    movb %r12b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1738 ; CHECK-SSE1-NEXT:    movb 20(%r14), %al
   1739 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1740 ; CHECK-SSE1-NEXT:    andb %cl, %al
   1741 ; CHECK-SSE1-NEXT:    notb %cl
   1742 ; CHECK-SSE1-NEXT:    andb 20(%rdx), %cl
   1743 ; CHECK-SSE1-NEXT:    orb %al, %cl
   1744 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1745 ; CHECK-SSE1-NEXT:    movb 19(%r14), %al
   1746 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1747 ; CHECK-SSE1-NEXT:    andb %cl, %al
   1748 ; CHECK-SSE1-NEXT:    notb %cl
   1749 ; CHECK-SSE1-NEXT:    andb 19(%rdx), %cl
   1750 ; CHECK-SSE1-NEXT:    orb %al, %cl
   1751 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1752 ; CHECK-SSE1-NEXT:    movb 18(%r14), %al
   1753 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1754 ; CHECK-SSE1-NEXT:    andb %cl, %al
   1755 ; CHECK-SSE1-NEXT:    notb %cl
   1756 ; CHECK-SSE1-NEXT:    andb 18(%rdx), %cl
   1757 ; CHECK-SSE1-NEXT:    orb %al, %cl
   1758 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1759 ; CHECK-SSE1-NEXT:    movb 17(%r14), %al
   1760 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1761 ; CHECK-SSE1-NEXT:    andb %cl, %al
   1762 ; CHECK-SSE1-NEXT:    notb %cl
   1763 ; CHECK-SSE1-NEXT:    andb 17(%rdx), %cl
   1764 ; CHECK-SSE1-NEXT:    orb %al, %cl
   1765 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1766 ; CHECK-SSE1-NEXT:    movb 16(%r14), %al
   1767 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1768 ; CHECK-SSE1-NEXT:    andb %cl, %al
   1769 ; CHECK-SSE1-NEXT:    notb %cl
   1770 ; CHECK-SSE1-NEXT:    movq %rdx, %rbx
   1771 ; CHECK-SSE1-NEXT:    andb 16(%rdx), %cl
   1772 ; CHECK-SSE1-NEXT:    orb %al, %cl
   1773 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1774 ; CHECK-SSE1-NEXT:    movb 15(%r14), %al
   1775 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
   1776 ; CHECK-SSE1-NEXT:    andb %cl, %al
   1777 ; CHECK-SSE1-NEXT:    notb %cl
   1778 ; CHECK-SSE1-NEXT:    andb 15(%rdx), %cl
   1779 ; CHECK-SSE1-NEXT:    orb %al, %cl
   1780 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1781 ; CHECK-SSE1-NEXT:    movb 14(%r15), %cl
   1782 ; CHECK-SSE1-NEXT:    movb 14(%r14), %al
   1783 ; CHECK-SSE1-NEXT:    andb %cl, %al
   1784 ; CHECK-SSE1-NEXT:    notb %cl
   1785 ; CHECK-SSE1-NEXT:    andb 14(%rdx), %cl
   1786 ; CHECK-SSE1-NEXT:    orb %al, %cl
   1787 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1788 ; CHECK-SSE1-NEXT:    movb 13(%r15), %cl
   1789 ; CHECK-SSE1-NEXT:    movb 13(%r14), %al
   1790 ; CHECK-SSE1-NEXT:    andb %cl, %al
   1791 ; CHECK-SSE1-NEXT:    notb %cl
   1792 ; CHECK-SSE1-NEXT:    andb 13(%rdx), %cl
   1793 ; CHECK-SSE1-NEXT:    orb %al, %cl
   1794 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1795 ; CHECK-SSE1-NEXT:    movb 12(%r15), %cl
   1796 ; CHECK-SSE1-NEXT:    movb 12(%r14), %al
   1797 ; CHECK-SSE1-NEXT:    andb %cl, %al
   1798 ; CHECK-SSE1-NEXT:    notb %cl
   1799 ; CHECK-SSE1-NEXT:    andb 12(%rdx), %cl
   1800 ; CHECK-SSE1-NEXT:    orb %al, %cl
   1801 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   1802 ; CHECK-SSE1-NEXT:    movb 11(%r15), %r13b
   1803 ; CHECK-SSE1-NEXT:    movb 11(%r14), %al
   1804 ; CHECK-SSE1-NEXT:    andb %r13b, %al
   1805 ; CHECK-SSE1-NEXT:    notb %r13b
   1806 ; CHECK-SSE1-NEXT:    andb 11(%rdx), %r13b
   1807 ; CHECK-SSE1-NEXT:    orb %al, %r13b
   1808 ; CHECK-SSE1-NEXT:    movb 10(%r15), %r12b
   1809 ; CHECK-SSE1-NEXT:    movb 10(%r14), %al
   1810 ; CHECK-SSE1-NEXT:    andb %r12b, %al
   1811 ; CHECK-SSE1-NEXT:    notb %r12b
   1812 ; CHECK-SSE1-NEXT:    andb 10(%rdx), %r12b
   1813 ; CHECK-SSE1-NEXT:    orb %al, %r12b
   1814 ; CHECK-SSE1-NEXT:    movb 9(%r15), %bpl
   1815 ; CHECK-SSE1-NEXT:    movb 9(%r14), %al
   1816 ; CHECK-SSE1-NEXT:    andb %bpl, %al
   1817 ; CHECK-SSE1-NEXT:    notb %bpl
   1818 ; CHECK-SSE1-NEXT:    andb 9(%rdx), %bpl
   1819 ; CHECK-SSE1-NEXT:    orb %al, %bpl
   1820 ; CHECK-SSE1-NEXT:    movb 8(%r15), %r11b
   1821 ; CHECK-SSE1-NEXT:    movb 8(%r14), %al
   1822 ; CHECK-SSE1-NEXT:    andb %r11b, %al
   1823 ; CHECK-SSE1-NEXT:    notb %r11b
   1824 ; CHECK-SSE1-NEXT:    andb 8(%rdx), %r11b
   1825 ; CHECK-SSE1-NEXT:    orb %al, %r11b
   1826 ; CHECK-SSE1-NEXT:    movb 7(%r15), %r10b
   1827 ; CHECK-SSE1-NEXT:    movb 7(%r14), %al
   1828 ; CHECK-SSE1-NEXT:    andb %r10b, %al
   1829 ; CHECK-SSE1-NEXT:    notb %r10b
   1830 ; CHECK-SSE1-NEXT:    andb 7(%rdx), %r10b
   1831 ; CHECK-SSE1-NEXT:    orb %al, %r10b
   1832 ; CHECK-SSE1-NEXT:    movb 6(%r15), %r9b
   1833 ; CHECK-SSE1-NEXT:    movb 6(%r14), %al
   1834 ; CHECK-SSE1-NEXT:    andb %r9b, %al
   1835 ; CHECK-SSE1-NEXT:    notb %r9b
   1836 ; CHECK-SSE1-NEXT:    andb 6(%rdx), %r9b
   1837 ; CHECK-SSE1-NEXT:    orb %al, %r9b
   1838 ; CHECK-SSE1-NEXT:    movb 5(%r15), %r8b
   1839 ; CHECK-SSE1-NEXT:    movb 5(%r14), %al
   1840 ; CHECK-SSE1-NEXT:    andb %r8b, %al
   1841 ; CHECK-SSE1-NEXT:    notb %r8b
   1842 ; CHECK-SSE1-NEXT:    andb 5(%rdx), %r8b
   1843 ; CHECK-SSE1-NEXT:    orb %al, %r8b
   1844 ; CHECK-SSE1-NEXT:    movb 4(%r15), %dil
   1845 ; CHECK-SSE1-NEXT:    movb 4(%r14), %al
   1846 ; CHECK-SSE1-NEXT:    andb %dil, %al
   1847 ; CHECK-SSE1-NEXT:    notb %dil
   1848 ; CHECK-SSE1-NEXT:    andb 4(%rdx), %dil
   1849 ; CHECK-SSE1-NEXT:    orb %al, %dil
   1850 ; CHECK-SSE1-NEXT:    movb 3(%r15), %sil
   1851 ; CHECK-SSE1-NEXT:    movb 3(%r14), %al
   1852 ; CHECK-SSE1-NEXT:    andb %sil, %al
   1853 ; CHECK-SSE1-NEXT:    notb %sil
   1854 ; CHECK-SSE1-NEXT:    andb 3(%rdx), %sil
   1855 ; CHECK-SSE1-NEXT:    orb %al, %sil
   1856 ; CHECK-SSE1-NEXT:    movb 2(%r15), %dl
   1857 ; CHECK-SSE1-NEXT:    movb 2(%r14), %al
   1858 ; CHECK-SSE1-NEXT:    andb %dl, %al
   1859 ; CHECK-SSE1-NEXT:    notb %dl
   1860 ; CHECK-SSE1-NEXT:    andb 2(%rbx), %dl
   1861 ; CHECK-SSE1-NEXT:    orb %al, %dl
   1862 ; CHECK-SSE1-NEXT:    movb 1(%r15), %al
   1863 ; CHECK-SSE1-NEXT:    movb 1(%r14), %cl
   1864 ; CHECK-SSE1-NEXT:    andb %al, %cl
   1865 ; CHECK-SSE1-NEXT:    notb %al
   1866 ; CHECK-SSE1-NEXT:    andb 1(%rbx), %al
   1867 ; CHECK-SSE1-NEXT:    orb %cl, %al
   1868 ; CHECK-SSE1-NEXT:    movb (%r15), %r15b
   1869 ; CHECK-SSE1-NEXT:    movb (%r14), %r14b
   1870 ; CHECK-SSE1-NEXT:    andb %r15b, %r14b
   1871 ; CHECK-SSE1-NEXT:    notb %r15b
   1872 ; CHECK-SSE1-NEXT:    andb (%rbx), %r15b
   1873 ; CHECK-SSE1-NEXT:    orb %r14b, %r15b
   1874 ; CHECK-SSE1-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
   1875 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1876 ; CHECK-SSE1-NEXT:    movb %bl, 31(%rcx)
   1877 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1878 ; CHECK-SSE1-NEXT:    movb %bl, 30(%rcx)
   1879 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1880 ; CHECK-SSE1-NEXT:    movb %bl, 29(%rcx)
   1881 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1882 ; CHECK-SSE1-NEXT:    movb %bl, 28(%rcx)
   1883 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1884 ; CHECK-SSE1-NEXT:    movb %bl, 27(%rcx)
   1885 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1886 ; CHECK-SSE1-NEXT:    movb %bl, 26(%rcx)
   1887 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1888 ; CHECK-SSE1-NEXT:    movb %bl, 25(%rcx)
   1889 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1890 ; CHECK-SSE1-NEXT:    movb %bl, 24(%rcx)
   1891 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1892 ; CHECK-SSE1-NEXT:    movb %bl, 23(%rcx)
   1893 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1894 ; CHECK-SSE1-NEXT:    movb %bl, 22(%rcx)
   1895 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1896 ; CHECK-SSE1-NEXT:    movb %bl, 21(%rcx)
   1897 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1898 ; CHECK-SSE1-NEXT:    movb %bl, 20(%rcx)
   1899 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1900 ; CHECK-SSE1-NEXT:    movb %bl, 19(%rcx)
   1901 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1902 ; CHECK-SSE1-NEXT:    movb %bl, 18(%rcx)
   1903 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1904 ; CHECK-SSE1-NEXT:    movb %bl, 17(%rcx)
   1905 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1906 ; CHECK-SSE1-NEXT:    movb %bl, 16(%rcx)
   1907 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1908 ; CHECK-SSE1-NEXT:    movb %bl, 15(%rcx)
   1909 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1910 ; CHECK-SSE1-NEXT:    movb %bl, 14(%rcx)
   1911 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1912 ; CHECK-SSE1-NEXT:    movb %bl, 13(%rcx)
   1913 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
   1914 ; CHECK-SSE1-NEXT:    movb %bl, 12(%rcx)
   1915 ; CHECK-SSE1-NEXT:    movb %r13b, 11(%rcx)
   1916 ; CHECK-SSE1-NEXT:    movb %r12b, 10(%rcx)
   1917 ; CHECK-SSE1-NEXT:    movb %bpl, 9(%rcx)
   1918 ; CHECK-SSE1-NEXT:    movb %r11b, 8(%rcx)
   1919 ; CHECK-SSE1-NEXT:    movb %r10b, 7(%rcx)
   1920 ; CHECK-SSE1-NEXT:    movb %r9b, 6(%rcx)
   1921 ; CHECK-SSE1-NEXT:    movb %r8b, 5(%rcx)
   1922 ; CHECK-SSE1-NEXT:    movb %dil, 4(%rcx)
   1923 ; CHECK-SSE1-NEXT:    movb %sil, 3(%rcx)
   1924 ; CHECK-SSE1-NEXT:    movb %dl, 2(%rcx)
   1925 ; CHECK-SSE1-NEXT:    movb %al, 1(%rcx)
   1926 ; CHECK-SSE1-NEXT:    movb %r15b, (%rcx)
   1927 ; CHECK-SSE1-NEXT:    movq %rcx, %rax
   1928 ; CHECK-SSE1-NEXT:    popq %rbx
   1929 ; CHECK-SSE1-NEXT:    popq %r12
   1930 ; CHECK-SSE1-NEXT:    popq %r13
   1931 ; CHECK-SSE1-NEXT:    popq %r14
   1932 ; CHECK-SSE1-NEXT:    popq %r15
   1933 ; CHECK-SSE1-NEXT:    popq %rbp
   1934 ; CHECK-SSE1-NEXT:    retq
   1935 ;
   1936 ; CHECK-SSE2-LABEL: out_v32i8:
   1937 ; CHECK-SSE2:       # %bb.0:
   1938 ; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
   1939 ; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
   1940 ; CHECK-SSE2-NEXT:    movaps 16(%rdi), %xmm2
   1941 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
   1942 ; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm3
   1943 ; CHECK-SSE2-NEXT:    andps %xmm0, %xmm3
   1944 ; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm1
   1945 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
   1946 ; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
   1947 ; CHECK-SSE2-NEXT:    orps %xmm3, %xmm0
   1948 ; CHECK-SSE2-NEXT:    retq
   1949 ;
   1950 ; CHECK-XOP-LABEL: out_v32i8:
   1951 ; CHECK-XOP:       # %bb.0:
   1952 ; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
   1953 ; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
   1954 ; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
   1955 ; CHECK-XOP-NEXT:    retq
          ; Load the two data vectors and the mask, each 32-byte aligned.
   1956   %x = load <32 x i8>, <32 x i8> *%px, align 32
   1957   %y = load <32 x i8>, <32 x i8> *%py, align 32
   1958   %mask = load <32 x i8>, <32 x i8> *%pmask, align 32
          ; Keep the bits of x where the mask is set.
   1959   %mx = and <32 x i8> %x, %mask
          ; xor with all-ones in every lane inverts the mask.
   1960   %notmask = xor <32 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
          ; Keep the bits of y where the mask is clear.
   1961   %my = and <32 x i8> %y, %notmask
          ; The two selections cover disjoint bits, so or merges them.
   1962   %r = or <32 x i8> %mx, %my
   1963   ret <32 x i8> %r
   1964 }
   1965 
   1966 define <16 x i16> @out_v16i16(<16 x i16> *%px, <16 x i16> *%py, <16 x i16> *%pmask) nounwind {
        ; "Out" form of a masked merge on <16 x i16> vectors loaded from memory:
        ;   r = (x & mask) | (y & ~mask)
        ; Expected lowering per configuration (see the assertions below):
        ;   - no SSE / SSE1 only: each i16 lane is merged separately in GPRs
        ;     (andw / notl / andw / orl) and stored through the result pointer;
        ;   - SSE2: two 128-bit halves, each done with andps / andnps / orps;
        ;   - XOP: a single 256-bit vpcmov.
   1967 ; CHECK-BASELINE-LABEL: out_v16i16:
   1968 ; CHECK-BASELINE:       # %bb.0:
   1969 ; CHECK-BASELINE-NEXT:    pushq %rbp
   1970 ; CHECK-BASELINE-NEXT:    pushq %r15
   1971 ; CHECK-BASELINE-NEXT:    pushq %r14
   1972 ; CHECK-BASELINE-NEXT:    pushq %r13
   1973 ; CHECK-BASELINE-NEXT:    pushq %r12
   1974 ; CHECK-BASELINE-NEXT:    pushq %rbx
   1975 ; CHECK-BASELINE-NEXT:    movq %rcx, %r9
   1976 ; CHECK-BASELINE-NEXT:    movq %rdx, %r10
   1977 ; CHECK-BASELINE-NEXT:    movq %rsi, %r8
   1978 ; CHECK-BASELINE-NEXT:    movq %rdi, %r11
   1979 ; CHECK-BASELINE-NEXT:    movl 12(%rcx), %eax
   1980 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   1981 ; CHECK-BASELINE-NEXT:    movzwl 14(%rcx), %edx
   1982 ; CHECK-BASELINE-NEXT:    movl 16(%rcx), %esi
   1983 ; CHECK-BASELINE-NEXT:    movzwl 18(%rcx), %edi
   1984 ; CHECK-BASELINE-NEXT:    movl 20(%rcx), %ecx
   1985 ; CHECK-BASELINE-NEXT:    movzwl 22(%r9), %r15d
   1986 ; CHECK-BASELINE-NEXT:    movl 24(%r9), %r12d
   1987 ; CHECK-BASELINE-NEXT:    movzwl 26(%r9), %r14d
   1988 ; CHECK-BASELINE-NEXT:    movl 28(%r9), %ebx
   1989 ; CHECK-BASELINE-NEXT:    movzwl 30(%r9), %ebp
   1990 ; CHECK-BASELINE-NEXT:    movzwl 30(%r8), %r13d
   1991 ; CHECK-BASELINE-NEXT:    andw %bp, %r13w
   1992 ; CHECK-BASELINE-NEXT:    notl %ebp
   1993 ; CHECK-BASELINE-NEXT:    andw 30(%r10), %bp
   1994 ; CHECK-BASELINE-NEXT:    orl %r13d, %ebp
   1995 ; CHECK-BASELINE-NEXT:    movzwl 28(%r8), %eax
   1996 ; CHECK-BASELINE-NEXT:    andw %bx, %ax
   1997 ; CHECK-BASELINE-NEXT:    notl %ebx
   1998 ; CHECK-BASELINE-NEXT:    andw 28(%r10), %bx
   1999 ; CHECK-BASELINE-NEXT:    orl %eax, %ebx
   2000 ; CHECK-BASELINE-NEXT:    movzwl 26(%r8), %eax
   2001 ; CHECK-BASELINE-NEXT:    andw %r14w, %ax
   2002 ; CHECK-BASELINE-NEXT:    notl %r14d
   2003 ; CHECK-BASELINE-NEXT:    andw 26(%r10), %r14w
   2004 ; CHECK-BASELINE-NEXT:    orl %eax, %r14d
   2005 ; CHECK-BASELINE-NEXT:    movzwl 24(%r8), %eax
   2006 ; CHECK-BASELINE-NEXT:    andw %r12w, %ax
   2007 ; CHECK-BASELINE-NEXT:    notl %r12d
   2008 ; CHECK-BASELINE-NEXT:    andw 24(%r10), %r12w
   2009 ; CHECK-BASELINE-NEXT:    orl %eax, %r12d
   2010 ; CHECK-BASELINE-NEXT:    movzwl 22(%r8), %eax
   2011 ; CHECK-BASELINE-NEXT:    andw %r15w, %ax
   2012 ; CHECK-BASELINE-NEXT:    notl %r15d
   2013 ; CHECK-BASELINE-NEXT:    andw 22(%r10), %r15w
   2014 ; CHECK-BASELINE-NEXT:    orl %eax, %r15d
   2015 ; CHECK-BASELINE-NEXT:    movzwl 20(%r8), %eax
   2016 ; CHECK-BASELINE-NEXT:    andw %cx, %ax
   2017 ; CHECK-BASELINE-NEXT:    notl %ecx
   2018 ; CHECK-BASELINE-NEXT:    andw 20(%r10), %cx
   2019 ; CHECK-BASELINE-NEXT:    orl %eax, %ecx
   2020 ; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   2021 ; CHECK-BASELINE-NEXT:    movzwl 18(%r8), %eax
   2022 ; CHECK-BASELINE-NEXT:    andw %di, %ax
   2023 ; CHECK-BASELINE-NEXT:    notl %edi
   2024 ; CHECK-BASELINE-NEXT:    andw 18(%r10), %di
   2025 ; CHECK-BASELINE-NEXT:    orl %eax, %edi
   2026 ; CHECK-BASELINE-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   2027 ; CHECK-BASELINE-NEXT:    movzwl 16(%r8), %eax
   2028 ; CHECK-BASELINE-NEXT:    andw %si, %ax
   2029 ; CHECK-BASELINE-NEXT:    notl %esi
   2030 ; CHECK-BASELINE-NEXT:    andw 16(%r10), %si
   2031 ; CHECK-BASELINE-NEXT:    orl %eax, %esi
   2032 ; CHECK-BASELINE-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   2033 ; CHECK-BASELINE-NEXT:    movzwl 14(%r8), %eax
   2034 ; CHECK-BASELINE-NEXT:    andw %dx, %ax
   2035 ; CHECK-BASELINE-NEXT:    notl %edx
   2036 ; CHECK-BASELINE-NEXT:    andw 14(%r10), %dx
   2037 ; CHECK-BASELINE-NEXT:    orl %eax, %edx
   2038 ; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   2039 ; CHECK-BASELINE-NEXT:    movzwl 12(%r8), %eax
   2040 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2041 ; CHECK-BASELINE-NEXT:    andw %cx, %ax
   2042 ; CHECK-BASELINE-NEXT:    notl %ecx
   2043 ; CHECK-BASELINE-NEXT:    andw 12(%r10), %cx
   2044 ; CHECK-BASELINE-NEXT:    orl %eax, %ecx
   2045 ; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   2046 ; CHECK-BASELINE-NEXT:    movzwl 10(%r9), %r13d
   2047 ; CHECK-BASELINE-NEXT:    movzwl 10(%r8), %eax
   2048 ; CHECK-BASELINE-NEXT:    andw %r13w, %ax
   2049 ; CHECK-BASELINE-NEXT:    notl %r13d
   2050 ; CHECK-BASELINE-NEXT:    andw 10(%r10), %r13w
   2051 ; CHECK-BASELINE-NEXT:    orl %eax, %r13d
   2052 ; CHECK-BASELINE-NEXT:    movl 8(%r9), %edi
   2053 ; CHECK-BASELINE-NEXT:    movzwl 8(%r8), %eax
   2054 ; CHECK-BASELINE-NEXT:    andw %di, %ax
   2055 ; CHECK-BASELINE-NEXT:    notl %edi
   2056 ; CHECK-BASELINE-NEXT:    andw 8(%r10), %di
   2057 ; CHECK-BASELINE-NEXT:    orl %eax, %edi
   2058 ; CHECK-BASELINE-NEXT:    movzwl 6(%r9), %esi
   2059 ; CHECK-BASELINE-NEXT:    movzwl 6(%r8), %eax
   2060 ; CHECK-BASELINE-NEXT:    andw %si, %ax
   2061 ; CHECK-BASELINE-NEXT:    notl %esi
   2062 ; CHECK-BASELINE-NEXT:    andw 6(%r10), %si
   2063 ; CHECK-BASELINE-NEXT:    orl %eax, %esi
   2064 ; CHECK-BASELINE-NEXT:    movl 4(%r9), %edx
   2065 ; CHECK-BASELINE-NEXT:    movzwl 4(%r8), %eax
   2066 ; CHECK-BASELINE-NEXT:    andw %dx, %ax
   2067 ; CHECK-BASELINE-NEXT:    notl %edx
   2068 ; CHECK-BASELINE-NEXT:    andw 4(%r10), %dx
   2069 ; CHECK-BASELINE-NEXT:    orl %eax, %edx
   2070 ; CHECK-BASELINE-NEXT:    movzwl 2(%r9), %eax
   2071 ; CHECK-BASELINE-NEXT:    movzwl 2(%r8), %ecx
   2072 ; CHECK-BASELINE-NEXT:    andw %ax, %cx
   2073 ; CHECK-BASELINE-NEXT:    notl %eax
   2074 ; CHECK-BASELINE-NEXT:    andw 2(%r10), %ax
   2075 ; CHECK-BASELINE-NEXT:    orl %ecx, %eax
   2076 ; CHECK-BASELINE-NEXT:    movl (%r9), %r9d
   2077 ; CHECK-BASELINE-NEXT:    movzwl (%r8), %ecx
   2078 ; CHECK-BASELINE-NEXT:    andw %r9w, %cx
   2079 ; CHECK-BASELINE-NEXT:    notl %r9d
   2080 ; CHECK-BASELINE-NEXT:    andw (%r10), %r9w
   2081 ; CHECK-BASELINE-NEXT:    orl %ecx, %r9d
   2082 ; CHECK-BASELINE-NEXT:    movw %bp, 30(%r11)
   2083 ; CHECK-BASELINE-NEXT:    movw %bx, 28(%r11)
   2084 ; CHECK-BASELINE-NEXT:    movw %r14w, 26(%r11)
   2085 ; CHECK-BASELINE-NEXT:    movw %r12w, 24(%r11)
   2086 ; CHECK-BASELINE-NEXT:    movw %r15w, 22(%r11)
   2087 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2088 ; CHECK-BASELINE-NEXT:    movw %cx, 20(%r11)
   2089 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2090 ; CHECK-BASELINE-NEXT:    movw %cx, 18(%r11)
   2091 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2092 ; CHECK-BASELINE-NEXT:    movw %cx, 16(%r11)
   2093 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2094 ; CHECK-BASELINE-NEXT:    movw %cx, 14(%r11)
   2095 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2096 ; CHECK-BASELINE-NEXT:    movw %cx, 12(%r11)
   2097 ; CHECK-BASELINE-NEXT:    movw %r13w, 10(%r11)
   2098 ; CHECK-BASELINE-NEXT:    movw %di, 8(%r11)
   2099 ; CHECK-BASELINE-NEXT:    movw %si, 6(%r11)
   2100 ; CHECK-BASELINE-NEXT:    movw %dx, 4(%r11)
   2101 ; CHECK-BASELINE-NEXT:    movw %ax, 2(%r11)
   2102 ; CHECK-BASELINE-NEXT:    movw %r9w, (%r11)
   2103 ; CHECK-BASELINE-NEXT:    movq %r11, %rax
   2104 ; CHECK-BASELINE-NEXT:    popq %rbx
   2105 ; CHECK-BASELINE-NEXT:    popq %r12
   2106 ; CHECK-BASELINE-NEXT:    popq %r13
   2107 ; CHECK-BASELINE-NEXT:    popq %r14
   2108 ; CHECK-BASELINE-NEXT:    popq %r15
   2109 ; CHECK-BASELINE-NEXT:    popq %rbp
   2110 ; CHECK-BASELINE-NEXT:    retq
   2111 ;
   2112 ; CHECK-SSE1-LABEL: out_v16i16:
   2113 ; CHECK-SSE1:       # %bb.0:
   2114 ; CHECK-SSE1-NEXT:    pushq %rbp
   2115 ; CHECK-SSE1-NEXT:    pushq %r15
   2116 ; CHECK-SSE1-NEXT:    pushq %r14
   2117 ; CHECK-SSE1-NEXT:    pushq %r13
   2118 ; CHECK-SSE1-NEXT:    pushq %r12
   2119 ; CHECK-SSE1-NEXT:    pushq %rbx
   2120 ; CHECK-SSE1-NEXT:    movq %rcx, %r9
   2121 ; CHECK-SSE1-NEXT:    movq %rdx, %r10
   2122 ; CHECK-SSE1-NEXT:    movq %rsi, %r8
   2123 ; CHECK-SSE1-NEXT:    movq %rdi, %r11
   2124 ; CHECK-SSE1-NEXT:    movl 12(%rcx), %eax
   2125 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   2126 ; CHECK-SSE1-NEXT:    movzwl 14(%rcx), %edx
   2127 ; CHECK-SSE1-NEXT:    movl 16(%rcx), %esi
   2128 ; CHECK-SSE1-NEXT:    movzwl 18(%rcx), %edi
   2129 ; CHECK-SSE1-NEXT:    movl 20(%rcx), %ecx
   2130 ; CHECK-SSE1-NEXT:    movzwl 22(%r9), %r15d
   2131 ; CHECK-SSE1-NEXT:    movl 24(%r9), %r12d
   2132 ; CHECK-SSE1-NEXT:    movzwl 26(%r9), %r14d
   2133 ; CHECK-SSE1-NEXT:    movl 28(%r9), %ebx
   2134 ; CHECK-SSE1-NEXT:    movzwl 30(%r9), %ebp
   2135 ; CHECK-SSE1-NEXT:    movzwl 30(%r8), %r13d
   2136 ; CHECK-SSE1-NEXT:    andw %bp, %r13w
   2137 ; CHECK-SSE1-NEXT:    notl %ebp
   2138 ; CHECK-SSE1-NEXT:    andw 30(%r10), %bp
   2139 ; CHECK-SSE1-NEXT:    orl %r13d, %ebp
   2140 ; CHECK-SSE1-NEXT:    movzwl 28(%r8), %eax
   2141 ; CHECK-SSE1-NEXT:    andw %bx, %ax
   2142 ; CHECK-SSE1-NEXT:    notl %ebx
   2143 ; CHECK-SSE1-NEXT:    andw 28(%r10), %bx
   2144 ; CHECK-SSE1-NEXT:    orl %eax, %ebx
   2145 ; CHECK-SSE1-NEXT:    movzwl 26(%r8), %eax
   2146 ; CHECK-SSE1-NEXT:    andw %r14w, %ax
   2147 ; CHECK-SSE1-NEXT:    notl %r14d
   2148 ; CHECK-SSE1-NEXT:    andw 26(%r10), %r14w
   2149 ; CHECK-SSE1-NEXT:    orl %eax, %r14d
   2150 ; CHECK-SSE1-NEXT:    movzwl 24(%r8), %eax
   2151 ; CHECK-SSE1-NEXT:    andw %r12w, %ax
   2152 ; CHECK-SSE1-NEXT:    notl %r12d
   2153 ; CHECK-SSE1-NEXT:    andw 24(%r10), %r12w
   2154 ; CHECK-SSE1-NEXT:    orl %eax, %r12d
   2155 ; CHECK-SSE1-NEXT:    movzwl 22(%r8), %eax
   2156 ; CHECK-SSE1-NEXT:    andw %r15w, %ax
   2157 ; CHECK-SSE1-NEXT:    notl %r15d
   2158 ; CHECK-SSE1-NEXT:    andw 22(%r10), %r15w
   2159 ; CHECK-SSE1-NEXT:    orl %eax, %r15d
   2160 ; CHECK-SSE1-NEXT:    movzwl 20(%r8), %eax
   2161 ; CHECK-SSE1-NEXT:    andw %cx, %ax
   2162 ; CHECK-SSE1-NEXT:    notl %ecx
   2163 ; CHECK-SSE1-NEXT:    andw 20(%r10), %cx
   2164 ; CHECK-SSE1-NEXT:    orl %eax, %ecx
   2165 ; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   2166 ; CHECK-SSE1-NEXT:    movzwl 18(%r8), %eax
   2167 ; CHECK-SSE1-NEXT:    andw %di, %ax
   2168 ; CHECK-SSE1-NEXT:    notl %edi
   2169 ; CHECK-SSE1-NEXT:    andw 18(%r10), %di
   2170 ; CHECK-SSE1-NEXT:    orl %eax, %edi
   2171 ; CHECK-SSE1-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   2172 ; CHECK-SSE1-NEXT:    movzwl 16(%r8), %eax
   2173 ; CHECK-SSE1-NEXT:    andw %si, %ax
   2174 ; CHECK-SSE1-NEXT:    notl %esi
   2175 ; CHECK-SSE1-NEXT:    andw 16(%r10), %si
   2176 ; CHECK-SSE1-NEXT:    orl %eax, %esi
   2177 ; CHECK-SSE1-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   2178 ; CHECK-SSE1-NEXT:    movzwl 14(%r8), %eax
   2179 ; CHECK-SSE1-NEXT:    andw %dx, %ax
   2180 ; CHECK-SSE1-NEXT:    notl %edx
   2181 ; CHECK-SSE1-NEXT:    andw 14(%r10), %dx
   2182 ; CHECK-SSE1-NEXT:    orl %eax, %edx
   2183 ; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   2184 ; CHECK-SSE1-NEXT:    movzwl 12(%r8), %eax
   2185 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2186 ; CHECK-SSE1-NEXT:    andw %cx, %ax
   2187 ; CHECK-SSE1-NEXT:    notl %ecx
   2188 ; CHECK-SSE1-NEXT:    andw 12(%r10), %cx
   2189 ; CHECK-SSE1-NEXT:    orl %eax, %ecx
   2190 ; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   2191 ; CHECK-SSE1-NEXT:    movzwl 10(%r9), %r13d
   2192 ; CHECK-SSE1-NEXT:    movzwl 10(%r8), %eax
   2193 ; CHECK-SSE1-NEXT:    andw %r13w, %ax
   2194 ; CHECK-SSE1-NEXT:    notl %r13d
   2195 ; CHECK-SSE1-NEXT:    andw 10(%r10), %r13w
   2196 ; CHECK-SSE1-NEXT:    orl %eax, %r13d
   2197 ; CHECK-SSE1-NEXT:    movl 8(%r9), %edi
   2198 ; CHECK-SSE1-NEXT:    movzwl 8(%r8), %eax
   2199 ; CHECK-SSE1-NEXT:    andw %di, %ax
   2200 ; CHECK-SSE1-NEXT:    notl %edi
   2201 ; CHECK-SSE1-NEXT:    andw 8(%r10), %di
   2202 ; CHECK-SSE1-NEXT:    orl %eax, %edi
   2203 ; CHECK-SSE1-NEXT:    movzwl 6(%r9), %esi
   2204 ; CHECK-SSE1-NEXT:    movzwl 6(%r8), %eax
   2205 ; CHECK-SSE1-NEXT:    andw %si, %ax
   2206 ; CHECK-SSE1-NEXT:    notl %esi
   2207 ; CHECK-SSE1-NEXT:    andw 6(%r10), %si
   2208 ; CHECK-SSE1-NEXT:    orl %eax, %esi
   2209 ; CHECK-SSE1-NEXT:    movl 4(%r9), %edx
   2210 ; CHECK-SSE1-NEXT:    movzwl 4(%r8), %eax
   2211 ; CHECK-SSE1-NEXT:    andw %dx, %ax
   2212 ; CHECK-SSE1-NEXT:    notl %edx
   2213 ; CHECK-SSE1-NEXT:    andw 4(%r10), %dx
   2214 ; CHECK-SSE1-NEXT:    orl %eax, %edx
   2215 ; CHECK-SSE1-NEXT:    movzwl 2(%r9), %eax
   2216 ; CHECK-SSE1-NEXT:    movzwl 2(%r8), %ecx
   2217 ; CHECK-SSE1-NEXT:    andw %ax, %cx
   2218 ; CHECK-SSE1-NEXT:    notl %eax
   2219 ; CHECK-SSE1-NEXT:    andw 2(%r10), %ax
   2220 ; CHECK-SSE1-NEXT:    orl %ecx, %eax
   2221 ; CHECK-SSE1-NEXT:    movl (%r9), %r9d
   2222 ; CHECK-SSE1-NEXT:    movzwl (%r8), %ecx
   2223 ; CHECK-SSE1-NEXT:    andw %r9w, %cx
   2224 ; CHECK-SSE1-NEXT:    notl %r9d
   2225 ; CHECK-SSE1-NEXT:    andw (%r10), %r9w
   2226 ; CHECK-SSE1-NEXT:    orl %ecx, %r9d
   2227 ; CHECK-SSE1-NEXT:    movw %bp, 30(%r11)
   2228 ; CHECK-SSE1-NEXT:    movw %bx, 28(%r11)
   2229 ; CHECK-SSE1-NEXT:    movw %r14w, 26(%r11)
   2230 ; CHECK-SSE1-NEXT:    movw %r12w, 24(%r11)
   2231 ; CHECK-SSE1-NEXT:    movw %r15w, 22(%r11)
   2232 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2233 ; CHECK-SSE1-NEXT:    movw %cx, 20(%r11)
   2234 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2235 ; CHECK-SSE1-NEXT:    movw %cx, 18(%r11)
   2236 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2237 ; CHECK-SSE1-NEXT:    movw %cx, 16(%r11)
   2238 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2239 ; CHECK-SSE1-NEXT:    movw %cx, 14(%r11)
   2240 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   2241 ; CHECK-SSE1-NEXT:    movw %cx, 12(%r11)
   2242 ; CHECK-SSE1-NEXT:    movw %r13w, 10(%r11)
   2243 ; CHECK-SSE1-NEXT:    movw %di, 8(%r11)
   2244 ; CHECK-SSE1-NEXT:    movw %si, 6(%r11)
   2245 ; CHECK-SSE1-NEXT:    movw %dx, 4(%r11)
   2246 ; CHECK-SSE1-NEXT:    movw %ax, 2(%r11)
   2247 ; CHECK-SSE1-NEXT:    movw %r9w, (%r11)
   2248 ; CHECK-SSE1-NEXT:    movq %r11, %rax
   2249 ; CHECK-SSE1-NEXT:    popq %rbx
   2250 ; CHECK-SSE1-NEXT:    popq %r12
   2251 ; CHECK-SSE1-NEXT:    popq %r13
   2252 ; CHECK-SSE1-NEXT:    popq %r14
   2253 ; CHECK-SSE1-NEXT:    popq %r15
   2254 ; CHECK-SSE1-NEXT:    popq %rbp
   2255 ; CHECK-SSE1-NEXT:    retq
   2256 ;
   2257 ; CHECK-SSE2-LABEL: out_v16i16:
   2258 ; CHECK-SSE2:       # %bb.0:
   2259 ; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
   2260 ; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
   2261 ; CHECK-SSE2-NEXT:    movaps 16(%rdi), %xmm2
   2262 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
   2263 ; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm3
   2264 ; CHECK-SSE2-NEXT:    andps %xmm0, %xmm3
   2265 ; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm1
   2266 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
   2267 ; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
   2268 ; CHECK-SSE2-NEXT:    orps %xmm3, %xmm0
   2269 ; CHECK-SSE2-NEXT:    retq
   2270 ;
   2271 ; CHECK-XOP-LABEL: out_v16i16:
   2272 ; CHECK-XOP:       # %bb.0:
   2273 ; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
   2274 ; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
   2275 ; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
   2276 ; CHECK-XOP-NEXT:    retq
        ; Load x, y and mask; every load is declared 32-byte aligned.
   2277   %x = load <16 x i16>, <16 x i16> *%px, align 32
   2278   %y = load <16 x i16>, <16 x i16> *%py, align 32
   2279   %mask = load <16 x i16>, <16 x i16> *%pmask, align 32
        ; mx keeps the bits of x selected by mask.
   2280   %mx = and <16 x i16> %x, %mask
        ; xor with all-ones is the bitwise NOT of mask.
   2281   %notmask = xor <16 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
        ; my keeps the bits of y NOT selected by mask; or-ing merges both halves.
   2282   %my = and <16 x i16> %y, %notmask
   2283   %r = or <16 x i16> %mx, %my
   2284   ret <16 x i16> %r
   2285 }
   2286 
   2287 define <8 x i32> @out_v8i32(<8 x i32> *%px, <8 x i32> *%py, <8 x i32> *%pmask) nounwind {
        ; "Out" form of a masked merge on <8 x i32> vectors loaded from memory:
        ;   r = (x & mask) | (y & ~mask)
        ; Expected lowering per configuration (see the assertions below):
        ;   - no SSE / SSE1 only: each i32 lane is merged separately in GPRs
        ;     (andl / notl / andl / orl) and stored through the result pointer;
        ;   - SSE2: two 128-bit halves, each done with andps / andnps / orps;
        ;   - XOP: a single 256-bit vpcmov.
   2288 ; CHECK-BASELINE-LABEL: out_v8i32:
   2289 ; CHECK-BASELINE:       # %bb.0:
   2290 ; CHECK-BASELINE-NEXT:    pushq %rbp
   2291 ; CHECK-BASELINE-NEXT:    pushq %r15
   2292 ; CHECK-BASELINE-NEXT:    pushq %r14
   2293 ; CHECK-BASELINE-NEXT:    pushq %rbx
   2294 ; CHECK-BASELINE-NEXT:    movl 4(%rcx), %r8d
   2295 ; CHECK-BASELINE-NEXT:    movl 8(%rcx), %r9d
   2296 ; CHECK-BASELINE-NEXT:    movl 12(%rcx), %r10d
   2297 ; CHECK-BASELINE-NEXT:    movl 16(%rcx), %r11d
   2298 ; CHECK-BASELINE-NEXT:    movl 20(%rcx), %r15d
   2299 ; CHECK-BASELINE-NEXT:    movl 24(%rcx), %ebx
   2300 ; CHECK-BASELINE-NEXT:    movl 28(%rcx), %ebp
   2301 ; CHECK-BASELINE-NEXT:    movl 28(%rsi), %r14d
   2302 ; CHECK-BASELINE-NEXT:    andl %ebp, %r14d
   2303 ; CHECK-BASELINE-NEXT:    notl %ebp
   2304 ; CHECK-BASELINE-NEXT:    andl 28(%rdx), %ebp
   2305 ; CHECK-BASELINE-NEXT:    orl %r14d, %ebp
   2306 ; CHECK-BASELINE-NEXT:    movl 24(%rsi), %eax
   2307 ; CHECK-BASELINE-NEXT:    andl %ebx, %eax
   2308 ; CHECK-BASELINE-NEXT:    notl %ebx
   2309 ; CHECK-BASELINE-NEXT:    andl 24(%rdx), %ebx
   2310 ; CHECK-BASELINE-NEXT:    orl %eax, %ebx
   2311 ; CHECK-BASELINE-NEXT:    movl 20(%rsi), %eax
   2312 ; CHECK-BASELINE-NEXT:    andl %r15d, %eax
   2313 ; CHECK-BASELINE-NEXT:    notl %r15d
   2314 ; CHECK-BASELINE-NEXT:    andl 20(%rdx), %r15d
   2315 ; CHECK-BASELINE-NEXT:    orl %eax, %r15d
   2316 ; CHECK-BASELINE-NEXT:    movl 16(%rsi), %eax
   2317 ; CHECK-BASELINE-NEXT:    andl %r11d, %eax
   2318 ; CHECK-BASELINE-NEXT:    notl %r11d
   2319 ; CHECK-BASELINE-NEXT:    andl 16(%rdx), %r11d
   2320 ; CHECK-BASELINE-NEXT:    orl %eax, %r11d
   2321 ; CHECK-BASELINE-NEXT:    movl 12(%rsi), %eax
   2322 ; CHECK-BASELINE-NEXT:    andl %r10d, %eax
   2323 ; CHECK-BASELINE-NEXT:    notl %r10d
   2324 ; CHECK-BASELINE-NEXT:    andl 12(%rdx), %r10d
   2325 ; CHECK-BASELINE-NEXT:    orl %eax, %r10d
   2326 ; CHECK-BASELINE-NEXT:    movl 8(%rsi), %eax
   2327 ; CHECK-BASELINE-NEXT:    andl %r9d, %eax
   2328 ; CHECK-BASELINE-NEXT:    notl %r9d
   2329 ; CHECK-BASELINE-NEXT:    andl 8(%rdx), %r9d
   2330 ; CHECK-BASELINE-NEXT:    orl %eax, %r9d
   2331 ; CHECK-BASELINE-NEXT:    movl 4(%rsi), %eax
   2332 ; CHECK-BASELINE-NEXT:    andl %r8d, %eax
   2333 ; CHECK-BASELINE-NEXT:    notl %r8d
   2334 ; CHECK-BASELINE-NEXT:    andl 4(%rdx), %r8d
   2335 ; CHECK-BASELINE-NEXT:    orl %eax, %r8d
   2336 ; CHECK-BASELINE-NEXT:    movl (%rcx), %eax
   2337 ; CHECK-BASELINE-NEXT:    movl (%rsi), %ecx
   2338 ; CHECK-BASELINE-NEXT:    andl %eax, %ecx
   2339 ; CHECK-BASELINE-NEXT:    notl %eax
   2340 ; CHECK-BASELINE-NEXT:    andl (%rdx), %eax
   2341 ; CHECK-BASELINE-NEXT:    orl %ecx, %eax
   2342 ; CHECK-BASELINE-NEXT:    movl %ebp, 28(%rdi)
   2343 ; CHECK-BASELINE-NEXT:    movl %ebx, 24(%rdi)
   2344 ; CHECK-BASELINE-NEXT:    movl %r15d, 20(%rdi)
   2345 ; CHECK-BASELINE-NEXT:    movl %r11d, 16(%rdi)
   2346 ; CHECK-BASELINE-NEXT:    movl %r10d, 12(%rdi)
   2347 ; CHECK-BASELINE-NEXT:    movl %r9d, 8(%rdi)
   2348 ; CHECK-BASELINE-NEXT:    movl %r8d, 4(%rdi)
   2349 ; CHECK-BASELINE-NEXT:    movl %eax, (%rdi)
   2350 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   2351 ; CHECK-BASELINE-NEXT:    popq %rbx
   2352 ; CHECK-BASELINE-NEXT:    popq %r14
   2353 ; CHECK-BASELINE-NEXT:    popq %r15
   2354 ; CHECK-BASELINE-NEXT:    popq %rbp
   2355 ; CHECK-BASELINE-NEXT:    retq
   2356 ;
   2357 ; CHECK-SSE1-LABEL: out_v8i32:
   2358 ; CHECK-SSE1:       # %bb.0:
   2359 ; CHECK-SSE1-NEXT:    pushq %rbp
   2360 ; CHECK-SSE1-NEXT:    pushq %r15
   2361 ; CHECK-SSE1-NEXT:    pushq %r14
   2362 ; CHECK-SSE1-NEXT:    pushq %rbx
   2363 ; CHECK-SSE1-NEXT:    movl 4(%rcx), %r8d
   2364 ; CHECK-SSE1-NEXT:    movl 8(%rcx), %r9d
   2365 ; CHECK-SSE1-NEXT:    movl 12(%rcx), %r10d
   2366 ; CHECK-SSE1-NEXT:    movl 16(%rcx), %r11d
   2367 ; CHECK-SSE1-NEXT:    movl 20(%rcx), %r15d
   2368 ; CHECK-SSE1-NEXT:    movl 24(%rcx), %ebx
   2369 ; CHECK-SSE1-NEXT:    movl 28(%rcx), %ebp
   2370 ; CHECK-SSE1-NEXT:    movl 28(%rsi), %r14d
   2371 ; CHECK-SSE1-NEXT:    andl %ebp, %r14d
   2372 ; CHECK-SSE1-NEXT:    notl %ebp
   2373 ; CHECK-SSE1-NEXT:    andl 28(%rdx), %ebp
   2374 ; CHECK-SSE1-NEXT:    orl %r14d, %ebp
   2375 ; CHECK-SSE1-NEXT:    movl 24(%rsi), %eax
   2376 ; CHECK-SSE1-NEXT:    andl %ebx, %eax
   2377 ; CHECK-SSE1-NEXT:    notl %ebx
   2378 ; CHECK-SSE1-NEXT:    andl 24(%rdx), %ebx
   2379 ; CHECK-SSE1-NEXT:    orl %eax, %ebx
   2380 ; CHECK-SSE1-NEXT:    movl 20(%rsi), %eax
   2381 ; CHECK-SSE1-NEXT:    andl %r15d, %eax
   2382 ; CHECK-SSE1-NEXT:    notl %r15d
   2383 ; CHECK-SSE1-NEXT:    andl 20(%rdx), %r15d
   2384 ; CHECK-SSE1-NEXT:    orl %eax, %r15d
   2385 ; CHECK-SSE1-NEXT:    movl 16(%rsi), %eax
   2386 ; CHECK-SSE1-NEXT:    andl %r11d, %eax
   2387 ; CHECK-SSE1-NEXT:    notl %r11d
   2388 ; CHECK-SSE1-NEXT:    andl 16(%rdx), %r11d
   2389 ; CHECK-SSE1-NEXT:    orl %eax, %r11d
   2390 ; CHECK-SSE1-NEXT:    movl 12(%rsi), %eax
   2391 ; CHECK-SSE1-NEXT:    andl %r10d, %eax
   2392 ; CHECK-SSE1-NEXT:    notl %r10d
   2393 ; CHECK-SSE1-NEXT:    andl 12(%rdx), %r10d
   2394 ; CHECK-SSE1-NEXT:    orl %eax, %r10d
   2395 ; CHECK-SSE1-NEXT:    movl 8(%rsi), %eax
   2396 ; CHECK-SSE1-NEXT:    andl %r9d, %eax
   2397 ; CHECK-SSE1-NEXT:    notl %r9d
   2398 ; CHECK-SSE1-NEXT:    andl 8(%rdx), %r9d
   2399 ; CHECK-SSE1-NEXT:    orl %eax, %r9d
   2400 ; CHECK-SSE1-NEXT:    movl 4(%rsi), %eax
   2401 ; CHECK-SSE1-NEXT:    andl %r8d, %eax
   2402 ; CHECK-SSE1-NEXT:    notl %r8d
   2403 ; CHECK-SSE1-NEXT:    andl 4(%rdx), %r8d
   2404 ; CHECK-SSE1-NEXT:    orl %eax, %r8d
   2405 ; CHECK-SSE1-NEXT:    movl (%rcx), %eax
   2406 ; CHECK-SSE1-NEXT:    movl (%rsi), %ecx
   2407 ; CHECK-SSE1-NEXT:    andl %eax, %ecx
   2408 ; CHECK-SSE1-NEXT:    notl %eax
   2409 ; CHECK-SSE1-NEXT:    andl (%rdx), %eax
   2410 ; CHECK-SSE1-NEXT:    orl %ecx, %eax
   2411 ; CHECK-SSE1-NEXT:    movl %ebp, 28(%rdi)
   2412 ; CHECK-SSE1-NEXT:    movl %ebx, 24(%rdi)
   2413 ; CHECK-SSE1-NEXT:    movl %r15d, 20(%rdi)
   2414 ; CHECK-SSE1-NEXT:    movl %r11d, 16(%rdi)
   2415 ; CHECK-SSE1-NEXT:    movl %r10d, 12(%rdi)
   2416 ; CHECK-SSE1-NEXT:    movl %r9d, 8(%rdi)
   2417 ; CHECK-SSE1-NEXT:    movl %r8d, 4(%rdi)
   2418 ; CHECK-SSE1-NEXT:    movl %eax, (%rdi)
   2419 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   2420 ; CHECK-SSE1-NEXT:    popq %rbx
   2421 ; CHECK-SSE1-NEXT:    popq %r14
   2422 ; CHECK-SSE1-NEXT:    popq %r15
   2423 ; CHECK-SSE1-NEXT:    popq %rbp
   2424 ; CHECK-SSE1-NEXT:    retq
   2425 ;
   2426 ; CHECK-SSE2-LABEL: out_v8i32:
   2427 ; CHECK-SSE2:       # %bb.0:
   2428 ; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
   2429 ; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
   2430 ; CHECK-SSE2-NEXT:    movaps 16(%rdi), %xmm2
   2431 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
   2432 ; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm3
   2433 ; CHECK-SSE2-NEXT:    andps %xmm0, %xmm3
   2434 ; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm1
   2435 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
   2436 ; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
   2437 ; CHECK-SSE2-NEXT:    orps %xmm3, %xmm0
   2438 ; CHECK-SSE2-NEXT:    retq
   2439 ;
   2440 ; CHECK-XOP-LABEL: out_v8i32:
   2441 ; CHECK-XOP:       # %bb.0:
   2442 ; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
   2443 ; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
   2444 ; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
   2445 ; CHECK-XOP-NEXT:    retq
        ; Load x, y and mask; every load is declared 32-byte aligned.
   2446   %x = load <8 x i32>, <8 x i32> *%px, align 32
   2447   %y = load <8 x i32>, <8 x i32> *%py, align 32
   2448   %mask = load <8 x i32>, <8 x i32> *%pmask, align 32
        ; mx keeps the bits of x selected by mask.
   2449   %mx = and <8 x i32> %x, %mask
        ; xor with all-ones is the bitwise NOT of mask.
   2450   %notmask = xor <8 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
        ; my keeps the bits of y NOT selected by mask; or-ing merges both halves.
   2451   %my = and <8 x i32> %y, %notmask
   2452   %r = or <8 x i32> %mx, %my
   2453   ret <8 x i32> %r
   2454 }
   2455 
   2456 define <4 x i64> @out_v4i64(<4 x i64> *%px, <4 x i64> *%py, <4 x i64> *%pmask) nounwind {
        ; "Out" form of a masked merge on <4 x i64> vectors loaded from memory:
        ;   r = (x & mask) | (y & ~mask)
        ; Expected lowering per configuration (see the assertions below):
        ;   - no SSE / SSE1 only: each i64 lane is merged separately in GPRs
        ;     (andq / notq / andq / orq) and stored through the result pointer;
        ;   - SSE2: two 128-bit halves, each done with andps / andnps / orps;
        ;   - XOP: a single 256-bit vpcmov.
   2457 ; CHECK-BASELINE-LABEL: out_v4i64:
   2458 ; CHECK-BASELINE:       # %bb.0:
   2459 ; CHECK-BASELINE-NEXT:    pushq %rbx
   2460 ; CHECK-BASELINE-NEXT:    movq (%rcx), %r8
   2461 ; CHECK-BASELINE-NEXT:    movq 8(%rcx), %r9
   2462 ; CHECK-BASELINE-NEXT:    movq 16(%rcx), %rax
   2463 ; CHECK-BASELINE-NEXT:    movq 24(%rcx), %rcx
   2464 ; CHECK-BASELINE-NEXT:    movq 24(%rsi), %r10
   2465 ; CHECK-BASELINE-NEXT:    andq %rcx, %r10
   2466 ; CHECK-BASELINE-NEXT:    movq 16(%rsi), %r11
   2467 ; CHECK-BASELINE-NEXT:    andq %rax, %r11
   2468 ; CHECK-BASELINE-NEXT:    movq 8(%rsi), %rbx
   2469 ; CHECK-BASELINE-NEXT:    andq %r9, %rbx
   2470 ; CHECK-BASELINE-NEXT:    movq (%rsi), %rsi
   2471 ; CHECK-BASELINE-NEXT:    andq %r8, %rsi
   2472 ; CHECK-BASELINE-NEXT:    notq %r8
   2473 ; CHECK-BASELINE-NEXT:    notq %r9
   2474 ; CHECK-BASELINE-NEXT:    notq %rax
   2475 ; CHECK-BASELINE-NEXT:    notq %rcx
   2476 ; CHECK-BASELINE-NEXT:    andq 24(%rdx), %rcx
   2477 ; CHECK-BASELINE-NEXT:    orq %r10, %rcx
   2478 ; CHECK-BASELINE-NEXT:    andq 16(%rdx), %rax
   2479 ; CHECK-BASELINE-NEXT:    orq %r11, %rax
   2480 ; CHECK-BASELINE-NEXT:    andq 8(%rdx), %r9
   2481 ; CHECK-BASELINE-NEXT:    orq %rbx, %r9
   2482 ; CHECK-BASELINE-NEXT:    andq (%rdx), %r8
   2483 ; CHECK-BASELINE-NEXT:    orq %rsi, %r8
   2484 ; CHECK-BASELINE-NEXT:    movq %rcx, 24(%rdi)
   2485 ; CHECK-BASELINE-NEXT:    movq %rax, 16(%rdi)
   2486 ; CHECK-BASELINE-NEXT:    movq %r9, 8(%rdi)
   2487 ; CHECK-BASELINE-NEXT:    movq %r8, (%rdi)
   2488 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   2489 ; CHECK-BASELINE-NEXT:    popq %rbx
   2490 ; CHECK-BASELINE-NEXT:    retq
   2491 ;
   2492 ; CHECK-SSE1-LABEL: out_v4i64:
   2493 ; CHECK-SSE1:       # %bb.0:
   2494 ; CHECK-SSE1-NEXT:    pushq %rbx
   2495 ; CHECK-SSE1-NEXT:    movq (%rcx), %r8
   2496 ; CHECK-SSE1-NEXT:    movq 8(%rcx), %r9
   2497 ; CHECK-SSE1-NEXT:    movq 16(%rcx), %rax
   2498 ; CHECK-SSE1-NEXT:    movq 24(%rcx), %rcx
   2499 ; CHECK-SSE1-NEXT:    movq 24(%rsi), %r10
   2500 ; CHECK-SSE1-NEXT:    andq %rcx, %r10
   2501 ; CHECK-SSE1-NEXT:    movq 16(%rsi), %r11
   2502 ; CHECK-SSE1-NEXT:    andq %rax, %r11
   2503 ; CHECK-SSE1-NEXT:    movq 8(%rsi), %rbx
   2504 ; CHECK-SSE1-NEXT:    andq %r9, %rbx
   2505 ; CHECK-SSE1-NEXT:    movq (%rsi), %rsi
   2506 ; CHECK-SSE1-NEXT:    andq %r8, %rsi
   2507 ; CHECK-SSE1-NEXT:    notq %r8
   2508 ; CHECK-SSE1-NEXT:    notq %r9
   2509 ; CHECK-SSE1-NEXT:    notq %rax
   2510 ; CHECK-SSE1-NEXT:    notq %rcx
   2511 ; CHECK-SSE1-NEXT:    andq 24(%rdx), %rcx
   2512 ; CHECK-SSE1-NEXT:    orq %r10, %rcx
   2513 ; CHECK-SSE1-NEXT:    andq 16(%rdx), %rax
   2514 ; CHECK-SSE1-NEXT:    orq %r11, %rax
   2515 ; CHECK-SSE1-NEXT:    andq 8(%rdx), %r9
   2516 ; CHECK-SSE1-NEXT:    orq %rbx, %r9
   2517 ; CHECK-SSE1-NEXT:    andq (%rdx), %r8
   2518 ; CHECK-SSE1-NEXT:    orq %rsi, %r8
   2519 ; CHECK-SSE1-NEXT:    movq %rcx, 24(%rdi)
   2520 ; CHECK-SSE1-NEXT:    movq %rax, 16(%rdi)
   2521 ; CHECK-SSE1-NEXT:    movq %r9, 8(%rdi)
   2522 ; CHECK-SSE1-NEXT:    movq %r8, (%rdi)
   2523 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   2524 ; CHECK-SSE1-NEXT:    popq %rbx
   2525 ; CHECK-SSE1-NEXT:    retq
   2526 ;
   2527 ; CHECK-SSE2-LABEL: out_v4i64:
   2528 ; CHECK-SSE2:       # %bb.0:
   2529 ; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
   2530 ; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
   2531 ; CHECK-SSE2-NEXT:    movaps 16(%rdi), %xmm2
   2532 ; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
   2533 ; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm3
   2534 ; CHECK-SSE2-NEXT:    andps %xmm0, %xmm3
   2535 ; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm1
   2536 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
   2537 ; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
   2538 ; CHECK-SSE2-NEXT:    orps %xmm3, %xmm0
   2539 ; CHECK-SSE2-NEXT:    retq
   2540 ;
   2541 ; CHECK-XOP-LABEL: out_v4i64:
   2542 ; CHECK-XOP:       # %bb.0:
   2543 ; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
   2544 ; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
   2545 ; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
   2546 ; CHECK-XOP-NEXT:    retq
        ; Load x, y and mask; every load is declared 32-byte aligned.
   2547   %x = load <4 x i64>, <4 x i64> *%px, align 32
   2548   %y = load <4 x i64>, <4 x i64> *%py, align 32
   2549   %mask = load <4 x i64>, <4 x i64> *%pmask, align 32
        ; mx keeps the bits of x selected by mask.
   2550   %mx = and <4 x i64> %x, %mask
        ; xor with all-ones is the bitwise NOT of mask.
   2551   %notmask = xor <4 x i64> %mask, <i64 -1, i64 -1, i64 -1, i64 -1>
        ; my keeps the bits of y NOT selected by mask; or-ing merges both halves.
   2552   %my = and <4 x i64> %y, %notmask
   2553   %r = or <4 x i64> %mx, %my
   2554   ret <4 x i64> %r
   2555 }
   2556 
   2557 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
   2558 ; The "in" pattern, ((x ^ y) & mask) ^ y, should produce the same code as the "out" pattern above.
   2559 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
   2560 
   2561 ; ============================================================================ ;
   2562 ; 8-bit vector width
   2563 ; ============================================================================ ;
   2564 
   2565 define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
        ; "In" form of a masked merge on a single-element i8 vector:
        ;   r = ((x ^ y) & mask) ^ y
        ; One set of expectations is shared by every RUN line: a scalar
        ; xorl / andl / xorl sequence on the argument registers.
   2566 ; CHECK-LABEL: in_v1i8:
   2567 ; CHECK:       # %bb.0:
   2568 ; CHECK-NEXT:    xorl %esi, %edi
   2569 ; CHECK-NEXT:    andl %edx, %edi
   2570 ; CHECK-NEXT:    xorl %esi, %edi
   2571 ; CHECK-NEXT:    movl %edi, %eax
   2572 ; CHECK-NEXT:    retq
        ; n0 has a 1 wherever x and y differ; n1 keeps only the differing bits
        ; selected by mask; xor-ing with y again flips exactly those bits of y.
   2573   %n0 = xor <1 x i8> %x, %y
   2574   %n1 = and <1 x i8> %n0, %mask
   2575   %r = xor <1 x i8> %n1, %y
   2576   ret <1 x i8> %r
   2577 }
   2578 
   2579 ; ============================================================================ ;
   2580 ; 16-bit vector width
   2581 ; ============================================================================ ;
   2582 
   2583 define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
        ; "In" form of a masked merge on <2 x i8>:
        ;   r = ((x ^ y) & mask) ^ y
        ; Expected lowering per configuration (see the assertions below):
        ;   - no SSE / SSE1 only: each element is handled in GPRs with
        ;     xorl / andl / xorl, results returned in eax and edx;
        ;   - SSE2: andps / andnps / orps on xmm registers;
        ;   - XOP: a single vpcmov.
   2584 ; CHECK-BASELINE-LABEL: in_v2i8:
   2585 ; CHECK-BASELINE:       # %bb.0:
   2586 ; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
   2587 ; CHECK-BASELINE-NEXT:    xorl %edx, %edi
   2588 ; CHECK-BASELINE-NEXT:    andl %r8d, %edi
   2589 ; CHECK-BASELINE-NEXT:    andl %r9d, %esi
   2590 ; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
   2591 ; CHECK-BASELINE-NEXT:    xorl %edx, %edi
   2592 ; CHECK-BASELINE-NEXT:    movl %edi, %eax
   2593 ; CHECK-BASELINE-NEXT:    movl %esi, %edx
   2594 ; CHECK-BASELINE-NEXT:    retq
   2595 ;
   2596 ; CHECK-SSE1-LABEL: in_v2i8:
   2597 ; CHECK-SSE1:       # %bb.0:
   2598 ; CHECK-SSE1-NEXT:    xorl %ecx, %esi
   2599 ; CHECK-SSE1-NEXT:    xorl %edx, %edi
   2600 ; CHECK-SSE1-NEXT:    andl %r8d, %edi
   2601 ; CHECK-SSE1-NEXT:    andl %r9d, %esi
   2602 ; CHECK-SSE1-NEXT:    xorl %ecx, %esi
   2603 ; CHECK-SSE1-NEXT:    xorl %edx, %edi
   2604 ; CHECK-SSE1-NEXT:    movl %edi, %eax
   2605 ; CHECK-SSE1-NEXT:    movl %esi, %edx
   2606 ; CHECK-SSE1-NEXT:    retq
   2607 ;
   2608 ; CHECK-SSE2-LABEL: in_v2i8:
   2609 ; CHECK-SSE2:       # %bb.0:
   2610 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
   2611 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
   2612 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   2613 ; CHECK-SSE2-NEXT:    retq
   2614 ;
   2615 ; CHECK-XOP-LABEL: in_v2i8:
   2616 ; CHECK-XOP:       # %bb.0:
   2617 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
   2618 ; CHECK-XOP-NEXT:    retq
        ; n0 has a 1 wherever x and y differ; n1 keeps only the differing bits
        ; selected by mask; xor-ing with y again flips exactly those bits of y.
   2619   %n0 = xor <2 x i8> %x, %y
   2620   %n1 = and <2 x i8> %n0, %mask
   2621   %r = xor <2 x i8> %n1, %y
   2622   ret <2 x i8> %r
   2623 }
   2624 
   2625 define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
        ; Masked merge of a <1 x i16> value written in xor-and-xor form:
        ;   %r = ((%x ^ %y) & %mask) ^ %y
        ; The expectations below use the prefix shared by every llc invocation of
        ; this test, so one xor/and/xor sequence on 32-bit GPRs is expected in all
        ; configurations, followed by a copy of the result into %eax.
   2626 ; CHECK-LABEL: in_v1i16:
   2627 ; CHECK:       # %bb.0:
   2628 ; CHECK-NEXT:    xorl %esi, %edi
   2629 ; CHECK-NEXT:    andl %edx, %edi
   2630 ; CHECK-NEXT:    xorl %esi, %edi
   2631 ; CHECK-NEXT:    movl %edi, %eax
   2632 ; CHECK-NEXT:    retq
          ; %n0: bits in which %x and %y differ.
   2633   %n0 = xor <1 x i16> %x, %y
          ; %n1: of those differing bits, keep only the ones selected by %mask.
   2634   %n1 = and <1 x i16> %n0, %mask
          ; Flipping those bits in %y gives %x where %mask is set and %y elsewhere.
   2635   %r = xor <1 x i16> %n1, %y
   2636   ret <1 x i16> %r
   2637 }
   2638 
   2639 ; ============================================================================ ;
   2640 ; 32-bit vector width
   2641 ; ============================================================================ ;
   2642 
   2643 define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
        ; <4 x i8> masked merge in xor-and-xor form: %r = ((%x ^ %y) & %mask) ^ %y.
        ; Expected lowering per configuration, as asserted below:
        ;   * no SSE and SSE1-only: element-wise xor/and/xor, mostly with byte-sized
        ;     xorb/andb, with several operands read from %rsp-relative slots. The
        ;     four result bytes are stored at offsets 0..3 from %rdi, and %rdi is
        ;     copied to %rax before returning.
        ;   * SSE2: the and/andn/or form (andps, andnps, orps) on XMM registers.
        ;   * XOP: a single vpcmov.
   2644 ; CHECK-BASELINE-LABEL: in_v4i8:
   2645 ; CHECK-BASELINE:       # %bb.0:
   2646 ; CHECK-BASELINE-NEXT:    xorl %r9d, %esi
   2647 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
   2648 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
   2649 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
   2650 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   2651 ; CHECK-BASELINE-NEXT:    xorb %r11b, %cl
   2652 ; CHECK-BASELINE-NEXT:    xorb %r10b, %r8b
   2653 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
   2654 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   2655 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
   2656 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
   2657 ; CHECK-BASELINE-NEXT:    xorb %r9b, %sil
   2658 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   2659 ; CHECK-BASELINE-NEXT:    xorb %r11b, %cl
   2660 ; CHECK-BASELINE-NEXT:    xorb %r10b, %r8b
   2661 ; CHECK-BASELINE-NEXT:    movb %r8b, 3(%rdi)
   2662 ; CHECK-BASELINE-NEXT:    movb %cl, 2(%rdi)
   2663 ; CHECK-BASELINE-NEXT:    movb %dl, 1(%rdi)
   2664 ; CHECK-BASELINE-NEXT:    movb %sil, (%rdi)
   2665 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   2666 ; CHECK-BASELINE-NEXT:    retq
   2667 ;
   2668 ; CHECK-SSE1-LABEL: in_v4i8:
   2669 ; CHECK-SSE1:       # %bb.0:
   2670 ; CHECK-SSE1-NEXT:    xorl %r9d, %esi
   2671 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
   2672 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
   2673 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
   2674 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   2675 ; CHECK-SSE1-NEXT:    xorb %r11b, %cl
   2676 ; CHECK-SSE1-NEXT:    xorb %r10b, %r8b
   2677 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
   2678 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   2679 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
   2680 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
   2681 ; CHECK-SSE1-NEXT:    xorb %r9b, %sil
   2682 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   2683 ; CHECK-SSE1-NEXT:    xorb %r11b, %cl
   2684 ; CHECK-SSE1-NEXT:    xorb %r10b, %r8b
   2685 ; CHECK-SSE1-NEXT:    movb %r8b, 3(%rdi)
   2686 ; CHECK-SSE1-NEXT:    movb %cl, 2(%rdi)
   2687 ; CHECK-SSE1-NEXT:    movb %dl, 1(%rdi)
   2688 ; CHECK-SSE1-NEXT:    movb %sil, (%rdi)
   2689 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   2690 ; CHECK-SSE1-NEXT:    retq
   2691 ;
   2692 ; CHECK-SSE2-LABEL: in_v4i8:
   2693 ; CHECK-SSE2:       # %bb.0:
   2694 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
   2695 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
   2696 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   2697 ; CHECK-SSE2-NEXT:    retq
   2698 ;
   2699 ; CHECK-XOP-LABEL: in_v4i8:
   2700 ; CHECK-XOP:       # %bb.0:
   2701 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
   2702 ; CHECK-XOP-NEXT:    retq
          ; %n0: bits in which %x and %y differ.
   2703   %n0 = xor <4 x i8> %x, %y
          ; %n1: of those differing bits, keep only the ones selected by %mask.
   2704   %n1 = and <4 x i8> %n0, %mask
          ; Flipping those bits in %y gives %x where %mask is set and %y elsewhere.
   2705   %r = xor <4 x i8> %n1, %y
   2706   ret <4 x i8> %r
   2707 }
   2708 
   2709 define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
        ; <2 x i16> masked merge in xor-and-xor form: %r = ((%x ^ %y) & %mask) ^ %y.
        ; Expected lowering per configuration, as asserted below:
        ;   * no SSE and SSE1-only: each element is handled in 32-bit GPRs with
        ;     xorl/andl/xorl; the two results are then moved to %eax and %edx.
        ;   * SSE2: the and/andn/or form (andps, andnps, orps) on XMM registers.
        ;   * XOP: a single vpcmov.
   2710 ; CHECK-BASELINE-LABEL: in_v2i16:
   2711 ; CHECK-BASELINE:       # %bb.0:
   2712 ; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
   2713 ; CHECK-BASELINE-NEXT:    xorl %edx, %edi
   2714 ; CHECK-BASELINE-NEXT:    andl %r8d, %edi
   2715 ; CHECK-BASELINE-NEXT:    andl %r9d, %esi
   2716 ; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
   2717 ; CHECK-BASELINE-NEXT:    xorl %edx, %edi
   2718 ; CHECK-BASELINE-NEXT:    movl %edi, %eax
   2719 ; CHECK-BASELINE-NEXT:    movl %esi, %edx
   2720 ; CHECK-BASELINE-NEXT:    retq
   2721 ;
   2722 ; CHECK-SSE1-LABEL: in_v2i16:
   2723 ; CHECK-SSE1:       # %bb.0:
   2724 ; CHECK-SSE1-NEXT:    xorl %ecx, %esi
   2725 ; CHECK-SSE1-NEXT:    xorl %edx, %edi
   2726 ; CHECK-SSE1-NEXT:    andl %r8d, %edi
   2727 ; CHECK-SSE1-NEXT:    andl %r9d, %esi
   2728 ; CHECK-SSE1-NEXT:    xorl %ecx, %esi
   2729 ; CHECK-SSE1-NEXT:    xorl %edx, %edi
   2730 ; CHECK-SSE1-NEXT:    movl %edi, %eax
   2731 ; CHECK-SSE1-NEXT:    movl %esi, %edx
   2732 ; CHECK-SSE1-NEXT:    retq
   2733 ;
   2734 ; CHECK-SSE2-LABEL: in_v2i16:
   2735 ; CHECK-SSE2:       # %bb.0:
   2736 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
   2737 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
   2738 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   2739 ; CHECK-SSE2-NEXT:    retq
   2740 ;
   2741 ; CHECK-XOP-LABEL: in_v2i16:
   2742 ; CHECK-XOP:       # %bb.0:
   2743 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
   2744 ; CHECK-XOP-NEXT:    retq
          ; %n0: bits in which %x and %y differ.
   2745   %n0 = xor <2 x i16> %x, %y
          ; %n1: of those differing bits, keep only the ones selected by %mask.
   2746   %n1 = and <2 x i16> %n0, %mask
          ; Flipping those bits in %y gives %x where %mask is set and %y elsewhere.
   2747   %r = xor <2 x i16> %n1, %y
   2748   ret <2 x i16> %r
   2749 }
   2750 
   2751 define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
        ; Masked merge of a <1 x i32> value written in xor-and-xor form:
        ;   %r = ((%x ^ %y) & %mask) ^ %y
        ; The expectations below use the prefix shared by every llc invocation of
        ; this test, so one xor/and/xor sequence on 32-bit GPRs is expected in all
        ; configurations, followed by a copy of the result into %eax.
   2752 ; CHECK-LABEL: in_v1i32:
   2753 ; CHECK:       # %bb.0:
   2754 ; CHECK-NEXT:    xorl %esi, %edi
   2755 ; CHECK-NEXT:    andl %edx, %edi
   2756 ; CHECK-NEXT:    xorl %esi, %edi
   2757 ; CHECK-NEXT:    movl %edi, %eax
   2758 ; CHECK-NEXT:    retq
          ; %n0: bits in which %x and %y differ.
   2759   %n0 = xor <1 x i32> %x, %y
          ; %n1: of those differing bits, keep only the ones selected by %mask.
   2760   %n1 = and <1 x i32> %n0, %mask
          ; Flipping those bits in %y gives %x where %mask is set and %y elsewhere.
   2761   %r = xor <1 x i32> %n1, %y
   2762   ret <1 x i32> %r
   2763 }
   2764 
   2765 ; ============================================================================ ;
   2766 ; 64-bit vector width
   2767 ; ============================================================================ ;
   2768 
   2769 define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
        ; <8 x i8> masked merge in xor-and-xor form: %r = ((%x ^ %y) & %mask) ^ %y.
        ; Expected lowering per configuration, as asserted below:
        ;   * no SSE and SSE1-only: byte-wise xorb/andb/xorb, with many operands
        ;     read from %rsp-relative slots. %rbp, %r15, %r14, %r13, %r12 and %rbx
        ;     are pushed on entry and popped before returning. The eight result
        ;     bytes are stored at offsets 0..7 from %rdi, and %rdi is copied to
        ;     %rax.
        ;   * SSE2: the and/andn/or form (andps, andnps, orps) on XMM registers.
        ;   * XOP: a single vpcmov.
   2770 ; CHECK-BASELINE-LABEL: in_v8i8:
   2771 ; CHECK-BASELINE:       # %bb.0:
   2772 ; CHECK-BASELINE-NEXT:    pushq %rbp
   2773 ; CHECK-BASELINE-NEXT:    pushq %r15
   2774 ; CHECK-BASELINE-NEXT:    pushq %r14
   2775 ; CHECK-BASELINE-NEXT:    pushq %r13
   2776 ; CHECK-BASELINE-NEXT:    pushq %r12
   2777 ; CHECK-BASELINE-NEXT:    pushq %rbx
   2778 ; CHECK-BASELINE-NEXT:    movl %ecx, %r10d
   2779 ; CHECK-BASELINE-NEXT:    movl %edx, %r11d
   2780 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
   2781 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
   2782 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
   2783 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
   2784 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
   2785 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
   2786 ; CHECK-BASELINE-NEXT:    xorb %bpl, %sil
   2787 ; CHECK-BASELINE-NEXT:    xorb %r13b, %r11b
   2788 ; CHECK-BASELINE-NEXT:    xorb %r12b, %r10b
   2789 ; CHECK-BASELINE-NEXT:    xorb %r15b, %r8b
   2790 ; CHECK-BASELINE-NEXT:    xorb %r14b, %r9b
   2791 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dl
   2792 ; CHECK-BASELINE-NEXT:    xorb {{[0-9]+}}(%rsp), %dl
   2793 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
   2794 ; CHECK-BASELINE-NEXT:    xorb {{[0-9]+}}(%rsp), %cl
   2795 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
   2796 ; CHECK-BASELINE-NEXT:    xorb %bl, %al
   2797 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
   2798 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
   2799 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
   2800 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
   2801 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
   2802 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
   2803 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   2804 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
   2805 ; CHECK-BASELINE-NEXT:    xorb %bpl, %sil
   2806 ; CHECK-BASELINE-NEXT:    xorb %r13b, %r11b
   2807 ; CHECK-BASELINE-NEXT:    xorb %r12b, %r10b
   2808 ; CHECK-BASELINE-NEXT:    xorb %r15b, %r8b
   2809 ; CHECK-BASELINE-NEXT:    xorb %r14b, %r9b
   2810 ; CHECK-BASELINE-NEXT:    xorb {{[0-9]+}}(%rsp), %dl
   2811 ; CHECK-BASELINE-NEXT:    xorb {{[0-9]+}}(%rsp), %cl
   2812 ; CHECK-BASELINE-NEXT:    xorb %bl, %al
   2813 ; CHECK-BASELINE-NEXT:    movb %al, 7(%rdi)
   2814 ; CHECK-BASELINE-NEXT:    movb %cl, 6(%rdi)
   2815 ; CHECK-BASELINE-NEXT:    movb %dl, 5(%rdi)
   2816 ; CHECK-BASELINE-NEXT:    movb %r9b, 4(%rdi)
   2817 ; CHECK-BASELINE-NEXT:    movb %r8b, 3(%rdi)
   2818 ; CHECK-BASELINE-NEXT:    movb %r10b, 2(%rdi)
   2819 ; CHECK-BASELINE-NEXT:    movb %r11b, 1(%rdi)
   2820 ; CHECK-BASELINE-NEXT:    movb %sil, (%rdi)
   2821 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   2822 ; CHECK-BASELINE-NEXT:    popq %rbx
   2823 ; CHECK-BASELINE-NEXT:    popq %r12
   2824 ; CHECK-BASELINE-NEXT:    popq %r13
   2825 ; CHECK-BASELINE-NEXT:    popq %r14
   2826 ; CHECK-BASELINE-NEXT:    popq %r15
   2827 ; CHECK-BASELINE-NEXT:    popq %rbp
   2828 ; CHECK-BASELINE-NEXT:    retq
   2829 ;
   2830 ; CHECK-SSE1-LABEL: in_v8i8:
   2831 ; CHECK-SSE1:       # %bb.0:
   2832 ; CHECK-SSE1-NEXT:    pushq %rbp
   2833 ; CHECK-SSE1-NEXT:    pushq %r15
   2834 ; CHECK-SSE1-NEXT:    pushq %r14
   2835 ; CHECK-SSE1-NEXT:    pushq %r13
   2836 ; CHECK-SSE1-NEXT:    pushq %r12
   2837 ; CHECK-SSE1-NEXT:    pushq %rbx
   2838 ; CHECK-SSE1-NEXT:    movl %ecx, %r10d
   2839 ; CHECK-SSE1-NEXT:    movl %edx, %r11d
   2840 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
   2841 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
   2842 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
   2843 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
   2844 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
   2845 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
   2846 ; CHECK-SSE1-NEXT:    xorb %bpl, %sil
   2847 ; CHECK-SSE1-NEXT:    xorb %r13b, %r11b
   2848 ; CHECK-SSE1-NEXT:    xorb %r12b, %r10b
   2849 ; CHECK-SSE1-NEXT:    xorb %r15b, %r8b
   2850 ; CHECK-SSE1-NEXT:    xorb %r14b, %r9b
   2851 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
   2852 ; CHECK-SSE1-NEXT:    xorb {{[0-9]+}}(%rsp), %dl
   2853 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
   2854 ; CHECK-SSE1-NEXT:    xorb {{[0-9]+}}(%rsp), %cl
   2855 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
   2856 ; CHECK-SSE1-NEXT:    xorb %bl, %al
   2857 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
   2858 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
   2859 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
   2860 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
   2861 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
   2862 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
   2863 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   2864 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
   2865 ; CHECK-SSE1-NEXT:    xorb %bpl, %sil
   2866 ; CHECK-SSE1-NEXT:    xorb %r13b, %r11b
   2867 ; CHECK-SSE1-NEXT:    xorb %r12b, %r10b
   2868 ; CHECK-SSE1-NEXT:    xorb %r15b, %r8b
   2869 ; CHECK-SSE1-NEXT:    xorb %r14b, %r9b
   2870 ; CHECK-SSE1-NEXT:    xorb {{[0-9]+}}(%rsp), %dl
   2871 ; CHECK-SSE1-NEXT:    xorb {{[0-9]+}}(%rsp), %cl
   2872 ; CHECK-SSE1-NEXT:    xorb %bl, %al
   2873 ; CHECK-SSE1-NEXT:    movb %al, 7(%rdi)
   2874 ; CHECK-SSE1-NEXT:    movb %cl, 6(%rdi)
   2875 ; CHECK-SSE1-NEXT:    movb %dl, 5(%rdi)
   2876 ; CHECK-SSE1-NEXT:    movb %r9b, 4(%rdi)
   2877 ; CHECK-SSE1-NEXT:    movb %r8b, 3(%rdi)
   2878 ; CHECK-SSE1-NEXT:    movb %r10b, 2(%rdi)
   2879 ; CHECK-SSE1-NEXT:    movb %r11b, 1(%rdi)
   2880 ; CHECK-SSE1-NEXT:    movb %sil, (%rdi)
   2881 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   2882 ; CHECK-SSE1-NEXT:    popq %rbx
   2883 ; CHECK-SSE1-NEXT:    popq %r12
   2884 ; CHECK-SSE1-NEXT:    popq %r13
   2885 ; CHECK-SSE1-NEXT:    popq %r14
   2886 ; CHECK-SSE1-NEXT:    popq %r15
   2887 ; CHECK-SSE1-NEXT:    popq %rbp
   2888 ; CHECK-SSE1-NEXT:    retq
   2889 ;
   2890 ; CHECK-SSE2-LABEL: in_v8i8:
   2891 ; CHECK-SSE2:       # %bb.0:
   2892 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
   2893 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
   2894 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   2895 ; CHECK-SSE2-NEXT:    retq
   2896 ;
   2897 ; CHECK-XOP-LABEL: in_v8i8:
   2898 ; CHECK-XOP:       # %bb.0:
   2899 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
   2900 ; CHECK-XOP-NEXT:    retq
          ; %n0: bits in which %x and %y differ.
   2901   %n0 = xor <8 x i8> %x, %y
          ; %n1: of those differing bits, keep only the ones selected by %mask.
   2902   %n1 = and <8 x i8> %n0, %mask
          ; Flipping those bits in %y gives %x where %mask is set and %y elsewhere.
   2903   %r = xor <8 x i8> %n1, %y
   2904   ret <8 x i8> %r
   2905 }
   2906 
   2907 define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
        ; <4 x i16> masked merge in xor-and-xor form: %r = ((%x ^ %y) & %mask) ^ %y.
        ; Expected lowering per configuration, as asserted below:
        ;   * no SSE and SSE1-only: 32-bit xorl for both xors and 16-bit andw with
        ;     %rsp-relative memory operands. The four results are stored with movw
        ;     at offsets 0, 2, 4 and 6 from %rdi, and %rdi is copied to %rax.
        ;   * SSE2: the and/andn/or form (andps, andnps, orps) on XMM registers.
        ;   * XOP: a single vpcmov.
   2908 ; CHECK-BASELINE-LABEL: in_v4i16:
   2909 ; CHECK-BASELINE:       # %bb.0:
   2910 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
   2911 ; CHECK-BASELINE-NEXT:    xorl %r10d, %r8d
   2912 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
   2913 ; CHECK-BASELINE-NEXT:    xorl %r11d, %ecx
   2914 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
   2915 ; CHECK-BASELINE-NEXT:    xorl %eax, %edx
   2916 ; CHECK-BASELINE-NEXT:    xorl %r9d, %esi
   2917 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
   2918 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %cx
   2919 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %dx
   2920 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %si
   2921 ; CHECK-BASELINE-NEXT:    xorl %r9d, %esi
   2922 ; CHECK-BASELINE-NEXT:    xorl %eax, %edx
   2923 ; CHECK-BASELINE-NEXT:    xorl %r11d, %ecx
   2924 ; CHECK-BASELINE-NEXT:    xorl %r10d, %r8d
   2925 ; CHECK-BASELINE-NEXT:    movw %r8w, 6(%rdi)
   2926 ; CHECK-BASELINE-NEXT:    movw %cx, 4(%rdi)
   2927 ; CHECK-BASELINE-NEXT:    movw %dx, 2(%rdi)
   2928 ; CHECK-BASELINE-NEXT:    movw %si, (%rdi)
   2929 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   2930 ; CHECK-BASELINE-NEXT:    retq
   2931 ;
   2932 ; CHECK-SSE1-LABEL: in_v4i16:
   2933 ; CHECK-SSE1:       # %bb.0:
   2934 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
   2935 ; CHECK-SSE1-NEXT:    xorl %r10d, %r8d
   2936 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
   2937 ; CHECK-SSE1-NEXT:    xorl %r11d, %ecx
   2938 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
   2939 ; CHECK-SSE1-NEXT:    xorl %eax, %edx
   2940 ; CHECK-SSE1-NEXT:    xorl %r9d, %esi
   2941 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
   2942 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %cx
   2943 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %dx
   2944 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %si
   2945 ; CHECK-SSE1-NEXT:    xorl %r9d, %esi
   2946 ; CHECK-SSE1-NEXT:    xorl %eax, %edx
   2947 ; CHECK-SSE1-NEXT:    xorl %r11d, %ecx
   2948 ; CHECK-SSE1-NEXT:    xorl %r10d, %r8d
   2949 ; CHECK-SSE1-NEXT:    movw %r8w, 6(%rdi)
   2950 ; CHECK-SSE1-NEXT:    movw %cx, 4(%rdi)
   2951 ; CHECK-SSE1-NEXT:    movw %dx, 2(%rdi)
   2952 ; CHECK-SSE1-NEXT:    movw %si, (%rdi)
   2953 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   2954 ; CHECK-SSE1-NEXT:    retq
   2955 ;
   2956 ; CHECK-SSE2-LABEL: in_v4i16:
   2957 ; CHECK-SSE2:       # %bb.0:
   2958 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
   2959 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
   2960 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   2961 ; CHECK-SSE2-NEXT:    retq
   2962 ;
   2963 ; CHECK-XOP-LABEL: in_v4i16:
   2964 ; CHECK-XOP:       # %bb.0:
   2965 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
   2966 ; CHECK-XOP-NEXT:    retq
          ; %n0: bits in which %x and %y differ.
   2967   %n0 = xor <4 x i16> %x, %y
          ; %n1: of those differing bits, keep only the ones selected by %mask.
   2968   %n1 = and <4 x i16> %n0, %mask
          ; Flipping those bits in %y gives %x where %mask is set and %y elsewhere.
   2969   %r = xor <4 x i16> %n1, %y
   2970   ret <4 x i16> %r
   2971 }
   2972 
   2973 define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
        ; <2 x i32> masked merge in xor-and-xor form: %r = ((%x ^ %y) & %mask) ^ %y.
        ; Expected lowering per configuration, as asserted below:
        ;   * no SSE and SSE1-only: each element is handled in 32-bit GPRs with
        ;     xorl/andl/xorl; the two results are then moved to %eax and %edx.
        ;   * SSE2: the and/andn/or form (andps, andnps, orps) on XMM registers.
        ;   * XOP: a single vpcmov.
   2974 ; CHECK-BASELINE-LABEL: in_v2i32:
   2975 ; CHECK-BASELINE:       # %bb.0:
   2976 ; CHECK-BASELINE-NEXT:    xorl %edx, %edi
   2977 ; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
   2978 ; CHECK-BASELINE-NEXT:    andl %r9d, %esi
   2979 ; CHECK-BASELINE-NEXT:    andl %r8d, %edi
   2980 ; CHECK-BASELINE-NEXT:    xorl %edx, %edi
   2981 ; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
   2982 ; CHECK-BASELINE-NEXT:    movl %edi, %eax
   2983 ; CHECK-BASELINE-NEXT:    movl %esi, %edx
   2984 ; CHECK-BASELINE-NEXT:    retq
   2985 ;
   2986 ; CHECK-SSE1-LABEL: in_v2i32:
   2987 ; CHECK-SSE1:       # %bb.0:
   2988 ; CHECK-SSE1-NEXT:    xorl %edx, %edi
   2989 ; CHECK-SSE1-NEXT:    xorl %ecx, %esi
   2990 ; CHECK-SSE1-NEXT:    andl %r9d, %esi
   2991 ; CHECK-SSE1-NEXT:    andl %r8d, %edi
   2992 ; CHECK-SSE1-NEXT:    xorl %edx, %edi
   2993 ; CHECK-SSE1-NEXT:    xorl %ecx, %esi
   2994 ; CHECK-SSE1-NEXT:    movl %edi, %eax
   2995 ; CHECK-SSE1-NEXT:    movl %esi, %edx
   2996 ; CHECK-SSE1-NEXT:    retq
   2997 ;
   2998 ; CHECK-SSE2-LABEL: in_v2i32:
   2999 ; CHECK-SSE2:       # %bb.0:
   3000 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
   3001 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
   3002 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   3003 ; CHECK-SSE2-NEXT:    retq
   3004 ;
   3005 ; CHECK-XOP-LABEL: in_v2i32:
   3006 ; CHECK-XOP:       # %bb.0:
   3007 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
   3008 ; CHECK-XOP-NEXT:    retq
          ; %n0: bits in which %x and %y differ.
   3009   %n0 = xor <2 x i32> %x, %y
          ; %n1: of those differing bits, keep only the ones selected by %mask.
   3010   %n1 = and <2 x i32> %n0, %mask
          ; Flipping those bits in %y gives %x where %mask is set and %y elsewhere.
   3011   %r = xor <2 x i32> %n1, %y
   3012   ret <2 x i32> %r
   3013 }
   3014 
   3015 define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
        ; Masked merge of a <1 x i64> value written in xor-and-xor form:
        ;   %r = ((%x ^ %y) & %mask) ^ %y
        ; The expectations below use the prefix shared by every llc invocation of
        ; this test, so one xorq/andq/xorq sequence on 64-bit GPRs is expected in
        ; all configurations, followed by a copy of the result into %rax.
   3016 ; CHECK-LABEL: in_v1i64:
   3017 ; CHECK:       # %bb.0:
   3018 ; CHECK-NEXT:    xorq %rsi, %rdi
   3019 ; CHECK-NEXT:    andq %rdx, %rdi
   3020 ; CHECK-NEXT:    xorq %rsi, %rdi
   3021 ; CHECK-NEXT:    movq %rdi, %rax
   3022 ; CHECK-NEXT:    retq
          ; %n0: bits in which %x and %y differ.
   3023   %n0 = xor <1 x i64> %x, %y
          ; %n1: of those differing bits, keep only the ones selected by %mask.
   3024   %n1 = and <1 x i64> %n0, %mask
          ; Flipping those bits in %y gives %x where %mask is set and %y elsewhere.
   3025   %r = xor <1 x i64> %n1, %y
   3026   ret <1 x i64> %r
   3027 }
   3028 
   3029 ; ============================================================================ ;
   3030 ; 128-bit vector width
   3031 ; ============================================================================ ;
   3032 
   3033 define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
        ; <16 x i8> masked merge in xor-and-xor form: %r = ((%x ^ %y) & %mask) ^ %y.
        ; Expected lowering per configuration, as asserted below:
        ;   * no SSE and SSE1-only: byte-wise xorb/andb/xorb, with most operands
        ;     read from %rsp-relative slots. %rbp, %r15, %r14, %r13, %r12 and %rbx
        ;     are pushed on entry and popped before returning. %ecx, %edx and %esi
        ;     are spilled (4 bytes each) on entry; three 4-byte reloads into %ecx
        ;     follow later, each just before the byte stored at offset 2, 1 or 0 is
        ;     computed. The sixteen result bytes are stored at offsets 0..15 from
        ;     %rdi, and %rdi is copied to %rax.
        ;   * SSE2: the and/andn/or form (andps, andnps, orps) on XMM registers.
        ;   * XOP: a single vpcmov.
   3034 ; CHECK-BASELINE-LABEL: in_v16i8:
   3035 ; CHECK-BASELINE:       # %bb.0:
   3036 ; CHECK-BASELINE-NEXT:    pushq %rbp
   3037 ; CHECK-BASELINE-NEXT:    pushq %r15
   3038 ; CHECK-BASELINE-NEXT:    pushq %r14
   3039 ; CHECK-BASELINE-NEXT:    pushq %r13
   3040 ; CHECK-BASELINE-NEXT:    pushq %r12
   3041 ; CHECK-BASELINE-NEXT:    pushq %rbx
   3042 ; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   3043 ; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   3044 ; CHECK-BASELINE-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   3045 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
   3046 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
   3047 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
   3048 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
   3049 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
   3050 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
   3051 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
   3052 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
   3053 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
   3054 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
   3055 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3056 ; CHECK-BASELINE-NEXT:    xorb %al, %r9b
   3057 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
   3058 ; CHECK-BASELINE-NEXT:    xorb %al, %r9b
   3059 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dl
   3060 ; CHECK-BASELINE-NEXT:    xorb %r10b, %dl
   3061 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
   3062 ; CHECK-BASELINE-NEXT:    xorb %r10b, %dl
   3063 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
   3064 ; CHECK-BASELINE-NEXT:    xorb %r11b, %r10b
   3065 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
   3066 ; CHECK-BASELINE-NEXT:    xorb %r11b, %r10b
   3067 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
   3068 ; CHECK-BASELINE-NEXT:    xorb %bl, %r11b
   3069 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
   3070 ; CHECK-BASELINE-NEXT:    xorb %bl, %r11b
   3071 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
   3072 ; CHECK-BASELINE-NEXT:    xorb %bpl, %bl
   3073 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
   3074 ; CHECK-BASELINE-NEXT:    xorb %bpl, %bl
   3075 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
   3076 ; CHECK-BASELINE-NEXT:    xorb %r13b, %bpl
   3077 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
   3078 ; CHECK-BASELINE-NEXT:    xorb %r13b, %bpl
   3079 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
   3080 ; CHECK-BASELINE-NEXT:    xorb %r12b, %r13b
   3081 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
   3082 ; CHECK-BASELINE-NEXT:    xorb %r12b, %r13b
   3083 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
   3084 ; CHECK-BASELINE-NEXT:    xorb %r15b, %r12b
   3085 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
   3086 ; CHECK-BASELINE-NEXT:    xorb %r15b, %r12b
   3087 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
   3088 ; CHECK-BASELINE-NEXT:    xorb %r14b, %r15b
   3089 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
   3090 ; CHECK-BASELINE-NEXT:    xorb %r14b, %r15b
   3091 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
   3092 ; CHECK-BASELINE-NEXT:    xorb %sil, %r14b
   3093 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
   3094 ; CHECK-BASELINE-NEXT:    xorb %sil, %r14b
   3095 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3096 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
   3097 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
   3098 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
   3099 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
   3100 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
   3101 ; CHECK-BASELINE-NEXT:    xorb %sil, %cl
   3102 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   3103 ; CHECK-BASELINE-NEXT:    xorb %sil, %cl
   3104 ; CHECK-BASELINE-NEXT:    movb %cl, 15(%rdi)
   3105 ; CHECK-BASELINE-NEXT:    movb %al, 14(%rdi)
   3106 ; CHECK-BASELINE-NEXT:    movb %r14b, 13(%rdi)
   3107 ; CHECK-BASELINE-NEXT:    movb %r15b, 12(%rdi)
   3108 ; CHECK-BASELINE-NEXT:    movb %r12b, 11(%rdi)
   3109 ; CHECK-BASELINE-NEXT:    movb %r13b, 10(%rdi)
   3110 ; CHECK-BASELINE-NEXT:    movb %bpl, 9(%rdi)
   3111 ; CHECK-BASELINE-NEXT:    movb %bl, 8(%rdi)
   3112 ; CHECK-BASELINE-NEXT:    movb %r11b, 7(%rdi)
   3113 ; CHECK-BASELINE-NEXT:    movb %r10b, 6(%rdi)
   3114 ; CHECK-BASELINE-NEXT:    movb %dl, 5(%rdi)
   3115 ; CHECK-BASELINE-NEXT:    movb %r9b, 4(%rdi)
   3116 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3117 ; CHECK-BASELINE-NEXT:    xorb %al, %r8b
   3118 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
   3119 ; CHECK-BASELINE-NEXT:    xorb %al, %r8b
   3120 ; CHECK-BASELINE-NEXT:    movb %r8b, 3(%rdi)
   3121 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3122 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   3123 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
   3124 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   3125 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
   3126 ; CHECK-BASELINE-NEXT:    movb %cl, 2(%rdi)
   3127 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3128 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   3129 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
   3130 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   3131 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
   3132 ; CHECK-BASELINE-NEXT:    movb %cl, 1(%rdi)
   3133 ; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3134 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   3135 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
   3136 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   3137 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
   3138 ; CHECK-BASELINE-NEXT:    movb %cl, (%rdi)
   3139 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   3140 ; CHECK-BASELINE-NEXT:    popq %rbx
   3141 ; CHECK-BASELINE-NEXT:    popq %r12
   3142 ; CHECK-BASELINE-NEXT:    popq %r13
   3143 ; CHECK-BASELINE-NEXT:    popq %r14
   3144 ; CHECK-BASELINE-NEXT:    popq %r15
   3145 ; CHECK-BASELINE-NEXT:    popq %rbp
   3146 ; CHECK-BASELINE-NEXT:    retq
   3147 ;
   3148 ; CHECK-SSE1-LABEL: in_v16i8:
   3149 ; CHECK-SSE1:       # %bb.0:
   3150 ; CHECK-SSE1-NEXT:    pushq %rbp
   3151 ; CHECK-SSE1-NEXT:    pushq %r15
   3152 ; CHECK-SSE1-NEXT:    pushq %r14
   3153 ; CHECK-SSE1-NEXT:    pushq %r13
   3154 ; CHECK-SSE1-NEXT:    pushq %r12
   3155 ; CHECK-SSE1-NEXT:    pushq %rbx
   3156 ; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   3157 ; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   3158 ; CHECK-SSE1-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   3159 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
   3160 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
   3161 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
   3162 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
   3163 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
   3164 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
   3165 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
   3166 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
   3167 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
   3168 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
   3169 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3170 ; CHECK-SSE1-NEXT:    xorb %al, %r9b
   3171 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
   3172 ; CHECK-SSE1-NEXT:    xorb %al, %r9b
   3173 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
   3174 ; CHECK-SSE1-NEXT:    xorb %r10b, %dl
   3175 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
   3176 ; CHECK-SSE1-NEXT:    xorb %r10b, %dl
   3177 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
   3178 ; CHECK-SSE1-NEXT:    xorb %r11b, %r10b
   3179 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
   3180 ; CHECK-SSE1-NEXT:    xorb %r11b, %r10b
   3181 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
   3182 ; CHECK-SSE1-NEXT:    xorb %bl, %r11b
   3183 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
   3184 ; CHECK-SSE1-NEXT:    xorb %bl, %r11b
   3185 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
   3186 ; CHECK-SSE1-NEXT:    xorb %bpl, %bl
   3187 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
   3188 ; CHECK-SSE1-NEXT:    xorb %bpl, %bl
   3189 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
   3190 ; CHECK-SSE1-NEXT:    xorb %r13b, %bpl
   3191 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
   3192 ; CHECK-SSE1-NEXT:    xorb %r13b, %bpl
   3193 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
   3194 ; CHECK-SSE1-NEXT:    xorb %r12b, %r13b
   3195 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
   3196 ; CHECK-SSE1-NEXT:    xorb %r12b, %r13b
   3197 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
   3198 ; CHECK-SSE1-NEXT:    xorb %r15b, %r12b
   3199 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
   3200 ; CHECK-SSE1-NEXT:    xorb %r15b, %r12b
   3201 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
   3202 ; CHECK-SSE1-NEXT:    xorb %r14b, %r15b
   3203 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
   3204 ; CHECK-SSE1-NEXT:    xorb %r14b, %r15b
   3205 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
   3206 ; CHECK-SSE1-NEXT:    xorb %sil, %r14b
   3207 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
   3208 ; CHECK-SSE1-NEXT:    xorb %sil, %r14b
   3209 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3210 ; CHECK-SSE1-NEXT:    xorb %cl, %al
   3211 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
   3212 ; CHECK-SSE1-NEXT:    xorb %cl, %al
   3213 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
   3214 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
   3215 ; CHECK-SSE1-NEXT:    xorb %sil, %cl
   3216 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   3217 ; CHECK-SSE1-NEXT:    xorb %sil, %cl
   3218 ; CHECK-SSE1-NEXT:    movb %cl, 15(%rdi)
   3219 ; CHECK-SSE1-NEXT:    movb %al, 14(%rdi)
   3220 ; CHECK-SSE1-NEXT:    movb %r14b, 13(%rdi)
   3221 ; CHECK-SSE1-NEXT:    movb %r15b, 12(%rdi)
   3222 ; CHECK-SSE1-NEXT:    movb %r12b, 11(%rdi)
   3223 ; CHECK-SSE1-NEXT:    movb %r13b, 10(%rdi)
   3224 ; CHECK-SSE1-NEXT:    movb %bpl, 9(%rdi)
   3225 ; CHECK-SSE1-NEXT:    movb %bl, 8(%rdi)
   3226 ; CHECK-SSE1-NEXT:    movb %r11b, 7(%rdi)
   3227 ; CHECK-SSE1-NEXT:    movb %r10b, 6(%rdi)
   3228 ; CHECK-SSE1-NEXT:    movb %dl, 5(%rdi)
   3229 ; CHECK-SSE1-NEXT:    movb %r9b, 4(%rdi)
   3230 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3231 ; CHECK-SSE1-NEXT:    xorb %al, %r8b
   3232 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
   3233 ; CHECK-SSE1-NEXT:    xorb %al, %r8b
   3234 ; CHECK-SSE1-NEXT:    movb %r8b, 3(%rdi)
   3235 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3236 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   3237 ; CHECK-SSE1-NEXT:    xorb %al, %cl
   3238 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   3239 ; CHECK-SSE1-NEXT:    xorb %al, %cl
   3240 ; CHECK-SSE1-NEXT:    movb %cl, 2(%rdi)
   3241 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3242 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   3243 ; CHECK-SSE1-NEXT:    xorb %al, %cl
   3244 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   3245 ; CHECK-SSE1-NEXT:    xorb %al, %cl
   3246 ; CHECK-SSE1-NEXT:    movb %cl, 1(%rdi)
   3247 ; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
   3248 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   3249 ; CHECK-SSE1-NEXT:    xorb %al, %cl
   3250 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
   3251 ; CHECK-SSE1-NEXT:    xorb %al, %cl
   3252 ; CHECK-SSE1-NEXT:    movb %cl, (%rdi)
   3253 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   3254 ; CHECK-SSE1-NEXT:    popq %rbx
   3255 ; CHECK-SSE1-NEXT:    popq %r12
   3256 ; CHECK-SSE1-NEXT:    popq %r13
   3257 ; CHECK-SSE1-NEXT:    popq %r14
   3258 ; CHECK-SSE1-NEXT:    popq %r15
   3259 ; CHECK-SSE1-NEXT:    popq %rbp
   3260 ; CHECK-SSE1-NEXT:    retq
   3261 ;
   3262 ; CHECK-SSE2-LABEL: in_v16i8:
   3263 ; CHECK-SSE2:       # %bb.0:
   3264 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
   3265 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
   3266 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   3267 ; CHECK-SSE2-NEXT:    retq
   3268 ;
   3269 ; CHECK-XOP-LABEL: in_v16i8:
   3270 ; CHECK-XOP:       # %bb.0:
   3271 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
   3272 ; CHECK-XOP-NEXT:    retq
          ; %n0: bits in which %x and %y differ.
   3273   %n0 = xor <16 x i8> %x, %y
          ; %n1: of those differing bits, keep only the ones selected by %mask.
   3274   %n1 = and <16 x i8> %n0, %mask
          ; Flipping those bits in %y gives %x where %mask is set and %y elsewhere.
   3275   %r = xor <16 x i8> %n1, %y
   3276   ret <16 x i8> %r
   3277 }
   3278 
   3279 define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
   3280 ; CHECK-BASELINE-LABEL: in_v8i16:
   3281 ; CHECK-BASELINE:       # %bb.0:
   3282 ; CHECK-BASELINE-NEXT:    pushq %rbp
   3283 ; CHECK-BASELINE-NEXT:    pushq %r14
   3284 ; CHECK-BASELINE-NEXT:    pushq %rbx
   3285 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
   3286 ; CHECK-BASELINE-NEXT:    xorl %r10d, %r9d
   3287 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
   3288 ; CHECK-BASELINE-NEXT:    xorl %r11d, %r8d
   3289 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
   3290 ; CHECK-BASELINE-NEXT:    xorl %eax, %ecx
   3291 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
   3292 ; CHECK-BASELINE-NEXT:    xorl %ebx, %esi
   3293 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %si
   3294 ; CHECK-BASELINE-NEXT:    xorl %ebx, %esi
   3295 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
   3296 ; CHECK-BASELINE-NEXT:    xorl %ebx, %edx
   3297 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %dx
   3298 ; CHECK-BASELINE-NEXT:    xorl %ebx, %edx
   3299 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r14d
   3300 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %cx
   3301 ; CHECK-BASELINE-NEXT:    xorl %eax, %ecx
   3302 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
   3303 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
   3304 ; CHECK-BASELINE-NEXT:    xorl %r11d, %r8d
   3305 ; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
   3306 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r9w
   3307 ; CHECK-BASELINE-NEXT:    xorl %r10d, %r9d
   3308 ; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebp
   3309 ; CHECK-BASELINE-NEXT:    xorw %bx, %bp
   3310 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %bp
   3311 ; CHECK-BASELINE-NEXT:    xorl %ebx, %ebp
   3312 ; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
   3313 ; CHECK-BASELINE-NEXT:    xorw %ax, %bx
   3314 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %bx
   3315 ; CHECK-BASELINE-NEXT:    xorl %eax, %ebx
   3316 ; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
   3317 ; CHECK-BASELINE-NEXT:    xorw %r14w, %ax
   3318 ; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %ax
   3319 ; CHECK-BASELINE-NEXT:    xorl %r14d, %eax
   3320 ; CHECK-BASELINE-NEXT:    movw %ax, 14(%rdi)
   3321 ; CHECK-BASELINE-NEXT:    movw %bx, 12(%rdi)
   3322 ; CHECK-BASELINE-NEXT:    movw %bp, 10(%rdi)
   3323 ; CHECK-BASELINE-NEXT:    movw %r9w, 8(%rdi)
   3324 ; CHECK-BASELINE-NEXT:    movw %r8w, 6(%rdi)
   3325 ; CHECK-BASELINE-NEXT:    movw %cx, 4(%rdi)
   3326 ; CHECK-BASELINE-NEXT:    movw %dx, 2(%rdi)
   3327 ; CHECK-BASELINE-NEXT:    movw %si, (%rdi)
   3328 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   3329 ; CHECK-BASELINE-NEXT:    popq %rbx
   3330 ; CHECK-BASELINE-NEXT:    popq %r14
   3331 ; CHECK-BASELINE-NEXT:    popq %rbp
   3332 ; CHECK-BASELINE-NEXT:    retq
   3333 ;
   3334 ; CHECK-SSE1-LABEL: in_v8i16:
   3335 ; CHECK-SSE1:       # %bb.0:
   3336 ; CHECK-SSE1-NEXT:    pushq %rbp
   3337 ; CHECK-SSE1-NEXT:    pushq %r14
   3338 ; CHECK-SSE1-NEXT:    pushq %rbx
   3339 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
   3340 ; CHECK-SSE1-NEXT:    xorl %r10d, %r9d
   3341 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
   3342 ; CHECK-SSE1-NEXT:    xorl %r11d, %r8d
   3343 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
   3344 ; CHECK-SSE1-NEXT:    xorl %eax, %ecx
   3345 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
   3346 ; CHECK-SSE1-NEXT:    xorl %ebx, %esi
   3347 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %si
   3348 ; CHECK-SSE1-NEXT:    xorl %ebx, %esi
   3349 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
   3350 ; CHECK-SSE1-NEXT:    xorl %ebx, %edx
   3351 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %dx
   3352 ; CHECK-SSE1-NEXT:    xorl %ebx, %edx
   3353 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r14d
   3354 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %cx
   3355 ; CHECK-SSE1-NEXT:    xorl %eax, %ecx
   3356 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
   3357 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
   3358 ; CHECK-SSE1-NEXT:    xorl %r11d, %r8d
   3359 ; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
   3360 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r9w
   3361 ; CHECK-SSE1-NEXT:    xorl %r10d, %r9d
   3362 ; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebp
   3363 ; CHECK-SSE1-NEXT:    xorw %bx, %bp
   3364 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %bp
   3365 ; CHECK-SSE1-NEXT:    xorl %ebx, %ebp
   3366 ; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
   3367 ; CHECK-SSE1-NEXT:    xorw %ax, %bx
   3368 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %bx
   3369 ; CHECK-SSE1-NEXT:    xorl %eax, %ebx
   3370 ; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
   3371 ; CHECK-SSE1-NEXT:    xorw %r14w, %ax
   3372 ; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %ax
   3373 ; CHECK-SSE1-NEXT:    xorl %r14d, %eax
   3374 ; CHECK-SSE1-NEXT:    movw %ax, 14(%rdi)
   3375 ; CHECK-SSE1-NEXT:    movw %bx, 12(%rdi)
   3376 ; CHECK-SSE1-NEXT:    movw %bp, 10(%rdi)
   3377 ; CHECK-SSE1-NEXT:    movw %r9w, 8(%rdi)
   3378 ; CHECK-SSE1-NEXT:    movw %r8w, 6(%rdi)
   3379 ; CHECK-SSE1-NEXT:    movw %cx, 4(%rdi)
   3380 ; CHECK-SSE1-NEXT:    movw %dx, 2(%rdi)
   3381 ; CHECK-SSE1-NEXT:    movw %si, (%rdi)
   3382 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   3383 ; CHECK-SSE1-NEXT:    popq %rbx
   3384 ; CHECK-SSE1-NEXT:    popq %r14
   3385 ; CHECK-SSE1-NEXT:    popq %rbp
   3386 ; CHECK-SSE1-NEXT:    retq
   3387 ;
   3388 ; CHECK-SSE2-LABEL: in_v8i16:
   3389 ; CHECK-SSE2:       # %bb.0:
   3390 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
   3391 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
   3392 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   3393 ; CHECK-SSE2-NEXT:    retq
   3394 ;
   3395 ; CHECK-XOP-LABEL: in_v8i16:
   3396 ; CHECK-XOP:       # %bb.0:
   3397 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
   3398 ; CHECK-XOP-NEXT:    retq
        ; Masked merge of two <8 x i16> vectors written in the xor-and-xor ("in")
        ; form: r = ((x ^ y) & mask) ^ y, so each result bit is taken from %x where
        ; the mask bit is 1 and from %y where it is 0.
        ; The autogenerated assertions above expect per-lane 16-bit scalar code for
        ; the no-SSE and SSE1-only runs, and/andn/or for SSE2, and one vpcmov for XOP.
        ; Regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
   3399   %n0 = xor <8 x i16> %x, %y ; bits in which %x and %y differ
   3400   %n1 = and <8 x i16> %n0, %mask ; keep only the differing bits selected by %mask
   3401   %r = xor <8 x i16> %n1, %y ; flip those bits in %y, which yields %x's bits there
   3402   ret <8 x i16> %r
   3403 }
   3404 
   3405 define <4 x i32> @in_v4i32(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind {
   3406 ; CHECK-BASELINE-LABEL: in_v4i32:
   3407 ; CHECK-BASELINE:       # %bb.0:
   3408 ; CHECK-BASELINE-NEXT:    pushq %rbx
   3409 ; CHECK-BASELINE-NEXT:    movl 12(%rdx), %r8d
   3410 ; CHECK-BASELINE-NEXT:    movl 8(%rdx), %r9d
   3411 ; CHECK-BASELINE-NEXT:    movl (%rdx), %r11d
   3412 ; CHECK-BASELINE-NEXT:    movl 4(%rdx), %r10d
   3413 ; CHECK-BASELINE-NEXT:    movl (%rsi), %edx
   3414 ; CHECK-BASELINE-NEXT:    xorl %r11d, %edx
   3415 ; CHECK-BASELINE-NEXT:    movl 4(%rsi), %eax
   3416 ; CHECK-BASELINE-NEXT:    xorl %r10d, %eax
   3417 ; CHECK-BASELINE-NEXT:    movl 8(%rsi), %ebx
   3418 ; CHECK-BASELINE-NEXT:    xorl %r9d, %ebx
   3419 ; CHECK-BASELINE-NEXT:    movl 12(%rsi), %esi
   3420 ; CHECK-BASELINE-NEXT:    xorl %r8d, %esi
   3421 ; CHECK-BASELINE-NEXT:    andl 12(%rcx), %esi
   3422 ; CHECK-BASELINE-NEXT:    andl 8(%rcx), %ebx
   3423 ; CHECK-BASELINE-NEXT:    andl 4(%rcx), %eax
   3424 ; CHECK-BASELINE-NEXT:    andl (%rcx), %edx
   3425 ; CHECK-BASELINE-NEXT:    xorl %r11d, %edx
   3426 ; CHECK-BASELINE-NEXT:    xorl %r10d, %eax
   3427 ; CHECK-BASELINE-NEXT:    xorl %r9d, %ebx
   3428 ; CHECK-BASELINE-NEXT:    xorl %r8d, %esi
   3429 ; CHECK-BASELINE-NEXT:    movl %esi, 12(%rdi)
   3430 ; CHECK-BASELINE-NEXT:    movl %ebx, 8(%rdi)
   3431 ; CHECK-BASELINE-NEXT:    movl %eax, 4(%rdi)
   3432 ; CHECK-BASELINE-NEXT:    movl %edx, (%rdi)
   3433 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   3434 ; CHECK-BASELINE-NEXT:    popq %rbx
   3435 ; CHECK-BASELINE-NEXT:    retq
   3436 ;
   3437 ; CHECK-SSE1-LABEL: in_v4i32:
   3438 ; CHECK-SSE1:       # %bb.0:
   3439 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
   3440 ; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
   3441 ; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
   3442 ; CHECK-SSE1-NEXT:    andps (%rsi), %xmm0
   3443 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
   3444 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
   3445 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   3446 ; CHECK-SSE1-NEXT:    retq
   3447 ;
   3448 ; CHECK-SSE2-LABEL: in_v4i32:
   3449 ; CHECK-SSE2:       # %bb.0:
   3450 ; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
   3451 ; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
   3452 ; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm1
   3453 ; CHECK-SSE2-NEXT:    andps (%rdi), %xmm0
   3454 ; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
   3455 ; CHECK-SSE2-NEXT:    retq
   3456 ;
   3457 ; CHECK-XOP-LABEL: in_v4i32:
   3458 ; CHECK-XOP:       # %bb.0:
   3459 ; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
   3460 ; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
   3461 ; CHECK-XOP-NEXT:    vpcmov %xmm1, (%rsi), %xmm0, %xmm0
   3462 ; CHECK-XOP-NEXT:    retq
        ; Masked merge r = ((x ^ y) & mask) ^ y on <4 x i32>. Unlike the by-value
        ; variants, the three operands are loaded through the pointer arguments,
        ; and every load is marked align 16.
        ; The autogenerated assertions above expect per-lane 32-bit scalar code
        ; without SSE, and/andn/or with memory operands for SSE1 and SSE2, and a
        ; vpcmov that reads %y straight from memory for XOP.
        ; Regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
   3463   %x = load <4 x i32>, <4 x i32> *%px, align 16
   3464   %y = load <4 x i32>, <4 x i32> *%py, align 16
   3465   %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
   3466   %n0 = xor <4 x i32> %x, %y ; bits in which %x and %y differ
   3467   %n1 = and <4 x i32> %n0, %mask ; keep only the differing bits selected by %mask
   3468   %r = xor <4 x i32> %n1, %y ; flip those bits in %y, which yields %x's bits there
   3469   ret <4 x i32> %r
   3470 }
   3471 
   3472 define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
   3473 ; CHECK-BASELINE-LABEL: in_v2i64:
   3474 ; CHECK-BASELINE:       # %bb.0:
   3475 ; CHECK-BASELINE-NEXT:    xorq %rdx, %rdi
   3476 ; CHECK-BASELINE-NEXT:    xorq %rcx, %rsi
   3477 ; CHECK-BASELINE-NEXT:    andq %r9, %rsi
   3478 ; CHECK-BASELINE-NEXT:    andq %r8, %rdi
   3479 ; CHECK-BASELINE-NEXT:    xorq %rdx, %rdi
   3480 ; CHECK-BASELINE-NEXT:    xorq %rcx, %rsi
   3481 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   3482 ; CHECK-BASELINE-NEXT:    movq %rsi, %rdx
   3483 ; CHECK-BASELINE-NEXT:    retq
   3484 ;
   3485 ; CHECK-SSE1-LABEL: in_v2i64:
   3486 ; CHECK-SSE1:       # %bb.0:
   3487 ; CHECK-SSE1-NEXT:    xorq %rdx, %rdi
   3488 ; CHECK-SSE1-NEXT:    xorq %rcx, %rsi
   3489 ; CHECK-SSE1-NEXT:    andq %r9, %rsi
   3490 ; CHECK-SSE1-NEXT:    andq %r8, %rdi
   3491 ; CHECK-SSE1-NEXT:    xorq %rdx, %rdi
   3492 ; CHECK-SSE1-NEXT:    xorq %rcx, %rsi
   3493 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   3494 ; CHECK-SSE1-NEXT:    movq %rsi, %rdx
   3495 ; CHECK-SSE1-NEXT:    retq
   3496 ;
   3497 ; CHECK-SSE2-LABEL: in_v2i64:
   3498 ; CHECK-SSE2:       # %bb.0:
   3499 ; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
   3500 ; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
   3501 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   3502 ; CHECK-SSE2-NEXT:    retq
   3503 ;
   3504 ; CHECK-XOP-LABEL: in_v2i64:
   3505 ; CHECK-XOP:       # %bb.0:
   3506 ; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
   3507 ; CHECK-XOP-NEXT:    retq
        ; Masked merge r = ((x ^ y) & mask) ^ y on <2 x i64>, with all three
        ; operands passed by value.
        ; The autogenerated assertions above expect the xor/and/xor sequence on
        ; 64-bit general-purpose registers for the no-SSE and SSE1-only runs,
        ; and/andn/or for SSE2, and a single vpcmov for XOP.
        ; Regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
   3508   %n0 = xor <2 x i64> %x, %y ; bits in which %x and %y differ
   3509   %n1 = and <2 x i64> %n0, %mask ; keep only the differing bits selected by %mask
   3510   %r = xor <2 x i64> %n1, %y ; flip those bits in %y, which yields %x's bits there
   3511   ret <2 x i64> %r
   3512 }
   3513 
   3514 ; ============================================================================ ;
   3515 ; 256-bit vector width
   3516 ; ============================================================================ ;
   3517 
   3518 define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) nounwind {
   3519 ; CHECK-BASELINE-LABEL: in_v32i8:
   3520 ; CHECK-BASELINE:       # %bb.0:
   3521 ; CHECK-BASELINE-NEXT:    pushq %rbp
   3522 ; CHECK-BASELINE-NEXT:    pushq %r15
   3523 ; CHECK-BASELINE-NEXT:    pushq %r14
   3524 ; CHECK-BASELINE-NEXT:    pushq %r13
   3525 ; CHECK-BASELINE-NEXT:    pushq %r12
   3526 ; CHECK-BASELINE-NEXT:    pushq %rbx
   3527 ; CHECK-BASELINE-NEXT:    movq %rdx, %r13
   3528 ; CHECK-BASELINE-NEXT:    movq %rsi, %rbx
   3529 ; CHECK-BASELINE-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
   3530 ; CHECK-BASELINE-NEXT:    movb 16(%rdx), %r12b
   3531 ; CHECK-BASELINE-NEXT:    movb 15(%rdx), %al
   3532 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3533 ; CHECK-BASELINE-NEXT:    movb 14(%rdx), %al
   3534 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3535 ; CHECK-BASELINE-NEXT:    movb 13(%rdx), %al
   3536 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3537 ; CHECK-BASELINE-NEXT:    movb 12(%rdx), %al
   3538 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3539 ; CHECK-BASELINE-NEXT:    movb 11(%rdx), %al
   3540 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3541 ; CHECK-BASELINE-NEXT:    movb 10(%rdx), %al
   3542 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3543 ; CHECK-BASELINE-NEXT:    movb 9(%rdx), %r10b
   3544 ; CHECK-BASELINE-NEXT:    movb 8(%rdx), %r11b
   3545 ; CHECK-BASELINE-NEXT:    movb 7(%rdx), %r9b
   3546 ; CHECK-BASELINE-NEXT:    movb 6(%rdx), %r8b
   3547 ; CHECK-BASELINE-NEXT:    movb 5(%rdx), %bpl
   3548 ; CHECK-BASELINE-NEXT:    movb 4(%rdx), %dil
   3549 ; CHECK-BASELINE-NEXT:    movb 3(%rdx), %sil
   3550 ; CHECK-BASELINE-NEXT:    movb 2(%rdx), %r14b
   3551 ; CHECK-BASELINE-NEXT:    movb (%rdx), %al
   3552 ; CHECK-BASELINE-NEXT:    movb 1(%rdx), %r15b
   3553 ; CHECK-BASELINE-NEXT:    movb (%rbx), %dl
   3554 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3555 ; CHECK-BASELINE-NEXT:    andb (%rcx), %dl
   3556 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3557 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3558 ; CHECK-BASELINE-NEXT:    movb 1(%rbx), %al
   3559 ; CHECK-BASELINE-NEXT:    xorb %r15b, %al
   3560 ; CHECK-BASELINE-NEXT:    andb 1(%rcx), %al
   3561 ; CHECK-BASELINE-NEXT:    xorb %r15b, %al
   3562 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3563 ; CHECK-BASELINE-NEXT:    movb 2(%rbx), %al
   3564 ; CHECK-BASELINE-NEXT:    xorb %r14b, %al
   3565 ; CHECK-BASELINE-NEXT:    andb 2(%rcx), %al
   3566 ; CHECK-BASELINE-NEXT:    xorb %r14b, %al
   3567 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3568 ; CHECK-BASELINE-NEXT:    movb 3(%rbx), %al
   3569 ; CHECK-BASELINE-NEXT:    xorb %sil, %al
   3570 ; CHECK-BASELINE-NEXT:    andb 3(%rcx), %al
   3571 ; CHECK-BASELINE-NEXT:    xorb %sil, %al
   3572 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3573 ; CHECK-BASELINE-NEXT:    movb 4(%rbx), %al
   3574 ; CHECK-BASELINE-NEXT:    xorb %dil, %al
   3575 ; CHECK-BASELINE-NEXT:    andb 4(%rcx), %al
   3576 ; CHECK-BASELINE-NEXT:    xorb %dil, %al
   3577 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3578 ; CHECK-BASELINE-NEXT:    movb 5(%rbx), %al
   3579 ; CHECK-BASELINE-NEXT:    xorb %bpl, %al
   3580 ; CHECK-BASELINE-NEXT:    andb 5(%rcx), %al
   3581 ; CHECK-BASELINE-NEXT:    xorb %bpl, %al
   3582 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3583 ; CHECK-BASELINE-NEXT:    movb 6(%rbx), %al
   3584 ; CHECK-BASELINE-NEXT:    xorb %r8b, %al
   3585 ; CHECK-BASELINE-NEXT:    andb 6(%rcx), %al
   3586 ; CHECK-BASELINE-NEXT:    xorb %r8b, %al
   3587 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3588 ; CHECK-BASELINE-NEXT:    movb 7(%rbx), %al
   3589 ; CHECK-BASELINE-NEXT:    xorb %r9b, %al
   3590 ; CHECK-BASELINE-NEXT:    andb 7(%rcx), %al
   3591 ; CHECK-BASELINE-NEXT:    xorb %r9b, %al
   3592 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3593 ; CHECK-BASELINE-NEXT:    movb 8(%rbx), %al
   3594 ; CHECK-BASELINE-NEXT:    xorb %r11b, %al
   3595 ; CHECK-BASELINE-NEXT:    andb 8(%rcx), %al
   3596 ; CHECK-BASELINE-NEXT:    xorb %r11b, %al
   3597 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3598 ; CHECK-BASELINE-NEXT:    movb 9(%rbx), %al
   3599 ; CHECK-BASELINE-NEXT:    xorb %r10b, %al
   3600 ; CHECK-BASELINE-NEXT:    andb 9(%rcx), %al
   3601 ; CHECK-BASELINE-NEXT:    xorb %r10b, %al
   3602 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3603 ; CHECK-BASELINE-NEXT:    movb 10(%rbx), %dl
   3604 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3605 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3606 ; CHECK-BASELINE-NEXT:    andb 10(%rcx), %dl
   3607 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3608 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3609 ; CHECK-BASELINE-NEXT:    movb 11(%rbx), %dl
   3610 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3611 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3612 ; CHECK-BASELINE-NEXT:    andb 11(%rcx), %dl
   3613 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3614 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3615 ; CHECK-BASELINE-NEXT:    movb 12(%rbx), %dl
   3616 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3617 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3618 ; CHECK-BASELINE-NEXT:    andb 12(%rcx), %dl
   3619 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3620 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3621 ; CHECK-BASELINE-NEXT:    movb 13(%rbx), %dl
   3622 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3623 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3624 ; CHECK-BASELINE-NEXT:    andb 13(%rcx), %dl
   3625 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3626 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3627 ; CHECK-BASELINE-NEXT:    movb 14(%rbx), %dl
   3628 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3629 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3630 ; CHECK-BASELINE-NEXT:    andb 14(%rcx), %dl
   3631 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3632 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3633 ; CHECK-BASELINE-NEXT:    movb 15(%rbx), %dl
   3634 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3635 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3636 ; CHECK-BASELINE-NEXT:    andb 15(%rcx), %dl
   3637 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3638 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3639 ; CHECK-BASELINE-NEXT:    movb 16(%rbx), %al
   3640 ; CHECK-BASELINE-NEXT:    xorb %r12b, %al
   3641 ; CHECK-BASELINE-NEXT:    andb 16(%rcx), %al
   3642 ; CHECK-BASELINE-NEXT:    xorb %r12b, %al
   3643 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3644 ; CHECK-BASELINE-NEXT:    movb 17(%r13), %al
   3645 ; CHECK-BASELINE-NEXT:    movb 17(%rbx), %dl
   3646 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3647 ; CHECK-BASELINE-NEXT:    andb 17(%rcx), %dl
   3648 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3649 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3650 ; CHECK-BASELINE-NEXT:    movb 18(%r13), %al
   3651 ; CHECK-BASELINE-NEXT:    movb 18(%rbx), %dl
   3652 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3653 ; CHECK-BASELINE-NEXT:    andb 18(%rcx), %dl
   3654 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
   3655 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3656 ; CHECK-BASELINE-NEXT:    movb 19(%r13), %al
   3657 ; CHECK-BASELINE-NEXT:    movb 19(%rbx), %r12b
   3658 ; CHECK-BASELINE-NEXT:    xorb %al, %r12b
   3659 ; CHECK-BASELINE-NEXT:    andb 19(%rcx), %r12b
   3660 ; CHECK-BASELINE-NEXT:    movq %rcx, %rdx
   3661 ; CHECK-BASELINE-NEXT:    xorb %al, %r12b
   3662 ; CHECK-BASELINE-NEXT:    movb 20(%r13), %al
   3663 ; CHECK-BASELINE-NEXT:    movb 20(%rbx), %r14b
   3664 ; CHECK-BASELINE-NEXT:    xorb %al, %r14b
   3665 ; CHECK-BASELINE-NEXT:    andb 20(%rcx), %r14b
   3666 ; CHECK-BASELINE-NEXT:    xorb %al, %r14b
   3667 ; CHECK-BASELINE-NEXT:    movb 21(%r13), %al
   3668 ; CHECK-BASELINE-NEXT:    movb 21(%rbx), %r15b
   3669 ; CHECK-BASELINE-NEXT:    xorb %al, %r15b
   3670 ; CHECK-BASELINE-NEXT:    andb 21(%rcx), %r15b
   3671 ; CHECK-BASELINE-NEXT:    xorb %al, %r15b
   3672 ; CHECK-BASELINE-NEXT:    movb 22(%r13), %al
   3673 ; CHECK-BASELINE-NEXT:    movb 22(%rbx), %bpl
   3674 ; CHECK-BASELINE-NEXT:    xorb %al, %bpl
   3675 ; CHECK-BASELINE-NEXT:    andb 22(%rcx), %bpl
   3676 ; CHECK-BASELINE-NEXT:    xorb %al, %bpl
   3677 ; CHECK-BASELINE-NEXT:    movb 23(%r13), %al
   3678 ; CHECK-BASELINE-NEXT:    movb 23(%rbx), %r11b
   3679 ; CHECK-BASELINE-NEXT:    xorb %al, %r11b
   3680 ; CHECK-BASELINE-NEXT:    andb 23(%rcx), %r11b
   3681 ; CHECK-BASELINE-NEXT:    xorb %al, %r11b
   3682 ; CHECK-BASELINE-NEXT:    movb 24(%r13), %al
   3683 ; CHECK-BASELINE-NEXT:    movb 24(%rbx), %r10b
   3684 ; CHECK-BASELINE-NEXT:    xorb %al, %r10b
   3685 ; CHECK-BASELINE-NEXT:    andb 24(%rcx), %r10b
   3686 ; CHECK-BASELINE-NEXT:    xorb %al, %r10b
   3687 ; CHECK-BASELINE-NEXT:    movb 25(%r13), %al
   3688 ; CHECK-BASELINE-NEXT:    movb 25(%rbx), %r9b
   3689 ; CHECK-BASELINE-NEXT:    xorb %al, %r9b
   3690 ; CHECK-BASELINE-NEXT:    andb 25(%rcx), %r9b
   3691 ; CHECK-BASELINE-NEXT:    xorb %al, %r9b
   3692 ; CHECK-BASELINE-NEXT:    movb 26(%r13), %al
   3693 ; CHECK-BASELINE-NEXT:    movb 26(%rbx), %r8b
   3694 ; CHECK-BASELINE-NEXT:    xorb %al, %r8b
   3695 ; CHECK-BASELINE-NEXT:    andb 26(%rcx), %r8b
   3696 ; CHECK-BASELINE-NEXT:    xorb %al, %r8b
   3697 ; CHECK-BASELINE-NEXT:    movb 27(%r13), %al
   3698 ; CHECK-BASELINE-NEXT:    movb 27(%rbx), %dil
   3699 ; CHECK-BASELINE-NEXT:    xorb %al, %dil
   3700 ; CHECK-BASELINE-NEXT:    andb 27(%rcx), %dil
   3701 ; CHECK-BASELINE-NEXT:    xorb %al, %dil
   3702 ; CHECK-BASELINE-NEXT:    movb 28(%r13), %al
   3703 ; CHECK-BASELINE-NEXT:    movb 28(%rbx), %sil
   3704 ; CHECK-BASELINE-NEXT:    xorb %al, %sil
   3705 ; CHECK-BASELINE-NEXT:    andb 28(%rcx), %sil
   3706 ; CHECK-BASELINE-NEXT:    xorb %al, %sil
   3707 ; CHECK-BASELINE-NEXT:    movb 29(%r13), %al
   3708 ; CHECK-BASELINE-NEXT:    movb 29(%rbx), %cl
   3709 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
   3710 ; CHECK-BASELINE-NEXT:    andb 29(%rdx), %cl
   3711 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
   3712 ; CHECK-BASELINE-NEXT:    movb 30(%r13), %al
   3713 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3714 ; CHECK-BASELINE-NEXT:    movb 30(%rbx), %al
   3715 ; CHECK-BASELINE-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
   3716 ; CHECK-BASELINE-NEXT:    andb 30(%rdx), %al
   3717 ; CHECK-BASELINE-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
   3718 ; CHECK-BASELINE-NEXT:    movb 31(%r13), %r13b
   3719 ; CHECK-BASELINE-NEXT:    movb 31(%rbx), %bl
   3720 ; CHECK-BASELINE-NEXT:    xorb %r13b, %bl
   3721 ; CHECK-BASELINE-NEXT:    andb 31(%rdx), %bl
   3722 ; CHECK-BASELINE-NEXT:    xorb %r13b, %bl
   3723 ; CHECK-BASELINE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
   3724 ; CHECK-BASELINE-NEXT:    movb %bl, 31(%r13)
   3725 ; CHECK-BASELINE-NEXT:    movb %al, 30(%r13)
   3726 ; CHECK-BASELINE-NEXT:    movb %cl, 29(%r13)
   3727 ; CHECK-BASELINE-NEXT:    movb %sil, 28(%r13)
   3728 ; CHECK-BASELINE-NEXT:    movb %dil, 27(%r13)
   3729 ; CHECK-BASELINE-NEXT:    movb %r8b, 26(%r13)
   3730 ; CHECK-BASELINE-NEXT:    movb %r9b, 25(%r13)
   3731 ; CHECK-BASELINE-NEXT:    movb %r10b, 24(%r13)
   3732 ; CHECK-BASELINE-NEXT:    movb %r11b, 23(%r13)
   3733 ; CHECK-BASELINE-NEXT:    movb %bpl, 22(%r13)
   3734 ; CHECK-BASELINE-NEXT:    movb %r15b, 21(%r13)
   3735 ; CHECK-BASELINE-NEXT:    movb %r14b, 20(%r13)
   3736 ; CHECK-BASELINE-NEXT:    movb %r12b, 19(%r13)
   3737 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3738 ; CHECK-BASELINE-NEXT:    movb %al, 18(%r13)
   3739 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3740 ; CHECK-BASELINE-NEXT:    movb %al, 17(%r13)
   3741 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3742 ; CHECK-BASELINE-NEXT:    movb %al, 16(%r13)
   3743 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3744 ; CHECK-BASELINE-NEXT:    movb %al, 15(%r13)
   3745 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3746 ; CHECK-BASELINE-NEXT:    movb %al, 14(%r13)
   3747 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3748 ; CHECK-BASELINE-NEXT:    movb %al, 13(%r13)
   3749 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3750 ; CHECK-BASELINE-NEXT:    movb %al, 12(%r13)
   3751 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3752 ; CHECK-BASELINE-NEXT:    movb %al, 11(%r13)
   3753 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3754 ; CHECK-BASELINE-NEXT:    movb %al, 10(%r13)
   3755 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3756 ; CHECK-BASELINE-NEXT:    movb %al, 9(%r13)
   3757 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3758 ; CHECK-BASELINE-NEXT:    movb %al, 8(%r13)
   3759 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3760 ; CHECK-BASELINE-NEXT:    movb %al, 7(%r13)
   3761 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3762 ; CHECK-BASELINE-NEXT:    movb %al, 6(%r13)
   3763 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3764 ; CHECK-BASELINE-NEXT:    movb %al, 5(%r13)
   3765 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3766 ; CHECK-BASELINE-NEXT:    movb %al, 4(%r13)
   3767 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3768 ; CHECK-BASELINE-NEXT:    movb %al, 3(%r13)
   3769 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3770 ; CHECK-BASELINE-NEXT:    movb %al, 2(%r13)
   3771 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3772 ; CHECK-BASELINE-NEXT:    movb %al, 1(%r13)
   3773 ; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3774 ; CHECK-BASELINE-NEXT:    movb %al, (%r13)
   3775 ; CHECK-BASELINE-NEXT:    movq %r13, %rax
   3776 ; CHECK-BASELINE-NEXT:    popq %rbx
   3777 ; CHECK-BASELINE-NEXT:    popq %r12
   3778 ; CHECK-BASELINE-NEXT:    popq %r13
   3779 ; CHECK-BASELINE-NEXT:    popq %r14
   3780 ; CHECK-BASELINE-NEXT:    popq %r15
   3781 ; CHECK-BASELINE-NEXT:    popq %rbp
   3782 ; CHECK-BASELINE-NEXT:    retq
   3783 ;
   3784 ; CHECK-SSE1-LABEL: in_v32i8:
   3785 ; CHECK-SSE1:       # %bb.0:
   3786 ; CHECK-SSE1-NEXT:    pushq %rbp
   3787 ; CHECK-SSE1-NEXT:    pushq %r15
   3788 ; CHECK-SSE1-NEXT:    pushq %r14
   3789 ; CHECK-SSE1-NEXT:    pushq %r13
   3790 ; CHECK-SSE1-NEXT:    pushq %r12
   3791 ; CHECK-SSE1-NEXT:    pushq %rbx
   3792 ; CHECK-SSE1-NEXT:    movq %rdx, %r13
   3793 ; CHECK-SSE1-NEXT:    movq %rsi, %rbx
   3794 ; CHECK-SSE1-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
   3795 ; CHECK-SSE1-NEXT:    movb 16(%rdx), %r12b
   3796 ; CHECK-SSE1-NEXT:    movb 15(%rdx), %al
   3797 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3798 ; CHECK-SSE1-NEXT:    movb 14(%rdx), %al
   3799 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3800 ; CHECK-SSE1-NEXT:    movb 13(%rdx), %al
   3801 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3802 ; CHECK-SSE1-NEXT:    movb 12(%rdx), %al
   3803 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3804 ; CHECK-SSE1-NEXT:    movb 11(%rdx), %al
   3805 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3806 ; CHECK-SSE1-NEXT:    movb 10(%rdx), %al
   3807 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3808 ; CHECK-SSE1-NEXT:    movb 9(%rdx), %r10b
   3809 ; CHECK-SSE1-NEXT:    movb 8(%rdx), %r11b
   3810 ; CHECK-SSE1-NEXT:    movb 7(%rdx), %r9b
   3811 ; CHECK-SSE1-NEXT:    movb 6(%rdx), %r8b
   3812 ; CHECK-SSE1-NEXT:    movb 5(%rdx), %bpl
   3813 ; CHECK-SSE1-NEXT:    movb 4(%rdx), %dil
   3814 ; CHECK-SSE1-NEXT:    movb 3(%rdx), %sil
   3815 ; CHECK-SSE1-NEXT:    movb 2(%rdx), %r14b
   3816 ; CHECK-SSE1-NEXT:    movb (%rdx), %al
   3817 ; CHECK-SSE1-NEXT:    movb 1(%rdx), %r15b
   3818 ; CHECK-SSE1-NEXT:    movb (%rbx), %dl
   3819 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3820 ; CHECK-SSE1-NEXT:    andb (%rcx), %dl
   3821 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3822 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3823 ; CHECK-SSE1-NEXT:    movb 1(%rbx), %al
   3824 ; CHECK-SSE1-NEXT:    xorb %r15b, %al
   3825 ; CHECK-SSE1-NEXT:    andb 1(%rcx), %al
   3826 ; CHECK-SSE1-NEXT:    xorb %r15b, %al
   3827 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3828 ; CHECK-SSE1-NEXT:    movb 2(%rbx), %al
   3829 ; CHECK-SSE1-NEXT:    xorb %r14b, %al
   3830 ; CHECK-SSE1-NEXT:    andb 2(%rcx), %al
   3831 ; CHECK-SSE1-NEXT:    xorb %r14b, %al
   3832 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3833 ; CHECK-SSE1-NEXT:    movb 3(%rbx), %al
   3834 ; CHECK-SSE1-NEXT:    xorb %sil, %al
   3835 ; CHECK-SSE1-NEXT:    andb 3(%rcx), %al
   3836 ; CHECK-SSE1-NEXT:    xorb %sil, %al
   3837 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3838 ; CHECK-SSE1-NEXT:    movb 4(%rbx), %al
   3839 ; CHECK-SSE1-NEXT:    xorb %dil, %al
   3840 ; CHECK-SSE1-NEXT:    andb 4(%rcx), %al
   3841 ; CHECK-SSE1-NEXT:    xorb %dil, %al
   3842 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3843 ; CHECK-SSE1-NEXT:    movb 5(%rbx), %al
   3844 ; CHECK-SSE1-NEXT:    xorb %bpl, %al
   3845 ; CHECK-SSE1-NEXT:    andb 5(%rcx), %al
   3846 ; CHECK-SSE1-NEXT:    xorb %bpl, %al
   3847 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3848 ; CHECK-SSE1-NEXT:    movb 6(%rbx), %al
   3849 ; CHECK-SSE1-NEXT:    xorb %r8b, %al
   3850 ; CHECK-SSE1-NEXT:    andb 6(%rcx), %al
   3851 ; CHECK-SSE1-NEXT:    xorb %r8b, %al
   3852 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3853 ; CHECK-SSE1-NEXT:    movb 7(%rbx), %al
   3854 ; CHECK-SSE1-NEXT:    xorb %r9b, %al
   3855 ; CHECK-SSE1-NEXT:    andb 7(%rcx), %al
   3856 ; CHECK-SSE1-NEXT:    xorb %r9b, %al
   3857 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3858 ; CHECK-SSE1-NEXT:    movb 8(%rbx), %al
   3859 ; CHECK-SSE1-NEXT:    xorb %r11b, %al
   3860 ; CHECK-SSE1-NEXT:    andb 8(%rcx), %al
   3861 ; CHECK-SSE1-NEXT:    xorb %r11b, %al
   3862 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3863 ; CHECK-SSE1-NEXT:    movb 9(%rbx), %al
   3864 ; CHECK-SSE1-NEXT:    xorb %r10b, %al
   3865 ; CHECK-SSE1-NEXT:    andb 9(%rcx), %al
   3866 ; CHECK-SSE1-NEXT:    xorb %r10b, %al
   3867 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3868 ; CHECK-SSE1-NEXT:    movb 10(%rbx), %dl
   3869 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3870 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3871 ; CHECK-SSE1-NEXT:    andb 10(%rcx), %dl
   3872 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3873 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3874 ; CHECK-SSE1-NEXT:    movb 11(%rbx), %dl
   3875 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3876 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3877 ; CHECK-SSE1-NEXT:    andb 11(%rcx), %dl
   3878 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3879 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3880 ; CHECK-SSE1-NEXT:    movb 12(%rbx), %dl
   3881 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3882 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3883 ; CHECK-SSE1-NEXT:    andb 12(%rcx), %dl
   3884 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3885 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3886 ; CHECK-SSE1-NEXT:    movb 13(%rbx), %dl
   3887 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3888 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3889 ; CHECK-SSE1-NEXT:    andb 13(%rcx), %dl
   3890 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3891 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3892 ; CHECK-SSE1-NEXT:    movb 14(%rbx), %dl
   3893 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3894 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3895 ; CHECK-SSE1-NEXT:    andb 14(%rcx), %dl
   3896 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3897 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3898 ; CHECK-SSE1-NEXT:    movb 15(%rbx), %dl
   3899 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   3900 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3901 ; CHECK-SSE1-NEXT:    andb 15(%rcx), %dl
   3902 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3903 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3904 ; CHECK-SSE1-NEXT:    movb 16(%rbx), %al
   3905 ; CHECK-SSE1-NEXT:    xorb %r12b, %al
   3906 ; CHECK-SSE1-NEXT:    andb 16(%rcx), %al
   3907 ; CHECK-SSE1-NEXT:    xorb %r12b, %al
   3908 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3909 ; CHECK-SSE1-NEXT:    movb 17(%r13), %al
   3910 ; CHECK-SSE1-NEXT:    movb 17(%rbx), %dl
   3911 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3912 ; CHECK-SSE1-NEXT:    andb 17(%rcx), %dl
   3913 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3914 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3915 ; CHECK-SSE1-NEXT:    movb 18(%r13), %al
   3916 ; CHECK-SSE1-NEXT:    movb 18(%rbx), %dl
   3917 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3918 ; CHECK-SSE1-NEXT:    andb 18(%rcx), %dl
   3919 ; CHECK-SSE1-NEXT:    xorb %al, %dl
   3920 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3921 ; CHECK-SSE1-NEXT:    movb 19(%r13), %al
   3922 ; CHECK-SSE1-NEXT:    movb 19(%rbx), %r12b
   3923 ; CHECK-SSE1-NEXT:    xorb %al, %r12b
   3924 ; CHECK-SSE1-NEXT:    andb 19(%rcx), %r12b
   3925 ; CHECK-SSE1-NEXT:    movq %rcx, %rdx
   3926 ; CHECK-SSE1-NEXT:    xorb %al, %r12b
   3927 ; CHECK-SSE1-NEXT:    movb 20(%r13), %al
   3928 ; CHECK-SSE1-NEXT:    movb 20(%rbx), %r14b
   3929 ; CHECK-SSE1-NEXT:    xorb %al, %r14b
   3930 ; CHECK-SSE1-NEXT:    andb 20(%rcx), %r14b
   3931 ; CHECK-SSE1-NEXT:    xorb %al, %r14b
   3932 ; CHECK-SSE1-NEXT:    movb 21(%r13), %al
   3933 ; CHECK-SSE1-NEXT:    movb 21(%rbx), %r15b
   3934 ; CHECK-SSE1-NEXT:    xorb %al, %r15b
   3935 ; CHECK-SSE1-NEXT:    andb 21(%rcx), %r15b
   3936 ; CHECK-SSE1-NEXT:    xorb %al, %r15b
   3937 ; CHECK-SSE1-NEXT:    movb 22(%r13), %al
   3938 ; CHECK-SSE1-NEXT:    movb 22(%rbx), %bpl
   3939 ; CHECK-SSE1-NEXT:    xorb %al, %bpl
   3940 ; CHECK-SSE1-NEXT:    andb 22(%rcx), %bpl
   3941 ; CHECK-SSE1-NEXT:    xorb %al, %bpl
   3942 ; CHECK-SSE1-NEXT:    movb 23(%r13), %al
   3943 ; CHECK-SSE1-NEXT:    movb 23(%rbx), %r11b
   3944 ; CHECK-SSE1-NEXT:    xorb %al, %r11b
   3945 ; CHECK-SSE1-NEXT:    andb 23(%rcx), %r11b
   3946 ; CHECK-SSE1-NEXT:    xorb %al, %r11b
   3947 ; CHECK-SSE1-NEXT:    movb 24(%r13), %al
   3948 ; CHECK-SSE1-NEXT:    movb 24(%rbx), %r10b
   3949 ; CHECK-SSE1-NEXT:    xorb %al, %r10b
   3950 ; CHECK-SSE1-NEXT:    andb 24(%rcx), %r10b
   3951 ; CHECK-SSE1-NEXT:    xorb %al, %r10b
   3952 ; CHECK-SSE1-NEXT:    movb 25(%r13), %al
   3953 ; CHECK-SSE1-NEXT:    movb 25(%rbx), %r9b
   3954 ; CHECK-SSE1-NEXT:    xorb %al, %r9b
   3955 ; CHECK-SSE1-NEXT:    andb 25(%rcx), %r9b
   3956 ; CHECK-SSE1-NEXT:    xorb %al, %r9b
   3957 ; CHECK-SSE1-NEXT:    movb 26(%r13), %al
   3958 ; CHECK-SSE1-NEXT:    movb 26(%rbx), %r8b
   3959 ; CHECK-SSE1-NEXT:    xorb %al, %r8b
   3960 ; CHECK-SSE1-NEXT:    andb 26(%rcx), %r8b
   3961 ; CHECK-SSE1-NEXT:    xorb %al, %r8b
   3962 ; CHECK-SSE1-NEXT:    movb 27(%r13), %al
   3963 ; CHECK-SSE1-NEXT:    movb 27(%rbx), %dil
   3964 ; CHECK-SSE1-NEXT:    xorb %al, %dil
   3965 ; CHECK-SSE1-NEXT:    andb 27(%rcx), %dil
   3966 ; CHECK-SSE1-NEXT:    xorb %al, %dil
   3967 ; CHECK-SSE1-NEXT:    movb 28(%r13), %al
   3968 ; CHECK-SSE1-NEXT:    movb 28(%rbx), %sil
   3969 ; CHECK-SSE1-NEXT:    xorb %al, %sil
   3970 ; CHECK-SSE1-NEXT:    andb 28(%rcx), %sil
   3971 ; CHECK-SSE1-NEXT:    xorb %al, %sil
   3972 ; CHECK-SSE1-NEXT:    movb 29(%r13), %al
   3973 ; CHECK-SSE1-NEXT:    movb 29(%rbx), %cl
   3974 ; CHECK-SSE1-NEXT:    xorb %al, %cl
   3975 ; CHECK-SSE1-NEXT:    andb 29(%rdx), %cl
   3976 ; CHECK-SSE1-NEXT:    xorb %al, %cl
   3977 ; CHECK-SSE1-NEXT:    movb 30(%r13), %al
   3978 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
   3979 ; CHECK-SSE1-NEXT:    movb 30(%rbx), %al
   3980 ; CHECK-SSE1-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
   3981 ; CHECK-SSE1-NEXT:    andb 30(%rdx), %al
   3982 ; CHECK-SSE1-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
   3983 ; CHECK-SSE1-NEXT:    movb 31(%r13), %r13b
   3984 ; CHECK-SSE1-NEXT:    movb 31(%rbx), %bl
   3985 ; CHECK-SSE1-NEXT:    xorb %r13b, %bl
   3986 ; CHECK-SSE1-NEXT:    andb 31(%rdx), %bl
   3987 ; CHECK-SSE1-NEXT:    xorb %r13b, %bl
   3988 ; CHECK-SSE1-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
   3989 ; CHECK-SSE1-NEXT:    movb %bl, 31(%r13)
   3990 ; CHECK-SSE1-NEXT:    movb %al, 30(%r13)
   3991 ; CHECK-SSE1-NEXT:    movb %cl, 29(%r13)
   3992 ; CHECK-SSE1-NEXT:    movb %sil, 28(%r13)
   3993 ; CHECK-SSE1-NEXT:    movb %dil, 27(%r13)
   3994 ; CHECK-SSE1-NEXT:    movb %r8b, 26(%r13)
   3995 ; CHECK-SSE1-NEXT:    movb %r9b, 25(%r13)
   3996 ; CHECK-SSE1-NEXT:    movb %r10b, 24(%r13)
   3997 ; CHECK-SSE1-NEXT:    movb %r11b, 23(%r13)
   3998 ; CHECK-SSE1-NEXT:    movb %bpl, 22(%r13)
   3999 ; CHECK-SSE1-NEXT:    movb %r15b, 21(%r13)
   4000 ; CHECK-SSE1-NEXT:    movb %r14b, 20(%r13)
   4001 ; CHECK-SSE1-NEXT:    movb %r12b, 19(%r13)
   4002 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4003 ; CHECK-SSE1-NEXT:    movb %al, 18(%r13)
   4004 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4005 ; CHECK-SSE1-NEXT:    movb %al, 17(%r13)
   4006 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4007 ; CHECK-SSE1-NEXT:    movb %al, 16(%r13)
   4008 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4009 ; CHECK-SSE1-NEXT:    movb %al, 15(%r13)
   4010 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4011 ; CHECK-SSE1-NEXT:    movb %al, 14(%r13)
   4012 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4013 ; CHECK-SSE1-NEXT:    movb %al, 13(%r13)
   4014 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4015 ; CHECK-SSE1-NEXT:    movb %al, 12(%r13)
   4016 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4017 ; CHECK-SSE1-NEXT:    movb %al, 11(%r13)
   4018 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4019 ; CHECK-SSE1-NEXT:    movb %al, 10(%r13)
   4020 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4021 ; CHECK-SSE1-NEXT:    movb %al, 9(%r13)
   4022 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4023 ; CHECK-SSE1-NEXT:    movb %al, 8(%r13)
   4024 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4025 ; CHECK-SSE1-NEXT:    movb %al, 7(%r13)
   4026 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4027 ; CHECK-SSE1-NEXT:    movb %al, 6(%r13)
   4028 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4029 ; CHECK-SSE1-NEXT:    movb %al, 5(%r13)
   4030 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4031 ; CHECK-SSE1-NEXT:    movb %al, 4(%r13)
   4032 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4033 ; CHECK-SSE1-NEXT:    movb %al, 3(%r13)
   4034 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4035 ; CHECK-SSE1-NEXT:    movb %al, 2(%r13)
   4036 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4037 ; CHECK-SSE1-NEXT:    movb %al, 1(%r13)
   4038 ; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
   4039 ; CHECK-SSE1-NEXT:    movb %al, (%r13)
   4040 ; CHECK-SSE1-NEXT:    movq %r13, %rax
   4041 ; CHECK-SSE1-NEXT:    popq %rbx
   4042 ; CHECK-SSE1-NEXT:    popq %r12
   4043 ; CHECK-SSE1-NEXT:    popq %r13
   4044 ; CHECK-SSE1-NEXT:    popq %r14
   4045 ; CHECK-SSE1-NEXT:    popq %r15
   4046 ; CHECK-SSE1-NEXT:    popq %rbp
   4047 ; CHECK-SSE1-NEXT:    retq
   4048 ;
   4049 ; CHECK-SSE2-LABEL: in_v32i8:
   4050 ; CHECK-SSE2:       # %bb.0:
   4051 ; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
   4052 ; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
   4053 ; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm2
   4054 ; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm2
   4055 ; CHECK-SSE2-NEXT:    andps (%rdi), %xmm0
   4056 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   4057 ; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm2
   4058 ; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm2
   4059 ; CHECK-SSE2-NEXT:    andps 16(%rdi), %xmm1
   4060 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
   4061 ; CHECK-SSE2-NEXT:    retq
   4062 ;
   4063 ; CHECK-XOP-LABEL: in_v32i8:
   4064 ; CHECK-XOP:       # %bb.0:
   4065 ; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
   4066 ; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
   4067 ; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
   4068 ; CHECK-XOP-NEXT:    retq
   4069   %x = load <32 x i8>, <32 x i8> *%px, align 32
   4070   %y = load <32 x i8>, <32 x i8> *%py, align 32
   4071   %mask = load <32 x i8>, <32 x i8> *%pmask, align 32
   4072   %n0 = xor <32 x i8> %x, %y
   4073   %n1 = and <32 x i8> %n0, %mask
   4074   %r = xor <32 x i8> %n1, %y
   4075   ret <32 x i8> %r
   4076 }
   4077 
   4078 define <16 x i16> @in_v16i16(<16 x i16> *%px, <16 x i16> *%py, <16 x i16> *%pmask) nounwind {
        ; "in" form of the masked merge on a 256-bit <16 x i16> vector whose three
        ; operands are loaded from memory:
        ;     r = ((x ^ y) & mask) ^ y
        ; Each result bit is taken from x where the mask bit is set and from y where
        ; it is clear.
        ;
        ; NOTE: the assertions below are autogenerated by
        ; utils/update_llc_test_checks.py (see the note at the top of the file);
        ; regenerate them with that script rather than editing them by hand.
        ;
        ; No SSE: the expected code is scalar. Lanes are combined one 16-bit element
        ; at a time with xorw / andw / xorl, the results are stored through %rdi,
        ; and %rdi is returned in %rax.
   4079 ; CHECK-BASELINE-LABEL: in_v16i16:
   4080 ; CHECK-BASELINE:       # %bb.0:
   4081 ; CHECK-BASELINE-NEXT:    pushq %rbp
   4082 ; CHECK-BASELINE-NEXT:    pushq %r15
   4083 ; CHECK-BASELINE-NEXT:    pushq %r14
   4084 ; CHECK-BASELINE-NEXT:    pushq %r13
   4085 ; CHECK-BASELINE-NEXT:    pushq %r12
   4086 ; CHECK-BASELINE-NEXT:    pushq %rbx
   4087 ; CHECK-BASELINE-NEXT:    movq %rcx, %r8
   4088 ; CHECK-BASELINE-NEXT:    movzwl 30(%rdx), %eax
   4089 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4090 ; CHECK-BASELINE-NEXT:    movl 28(%rdx), %eax
   4091 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4092 ; CHECK-BASELINE-NEXT:    movzwl 26(%rdx), %eax
   4093 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4094 ; CHECK-BASELINE-NEXT:    movl 24(%rdx), %eax
   4095 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4096 ; CHECK-BASELINE-NEXT:    movzwl 22(%rdx), %eax
   4097 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4098 ; CHECK-BASELINE-NEXT:    movl 20(%rdx), %r13d
   4099 ; CHECK-BASELINE-NEXT:    movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4100 ; CHECK-BASELINE-NEXT:    movzwl 18(%rdx), %r11d
   4101 ; CHECK-BASELINE-NEXT:    movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4102 ; CHECK-BASELINE-NEXT:    movl 16(%rdx), %r14d
   4103 ; CHECK-BASELINE-NEXT:    movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4104 ; CHECK-BASELINE-NEXT:    movzwl 14(%rdx), %r15d
   4105 ; CHECK-BASELINE-NEXT:    movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4106 ; CHECK-BASELINE-NEXT:    movl 12(%rdx), %r12d
   4107 ; CHECK-BASELINE-NEXT:    movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4108 ; CHECK-BASELINE-NEXT:    movzwl 10(%rdx), %r10d
   4109 ; CHECK-BASELINE-NEXT:    movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4110 ; CHECK-BASELINE-NEXT:    movl 8(%rdx), %r9d
   4111 ; CHECK-BASELINE-NEXT:    movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4112 ; CHECK-BASELINE-NEXT:    movzwl 6(%rdx), %ebx
   4113 ; CHECK-BASELINE-NEXT:    movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4114 ; CHECK-BASELINE-NEXT:    movl (%rdx), %eax
   4115 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4116 ; CHECK-BASELINE-NEXT:    movl 4(%rdx), %ebp
   4117 ; CHECK-BASELINE-NEXT:    movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4118 ; CHECK-BASELINE-NEXT:    movzwl 2(%rdx), %ecx
   4119 ; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4120 ; CHECK-BASELINE-NEXT:    movzwl (%rsi), %edx
   4121 ; CHECK-BASELINE-NEXT:    xorw %ax, %dx
   4122 ; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4123 ; CHECK-BASELINE-NEXT:    movzwl 2(%rsi), %eax
   4124 ; CHECK-BASELINE-NEXT:    xorw %cx, %ax
   4125 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4126 ; CHECK-BASELINE-NEXT:    movzwl 4(%rsi), %eax
   4127 ; CHECK-BASELINE-NEXT:    xorw %bp, %ax
   4128 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4129 ; CHECK-BASELINE-NEXT:    movzwl 6(%rsi), %edx
   4130 ; CHECK-BASELINE-NEXT:    xorw %bx, %dx
   4131 ; CHECK-BASELINE-NEXT:    movl %edx, %eax
   4132 ; CHECK-BASELINE-NEXT:    movzwl 8(%rsi), %ecx
   4133 ; CHECK-BASELINE-NEXT:    xorw %r9w, %cx
   4134 ; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4135 ; CHECK-BASELINE-NEXT:    movzwl 10(%rsi), %edx
   4136 ; CHECK-BASELINE-NEXT:    xorw %r10w, %dx
   4137 ; CHECK-BASELINE-NEXT:    movl %edx, %ecx
   4138 ; CHECK-BASELINE-NEXT:    movzwl 12(%rsi), %edx
   4139 ; CHECK-BASELINE-NEXT:    xorw %r12w, %dx
   4140 ; CHECK-BASELINE-NEXT:    movzwl 14(%rsi), %r12d
   4141 ; CHECK-BASELINE-NEXT:    xorw %r15w, %r12w
   4142 ; CHECK-BASELINE-NEXT:    movzwl 16(%rsi), %r15d
   4143 ; CHECK-BASELINE-NEXT:    xorw %r14w, %r15w
   4144 ; CHECK-BASELINE-NEXT:    movzwl 18(%rsi), %r14d
   4145 ; CHECK-BASELINE-NEXT:    xorw %r11w, %r14w
   4146 ; CHECK-BASELINE-NEXT:    movzwl 20(%rsi), %ebp
   4147 ; CHECK-BASELINE-NEXT:    xorw %r13w, %bp
   4148 ; CHECK-BASELINE-NEXT:    movzwl 22(%rsi), %ebx
   4149 ; CHECK-BASELINE-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload
   4150 ; CHECK-BASELINE-NEXT:    movzwl 24(%rsi), %r11d
   4151 ; CHECK-BASELINE-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload
   4152 ; CHECK-BASELINE-NEXT:    movzwl 26(%rsi), %r10d
   4153 ; CHECK-BASELINE-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %r10w # 2-byte Folded Reload
   4154 ; CHECK-BASELINE-NEXT:    movzwl 28(%rsi), %r9d
   4155 ; CHECK-BASELINE-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %r9w # 2-byte Folded Reload
   4156 ; CHECK-BASELINE-NEXT:    movzwl 30(%rsi), %r13d
   4157 ; CHECK-BASELINE-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %r13w # 2-byte Folded Reload
   4158 ; CHECK-BASELINE-NEXT:    andw 30(%r8), %r13w
   4159 ; CHECK-BASELINE-NEXT:    andw 28(%r8), %r9w
   4160 ; CHECK-BASELINE-NEXT:    andw 26(%r8), %r10w
   4161 ; CHECK-BASELINE-NEXT:    andw 24(%r8), %r11w
   4162 ; CHECK-BASELINE-NEXT:    andw 22(%r8), %bx
   4163 ; CHECK-BASELINE-NEXT:    andw 20(%r8), %bp
   4164 ; CHECK-BASELINE-NEXT:    andw 18(%r8), %r14w
   4165 ; CHECK-BASELINE-NEXT:    andw 16(%r8), %r15w
   4166 ; CHECK-BASELINE-NEXT:    andw 14(%r8), %r12w
   4167 ; CHECK-BASELINE-NEXT:    andw 12(%r8), %dx
   4168 ; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4169 ; CHECK-BASELINE-NEXT:    andw 10(%r8), %cx
   4170 ; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4171 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
   4172 ; CHECK-BASELINE-NEXT:    andw 8(%r8), %dx
   4173 ; CHECK-BASELINE-NEXT:    andw 6(%r8), %ax
   4174 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4175 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   4176 ; CHECK-BASELINE-NEXT:    andw 4(%r8), %cx
   4177 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
   4178 ; CHECK-BASELINE-NEXT:    andw 2(%r8), %ax
   4179 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
   4180 ; CHECK-BASELINE-NEXT:    andw (%r8), %si
   4181 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload
   4182 ; CHECK-BASELINE-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4183 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
   4184 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4185 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload
   4186 ; CHECK-BASELINE-NEXT:    movl %ecx, %esi
   4187 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   4188 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload
   4189 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
   4190 ; CHECK-BASELINE-NEXT:    movl %edx, %r8d
   4191 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
   4192 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
   4193 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
   4194 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
   4195 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload
   4196 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload
   4197 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload
   4198 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
   4199 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
   4200 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload
   4201 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
   4202 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload
   4203 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload
   4204 ; CHECK-BASELINE-NEXT:    movw %r13w, 30(%rdi)
   4205 ; CHECK-BASELINE-NEXT:    movw %r9w, 28(%rdi)
   4206 ; CHECK-BASELINE-NEXT:    movw %r10w, 26(%rdi)
   4207 ; CHECK-BASELINE-NEXT:    movw %r11w, 24(%rdi)
   4208 ; CHECK-BASELINE-NEXT:    movw %bx, 22(%rdi)
   4209 ; CHECK-BASELINE-NEXT:    movw %bp, 20(%rdi)
   4210 ; CHECK-BASELINE-NEXT:    movw %r14w, 18(%rdi)
   4211 ; CHECK-BASELINE-NEXT:    movw %r15w, 16(%rdi)
   4212 ; CHECK-BASELINE-NEXT:    movw %r12w, 14(%rdi)
   4213 ; CHECK-BASELINE-NEXT:    movw %ax, 12(%rdi)
   4214 ; CHECK-BASELINE-NEXT:    movw %dx, 10(%rdi)
   4215 ; CHECK-BASELINE-NEXT:    movw %r8w, 8(%rdi)
   4216 ; CHECK-BASELINE-NEXT:    movw %cx, 6(%rdi)
   4217 ; CHECK-BASELINE-NEXT:    movw %si, 4(%rdi)
   4218 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
   4219 ; CHECK-BASELINE-NEXT:    movw %ax, 2(%rdi)
   4220 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
   4221 ; CHECK-BASELINE-NEXT:    movw %ax, (%rdi)
   4222 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   4223 ; CHECK-BASELINE-NEXT:    popq %rbx
   4224 ; CHECK-BASELINE-NEXT:    popq %r12
   4225 ; CHECK-BASELINE-NEXT:    popq %r13
   4226 ; CHECK-BASELINE-NEXT:    popq %r14
   4227 ; CHECK-BASELINE-NEXT:    popq %r15
   4228 ; CHECK-BASELINE-NEXT:    popq %rbp
   4229 ; CHECK-BASELINE-NEXT:    retq
   4230 ;
        ; SSE1 only (+sse,-sse2): the expected code appears to be the same scalar
        ; sequence as the no-SSE baseline above.
   4231 ; CHECK-SSE1-LABEL: in_v16i16:
   4232 ; CHECK-SSE1:       # %bb.0:
   4233 ; CHECK-SSE1-NEXT:    pushq %rbp
   4234 ; CHECK-SSE1-NEXT:    pushq %r15
   4235 ; CHECK-SSE1-NEXT:    pushq %r14
   4236 ; CHECK-SSE1-NEXT:    pushq %r13
   4237 ; CHECK-SSE1-NEXT:    pushq %r12
   4238 ; CHECK-SSE1-NEXT:    pushq %rbx
   4239 ; CHECK-SSE1-NEXT:    movq %rcx, %r8
   4240 ; CHECK-SSE1-NEXT:    movzwl 30(%rdx), %eax
   4241 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4242 ; CHECK-SSE1-NEXT:    movl 28(%rdx), %eax
   4243 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4244 ; CHECK-SSE1-NEXT:    movzwl 26(%rdx), %eax
   4245 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4246 ; CHECK-SSE1-NEXT:    movl 24(%rdx), %eax
   4247 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4248 ; CHECK-SSE1-NEXT:    movzwl 22(%rdx), %eax
   4249 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4250 ; CHECK-SSE1-NEXT:    movl 20(%rdx), %r13d
   4251 ; CHECK-SSE1-NEXT:    movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4252 ; CHECK-SSE1-NEXT:    movzwl 18(%rdx), %r11d
   4253 ; CHECK-SSE1-NEXT:    movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4254 ; CHECK-SSE1-NEXT:    movl 16(%rdx), %r14d
   4255 ; CHECK-SSE1-NEXT:    movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4256 ; CHECK-SSE1-NEXT:    movzwl 14(%rdx), %r15d
   4257 ; CHECK-SSE1-NEXT:    movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4258 ; CHECK-SSE1-NEXT:    movl 12(%rdx), %r12d
   4259 ; CHECK-SSE1-NEXT:    movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4260 ; CHECK-SSE1-NEXT:    movzwl 10(%rdx), %r10d
   4261 ; CHECK-SSE1-NEXT:    movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4262 ; CHECK-SSE1-NEXT:    movl 8(%rdx), %r9d
   4263 ; CHECK-SSE1-NEXT:    movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4264 ; CHECK-SSE1-NEXT:    movzwl 6(%rdx), %ebx
   4265 ; CHECK-SSE1-NEXT:    movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4266 ; CHECK-SSE1-NEXT:    movl (%rdx), %eax
   4267 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4268 ; CHECK-SSE1-NEXT:    movl 4(%rdx), %ebp
   4269 ; CHECK-SSE1-NEXT:    movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4270 ; CHECK-SSE1-NEXT:    movzwl 2(%rdx), %ecx
   4271 ; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4272 ; CHECK-SSE1-NEXT:    movzwl (%rsi), %edx
   4273 ; CHECK-SSE1-NEXT:    xorw %ax, %dx
   4274 ; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4275 ; CHECK-SSE1-NEXT:    movzwl 2(%rsi), %eax
   4276 ; CHECK-SSE1-NEXT:    xorw %cx, %ax
   4277 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4278 ; CHECK-SSE1-NEXT:    movzwl 4(%rsi), %eax
   4279 ; CHECK-SSE1-NEXT:    xorw %bp, %ax
   4280 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4281 ; CHECK-SSE1-NEXT:    movzwl 6(%rsi), %edx
   4282 ; CHECK-SSE1-NEXT:    xorw %bx, %dx
   4283 ; CHECK-SSE1-NEXT:    movl %edx, %eax
   4284 ; CHECK-SSE1-NEXT:    movzwl 8(%rsi), %ecx
   4285 ; CHECK-SSE1-NEXT:    xorw %r9w, %cx
   4286 ; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4287 ; CHECK-SSE1-NEXT:    movzwl 10(%rsi), %edx
   4288 ; CHECK-SSE1-NEXT:    xorw %r10w, %dx
   4289 ; CHECK-SSE1-NEXT:    movl %edx, %ecx
   4290 ; CHECK-SSE1-NEXT:    movzwl 12(%rsi), %edx
   4291 ; CHECK-SSE1-NEXT:    xorw %r12w, %dx
   4292 ; CHECK-SSE1-NEXT:    movzwl 14(%rsi), %r12d
   4293 ; CHECK-SSE1-NEXT:    xorw %r15w, %r12w
   4294 ; CHECK-SSE1-NEXT:    movzwl 16(%rsi), %r15d
   4295 ; CHECK-SSE1-NEXT:    xorw %r14w, %r15w
   4296 ; CHECK-SSE1-NEXT:    movzwl 18(%rsi), %r14d
   4297 ; CHECK-SSE1-NEXT:    xorw %r11w, %r14w
   4298 ; CHECK-SSE1-NEXT:    movzwl 20(%rsi), %ebp
   4299 ; CHECK-SSE1-NEXT:    xorw %r13w, %bp
   4300 ; CHECK-SSE1-NEXT:    movzwl 22(%rsi), %ebx
   4301 ; CHECK-SSE1-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload
   4302 ; CHECK-SSE1-NEXT:    movzwl 24(%rsi), %r11d
   4303 ; CHECK-SSE1-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload
   4304 ; CHECK-SSE1-NEXT:    movzwl 26(%rsi), %r10d
   4305 ; CHECK-SSE1-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %r10w # 2-byte Folded Reload
   4306 ; CHECK-SSE1-NEXT:    movzwl 28(%rsi), %r9d
   4307 ; CHECK-SSE1-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %r9w # 2-byte Folded Reload
   4308 ; CHECK-SSE1-NEXT:    movzwl 30(%rsi), %r13d
   4309 ; CHECK-SSE1-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %r13w # 2-byte Folded Reload
   4310 ; CHECK-SSE1-NEXT:    andw 30(%r8), %r13w
   4311 ; CHECK-SSE1-NEXT:    andw 28(%r8), %r9w
   4312 ; CHECK-SSE1-NEXT:    andw 26(%r8), %r10w
   4313 ; CHECK-SSE1-NEXT:    andw 24(%r8), %r11w
   4314 ; CHECK-SSE1-NEXT:    andw 22(%r8), %bx
   4315 ; CHECK-SSE1-NEXT:    andw 20(%r8), %bp
   4316 ; CHECK-SSE1-NEXT:    andw 18(%r8), %r14w
   4317 ; CHECK-SSE1-NEXT:    andw 16(%r8), %r15w
   4318 ; CHECK-SSE1-NEXT:    andw 14(%r8), %r12w
   4319 ; CHECK-SSE1-NEXT:    andw 12(%r8), %dx
   4320 ; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4321 ; CHECK-SSE1-NEXT:    andw 10(%r8), %cx
   4322 ; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4323 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
   4324 ; CHECK-SSE1-NEXT:    andw 8(%r8), %dx
   4325 ; CHECK-SSE1-NEXT:    andw 6(%r8), %ax
   4326 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4327 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   4328 ; CHECK-SSE1-NEXT:    andw 4(%r8), %cx
   4329 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
   4330 ; CHECK-SSE1-NEXT:    andw 2(%r8), %ax
   4331 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
   4332 ; CHECK-SSE1-NEXT:    andw (%r8), %si
   4333 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload
   4334 ; CHECK-SSE1-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4335 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
   4336 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4337 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload
   4338 ; CHECK-SSE1-NEXT:    movl %ecx, %esi
   4339 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
   4340 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload
   4341 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
   4342 ; CHECK-SSE1-NEXT:    movl %edx, %r8d
   4343 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
   4344 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
   4345 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
   4346 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
   4347 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload
   4348 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload
   4349 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload
   4350 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
   4351 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
   4352 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload
   4353 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload
   4354 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload
   4355 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload
   4356 ; CHECK-SSE1-NEXT:    movw %r13w, 30(%rdi)
   4357 ; CHECK-SSE1-NEXT:    movw %r9w, 28(%rdi)
   4358 ; CHECK-SSE1-NEXT:    movw %r10w, 26(%rdi)
   4359 ; CHECK-SSE1-NEXT:    movw %r11w, 24(%rdi)
   4360 ; CHECK-SSE1-NEXT:    movw %bx, 22(%rdi)
   4361 ; CHECK-SSE1-NEXT:    movw %bp, 20(%rdi)
   4362 ; CHECK-SSE1-NEXT:    movw %r14w, 18(%rdi)
   4363 ; CHECK-SSE1-NEXT:    movw %r15w, 16(%rdi)
   4364 ; CHECK-SSE1-NEXT:    movw %r12w, 14(%rdi)
   4365 ; CHECK-SSE1-NEXT:    movw %ax, 12(%rdi)
   4366 ; CHECK-SSE1-NEXT:    movw %dx, 10(%rdi)
   4367 ; CHECK-SSE1-NEXT:    movw %r8w, 8(%rdi)
   4368 ; CHECK-SSE1-NEXT:    movw %cx, 6(%rdi)
   4369 ; CHECK-SSE1-NEXT:    movw %si, 4(%rdi)
   4370 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
   4371 ; CHECK-SSE1-NEXT:    movw %ax, 2(%rdi)
   4372 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
   4373 ; CHECK-SSE1-NEXT:    movw %ax, (%rdi)
   4374 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   4375 ; CHECK-SSE1-NEXT:    popq %rbx
   4376 ; CHECK-SSE1-NEXT:    popq %r12
   4377 ; CHECK-SSE1-NEXT:    popq %r13
   4378 ; CHECK-SSE1-NEXT:    popq %r14
   4379 ; CHECK-SSE1-NEXT:    popq %r15
   4380 ; CHECK-SSE1-NEXT:    popq %rbp
   4381 ; CHECK-SSE1-NEXT:    retq
   4382 ;
        ; SSE2: the merge is expected as andnps / andps / orps, done once per
        ; 128-bit half, with the two halves of the result left in %xmm0 and %xmm1.
   4383 ; CHECK-SSE2-LABEL: in_v16i16:
   4384 ; CHECK-SSE2:       # %bb.0:
   4385 ; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
   4386 ; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
   4387 ; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm2
   4388 ; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm2
   4389 ; CHECK-SSE2-NEXT:    andps (%rdi), %xmm0
   4390 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   4391 ; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm2
   4392 ; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm2
   4393 ; CHECK-SSE2-NEXT:    andps 16(%rdi), %xmm1
   4394 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
   4395 ; CHECK-SSE2-NEXT:    retq
   4396 ;
        ; XOP: a single 256-bit vpcmov on %ymm registers is expected to perform the
        ; whole merge.
   4397 ; CHECK-XOP-LABEL: in_v16i16:
   4398 ; CHECK-XOP:       # %bb.0:
   4399 ; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
   4400 ; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
   4401 ; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
   4402 ; CHECK-XOP-NEXT:    retq
          ; Load the three operands (each 32-byte aligned), then apply the
          ; xor / and / xor sequence that forms the "in" masked-merge pattern.
   4403   %x = load <16 x i16>, <16 x i16> *%px, align 32
   4404   %y = load <16 x i16>, <16 x i16> *%py, align 32
   4405   %mask = load <16 x i16>, <16 x i16> *%pmask, align 32
   4406   %n0 = xor <16 x i16> %x, %y
   4407   %n1 = and <16 x i16> %n0, %mask
   4408   %r = xor <16 x i16> %n1, %y
   4409   ret <16 x i16> %r
   4410 }
   4411 
   4412 define <8 x i32> @in_v8i32(<8 x i32> *%px, <8 x i32> *%py, <8 x i32> *%pmask) nounwind {
   4413 ; CHECK-BASELINE-LABEL: in_v8i32:
   4414 ; CHECK-BASELINE:       # %bb.0:
   4415 ; CHECK-BASELINE-NEXT:    pushq %rbp
   4416 ; CHECK-BASELINE-NEXT:    pushq %r15
   4417 ; CHECK-BASELINE-NEXT:    pushq %r14
   4418 ; CHECK-BASELINE-NEXT:    pushq %r13
   4419 ; CHECK-BASELINE-NEXT:    pushq %r12
   4420 ; CHECK-BASELINE-NEXT:    pushq %rbx
   4421 ; CHECK-BASELINE-NEXT:    movl 28(%rdx), %r15d
   4422 ; CHECK-BASELINE-NEXT:    movl 24(%rdx), %r14d
   4423 ; CHECK-BASELINE-NEXT:    movl 20(%rdx), %r10d
   4424 ; CHECK-BASELINE-NEXT:    movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4425 ; CHECK-BASELINE-NEXT:    movl 16(%rdx), %eax
   4426 ; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4427 ; CHECK-BASELINE-NEXT:    movl 12(%rdx), %ebp
   4428 ; CHECK-BASELINE-NEXT:    movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4429 ; CHECK-BASELINE-NEXT:    movl 8(%rdx), %ebx
   4430 ; CHECK-BASELINE-NEXT:    movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4431 ; CHECK-BASELINE-NEXT:    movl (%rdx), %r12d
   4432 ; CHECK-BASELINE-NEXT:    movl 4(%rdx), %r13d
   4433 ; CHECK-BASELINE-NEXT:    movl (%rsi), %r11d
   4434 ; CHECK-BASELINE-NEXT:    xorl %r12d, %r11d
   4435 ; CHECK-BASELINE-NEXT:    movl 4(%rsi), %r9d
   4436 ; CHECK-BASELINE-NEXT:    xorl %r13d, %r9d
   4437 ; CHECK-BASELINE-NEXT:    movl 8(%rsi), %r8d
   4438 ; CHECK-BASELINE-NEXT:    xorl %ebx, %r8d
   4439 ; CHECK-BASELINE-NEXT:    movl 12(%rsi), %ebx
   4440 ; CHECK-BASELINE-NEXT:    xorl %ebp, %ebx
   4441 ; CHECK-BASELINE-NEXT:    movl 16(%rsi), %ebp
   4442 ; CHECK-BASELINE-NEXT:    xorl %eax, %ebp
   4443 ; CHECK-BASELINE-NEXT:    movl 20(%rsi), %edx
   4444 ; CHECK-BASELINE-NEXT:    xorl %r10d, %edx
   4445 ; CHECK-BASELINE-NEXT:    movl 24(%rsi), %eax
   4446 ; CHECK-BASELINE-NEXT:    xorl %r14d, %eax
   4447 ; CHECK-BASELINE-NEXT:    movl 28(%rsi), %esi
   4448 ; CHECK-BASELINE-NEXT:    xorl %r15d, %esi
   4449 ; CHECK-BASELINE-NEXT:    andl 28(%rcx), %esi
   4450 ; CHECK-BASELINE-NEXT:    andl 24(%rcx), %eax
   4451 ; CHECK-BASELINE-NEXT:    andl 20(%rcx), %edx
   4452 ; CHECK-BASELINE-NEXT:    andl 16(%rcx), %ebp
   4453 ; CHECK-BASELINE-NEXT:    andl 12(%rcx), %ebx
   4454 ; CHECK-BASELINE-NEXT:    andl 8(%rcx), %r8d
   4455 ; CHECK-BASELINE-NEXT:    andl 4(%rcx), %r9d
   4456 ; CHECK-BASELINE-NEXT:    andl (%rcx), %r11d
   4457 ; CHECK-BASELINE-NEXT:    xorl %r12d, %r11d
   4458 ; CHECK-BASELINE-NEXT:    xorl %r13d, %r9d
   4459 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
   4460 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
   4461 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
   4462 ; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
   4463 ; CHECK-BASELINE-NEXT:    xorl %r14d, %eax
   4464 ; CHECK-BASELINE-NEXT:    xorl %r15d, %esi
   4465 ; CHECK-BASELINE-NEXT:    movl %esi, 28(%rdi)
   4466 ; CHECK-BASELINE-NEXT:    movl %eax, 24(%rdi)
   4467 ; CHECK-BASELINE-NEXT:    movl %edx, 20(%rdi)
   4468 ; CHECK-BASELINE-NEXT:    movl %ebp, 16(%rdi)
   4469 ; CHECK-BASELINE-NEXT:    movl %ebx, 12(%rdi)
   4470 ; CHECK-BASELINE-NEXT:    movl %r8d, 8(%rdi)
   4471 ; CHECK-BASELINE-NEXT:    movl %r9d, 4(%rdi)
   4472 ; CHECK-BASELINE-NEXT:    movl %r11d, (%rdi)
   4473 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   4474 ; CHECK-BASELINE-NEXT:    popq %rbx
   4475 ; CHECK-BASELINE-NEXT:    popq %r12
   4476 ; CHECK-BASELINE-NEXT:    popq %r13
   4477 ; CHECK-BASELINE-NEXT:    popq %r14
   4478 ; CHECK-BASELINE-NEXT:    popq %r15
   4479 ; CHECK-BASELINE-NEXT:    popq %rbp
   4480 ; CHECK-BASELINE-NEXT:    retq
   4481 ;
   4482 ; CHECK-SSE1-LABEL: in_v8i32:
   4483 ; CHECK-SSE1:       # %bb.0:
   4484 ; CHECK-SSE1-NEXT:    pushq %rbp
   4485 ; CHECK-SSE1-NEXT:    pushq %r15
   4486 ; CHECK-SSE1-NEXT:    pushq %r14
   4487 ; CHECK-SSE1-NEXT:    pushq %r13
   4488 ; CHECK-SSE1-NEXT:    pushq %r12
   4489 ; CHECK-SSE1-NEXT:    pushq %rbx
   4490 ; CHECK-SSE1-NEXT:    movl 28(%rdx), %r15d
   4491 ; CHECK-SSE1-NEXT:    movl 24(%rdx), %r14d
   4492 ; CHECK-SSE1-NEXT:    movl 20(%rdx), %r10d
   4493 ; CHECK-SSE1-NEXT:    movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4494 ; CHECK-SSE1-NEXT:    movl 16(%rdx), %eax
   4495 ; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4496 ; CHECK-SSE1-NEXT:    movl 12(%rdx), %ebp
   4497 ; CHECK-SSE1-NEXT:    movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4498 ; CHECK-SSE1-NEXT:    movl 8(%rdx), %ebx
   4499 ; CHECK-SSE1-NEXT:    movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
   4500 ; CHECK-SSE1-NEXT:    movl (%rdx), %r12d
   4501 ; CHECK-SSE1-NEXT:    movl 4(%rdx), %r13d
   4502 ; CHECK-SSE1-NEXT:    movl (%rsi), %r11d
   4503 ; CHECK-SSE1-NEXT:    xorl %r12d, %r11d
   4504 ; CHECK-SSE1-NEXT:    movl 4(%rsi), %r9d
   4505 ; CHECK-SSE1-NEXT:    xorl %r13d, %r9d
   4506 ; CHECK-SSE1-NEXT:    movl 8(%rsi), %r8d
   4507 ; CHECK-SSE1-NEXT:    xorl %ebx, %r8d
   4508 ; CHECK-SSE1-NEXT:    movl 12(%rsi), %ebx
   4509 ; CHECK-SSE1-NEXT:    xorl %ebp, %ebx
   4510 ; CHECK-SSE1-NEXT:    movl 16(%rsi), %ebp
   4511 ; CHECK-SSE1-NEXT:    xorl %eax, %ebp
   4512 ; CHECK-SSE1-NEXT:    movl 20(%rsi), %edx
   4513 ; CHECK-SSE1-NEXT:    xorl %r10d, %edx
   4514 ; CHECK-SSE1-NEXT:    movl 24(%rsi), %eax
   4515 ; CHECK-SSE1-NEXT:    xorl %r14d, %eax
   4516 ; CHECK-SSE1-NEXT:    movl 28(%rsi), %esi
   4517 ; CHECK-SSE1-NEXT:    xorl %r15d, %esi
   4518 ; CHECK-SSE1-NEXT:    andl 28(%rcx), %esi
   4519 ; CHECK-SSE1-NEXT:    andl 24(%rcx), %eax
   4520 ; CHECK-SSE1-NEXT:    andl 20(%rcx), %edx
   4521 ; CHECK-SSE1-NEXT:    andl 16(%rcx), %ebp
   4522 ; CHECK-SSE1-NEXT:    andl 12(%rcx), %ebx
   4523 ; CHECK-SSE1-NEXT:    andl 8(%rcx), %r8d
   4524 ; CHECK-SSE1-NEXT:    andl 4(%rcx), %r9d
   4525 ; CHECK-SSE1-NEXT:    andl (%rcx), %r11d
   4526 ; CHECK-SSE1-NEXT:    xorl %r12d, %r11d
   4527 ; CHECK-SSE1-NEXT:    xorl %r13d, %r9d
   4528 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
   4529 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
   4530 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
   4531 ; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
   4532 ; CHECK-SSE1-NEXT:    xorl %r14d, %eax
   4533 ; CHECK-SSE1-NEXT:    xorl %r15d, %esi
   4534 ; CHECK-SSE1-NEXT:    movl %esi, 28(%rdi)
   4535 ; CHECK-SSE1-NEXT:    movl %eax, 24(%rdi)
   4536 ; CHECK-SSE1-NEXT:    movl %edx, 20(%rdi)
   4537 ; CHECK-SSE1-NEXT:    movl %ebp, 16(%rdi)
   4538 ; CHECK-SSE1-NEXT:    movl %ebx, 12(%rdi)
   4539 ; CHECK-SSE1-NEXT:    movl %r8d, 8(%rdi)
   4540 ; CHECK-SSE1-NEXT:    movl %r9d, 4(%rdi)
   4541 ; CHECK-SSE1-NEXT:    movl %r11d, (%rdi)
   4542 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   4543 ; CHECK-SSE1-NEXT:    popq %rbx
   4544 ; CHECK-SSE1-NEXT:    popq %r12
   4545 ; CHECK-SSE1-NEXT:    popq %r13
   4546 ; CHECK-SSE1-NEXT:    popq %r14
   4547 ; CHECK-SSE1-NEXT:    popq %r15
   4548 ; CHECK-SSE1-NEXT:    popq %rbp
   4549 ; CHECK-SSE1-NEXT:    retq
   4550 ;
   4551 ; CHECK-SSE2-LABEL: in_v8i32:
   4552 ; CHECK-SSE2:       # %bb.0:
   4553 ; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
   4554 ; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
   4555 ; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm2
   4556 ; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm2
   4557 ; CHECK-SSE2-NEXT:    andps (%rdi), %xmm0
   4558 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   4559 ; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm2
   4560 ; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm2
   4561 ; CHECK-SSE2-NEXT:    andps 16(%rdi), %xmm1
   4562 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
   4563 ; CHECK-SSE2-NEXT:    retq
   4564 ;
   4565 ; CHECK-XOP-LABEL: in_v8i32:
   4566 ; CHECK-XOP:       # %bb.0:
   4567 ; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
   4568 ; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
   4569 ; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
   4570 ; CHECK-XOP-NEXT:    retq
          ; Masked merge of two <8 x i32> vectors, written in the "in" form
          ; (xor / and / xor): r = ((x ^ y) & mask) ^ y. Each result bit comes from
          ; x where the mask bit is 1 and from y where the mask bit is 0.
          ;
          ; The expectations above are autogenerated (see the NOTE on the first line
          ; of the file); regenerate them with utils/update_llc_test_checks.py rather
          ; than editing them by hand.
          ;
          ; Lowering recorded above for each llc configuration:
          ;   - no SSE, and SSE1 only (identical sequences): eight scalar 32-bit
          ;     xor/and/xor chains. Inputs are read through %rsi, %rdx and %rcx, the
          ;     result is stored through %rdi, and %rdi is returned in %rax. Six
          ;     registers are pushed/popped and four of the values loaded through
          ;     %rdx are spilled to the stack and reloaded folded into the last xor.
          ;   - SSE2: andps / andnps / orps on two 16-byte halves, with the result
          ;     left in %xmm0 and %xmm1.
          ;   - XOP: one 256-bit vpcmov on %ymm registers.
          ;
          ; All three operands are loaded from memory with 32-byte alignment.
   4571   %x = load <8 x i32>, <8 x i32> *%px, align 32
   4572   %y = load <8 x i32>, <8 x i32> *%py, align 32
   4573   %mask = load <8 x i32>, <8 x i32> *%pmask, align 32
          ; n0 has a 1 in every bit position where x and y differ.
   4574   %n0 = xor <8 x i32> %x, %y
          ; n1 keeps only those differing bits that the mask selects.
   4575   %n1 = and <8 x i32> %n0, %mask
          ; Flipping the selected bits of y turns them into x's bits; all other
          ; bits remain y's.
   4576   %r = xor <8 x i32> %n1, %y
   4577   ret <8 x i32> %r
   4578 }
   4579 
   4580 define <4 x i64> @in_v4i64(<4 x i64> *%px, <4 x i64> *%py, <4 x i64> *%pmask) nounwind {
   4581 ; CHECK-BASELINE-LABEL: in_v4i64:
   4582 ; CHECK-BASELINE:       # %bb.0:
   4583 ; CHECK-BASELINE-NEXT:    pushq %rbx
   4584 ; CHECK-BASELINE-NEXT:    movq 24(%rdx), %r8
   4585 ; CHECK-BASELINE-NEXT:    movq 16(%rdx), %r9
   4586 ; CHECK-BASELINE-NEXT:    movq (%rdx), %r11
   4587 ; CHECK-BASELINE-NEXT:    movq 8(%rdx), %r10
   4588 ; CHECK-BASELINE-NEXT:    movq (%rsi), %rdx
   4589 ; CHECK-BASELINE-NEXT:    xorq %r11, %rdx
   4590 ; CHECK-BASELINE-NEXT:    movq 8(%rsi), %rax
   4591 ; CHECK-BASELINE-NEXT:    xorq %r10, %rax
   4592 ; CHECK-BASELINE-NEXT:    movq 16(%rsi), %rbx
   4593 ; CHECK-BASELINE-NEXT:    xorq %r9, %rbx
   4594 ; CHECK-BASELINE-NEXT:    movq 24(%rsi), %rsi
   4595 ; CHECK-BASELINE-NEXT:    xorq %r8, %rsi
   4596 ; CHECK-BASELINE-NEXT:    andq 24(%rcx), %rsi
   4597 ; CHECK-BASELINE-NEXT:    andq 16(%rcx), %rbx
   4598 ; CHECK-BASELINE-NEXT:    andq 8(%rcx), %rax
   4599 ; CHECK-BASELINE-NEXT:    andq (%rcx), %rdx
   4600 ; CHECK-BASELINE-NEXT:    xorq %r11, %rdx
   4601 ; CHECK-BASELINE-NEXT:    xorq %r10, %rax
   4602 ; CHECK-BASELINE-NEXT:    xorq %r9, %rbx
   4603 ; CHECK-BASELINE-NEXT:    xorq %r8, %rsi
   4604 ; CHECK-BASELINE-NEXT:    movq %rsi, 24(%rdi)
   4605 ; CHECK-BASELINE-NEXT:    movq %rbx, 16(%rdi)
   4606 ; CHECK-BASELINE-NEXT:    movq %rax, 8(%rdi)
   4607 ; CHECK-BASELINE-NEXT:    movq %rdx, (%rdi)
   4608 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
   4609 ; CHECK-BASELINE-NEXT:    popq %rbx
   4610 ; CHECK-BASELINE-NEXT:    retq
   4611 ;
   4612 ; CHECK-SSE1-LABEL: in_v4i64:
   4613 ; CHECK-SSE1:       # %bb.0:
   4614 ; CHECK-SSE1-NEXT:    pushq %rbx
   4615 ; CHECK-SSE1-NEXT:    movq 24(%rdx), %r8
   4616 ; CHECK-SSE1-NEXT:    movq 16(%rdx), %r9
   4617 ; CHECK-SSE1-NEXT:    movq (%rdx), %r11
   4618 ; CHECK-SSE1-NEXT:    movq 8(%rdx), %r10
   4619 ; CHECK-SSE1-NEXT:    movq (%rsi), %rdx
   4620 ; CHECK-SSE1-NEXT:    xorq %r11, %rdx
   4621 ; CHECK-SSE1-NEXT:    movq 8(%rsi), %rax
   4622 ; CHECK-SSE1-NEXT:    xorq %r10, %rax
   4623 ; CHECK-SSE1-NEXT:    movq 16(%rsi), %rbx
   4624 ; CHECK-SSE1-NEXT:    xorq %r9, %rbx
   4625 ; CHECK-SSE1-NEXT:    movq 24(%rsi), %rsi
   4626 ; CHECK-SSE1-NEXT:    xorq %r8, %rsi
   4627 ; CHECK-SSE1-NEXT:    andq 24(%rcx), %rsi
   4628 ; CHECK-SSE1-NEXT:    andq 16(%rcx), %rbx
   4629 ; CHECK-SSE1-NEXT:    andq 8(%rcx), %rax
   4630 ; CHECK-SSE1-NEXT:    andq (%rcx), %rdx
   4631 ; CHECK-SSE1-NEXT:    xorq %r11, %rdx
   4632 ; CHECK-SSE1-NEXT:    xorq %r10, %rax
   4633 ; CHECK-SSE1-NEXT:    xorq %r9, %rbx
   4634 ; CHECK-SSE1-NEXT:    xorq %r8, %rsi
   4635 ; CHECK-SSE1-NEXT:    movq %rsi, 24(%rdi)
   4636 ; CHECK-SSE1-NEXT:    movq %rbx, 16(%rdi)
   4637 ; CHECK-SSE1-NEXT:    movq %rax, 8(%rdi)
   4638 ; CHECK-SSE1-NEXT:    movq %rdx, (%rdi)
   4639 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
   4640 ; CHECK-SSE1-NEXT:    popq %rbx
   4641 ; CHECK-SSE1-NEXT:    retq
   4642 ;
   4643 ; CHECK-SSE2-LABEL: in_v4i64:
   4644 ; CHECK-SSE2:       # %bb.0:
   4645 ; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
   4646 ; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
   4647 ; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm2
   4648 ; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm2
   4649 ; CHECK-SSE2-NEXT:    andps (%rdi), %xmm0
   4650 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
   4651 ; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm2
   4652 ; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm2
   4653 ; CHECK-SSE2-NEXT:    andps 16(%rdi), %xmm1
   4654 ; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
   4655 ; CHECK-SSE2-NEXT:    retq
   4656 ;
   4657 ; CHECK-XOP-LABEL: in_v4i64:
   4658 ; CHECK-XOP:       # %bb.0:
   4659 ; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
   4660 ; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
   4661 ; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
   4662 ; CHECK-XOP-NEXT:    retq
          ; Masked merge of two <4 x i64> vectors, written in the "in" form
          ; (xor / and / xor): r = ((x ^ y) & mask) ^ y. Each result bit comes from
          ; x where the mask bit is 1 and from y where the mask bit is 0.
          ;
          ; The expectations above are autogenerated (see the NOTE on the first line
          ; of the file); regenerate them with utils/update_llc_test_checks.py rather
          ; than editing them by hand.
          ;
          ; Lowering recorded above for each llc configuration:
          ;   - no SSE, and SSE1 only (identical sequences): four scalar 64-bit
          ;     xor/and/xor chains. Inputs are read through %rsi, %rdx and %rcx, the
          ;     result is stored through %rdi, and %rdi is returned in %rax. Only
          ;     %rbx is pushed/popped and nothing is spilled to the stack.
          ;   - SSE2: andps / andnps / orps on two 16-byte halves, with the result
          ;     left in %xmm0 and %xmm1.
          ;   - XOP: one 256-bit vpcmov on %ymm registers.
          ;
          ; All three operands are loaded from memory with 32-byte alignment.
   4663   %x = load <4 x i64>, <4 x i64> *%px, align 32
   4664   %y = load <4 x i64>, <4 x i64> *%py, align 32
   4665   %mask = load <4 x i64>, <4 x i64> *%pmask, align 32
          ; n0 has a 1 in every bit position where x and y differ.
   4666   %n0 = xor <4 x i64> %x, %y
          ; n1 keeps only those differing bits that the mask selects.
   4667   %n1 = and <4 x i64> %n0, %mask
          ; Flipping the selected bits of y turns them into x's bits; all other
          ; bits remain y's.
   4668   %r = xor <4 x i64> %n1, %y
   4669   ret <4 x i64> %r
   4670 }
   4671